{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 29.10360884749709, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005820721769499418, "grad_norm": 3.9351627826690674, "learning_rate": 3.0000000000000004e-07, "loss": 1.1516, "step": 10 }, { "epoch": 0.011641443538998836, "grad_norm": 1.7048510313034058, "learning_rate": 6.333333333333333e-07, "loss": 1.1068, "step": 20 }, { "epoch": 0.017462165308498253, "grad_norm": 2.2272894382476807, "learning_rate": 9.666666666666668e-07, "loss": 1.0856, "step": 30 }, { "epoch": 0.023282887077997673, "grad_norm": 2.0485191345214844, "learning_rate": 1.3e-06, "loss": 1.0621, "step": 40 }, { "epoch": 0.02910360884749709, "grad_norm": 1.5548977851867676, "learning_rate": 1.6333333333333333e-06, "loss": 1.0466, "step": 50 }, { "epoch": 0.034924330616996506, "grad_norm": 1.33938729763031, "learning_rate": 1.9666666666666668e-06, "loss": 1.0383, "step": 60 }, { "epoch": 0.04074505238649592, "grad_norm": 1.3746520280838013, "learning_rate": 2.3e-06, "loss": 1.0388, "step": 70 }, { "epoch": 0.046565774155995346, "grad_norm": 1.6480227708816528, "learning_rate": 2.6333333333333337e-06, "loss": 1.0188, "step": 80 }, { "epoch": 0.05238649592549476, "grad_norm": 1.2464436292648315, "learning_rate": 2.966666666666667e-06, "loss": 1.0262, "step": 90 }, { "epoch": 0.05820721769499418, "grad_norm": 1.4086661338806152, "learning_rate": 3.3e-06, "loss": 1.0085, "step": 100 }, { "epoch": 0.0640279394644936, "grad_norm": 1.1305700540542603, "learning_rate": 3.633333333333334e-06, "loss": 0.9906, "step": 110 }, { "epoch": 0.06984866123399301, "grad_norm": 1.4148164987564087, "learning_rate": 3.966666666666667e-06, "loss": 0.9835, "step": 120 }, { "epoch": 0.07566938300349244, "grad_norm": 1.2195444107055664, "learning_rate": 4.2999999999999995e-06, "loss": 1.0032, "step": 130 }, { "epoch": 0.08149010477299184, "grad_norm": 1.1321520805358887, "learning_rate": 4.633333333333334e-06, "loss": 0.9905, "step": 140 }, { "epoch": 0.08731082654249127, "grad_norm": 2.298079490661621, "learning_rate": 4.966666666666667e-06, "loss": 0.9897, "step": 150 }, { "epoch": 0.09313154831199069, "grad_norm": 3.312469244003296, "learning_rate": 5.3e-06, "loss": 0.9545, "step": 160 }, { "epoch": 0.0989522700814901, "grad_norm": 3.0866334438323975, "learning_rate": 5.633333333333333e-06, "loss": 0.9752, "step": 170 }, { "epoch": 0.10477299185098952, "grad_norm": 1.3758505582809448, "learning_rate": 5.9666666666666666e-06, "loss": 0.9653, "step": 180 }, { "epoch": 0.11059371362048893, "grad_norm": 1.2263636589050293, "learning_rate": 6.300000000000001e-06, "loss": 0.9422, "step": 190 }, { "epoch": 0.11641443538998836, "grad_norm": 2.6361782550811768, "learning_rate": 6.633333333333333e-06, "loss": 0.9757, "step": 200 }, { "epoch": 0.12223515715948778, "grad_norm": 1.3523321151733398, "learning_rate": 6.966666666666667e-06, "loss": 0.9472, "step": 210 }, { "epoch": 0.1280558789289872, "grad_norm": 1.9542300701141357, "learning_rate": 7.2999999999999996e-06, "loss": 0.9621, "step": 220 }, { "epoch": 0.13387660069848661, "grad_norm": 2.085099935531616, "learning_rate": 7.633333333333334e-06, "loss": 0.9297, "step": 230 }, { "epoch": 0.13969732246798602, "grad_norm": 2.9092581272125244, "learning_rate": 7.966666666666666e-06, "loss": 0.9229, "step": 240 }, { "epoch": 0.14551804423748546, "grad_norm": 4.291961669921875, "learning_rate": 8.3e-06, "loss": 0.9297, "step": 250 }, { "epoch": 0.15133876600698487, "grad_norm": 4.819680213928223, "learning_rate": 8.633333333333334e-06, "loss": 0.863, "step": 260 }, { "epoch": 0.15715948777648428, "grad_norm": 4.454544544219971, "learning_rate": 8.966666666666668e-06, "loss": 0.811, "step": 270 }, { "epoch": 0.1629802095459837, "grad_norm": 6.7812676429748535, "learning_rate": 9.3e-06, "loss": 0.7846, "step": 280 }, { "epoch": 0.16880093131548313, "grad_norm": 5.737008571624756, "learning_rate": 9.633333333333335e-06, "loss": 0.7033, "step": 290 }, { "epoch": 0.17462165308498254, "grad_norm": 9.117860794067383, "learning_rate": 9.966666666666667e-06, "loss": 0.6849, "step": 300 }, { "epoch": 0.18044237485448195, "grad_norm": 7.768599987030029, "learning_rate": 1.03e-05, "loss": 0.624, "step": 310 }, { "epoch": 0.18626309662398138, "grad_norm": 5.6971211433410645, "learning_rate": 1.0633333333333334e-05, "loss": 0.5473, "step": 320 }, { "epoch": 0.1920838183934808, "grad_norm": 7.239617824554443, "learning_rate": 1.0966666666666666e-05, "loss": 0.506, "step": 330 }, { "epoch": 0.1979045401629802, "grad_norm": 7.811177730560303, "learning_rate": 1.13e-05, "loss": 0.4275, "step": 340 }, { "epoch": 0.20372526193247964, "grad_norm": 8.937807083129883, "learning_rate": 1.1633333333333334e-05, "loss": 0.4441, "step": 350 }, { "epoch": 0.20954598370197905, "grad_norm": 6.753471851348877, "learning_rate": 1.1966666666666668e-05, "loss": 0.3697, "step": 360 }, { "epoch": 0.21536670547147846, "grad_norm": 6.031618595123291, "learning_rate": 1.23e-05, "loss": 0.2871, "step": 370 }, { "epoch": 0.22118742724097787, "grad_norm": 5.9313459396362305, "learning_rate": 1.2633333333333333e-05, "loss": 0.2973, "step": 380 }, { "epoch": 0.2270081490104773, "grad_norm": 7.446842193603516, "learning_rate": 1.2966666666666669e-05, "loss": 0.3153, "step": 390 }, { "epoch": 0.23282887077997672, "grad_norm": 6.2848944664001465, "learning_rate": 1.3300000000000001e-05, "loss": 0.2403, "step": 400 }, { "epoch": 0.23864959254947612, "grad_norm": 5.90664005279541, "learning_rate": 1.3633333333333334e-05, "loss": 0.2361, "step": 410 }, { "epoch": 0.24447031431897556, "grad_norm": 5.253407955169678, "learning_rate": 1.3966666666666666e-05, "loss": 0.2383, "step": 420 }, { "epoch": 0.25029103608847497, "grad_norm": 3.5932719707489014, "learning_rate": 1.43e-05, "loss": 0.2171, "step": 430 }, { "epoch": 0.2561117578579744, "grad_norm": 4.434157848358154, "learning_rate": 1.4633333333333334e-05, "loss": 0.2024, "step": 440 }, { "epoch": 0.2619324796274738, "grad_norm": 3.7341742515563965, "learning_rate": 1.4966666666666668e-05, "loss": 0.187, "step": 450 }, { "epoch": 0.26775320139697323, "grad_norm": 3.5675153732299805, "learning_rate": 1.53e-05, "loss": 0.1527, "step": 460 }, { "epoch": 0.27357392316647267, "grad_norm": 3.2708635330200195, "learning_rate": 1.563333333333333e-05, "loss": 0.1486, "step": 470 }, { "epoch": 0.27939464493597205, "grad_norm": 2.5912253856658936, "learning_rate": 1.5966666666666667e-05, "loss": 0.1171, "step": 480 }, { "epoch": 0.2852153667054715, "grad_norm": 2.370358943939209, "learning_rate": 1.63e-05, "loss": 0.1253, "step": 490 }, { "epoch": 0.2910360884749709, "grad_norm": 2.1213202476501465, "learning_rate": 1.6633333333333336e-05, "loss": 0.0872, "step": 500 }, { "epoch": 0.2968568102444703, "grad_norm": 4.791236400604248, "learning_rate": 1.6966666666666668e-05, "loss": 0.1023, "step": 510 }, { "epoch": 0.30267753201396974, "grad_norm": 1.8030214309692383, "learning_rate": 1.73e-05, "loss": 0.0739, "step": 520 }, { "epoch": 0.3084982537834691, "grad_norm": 2.7329323291778564, "learning_rate": 1.7633333333333336e-05, "loss": 0.0875, "step": 530 }, { "epoch": 0.31431897555296856, "grad_norm": 3.0136725902557373, "learning_rate": 1.796666666666667e-05, "loss": 0.0909, "step": 540 }, { "epoch": 0.320139697322468, "grad_norm": 2.140592098236084, "learning_rate": 1.83e-05, "loss": 0.0513, "step": 550 }, { "epoch": 0.3259604190919674, "grad_norm": 2.215975284576416, "learning_rate": 1.8633333333333333e-05, "loss": 0.063, "step": 560 }, { "epoch": 0.3317811408614668, "grad_norm": 3.0556840896606445, "learning_rate": 1.896666666666667e-05, "loss": 0.0736, "step": 570 }, { "epoch": 0.33760186263096625, "grad_norm": 1.9327889680862427, "learning_rate": 1.93e-05, "loss": 0.0609, "step": 580 }, { "epoch": 0.34342258440046564, "grad_norm": 1.9554654359817505, "learning_rate": 1.9633333333333334e-05, "loss": 0.0265, "step": 590 }, { "epoch": 0.3492433061699651, "grad_norm": 3.2031350135803223, "learning_rate": 1.9966666666666666e-05, "loss": 0.0442, "step": 600 }, { "epoch": 0.3550640279394645, "grad_norm": 1.4008177518844604, "learning_rate": 2.0300000000000002e-05, "loss": 0.0364, "step": 610 }, { "epoch": 0.3608847497089639, "grad_norm": 1.9808603525161743, "learning_rate": 2.0633333333333335e-05, "loss": 0.0271, "step": 620 }, { "epoch": 0.36670547147846333, "grad_norm": 2.2199501991271973, "learning_rate": 2.0966666666666667e-05, "loss": 0.0121, "step": 630 }, { "epoch": 0.37252619324796277, "grad_norm": 2.518538475036621, "learning_rate": 2.13e-05, "loss": -0.0046, "step": 640 }, { "epoch": 0.37834691501746215, "grad_norm": 1.2404508590698242, "learning_rate": 2.1633333333333332e-05, "loss": -0.004, "step": 650 }, { "epoch": 0.3841676367869616, "grad_norm": 2.057145833969116, "learning_rate": 2.1966666666666668e-05, "loss": -0.0059, "step": 660 }, { "epoch": 0.389988358556461, "grad_norm": 1.8304933309555054, "learning_rate": 2.23e-05, "loss": -0.039, "step": 670 }, { "epoch": 0.3958090803259604, "grad_norm": 1.7600386142730713, "learning_rate": 2.2633333333333336e-05, "loss": -0.0139, "step": 680 }, { "epoch": 0.40162980209545984, "grad_norm": 2.265993356704712, "learning_rate": 2.2966666666666668e-05, "loss": -0.0385, "step": 690 }, { "epoch": 0.4074505238649593, "grad_norm": 1.5035374164581299, "learning_rate": 2.3300000000000004e-05, "loss": -0.0285, "step": 700 }, { "epoch": 0.41327124563445866, "grad_norm": 1.6671541929244995, "learning_rate": 2.3633333333333336e-05, "loss": -0.0261, "step": 710 }, { "epoch": 0.4190919674039581, "grad_norm": 2.363172769546509, "learning_rate": 2.396666666666667e-05, "loss": -0.0281, "step": 720 }, { "epoch": 0.42491268917345754, "grad_norm": 2.326547384262085, "learning_rate": 2.43e-05, "loss": -0.0212, "step": 730 }, { "epoch": 0.4307334109429569, "grad_norm": 1.2889360189437866, "learning_rate": 2.4633333333333334e-05, "loss": -0.0346, "step": 740 }, { "epoch": 0.43655413271245636, "grad_norm": 1.5972001552581787, "learning_rate": 2.496666666666667e-05, "loss": -0.0406, "step": 750 }, { "epoch": 0.44237485448195574, "grad_norm": 2.005054235458374, "learning_rate": 2.5300000000000002e-05, "loss": -0.0431, "step": 760 }, { "epoch": 0.4481955762514552, "grad_norm": 1.5639278888702393, "learning_rate": 2.5633333333333338e-05, "loss": -0.0354, "step": 770 }, { "epoch": 0.4540162980209546, "grad_norm": 1.3825984001159668, "learning_rate": 2.5966666666666667e-05, "loss": -0.0446, "step": 780 }, { "epoch": 0.459837019790454, "grad_norm": 1.6973204612731934, "learning_rate": 2.6300000000000002e-05, "loss": -0.0626, "step": 790 }, { "epoch": 0.46565774155995343, "grad_norm": 1.141350507736206, "learning_rate": 2.663333333333333e-05, "loss": -0.0503, "step": 800 }, { "epoch": 0.47147846332945287, "grad_norm": 1.869985818862915, "learning_rate": 2.6966666666666667e-05, "loss": -0.0635, "step": 810 }, { "epoch": 0.47729918509895225, "grad_norm": 1.169270396232605, "learning_rate": 2.7300000000000003e-05, "loss": -0.035, "step": 820 }, { "epoch": 0.4831199068684517, "grad_norm": 1.8271631002426147, "learning_rate": 2.7633333333333332e-05, "loss": -0.0545, "step": 830 }, { "epoch": 0.4889406286379511, "grad_norm": 1.666276454925537, "learning_rate": 2.7966666666666668e-05, "loss": -0.0679, "step": 840 }, { "epoch": 0.4947613504074505, "grad_norm": 1.3866889476776123, "learning_rate": 2.83e-05, "loss": -0.0551, "step": 850 }, { "epoch": 0.5005820721769499, "grad_norm": 1.370084285736084, "learning_rate": 2.8633333333333336e-05, "loss": -0.0503, "step": 860 }, { "epoch": 0.5064027939464494, "grad_norm": 2.091080665588379, "learning_rate": 2.8966666666666668e-05, "loss": -0.0551, "step": 870 }, { "epoch": 0.5122235157159488, "grad_norm": 1.8620296716690063, "learning_rate": 2.93e-05, "loss": -0.0581, "step": 880 }, { "epoch": 0.5180442374854481, "grad_norm": 2.0053396224975586, "learning_rate": 2.9633333333333336e-05, "loss": -0.0532, "step": 890 }, { "epoch": 0.5238649592549476, "grad_norm": 0.7567626237869263, "learning_rate": 2.9966666666666672e-05, "loss": -0.0729, "step": 900 }, { "epoch": 0.529685681024447, "grad_norm": 1.191988229751587, "learning_rate": 3.03e-05, "loss": -0.0666, "step": 910 }, { "epoch": 0.5355064027939465, "grad_norm": 1.5970115661621094, "learning_rate": 3.063333333333334e-05, "loss": -0.0591, "step": 920 }, { "epoch": 0.5413271245634459, "grad_norm": 2.0107686519622803, "learning_rate": 3.096666666666666e-05, "loss": -0.0449, "step": 930 }, { "epoch": 0.5471478463329453, "grad_norm": 1.9331578016281128, "learning_rate": 3.13e-05, "loss": -0.0832, "step": 940 }, { "epoch": 0.5529685681024447, "grad_norm": 1.2138278484344482, "learning_rate": 3.1633333333333334e-05, "loss": -0.0736, "step": 950 }, { "epoch": 0.5587892898719441, "grad_norm": 2.7533183097839355, "learning_rate": 3.196666666666667e-05, "loss": -0.0341, "step": 960 }, { "epoch": 0.5646100116414435, "grad_norm": 1.590443730354309, "learning_rate": 3.2300000000000006e-05, "loss": -0.0734, "step": 970 }, { "epoch": 0.570430733410943, "grad_norm": 1.5227609872817993, "learning_rate": 3.263333333333333e-05, "loss": -0.0737, "step": 980 }, { "epoch": 0.5762514551804424, "grad_norm": 2.206758499145508, "learning_rate": 3.296666666666667e-05, "loss": -0.0714, "step": 990 }, { "epoch": 0.5820721769499418, "grad_norm": 1.0484057664871216, "learning_rate": 3.33e-05, "loss": -0.0879, "step": 1000 }, { "epoch": 0.5878928987194412, "grad_norm": 1.5317397117614746, "learning_rate": 3.3633333333333335e-05, "loss": -0.0587, "step": 1010 }, { "epoch": 0.5937136204889406, "grad_norm": 1.4011244773864746, "learning_rate": 3.396666666666667e-05, "loss": -0.0842, "step": 1020 }, { "epoch": 0.59953434225844, "grad_norm": 1.4616059064865112, "learning_rate": 3.430000000000001e-05, "loss": -0.0837, "step": 1030 }, { "epoch": 0.6053550640279395, "grad_norm": 1.6482254266738892, "learning_rate": 3.463333333333333e-05, "loss": -0.0857, "step": 1040 }, { "epoch": 0.6111757857974389, "grad_norm": 1.1357859373092651, "learning_rate": 3.496666666666667e-05, "loss": -0.0859, "step": 1050 }, { "epoch": 0.6169965075669382, "grad_norm": 1.2360305786132812, "learning_rate": 3.53e-05, "loss": -0.0744, "step": 1060 }, { "epoch": 0.6228172293364377, "grad_norm": 1.416707158088684, "learning_rate": 3.563333333333334e-05, "loss": -0.0978, "step": 1070 }, { "epoch": 0.6286379511059371, "grad_norm": 1.2688764333724976, "learning_rate": 3.596666666666667e-05, "loss": -0.0753, "step": 1080 }, { "epoch": 0.6344586728754366, "grad_norm": 1.668439507484436, "learning_rate": 3.63e-05, "loss": -0.0758, "step": 1090 }, { "epoch": 0.640279394644936, "grad_norm": 1.0859016180038452, "learning_rate": 3.6633333333333334e-05, "loss": -0.0834, "step": 1100 }, { "epoch": 0.6461001164144354, "grad_norm": 0.9204323291778564, "learning_rate": 3.6966666666666666e-05, "loss": -0.0895, "step": 1110 }, { "epoch": 0.6519208381839348, "grad_norm": 1.247637152671814, "learning_rate": 3.73e-05, "loss": -0.0955, "step": 1120 }, { "epoch": 0.6577415599534342, "grad_norm": 1.2620124816894531, "learning_rate": 3.763333333333334e-05, "loss": -0.0907, "step": 1130 }, { "epoch": 0.6635622817229336, "grad_norm": 1.7772462368011475, "learning_rate": 3.796666666666667e-05, "loss": -0.0973, "step": 1140 }, { "epoch": 0.6693830034924331, "grad_norm": 2.4042038917541504, "learning_rate": 3.83e-05, "loss": -0.0729, "step": 1150 }, { "epoch": 0.6752037252619325, "grad_norm": 1.2811710834503174, "learning_rate": 3.8633333333333335e-05, "loss": -0.091, "step": 1160 }, { "epoch": 0.681024447031432, "grad_norm": 1.0881035327911377, "learning_rate": 3.896666666666667e-05, "loss": -0.0858, "step": 1170 }, { "epoch": 0.6868451688009313, "grad_norm": 1.5334446430206299, "learning_rate": 3.9300000000000007e-05, "loss": -0.0878, "step": 1180 }, { "epoch": 0.6926658905704307, "grad_norm": 1.0810967683792114, "learning_rate": 3.963333333333333e-05, "loss": -0.0872, "step": 1190 }, { "epoch": 0.6984866123399301, "grad_norm": 0.8047418594360352, "learning_rate": 3.996666666666667e-05, "loss": -0.0878, "step": 1200 }, { "epoch": 0.7043073341094296, "grad_norm": 1.8458188772201538, "learning_rate": 4.0300000000000004e-05, "loss": -0.0689, "step": 1210 }, { "epoch": 0.710128055878929, "grad_norm": 1.7021021842956543, "learning_rate": 4.0633333333333336e-05, "loss": -0.0858, "step": 1220 }, { "epoch": 0.7159487776484285, "grad_norm": 1.5981242656707764, "learning_rate": 4.096666666666667e-05, "loss": -0.0955, "step": 1230 }, { "epoch": 0.7217694994179278, "grad_norm": 1.5293388366699219, "learning_rate": 4.13e-05, "loss": -0.0843, "step": 1240 }, { "epoch": 0.7275902211874272, "grad_norm": 1.088618516921997, "learning_rate": 4.1633333333333333e-05, "loss": -0.0986, "step": 1250 }, { "epoch": 0.7334109429569267, "grad_norm": 1.0201421976089478, "learning_rate": 4.196666666666667e-05, "loss": -0.1027, "step": 1260 }, { "epoch": 0.7392316647264261, "grad_norm": 1.4983360767364502, "learning_rate": 4.23e-05, "loss": -0.0995, "step": 1270 }, { "epoch": 0.7450523864959255, "grad_norm": 1.1330536603927612, "learning_rate": 4.263333333333334e-05, "loss": -0.0895, "step": 1280 }, { "epoch": 0.7508731082654249, "grad_norm": 1.3669791221618652, "learning_rate": 4.296666666666666e-05, "loss": -0.0917, "step": 1290 }, { "epoch": 0.7566938300349243, "grad_norm": 2.1733837127685547, "learning_rate": 4.33e-05, "loss": -0.0885, "step": 1300 }, { "epoch": 0.7625145518044237, "grad_norm": 1.3064656257629395, "learning_rate": 4.3633333333333335e-05, "loss": -0.0861, "step": 1310 }, { "epoch": 0.7683352735739232, "grad_norm": 1.4846017360687256, "learning_rate": 4.396666666666667e-05, "loss": -0.091, "step": 1320 }, { "epoch": 0.7741559953434226, "grad_norm": 1.407639503479004, "learning_rate": 4.43e-05, "loss": -0.0818, "step": 1330 }, { "epoch": 0.779976717112922, "grad_norm": 1.4194896221160889, "learning_rate": 4.463333333333334e-05, "loss": -0.0622, "step": 1340 }, { "epoch": 0.7857974388824214, "grad_norm": 0.9737577438354492, "learning_rate": 4.496666666666667e-05, "loss": -0.1029, "step": 1350 }, { "epoch": 0.7916181606519208, "grad_norm": 0.9710955619812012, "learning_rate": 4.53e-05, "loss": -0.0889, "step": 1360 }, { "epoch": 0.7974388824214202, "grad_norm": 1.308302879333496, "learning_rate": 4.5633333333333336e-05, "loss": -0.0989, "step": 1370 }, { "epoch": 0.8032596041909197, "grad_norm": 0.8904650807380676, "learning_rate": 4.596666666666667e-05, "loss": -0.0975, "step": 1380 }, { "epoch": 0.8090803259604191, "grad_norm": 1.2979768514633179, "learning_rate": 4.630000000000001e-05, "loss": -0.0894, "step": 1390 }, { "epoch": 0.8149010477299186, "grad_norm": 0.9050745368003845, "learning_rate": 4.663333333333333e-05, "loss": -0.0975, "step": 1400 }, { "epoch": 0.8207217694994179, "grad_norm": 0.9628918170928955, "learning_rate": 4.696666666666667e-05, "loss": -0.0871, "step": 1410 }, { "epoch": 0.8265424912689173, "grad_norm": 1.2653017044067383, "learning_rate": 4.73e-05, "loss": -0.0998, "step": 1420 }, { "epoch": 0.8323632130384168, "grad_norm": 1.3411428928375244, "learning_rate": 4.763333333333334e-05, "loss": -0.0969, "step": 1430 }, { "epoch": 0.8381839348079162, "grad_norm": 0.8523398041725159, "learning_rate": 4.796666666666667e-05, "loss": -0.1149, "step": 1440 }, { "epoch": 0.8440046565774156, "grad_norm": 0.8751052618026733, "learning_rate": 4.83e-05, "loss": -0.0925, "step": 1450 }, { "epoch": 0.8498253783469151, "grad_norm": 1.2758946418762207, "learning_rate": 4.8633333333333334e-05, "loss": -0.1045, "step": 1460 }, { "epoch": 0.8556461001164144, "grad_norm": 1.2131037712097168, "learning_rate": 4.8966666666666667e-05, "loss": -0.1192, "step": 1470 }, { "epoch": 0.8614668218859138, "grad_norm": 0.8067881464958191, "learning_rate": 4.93e-05, "loss": -0.108, "step": 1480 }, { "epoch": 0.8672875436554133, "grad_norm": 0.9868309497833252, "learning_rate": 4.963333333333334e-05, "loss": -0.103, "step": 1490 }, { "epoch": 0.8731082654249127, "grad_norm": 1.031684398651123, "learning_rate": 4.996666666666667e-05, "loss": -0.0935, "step": 1500 }, { "epoch": 0.8789289871944121, "grad_norm": 0.882844865322113, "learning_rate": 5.03e-05, "loss": -0.1124, "step": 1510 }, { "epoch": 0.8847497089639115, "grad_norm": 0.7262253165245056, "learning_rate": 5.0633333333333335e-05, "loss": -0.1026, "step": 1520 }, { "epoch": 0.8905704307334109, "grad_norm": 1.0288339853286743, "learning_rate": 5.0966666666666674e-05, "loss": -0.1031, "step": 1530 }, { "epoch": 0.8963911525029103, "grad_norm": 1.4736915826797485, "learning_rate": 5.130000000000001e-05, "loss": -0.0995, "step": 1540 }, { "epoch": 0.9022118742724098, "grad_norm": 1.0350465774536133, "learning_rate": 5.163333333333333e-05, "loss": -0.09, "step": 1550 }, { "epoch": 0.9080325960419092, "grad_norm": 1.214544653892517, "learning_rate": 5.196666666666667e-05, "loss": -0.1129, "step": 1560 }, { "epoch": 0.9138533178114087, "grad_norm": 0.9077620506286621, "learning_rate": 5.2300000000000004e-05, "loss": -0.1085, "step": 1570 }, { "epoch": 0.919674039580908, "grad_norm": 0.5756353735923767, "learning_rate": 5.2633333333333336e-05, "loss": -0.1055, "step": 1580 }, { "epoch": 0.9254947613504074, "grad_norm": 0.9810119867324829, "learning_rate": 5.296666666666666e-05, "loss": -0.1083, "step": 1590 }, { "epoch": 0.9313154831199069, "grad_norm": 1.2846250534057617, "learning_rate": 5.330000000000001e-05, "loss": -0.1007, "step": 1600 }, { "epoch": 0.9371362048894063, "grad_norm": 1.2574199438095093, "learning_rate": 5.3633333333333334e-05, "loss": -0.0929, "step": 1610 }, { "epoch": 0.9429569266589057, "grad_norm": 1.0201530456542969, "learning_rate": 5.3966666666666666e-05, "loss": -0.0991, "step": 1620 }, { "epoch": 0.9487776484284052, "grad_norm": 1.3054341077804565, "learning_rate": 5.4300000000000005e-05, "loss": -0.0952, "step": 1630 }, { "epoch": 0.9545983701979045, "grad_norm": 0.7503854632377625, "learning_rate": 5.463333333333334e-05, "loss": -0.1051, "step": 1640 }, { "epoch": 0.9604190919674039, "grad_norm": 0.9281660914421082, "learning_rate": 5.496666666666666e-05, "loss": -0.1129, "step": 1650 }, { "epoch": 0.9662398137369034, "grad_norm": 0.9385557174682617, "learning_rate": 5.530000000000001e-05, "loss": -0.0987, "step": 1660 }, { "epoch": 0.9720605355064028, "grad_norm": 1.0602010488510132, "learning_rate": 5.5633333333333335e-05, "loss": -0.107, "step": 1670 }, { "epoch": 0.9778812572759022, "grad_norm": 0.7111068964004517, "learning_rate": 5.596666666666667e-05, "loss": -0.1111, "step": 1680 }, { "epoch": 0.9837019790454016, "grad_norm": 0.8461886048316956, "learning_rate": 5.63e-05, "loss": -0.0895, "step": 1690 }, { "epoch": 0.989522700814901, "grad_norm": 0.6733174920082092, "learning_rate": 5.663333333333334e-05, "loss": -0.1121, "step": 1700 }, { "epoch": 0.9953434225844005, "grad_norm": 0.914417564868927, "learning_rate": 5.696666666666667e-05, "loss": -0.1149, "step": 1710 }, { "epoch": 1.0011641443538999, "grad_norm": 1.2505245208740234, "learning_rate": 5.73e-05, "loss": -0.0966, "step": 1720 }, { "epoch": 1.0069848661233993, "grad_norm": 0.7088961005210876, "learning_rate": 5.7633333333333336e-05, "loss": -0.1133, "step": 1730 }, { "epoch": 1.0128055878928988, "grad_norm": 0.7813042998313904, "learning_rate": 5.796666666666667e-05, "loss": -0.1027, "step": 1740 }, { "epoch": 1.0186263096623982, "grad_norm": 0.7412230968475342, "learning_rate": 5.83e-05, "loss": -0.1169, "step": 1750 }, { "epoch": 1.0244470314318976, "grad_norm": 0.820612370967865, "learning_rate": 5.863333333333334e-05, "loss": -0.1163, "step": 1760 }, { "epoch": 1.030267753201397, "grad_norm": 0.8233653903007507, "learning_rate": 5.896666666666667e-05, "loss": -0.1064, "step": 1770 }, { "epoch": 1.0360884749708963, "grad_norm": 0.7305741906166077, "learning_rate": 5.93e-05, "loss": -0.1177, "step": 1780 }, { "epoch": 1.0419091967403957, "grad_norm": 0.9197826385498047, "learning_rate": 5.9633333333333344e-05, "loss": -0.1044, "step": 1790 }, { "epoch": 1.0477299185098952, "grad_norm": 1.0678788423538208, "learning_rate": 5.996666666666667e-05, "loss": -0.1098, "step": 1800 }, { "epoch": 1.0535506402793946, "grad_norm": 1.0419394969940186, "learning_rate": 6.03e-05, "loss": -0.1123, "step": 1810 }, { "epoch": 1.059371362048894, "grad_norm": 1.0312749147415161, "learning_rate": 6.063333333333333e-05, "loss": -0.1194, "step": 1820 }, { "epoch": 1.0651920838183935, "grad_norm": 0.9155910015106201, "learning_rate": 6.0966666666666674e-05, "loss": -0.1116, "step": 1830 }, { "epoch": 1.071012805587893, "grad_norm": 0.7358734011650085, "learning_rate": 6.13e-05, "loss": -0.1073, "step": 1840 }, { "epoch": 1.0768335273573924, "grad_norm": 0.8051567077636719, "learning_rate": 6.163333333333333e-05, "loss": -0.1134, "step": 1850 }, { "epoch": 1.0826542491268918, "grad_norm": 0.7578628659248352, "learning_rate": 6.196666666666668e-05, "loss": -0.109, "step": 1860 }, { "epoch": 1.0884749708963912, "grad_norm": 0.544916033744812, "learning_rate": 6.23e-05, "loss": -0.1204, "step": 1870 }, { "epoch": 1.0942956926658907, "grad_norm": 0.7696540951728821, "learning_rate": 6.263333333333333e-05, "loss": -0.1141, "step": 1880 }, { "epoch": 1.1001164144353899, "grad_norm": 0.6167125105857849, "learning_rate": 6.296666666666667e-05, "loss": -0.1084, "step": 1890 }, { "epoch": 1.1059371362048893, "grad_norm": 0.8198492527008057, "learning_rate": 6.330000000000001e-05, "loss": -0.109, "step": 1900 }, { "epoch": 1.1117578579743888, "grad_norm": 0.9094312191009521, "learning_rate": 6.363333333333334e-05, "loss": -0.1199, "step": 1910 }, { "epoch": 1.1175785797438882, "grad_norm": 0.8130491971969604, "learning_rate": 6.396666666666667e-05, "loss": -0.1272, "step": 1920 }, { "epoch": 1.1233993015133876, "grad_norm": 0.6561400294303894, "learning_rate": 6.43e-05, "loss": -0.1154, "step": 1930 }, { "epoch": 1.129220023282887, "grad_norm": 0.4082295894622803, "learning_rate": 6.463333333333334e-05, "loss": -0.1266, "step": 1940 }, { "epoch": 1.1350407450523865, "grad_norm": 0.6759334206581116, "learning_rate": 6.496666666666667e-05, "loss": -0.1162, "step": 1950 }, { "epoch": 1.140861466821886, "grad_norm": 0.8319889307022095, "learning_rate": 6.53e-05, "loss": -0.1177, "step": 1960 }, { "epoch": 1.1466821885913854, "grad_norm": 0.6100814342498779, "learning_rate": 6.563333333333333e-05, "loss": -0.1139, "step": 1970 }, { "epoch": 1.1525029103608848, "grad_norm": 0.6076117753982544, "learning_rate": 6.596666666666667e-05, "loss": -0.1083, "step": 1980 }, { "epoch": 1.1583236321303843, "grad_norm": 0.7158991694450378, "learning_rate": 6.630000000000001e-05, "loss": -0.123, "step": 1990 }, { "epoch": 1.1641443538998835, "grad_norm": 0.8127427697181702, "learning_rate": 6.663333333333333e-05, "loss": -0.1155, "step": 2000 }, { "epoch": 1.1699650756693831, "grad_norm": 0.7646147608757019, "learning_rate": 6.696666666666666e-05, "loss": -0.1213, "step": 2010 }, { "epoch": 1.1757857974388823, "grad_norm": 0.8440269827842712, "learning_rate": 6.730000000000001e-05, "loss": -0.1242, "step": 2020 }, { "epoch": 1.1816065192083818, "grad_norm": 0.7618028521537781, "learning_rate": 6.763333333333334e-05, "loss": -0.1202, "step": 2030 }, { "epoch": 1.1874272409778812, "grad_norm": 0.7063201069831848, "learning_rate": 6.796666666666666e-05, "loss": -0.1199, "step": 2040 }, { "epoch": 1.1932479627473807, "grad_norm": 0.6011345386505127, "learning_rate": 6.83e-05, "loss": -0.1228, "step": 2050 }, { "epoch": 1.19906868451688, "grad_norm": 0.6721917390823364, "learning_rate": 6.863333333333334e-05, "loss": -0.1122, "step": 2060 }, { "epoch": 1.2048894062863795, "grad_norm": 0.7627427577972412, "learning_rate": 6.896666666666667e-05, "loss": -0.1141, "step": 2070 }, { "epoch": 1.210710128055879, "grad_norm": 0.6914652585983276, "learning_rate": 6.93e-05, "loss": -0.1163, "step": 2080 }, { "epoch": 1.2165308498253784, "grad_norm": 0.7171711921691895, "learning_rate": 6.963333333333334e-05, "loss": -0.1257, "step": 2090 }, { "epoch": 1.2223515715948778, "grad_norm": 0.7978622913360596, "learning_rate": 6.996666666666667e-05, "loss": -0.1264, "step": 2100 }, { "epoch": 1.2281722933643773, "grad_norm": 0.6697583198547363, "learning_rate": 7.03e-05, "loss": -0.118, "step": 2110 }, { "epoch": 1.2339930151338767, "grad_norm": 0.9119671583175659, "learning_rate": 7.063333333333333e-05, "loss": -0.1032, "step": 2120 }, { "epoch": 1.239813736903376, "grad_norm": 0.6511488556861877, "learning_rate": 7.096666666666667e-05, "loss": -0.1111, "step": 2130 }, { "epoch": 1.2456344586728754, "grad_norm": 0.37708503007888794, "learning_rate": 7.13e-05, "loss": -0.1253, "step": 2140 }, { "epoch": 1.2514551804423748, "grad_norm": 0.5550225973129272, "learning_rate": 7.163333333333334e-05, "loss": -0.1287, "step": 2150 }, { "epoch": 1.2572759022118742, "grad_norm": 0.48578813672065735, "learning_rate": 7.196666666666668e-05, "loss": -0.1158, "step": 2160 }, { "epoch": 1.2630966239813737, "grad_norm": 0.6859151721000671, "learning_rate": 7.23e-05, "loss": -0.1301, "step": 2170 }, { "epoch": 1.2689173457508731, "grad_norm": 0.39375028014183044, "learning_rate": 7.263333333333334e-05, "loss": -0.1307, "step": 2180 }, { "epoch": 1.2747380675203726, "grad_norm": 0.657090425491333, "learning_rate": 7.296666666666667e-05, "loss": -0.1227, "step": 2190 }, { "epoch": 1.280558789289872, "grad_norm": 0.4376808702945709, "learning_rate": 7.33e-05, "loss": -0.124, "step": 2200 }, { "epoch": 1.2863795110593714, "grad_norm": 0.9547446966171265, "learning_rate": 7.363333333333334e-05, "loss": -0.1151, "step": 2210 }, { "epoch": 1.2922002328288706, "grad_norm": 0.9366371035575867, "learning_rate": 7.396666666666667e-05, "loss": -0.1193, "step": 2220 }, { "epoch": 1.2980209545983703, "grad_norm": 0.714223325252533, "learning_rate": 7.43e-05, "loss": -0.1148, "step": 2230 }, { "epoch": 1.3038416763678695, "grad_norm": 0.8067750334739685, "learning_rate": 7.463333333333334e-05, "loss": -0.1204, "step": 2240 }, { "epoch": 1.309662398137369, "grad_norm": 0.6344537138938904, "learning_rate": 7.496666666666667e-05, "loss": -0.1132, "step": 2250 }, { "epoch": 1.3154831199068684, "grad_norm": 0.5406343936920166, "learning_rate": 7.53e-05, "loss": -0.104, "step": 2260 }, { "epoch": 1.3213038416763678, "grad_norm": 0.8839937448501587, "learning_rate": 7.563333333333333e-05, "loss": -0.1132, "step": 2270 }, { "epoch": 1.3271245634458673, "grad_norm": 0.7834556102752686, "learning_rate": 7.596666666666668e-05, "loss": -0.1128, "step": 2280 }, { "epoch": 1.3329452852153667, "grad_norm": 0.5506494641304016, "learning_rate": 7.630000000000001e-05, "loss": -0.1153, "step": 2290 }, { "epoch": 1.3387660069848661, "grad_norm": 0.4691511392593384, "learning_rate": 7.663333333333333e-05, "loss": -0.1225, "step": 2300 }, { "epoch": 1.3445867287543656, "grad_norm": 0.7313283681869507, "learning_rate": 7.696666666666668e-05, "loss": -0.1265, "step": 2310 }, { "epoch": 1.350407450523865, "grad_norm": 0.7039185762405396, "learning_rate": 7.730000000000001e-05, "loss": -0.117, "step": 2320 }, { "epoch": 1.3562281722933645, "grad_norm": 0.8486613035202026, "learning_rate": 7.763333333333334e-05, "loss": -0.1229, "step": 2330 }, { "epoch": 1.362048894062864, "grad_norm": 0.48512712121009827, "learning_rate": 7.796666666666666e-05, "loss": -0.1238, "step": 2340 }, { "epoch": 1.367869615832363, "grad_norm": 0.6362194418907166, "learning_rate": 7.83e-05, "loss": -0.1251, "step": 2350 }, { "epoch": 1.3736903376018628, "grad_norm": 0.5361714959144592, "learning_rate": 7.863333333333334e-05, "loss": -0.1284, "step": 2360 }, { "epoch": 1.379511059371362, "grad_norm": 0.5866290926933289, "learning_rate": 7.896666666666667e-05, "loss": -0.1199, "step": 2370 }, { "epoch": 1.3853317811408614, "grad_norm": 0.7284654974937439, "learning_rate": 7.93e-05, "loss": -0.1242, "step": 2380 }, { "epoch": 1.3911525029103609, "grad_norm": 0.5243335962295532, "learning_rate": 7.963333333333334e-05, "loss": -0.1118, "step": 2390 }, { "epoch": 1.3969732246798603, "grad_norm": 0.4365101456642151, "learning_rate": 7.996666666666667e-05, "loss": -0.1226, "step": 2400 }, { "epoch": 1.4027939464493597, "grad_norm": 0.5338398814201355, "learning_rate": 8.030000000000001e-05, "loss": -0.1206, "step": 2410 }, { "epoch": 1.4086146682188592, "grad_norm": 0.8281461596488953, "learning_rate": 8.063333333333333e-05, "loss": -0.121, "step": 2420 }, { "epoch": 1.4144353899883586, "grad_norm": 0.6032852530479431, "learning_rate": 8.096666666666667e-05, "loss": -0.1277, "step": 2430 }, { "epoch": 1.420256111757858, "grad_norm": 0.6263359785079956, "learning_rate": 8.13e-05, "loss": -0.1223, "step": 2440 }, { "epoch": 1.4260768335273575, "grad_norm": 0.6917026042938232, "learning_rate": 8.163333333333334e-05, "loss": -0.1198, "step": 2450 }, { "epoch": 1.4318975552968567, "grad_norm": 0.7114126086235046, "learning_rate": 8.196666666666668e-05, "loss": -0.117, "step": 2460 }, { "epoch": 1.4377182770663564, "grad_norm": 0.7845610976219177, "learning_rate": 8.23e-05, "loss": -0.1288, "step": 2470 }, { "epoch": 1.4435389988358556, "grad_norm": 0.6777554750442505, "learning_rate": 8.263333333333334e-05, "loss": -0.1223, "step": 2480 }, { "epoch": 1.449359720605355, "grad_norm": 0.938005268573761, "learning_rate": 8.296666666666667e-05, "loss": -0.1143, "step": 2490 }, { "epoch": 1.4551804423748544, "grad_norm": 0.4584839940071106, "learning_rate": 8.33e-05, "loss": -0.1207, "step": 2500 }, { "epoch": 1.4610011641443539, "grad_norm": 0.6665570735931396, "learning_rate": 8.363333333333334e-05, "loss": -0.1218, "step": 2510 }, { "epoch": 1.4668218859138533, "grad_norm": 0.5791141986846924, "learning_rate": 8.396666666666667e-05, "loss": -0.1325, "step": 2520 }, { "epoch": 1.4726426076833528, "grad_norm": 0.617344081401825, "learning_rate": 8.43e-05, "loss": -0.1308, "step": 2530 }, { "epoch": 1.4784633294528522, "grad_norm": 0.5027318000793457, "learning_rate": 8.463333333333335e-05, "loss": -0.1076, "step": 2540 }, { "epoch": 1.4842840512223516, "grad_norm": 0.5785815119743347, "learning_rate": 8.496666666666667e-05, "loss": -0.1286, "step": 2550 }, { "epoch": 1.490104772991851, "grad_norm": 0.4969930648803711, "learning_rate": 8.53e-05, "loss": -0.118, "step": 2560 }, { "epoch": 1.4959254947613503, "grad_norm": 0.617104172706604, "learning_rate": 8.563333333333333e-05, "loss": -0.1181, "step": 2570 }, { "epoch": 1.50174621653085, "grad_norm": 0.5942543745040894, "learning_rate": 8.596666666666668e-05, "loss": -0.1218, "step": 2580 }, { "epoch": 1.5075669383003492, "grad_norm": 0.5043347477912903, "learning_rate": 8.63e-05, "loss": -0.1255, "step": 2590 }, { "epoch": 1.5133876600698488, "grad_norm": 0.35851532220840454, "learning_rate": 8.663333333333333e-05, "loss": -0.1136, "step": 2600 }, { "epoch": 1.519208381839348, "grad_norm": 0.5805014967918396, "learning_rate": 8.696666666666668e-05, "loss": -0.1189, "step": 2610 }, { "epoch": 1.5250291036088475, "grad_norm": 0.5937091112136841, "learning_rate": 8.730000000000001e-05, "loss": -0.1301, "step": 2620 }, { "epoch": 1.530849825378347, "grad_norm": 0.5718668103218079, "learning_rate": 8.763333333333334e-05, "loss": -0.1239, "step": 2630 }, { "epoch": 1.5366705471478463, "grad_norm": 0.711712658405304, "learning_rate": 8.796666666666667e-05, "loss": -0.1259, "step": 2640 }, { "epoch": 1.5424912689173458, "grad_norm": 0.4097176492214203, "learning_rate": 8.83e-05, "loss": -0.1316, "step": 2650 }, { "epoch": 1.5483119906868452, "grad_norm": 0.45890969038009644, "learning_rate": 8.863333333333334e-05, "loss": -0.1317, "step": 2660 }, { "epoch": 1.5541327124563447, "grad_norm": 0.5507198572158813, "learning_rate": 8.896666666666667e-05, "loss": -0.1207, "step": 2670 }, { "epoch": 1.5599534342258439, "grad_norm": 0.6732059121131897, "learning_rate": 8.93e-05, "loss": -0.1343, "step": 2680 }, { "epoch": 1.5657741559953435, "grad_norm": 0.5754176378250122, "learning_rate": 8.963333333333333e-05, "loss": -0.1339, "step": 2690 }, { "epoch": 1.5715948777648427, "grad_norm": 0.4578869938850403, "learning_rate": 8.996666666666667e-05, "loss": -0.1281, "step": 2700 }, { "epoch": 1.5774155995343424, "grad_norm": 0.5842833518981934, "learning_rate": 9.030000000000001e-05, "loss": -0.1356, "step": 2710 }, { "epoch": 1.5832363213038416, "grad_norm": 0.7267454862594604, "learning_rate": 9.063333333333333e-05, "loss": -0.1289, "step": 2720 }, { "epoch": 1.589057043073341, "grad_norm": 0.6768018007278442, "learning_rate": 9.096666666666666e-05, "loss": -0.1413, "step": 2730 }, { "epoch": 1.5948777648428405, "grad_norm": 0.6187252402305603, "learning_rate": 9.130000000000001e-05, "loss": -0.1334, "step": 2740 }, { "epoch": 1.60069848661234, "grad_norm": 0.8018686771392822, "learning_rate": 9.163333333333334e-05, "loss": -0.1288, "step": 2750 }, { "epoch": 1.6065192083818394, "grad_norm": 0.6683033108711243, "learning_rate": 9.196666666666666e-05, "loss": -0.1316, "step": 2760 }, { "epoch": 1.6123399301513388, "grad_norm": 0.5799955129623413, "learning_rate": 9.230000000000001e-05, "loss": -0.134, "step": 2770 }, { "epoch": 1.6181606519208382, "grad_norm": 0.7250440716743469, "learning_rate": 9.263333333333334e-05, "loss": -0.1338, "step": 2780 }, { "epoch": 1.6239813736903375, "grad_norm": 0.6824718713760376, "learning_rate": 9.296666666666667e-05, "loss": -0.1209, "step": 2790 }, { "epoch": 1.6298020954598371, "grad_norm": 0.36886951327323914, "learning_rate": 9.33e-05, "loss": -0.1263, "step": 2800 }, { "epoch": 1.6356228172293363, "grad_norm": 0.3608442544937134, "learning_rate": 9.363333333333334e-05, "loss": -0.1315, "step": 2810 }, { "epoch": 1.641443538998836, "grad_norm": 0.7143772840499878, "learning_rate": 9.396666666666667e-05, "loss": -0.1362, "step": 2820 }, { "epoch": 1.6472642607683352, "grad_norm": 0.5199475288391113, "learning_rate": 9.43e-05, "loss": -0.1219, "step": 2830 }, { "epoch": 1.6530849825378346, "grad_norm": 0.5594648718833923, "learning_rate": 9.463333333333333e-05, "loss": -0.1251, "step": 2840 }, { "epoch": 1.658905704307334, "grad_norm": 0.35659483075141907, "learning_rate": 9.496666666666667e-05, "loss": -0.133, "step": 2850 }, { "epoch": 1.6647264260768335, "grad_norm": 0.5833654403686523, "learning_rate": 9.53e-05, "loss": -0.1326, "step": 2860 }, { "epoch": 1.670547147846333, "grad_norm": 0.6678730249404907, "learning_rate": 9.563333333333334e-05, "loss": -0.1278, "step": 2870 }, { "epoch": 1.6763678696158324, "grad_norm": 0.6178660988807678, "learning_rate": 9.596666666666668e-05, "loss": -0.1292, "step": 2880 }, { "epoch": 1.6821885913853318, "grad_norm": 0.5997801423072815, "learning_rate": 9.63e-05, "loss": -0.13, "step": 2890 }, { "epoch": 1.688009313154831, "grad_norm": 0.4414120614528656, "learning_rate": 9.663333333333334e-05, "loss": -0.1265, "step": 2900 }, { "epoch": 1.6938300349243307, "grad_norm": 0.5850778222084045, "learning_rate": 9.696666666666667e-05, "loss": -0.1327, "step": 2910 }, { "epoch": 1.69965075669383, "grad_norm": 0.6031677722930908, "learning_rate": 9.730000000000001e-05, "loss": -0.1325, "step": 2920 }, { "epoch": 1.7054714784633296, "grad_norm": 0.3583162724971771, "learning_rate": 9.763333333333334e-05, "loss": -0.133, "step": 2930 }, { "epoch": 1.7112922002328288, "grad_norm": 0.5643025636672974, "learning_rate": 9.796666666666667e-05, "loss": -0.1245, "step": 2940 }, { "epoch": 1.7171129220023282, "grad_norm": 0.7883546948432922, "learning_rate": 9.83e-05, "loss": -0.1174, "step": 2950 }, { "epoch": 1.7229336437718277, "grad_norm": 0.7326152920722961, "learning_rate": 9.863333333333334e-05, "loss": -0.1248, "step": 2960 }, { "epoch": 1.728754365541327, "grad_norm": 0.6156047582626343, "learning_rate": 9.896666666666667e-05, "loss": -0.1283, "step": 2970 }, { "epoch": 1.7345750873108265, "grad_norm": 0.5066848993301392, "learning_rate": 9.93e-05, "loss": -0.1354, "step": 2980 }, { "epoch": 1.740395809080326, "grad_norm": 0.6813083291053772, "learning_rate": 9.963333333333333e-05, "loss": -0.1279, "step": 2990 }, { "epoch": 1.7462165308498254, "grad_norm": 0.5399401187896729, "learning_rate": 9.996666666666668e-05, "loss": -0.1134, "step": 3000 }, { "epoch": 1.7520372526193246, "grad_norm": 0.5684092044830322, "learning_rate": 9.999999384858465e-05, "loss": -0.1301, "step": 3010 }, { "epoch": 1.7578579743888243, "grad_norm": 0.6319019198417664, "learning_rate": 9.999997258443473e-05, "loss": -0.1309, "step": 3020 }, { "epoch": 1.7636786961583235, "grad_norm": 0.4455578923225403, "learning_rate": 9.999993613161331e-05, "loss": -0.1305, "step": 3030 }, { "epoch": 1.7694994179278232, "grad_norm": 0.6327020525932312, "learning_rate": 9.999988449013146e-05, "loss": -0.1264, "step": 3040 }, { "epoch": 1.7753201396973224, "grad_norm": 0.46728116273880005, "learning_rate": 9.99998176600049e-05, "loss": -0.1231, "step": 3050 }, { "epoch": 1.781140861466822, "grad_norm": 0.4905753433704376, "learning_rate": 9.999973564125389e-05, "loss": -0.1364, "step": 3060 }, { "epoch": 1.7869615832363213, "grad_norm": 0.6316990256309509, "learning_rate": 9.999963843390335e-05, "loss": -0.1321, "step": 3070 }, { "epoch": 1.7927823050058207, "grad_norm": 0.7229564189910889, "learning_rate": 9.999952603798282e-05, "loss": -0.109, "step": 3080 }, { "epoch": 1.7986030267753201, "grad_norm": 0.6285746693611145, "learning_rate": 9.999939845352646e-05, "loss": -0.1277, "step": 3090 }, { "epoch": 1.8044237485448196, "grad_norm": 0.5426042079925537, "learning_rate": 9.999925568057298e-05, "loss": -0.1248, "step": 3100 }, { "epoch": 1.810244470314319, "grad_norm": 0.5440440773963928, "learning_rate": 9.999909771916578e-05, "loss": -0.1244, "step": 3110 }, { "epoch": 1.8160651920838184, "grad_norm": 0.5064862966537476, "learning_rate": 9.999892456935285e-05, "loss": -0.1287, "step": 3120 }, { "epoch": 1.8218859138533179, "grad_norm": 0.5335593819618225, "learning_rate": 9.999873623118679e-05, "loss": -0.1312, "step": 3130 }, { "epoch": 1.827706635622817, "grad_norm": 0.5987112522125244, "learning_rate": 9.999853270472479e-05, "loss": -0.1311, "step": 3140 }, { "epoch": 1.8335273573923168, "grad_norm": 0.5431009531021118, "learning_rate": 9.999831399002871e-05, "loss": -0.1315, "step": 3150 }, { "epoch": 1.839348079161816, "grad_norm": 0.5471633672714233, "learning_rate": 9.999808008716494e-05, "loss": -0.1331, "step": 3160 }, { "epoch": 1.8451688009313156, "grad_norm": 0.5450029373168945, "learning_rate": 9.999783099620459e-05, "loss": -0.1225, "step": 3170 }, { "epoch": 1.8509895227008148, "grad_norm": 0.5824825167655945, "learning_rate": 9.999756671722328e-05, "loss": -0.1232, "step": 3180 }, { "epoch": 1.8568102444703143, "grad_norm": 0.7028770446777344, "learning_rate": 9.99972872503013e-05, "loss": -0.1309, "step": 3190 }, { "epoch": 1.8626309662398137, "grad_norm": 0.5865168571472168, "learning_rate": 9.999699259552359e-05, "loss": -0.1169, "step": 3200 }, { "epoch": 1.8684516880093132, "grad_norm": 0.7004950046539307, "learning_rate": 9.99966827529796e-05, "loss": -0.1284, "step": 3210 }, { "epoch": 1.8742724097788126, "grad_norm": 0.523371696472168, "learning_rate": 9.999635772276348e-05, "loss": -0.1249, "step": 3220 }, { "epoch": 1.880093131548312, "grad_norm": 0.4007487893104553, "learning_rate": 9.999601750497396e-05, "loss": -0.1299, "step": 3230 }, { "epoch": 1.8859138533178115, "grad_norm": 0.4969002604484558, "learning_rate": 9.99956620997144e-05, "loss": -0.1342, "step": 3240 }, { "epoch": 1.8917345750873107, "grad_norm": 0.48268404603004456, "learning_rate": 9.999529150709275e-05, "loss": -0.1297, "step": 3250 }, { "epoch": 1.8975552968568103, "grad_norm": 0.5674788951873779, "learning_rate": 9.999490572722158e-05, "loss": -0.1297, "step": 3260 }, { "epoch": 1.9033760186263096, "grad_norm": 0.5590144395828247, "learning_rate": 9.99945047602181e-05, "loss": -0.1247, "step": 3270 }, { "epoch": 1.9091967403958092, "grad_norm": 0.598161518573761, "learning_rate": 9.99940886062041e-05, "loss": -0.1283, "step": 3280 }, { "epoch": 1.9150174621653084, "grad_norm": 0.44949647784233093, "learning_rate": 9.999365726530599e-05, "loss": -0.1276, "step": 3290 }, { "epoch": 1.9208381839348079, "grad_norm": 0.5884619951248169, "learning_rate": 9.999321073765481e-05, "loss": -0.1199, "step": 3300 }, { "epoch": 1.9266589057043073, "grad_norm": 0.4510299563407898, "learning_rate": 9.99927490233862e-05, "loss": -0.141, "step": 3310 }, { "epoch": 1.9324796274738067, "grad_norm": 0.4353652596473694, "learning_rate": 9.999227212264043e-05, "loss": -0.1383, "step": 3320 }, { "epoch": 1.9383003492433062, "grad_norm": 0.4976765811443329, "learning_rate": 9.999178003556236e-05, "loss": -0.1285, "step": 3330 }, { "epoch": 1.9441210710128056, "grad_norm": 0.5082997679710388, "learning_rate": 9.999127276230146e-05, "loss": -0.1383, "step": 3340 }, { "epoch": 1.949941792782305, "grad_norm": 0.5784724950790405, "learning_rate": 9.999075030301184e-05, "loss": -0.1319, "step": 3350 }, { "epoch": 1.9557625145518043, "grad_norm": 0.5174658894538879, "learning_rate": 9.999021265785221e-05, "loss": -0.1262, "step": 3360 }, { "epoch": 1.961583236321304, "grad_norm": 0.4805922508239746, "learning_rate": 9.998965982698589e-05, "loss": -0.119, "step": 3370 }, { "epoch": 1.9674039580908032, "grad_norm": 0.4483237862586975, "learning_rate": 9.998909181058082e-05, "loss": -0.1388, "step": 3380 }, { "epoch": 1.9732246798603028, "grad_norm": 0.5114749073982239, "learning_rate": 9.998850860880953e-05, "loss": -0.1347, "step": 3390 }, { "epoch": 1.979045401629802, "grad_norm": 0.6115111112594604, "learning_rate": 9.998791022184922e-05, "loss": -0.1356, "step": 3400 }, { "epoch": 1.9848661233993015, "grad_norm": 0.5335850119590759, "learning_rate": 9.99872966498816e-05, "loss": -0.1353, "step": 3410 }, { "epoch": 1.990686845168801, "grad_norm": 0.5419212579727173, "learning_rate": 9.998666789309313e-05, "loss": -0.1307, "step": 3420 }, { "epoch": 1.9965075669383003, "grad_norm": 0.6311287879943848, "learning_rate": 9.998602395167475e-05, "loss": -0.1377, "step": 3430 }, { "epoch": 2.0023282887077998, "grad_norm": 0.3418199419975281, "learning_rate": 9.998536482582213e-05, "loss": -0.126, "step": 3440 }, { "epoch": 2.008149010477299, "grad_norm": 0.38706034421920776, "learning_rate": 9.998469051573544e-05, "loss": -0.1315, "step": 3450 }, { "epoch": 2.0139697322467986, "grad_norm": 0.27746546268463135, "learning_rate": 9.998400102161954e-05, "loss": -0.1369, "step": 3460 }, { "epoch": 2.019790454016298, "grad_norm": 0.3603499233722687, "learning_rate": 9.998329634368388e-05, "loss": -0.1418, "step": 3470 }, { "epoch": 2.0256111757857975, "grad_norm": 0.3626859486103058, "learning_rate": 9.998257648214253e-05, "loss": -0.1401, "step": 3480 }, { "epoch": 2.0314318975552967, "grad_norm": 0.5037516355514526, "learning_rate": 9.998184143721417e-05, "loss": -0.1286, "step": 3490 }, { "epoch": 2.0372526193247964, "grad_norm": 0.5708421468734741, "learning_rate": 9.998109120912206e-05, "loss": -0.1356, "step": 3500 }, { "epoch": 2.0430733410942956, "grad_norm": 0.4267216920852661, "learning_rate": 9.998032579809411e-05, "loss": -0.1324, "step": 3510 }, { "epoch": 2.0488940628637953, "grad_norm": 0.30465295910835266, "learning_rate": 9.997954520436286e-05, "loss": -0.1264, "step": 3520 }, { "epoch": 2.0547147846332945, "grad_norm": 0.47006407380104065, "learning_rate": 9.997874942816538e-05, "loss": -0.1391, "step": 3530 }, { "epoch": 2.060535506402794, "grad_norm": 0.45464256405830383, "learning_rate": 9.997793846974345e-05, "loss": -0.126, "step": 3540 }, { "epoch": 2.0663562281722934, "grad_norm": 0.5423076748847961, "learning_rate": 9.997711232934341e-05, "loss": -0.1343, "step": 3550 }, { "epoch": 2.0721769499417926, "grad_norm": 0.44433194398880005, "learning_rate": 9.99762710072162e-05, "loss": -0.1343, "step": 3560 }, { "epoch": 2.0779976717112922, "grad_norm": 0.4306599795818329, "learning_rate": 9.997541450361743e-05, "loss": -0.126, "step": 3570 }, { "epoch": 2.0838183934807915, "grad_norm": 0.46699175238609314, "learning_rate": 9.997454281880723e-05, "loss": -0.1257, "step": 3580 }, { "epoch": 2.089639115250291, "grad_norm": 0.5522453784942627, "learning_rate": 9.997365595305044e-05, "loss": -0.1303, "step": 3590 }, { "epoch": 2.0954598370197903, "grad_norm": 0.5077689290046692, "learning_rate": 9.997275390661644e-05, "loss": -0.13, "step": 3600 }, { "epoch": 2.10128055878929, "grad_norm": 0.3686368465423584, "learning_rate": 9.997183667977926e-05, "loss": -0.1328, "step": 3610 }, { "epoch": 2.107101280558789, "grad_norm": 0.5359586477279663, "learning_rate": 9.997090427281752e-05, "loss": -0.1341, "step": 3620 }, { "epoch": 2.112922002328289, "grad_norm": 0.5093519687652588, "learning_rate": 9.996995668601448e-05, "loss": -0.1372, "step": 3630 }, { "epoch": 2.118742724097788, "grad_norm": 0.47655460238456726, "learning_rate": 9.996899391965798e-05, "loss": -0.1404, "step": 3640 }, { "epoch": 2.1245634458672877, "grad_norm": 0.30853644013404846, "learning_rate": 9.996801597404048e-05, "loss": -0.1327, "step": 3650 }, { "epoch": 2.130384167636787, "grad_norm": 0.5059525370597839, "learning_rate": 9.996702284945905e-05, "loss": -0.1434, "step": 3660 }, { "epoch": 2.1362048894062866, "grad_norm": 0.43801119923591614, "learning_rate": 9.996601454621539e-05, "loss": -0.1363, "step": 3670 }, { "epoch": 2.142025611175786, "grad_norm": 0.4707636833190918, "learning_rate": 9.996499106461577e-05, "loss": -0.1404, "step": 3680 }, { "epoch": 2.147846332945285, "grad_norm": 0.4185681939125061, "learning_rate": 9.996395240497112e-05, "loss": -0.1324, "step": 3690 }, { "epoch": 2.1536670547147847, "grad_norm": 0.473952054977417, "learning_rate": 9.996289856759696e-05, "loss": -0.1262, "step": 3700 }, { "epoch": 2.159487776484284, "grad_norm": 0.4532175660133362, "learning_rate": 9.996182955281342e-05, "loss": -0.1319, "step": 3710 }, { "epoch": 2.1653084982537836, "grad_norm": 0.42053961753845215, "learning_rate": 9.996074536094519e-05, "loss": -0.1343, "step": 3720 }, { "epoch": 2.171129220023283, "grad_norm": 0.5017653703689575, "learning_rate": 9.995964599232168e-05, "loss": -0.1385, "step": 3730 }, { "epoch": 2.1769499417927825, "grad_norm": 0.43383386731147766, "learning_rate": 9.995853144727683e-05, "loss": -0.1313, "step": 3740 }, { "epoch": 2.1827706635622817, "grad_norm": 0.3208593726158142, "learning_rate": 9.99574017261492e-05, "loss": -0.1374, "step": 3750 }, { "epoch": 2.1885913853317813, "grad_norm": 0.25734132528305054, "learning_rate": 9.995625682928198e-05, "loss": -0.1453, "step": 3760 }, { "epoch": 2.1944121071012805, "grad_norm": 0.42521560192108154, "learning_rate": 9.995509675702295e-05, "loss": -0.1424, "step": 3770 }, { "epoch": 2.2002328288707798, "grad_norm": 0.3049241006374359, "learning_rate": 9.995392150972451e-05, "loss": -0.1397, "step": 3780 }, { "epoch": 2.2060535506402794, "grad_norm": 0.3915509283542633, "learning_rate": 9.995273108774366e-05, "loss": -0.129, "step": 3790 }, { "epoch": 2.2118742724097786, "grad_norm": 0.31814610958099365, "learning_rate": 9.995152549144205e-05, "loss": -0.1287, "step": 3800 }, { "epoch": 2.2176949941792783, "grad_norm": 0.3252395987510681, "learning_rate": 9.995030472118587e-05, "loss": -0.1339, "step": 3810 }, { "epoch": 2.2235157159487775, "grad_norm": 0.5596956014633179, "learning_rate": 9.9949068777346e-05, "loss": -0.1351, "step": 3820 }, { "epoch": 2.229336437718277, "grad_norm": 0.32171741127967834, "learning_rate": 9.994781766029786e-05, "loss": -0.1425, "step": 3830 }, { "epoch": 2.2351571594877764, "grad_norm": 0.3719198703765869, "learning_rate": 9.994655137042151e-05, "loss": -0.1295, "step": 3840 }, { "epoch": 2.240977881257276, "grad_norm": 0.3293401598930359, "learning_rate": 9.99452699081016e-05, "loss": -0.1372, "step": 3850 }, { "epoch": 2.2467986030267753, "grad_norm": 0.4674449861049652, "learning_rate": 9.994397327372743e-05, "loss": -0.1306, "step": 3860 }, { "epoch": 2.252619324796275, "grad_norm": 0.476461261510849, "learning_rate": 9.994266146769286e-05, "loss": -0.1338, "step": 3870 }, { "epoch": 2.258440046565774, "grad_norm": 0.5201424956321716, "learning_rate": 9.994133449039642e-05, "loss": -0.1388, "step": 3880 }, { "epoch": 2.264260768335274, "grad_norm": 0.45547446608543396, "learning_rate": 9.993999234224118e-05, "loss": -0.1298, "step": 3890 }, { "epoch": 2.270081490104773, "grad_norm": 0.4732528626918793, "learning_rate": 9.993863502363485e-05, "loss": -0.1331, "step": 3900 }, { "epoch": 2.275902211874272, "grad_norm": 0.4586186408996582, "learning_rate": 9.993726253498976e-05, "loss": -0.1416, "step": 3910 }, { "epoch": 2.281722933643772, "grad_norm": 0.5197663307189941, "learning_rate": 9.993587487672282e-05, "loss": -0.1429, "step": 3920 }, { "epoch": 2.287543655413271, "grad_norm": 0.3669624328613281, "learning_rate": 9.993447204925558e-05, "loss": -0.1376, "step": 3930 }, { "epoch": 2.2933643771827708, "grad_norm": 0.3149541914463043, "learning_rate": 9.993305405301416e-05, "loss": -0.1316, "step": 3940 }, { "epoch": 2.29918509895227, "grad_norm": 0.25870975852012634, "learning_rate": 9.993162088842935e-05, "loss": -0.1325, "step": 3950 }, { "epoch": 2.3050058207217696, "grad_norm": 0.42310869693756104, "learning_rate": 9.993017255593646e-05, "loss": -0.119, "step": 3960 }, { "epoch": 2.310826542491269, "grad_norm": 0.3142417371273041, "learning_rate": 9.992870905597548e-05, "loss": -0.138, "step": 3970 }, { "epoch": 2.3166472642607685, "grad_norm": 0.4345148205757141, "learning_rate": 9.9927230388991e-05, "loss": -0.1435, "step": 3980 }, { "epoch": 2.3224679860302677, "grad_norm": 0.37802931666374207, "learning_rate": 9.992573655543215e-05, "loss": -0.1408, "step": 3990 }, { "epoch": 2.328288707799767, "grad_norm": 0.35085442662239075, "learning_rate": 9.992422755575277e-05, "loss": -0.1411, "step": 4000 }, { "epoch": 2.3341094295692666, "grad_norm": 0.3755723536014557, "learning_rate": 9.992270339041123e-05, "loss": -0.1437, "step": 4010 }, { "epoch": 2.3399301513387663, "grad_norm": 0.5066754221916199, "learning_rate": 9.992116405987053e-05, "loss": -0.1447, "step": 4020 }, { "epoch": 2.3457508731082655, "grad_norm": 0.41223737597465515, "learning_rate": 9.991960956459828e-05, "loss": -0.1402, "step": 4030 }, { "epoch": 2.3515715948777647, "grad_norm": 0.43994614481925964, "learning_rate": 9.991803990506669e-05, "loss": -0.1358, "step": 4040 }, { "epoch": 2.3573923166472643, "grad_norm": 0.47909867763519287, "learning_rate": 9.991645508175258e-05, "loss": -0.1328, "step": 4050 }, { "epoch": 2.3632130384167636, "grad_norm": 0.431087464094162, "learning_rate": 9.99148550951374e-05, "loss": -0.1383, "step": 4060 }, { "epoch": 2.369033760186263, "grad_norm": 0.5004846453666687, "learning_rate": 9.991323994570716e-05, "loss": -0.1305, "step": 4070 }, { "epoch": 2.3748544819557624, "grad_norm": 0.40592873096466064, "learning_rate": 9.99116096339525e-05, "loss": -0.1398, "step": 4080 }, { "epoch": 2.380675203725262, "grad_norm": 0.4327537417411804, "learning_rate": 9.990996416036869e-05, "loss": -0.1382, "step": 4090 }, { "epoch": 2.3864959254947613, "grad_norm": 0.44195830821990967, "learning_rate": 9.990830352545555e-05, "loss": -0.1386, "step": 4100 }, { "epoch": 2.392316647264261, "grad_norm": 0.3235805034637451, "learning_rate": 9.990662772971756e-05, "loss": -0.1446, "step": 4110 }, { "epoch": 2.39813736903376, "grad_norm": 0.42246732115745544, "learning_rate": 9.990493677366376e-05, "loss": -0.1386, "step": 4120 }, { "epoch": 2.4039580908032594, "grad_norm": 0.58347487449646, "learning_rate": 9.990323065780786e-05, "loss": -0.1435, "step": 4130 }, { "epoch": 2.409778812572759, "grad_norm": 0.3547542989253998, "learning_rate": 9.990150938266808e-05, "loss": -0.1381, "step": 4140 }, { "epoch": 2.4155995343422583, "grad_norm": 0.42975127696990967, "learning_rate": 9.989977294876733e-05, "loss": -0.1393, "step": 4150 }, { "epoch": 2.421420256111758, "grad_norm": 0.3236544728279114, "learning_rate": 9.989802135663308e-05, "loss": -0.1488, "step": 4160 }, { "epoch": 2.427240977881257, "grad_norm": 0.3539857566356659, "learning_rate": 9.989625460679743e-05, "loss": -0.143, "step": 4170 }, { "epoch": 2.433061699650757, "grad_norm": 0.5142993927001953, "learning_rate": 9.989447269979706e-05, "loss": -0.1413, "step": 4180 }, { "epoch": 2.438882421420256, "grad_norm": 0.5329843163490295, "learning_rate": 9.989267563617328e-05, "loss": -0.1439, "step": 4190 }, { "epoch": 2.4447031431897557, "grad_norm": 0.35950011014938354, "learning_rate": 9.989086341647198e-05, "loss": -0.1473, "step": 4200 }, { "epoch": 2.450523864959255, "grad_norm": 0.3327524960041046, "learning_rate": 9.988903604124366e-05, "loss": -0.1434, "step": 4210 }, { "epoch": 2.4563445867287546, "grad_norm": 0.3719950020313263, "learning_rate": 9.988719351104343e-05, "loss": -0.1438, "step": 4220 }, { "epoch": 2.4621653084982538, "grad_norm": 0.5929665565490723, "learning_rate": 9.9885335826431e-05, "loss": -0.143, "step": 4230 }, { "epoch": 2.4679860302677534, "grad_norm": 0.4258379340171814, "learning_rate": 9.988346298797071e-05, "loss": -0.1447, "step": 4240 }, { "epoch": 2.4738067520372526, "grad_norm": 0.3888486325740814, "learning_rate": 9.988157499623146e-05, "loss": -0.1316, "step": 4250 }, { "epoch": 2.479627473806752, "grad_norm": 0.44401854276657104, "learning_rate": 9.987967185178677e-05, "loss": -0.1454, "step": 4260 }, { "epoch": 2.4854481955762515, "grad_norm": 0.5072980523109436, "learning_rate": 9.987775355521476e-05, "loss": -0.1378, "step": 4270 }, { "epoch": 2.4912689173457507, "grad_norm": 0.3895522952079773, "learning_rate": 9.987582010709817e-05, "loss": -0.1393, "step": 4280 }, { "epoch": 2.4970896391152504, "grad_norm": 0.2541879117488861, "learning_rate": 9.987387150802431e-05, "loss": -0.1258, "step": 4290 }, { "epoch": 2.5029103608847496, "grad_norm": 0.5017833113670349, "learning_rate": 9.987190775858517e-05, "loss": -0.147, "step": 4300 }, { "epoch": 2.5087310826542493, "grad_norm": 0.345814973115921, "learning_rate": 9.98699288593772e-05, "loss": -0.1334, "step": 4310 }, { "epoch": 2.5145518044237485, "grad_norm": 0.44058892130851746, "learning_rate": 9.986793481100161e-05, "loss": -0.1447, "step": 4320 }, { "epoch": 2.520372526193248, "grad_norm": 0.3788377642631531, "learning_rate": 9.986592561406412e-05, "loss": -0.1408, "step": 4330 }, { "epoch": 2.5261932479627474, "grad_norm": 0.3640884757041931, "learning_rate": 9.986390126917503e-05, "loss": -0.1536, "step": 4340 }, { "epoch": 2.5320139697322466, "grad_norm": 0.3170678913593292, "learning_rate": 9.986186177694933e-05, "loss": -0.149, "step": 4350 }, { "epoch": 2.5378346915017462, "grad_norm": 0.5103777647018433, "learning_rate": 9.985980713800656e-05, "loss": -0.1323, "step": 4360 }, { "epoch": 2.543655413271246, "grad_norm": 0.4430016279220581, "learning_rate": 9.985773735297084e-05, "loss": -0.1495, "step": 4370 }, { "epoch": 2.549476135040745, "grad_norm": 0.33805692195892334, "learning_rate": 9.985565242247092e-05, "loss": -0.1406, "step": 4380 }, { "epoch": 2.5552968568102443, "grad_norm": 0.34940671920776367, "learning_rate": 9.985355234714016e-05, "loss": -0.1306, "step": 4390 }, { "epoch": 2.561117578579744, "grad_norm": 0.6388701796531677, "learning_rate": 9.985143712761652e-05, "loss": -0.1453, "step": 4400 }, { "epoch": 2.566938300349243, "grad_norm": 0.40163466334342957, "learning_rate": 9.984930676454252e-05, "loss": -0.1416, "step": 4410 }, { "epoch": 2.572759022118743, "grad_norm": 0.38464003801345825, "learning_rate": 9.984716125856532e-05, "loss": -0.1438, "step": 4420 }, { "epoch": 2.578579743888242, "grad_norm": 0.32465845346450806, "learning_rate": 9.984500061033667e-05, "loss": -0.1475, "step": 4430 }, { "epoch": 2.5844004656577413, "grad_norm": 0.31706610321998596, "learning_rate": 9.984282482051293e-05, "loss": -0.1465, "step": 4440 }, { "epoch": 2.590221187427241, "grad_norm": 0.386240690946579, "learning_rate": 9.9840633889755e-05, "loss": -0.1421, "step": 4450 }, { "epoch": 2.5960419091967406, "grad_norm": 0.34940049052238464, "learning_rate": 9.983842781872848e-05, "loss": -0.1373, "step": 4460 }, { "epoch": 2.60186263096624, "grad_norm": 0.2631845474243164, "learning_rate": 9.98362066081035e-05, "loss": -0.1423, "step": 4470 }, { "epoch": 2.607683352735739, "grad_norm": 0.2613656222820282, "learning_rate": 9.983397025855479e-05, "loss": -0.1516, "step": 4480 }, { "epoch": 2.6135040745052387, "grad_norm": 0.32389959692955017, "learning_rate": 9.983171877076171e-05, "loss": -0.1487, "step": 4490 }, { "epoch": 2.619324796274738, "grad_norm": 0.3654262125492096, "learning_rate": 9.98294521454082e-05, "loss": -0.1353, "step": 4500 }, { "epoch": 2.6251455180442376, "grad_norm": 0.3065401613712311, "learning_rate": 9.98271703831828e-05, "loss": -0.1439, "step": 4510 }, { "epoch": 2.630966239813737, "grad_norm": 0.3796231746673584, "learning_rate": 9.982487348477865e-05, "loss": -0.1336, "step": 4520 }, { "epoch": 2.6367869615832364, "grad_norm": 0.37740278244018555, "learning_rate": 9.982256145089347e-05, "loss": -0.1409, "step": 4530 }, { "epoch": 2.6426076833527357, "grad_norm": 0.5353806018829346, "learning_rate": 9.982023428222962e-05, "loss": -0.1495, "step": 4540 }, { "epoch": 2.6484284051222353, "grad_norm": 0.364484041929245, "learning_rate": 9.981789197949403e-05, "loss": -0.1336, "step": 4550 }, { "epoch": 2.6542491268917345, "grad_norm": 0.4433594048023224, "learning_rate": 9.98155345433982e-05, "loss": -0.1426, "step": 4560 }, { "epoch": 2.6600698486612337, "grad_norm": 0.32656827569007874, "learning_rate": 9.981316197465831e-05, "loss": -0.1418, "step": 4570 }, { "epoch": 2.6658905704307334, "grad_norm": 0.31436091661453247, "learning_rate": 9.981077427399504e-05, "loss": -0.1458, "step": 4580 }, { "epoch": 2.671711292200233, "grad_norm": 0.20536382496356964, "learning_rate": 9.980837144213371e-05, "loss": -0.1454, "step": 4590 }, { "epoch": 2.6775320139697323, "grad_norm": 0.3308965265750885, "learning_rate": 9.980595347980426e-05, "loss": -0.1486, "step": 4600 }, { "epoch": 2.6833527357392315, "grad_norm": 0.2899720370769501, "learning_rate": 9.980352038774119e-05, "loss": -0.1339, "step": 4610 }, { "epoch": 2.689173457508731, "grad_norm": 0.33437708020210266, "learning_rate": 9.98010721666836e-05, "loss": -0.1372, "step": 4620 }, { "epoch": 2.6949941792782304, "grad_norm": 0.24406571686267853, "learning_rate": 9.979860881737523e-05, "loss": -0.151, "step": 4630 }, { "epoch": 2.70081490104773, "grad_norm": 0.391169011592865, "learning_rate": 9.979613034056434e-05, "loss": -0.1421, "step": 4640 }, { "epoch": 2.7066356228172292, "grad_norm": 0.4216320514678955, "learning_rate": 9.979363673700386e-05, "loss": -0.1348, "step": 4650 }, { "epoch": 2.712456344586729, "grad_norm": 0.47685879468917847, "learning_rate": 9.979112800745124e-05, "loss": -0.1338, "step": 4660 }, { "epoch": 2.718277066356228, "grad_norm": 0.27402031421661377, "learning_rate": 9.978860415266861e-05, "loss": -0.1402, "step": 4670 }, { "epoch": 2.724097788125728, "grad_norm": 0.392081081867218, "learning_rate": 9.978606517342262e-05, "loss": -0.1463, "step": 4680 }, { "epoch": 2.729918509895227, "grad_norm": 0.2964446544647217, "learning_rate": 9.978351107048456e-05, "loss": -0.1395, "step": 4690 }, { "epoch": 2.735739231664726, "grad_norm": 0.43455249071121216, "learning_rate": 9.978094184463029e-05, "loss": -0.1402, "step": 4700 }, { "epoch": 2.741559953434226, "grad_norm": 0.6157009601593018, "learning_rate": 9.977835749664029e-05, "loss": -0.1411, "step": 4710 }, { "epoch": 2.7473806752037255, "grad_norm": 0.4236374795436859, "learning_rate": 9.97757580272996e-05, "loss": -0.1518, "step": 4720 }, { "epoch": 2.7532013969732247, "grad_norm": 0.2740592658519745, "learning_rate": 9.977314343739786e-05, "loss": -0.1528, "step": 4730 }, { "epoch": 2.759022118742724, "grad_norm": 0.337458074092865, "learning_rate": 9.977051372772934e-05, "loss": -0.1385, "step": 4740 }, { "epoch": 2.7648428405122236, "grad_norm": 0.203681081533432, "learning_rate": 9.976786889909286e-05, "loss": -0.1504, "step": 4750 }, { "epoch": 2.770663562281723, "grad_norm": 0.48540520668029785, "learning_rate": 9.976520895229185e-05, "loss": -0.1472, "step": 4760 }, { "epoch": 2.7764842840512225, "grad_norm": 0.38403189182281494, "learning_rate": 9.976253388813433e-05, "loss": -0.1428, "step": 4770 }, { "epoch": 2.7823050058207217, "grad_norm": 0.28188055753707886, "learning_rate": 9.975984370743293e-05, "loss": -0.1421, "step": 4780 }, { "epoch": 2.788125727590221, "grad_norm": 0.3421569764614105, "learning_rate": 9.975713841100485e-05, "loss": -0.1373, "step": 4790 }, { "epoch": 2.7939464493597206, "grad_norm": 0.30207955837249756, "learning_rate": 9.975441799967187e-05, "loss": -0.1433, "step": 4800 }, { "epoch": 2.7997671711292202, "grad_norm": 0.23610568046569824, "learning_rate": 9.975168247426039e-05, "loss": -0.1465, "step": 4810 }, { "epoch": 2.8055878928987195, "grad_norm": 0.49222955107688904, "learning_rate": 9.974893183560139e-05, "loss": -0.1481, "step": 4820 }, { "epoch": 2.8114086146682187, "grad_norm": 0.36237451434135437, "learning_rate": 9.974616608453045e-05, "loss": -0.1442, "step": 4830 }, { "epoch": 2.8172293364377183, "grad_norm": 0.37779945135116577, "learning_rate": 9.974338522188772e-05, "loss": -0.1433, "step": 4840 }, { "epoch": 2.8230500582072175, "grad_norm": 0.30023419857025146, "learning_rate": 9.974058924851797e-05, "loss": -0.1461, "step": 4850 }, { "epoch": 2.828870779976717, "grad_norm": 0.3701309859752655, "learning_rate": 9.973777816527051e-05, "loss": -0.1446, "step": 4860 }, { "epoch": 2.8346915017462164, "grad_norm": 0.5006577372550964, "learning_rate": 9.973495197299931e-05, "loss": -0.142, "step": 4870 }, { "epoch": 2.840512223515716, "grad_norm": 0.48604318499565125, "learning_rate": 9.973211067256287e-05, "loss": -0.1437, "step": 4880 }, { "epoch": 2.8463329452852153, "grad_norm": 0.39025428891181946, "learning_rate": 9.97292542648243e-05, "loss": -0.1378, "step": 4890 }, { "epoch": 2.852153667054715, "grad_norm": 0.4947015643119812, "learning_rate": 9.972638275065131e-05, "loss": -0.1447, "step": 4900 }, { "epoch": 2.857974388824214, "grad_norm": 0.4146808385848999, "learning_rate": 9.972349613091621e-05, "loss": -0.1382, "step": 4910 }, { "epoch": 2.8637951105937134, "grad_norm": 0.26712799072265625, "learning_rate": 9.972059440649584e-05, "loss": -0.1358, "step": 4920 }, { "epoch": 2.869615832363213, "grad_norm": 0.31882810592651367, "learning_rate": 9.971767757827168e-05, "loss": -0.1383, "step": 4930 }, { "epoch": 2.8754365541327127, "grad_norm": 0.45565831661224365, "learning_rate": 9.971474564712982e-05, "loss": -0.138, "step": 4940 }, { "epoch": 2.881257275902212, "grad_norm": 0.3931305706501007, "learning_rate": 9.971179861396084e-05, "loss": -0.1445, "step": 4950 }, { "epoch": 2.887077997671711, "grad_norm": 0.3766554296016693, "learning_rate": 9.970883647966003e-05, "loss": -0.1426, "step": 4960 }, { "epoch": 2.892898719441211, "grad_norm": 0.6163656115531921, "learning_rate": 9.970585924512717e-05, "loss": -0.1413, "step": 4970 }, { "epoch": 2.89871944121071, "grad_norm": 0.5094054341316223, "learning_rate": 9.970286691126669e-05, "loss": -0.1502, "step": 4980 }, { "epoch": 2.9045401629802097, "grad_norm": 0.35248681902885437, "learning_rate": 9.969985947898756e-05, "loss": -0.142, "step": 4990 }, { "epoch": 2.910360884749709, "grad_norm": 0.3669475317001343, "learning_rate": 9.969683694920337e-05, "loss": -0.1341, "step": 5000 }, { "epoch": 2.9161816065192085, "grad_norm": 0.47501325607299805, "learning_rate": 9.969379932283228e-05, "loss": -0.1421, "step": 5010 }, { "epoch": 2.9220023282887078, "grad_norm": 0.34201133251190186, "learning_rate": 9.969074660079704e-05, "loss": -0.1392, "step": 5020 }, { "epoch": 2.9278230500582074, "grad_norm": 0.49699246883392334, "learning_rate": 9.968767878402501e-05, "loss": -0.1407, "step": 5030 }, { "epoch": 2.9336437718277066, "grad_norm": 0.38121822476387024, "learning_rate": 9.968459587344808e-05, "loss": -0.1422, "step": 5040 }, { "epoch": 2.939464493597206, "grad_norm": 0.25584474205970764, "learning_rate": 9.968149787000278e-05, "loss": -0.1426, "step": 5050 }, { "epoch": 2.9452852153667055, "grad_norm": 0.2867460548877716, "learning_rate": 9.967838477463018e-05, "loss": -0.1526, "step": 5060 }, { "epoch": 2.9511059371362047, "grad_norm": 0.33176666498184204, "learning_rate": 9.967525658827597e-05, "loss": -0.1431, "step": 5070 }, { "epoch": 2.9569266589057044, "grad_norm": 0.40787962079048157, "learning_rate": 9.967211331189042e-05, "loss": -0.1376, "step": 5080 }, { "epoch": 2.9627473806752036, "grad_norm": 0.44687262177467346, "learning_rate": 9.966895494642834e-05, "loss": -0.133, "step": 5090 }, { "epoch": 2.9685681024447033, "grad_norm": 0.48000141978263855, "learning_rate": 9.96657814928492e-05, "loss": -0.1367, "step": 5100 }, { "epoch": 2.9743888242142025, "grad_norm": 0.3702305853366852, "learning_rate": 9.966259295211697e-05, "loss": -0.14, "step": 5110 }, { "epoch": 2.980209545983702, "grad_norm": 0.28364667296409607, "learning_rate": 9.965938932520028e-05, "loss": -0.1429, "step": 5120 }, { "epoch": 2.9860302677532014, "grad_norm": 0.35856735706329346, "learning_rate": 9.965617061307229e-05, "loss": -0.1459, "step": 5130 }, { "epoch": 2.9918509895227006, "grad_norm": 0.386100172996521, "learning_rate": 9.965293681671077e-05, "loss": -0.1421, "step": 5140 }, { "epoch": 2.9976717112922002, "grad_norm": 0.3084077835083008, "learning_rate": 9.964968793709804e-05, "loss": -0.138, "step": 5150 }, { "epoch": 3.0034924330616994, "grad_norm": 0.39688849449157715, "learning_rate": 9.964642397522106e-05, "loss": -0.1406, "step": 5160 }, { "epoch": 3.009313154831199, "grad_norm": 0.3999127149581909, "learning_rate": 9.96431449320713e-05, "loss": -0.1451, "step": 5170 }, { "epoch": 3.0151338766006983, "grad_norm": 0.2479352205991745, "learning_rate": 9.963985080864486e-05, "loss": -0.1417, "step": 5180 }, { "epoch": 3.020954598370198, "grad_norm": 0.23902179300785065, "learning_rate": 9.96365416059424e-05, "loss": -0.1506, "step": 5190 }, { "epoch": 3.026775320139697, "grad_norm": 0.22916990518569946, "learning_rate": 9.963321732496919e-05, "loss": -0.1447, "step": 5200 }, { "epoch": 3.032596041909197, "grad_norm": 0.3781123161315918, "learning_rate": 9.962987796673506e-05, "loss": -0.1439, "step": 5210 }, { "epoch": 3.038416763678696, "grad_norm": 0.3761482536792755, "learning_rate": 9.962652353225438e-05, "loss": -0.1507, "step": 5220 }, { "epoch": 3.0442374854481957, "grad_norm": 0.42491909861564636, "learning_rate": 9.962315402254619e-05, "loss": -0.1428, "step": 5230 }, { "epoch": 3.050058207217695, "grad_norm": 0.3919926881790161, "learning_rate": 9.9619769438634e-05, "loss": -0.1428, "step": 5240 }, { "epoch": 3.0558789289871946, "grad_norm": 0.28440016508102417, "learning_rate": 9.9616369781546e-05, "loss": -0.1493, "step": 5250 }, { "epoch": 3.061699650756694, "grad_norm": 0.3560730218887329, "learning_rate": 9.961295505231491e-05, "loss": -0.1428, "step": 5260 }, { "epoch": 3.067520372526193, "grad_norm": 0.39380335807800293, "learning_rate": 9.960952525197804e-05, "loss": -0.1481, "step": 5270 }, { "epoch": 3.0733410942956927, "grad_norm": 0.47417381405830383, "learning_rate": 9.960608038157724e-05, "loss": -0.1542, "step": 5280 }, { "epoch": 3.079161816065192, "grad_norm": 0.49706265330314636, "learning_rate": 9.960262044215901e-05, "loss": -0.1432, "step": 5290 }, { "epoch": 3.0849825378346916, "grad_norm": 0.3075634837150574, "learning_rate": 9.959914543477435e-05, "loss": -0.1329, "step": 5300 }, { "epoch": 3.090803259604191, "grad_norm": 0.32593950629234314, "learning_rate": 9.959565536047892e-05, "loss": -0.1267, "step": 5310 }, { "epoch": 3.0966239813736904, "grad_norm": 0.24973273277282715, "learning_rate": 9.959215022033288e-05, "loss": -0.141, "step": 5320 }, { "epoch": 3.1024447031431897, "grad_norm": 0.47045084834098816, "learning_rate": 9.9588630015401e-05, "loss": -0.1388, "step": 5330 }, { "epoch": 3.1082654249126893, "grad_norm": 0.2465456873178482, "learning_rate": 9.958509474675264e-05, "loss": -0.1471, "step": 5340 }, { "epoch": 3.1140861466821885, "grad_norm": 0.2842257022857666, "learning_rate": 9.958154441546171e-05, "loss": -0.14, "step": 5350 }, { "epoch": 3.119906868451688, "grad_norm": 0.18484774231910706, "learning_rate": 9.957797902260673e-05, "loss": -0.1419, "step": 5360 }, { "epoch": 3.1257275902211874, "grad_norm": 0.2824820280075073, "learning_rate": 9.957439856927073e-05, "loss": -0.1481, "step": 5370 }, { "epoch": 3.131548311990687, "grad_norm": 0.24187946319580078, "learning_rate": 9.957080305654139e-05, "loss": -0.1416, "step": 5380 }, { "epoch": 3.1373690337601863, "grad_norm": 0.527245819568634, "learning_rate": 9.956719248551092e-05, "loss": -0.1372, "step": 5390 }, { "epoch": 3.1431897555296855, "grad_norm": 0.5984659194946289, "learning_rate": 9.956356685727612e-05, "loss": -0.143, "step": 5400 }, { "epoch": 3.149010477299185, "grad_norm": 0.44486376643180847, "learning_rate": 9.955992617293836e-05, "loss": -0.1442, "step": 5410 }, { "epoch": 3.1548311990686844, "grad_norm": 0.3126315176486969, "learning_rate": 9.955627043360358e-05, "loss": -0.1336, "step": 5420 }, { "epoch": 3.160651920838184, "grad_norm": 0.42486372590065, "learning_rate": 9.955259964038231e-05, "loss": -0.1478, "step": 5430 }, { "epoch": 3.1664726426076832, "grad_norm": 0.5528666973114014, "learning_rate": 9.954891379438962e-05, "loss": -0.1415, "step": 5440 }, { "epoch": 3.172293364377183, "grad_norm": 0.4598698914051056, "learning_rate": 9.954521289674519e-05, "loss": -0.1348, "step": 5450 }, { "epoch": 3.178114086146682, "grad_norm": 0.42272481322288513, "learning_rate": 9.954149694857325e-05, "loss": -0.1424, "step": 5460 }, { "epoch": 3.1839348079161818, "grad_norm": 0.4644860029220581, "learning_rate": 9.953776595100258e-05, "loss": -0.1345, "step": 5470 }, { "epoch": 3.189755529685681, "grad_norm": 0.421464741230011, "learning_rate": 9.95340199051666e-05, "loss": -0.1495, "step": 5480 }, { "epoch": 3.1955762514551806, "grad_norm": 0.3138900399208069, "learning_rate": 9.953025881220325e-05, "loss": -0.1471, "step": 5490 }, { "epoch": 3.20139697322468, "grad_norm": 0.2489163726568222, "learning_rate": 9.952648267325504e-05, "loss": -0.1466, "step": 5500 }, { "epoch": 3.207217694994179, "grad_norm": 0.41411200165748596, "learning_rate": 9.952269148946905e-05, "loss": -0.1464, "step": 5510 }, { "epoch": 3.2130384167636787, "grad_norm": 0.34392186999320984, "learning_rate": 9.951888526199697e-05, "loss": -0.1442, "step": 5520 }, { "epoch": 3.218859138533178, "grad_norm": 0.31325456500053406, "learning_rate": 9.951506399199501e-05, "loss": -0.1473, "step": 5530 }, { "epoch": 3.2246798603026776, "grad_norm": 0.3383833169937134, "learning_rate": 9.951122768062399e-05, "loss": -0.1511, "step": 5540 }, { "epoch": 3.230500582072177, "grad_norm": 0.28812772035598755, "learning_rate": 9.950737632904927e-05, "loss": -0.1461, "step": 5550 }, { "epoch": 3.2363213038416765, "grad_norm": 0.3159834146499634, "learning_rate": 9.950350993844077e-05, "loss": -0.1434, "step": 5560 }, { "epoch": 3.2421420256111757, "grad_norm": 0.30085164308547974, "learning_rate": 9.949962850997303e-05, "loss": -0.1476, "step": 5570 }, { "epoch": 3.2479627473806754, "grad_norm": 0.4456650912761688, "learning_rate": 9.949573204482512e-05, "loss": -0.1581, "step": 5580 }, { "epoch": 3.2537834691501746, "grad_norm": 0.2946320176124573, "learning_rate": 9.949182054418064e-05, "loss": -0.1508, "step": 5590 }, { "epoch": 3.2596041909196742, "grad_norm": 0.3338647782802582, "learning_rate": 9.948789400922787e-05, "loss": -0.1447, "step": 5600 }, { "epoch": 3.2654249126891735, "grad_norm": 0.2817605137825012, "learning_rate": 9.948395244115953e-05, "loss": -0.1451, "step": 5610 }, { "epoch": 3.2712456344586727, "grad_norm": 0.2889823615550995, "learning_rate": 9.9479995841173e-05, "loss": -0.1447, "step": 5620 }, { "epoch": 3.2770663562281723, "grad_norm": 0.24981628358364105, "learning_rate": 9.947602421047017e-05, "loss": -0.1542, "step": 5630 }, { "epoch": 3.2828870779976715, "grad_norm": 0.4141489863395691, "learning_rate": 9.947203755025753e-05, "loss": -0.1447, "step": 5640 }, { "epoch": 3.288707799767171, "grad_norm": 0.31340742111206055, "learning_rate": 9.946803586174611e-05, "loss": -0.1512, "step": 5650 }, { "epoch": 3.2945285215366704, "grad_norm": 0.3673177659511566, "learning_rate": 9.946401914615151e-05, "loss": -0.1485, "step": 5660 }, { "epoch": 3.30034924330617, "grad_norm": 0.2923631966114044, "learning_rate": 9.945998740469394e-05, "loss": -0.1529, "step": 5670 }, { "epoch": 3.3061699650756693, "grad_norm": 0.34747523069381714, "learning_rate": 9.945594063859809e-05, "loss": -0.1385, "step": 5680 }, { "epoch": 3.311990686845169, "grad_norm": 0.4251538813114166, "learning_rate": 9.94518788490933e-05, "loss": -0.154, "step": 5690 }, { "epoch": 3.317811408614668, "grad_norm": 0.37955987453460693, "learning_rate": 9.944780203741341e-05, "loss": -0.1405, "step": 5700 }, { "epoch": 3.323632130384168, "grad_norm": 0.4243569076061249, "learning_rate": 9.944371020479686e-05, "loss": -0.1423, "step": 5710 }, { "epoch": 3.329452852153667, "grad_norm": 0.2922055721282959, "learning_rate": 9.943960335248662e-05, "loss": -0.1464, "step": 5720 }, { "epoch": 3.3352735739231667, "grad_norm": 0.5232503414154053, "learning_rate": 9.943548148173027e-05, "loss": -0.142, "step": 5730 }, { "epoch": 3.341094295692666, "grad_norm": 0.39474180340766907, "learning_rate": 9.943134459377992e-05, "loss": -0.1528, "step": 5740 }, { "epoch": 3.346915017462165, "grad_norm": 0.29338982701301575, "learning_rate": 9.942719268989222e-05, "loss": -0.1454, "step": 5750 }, { "epoch": 3.352735739231665, "grad_norm": 0.2949526607990265, "learning_rate": 9.942302577132844e-05, "loss": -0.1541, "step": 5760 }, { "epoch": 3.358556461001164, "grad_norm": 0.4200015664100647, "learning_rate": 9.941884383935438e-05, "loss": -0.152, "step": 5770 }, { "epoch": 3.3643771827706637, "grad_norm": 0.34657272696495056, "learning_rate": 9.941464689524039e-05, "loss": -0.1498, "step": 5780 }, { "epoch": 3.370197904540163, "grad_norm": 0.4133327603340149, "learning_rate": 9.941043494026139e-05, "loss": -0.149, "step": 5790 }, { "epoch": 3.3760186263096625, "grad_norm": 0.3199270963668823, "learning_rate": 9.940620797569685e-05, "loss": -0.148, "step": 5800 }, { "epoch": 3.3818393480791618, "grad_norm": 0.350058376789093, "learning_rate": 9.940196600283082e-05, "loss": -0.1335, "step": 5810 }, { "epoch": 3.3876600698486614, "grad_norm": 0.30616191029548645, "learning_rate": 9.939770902295192e-05, "loss": -0.1378, "step": 5820 }, { "epoch": 3.3934807916181606, "grad_norm": 0.5176123380661011, "learning_rate": 9.939343703735329e-05, "loss": -0.1418, "step": 5830 }, { "epoch": 3.39930151338766, "grad_norm": 0.30826908349990845, "learning_rate": 9.938915004733264e-05, "loss": -0.1488, "step": 5840 }, { "epoch": 3.4051222351571595, "grad_norm": 0.384765625, "learning_rate": 9.938484805419224e-05, "loss": -0.1451, "step": 5850 }, { "epoch": 3.4109429569266587, "grad_norm": 0.42345163226127625, "learning_rate": 9.938053105923894e-05, "loss": -0.1473, "step": 5860 }, { "epoch": 3.4167636786961584, "grad_norm": 0.3748398423194885, "learning_rate": 9.937619906378413e-05, "loss": -0.1503, "step": 5870 }, { "epoch": 3.4225844004656576, "grad_norm": 0.31775686144828796, "learning_rate": 9.937185206914374e-05, "loss": -0.145, "step": 5880 }, { "epoch": 3.4284051222351573, "grad_norm": 0.4171200394630432, "learning_rate": 9.936749007663829e-05, "loss": -0.1478, "step": 5890 }, { "epoch": 3.4342258440046565, "grad_norm": 0.4008040726184845, "learning_rate": 9.93631130875928e-05, "loss": -0.1482, "step": 5900 }, { "epoch": 3.440046565774156, "grad_norm": 0.4326243996620178, "learning_rate": 9.935872110333692e-05, "loss": -0.1379, "step": 5910 }, { "epoch": 3.4458672875436553, "grad_norm": 0.4327954947948456, "learning_rate": 9.935431412520484e-05, "loss": -0.1398, "step": 5920 }, { "epoch": 3.451688009313155, "grad_norm": 0.3058507442474365, "learning_rate": 9.934989215453523e-05, "loss": -0.1466, "step": 5930 }, { "epoch": 3.457508731082654, "grad_norm": 0.3418827950954437, "learning_rate": 9.934545519267139e-05, "loss": -0.145, "step": 5940 }, { "epoch": 3.463329452852154, "grad_norm": 0.360569030046463, "learning_rate": 9.934100324096117e-05, "loss": -0.1476, "step": 5950 }, { "epoch": 3.469150174621653, "grad_norm": 0.3138970136642456, "learning_rate": 9.933653630075692e-05, "loss": -0.1451, "step": 5960 }, { "epoch": 3.4749708963911523, "grad_norm": 0.29288315773010254, "learning_rate": 9.93320543734156e-05, "loss": -0.1449, "step": 5970 }, { "epoch": 3.480791618160652, "grad_norm": 0.2747132182121277, "learning_rate": 9.932755746029871e-05, "loss": -0.1545, "step": 5980 }, { "epoch": 3.486612339930151, "grad_norm": 0.47580984234809875, "learning_rate": 9.932304556277228e-05, "loss": -0.1488, "step": 5990 }, { "epoch": 3.492433061699651, "grad_norm": 0.3722217381000519, "learning_rate": 9.93185186822069e-05, "loss": -0.1431, "step": 6000 }, { "epoch": 3.49825378346915, "grad_norm": 0.39119312167167664, "learning_rate": 9.931397681997773e-05, "loss": -0.147, "step": 6010 }, { "epoch": 3.5040745052386497, "grad_norm": 0.32204577326774597, "learning_rate": 9.930941997746446e-05, "loss": -0.1468, "step": 6020 }, { "epoch": 3.509895227008149, "grad_norm": 0.35613566637039185, "learning_rate": 9.930484815605134e-05, "loss": -0.1446, "step": 6030 }, { "epoch": 3.5157159487776486, "grad_norm": 0.43474119901657104, "learning_rate": 9.930026135712717e-05, "loss": -0.1431, "step": 6040 }, { "epoch": 3.521536670547148, "grad_norm": 0.3824903070926666, "learning_rate": 9.92956595820853e-05, "loss": -0.1466, "step": 6050 }, { "epoch": 3.527357392316647, "grad_norm": 0.4285513758659363, "learning_rate": 9.929104283232362e-05, "loss": -0.1516, "step": 6060 }, { "epoch": 3.5331781140861467, "grad_norm": 0.3761999011039734, "learning_rate": 9.92864111092446e-05, "loss": -0.1474, "step": 6070 }, { "epoch": 3.5389988358556463, "grad_norm": 0.32323789596557617, "learning_rate": 9.92817644142552e-05, "loss": -0.1504, "step": 6080 }, { "epoch": 3.5448195576251456, "grad_norm": 0.32032284140586853, "learning_rate": 9.927710274876698e-05, "loss": -0.1482, "step": 6090 }, { "epoch": 3.5506402793946448, "grad_norm": 0.43808382749557495, "learning_rate": 9.927242611419603e-05, "loss": -0.1478, "step": 6100 }, { "epoch": 3.5564610011641444, "grad_norm": 0.4395265579223633, "learning_rate": 9.926773451196301e-05, "loss": -0.1416, "step": 6110 }, { "epoch": 3.5622817229336436, "grad_norm": 0.43051084876060486, "learning_rate": 9.926302794349306e-05, "loss": -0.144, "step": 6120 }, { "epoch": 3.5681024447031433, "grad_norm": 0.5103907585144043, "learning_rate": 9.925830641021594e-05, "loss": -0.1338, "step": 6130 }, { "epoch": 3.5739231664726425, "grad_norm": 0.32484960556030273, "learning_rate": 9.925356991356593e-05, "loss": -0.1476, "step": 6140 }, { "epoch": 3.579743888242142, "grad_norm": 0.4450293183326721, "learning_rate": 9.924881845498184e-05, "loss": -0.1478, "step": 6150 }, { "epoch": 3.5855646100116414, "grad_norm": 0.4100721478462219, "learning_rate": 9.924405203590705e-05, "loss": -0.1456, "step": 6160 }, { "epoch": 3.591385331781141, "grad_norm": 0.3575913906097412, "learning_rate": 9.923927065778946e-05, "loss": -0.1475, "step": 6170 }, { "epoch": 3.5972060535506403, "grad_norm": 0.334748238325119, "learning_rate": 9.923447432208154e-05, "loss": -0.1457, "step": 6180 }, { "epoch": 3.6030267753201395, "grad_norm": 0.42030906677246094, "learning_rate": 9.922966303024027e-05, "loss": -0.1502, "step": 6190 }, { "epoch": 3.608847497089639, "grad_norm": 0.27079683542251587, "learning_rate": 9.922483678372721e-05, "loss": -0.1445, "step": 6200 }, { "epoch": 3.614668218859139, "grad_norm": 0.3267030417919159, "learning_rate": 9.921999558400845e-05, "loss": -0.1465, "step": 6210 }, { "epoch": 3.620488940628638, "grad_norm": 0.33627933263778687, "learning_rate": 9.92151394325546e-05, "loss": -0.1334, "step": 6220 }, { "epoch": 3.6263096623981372, "grad_norm": 0.3937157690525055, "learning_rate": 9.921026833084084e-05, "loss": -0.1432, "step": 6230 }, { "epoch": 3.632130384167637, "grad_norm": 0.4476763606071472, "learning_rate": 9.920538228034689e-05, "loss": -0.149, "step": 6240 }, { "epoch": 3.637951105937136, "grad_norm": 0.2560654580593109, "learning_rate": 9.920048128255699e-05, "loss": -0.1468, "step": 6250 }, { "epoch": 3.6437718277066358, "grad_norm": 0.4654366672039032, "learning_rate": 9.919556533895995e-05, "loss": -0.1408, "step": 6260 }, { "epoch": 3.649592549476135, "grad_norm": 0.2755841612815857, "learning_rate": 9.919063445104907e-05, "loss": -0.1437, "step": 6270 }, { "epoch": 3.655413271245634, "grad_norm": 0.26588284969329834, "learning_rate": 9.918568862032227e-05, "loss": -0.1474, "step": 6280 }, { "epoch": 3.661233993015134, "grad_norm": 0.32937806844711304, "learning_rate": 9.918072784828194e-05, "loss": -0.1528, "step": 6290 }, { "epoch": 3.6670547147846335, "grad_norm": 0.39334648847579956, "learning_rate": 9.917575213643501e-05, "loss": -0.1455, "step": 6300 }, { "epoch": 3.6728754365541327, "grad_norm": 0.2815324068069458, "learning_rate": 9.917076148629302e-05, "loss": -0.1511, "step": 6310 }, { "epoch": 3.678696158323632, "grad_norm": 0.2997357249259949, "learning_rate": 9.916575589937196e-05, "loss": -0.145, "step": 6320 }, { "epoch": 3.6845168800931316, "grad_norm": 0.32520055770874023, "learning_rate": 9.916073537719239e-05, "loss": -0.1464, "step": 6330 }, { "epoch": 3.690337601862631, "grad_norm": 0.26879116892814636, "learning_rate": 9.915569992127944e-05, "loss": -0.1503, "step": 6340 }, { "epoch": 3.6961583236321305, "grad_norm": 0.33353909850120544, "learning_rate": 9.915064953316273e-05, "loss": -0.1509, "step": 6350 }, { "epoch": 3.7019790454016297, "grad_norm": 0.3320966064929962, "learning_rate": 9.914558421437645e-05, "loss": -0.1509, "step": 6360 }, { "epoch": 3.7077997671711294, "grad_norm": 0.2615768313407898, "learning_rate": 9.914050396645929e-05, "loss": -0.1514, "step": 6370 }, { "epoch": 3.7136204889406286, "grad_norm": 0.35985174775123596, "learning_rate": 9.913540879095452e-05, "loss": -0.1468, "step": 6380 }, { "epoch": 3.7194412107101282, "grad_norm": 0.29939574003219604, "learning_rate": 9.913029868940987e-05, "loss": -0.1412, "step": 6390 }, { "epoch": 3.7252619324796274, "grad_norm": 0.3030306398868561, "learning_rate": 9.912517366337772e-05, "loss": -0.1479, "step": 6400 }, { "epoch": 3.7310826542491267, "grad_norm": 0.36661291122436523, "learning_rate": 9.912003371441487e-05, "loss": -0.1451, "step": 6410 }, { "epoch": 3.7369033760186263, "grad_norm": 0.440748929977417, "learning_rate": 9.911487884408271e-05, "loss": -0.1488, "step": 6420 }, { "epoch": 3.742724097788126, "grad_norm": 0.21482373774051666, "learning_rate": 9.910970905394719e-05, "loss": -0.1481, "step": 6430 }, { "epoch": 3.748544819557625, "grad_norm": 0.33083948493003845, "learning_rate": 9.91045243455787e-05, "loss": -0.1544, "step": 6440 }, { "epoch": 3.7543655413271244, "grad_norm": 0.19196370244026184, "learning_rate": 9.909932472055225e-05, "loss": -0.1461, "step": 6450 }, { "epoch": 3.760186263096624, "grad_norm": 0.1801770180463791, "learning_rate": 9.909411018044734e-05, "loss": -0.1561, "step": 6460 }, { "epoch": 3.7660069848661233, "grad_norm": 0.4264330565929413, "learning_rate": 9.908888072684802e-05, "loss": -0.1522, "step": 6470 }, { "epoch": 3.771827706635623, "grad_norm": 0.2655431628227234, "learning_rate": 9.908363636134285e-05, "loss": -0.1578, "step": 6480 }, { "epoch": 3.777648428405122, "grad_norm": 0.2078094631433487, "learning_rate": 9.907837708552493e-05, "loss": -0.1585, "step": 6490 }, { "epoch": 3.7834691501746214, "grad_norm": 0.35399165749549866, "learning_rate": 9.90731029009919e-05, "loss": -0.1528, "step": 6500 }, { "epoch": 3.789289871944121, "grad_norm": 0.5218048095703125, "learning_rate": 9.906781380934589e-05, "loss": -0.1385, "step": 6510 }, { "epoch": 3.7951105937136207, "grad_norm": 0.4146631360054016, "learning_rate": 9.906250981219362e-05, "loss": -0.1451, "step": 6520 }, { "epoch": 3.80093131548312, "grad_norm": 0.3916129469871521, "learning_rate": 9.905719091114628e-05, "loss": -0.1556, "step": 6530 }, { "epoch": 3.806752037252619, "grad_norm": 0.42963171005249023, "learning_rate": 9.905185710781964e-05, "loss": -0.1417, "step": 6540 }, { "epoch": 3.812572759022119, "grad_norm": 0.31569233536720276, "learning_rate": 9.904650840383392e-05, "loss": -0.1489, "step": 6550 }, { "epoch": 3.818393480791618, "grad_norm": 0.267821729183197, "learning_rate": 9.904114480081397e-05, "loss": -0.1572, "step": 6560 }, { "epoch": 3.8242142025611177, "grad_norm": 0.2856549918651581, "learning_rate": 9.903576630038906e-05, "loss": -0.145, "step": 6570 }, { "epoch": 3.830034924330617, "grad_norm": 0.22708085179328918, "learning_rate": 9.903037290419309e-05, "loss": -0.1513, "step": 6580 }, { "epoch": 3.8358556461001165, "grad_norm": 0.3083229959011078, "learning_rate": 9.902496461386439e-05, "loss": -0.1534, "step": 6590 }, { "epoch": 3.8416763678696157, "grad_norm": 0.3642595112323761, "learning_rate": 9.901954143104588e-05, "loss": -0.1503, "step": 6600 }, { "epoch": 3.8474970896391154, "grad_norm": 0.4053795635700226, "learning_rate": 9.901410335738496e-05, "loss": -0.1581, "step": 6610 }, { "epoch": 3.8533178114086146, "grad_norm": 0.4036402106285095, "learning_rate": 9.900865039453358e-05, "loss": -0.1434, "step": 6620 }, { "epoch": 3.859138533178114, "grad_norm": 0.3387029767036438, "learning_rate": 9.900318254414821e-05, "loss": -0.1493, "step": 6630 }, { "epoch": 3.8649592549476135, "grad_norm": 0.3293037414550781, "learning_rate": 9.899769980788985e-05, "loss": -0.153, "step": 6640 }, { "epoch": 3.870779976717113, "grad_norm": 0.24349893629550934, "learning_rate": 9.899220218742398e-05, "loss": -0.1527, "step": 6650 }, { "epoch": 3.8766006984866124, "grad_norm": 0.43836963176727295, "learning_rate": 9.898668968442066e-05, "loss": -0.1403, "step": 6660 }, { "epoch": 3.8824214202561116, "grad_norm": 0.36780914664268494, "learning_rate": 9.898116230055443e-05, "loss": -0.1477, "step": 6670 }, { "epoch": 3.8882421420256112, "grad_norm": 0.272538959980011, "learning_rate": 9.897562003750437e-05, "loss": -0.1517, "step": 6680 }, { "epoch": 3.8940628637951105, "grad_norm": 0.2737900912761688, "learning_rate": 9.897006289695407e-05, "loss": -0.147, "step": 6690 }, { "epoch": 3.89988358556461, "grad_norm": 0.28877902030944824, "learning_rate": 9.896449088059164e-05, "loss": -0.1437, "step": 6700 }, { "epoch": 3.9057043073341093, "grad_norm": 0.3803444802761078, "learning_rate": 9.89589039901097e-05, "loss": -0.149, "step": 6710 }, { "epoch": 3.911525029103609, "grad_norm": 0.4721880257129669, "learning_rate": 9.895330222720542e-05, "loss": -0.1497, "step": 6720 }, { "epoch": 3.917345750873108, "grad_norm": 0.2458130121231079, "learning_rate": 9.894768559358047e-05, "loss": -0.14, "step": 6730 }, { "epoch": 3.923166472642608, "grad_norm": 0.27911826968193054, "learning_rate": 9.894205409094101e-05, "loss": -0.1412, "step": 6740 }, { "epoch": 3.928987194412107, "grad_norm": 0.22117756307125092, "learning_rate": 9.893640772099777e-05, "loss": -0.1525, "step": 6750 }, { "epoch": 3.9348079161816063, "grad_norm": 0.2823420763015747, "learning_rate": 9.893074648546595e-05, "loss": -0.1352, "step": 6760 }, { "epoch": 3.940628637951106, "grad_norm": 0.4548528790473938, "learning_rate": 9.892507038606528e-05, "loss": -0.1376, "step": 6770 }, { "epoch": 3.9464493597206056, "grad_norm": 0.41277816891670227, "learning_rate": 9.891937942452003e-05, "loss": -0.145, "step": 6780 }, { "epoch": 3.952270081490105, "grad_norm": 0.35598939657211304, "learning_rate": 9.891367360255895e-05, "loss": -0.149, "step": 6790 }, { "epoch": 3.958090803259604, "grad_norm": 0.5258488059043884, "learning_rate": 9.890795292191532e-05, "loss": -0.1343, "step": 6800 }, { "epoch": 3.9639115250291037, "grad_norm": 0.408407986164093, "learning_rate": 9.890221738432694e-05, "loss": -0.1421, "step": 6810 }, { "epoch": 3.969732246798603, "grad_norm": 0.32275456190109253, "learning_rate": 9.88964669915361e-05, "loss": -0.15, "step": 6820 }, { "epoch": 3.9755529685681026, "grad_norm": 0.45242932438850403, "learning_rate": 9.889070174528963e-05, "loss": -0.1409, "step": 6830 }, { "epoch": 3.981373690337602, "grad_norm": 0.24531777203083038, "learning_rate": 9.888492164733883e-05, "loss": -0.1542, "step": 6840 }, { "epoch": 3.987194412107101, "grad_norm": 0.2830636501312256, "learning_rate": 9.88791266994396e-05, "loss": -0.1497, "step": 6850 }, { "epoch": 3.9930151338766007, "grad_norm": 0.21855272352695465, "learning_rate": 9.887331690335223e-05, "loss": -0.1533, "step": 6860 }, { "epoch": 3.9988358556461003, "grad_norm": 0.25525686144828796, "learning_rate": 9.886749226084163e-05, "loss": -0.1557, "step": 6870 }, { "epoch": 4.0046565774155995, "grad_norm": 0.2967360317707062, "learning_rate": 9.886165277367714e-05, "loss": -0.1393, "step": 6880 }, { "epoch": 4.010477299185099, "grad_norm": 0.3592689335346222, "learning_rate": 9.885579844363265e-05, "loss": -0.1547, "step": 6890 }, { "epoch": 4.016298020954598, "grad_norm": 0.3357367217540741, "learning_rate": 9.884992927248656e-05, "loss": -0.1524, "step": 6900 }, { "epoch": 4.022118742724098, "grad_norm": 0.36255794763565063, "learning_rate": 9.884404526202178e-05, "loss": -0.1473, "step": 6910 }, { "epoch": 4.027939464493597, "grad_norm": 0.31550678610801697, "learning_rate": 9.883814641402568e-05, "loss": -0.1441, "step": 6920 }, { "epoch": 4.0337601862630965, "grad_norm": 0.3075209856033325, "learning_rate": 9.88322327302902e-05, "loss": -0.1503, "step": 6930 }, { "epoch": 4.039580908032596, "grad_norm": 0.2781177759170532, "learning_rate": 9.882630421261176e-05, "loss": -0.1545, "step": 6940 }, { "epoch": 4.045401629802096, "grad_norm": 0.232516348361969, "learning_rate": 9.88203608627913e-05, "loss": -0.1468, "step": 6950 }, { "epoch": 4.051222351571595, "grad_norm": 0.38300082087516785, "learning_rate": 9.881440268263422e-05, "loss": -0.1471, "step": 6960 }, { "epoch": 4.057043073341094, "grad_norm": 0.3354392945766449, "learning_rate": 9.880842967395048e-05, "loss": -0.1485, "step": 6970 }, { "epoch": 4.0628637951105935, "grad_norm": 0.29821231961250305, "learning_rate": 9.880244183855452e-05, "loss": -0.1496, "step": 6980 }, { "epoch": 4.068684516880094, "grad_norm": 0.44286230206489563, "learning_rate": 9.879643917826527e-05, "loss": -0.1372, "step": 6990 }, { "epoch": 4.074505238649593, "grad_norm": 0.49496540427207947, "learning_rate": 9.87904216949062e-05, "loss": -0.1486, "step": 7000 }, { "epoch": 4.080325960419092, "grad_norm": 0.33487483859062195, "learning_rate": 9.878438939030526e-05, "loss": -0.1428, "step": 7010 }, { "epoch": 4.086146682188591, "grad_norm": 0.2651703655719757, "learning_rate": 9.877834226629489e-05, "loss": -0.1496, "step": 7020 }, { "epoch": 4.09196740395809, "grad_norm": 0.4361928403377533, "learning_rate": 9.877228032471206e-05, "loss": -0.1506, "step": 7030 }, { "epoch": 4.0977881257275905, "grad_norm": 0.34555765986442566, "learning_rate": 9.876620356739823e-05, "loss": -0.1513, "step": 7040 }, { "epoch": 4.10360884749709, "grad_norm": 0.26471734046936035, "learning_rate": 9.876011199619935e-05, "loss": -0.1505, "step": 7050 }, { "epoch": 4.109429569266589, "grad_norm": 0.3115619719028473, "learning_rate": 9.875400561296589e-05, "loss": -0.1497, "step": 7060 }, { "epoch": 4.115250291036088, "grad_norm": 0.26282060146331787, "learning_rate": 9.874788441955278e-05, "loss": -0.1486, "step": 7070 }, { "epoch": 4.121071012805588, "grad_norm": 0.3152296245098114, "learning_rate": 9.874174841781951e-05, "loss": -0.1468, "step": 7080 }, { "epoch": 4.1268917345750875, "grad_norm": 0.3351171910762787, "learning_rate": 9.873559760963003e-05, "loss": -0.1465, "step": 7090 }, { "epoch": 4.132712456344587, "grad_norm": 0.37481799721717834, "learning_rate": 9.872943199685278e-05, "loss": -0.1484, "step": 7100 }, { "epoch": 4.138533178114086, "grad_norm": 0.2223481386899948, "learning_rate": 9.872325158136071e-05, "loss": -0.1469, "step": 7110 }, { "epoch": 4.144353899883585, "grad_norm": 0.4044785797595978, "learning_rate": 9.871705636503128e-05, "loss": -0.1485, "step": 7120 }, { "epoch": 4.150174621653085, "grad_norm": 0.2506202459335327, "learning_rate": 9.871084634974641e-05, "loss": -0.1414, "step": 7130 }, { "epoch": 4.1559953434225845, "grad_norm": 0.49397850036621094, "learning_rate": 9.870462153739257e-05, "loss": -0.1396, "step": 7140 }, { "epoch": 4.161816065192084, "grad_norm": 0.2959056794643402, "learning_rate": 9.869838192986067e-05, "loss": -0.1506, "step": 7150 }, { "epoch": 4.167636786961583, "grad_norm": 0.2666018307209015, "learning_rate": 9.869212752904616e-05, "loss": -0.1472, "step": 7160 }, { "epoch": 4.173457508731083, "grad_norm": 0.2572413682937622, "learning_rate": 9.868585833684894e-05, "loss": -0.1518, "step": 7170 }, { "epoch": 4.179278230500582, "grad_norm": 0.31192076206207275, "learning_rate": 9.867957435517342e-05, "loss": -0.1514, "step": 7180 }, { "epoch": 4.185098952270081, "grad_norm": 0.2805185317993164, "learning_rate": 9.867327558592854e-05, "loss": -0.1482, "step": 7190 }, { "epoch": 4.190919674039581, "grad_norm": 0.42959922552108765, "learning_rate": 9.866696203102766e-05, "loss": -0.1367, "step": 7200 }, { "epoch": 4.19674039580908, "grad_norm": 0.39876046776771545, "learning_rate": 9.86606336923887e-05, "loss": -0.1516, "step": 7210 }, { "epoch": 4.20256111757858, "grad_norm": 0.22789578139781952, "learning_rate": 9.865429057193403e-05, "loss": -0.1505, "step": 7220 }, { "epoch": 4.208381839348079, "grad_norm": 0.3086608648300171, "learning_rate": 9.864793267159053e-05, "loss": -0.146, "step": 7230 }, { "epoch": 4.214202561117578, "grad_norm": 0.3561439514160156, "learning_rate": 9.864155999328957e-05, "loss": -0.1497, "step": 7240 }, { "epoch": 4.220023282887078, "grad_norm": 0.33744528889656067, "learning_rate": 9.8635172538967e-05, "loss": -0.1469, "step": 7250 }, { "epoch": 4.225844004656578, "grad_norm": 0.3330278694629669, "learning_rate": 9.862877031056312e-05, "loss": -0.1515, "step": 7260 }, { "epoch": 4.231664726426077, "grad_norm": 0.2667294442653656, "learning_rate": 9.862235331002279e-05, "loss": -0.1423, "step": 7270 }, { "epoch": 4.237485448195576, "grad_norm": 0.4801054000854492, "learning_rate": 9.861592153929533e-05, "loss": -0.1511, "step": 7280 }, { "epoch": 4.243306169965075, "grad_norm": 0.3038192391395569, "learning_rate": 9.860947500033455e-05, "loss": -0.1529, "step": 7290 }, { "epoch": 4.2491268917345755, "grad_norm": 0.2001381367444992, "learning_rate": 9.86030136950987e-05, "loss": -0.1485, "step": 7300 }, { "epoch": 4.254947613504075, "grad_norm": 0.36300137639045715, "learning_rate": 9.85965376255506e-05, "loss": -0.1568, "step": 7310 }, { "epoch": 4.260768335273574, "grad_norm": 0.3293308615684509, "learning_rate": 9.859004679365747e-05, "loss": -0.1559, "step": 7320 }, { "epoch": 4.266589057043073, "grad_norm": 0.2760055661201477, "learning_rate": 9.858354120139108e-05, "loss": -0.1522, "step": 7330 }, { "epoch": 4.272409778812573, "grad_norm": 0.28954339027404785, "learning_rate": 9.857702085072764e-05, "loss": -0.1478, "step": 7340 }, { "epoch": 4.278230500582072, "grad_norm": 0.4102955460548401, "learning_rate": 9.857048574364787e-05, "loss": -0.1508, "step": 7350 }, { "epoch": 4.284051222351572, "grad_norm": 0.26610130071640015, "learning_rate": 9.856393588213698e-05, "loss": -0.1465, "step": 7360 }, { "epoch": 4.289871944121071, "grad_norm": 0.28908342123031616, "learning_rate": 9.855737126818458e-05, "loss": -0.1458, "step": 7370 }, { "epoch": 4.29569266589057, "grad_norm": 0.25044742226600647, "learning_rate": 9.855079190378491e-05, "loss": -0.1487, "step": 7380 }, { "epoch": 4.30151338766007, "grad_norm": 0.2585987150669098, "learning_rate": 9.854419779093655e-05, "loss": -0.1539, "step": 7390 }, { "epoch": 4.307334109429569, "grad_norm": 0.32537588477134705, "learning_rate": 9.853758893164264e-05, "loss": -0.1533, "step": 7400 }, { "epoch": 4.313154831199069, "grad_norm": 0.35842108726501465, "learning_rate": 9.853096532791078e-05, "loss": -0.1545, "step": 7410 }, { "epoch": 4.318975552968568, "grad_norm": 0.3426719903945923, "learning_rate": 9.852432698175304e-05, "loss": -0.1456, "step": 7420 }, { "epoch": 4.324796274738068, "grad_norm": 0.34040695428848267, "learning_rate": 9.851767389518597e-05, "loss": -0.1451, "step": 7430 }, { "epoch": 4.330616996507567, "grad_norm": 0.3119383156299591, "learning_rate": 9.85110060702306e-05, "loss": -0.1504, "step": 7440 }, { "epoch": 4.336437718277066, "grad_norm": 0.2949092388153076, "learning_rate": 9.850432350891245e-05, "loss": -0.1514, "step": 7450 }, { "epoch": 4.342258440046566, "grad_norm": 0.33593299984931946, "learning_rate": 9.84976262132615e-05, "loss": -0.1554, "step": 7460 }, { "epoch": 4.348079161816065, "grad_norm": 0.32844409346580505, "learning_rate": 9.849091418531222e-05, "loss": -0.1428, "step": 7470 }, { "epoch": 4.353899883585565, "grad_norm": 0.32900679111480713, "learning_rate": 9.848418742710353e-05, "loss": -0.1439, "step": 7480 }, { "epoch": 4.359720605355064, "grad_norm": 0.28914201259613037, "learning_rate": 9.847744594067885e-05, "loss": -0.1523, "step": 7490 }, { "epoch": 4.365541327124563, "grad_norm": 0.37706607580184937, "learning_rate": 9.847068972808607e-05, "loss": -0.1487, "step": 7500 }, { "epoch": 4.3713620488940625, "grad_norm": 0.3789500296115875, "learning_rate": 9.846391879137756e-05, "loss": -0.1582, "step": 7510 }, { "epoch": 4.377182770663563, "grad_norm": 0.3532675802707672, "learning_rate": 9.845713313261012e-05, "loss": -0.1485, "step": 7520 }, { "epoch": 4.383003492433062, "grad_norm": 0.4628465175628662, "learning_rate": 9.845033275384505e-05, "loss": -0.1364, "step": 7530 }, { "epoch": 4.388824214202561, "grad_norm": 0.2525712847709656, "learning_rate": 9.844351765714818e-05, "loss": -0.148, "step": 7540 }, { "epoch": 4.39464493597206, "grad_norm": 0.23097804188728333, "learning_rate": 9.843668784458971e-05, "loss": -0.1424, "step": 7550 }, { "epoch": 4.4004656577415595, "grad_norm": 0.40140584111213684, "learning_rate": 9.842984331824437e-05, "loss": -0.1523, "step": 7560 }, { "epoch": 4.40628637951106, "grad_norm": 0.23307892680168152, "learning_rate": 9.842298408019133e-05, "loss": -0.1591, "step": 7570 }, { "epoch": 4.412107101280559, "grad_norm": 0.29149144887924194, "learning_rate": 9.841611013251429e-05, "loss": -0.1458, "step": 7580 }, { "epoch": 4.417927823050058, "grad_norm": 0.2710326313972473, "learning_rate": 9.840922147730133e-05, "loss": -0.1492, "step": 7590 }, { "epoch": 4.423748544819557, "grad_norm": 0.32385769486427307, "learning_rate": 9.840231811664506e-05, "loss": -0.1473, "step": 7600 }, { "epoch": 4.429569266589057, "grad_norm": 0.4243689477443695, "learning_rate": 9.839540005264252e-05, "loss": -0.1484, "step": 7610 }, { "epoch": 4.435389988358557, "grad_norm": 0.34669336676597595, "learning_rate": 9.838846728739527e-05, "loss": -0.1571, "step": 7620 }, { "epoch": 4.441210710128056, "grad_norm": 0.22319762408733368, "learning_rate": 9.838151982300927e-05, "loss": -0.1435, "step": 7630 }, { "epoch": 4.447031431897555, "grad_norm": 0.32253795862197876, "learning_rate": 9.8374557661595e-05, "loss": -0.1502, "step": 7640 }, { "epoch": 4.452852153667055, "grad_norm": 0.2753729522228241, "learning_rate": 9.836758080526735e-05, "loss": -0.1486, "step": 7650 }, { "epoch": 4.458672875436554, "grad_norm": 0.27889275550842285, "learning_rate": 9.836058925614575e-05, "loss": -0.1522, "step": 7660 }, { "epoch": 4.4644935972060535, "grad_norm": 0.19378578662872314, "learning_rate": 9.8353583016354e-05, "loss": -0.1536, "step": 7670 }, { "epoch": 4.470314318975553, "grad_norm": 0.2738265097141266, "learning_rate": 9.834656208802044e-05, "loss": -0.1502, "step": 7680 }, { "epoch": 4.476135040745052, "grad_norm": 0.2990783154964447, "learning_rate": 9.833952647327784e-05, "loss": -0.1512, "step": 7690 }, { "epoch": 4.481955762514552, "grad_norm": 0.2699309289455414, "learning_rate": 9.833247617426342e-05, "loss": -0.1561, "step": 7700 }, { "epoch": 4.487776484284051, "grad_norm": 0.3020724058151245, "learning_rate": 9.832541119311889e-05, "loss": -0.1549, "step": 7710 }, { "epoch": 4.4935972060535505, "grad_norm": 0.28118541836738586, "learning_rate": 9.83183315319904e-05, "loss": -0.1419, "step": 7720 }, { "epoch": 4.49941792782305, "grad_norm": 0.3782808482646942, "learning_rate": 9.831123719302855e-05, "loss": -0.1474, "step": 7730 }, { "epoch": 4.50523864959255, "grad_norm": 0.3349355161190033, "learning_rate": 9.830412817838842e-05, "loss": -0.1511, "step": 7740 }, { "epoch": 4.511059371362049, "grad_norm": 0.2886602580547333, "learning_rate": 9.829700449022956e-05, "loss": -0.1476, "step": 7750 }, { "epoch": 4.516880093131548, "grad_norm": 0.24243706464767456, "learning_rate": 9.828986613071593e-05, "loss": -0.1533, "step": 7760 }, { "epoch": 4.5227008149010475, "grad_norm": 0.41285791993141174, "learning_rate": 9.828271310201601e-05, "loss": -0.1548, "step": 7770 }, { "epoch": 4.528521536670548, "grad_norm": 0.3204813599586487, "learning_rate": 9.827554540630268e-05, "loss": -0.1565, "step": 7780 }, { "epoch": 4.534342258440047, "grad_norm": 0.2527124285697937, "learning_rate": 9.826836304575329e-05, "loss": -0.1426, "step": 7790 }, { "epoch": 4.540162980209546, "grad_norm": 0.32220250368118286, "learning_rate": 9.826116602254966e-05, "loss": -0.1561, "step": 7800 }, { "epoch": 4.545983701979045, "grad_norm": 0.36405688524246216, "learning_rate": 9.825395433887805e-05, "loss": -0.1509, "step": 7810 }, { "epoch": 4.551804423748544, "grad_norm": 0.26998913288116455, "learning_rate": 9.824672799692917e-05, "loss": -0.1536, "step": 7820 }, { "epoch": 4.5576251455180445, "grad_norm": 0.3628356158733368, "learning_rate": 9.823948699889823e-05, "loss": -0.145, "step": 7830 }, { "epoch": 4.563445867287544, "grad_norm": 0.31674906611442566, "learning_rate": 9.823223134698483e-05, "loss": -0.154, "step": 7840 }, { "epoch": 4.569266589057043, "grad_norm": 0.19394193589687347, "learning_rate": 9.822496104339303e-05, "loss": -0.1519, "step": 7850 }, { "epoch": 4.575087310826542, "grad_norm": 0.25188928842544556, "learning_rate": 9.821767609033138e-05, "loss": -0.1536, "step": 7860 }, { "epoch": 4.580908032596042, "grad_norm": 0.23831060528755188, "learning_rate": 9.821037649001284e-05, "loss": -0.1497, "step": 7870 }, { "epoch": 4.5867287543655415, "grad_norm": 0.3388115465641022, "learning_rate": 9.820306224465486e-05, "loss": -0.1511, "step": 7880 }, { "epoch": 4.592549476135041, "grad_norm": 0.25026726722717285, "learning_rate": 9.819573335647928e-05, "loss": -0.1518, "step": 7890 }, { "epoch": 4.59837019790454, "grad_norm": 0.4085249602794647, "learning_rate": 9.818838982771246e-05, "loss": -0.1449, "step": 7900 }, { "epoch": 4.604190919674039, "grad_norm": 0.35622483491897583, "learning_rate": 9.818103166058514e-05, "loss": -0.1537, "step": 7910 }, { "epoch": 4.610011641443539, "grad_norm": 0.33362165093421936, "learning_rate": 9.817365885733254e-05, "loss": -0.156, "step": 7920 }, { "epoch": 4.6158323632130385, "grad_norm": 0.2711986005306244, "learning_rate": 9.816627142019434e-05, "loss": -0.1526, "step": 7930 }, { "epoch": 4.621653084982538, "grad_norm": 0.19333447515964508, "learning_rate": 9.815886935141463e-05, "loss": -0.1432, "step": 7940 }, { "epoch": 4.627473806752037, "grad_norm": 0.251564621925354, "learning_rate": 9.8151452653242e-05, "loss": -0.1509, "step": 7950 }, { "epoch": 4.633294528521537, "grad_norm": 0.2750026285648346, "learning_rate": 9.814402132792939e-05, "loss": -0.1505, "step": 7960 }, { "epoch": 4.639115250291036, "grad_norm": 0.27309733629226685, "learning_rate": 9.813657537773428e-05, "loss": -0.1508, "step": 7970 }, { "epoch": 4.644935972060535, "grad_norm": 0.3028605878353119, "learning_rate": 9.812911480491854e-05, "loss": -0.154, "step": 7980 }, { "epoch": 4.650756693830035, "grad_norm": 0.3167237937450409, "learning_rate": 9.81216396117485e-05, "loss": -0.147, "step": 7990 }, { "epoch": 4.656577415599534, "grad_norm": 0.39655986428260803, "learning_rate": 9.811414980049491e-05, "loss": -0.1494, "step": 8000 }, { "epoch": 4.662398137369034, "grad_norm": 0.23724772036075592, "learning_rate": 9.810664537343301e-05, "loss": -0.155, "step": 8010 }, { "epoch": 4.668218859138533, "grad_norm": 0.26841971278190613, "learning_rate": 9.809912633284243e-05, "loss": -0.155, "step": 8020 }, { "epoch": 4.674039580908032, "grad_norm": 0.34861624240875244, "learning_rate": 9.809159268100725e-05, "loss": -0.1527, "step": 8030 }, { "epoch": 4.6798603026775325, "grad_norm": 0.24782533943653107, "learning_rate": 9.808404442021599e-05, "loss": -0.1532, "step": 8040 }, { "epoch": 4.685681024447032, "grad_norm": 0.3738064169883728, "learning_rate": 9.807648155276163e-05, "loss": -0.1481, "step": 8050 }, { "epoch": 4.691501746216531, "grad_norm": 0.30061355233192444, "learning_rate": 9.806890408094156e-05, "loss": -0.1481, "step": 8060 }, { "epoch": 4.69732246798603, "grad_norm": 0.33743637800216675, "learning_rate": 9.806131200705761e-05, "loss": -0.1563, "step": 8070 }, { "epoch": 4.703143189755529, "grad_norm": 0.3184691071510315, "learning_rate": 9.805370533341605e-05, "loss": -0.1483, "step": 8080 }, { "epoch": 4.7089639115250295, "grad_norm": 0.3935103714466095, "learning_rate": 9.804608406232762e-05, "loss": -0.1568, "step": 8090 }, { "epoch": 4.714784633294529, "grad_norm": 0.4667988121509552, "learning_rate": 9.803844819610741e-05, "loss": -0.1504, "step": 8100 }, { "epoch": 4.720605355064028, "grad_norm": 0.24304728209972382, "learning_rate": 9.803079773707504e-05, "loss": -0.1475, "step": 8110 }, { "epoch": 4.726426076833527, "grad_norm": 0.2707105278968811, "learning_rate": 9.802313268755447e-05, "loss": -0.143, "step": 8120 }, { "epoch": 4.732246798603027, "grad_norm": 0.27235883474349976, "learning_rate": 9.801545304987419e-05, "loss": -0.1508, "step": 8130 }, { "epoch": 4.738067520372526, "grad_norm": 0.2882414162158966, "learning_rate": 9.800775882636704e-05, "loss": -0.1534, "step": 8140 }, { "epoch": 4.743888242142026, "grad_norm": 0.2991355061531067, "learning_rate": 9.800005001937034e-05, "loss": -0.1607, "step": 8150 }, { "epoch": 4.749708963911525, "grad_norm": 0.3258233964443207, "learning_rate": 9.79923266312258e-05, "loss": -0.1539, "step": 8160 }, { "epoch": 4.755529685681024, "grad_norm": 0.29975956678390503, "learning_rate": 9.79845886642796e-05, "loss": -0.1511, "step": 8170 }, { "epoch": 4.761350407450524, "grad_norm": 0.5007794499397278, "learning_rate": 9.797683612088233e-05, "loss": -0.1519, "step": 8180 }, { "epoch": 4.767171129220023, "grad_norm": 0.29705318808555603, "learning_rate": 9.796906900338898e-05, "loss": -0.1487, "step": 8190 }, { "epoch": 4.772991850989523, "grad_norm": 0.44977080821990967, "learning_rate": 9.796128731415903e-05, "loss": -0.1469, "step": 8200 }, { "epoch": 4.778812572759022, "grad_norm": 0.3397991955280304, "learning_rate": 9.795349105555634e-05, "loss": -0.1515, "step": 8210 }, { "epoch": 4.784633294528522, "grad_norm": 0.40080246329307556, "learning_rate": 9.794568022994922e-05, "loss": -0.1503, "step": 8220 }, { "epoch": 4.790454016298021, "grad_norm": 0.311405748128891, "learning_rate": 9.793785483971034e-05, "loss": -0.1536, "step": 8230 }, { "epoch": 4.79627473806752, "grad_norm": 0.31072404980659485, "learning_rate": 9.793001488721691e-05, "loss": -0.1546, "step": 8240 }, { "epoch": 4.80209545983702, "grad_norm": 0.2548303008079529, "learning_rate": 9.792216037485047e-05, "loss": -0.1543, "step": 8250 }, { "epoch": 4.807916181606519, "grad_norm": 0.3671762943267822, "learning_rate": 9.791429130499704e-05, "loss": -0.1502, "step": 8260 }, { "epoch": 4.813736903376019, "grad_norm": 0.2357182502746582, "learning_rate": 9.790640768004698e-05, "loss": -0.1531, "step": 8270 }, { "epoch": 4.819557625145518, "grad_norm": 0.3528524339199066, "learning_rate": 9.789850950239518e-05, "loss": -0.1487, "step": 8280 }, { "epoch": 4.825378346915017, "grad_norm": 0.32980817556381226, "learning_rate": 9.789059677444089e-05, "loss": -0.1509, "step": 8290 }, { "epoch": 4.8311990686845165, "grad_norm": 0.275401771068573, "learning_rate": 9.788266949858776e-05, "loss": -0.1566, "step": 8300 }, { "epoch": 4.837019790454017, "grad_norm": 0.3624584674835205, "learning_rate": 9.787472767724392e-05, "loss": -0.1625, "step": 8310 }, { "epoch": 4.842840512223516, "grad_norm": 0.32502463459968567, "learning_rate": 9.786677131282185e-05, "loss": -0.1539, "step": 8320 }, { "epoch": 4.848661233993015, "grad_norm": 0.32440632581710815, "learning_rate": 9.785880040773853e-05, "loss": -0.1541, "step": 8330 }, { "epoch": 4.854481955762514, "grad_norm": 0.4825587272644043, "learning_rate": 9.785081496441527e-05, "loss": -0.1449, "step": 8340 }, { "epoch": 4.8603026775320135, "grad_norm": 0.3383822441101074, "learning_rate": 9.784281498527785e-05, "loss": -0.1519, "step": 8350 }, { "epoch": 4.866123399301514, "grad_norm": 0.3336241841316223, "learning_rate": 9.783480047275646e-05, "loss": -0.1429, "step": 8360 }, { "epoch": 4.871944121071013, "grad_norm": 0.2646419405937195, "learning_rate": 9.78267714292857e-05, "loss": -0.1475, "step": 8370 }, { "epoch": 4.877764842840512, "grad_norm": 0.36168792843818665, "learning_rate": 9.781872785730454e-05, "loss": -0.1465, "step": 8380 }, { "epoch": 4.883585564610011, "grad_norm": 0.3291957378387451, "learning_rate": 9.781066975925646e-05, "loss": -0.1515, "step": 8390 }, { "epoch": 4.889406286379511, "grad_norm": 0.3155612647533417, "learning_rate": 9.780259713758928e-05, "loss": -0.152, "step": 8400 }, { "epoch": 4.895227008149011, "grad_norm": 0.30874937772750854, "learning_rate": 9.779450999475524e-05, "loss": -0.1531, "step": 8410 }, { "epoch": 4.90104772991851, "grad_norm": 0.3245904743671417, "learning_rate": 9.7786408333211e-05, "loss": -0.1551, "step": 8420 }, { "epoch": 4.906868451688009, "grad_norm": 0.3815837800502777, "learning_rate": 9.777829215541764e-05, "loss": -0.1491, "step": 8430 }, { "epoch": 4.912689173457509, "grad_norm": 0.2523835599422455, "learning_rate": 9.777016146384064e-05, "loss": -0.1561, "step": 8440 }, { "epoch": 4.918509895227008, "grad_norm": 0.31139668822288513, "learning_rate": 9.776201626094988e-05, "loss": -0.1558, "step": 8450 }, { "epoch": 4.9243306169965075, "grad_norm": 0.3521738350391388, "learning_rate": 9.775385654921965e-05, "loss": -0.156, "step": 8460 }, { "epoch": 4.930151338766007, "grad_norm": 0.29077765345573425, "learning_rate": 9.774568233112868e-05, "loss": -0.1521, "step": 8470 }, { "epoch": 4.935972060535507, "grad_norm": 0.31870484352111816, "learning_rate": 9.773749360916007e-05, "loss": -0.147, "step": 8480 }, { "epoch": 4.941792782305006, "grad_norm": 0.3000575304031372, "learning_rate": 9.772929038580134e-05, "loss": -0.153, "step": 8490 }, { "epoch": 4.947613504074505, "grad_norm": 0.45119190216064453, "learning_rate": 9.772107266354439e-05, "loss": -0.1453, "step": 8500 }, { "epoch": 4.9534342258440045, "grad_norm": 0.24238018691539764, "learning_rate": 9.77128404448856e-05, "loss": -0.1423, "step": 8510 }, { "epoch": 4.959254947613504, "grad_norm": 0.3712165355682373, "learning_rate": 9.770459373232565e-05, "loss": -0.1577, "step": 8520 }, { "epoch": 4.965075669383004, "grad_norm": 0.3338068127632141, "learning_rate": 9.769633252836969e-05, "loss": -0.1464, "step": 8530 }, { "epoch": 4.970896391152503, "grad_norm": 0.21626228094100952, "learning_rate": 9.768805683552724e-05, "loss": -0.1561, "step": 8540 }, { "epoch": 4.976717112922002, "grad_norm": 0.18793481588363647, "learning_rate": 9.767976665631228e-05, "loss": -0.1555, "step": 8550 }, { "epoch": 4.9825378346915015, "grad_norm": 0.3204384446144104, "learning_rate": 9.767146199324311e-05, "loss": -0.1543, "step": 8560 }, { "epoch": 4.988358556461002, "grad_norm": 0.2013837993144989, "learning_rate": 9.766314284884249e-05, "loss": -0.1614, "step": 8570 }, { "epoch": 4.994179278230501, "grad_norm": 0.33319327235221863, "learning_rate": 9.765480922563752e-05, "loss": -0.15, "step": 8580 }, { "epoch": 5.0, "grad_norm": 0.33664414286613464, "learning_rate": 9.764646112615978e-05, "loss": -0.155, "step": 8590 }, { "epoch": 5.005820721769499, "grad_norm": 0.2675120234489441, "learning_rate": 9.763809855294517e-05, "loss": -0.1533, "step": 8600 }, { "epoch": 5.011641443538998, "grad_norm": 0.2938055992126465, "learning_rate": 9.762972150853404e-05, "loss": -0.1558, "step": 8610 }, { "epoch": 5.0174621653084985, "grad_norm": 0.4498685896396637, "learning_rate": 9.762132999547111e-05, "loss": -0.1555, "step": 8620 }, { "epoch": 5.023282887077998, "grad_norm": 0.22320082783699036, "learning_rate": 9.761292401630549e-05, "loss": -0.1525, "step": 8630 }, { "epoch": 5.029103608847497, "grad_norm": 0.18247075378894806, "learning_rate": 9.76045035735907e-05, "loss": -0.1582, "step": 8640 }, { "epoch": 5.034924330616996, "grad_norm": 0.25329822301864624, "learning_rate": 9.759606866988464e-05, "loss": -0.1552, "step": 8650 }, { "epoch": 5.040745052386496, "grad_norm": 0.24867337942123413, "learning_rate": 9.758761930774963e-05, "loss": -0.1592, "step": 8660 }, { "epoch": 5.0465657741559955, "grad_norm": 0.3323189914226532, "learning_rate": 9.757915548975235e-05, "loss": -0.155, "step": 8670 }, { "epoch": 5.052386495925495, "grad_norm": 0.33766961097717285, "learning_rate": 9.757067721846389e-05, "loss": -0.1538, "step": 8680 }, { "epoch": 5.058207217694994, "grad_norm": 0.2869926393032074, "learning_rate": 9.756218449645971e-05, "loss": -0.1543, "step": 8690 }, { "epoch": 5.064027939464494, "grad_norm": 0.27604812383651733, "learning_rate": 9.75536773263197e-05, "loss": -0.1539, "step": 8700 }, { "epoch": 5.069848661233993, "grad_norm": 0.2515522837638855, "learning_rate": 9.75451557106281e-05, "loss": -0.16, "step": 8710 }, { "epoch": 5.0756693830034925, "grad_norm": 0.29832732677459717, "learning_rate": 9.753661965197354e-05, "loss": -0.1548, "step": 8720 }, { "epoch": 5.081490104772992, "grad_norm": 0.39300742745399475, "learning_rate": 9.752806915294908e-05, "loss": -0.151, "step": 8730 }, { "epoch": 5.087310826542491, "grad_norm": 0.345580518245697, "learning_rate": 9.75195042161521e-05, "loss": -0.154, "step": 8740 }, { "epoch": 5.093131548311991, "grad_norm": 0.36918380856513977, "learning_rate": 9.751092484418442e-05, "loss": -0.1569, "step": 8750 }, { "epoch": 5.09895227008149, "grad_norm": 0.2837171256542206, "learning_rate": 9.750233103965224e-05, "loss": -0.1545, "step": 8760 }, { "epoch": 5.104772991850989, "grad_norm": 0.3429955840110779, "learning_rate": 9.749372280516611e-05, "loss": -0.1523, "step": 8770 }, { "epoch": 5.110593713620489, "grad_norm": 0.3225143551826477, "learning_rate": 9.748510014334097e-05, "loss": -0.1489, "step": 8780 }, { "epoch": 5.116414435389989, "grad_norm": 0.24081586301326752, "learning_rate": 9.747646305679621e-05, "loss": -0.1459, "step": 8790 }, { "epoch": 5.122235157159488, "grad_norm": 0.37302473187446594, "learning_rate": 9.74678115481555e-05, "loss": -0.1457, "step": 8800 }, { "epoch": 5.128055878928987, "grad_norm": 0.19980613887310028, "learning_rate": 9.745914562004696e-05, "loss": -0.151, "step": 8810 }, { "epoch": 5.133876600698486, "grad_norm": 0.3739809989929199, "learning_rate": 9.745046527510307e-05, "loss": -0.1576, "step": 8820 }, { "epoch": 5.139697322467986, "grad_norm": 0.4415774643421173, "learning_rate": 9.744177051596068e-05, "loss": -0.1495, "step": 8830 }, { "epoch": 5.145518044237486, "grad_norm": 0.3257841169834137, "learning_rate": 9.743306134526105e-05, "loss": -0.152, "step": 8840 }, { "epoch": 5.151338766006985, "grad_norm": 0.25513002276420593, "learning_rate": 9.742433776564977e-05, "loss": -0.1503, "step": 8850 }, { "epoch": 5.157159487776484, "grad_norm": 0.27174490690231323, "learning_rate": 9.741559977977683e-05, "loss": -0.1518, "step": 8860 }, { "epoch": 5.162980209545983, "grad_norm": 0.3639602065086365, "learning_rate": 9.740684739029661e-05, "loss": -0.1519, "step": 8870 }, { "epoch": 5.1688009313154835, "grad_norm": 0.3613210916519165, "learning_rate": 9.739808059986789e-05, "loss": -0.1439, "step": 8880 }, { "epoch": 5.174621653084983, "grad_norm": 0.3571071922779083, "learning_rate": 9.738929941115373e-05, "loss": -0.1478, "step": 8890 }, { "epoch": 5.180442374854482, "grad_norm": 0.3334673345088959, "learning_rate": 9.738050382682167e-05, "loss": -0.1593, "step": 8900 }, { "epoch": 5.186263096623981, "grad_norm": 0.3697063624858856, "learning_rate": 9.737169384954355e-05, "loss": -0.1533, "step": 8910 }, { "epoch": 5.192083818393481, "grad_norm": 0.2270103543996811, "learning_rate": 9.736286948199562e-05, "loss": -0.1547, "step": 8920 }, { "epoch": 5.19790454016298, "grad_norm": 0.29483509063720703, "learning_rate": 9.735403072685848e-05, "loss": -0.1534, "step": 8930 }, { "epoch": 5.20372526193248, "grad_norm": 0.32310235500335693, "learning_rate": 9.734517758681712e-05, "loss": -0.1517, "step": 8940 }, { "epoch": 5.209545983701979, "grad_norm": 0.25307413935661316, "learning_rate": 9.733631006456088e-05, "loss": -0.1524, "step": 8950 }, { "epoch": 5.215366705471478, "grad_norm": 0.3116288483142853, "learning_rate": 9.732742816278348e-05, "loss": -0.1515, "step": 8960 }, { "epoch": 5.221187427240978, "grad_norm": 0.26965558528900146, "learning_rate": 9.731853188418302e-05, "loss": -0.1575, "step": 8970 }, { "epoch": 5.227008149010477, "grad_norm": 0.2747616469860077, "learning_rate": 9.730962123146194e-05, "loss": -0.1481, "step": 8980 }, { "epoch": 5.232828870779977, "grad_norm": 0.30774641036987305, "learning_rate": 9.730069620732709e-05, "loss": -0.1511, "step": 8990 }, { "epoch": 5.238649592549476, "grad_norm": 0.29295024275779724, "learning_rate": 9.72917568144896e-05, "loss": -0.1543, "step": 9000 }, { "epoch": 5.244470314318976, "grad_norm": 0.24930691719055176, "learning_rate": 9.728280305566509e-05, "loss": -0.1514, "step": 9010 }, { "epoch": 5.250291036088475, "grad_norm": 0.2748640179634094, "learning_rate": 9.727383493357343e-05, "loss": -0.1509, "step": 9020 }, { "epoch": 5.256111757857974, "grad_norm": 0.33339351415634155, "learning_rate": 9.726485245093891e-05, "loss": -0.1563, "step": 9030 }, { "epoch": 5.261932479627474, "grad_norm": 0.22069209814071655, "learning_rate": 9.725585561049018e-05, "loss": -0.1568, "step": 9040 }, { "epoch": 5.267753201396973, "grad_norm": 0.31465575098991394, "learning_rate": 9.724684441496022e-05, "loss": -0.1506, "step": 9050 }, { "epoch": 5.273573923166473, "grad_norm": 0.3323848247528076, "learning_rate": 9.72378188670864e-05, "loss": -0.1568, "step": 9060 }, { "epoch": 5.279394644935972, "grad_norm": 0.27225157618522644, "learning_rate": 9.722877896961047e-05, "loss": -0.156, "step": 9070 }, { "epoch": 5.285215366705471, "grad_norm": 0.24934889376163483, "learning_rate": 9.721972472527848e-05, "loss": -0.1559, "step": 9080 }, { "epoch": 5.2910360884749705, "grad_norm": 0.3127851188182831, "learning_rate": 9.721065613684089e-05, "loss": -0.1583, "step": 9090 }, { "epoch": 5.296856810244471, "grad_norm": 0.1950501650571823, "learning_rate": 9.72015732070525e-05, "loss": -0.1562, "step": 9100 }, { "epoch": 5.30267753201397, "grad_norm": 0.3242107927799225, "learning_rate": 9.719247593867244e-05, "loss": -0.1585, "step": 9110 }, { "epoch": 5.308498253783469, "grad_norm": 0.31914880871772766, "learning_rate": 9.718336433446423e-05, "loss": -0.1586, "step": 9120 }, { "epoch": 5.314318975552968, "grad_norm": 0.30802714824676514, "learning_rate": 9.717423839719574e-05, "loss": -0.1552, "step": 9130 }, { "epoch": 5.320139697322468, "grad_norm": 0.391448438167572, "learning_rate": 9.71650981296392e-05, "loss": -0.1543, "step": 9140 }, { "epoch": 5.325960419091968, "grad_norm": 0.3320644795894623, "learning_rate": 9.715594353457118e-05, "loss": -0.1572, "step": 9150 }, { "epoch": 5.331781140861467, "grad_norm": 0.2820534110069275, "learning_rate": 9.714677461477257e-05, "loss": -0.1517, "step": 9160 }, { "epoch": 5.337601862630966, "grad_norm": 0.3542715907096863, "learning_rate": 9.713759137302869e-05, "loss": -0.1551, "step": 9170 }, { "epoch": 5.343422584400465, "grad_norm": 0.3107186555862427, "learning_rate": 9.712839381212914e-05, "loss": -0.1597, "step": 9180 }, { "epoch": 5.349243306169965, "grad_norm": 0.3578527271747589, "learning_rate": 9.71191819348679e-05, "loss": -0.1575, "step": 9190 }, { "epoch": 5.355064027939465, "grad_norm": 0.3328803777694702, "learning_rate": 9.710995574404331e-05, "loss": -0.1532, "step": 9200 }, { "epoch": 5.360884749708964, "grad_norm": 0.3094511330127716, "learning_rate": 9.710071524245802e-05, "loss": -0.1542, "step": 9210 }, { "epoch": 5.366705471478463, "grad_norm": 0.273294597864151, "learning_rate": 9.709146043291906e-05, "loss": -0.162, "step": 9220 }, { "epoch": 5.372526193247963, "grad_norm": 0.3182125687599182, "learning_rate": 9.70821913182378e-05, "loss": -0.1551, "step": 9230 }, { "epoch": 5.378346915017462, "grad_norm": 0.25263407826423645, "learning_rate": 9.707290790122995e-05, "loss": -0.1547, "step": 9240 }, { "epoch": 5.3841676367869615, "grad_norm": 0.1568390280008316, "learning_rate": 9.706361018471557e-05, "loss": -0.1549, "step": 9250 }, { "epoch": 5.389988358556461, "grad_norm": 0.3486315906047821, "learning_rate": 9.705429817151906e-05, "loss": -0.1536, "step": 9260 }, { "epoch": 5.395809080325961, "grad_norm": 0.35763487219810486, "learning_rate": 9.704497186446917e-05, "loss": -0.1568, "step": 9270 }, { "epoch": 5.40162980209546, "grad_norm": 0.2386152148246765, "learning_rate": 9.703563126639896e-05, "loss": -0.1516, "step": 9280 }, { "epoch": 5.407450523864959, "grad_norm": 0.274522602558136, "learning_rate": 9.70262763801459e-05, "loss": -0.1553, "step": 9290 }, { "epoch": 5.4132712456344585, "grad_norm": 0.339236855506897, "learning_rate": 9.701690720855171e-05, "loss": -0.1518, "step": 9300 }, { "epoch": 5.419091967403958, "grad_norm": 0.2733536958694458, "learning_rate": 9.700752375446253e-05, "loss": -0.1551, "step": 9310 }, { "epoch": 5.424912689173458, "grad_norm": 0.3016408681869507, "learning_rate": 9.69981260207288e-05, "loss": -0.145, "step": 9320 }, { "epoch": 5.430733410942957, "grad_norm": 0.26135554909706116, "learning_rate": 9.698871401020529e-05, "loss": -0.1544, "step": 9330 }, { "epoch": 5.436554132712456, "grad_norm": 0.34924402832984924, "learning_rate": 9.697928772575112e-05, "loss": -0.1548, "step": 9340 }, { "epoch": 5.4423748544819555, "grad_norm": 0.3130134046077728, "learning_rate": 9.696984717022976e-05, "loss": -0.1597, "step": 9350 }, { "epoch": 5.448195576251456, "grad_norm": 0.22237153351306915, "learning_rate": 9.6960392346509e-05, "loss": -0.1602, "step": 9360 }, { "epoch": 5.454016298020955, "grad_norm": 0.219522625207901, "learning_rate": 9.695092325746097e-05, "loss": -0.1469, "step": 9370 }, { "epoch": 5.459837019790454, "grad_norm": 0.3040454685688019, "learning_rate": 9.694143990596211e-05, "loss": -0.1598, "step": 9380 }, { "epoch": 5.465657741559953, "grad_norm": 0.3655194342136383, "learning_rate": 9.693194229489325e-05, "loss": -0.1445, "step": 9390 }, { "epoch": 5.471478463329452, "grad_norm": 0.39664527773857117, "learning_rate": 9.692243042713944e-05, "loss": -0.1565, "step": 9400 }, { "epoch": 5.4772991850989525, "grad_norm": 0.25020831823349, "learning_rate": 9.691290430559022e-05, "loss": -0.155, "step": 9410 }, { "epoch": 5.483119906868452, "grad_norm": 0.27817150950431824, "learning_rate": 9.690336393313932e-05, "loss": -0.1559, "step": 9420 }, { "epoch": 5.488940628637951, "grad_norm": 0.27201151847839355, "learning_rate": 9.689380931268487e-05, "loss": -0.1514, "step": 9430 }, { "epoch": 5.49476135040745, "grad_norm": 0.38594740629196167, "learning_rate": 9.688424044712932e-05, "loss": -0.15, "step": 9440 }, { "epoch": 5.50058207217695, "grad_norm": 0.30251988768577576, "learning_rate": 9.687465733937942e-05, "loss": -0.1489, "step": 9450 }, { "epoch": 5.5064027939464495, "grad_norm": 0.3709050416946411, "learning_rate": 9.686505999234627e-05, "loss": -0.157, "step": 9460 }, { "epoch": 5.512223515715949, "grad_norm": 0.278825044631958, "learning_rate": 9.685544840894529e-05, "loss": -0.1548, "step": 9470 }, { "epoch": 5.518044237485448, "grad_norm": 0.3543613851070404, "learning_rate": 9.684582259209624e-05, "loss": -0.1555, "step": 9480 }, { "epoch": 5.523864959254947, "grad_norm": 0.38519805669784546, "learning_rate": 9.683618254472317e-05, "loss": -0.1532, "step": 9490 }, { "epoch": 5.529685681024447, "grad_norm": 0.31696486473083496, "learning_rate": 9.682652826975449e-05, "loss": -0.1568, "step": 9500 }, { "epoch": 5.5355064027939465, "grad_norm": 0.18442314863204956, "learning_rate": 9.681685977012291e-05, "loss": -0.157, "step": 9510 }, { "epoch": 5.541327124563446, "grad_norm": 0.35947465896606445, "learning_rate": 9.680717704876546e-05, "loss": -0.1561, "step": 9520 }, { "epoch": 5.547147846332946, "grad_norm": 0.300942599773407, "learning_rate": 9.679748010862349e-05, "loss": -0.1554, "step": 9530 }, { "epoch": 5.552968568102445, "grad_norm": 0.24002447724342346, "learning_rate": 9.678776895264267e-05, "loss": -0.157, "step": 9540 }, { "epoch": 5.558789289871944, "grad_norm": 0.2968622148036957, "learning_rate": 9.6778043583773e-05, "loss": -0.1492, "step": 9550 }, { "epoch": 5.564610011641443, "grad_norm": 0.2714739143848419, "learning_rate": 9.67683040049688e-05, "loss": -0.1591, "step": 9560 }, { "epoch": 5.570430733410943, "grad_norm": 0.31258857250213623, "learning_rate": 9.675855021918869e-05, "loss": -0.1549, "step": 9570 }, { "epoch": 5.576251455180443, "grad_norm": 0.3719431161880493, "learning_rate": 9.674878222939561e-05, "loss": -0.1609, "step": 9580 }, { "epoch": 5.582072176949942, "grad_norm": 0.26445719599723816, "learning_rate": 9.673900003855681e-05, "loss": -0.1643, "step": 9590 }, { "epoch": 5.587892898719441, "grad_norm": 0.3263351023197174, "learning_rate": 9.672920364964389e-05, "loss": -0.1507, "step": 9600 }, { "epoch": 5.59371362048894, "grad_norm": 0.2476455569267273, "learning_rate": 9.671939306563269e-05, "loss": -0.162, "step": 9610 }, { "epoch": 5.5995343422584405, "grad_norm": 0.26417937874794006, "learning_rate": 9.670956828950345e-05, "loss": -0.1533, "step": 9620 }, { "epoch": 5.60535506402794, "grad_norm": 0.28166985511779785, "learning_rate": 9.669972932424065e-05, "loss": -0.1596, "step": 9630 }, { "epoch": 5.611175785797439, "grad_norm": 0.26763197779655457, "learning_rate": 9.668987617283312e-05, "loss": -0.1625, "step": 9640 }, { "epoch": 5.616996507566938, "grad_norm": 0.31575462222099304, "learning_rate": 9.668000883827397e-05, "loss": -0.1518, "step": 9650 }, { "epoch": 5.622817229336437, "grad_norm": 0.4094064235687256, "learning_rate": 9.667012732356067e-05, "loss": -0.1518, "step": 9660 }, { "epoch": 5.6286379511059375, "grad_norm": 0.26892802119255066, "learning_rate": 9.666023163169493e-05, "loss": -0.1401, "step": 9670 }, { "epoch": 5.634458672875437, "grad_norm": 0.23076848685741425, "learning_rate": 9.665032176568281e-05, "loss": -0.1542, "step": 9680 }, { "epoch": 5.640279394644936, "grad_norm": 0.2780521512031555, "learning_rate": 9.664039772853469e-05, "loss": -0.1531, "step": 9690 }, { "epoch": 5.646100116414435, "grad_norm": 0.3055569529533386, "learning_rate": 9.663045952326518e-05, "loss": -0.1568, "step": 9700 }, { "epoch": 5.651920838183935, "grad_norm": 0.21599531173706055, "learning_rate": 9.662050715289328e-05, "loss": -0.1636, "step": 9710 }, { "epoch": 5.657741559953434, "grad_norm": 0.24351729452610016, "learning_rate": 9.661054062044226e-05, "loss": -0.1522, "step": 9720 }, { "epoch": 5.663562281722934, "grad_norm": 0.24646329879760742, "learning_rate": 9.660055992893968e-05, "loss": -0.164, "step": 9730 }, { "epoch": 5.669383003492433, "grad_norm": 0.23029863834381104, "learning_rate": 9.659056508141739e-05, "loss": -0.1557, "step": 9740 }, { "epoch": 5.675203725261932, "grad_norm": 0.3617748022079468, "learning_rate": 9.658055608091161e-05, "loss": -0.1525, "step": 9750 }, { "epoch": 5.681024447031432, "grad_norm": 0.35028377175331116, "learning_rate": 9.657053293046276e-05, "loss": -0.1521, "step": 9760 }, { "epoch": 5.686845168800931, "grad_norm": 0.30363914370536804, "learning_rate": 9.656049563311564e-05, "loss": -0.1592, "step": 9770 }, { "epoch": 5.692665890570431, "grad_norm": 0.28670600056648254, "learning_rate": 9.655044419191929e-05, "loss": -0.1515, "step": 9780 }, { "epoch": 5.69848661233993, "grad_norm": 0.2539467513561249, "learning_rate": 9.654037860992711e-05, "loss": -0.1593, "step": 9790 }, { "epoch": 5.70430733410943, "grad_norm": 0.2619589567184448, "learning_rate": 9.653029889019672e-05, "loss": -0.1543, "step": 9800 }, { "epoch": 5.710128055878929, "grad_norm": 0.30241379141807556, "learning_rate": 9.65202050357901e-05, "loss": -0.1573, "step": 9810 }, { "epoch": 5.715948777648428, "grad_norm": 0.3108255863189697, "learning_rate": 9.651009704977347e-05, "loss": -0.1577, "step": 9820 }, { "epoch": 5.721769499417928, "grad_norm": 0.2982947528362274, "learning_rate": 9.649997493521738e-05, "loss": -0.1564, "step": 9830 }, { "epoch": 5.727590221187427, "grad_norm": 0.3458094894886017, "learning_rate": 9.64898386951967e-05, "loss": -0.1597, "step": 9840 }, { "epoch": 5.733410942956927, "grad_norm": 0.37107980251312256, "learning_rate": 9.647968833279049e-05, "loss": -0.1551, "step": 9850 }, { "epoch": 5.739231664726426, "grad_norm": 0.35593098402023315, "learning_rate": 9.646952385108218e-05, "loss": -0.1523, "step": 9860 }, { "epoch": 5.745052386495925, "grad_norm": 0.29758724570274353, "learning_rate": 9.645934525315951e-05, "loss": -0.1612, "step": 9870 }, { "epoch": 5.7508731082654245, "grad_norm": 0.3756684958934784, "learning_rate": 9.644915254211442e-05, "loss": -0.1594, "step": 9880 }, { "epoch": 5.756693830034925, "grad_norm": 0.3210456967353821, "learning_rate": 9.643894572104321e-05, "loss": -0.1568, "step": 9890 }, { "epoch": 5.762514551804424, "grad_norm": 0.2918015122413635, "learning_rate": 9.642872479304644e-05, "loss": -0.1545, "step": 9900 }, { "epoch": 5.768335273573923, "grad_norm": 0.3740055561065674, "learning_rate": 9.641848976122895e-05, "loss": -0.1599, "step": 9910 }, { "epoch": 5.774155995343422, "grad_norm": 0.35329365730285645, "learning_rate": 9.64082406286999e-05, "loss": -0.1608, "step": 9920 }, { "epoch": 5.779976717112922, "grad_norm": 0.44872888922691345, "learning_rate": 9.639797739857269e-05, "loss": -0.148, "step": 9930 }, { "epoch": 5.785797438882422, "grad_norm": 0.3459102213382721, "learning_rate": 9.638770007396498e-05, "loss": -0.1557, "step": 9940 }, { "epoch": 5.791618160651921, "grad_norm": 0.29715695977211, "learning_rate": 9.63774086579988e-05, "loss": -0.1465, "step": 9950 }, { "epoch": 5.79743888242142, "grad_norm": 0.23445095121860504, "learning_rate": 9.63671031538004e-05, "loss": -0.1575, "step": 9960 }, { "epoch": 5.80325960419092, "grad_norm": 0.3494913578033447, "learning_rate": 9.635678356450031e-05, "loss": -0.1558, "step": 9970 }, { "epoch": 5.809080325960419, "grad_norm": 0.2504556179046631, "learning_rate": 9.634644989323336e-05, "loss": -0.154, "step": 9980 }, { "epoch": 5.814901047729919, "grad_norm": 0.42917054891586304, "learning_rate": 9.633610214313861e-05, "loss": -0.1557, "step": 9990 }, { "epoch": 5.820721769499418, "grad_norm": 0.2375221848487854, "learning_rate": 9.632574031735951e-05, "loss": -0.1587, "step": 10000 }, { "epoch": 5.826542491268917, "grad_norm": 0.2631641924381256, "learning_rate": 9.631536441904364e-05, "loss": -0.1578, "step": 10010 }, { "epoch": 5.832363213038417, "grad_norm": 0.18033595383167267, "learning_rate": 9.630497445134293e-05, "loss": -0.1537, "step": 10020 }, { "epoch": 5.838183934807916, "grad_norm": 0.18279464542865753, "learning_rate": 9.62945704174136e-05, "loss": -0.1609, "step": 10030 }, { "epoch": 5.8440046565774155, "grad_norm": 0.3528279662132263, "learning_rate": 9.628415232041612e-05, "loss": -0.1534, "step": 10040 }, { "epoch": 5.849825378346915, "grad_norm": 0.25980719923973083, "learning_rate": 9.627372016351524e-05, "loss": -0.1581, "step": 10050 }, { "epoch": 5.855646100116415, "grad_norm": 0.2693552076816559, "learning_rate": 9.626327394987995e-05, "loss": -0.1591, "step": 10060 }, { "epoch": 5.861466821885914, "grad_norm": 0.2881964147090912, "learning_rate": 9.625281368268355e-05, "loss": -0.158, "step": 10070 }, { "epoch": 5.867287543655413, "grad_norm": 0.2472992241382599, "learning_rate": 9.624233936510357e-05, "loss": -0.1529, "step": 10080 }, { "epoch": 5.8731082654249125, "grad_norm": 0.3586422801017761, "learning_rate": 9.623185100032187e-05, "loss": -0.1529, "step": 10090 }, { "epoch": 5.878928987194412, "grad_norm": 0.20823965966701508, "learning_rate": 9.62213485915245e-05, "loss": -0.1581, "step": 10100 }, { "epoch": 5.884749708963912, "grad_norm": 0.3448750674724579, "learning_rate": 9.621083214190186e-05, "loss": -0.1571, "step": 10110 }, { "epoch": 5.890570430733411, "grad_norm": 0.2418372929096222, "learning_rate": 9.62003016546485e-05, "loss": -0.1554, "step": 10120 }, { "epoch": 5.89639115250291, "grad_norm": 0.3275255262851715, "learning_rate": 9.618975713296339e-05, "loss": -0.1605, "step": 10130 }, { "epoch": 5.9022118742724095, "grad_norm": 0.3207539916038513, "learning_rate": 9.61791985800496e-05, "loss": -0.163, "step": 10140 }, { "epoch": 5.90803259604191, "grad_norm": 0.2927592694759369, "learning_rate": 9.616862599911458e-05, "loss": -0.1538, "step": 10150 }, { "epoch": 5.913853317811409, "grad_norm": 0.24966807663440704, "learning_rate": 9.615803939337e-05, "loss": -0.1562, "step": 10160 }, { "epoch": 5.919674039580908, "grad_norm": 0.2946247458457947, "learning_rate": 9.614743876603178e-05, "loss": -0.1562, "step": 10170 }, { "epoch": 5.925494761350407, "grad_norm": 0.1489664614200592, "learning_rate": 9.613682412032013e-05, "loss": -0.1555, "step": 10180 }, { "epoch": 5.931315483119906, "grad_norm": 0.30109262466430664, "learning_rate": 9.612619545945947e-05, "loss": -0.1483, "step": 10190 }, { "epoch": 5.9371362048894065, "grad_norm": 0.3009040951728821, "learning_rate": 9.611555278667852e-05, "loss": -0.1562, "step": 10200 }, { "epoch": 5.942956926658906, "grad_norm": 0.23955075442790985, "learning_rate": 9.610489610521024e-05, "loss": -0.1602, "step": 10210 }, { "epoch": 5.948777648428405, "grad_norm": 0.30343806743621826, "learning_rate": 9.609422541829187e-05, "loss": -0.1573, "step": 10220 }, { "epoch": 5.954598370197904, "grad_norm": 0.21667324006557465, "learning_rate": 9.608354072916486e-05, "loss": -0.1482, "step": 10230 }, { "epoch": 5.960419091967404, "grad_norm": 0.29413214325904846, "learning_rate": 9.607284204107493e-05, "loss": -0.151, "step": 10240 }, { "epoch": 5.9662398137369035, "grad_norm": 0.4461844563484192, "learning_rate": 9.606212935727208e-05, "loss": -0.1606, "step": 10250 }, { "epoch": 5.972060535506403, "grad_norm": 0.28282955288887024, "learning_rate": 9.605140268101052e-05, "loss": -0.158, "step": 10260 }, { "epoch": 5.977881257275902, "grad_norm": 0.3450043201446533, "learning_rate": 9.604066201554875e-05, "loss": -0.1612, "step": 10270 }, { "epoch": 5.983701979045401, "grad_norm": 0.40317845344543457, "learning_rate": 9.60299073641495e-05, "loss": -0.157, "step": 10280 }, { "epoch": 5.989522700814901, "grad_norm": 0.21685154736042023, "learning_rate": 9.601913873007974e-05, "loss": -0.1651, "step": 10290 }, { "epoch": 5.9953434225844005, "grad_norm": 0.2595549523830414, "learning_rate": 9.60083561166107e-05, "loss": -0.1582, "step": 10300 }, { "epoch": 6.0011641443539, "grad_norm": 0.32108062505722046, "learning_rate": 9.599755952701783e-05, "loss": -0.1558, "step": 10310 }, { "epoch": 6.006984866123399, "grad_norm": 0.3575068712234497, "learning_rate": 9.598674896458089e-05, "loss": -0.1551, "step": 10320 }, { "epoch": 6.012805587892899, "grad_norm": 0.249012291431427, "learning_rate": 9.597592443258383e-05, "loss": -0.1649, "step": 10330 }, { "epoch": 6.018626309662398, "grad_norm": 0.22445464134216309, "learning_rate": 9.596508593431483e-05, "loss": -0.1625, "step": 10340 }, { "epoch": 6.024447031431897, "grad_norm": 0.20220886170864105, "learning_rate": 9.59542334730664e-05, "loss": -0.1591, "step": 10350 }, { "epoch": 6.030267753201397, "grad_norm": 0.2405746430158615, "learning_rate": 9.594336705213516e-05, "loss": -0.157, "step": 10360 }, { "epoch": 6.036088474970897, "grad_norm": 0.18216288089752197, "learning_rate": 9.593248667482208e-05, "loss": -0.1569, "step": 10370 }, { "epoch": 6.041909196740396, "grad_norm": 0.41125157475471497, "learning_rate": 9.592159234443233e-05, "loss": -0.152, "step": 10380 }, { "epoch": 6.047729918509895, "grad_norm": 0.27475276589393616, "learning_rate": 9.59106840642753e-05, "loss": -0.149, "step": 10390 }, { "epoch": 6.053550640279394, "grad_norm": 0.22099655866622925, "learning_rate": 9.589976183766467e-05, "loss": -0.1583, "step": 10400 }, { "epoch": 6.0593713620488945, "grad_norm": 0.3277629613876343, "learning_rate": 9.58888256679183e-05, "loss": -0.1557, "step": 10410 }, { "epoch": 6.065192083818394, "grad_norm": 0.266167551279068, "learning_rate": 9.587787555835832e-05, "loss": -0.158, "step": 10420 }, { "epoch": 6.071012805587893, "grad_norm": 0.2879430055618286, "learning_rate": 9.586691151231107e-05, "loss": -0.1584, "step": 10430 }, { "epoch": 6.076833527357392, "grad_norm": 0.2679694592952728, "learning_rate": 9.585593353310715e-05, "loss": -0.1602, "step": 10440 }, { "epoch": 6.082654249126891, "grad_norm": 0.23761460185050964, "learning_rate": 9.58449416240814e-05, "loss": -0.1528, "step": 10450 }, { "epoch": 6.0884749708963914, "grad_norm": 0.2704014480113983, "learning_rate": 9.583393578857283e-05, "loss": -0.1578, "step": 10460 }, { "epoch": 6.094295692665891, "grad_norm": 0.20714710652828217, "learning_rate": 9.582291602992474e-05, "loss": -0.1604, "step": 10470 }, { "epoch": 6.10011641443539, "grad_norm": 0.2906169295310974, "learning_rate": 9.581188235148466e-05, "loss": -0.1578, "step": 10480 }, { "epoch": 6.105937136204889, "grad_norm": 0.3475373089313507, "learning_rate": 9.58008347566043e-05, "loss": -0.1624, "step": 10490 }, { "epoch": 6.111757857974389, "grad_norm": 0.2972780764102936, "learning_rate": 9.578977324863965e-05, "loss": -0.1609, "step": 10500 }, { "epoch": 6.117578579743888, "grad_norm": 0.314119428396225, "learning_rate": 9.577869783095089e-05, "loss": -0.1547, "step": 10510 }, { "epoch": 6.123399301513388, "grad_norm": 0.19196009635925293, "learning_rate": 9.576760850690245e-05, "loss": -0.1549, "step": 10520 }, { "epoch": 6.129220023282887, "grad_norm": 0.30689409375190735, "learning_rate": 9.575650527986298e-05, "loss": -0.1493, "step": 10530 }, { "epoch": 6.135040745052386, "grad_norm": 0.3629699647426605, "learning_rate": 9.574538815320531e-05, "loss": -0.159, "step": 10540 }, { "epoch": 6.140861466821886, "grad_norm": 0.40111035108566284, "learning_rate": 9.573425713030656e-05, "loss": -0.1504, "step": 10550 }, { "epoch": 6.146682188591385, "grad_norm": 0.3043433427810669, "learning_rate": 9.572311221454806e-05, "loss": -0.1558, "step": 10560 }, { "epoch": 6.152502910360885, "grad_norm": 0.26635634899139404, "learning_rate": 9.57119534093153e-05, "loss": -0.1512, "step": 10570 }, { "epoch": 6.158323632130384, "grad_norm": 0.274587482213974, "learning_rate": 9.570078071799806e-05, "loss": -0.1597, "step": 10580 }, { "epoch": 6.164144353899884, "grad_norm": 0.20475101470947266, "learning_rate": 9.568959414399028e-05, "loss": -0.1606, "step": 10590 }, { "epoch": 6.169965075669383, "grad_norm": 0.2568678557872772, "learning_rate": 9.567839369069018e-05, "loss": -0.1597, "step": 10600 }, { "epoch": 6.175785797438882, "grad_norm": 0.278200626373291, "learning_rate": 9.566717936150013e-05, "loss": -0.1559, "step": 10610 }, { "epoch": 6.181606519208382, "grad_norm": 0.2165462076663971, "learning_rate": 9.565595115982678e-05, "loss": -0.1594, "step": 10620 }, { "epoch": 6.187427240977882, "grad_norm": 0.3760250508785248, "learning_rate": 9.564470908908094e-05, "loss": -0.157, "step": 10630 }, { "epoch": 6.193247962747381, "grad_norm": 0.3532213568687439, "learning_rate": 9.563345315267764e-05, "loss": -0.1557, "step": 10640 }, { "epoch": 6.19906868451688, "grad_norm": 0.31711041927337646, "learning_rate": 9.562218335403616e-05, "loss": -0.158, "step": 10650 }, { "epoch": 6.204889406286379, "grad_norm": 0.1572282910346985, "learning_rate": 9.561089969657999e-05, "loss": -0.1591, "step": 10660 }, { "epoch": 6.2107101280558785, "grad_norm": 0.31054359674453735, "learning_rate": 9.559960218373673e-05, "loss": -0.1553, "step": 10670 }, { "epoch": 6.216530849825379, "grad_norm": 0.2339305430650711, "learning_rate": 9.558829081893836e-05, "loss": -0.1561, "step": 10680 }, { "epoch": 6.222351571594878, "grad_norm": 0.2966518998146057, "learning_rate": 9.55769656056209e-05, "loss": -0.1617, "step": 10690 }, { "epoch": 6.228172293364377, "grad_norm": 0.3169662654399872, "learning_rate": 9.556562654722469e-05, "loss": -0.1562, "step": 10700 }, { "epoch": 6.233993015133876, "grad_norm": 0.20880943536758423, "learning_rate": 9.555427364719422e-05, "loss": -0.1525, "step": 10710 }, { "epoch": 6.239813736903376, "grad_norm": 0.19920915365219116, "learning_rate": 9.55429069089782e-05, "loss": -0.1628, "step": 10720 }, { "epoch": 6.245634458672876, "grad_norm": 0.2270524948835373, "learning_rate": 9.553152633602956e-05, "loss": -0.1573, "step": 10730 }, { "epoch": 6.251455180442375, "grad_norm": 0.3182331919670105, "learning_rate": 9.552013193180543e-05, "loss": -0.1553, "step": 10740 }, { "epoch": 6.257275902211874, "grad_norm": 0.31177353858947754, "learning_rate": 9.550872369976707e-05, "loss": -0.1619, "step": 10750 }, { "epoch": 6.263096623981374, "grad_norm": 0.3929601013660431, "learning_rate": 9.549730164338007e-05, "loss": -0.1583, "step": 10760 }, { "epoch": 6.268917345750873, "grad_norm": 0.27592742443084717, "learning_rate": 9.548586576611408e-05, "loss": -0.1603, "step": 10770 }, { "epoch": 6.2747380675203726, "grad_norm": 0.3481783866882324, "learning_rate": 9.54744160714431e-05, "loss": -0.1599, "step": 10780 }, { "epoch": 6.280558789289872, "grad_norm": 0.20872388780117035, "learning_rate": 9.546295256284516e-05, "loss": -0.1613, "step": 10790 }, { "epoch": 6.286379511059371, "grad_norm": 0.23562847077846527, "learning_rate": 9.545147524380265e-05, "loss": -0.1591, "step": 10800 }, { "epoch": 6.292200232828871, "grad_norm": 0.22396332025527954, "learning_rate": 9.543998411780201e-05, "loss": -0.1598, "step": 10810 }, { "epoch": 6.29802095459837, "grad_norm": 0.21994149684906006, "learning_rate": 9.542847918833397e-05, "loss": -0.1593, "step": 10820 }, { "epoch": 6.3038416763678695, "grad_norm": 0.22596070170402527, "learning_rate": 9.541696045889343e-05, "loss": -0.1596, "step": 10830 }, { "epoch": 6.309662398137369, "grad_norm": 0.17179234325885773, "learning_rate": 9.540542793297947e-05, "loss": -0.1629, "step": 10840 }, { "epoch": 6.315483119906869, "grad_norm": 0.18426449596881866, "learning_rate": 9.539388161409537e-05, "loss": -0.1624, "step": 10850 }, { "epoch": 6.321303841676368, "grad_norm": 0.3394531011581421, "learning_rate": 9.538232150574857e-05, "loss": -0.1582, "step": 10860 }, { "epoch": 6.327124563445867, "grad_norm": 0.2166965901851654, "learning_rate": 9.537074761145076e-05, "loss": -0.1623, "step": 10870 }, { "epoch": 6.3329452852153665, "grad_norm": 0.37436673045158386, "learning_rate": 9.535915993471778e-05, "loss": -0.1542, "step": 10880 }, { "epoch": 6.338766006984866, "grad_norm": 0.19655343890190125, "learning_rate": 9.534755847906964e-05, "loss": -0.1498, "step": 10890 }, { "epoch": 6.344586728754366, "grad_norm": 0.27624234557151794, "learning_rate": 9.533594324803057e-05, "loss": -0.1569, "step": 10900 }, { "epoch": 6.350407450523865, "grad_norm": 0.28609395027160645, "learning_rate": 9.532431424512895e-05, "loss": -0.1622, "step": 10910 }, { "epoch": 6.356228172293364, "grad_norm": 0.22501595318317413, "learning_rate": 9.531267147389741e-05, "loss": -0.1569, "step": 10920 }, { "epoch": 6.3620488940628634, "grad_norm": 0.32660049200057983, "learning_rate": 9.530101493787266e-05, "loss": -0.1552, "step": 10930 }, { "epoch": 6.3678696158323636, "grad_norm": 0.2835864722728729, "learning_rate": 9.528934464059571e-05, "loss": -0.1635, "step": 10940 }, { "epoch": 6.373690337601863, "grad_norm": 0.24852344393730164, "learning_rate": 9.527766058561163e-05, "loss": -0.1621, "step": 10950 }, { "epoch": 6.379511059371362, "grad_norm": 0.24768438935279846, "learning_rate": 9.526596277646976e-05, "loss": -0.1559, "step": 10960 }, { "epoch": 6.385331781140861, "grad_norm": 0.2822255492210388, "learning_rate": 9.525425121672358e-05, "loss": -0.1594, "step": 10970 }, { "epoch": 6.391152502910361, "grad_norm": 0.33924877643585205, "learning_rate": 9.524252590993074e-05, "loss": -0.1624, "step": 10980 }, { "epoch": 6.3969732246798605, "grad_norm": 0.17331574857234955, "learning_rate": 9.523078685965309e-05, "loss": -0.1592, "step": 10990 }, { "epoch": 6.40279394644936, "grad_norm": 0.3014139235019684, "learning_rate": 9.521903406945664e-05, "loss": -0.1623, "step": 11000 }, { "epoch": 6.408614668218859, "grad_norm": 0.22390571236610413, "learning_rate": 9.520726754291158e-05, "loss": -0.1607, "step": 11010 }, { "epoch": 6.414435389988358, "grad_norm": 0.2612871527671814, "learning_rate": 9.519548728359227e-05, "loss": -0.1579, "step": 11020 }, { "epoch": 6.420256111757858, "grad_norm": 0.23726485669612885, "learning_rate": 9.518369329507726e-05, "loss": -0.1655, "step": 11030 }, { "epoch": 6.4260768335273575, "grad_norm": 0.29187849164009094, "learning_rate": 9.51718855809492e-05, "loss": -0.1646, "step": 11040 }, { "epoch": 6.431897555296857, "grad_norm": 0.25441762804985046, "learning_rate": 9.516006414479502e-05, "loss": -0.1564, "step": 11050 }, { "epoch": 6.437718277066356, "grad_norm": 0.4073715806007385, "learning_rate": 9.514822899020572e-05, "loss": -0.1487, "step": 11060 }, { "epoch": 6.443538998835856, "grad_norm": 0.2116546928882599, "learning_rate": 9.513638012077654e-05, "loss": -0.1537, "step": 11070 }, { "epoch": 6.449359720605355, "grad_norm": 0.2533140778541565, "learning_rate": 9.512451754010683e-05, "loss": -0.1627, "step": 11080 }, { "epoch": 6.455180442374854, "grad_norm": 0.2824019193649292, "learning_rate": 9.511264125180013e-05, "loss": -0.1656, "step": 11090 }, { "epoch": 6.461001164144354, "grad_norm": 0.2773459553718567, "learning_rate": 9.510075125946414e-05, "loss": -0.1595, "step": 11100 }, { "epoch": 6.466821885913854, "grad_norm": 0.25388699769973755, "learning_rate": 9.508884756671075e-05, "loss": -0.1582, "step": 11110 }, { "epoch": 6.472642607683353, "grad_norm": 0.20915523171424866, "learning_rate": 9.507693017715596e-05, "loss": -0.1596, "step": 11120 }, { "epoch": 6.478463329452852, "grad_norm": 0.220102459192276, "learning_rate": 9.506499909441997e-05, "loss": -0.1608, "step": 11130 }, { "epoch": 6.484284051222351, "grad_norm": 0.39168664813041687, "learning_rate": 9.505305432212713e-05, "loss": -0.1596, "step": 11140 }, { "epoch": 6.490104772991851, "grad_norm": 0.4033607542514801, "learning_rate": 9.504109586390595e-05, "loss": -0.1611, "step": 11150 }, { "epoch": 6.495925494761351, "grad_norm": 0.26587924361228943, "learning_rate": 9.502912372338908e-05, "loss": -0.1573, "step": 11160 }, { "epoch": 6.50174621653085, "grad_norm": 0.21952591836452484, "learning_rate": 9.501713790421335e-05, "loss": -0.1583, "step": 11170 }, { "epoch": 6.507566938300349, "grad_norm": 0.33387839794158936, "learning_rate": 9.500513841001974e-05, "loss": -0.1576, "step": 11180 }, { "epoch": 6.513387660069848, "grad_norm": 0.2542334198951721, "learning_rate": 9.499312524445336e-05, "loss": -0.154, "step": 11190 }, { "epoch": 6.5192083818393485, "grad_norm": 0.33428502082824707, "learning_rate": 9.498109841116351e-05, "loss": -0.1586, "step": 11200 }, { "epoch": 6.525029103608848, "grad_norm": 0.28884023427963257, "learning_rate": 9.496905791380363e-05, "loss": -0.151, "step": 11210 }, { "epoch": 6.530849825378347, "grad_norm": 0.346184104681015, "learning_rate": 9.495700375603129e-05, "loss": -0.1623, "step": 11220 }, { "epoch": 6.536670547147846, "grad_norm": 0.30323168635368347, "learning_rate": 9.494493594150822e-05, "loss": -0.158, "step": 11230 }, { "epoch": 6.542491268917345, "grad_norm": 0.2825276851654053, "learning_rate": 9.493285447390032e-05, "loss": -0.1582, "step": 11240 }, { "epoch": 6.548311990686845, "grad_norm": 0.25697436928749084, "learning_rate": 9.492075935687761e-05, "loss": -0.1614, "step": 11250 }, { "epoch": 6.554132712456345, "grad_norm": 0.2772540748119354, "learning_rate": 9.490865059411427e-05, "loss": -0.1595, "step": 11260 }, { "epoch": 6.559953434225844, "grad_norm": 0.20276261866092682, "learning_rate": 9.489652818928863e-05, "loss": -0.1599, "step": 11270 }, { "epoch": 6.565774155995343, "grad_norm": 0.2553092837333679, "learning_rate": 9.488439214608315e-05, "loss": -0.1463, "step": 11280 }, { "epoch": 6.571594877764843, "grad_norm": 0.2312633991241455, "learning_rate": 9.487224246818444e-05, "loss": -0.1652, "step": 11290 }, { "epoch": 6.577415599534342, "grad_norm": 0.356156587600708, "learning_rate": 9.486007915928325e-05, "loss": -0.1651, "step": 11300 }, { "epoch": 6.583236321303842, "grad_norm": 0.212967187166214, "learning_rate": 9.484790222307448e-05, "loss": -0.1656, "step": 11310 }, { "epoch": 6.589057043073341, "grad_norm": 0.18783429265022278, "learning_rate": 9.483571166325716e-05, "loss": -0.1565, "step": 11320 }, { "epoch": 6.59487776484284, "grad_norm": 0.27042168378829956, "learning_rate": 9.482350748353444e-05, "loss": -0.159, "step": 11330 }, { "epoch": 6.60069848661234, "grad_norm": 0.32352936267852783, "learning_rate": 9.481128968761363e-05, "loss": -0.161, "step": 11340 }, { "epoch": 6.606519208381839, "grad_norm": 0.2605055868625641, "learning_rate": 9.479905827920621e-05, "loss": -0.164, "step": 11350 }, { "epoch": 6.612339930151339, "grad_norm": 0.2941615879535675, "learning_rate": 9.478681326202773e-05, "loss": -0.1594, "step": 11360 }, { "epoch": 6.618160651920838, "grad_norm": 0.29798033833503723, "learning_rate": 9.477455463979791e-05, "loss": -0.1597, "step": 11370 }, { "epoch": 6.623981373690338, "grad_norm": 0.20750805735588074, "learning_rate": 9.476228241624059e-05, "loss": -0.1487, "step": 11380 }, { "epoch": 6.629802095459837, "grad_norm": 0.34469321370124817, "learning_rate": 9.474999659508374e-05, "loss": -0.1614, "step": 11390 }, { "epoch": 6.635622817229336, "grad_norm": 0.20708638429641724, "learning_rate": 9.47376971800595e-05, "loss": -0.1645, "step": 11400 }, { "epoch": 6.6414435389988355, "grad_norm": 0.2970019280910492, "learning_rate": 9.472538417490409e-05, "loss": -0.1543, "step": 11410 }, { "epoch": 6.647264260768336, "grad_norm": 0.2453976571559906, "learning_rate": 9.471305758335784e-05, "loss": -0.1558, "step": 11420 }, { "epoch": 6.653084982537835, "grad_norm": 0.24791665375232697, "learning_rate": 9.47007174091653e-05, "loss": -0.1513, "step": 11430 }, { "epoch": 6.658905704307334, "grad_norm": 0.24478216469287872, "learning_rate": 9.468836365607507e-05, "loss": -0.1527, "step": 11440 }, { "epoch": 6.664726426076833, "grad_norm": 0.37055113911628723, "learning_rate": 9.467599632783988e-05, "loss": -0.1536, "step": 11450 }, { "epoch": 6.670547147846333, "grad_norm": 0.24855686724185944, "learning_rate": 9.466361542821662e-05, "loss": -0.1553, "step": 11460 }, { "epoch": 6.676367869615833, "grad_norm": 0.2534138858318329, "learning_rate": 9.465122096096625e-05, "loss": -0.162, "step": 11470 }, { "epoch": 6.682188591385332, "grad_norm": 0.2230546921491623, "learning_rate": 9.463881292985391e-05, "loss": -0.1581, "step": 11480 }, { "epoch": 6.688009313154831, "grad_norm": 0.32027101516723633, "learning_rate": 9.462639133864881e-05, "loss": -0.1619, "step": 11490 }, { "epoch": 6.69383003492433, "grad_norm": 0.27564293146133423, "learning_rate": 9.461395619112432e-05, "loss": -0.1601, "step": 11500 }, { "epoch": 6.69965075669383, "grad_norm": 0.23754920065402985, "learning_rate": 9.460150749105791e-05, "loss": -0.1644, "step": 11510 }, { "epoch": 6.70547147846333, "grad_norm": 0.2767179608345032, "learning_rate": 9.458904524223116e-05, "loss": -0.1583, "step": 11520 }, { "epoch": 6.711292200232829, "grad_norm": 0.22901642322540283, "learning_rate": 9.457656944842976e-05, "loss": -0.1572, "step": 11530 }, { "epoch": 6.717112922002328, "grad_norm": 0.23947349190711975, "learning_rate": 9.456408011344353e-05, "loss": -0.1637, "step": 11540 }, { "epoch": 6.722933643771828, "grad_norm": 0.26557961106300354, "learning_rate": 9.455157724106643e-05, "loss": -0.1572, "step": 11550 }, { "epoch": 6.728754365541327, "grad_norm": 0.19715499877929688, "learning_rate": 9.453906083509647e-05, "loss": -0.1563, "step": 11560 }, { "epoch": 6.7345750873108265, "grad_norm": 0.33572983741760254, "learning_rate": 9.45265308993358e-05, "loss": -0.1627, "step": 11570 }, { "epoch": 6.740395809080326, "grad_norm": 0.33068040013313293, "learning_rate": 9.451398743759071e-05, "loss": -0.1519, "step": 11580 }, { "epoch": 6.746216530849825, "grad_norm": 0.2952823340892792, "learning_rate": 9.450143045367156e-05, "loss": -0.1622, "step": 11590 }, { "epoch": 6.752037252619325, "grad_norm": 0.3000015616416931, "learning_rate": 9.448885995139283e-05, "loss": -0.1611, "step": 11600 }, { "epoch": 6.757857974388824, "grad_norm": 0.3059895634651184, "learning_rate": 9.44762759345731e-05, "loss": -0.1621, "step": 11610 }, { "epoch": 6.7636786961583235, "grad_norm": 0.28476259112358093, "learning_rate": 9.446367840703509e-05, "loss": -0.1568, "step": 11620 }, { "epoch": 6.769499417927823, "grad_norm": 0.3486057221889496, "learning_rate": 9.445106737260556e-05, "loss": -0.1639, "step": 11630 }, { "epoch": 6.775320139697323, "grad_norm": 0.2635875344276428, "learning_rate": 9.443844283511543e-05, "loss": -0.1591, "step": 11640 }, { "epoch": 6.781140861466822, "grad_norm": 0.35652729868888855, "learning_rate": 9.442580479839968e-05, "loss": -0.1572, "step": 11650 }, { "epoch": 6.786961583236321, "grad_norm": 0.3111717700958252, "learning_rate": 9.441315326629745e-05, "loss": -0.1582, "step": 11660 }, { "epoch": 6.7927823050058205, "grad_norm": 0.2361651360988617, "learning_rate": 9.44004882426519e-05, "loss": -0.1588, "step": 11670 }, { "epoch": 6.79860302677532, "grad_norm": 0.2399349808692932, "learning_rate": 9.438780973131037e-05, "loss": -0.1634, "step": 11680 }, { "epoch": 6.80442374854482, "grad_norm": 0.25472643971443176, "learning_rate": 9.437511773612423e-05, "loss": -0.1537, "step": 11690 }, { "epoch": 6.810244470314319, "grad_norm": 0.21227575838565826, "learning_rate": 9.436241226094896e-05, "loss": -0.1503, "step": 11700 }, { "epoch": 6.816065192083818, "grad_norm": 0.17871171236038208, "learning_rate": 9.434969330964418e-05, "loss": -0.1608, "step": 11710 }, { "epoch": 6.821885913853317, "grad_norm": 0.19633203744888306, "learning_rate": 9.433696088607356e-05, "loss": -0.1631, "step": 11720 }, { "epoch": 6.8277066356228175, "grad_norm": 0.21929219365119934, "learning_rate": 9.432421499410486e-05, "loss": -0.1629, "step": 11730 }, { "epoch": 6.833527357392317, "grad_norm": 0.22600674629211426, "learning_rate": 9.431145563760998e-05, "loss": -0.1645, "step": 11740 }, { "epoch": 6.839348079161816, "grad_norm": 0.33336642384529114, "learning_rate": 9.429868282046484e-05, "loss": -0.1535, "step": 11750 }, { "epoch": 6.845168800931315, "grad_norm": 0.33211949467658997, "learning_rate": 9.428589654654951e-05, "loss": -0.152, "step": 11760 }, { "epoch": 6.850989522700814, "grad_norm": 0.2437221109867096, "learning_rate": 9.42730968197481e-05, "loss": -0.1613, "step": 11770 }, { "epoch": 6.8568102444703145, "grad_norm": 0.40572619438171387, "learning_rate": 9.426028364394883e-05, "loss": -0.1631, "step": 11780 }, { "epoch": 6.862630966239814, "grad_norm": 0.28756120800971985, "learning_rate": 9.424745702304402e-05, "loss": -0.1598, "step": 11790 }, { "epoch": 6.868451688009313, "grad_norm": 0.31648850440979004, "learning_rate": 9.423461696093006e-05, "loss": -0.1522, "step": 11800 }, { "epoch": 6.874272409778813, "grad_norm": 0.2771090269088745, "learning_rate": 9.422176346150741e-05, "loss": -0.1594, "step": 11810 }, { "epoch": 6.880093131548312, "grad_norm": 0.29482534527778625, "learning_rate": 9.420889652868063e-05, "loss": -0.1552, "step": 11820 }, { "epoch": 6.8859138533178115, "grad_norm": 0.16800802946090698, "learning_rate": 9.419601616635836e-05, "loss": -0.1599, "step": 11830 }, { "epoch": 6.891734575087311, "grad_norm": 0.22649917006492615, "learning_rate": 9.418312237845331e-05, "loss": -0.1587, "step": 11840 }, { "epoch": 6.89755529685681, "grad_norm": 0.29538455605506897, "learning_rate": 9.417021516888225e-05, "loss": -0.1606, "step": 11850 }, { "epoch": 6.90337601862631, "grad_norm": 0.29041457176208496, "learning_rate": 9.415729454156608e-05, "loss": -0.1629, "step": 11860 }, { "epoch": 6.909196740395809, "grad_norm": 0.2946361005306244, "learning_rate": 9.414436050042973e-05, "loss": -0.1563, "step": 11870 }, { "epoch": 6.915017462165308, "grad_norm": 0.19429413974285126, "learning_rate": 9.413141304940223e-05, "loss": -0.1625, "step": 11880 }, { "epoch": 6.920838183934808, "grad_norm": 0.33484891057014465, "learning_rate": 9.411845219241666e-05, "loss": -0.1632, "step": 11890 }, { "epoch": 6.926658905704308, "grad_norm": 0.2588505446910858, "learning_rate": 9.410547793341021e-05, "loss": -0.1582, "step": 11900 }, { "epoch": 6.932479627473807, "grad_norm": 0.18736185133457184, "learning_rate": 9.409249027632408e-05, "loss": -0.1638, "step": 11910 }, { "epoch": 6.938300349243306, "grad_norm": 0.25182199478149414, "learning_rate": 9.407948922510362e-05, "loss": -0.1633, "step": 11920 }, { "epoch": 6.944121071012805, "grad_norm": 0.18534378707408905, "learning_rate": 9.406647478369817e-05, "loss": -0.1672, "step": 11930 }, { "epoch": 6.949941792782305, "grad_norm": 0.3023222088813782, "learning_rate": 9.405344695606118e-05, "loss": -0.1624, "step": 11940 }, { "epoch": 6.955762514551805, "grad_norm": 0.25588682293891907, "learning_rate": 9.404040574615018e-05, "loss": -0.1623, "step": 11950 }, { "epoch": 6.961583236321304, "grad_norm": 0.2874225378036499, "learning_rate": 9.402735115792674e-05, "loss": -0.1584, "step": 11960 }, { "epoch": 6.967403958090803, "grad_norm": 0.27472633123397827, "learning_rate": 9.401428319535649e-05, "loss": -0.1648, "step": 11970 }, { "epoch": 6.973224679860302, "grad_norm": 0.295185923576355, "learning_rate": 9.400120186240912e-05, "loss": -0.1591, "step": 11980 }, { "epoch": 6.9790454016298025, "grad_norm": 0.28571653366088867, "learning_rate": 9.398810716305844e-05, "loss": -0.1623, "step": 11990 }, { "epoch": 6.984866123399302, "grad_norm": 0.18728232383728027, "learning_rate": 9.397499910128222e-05, "loss": -0.1594, "step": 12000 }, { "epoch": 6.990686845168801, "grad_norm": 0.29403072595596313, "learning_rate": 9.396187768106237e-05, "loss": -0.1668, "step": 12010 }, { "epoch": 6.9965075669383, "grad_norm": 0.2752435505390167, "learning_rate": 9.394874290638482e-05, "loss": -0.1611, "step": 12020 }, { "epoch": 7.002328288707799, "grad_norm": 0.37140846252441406, "learning_rate": 9.393559478123959e-05, "loss": -0.1577, "step": 12030 }, { "epoch": 7.008149010477299, "grad_norm": 0.3465358018875122, "learning_rate": 9.39224333096207e-05, "loss": -0.1578, "step": 12040 }, { "epoch": 7.013969732246799, "grad_norm": 0.19435717165470123, "learning_rate": 9.390925849552629e-05, "loss": -0.1626, "step": 12050 }, { "epoch": 7.019790454016298, "grad_norm": 0.21809260547161102, "learning_rate": 9.389607034295849e-05, "loss": -0.166, "step": 12060 }, { "epoch": 7.025611175785797, "grad_norm": 0.21946947276592255, "learning_rate": 9.388286885592355e-05, "loss": -0.1524, "step": 12070 }, { "epoch": 7.031431897555297, "grad_norm": 0.19077417254447937, "learning_rate": 9.386965403843168e-05, "loss": -0.1619, "step": 12080 }, { "epoch": 7.037252619324796, "grad_norm": 0.22167782485485077, "learning_rate": 9.385642589449726e-05, "loss": -0.1627, "step": 12090 }, { "epoch": 7.043073341094296, "grad_norm": 0.2398911863565445, "learning_rate": 9.38431844281386e-05, "loss": -0.1665, "step": 12100 }, { "epoch": 7.048894062863795, "grad_norm": 0.3024662435054779, "learning_rate": 9.38299296433781e-05, "loss": -0.1616, "step": 12110 }, { "epoch": 7.054714784633295, "grad_norm": 0.23173584043979645, "learning_rate": 9.381666154424226e-05, "loss": -0.1616, "step": 12120 }, { "epoch": 7.060535506402794, "grad_norm": 0.2752416729927063, "learning_rate": 9.380338013476157e-05, "loss": -0.1629, "step": 12130 }, { "epoch": 7.066356228172293, "grad_norm": 0.2893044650554657, "learning_rate": 9.379008541897054e-05, "loss": -0.1623, "step": 12140 }, { "epoch": 7.072176949941793, "grad_norm": 0.21962985396385193, "learning_rate": 9.377677740090777e-05, "loss": -0.1566, "step": 12150 }, { "epoch": 7.077997671711292, "grad_norm": 0.29787126183509827, "learning_rate": 9.376345608461588e-05, "loss": -0.1606, "step": 12160 }, { "epoch": 7.083818393480792, "grad_norm": 0.2039455771446228, "learning_rate": 9.375012147414155e-05, "loss": -0.1647, "step": 12170 }, { "epoch": 7.089639115250291, "grad_norm": 0.2137826383113861, "learning_rate": 9.373677357353545e-05, "loss": -0.1602, "step": 12180 }, { "epoch": 7.09545983701979, "grad_norm": 0.2888389825820923, "learning_rate": 9.372341238685237e-05, "loss": -0.1637, "step": 12190 }, { "epoch": 7.1012805587892895, "grad_norm": 0.3167977035045624, "learning_rate": 9.371003791815102e-05, "loss": -0.1654, "step": 12200 }, { "epoch": 7.10710128055879, "grad_norm": 0.26735350489616394, "learning_rate": 9.369665017149429e-05, "loss": -0.1618, "step": 12210 }, { "epoch": 7.112922002328289, "grad_norm": 0.16103404760360718, "learning_rate": 9.368324915094895e-05, "loss": -0.1655, "step": 12220 }, { "epoch": 7.118742724097788, "grad_norm": 0.22361642122268677, "learning_rate": 9.366983486058591e-05, "loss": -0.1586, "step": 12230 }, { "epoch": 7.124563445867287, "grad_norm": 0.22527211904525757, "learning_rate": 9.365640730448009e-05, "loss": -0.1607, "step": 12240 }, { "epoch": 7.130384167636787, "grad_norm": 0.2415955513715744, "learning_rate": 9.36429664867104e-05, "loss": -0.1606, "step": 12250 }, { "epoch": 7.136204889406287, "grad_norm": 0.2661783993244171, "learning_rate": 9.362951241135982e-05, "loss": -0.1596, "step": 12260 }, { "epoch": 7.142025611175786, "grad_norm": 0.27461084723472595, "learning_rate": 9.361604508251534e-05, "loss": -0.166, "step": 12270 }, { "epoch": 7.147846332945285, "grad_norm": 0.2262769192457199, "learning_rate": 9.360256450426799e-05, "loss": -0.161, "step": 12280 }, { "epoch": 7.153667054714784, "grad_norm": 0.24779638648033142, "learning_rate": 9.358907068071279e-05, "loss": -0.1609, "step": 12290 }, { "epoch": 7.159487776484284, "grad_norm": 0.26043835282325745, "learning_rate": 9.357556361594882e-05, "loss": -0.1678, "step": 12300 }, { "epoch": 7.165308498253784, "grad_norm": 0.21188844740390778, "learning_rate": 9.356204331407917e-05, "loss": -0.163, "step": 12310 }, { "epoch": 7.171129220023283, "grad_norm": 0.3122115731239319, "learning_rate": 9.354850977921094e-05, "loss": -0.1563, "step": 12320 }, { "epoch": 7.176949941792782, "grad_norm": 0.19287395477294922, "learning_rate": 9.353496301545529e-05, "loss": -0.1604, "step": 12330 }, { "epoch": 7.182770663562282, "grad_norm": 0.3425050675868988, "learning_rate": 9.352140302692733e-05, "loss": -0.159, "step": 12340 }, { "epoch": 7.188591385331781, "grad_norm": 0.3405315577983856, "learning_rate": 9.350782981774627e-05, "loss": -0.155, "step": 12350 }, { "epoch": 7.1944121071012805, "grad_norm": 0.23526135087013245, "learning_rate": 9.349424339203526e-05, "loss": -0.1601, "step": 12360 }, { "epoch": 7.20023282887078, "grad_norm": 0.21770915389060974, "learning_rate": 9.34806437539215e-05, "loss": -0.159, "step": 12370 }, { "epoch": 7.206053550640279, "grad_norm": 0.21770787239074707, "learning_rate": 9.346703090753622e-05, "loss": -0.1556, "step": 12380 }, { "epoch": 7.211874272409779, "grad_norm": 0.2982602119445801, "learning_rate": 9.345340485701461e-05, "loss": -0.1597, "step": 12390 }, { "epoch": 7.217694994179278, "grad_norm": 0.16702574491500854, "learning_rate": 9.343976560649595e-05, "loss": -0.1624, "step": 12400 }, { "epoch": 7.2235157159487775, "grad_norm": 0.24982264637947083, "learning_rate": 9.342611316012344e-05, "loss": -0.1633, "step": 12410 }, { "epoch": 7.229336437718277, "grad_norm": 0.25714385509490967, "learning_rate": 9.341244752204437e-05, "loss": -0.1581, "step": 12420 }, { "epoch": 7.235157159487777, "grad_norm": 0.21696481108665466, "learning_rate": 9.339876869640995e-05, "loss": -0.1635, "step": 12430 }, { "epoch": 7.240977881257276, "grad_norm": 0.2510944902896881, "learning_rate": 9.33850766873755e-05, "loss": -0.16, "step": 12440 }, { "epoch": 7.246798603026775, "grad_norm": 0.3436543643474579, "learning_rate": 9.337137149910028e-05, "loss": -0.1614, "step": 12450 }, { "epoch": 7.2526193247962745, "grad_norm": 0.30558067560195923, "learning_rate": 9.335765313574753e-05, "loss": -0.1663, "step": 12460 }, { "epoch": 7.258440046565774, "grad_norm": 0.23871400952339172, "learning_rate": 9.334392160148457e-05, "loss": -0.1655, "step": 12470 }, { "epoch": 7.264260768335274, "grad_norm": 0.26366889476776123, "learning_rate": 9.333017690048264e-05, "loss": -0.1608, "step": 12480 }, { "epoch": 7.270081490104773, "grad_norm": 0.27616801857948303, "learning_rate": 9.331641903691706e-05, "loss": -0.1605, "step": 12490 }, { "epoch": 7.275902211874272, "grad_norm": 0.3010527193546295, "learning_rate": 9.330264801496707e-05, "loss": -0.157, "step": 12500 }, { "epoch": 7.281722933643771, "grad_norm": 0.24512632191181183, "learning_rate": 9.328886383881594e-05, "loss": -0.1613, "step": 12510 }, { "epoch": 7.2875436554132715, "grad_norm": 0.2531563341617584, "learning_rate": 9.327506651265095e-05, "loss": -0.1602, "step": 12520 }, { "epoch": 7.293364377182771, "grad_norm": 0.3836725056171417, "learning_rate": 9.326125604066338e-05, "loss": -0.1605, "step": 12530 }, { "epoch": 7.29918509895227, "grad_norm": 0.2510744035243988, "learning_rate": 9.324743242704847e-05, "loss": -0.1635, "step": 12540 }, { "epoch": 7.305005820721769, "grad_norm": 0.2568488121032715, "learning_rate": 9.323359567600546e-05, "loss": -0.1599, "step": 12550 }, { "epoch": 7.310826542491269, "grad_norm": 0.36839720606803894, "learning_rate": 9.321974579173761e-05, "loss": -0.1623, "step": 12560 }, { "epoch": 7.3166472642607685, "grad_norm": 0.33726978302001953, "learning_rate": 9.320588277845213e-05, "loss": -0.1553, "step": 12570 }, { "epoch": 7.322467986030268, "grad_norm": 0.2232239693403244, "learning_rate": 9.319200664036026e-05, "loss": -0.162, "step": 12580 }, { "epoch": 7.328288707799767, "grad_norm": 0.28503498435020447, "learning_rate": 9.31781173816772e-05, "loss": -0.1664, "step": 12590 }, { "epoch": 7.334109429569267, "grad_norm": 0.33578914403915405, "learning_rate": 9.316421500662212e-05, "loss": -0.1625, "step": 12600 }, { "epoch": 7.339930151338766, "grad_norm": 0.23495905101299286, "learning_rate": 9.31502995194182e-05, "loss": -0.1629, "step": 12610 }, { "epoch": 7.3457508731082655, "grad_norm": 0.21669454872608185, "learning_rate": 9.31363709242926e-05, "loss": -0.1619, "step": 12620 }, { "epoch": 7.351571594877765, "grad_norm": 0.27161282300949097, "learning_rate": 9.312242922547647e-05, "loss": -0.1688, "step": 12630 }, { "epoch": 7.357392316647264, "grad_norm": 0.2434922456741333, "learning_rate": 9.310847442720492e-05, "loss": -0.1595, "step": 12640 }, { "epoch": 7.363213038416764, "grad_norm": 0.17996571958065033, "learning_rate": 9.309450653371706e-05, "loss": -0.1657, "step": 12650 }, { "epoch": 7.369033760186263, "grad_norm": 0.40726712346076965, "learning_rate": 9.308052554925595e-05, "loss": -0.1665, "step": 12660 }, { "epoch": 7.374854481955762, "grad_norm": 0.2550795078277588, "learning_rate": 9.306653147806867e-05, "loss": -0.1636, "step": 12670 }, { "epoch": 7.380675203725262, "grad_norm": 0.4556962549686432, "learning_rate": 9.305252432440622e-05, "loss": -0.1616, "step": 12680 }, { "epoch": 7.386495925494762, "grad_norm": 0.23911447823047638, "learning_rate": 9.303850409252361e-05, "loss": -0.1615, "step": 12690 }, { "epoch": 7.392316647264261, "grad_norm": 0.27061018347740173, "learning_rate": 9.302447078667985e-05, "loss": -0.164, "step": 12700 }, { "epoch": 7.39813736903376, "grad_norm": 0.3133446276187897, "learning_rate": 9.301042441113783e-05, "loss": -0.1634, "step": 12710 }, { "epoch": 7.403958090803259, "grad_norm": 0.29624226689338684, "learning_rate": 9.299636497016451e-05, "loss": -0.1579, "step": 12720 }, { "epoch": 7.409778812572759, "grad_norm": 0.22505389153957367, "learning_rate": 9.298229246803076e-05, "loss": -0.1599, "step": 12730 }, { "epoch": 7.415599534342259, "grad_norm": 0.27042657136917114, "learning_rate": 9.296820690901144e-05, "loss": -0.1582, "step": 12740 }, { "epoch": 7.421420256111758, "grad_norm": 0.2551143169403076, "learning_rate": 9.295410829738539e-05, "loss": -0.1625, "step": 12750 }, { "epoch": 7.427240977881257, "grad_norm": 0.3161560297012329, "learning_rate": 9.293999663743535e-05, "loss": -0.1618, "step": 12760 }, { "epoch": 7.433061699650756, "grad_norm": 0.34492209553718567, "learning_rate": 9.292587193344813e-05, "loss": -0.1559, "step": 12770 }, { "epoch": 7.4388824214202565, "grad_norm": 0.31443047523498535, "learning_rate": 9.291173418971437e-05, "loss": -0.1602, "step": 12780 }, { "epoch": 7.444703143189756, "grad_norm": 0.27629390358924866, "learning_rate": 9.28975834105288e-05, "loss": -0.1595, "step": 12790 }, { "epoch": 7.450523864959255, "grad_norm": 0.2752944827079773, "learning_rate": 9.288341960019004e-05, "loss": -0.1618, "step": 12800 }, { "epoch": 7.456344586728754, "grad_norm": 0.27009695768356323, "learning_rate": 9.286924276300067e-05, "loss": -0.1609, "step": 12810 }, { "epoch": 7.462165308498253, "grad_norm": 0.2521986961364746, "learning_rate": 9.285505290326726e-05, "loss": -0.1624, "step": 12820 }, { "epoch": 7.467986030267753, "grad_norm": 0.17748580873012543, "learning_rate": 9.284085002530027e-05, "loss": -0.1644, "step": 12830 }, { "epoch": 7.473806752037253, "grad_norm": 0.42151322960853577, "learning_rate": 9.282663413341422e-05, "loss": -0.159, "step": 12840 }, { "epoch": 7.479627473806752, "grad_norm": 0.2554302513599396, "learning_rate": 9.281240523192747e-05, "loss": -0.1631, "step": 12850 }, { "epoch": 7.485448195576251, "grad_norm": 0.13274289667606354, "learning_rate": 9.279816332516242e-05, "loss": -0.1657, "step": 12860 }, { "epoch": 7.491268917345751, "grad_norm": 0.2392982542514801, "learning_rate": 9.278390841744536e-05, "loss": -0.1642, "step": 12870 }, { "epoch": 7.49708963911525, "grad_norm": 0.2505616545677185, "learning_rate": 9.276964051310658e-05, "loss": -0.1643, "step": 12880 }, { "epoch": 7.50291036088475, "grad_norm": 0.3310863971710205, "learning_rate": 9.275535961648027e-05, "loss": -0.1662, "step": 12890 }, { "epoch": 7.508731082654249, "grad_norm": 0.2116427719593048, "learning_rate": 9.274106573190459e-05, "loss": -0.1633, "step": 12900 }, { "epoch": 7.514551804423749, "grad_norm": 0.16376011073589325, "learning_rate": 9.272675886372168e-05, "loss": -0.168, "step": 12910 }, { "epoch": 7.520372526193248, "grad_norm": 0.24374085664749146, "learning_rate": 9.271243901627754e-05, "loss": -0.1615, "step": 12920 }, { "epoch": 7.526193247962747, "grad_norm": 0.3135235011577606, "learning_rate": 9.269810619392219e-05, "loss": -0.164, "step": 12930 }, { "epoch": 7.532013969732247, "grad_norm": 0.20299457013607025, "learning_rate": 9.268376040100955e-05, "loss": -0.1583, "step": 12940 }, { "epoch": 7.537834691501747, "grad_norm": 0.28494516015052795, "learning_rate": 9.266940164189752e-05, "loss": -0.1645, "step": 12950 }, { "epoch": 7.543655413271246, "grad_norm": 0.20629066228866577, "learning_rate": 9.265502992094787e-05, "loss": -0.1622, "step": 12960 }, { "epoch": 7.549476135040745, "grad_norm": 0.20019802451133728, "learning_rate": 9.264064524252638e-05, "loss": -0.1678, "step": 12970 }, { "epoch": 7.555296856810244, "grad_norm": 0.18628858029842377, "learning_rate": 9.262624761100271e-05, "loss": -0.16, "step": 12980 }, { "epoch": 7.5611175785797435, "grad_norm": 0.23856835067272186, "learning_rate": 9.261183703075051e-05, "loss": -0.1632, "step": 12990 }, { "epoch": 7.566938300349244, "grad_norm": 0.4515165090560913, "learning_rate": 9.259741350614733e-05, "loss": -0.1621, "step": 13000 }, { "epoch": 7.572759022118743, "grad_norm": 0.21104830503463745, "learning_rate": 9.258297704157464e-05, "loss": -0.1611, "step": 13010 }, { "epoch": 7.578579743888242, "grad_norm": 0.23207204043865204, "learning_rate": 9.256852764141786e-05, "loss": -0.1623, "step": 13020 }, { "epoch": 7.584400465657741, "grad_norm": 0.22634965181350708, "learning_rate": 9.255406531006634e-05, "loss": -0.1659, "step": 13030 }, { "epoch": 7.590221187427241, "grad_norm": 0.34191858768463135, "learning_rate": 9.253959005191335e-05, "loss": -0.1543, "step": 13040 }, { "epoch": 7.596041909196741, "grad_norm": 0.21011775732040405, "learning_rate": 9.25251018713561e-05, "loss": -0.1556, "step": 13050 }, { "epoch": 7.60186263096624, "grad_norm": 0.21061553061008453, "learning_rate": 9.251060077279571e-05, "loss": -0.1625, "step": 13060 }, { "epoch": 7.607683352735739, "grad_norm": 0.20293660461902618, "learning_rate": 9.249608676063724e-05, "loss": -0.1635, "step": 13070 }, { "epoch": 7.613504074505238, "grad_norm": 0.4120715260505676, "learning_rate": 9.248155983928964e-05, "loss": -0.1608, "step": 13080 }, { "epoch": 7.619324796274738, "grad_norm": 0.3140145540237427, "learning_rate": 9.246702001316583e-05, "loss": -0.1639, "step": 13090 }, { "epoch": 7.625145518044238, "grad_norm": 0.27281197905540466, "learning_rate": 9.245246728668262e-05, "loss": -0.1577, "step": 13100 }, { "epoch": 7.630966239813737, "grad_norm": 0.2788638770580292, "learning_rate": 9.243790166426073e-05, "loss": -0.1574, "step": 13110 }, { "epoch": 7.636786961583236, "grad_norm": 0.28383010625839233, "learning_rate": 9.242332315032484e-05, "loss": -0.1633, "step": 13120 }, { "epoch": 7.642607683352736, "grad_norm": 0.2795552611351013, "learning_rate": 9.240873174930349e-05, "loss": -0.1589, "step": 13130 }, { "epoch": 7.648428405122235, "grad_norm": 0.2757415771484375, "learning_rate": 9.239412746562917e-05, "loss": -0.1565, "step": 13140 }, { "epoch": 7.6542491268917345, "grad_norm": 0.20516738295555115, "learning_rate": 9.237951030373828e-05, "loss": -0.1546, "step": 13150 }, { "epoch": 7.660069848661234, "grad_norm": 0.2452271431684494, "learning_rate": 9.236488026807113e-05, "loss": -0.156, "step": 13160 }, { "epoch": 7.665890570430733, "grad_norm": 0.2509324550628662, "learning_rate": 9.235023736307193e-05, "loss": -0.166, "step": 13170 }, { "epoch": 7.671711292200233, "grad_norm": 0.23685337603092194, "learning_rate": 9.233558159318881e-05, "loss": -0.1633, "step": 13180 }, { "epoch": 7.677532013969732, "grad_norm": 0.2815634310245514, "learning_rate": 9.232091296287382e-05, "loss": -0.1634, "step": 13190 }, { "epoch": 7.6833527357392315, "grad_norm": 0.2440749853849411, "learning_rate": 9.230623147658288e-05, "loss": -0.1589, "step": 13200 }, { "epoch": 7.689173457508731, "grad_norm": 0.2636657655239105, "learning_rate": 9.229153713877586e-05, "loss": -0.1625, "step": 13210 }, { "epoch": 7.694994179278231, "grad_norm": 0.20337054133415222, "learning_rate": 9.227682995391649e-05, "loss": -0.1581, "step": 13220 }, { "epoch": 7.70081490104773, "grad_norm": 0.34269192814826965, "learning_rate": 9.226210992647243e-05, "loss": -0.1617, "step": 13230 }, { "epoch": 7.706635622817229, "grad_norm": 0.30512404441833496, "learning_rate": 9.224737706091525e-05, "loss": -0.1655, "step": 13240 }, { "epoch": 7.7124563445867285, "grad_norm": 0.1989171802997589, "learning_rate": 9.223263136172039e-05, "loss": -0.1616, "step": 13250 }, { "epoch": 7.718277066356228, "grad_norm": 0.2190697193145752, "learning_rate": 9.22178728333672e-05, "loss": -0.1569, "step": 13260 }, { "epoch": 7.724097788125728, "grad_norm": 0.26985588669776917, "learning_rate": 9.220310148033897e-05, "loss": -0.1614, "step": 13270 }, { "epoch": 7.729918509895227, "grad_norm": 0.37821832299232483, "learning_rate": 9.21883173071228e-05, "loss": -0.1629, "step": 13280 }, { "epoch": 7.735739231664726, "grad_norm": 0.3103080093860626, "learning_rate": 9.217352031820976e-05, "loss": -0.1655, "step": 13290 }, { "epoch": 7.741559953434226, "grad_norm": 0.27687519788742065, "learning_rate": 9.215871051809477e-05, "loss": -0.1623, "step": 13300 }, { "epoch": 7.7473806752037255, "grad_norm": 0.42024508118629456, "learning_rate": 9.214388791127666e-05, "loss": -0.1648, "step": 13310 }, { "epoch": 7.753201396973225, "grad_norm": 0.22345596551895142, "learning_rate": 9.212905250225814e-05, "loss": -0.1659, "step": 13320 }, { "epoch": 7.759022118742724, "grad_norm": 0.409341037273407, "learning_rate": 9.211420429554583e-05, "loss": -0.1627, "step": 13330 }, { "epoch": 7.764842840512223, "grad_norm": 0.23329131305217743, "learning_rate": 9.209934329565022e-05, "loss": -0.164, "step": 13340 }, { "epoch": 7.770663562281723, "grad_norm": 0.2143016904592514, "learning_rate": 9.208446950708568e-05, "loss": -0.1601, "step": 13350 }, { "epoch": 7.7764842840512225, "grad_norm": 0.22066998481750488, "learning_rate": 9.20695829343705e-05, "loss": -0.165, "step": 13360 }, { "epoch": 7.782305005820722, "grad_norm": 0.3193666636943817, "learning_rate": 9.205468358202678e-05, "loss": -0.1663, "step": 13370 }, { "epoch": 7.788125727590221, "grad_norm": 0.22838792204856873, "learning_rate": 9.203977145458059e-05, "loss": -0.1625, "step": 13380 }, { "epoch": 7.793946449359721, "grad_norm": 0.22080278396606445, "learning_rate": 9.202484655656182e-05, "loss": -0.1631, "step": 13390 }, { "epoch": 7.79976717112922, "grad_norm": 0.30980563163757324, "learning_rate": 9.200990889250427e-05, "loss": -0.1631, "step": 13400 }, { "epoch": 7.8055878928987195, "grad_norm": 0.23229098320007324, "learning_rate": 9.19949584669456e-05, "loss": -0.1662, "step": 13410 }, { "epoch": 7.811408614668219, "grad_norm": 0.37524136900901794, "learning_rate": 9.197999528442738e-05, "loss": -0.1609, "step": 13420 }, { "epoch": 7.817229336437718, "grad_norm": 0.16600215435028076, "learning_rate": 9.196501934949499e-05, "loss": -0.1636, "step": 13430 }, { "epoch": 7.823050058207218, "grad_norm": 0.28038543462753296, "learning_rate": 9.195003066669776e-05, "loss": -0.1639, "step": 13440 }, { "epoch": 7.828870779976717, "grad_norm": 0.22609175741672516, "learning_rate": 9.193502924058884e-05, "loss": -0.166, "step": 13450 }, { "epoch": 7.834691501746216, "grad_norm": 0.30284392833709717, "learning_rate": 9.192001507572526e-05, "loss": -0.1613, "step": 13460 }, { "epoch": 7.840512223515716, "grad_norm": 0.26401370763778687, "learning_rate": 9.190498817666793e-05, "loss": -0.1563, "step": 13470 }, { "epoch": 7.846332945285216, "grad_norm": 0.26491788029670715, "learning_rate": 9.188994854798163e-05, "loss": -0.1647, "step": 13480 }, { "epoch": 7.852153667054715, "grad_norm": 0.2466113418340683, "learning_rate": 9.187489619423499e-05, "loss": -0.1635, "step": 13490 }, { "epoch": 7.857974388824214, "grad_norm": 0.26606258749961853, "learning_rate": 9.185983112000056e-05, "loss": -0.1651, "step": 13500 }, { "epoch": 7.863795110593713, "grad_norm": 0.23057131469249725, "learning_rate": 9.184475332985464e-05, "loss": -0.1657, "step": 13510 }, { "epoch": 7.869615832363213, "grad_norm": 0.3279736340045929, "learning_rate": 9.182966282837754e-05, "loss": -0.1618, "step": 13520 }, { "epoch": 7.875436554132713, "grad_norm": 0.3142469823360443, "learning_rate": 9.18145596201533e-05, "loss": -0.166, "step": 13530 }, { "epoch": 7.881257275902212, "grad_norm": 0.3135509490966797, "learning_rate": 9.179944370976991e-05, "loss": -0.1628, "step": 13540 }, { "epoch": 7.887077997671711, "grad_norm": 0.2635798454284668, "learning_rate": 9.178431510181918e-05, "loss": -0.1653, "step": 13550 }, { "epoch": 7.89289871944121, "grad_norm": 0.2002260982990265, "learning_rate": 9.176917380089675e-05, "loss": -0.1631, "step": 13560 }, { "epoch": 7.8987194412107105, "grad_norm": 0.3830412030220032, "learning_rate": 9.175401981160219e-05, "loss": -0.1612, "step": 13570 }, { "epoch": 7.90454016298021, "grad_norm": 0.28524166345596313, "learning_rate": 9.173885313853885e-05, "loss": -0.164, "step": 13580 }, { "epoch": 7.910360884749709, "grad_norm": 0.3081014156341553, "learning_rate": 9.172367378631398e-05, "loss": -0.1584, "step": 13590 }, { "epoch": 7.916181606519208, "grad_norm": 0.19954133033752441, "learning_rate": 9.170848175953866e-05, "loss": -0.1584, "step": 13600 }, { "epoch": 7.922002328288707, "grad_norm": 0.18321718275547028, "learning_rate": 9.169327706282784e-05, "loss": -0.165, "step": 13610 }, { "epoch": 7.927823050058207, "grad_norm": 0.24373109638690948, "learning_rate": 9.167805970080029e-05, "loss": -0.1671, "step": 13620 }, { "epoch": 7.933643771827707, "grad_norm": 0.23685859143733978, "learning_rate": 9.166282967807864e-05, "loss": -0.1638, "step": 13630 }, { "epoch": 7.939464493597206, "grad_norm": 0.2923697233200073, "learning_rate": 9.16475869992894e-05, "loss": -0.1659, "step": 13640 }, { "epoch": 7.945285215366706, "grad_norm": 0.24714365601539612, "learning_rate": 9.163233166906284e-05, "loss": -0.1701, "step": 13650 }, { "epoch": 7.951105937136205, "grad_norm": 0.25182420015335083, "learning_rate": 9.161706369203317e-05, "loss": -0.1635, "step": 13660 }, { "epoch": 7.956926658905704, "grad_norm": 0.23637479543685913, "learning_rate": 9.16017830728384e-05, "loss": -0.1691, "step": 13670 }, { "epoch": 7.962747380675204, "grad_norm": 0.19637691974639893, "learning_rate": 9.158648981612035e-05, "loss": -0.1614, "step": 13680 }, { "epoch": 7.968568102444703, "grad_norm": 0.21221137046813965, "learning_rate": 9.157118392652472e-05, "loss": -0.1652, "step": 13690 }, { "epoch": 7.974388824214203, "grad_norm": 0.19472837448120117, "learning_rate": 9.155586540870104e-05, "loss": -0.166, "step": 13700 }, { "epoch": 7.980209545983702, "grad_norm": 0.26655834913253784, "learning_rate": 9.154053426730267e-05, "loss": -0.1602, "step": 13710 }, { "epoch": 7.986030267753201, "grad_norm": 0.3017086386680603, "learning_rate": 9.15251905069868e-05, "loss": -0.1583, "step": 13720 }, { "epoch": 7.991850989522701, "grad_norm": 0.2703157365322113, "learning_rate": 9.150983413241446e-05, "loss": -0.1636, "step": 13730 }, { "epoch": 7.997671711292201, "grad_norm": 0.2557966113090515, "learning_rate": 9.149446514825051e-05, "loss": -0.1581, "step": 13740 }, { "epoch": 8.0034924330617, "grad_norm": 0.27846676111221313, "learning_rate": 9.147908355916365e-05, "loss": -0.1583, "step": 13750 }, { "epoch": 8.009313154831199, "grad_norm": 0.15584900975227356, "learning_rate": 9.146368936982642e-05, "loss": -0.1573, "step": 13760 }, { "epoch": 8.015133876600698, "grad_norm": 0.19544383883476257, "learning_rate": 9.144828258491511e-05, "loss": -0.1608, "step": 13770 }, { "epoch": 8.020954598370198, "grad_norm": 0.12741544842720032, "learning_rate": 9.143286320910996e-05, "loss": -0.1669, "step": 13780 }, { "epoch": 8.026775320139697, "grad_norm": 0.23034042119979858, "learning_rate": 9.141743124709491e-05, "loss": -0.164, "step": 13790 }, { "epoch": 8.032596041909196, "grad_norm": 0.2924017906188965, "learning_rate": 9.140198670355784e-05, "loss": -0.1619, "step": 13800 }, { "epoch": 8.038416763678697, "grad_norm": 0.259881854057312, "learning_rate": 9.138652958319034e-05, "loss": -0.1594, "step": 13810 }, { "epoch": 8.044237485448196, "grad_norm": 0.231222465634346, "learning_rate": 9.137105989068791e-05, "loss": -0.1657, "step": 13820 }, { "epoch": 8.050058207217695, "grad_norm": 0.25166329741477966, "learning_rate": 9.135557763074983e-05, "loss": -0.1547, "step": 13830 }, { "epoch": 8.055878928987195, "grad_norm": 0.2953591048717499, "learning_rate": 9.13400828080792e-05, "loss": -0.1594, "step": 13840 }, { "epoch": 8.061699650756694, "grad_norm": 0.23005978763103485, "learning_rate": 9.132457542738292e-05, "loss": -0.1638, "step": 13850 }, { "epoch": 8.067520372526193, "grad_norm": 0.18096107244491577, "learning_rate": 9.130905549337174e-05, "loss": -0.1669, "step": 13860 }, { "epoch": 8.073341094295692, "grad_norm": 0.30725324153900146, "learning_rate": 9.129352301076021e-05, "loss": -0.1648, "step": 13870 }, { "epoch": 8.079161816065191, "grad_norm": 0.25550681352615356, "learning_rate": 9.127797798426668e-05, "loss": -0.1625, "step": 13880 }, { "epoch": 8.08498253783469, "grad_norm": 0.19790305197238922, "learning_rate": 9.126242041861333e-05, "loss": -0.1569, "step": 13890 }, { "epoch": 8.090803259604192, "grad_norm": 0.23634159564971924, "learning_rate": 9.124685031852611e-05, "loss": -0.1602, "step": 13900 }, { "epoch": 8.09662398137369, "grad_norm": 0.24725396931171417, "learning_rate": 9.123126768873482e-05, "loss": -0.1623, "step": 13910 }, { "epoch": 8.10244470314319, "grad_norm": 0.2926902174949646, "learning_rate": 9.121567253397308e-05, "loss": -0.1633, "step": 13920 }, { "epoch": 8.10826542491269, "grad_norm": 0.18002909421920776, "learning_rate": 9.120006485897824e-05, "loss": -0.1673, "step": 13930 }, { "epoch": 8.114086146682189, "grad_norm": 0.22409719228744507, "learning_rate": 9.118444466849152e-05, "loss": -0.1622, "step": 13940 }, { "epoch": 8.119906868451688, "grad_norm": 0.3047839105129242, "learning_rate": 9.116881196725793e-05, "loss": -0.165, "step": 13950 }, { "epoch": 8.125727590221187, "grad_norm": 0.2590312957763672, "learning_rate": 9.115316676002627e-05, "loss": -0.1634, "step": 13960 }, { "epoch": 8.131548311990686, "grad_norm": 0.19353781640529633, "learning_rate": 9.113750905154911e-05, "loss": -0.1627, "step": 13970 }, { "epoch": 8.137369033760187, "grad_norm": 0.19880323112010956, "learning_rate": 9.112183884658289e-05, "loss": -0.1605, "step": 13980 }, { "epoch": 8.143189755529686, "grad_norm": 0.26202845573425293, "learning_rate": 9.11061561498878e-05, "loss": -0.1689, "step": 13990 }, { "epoch": 8.149010477299186, "grad_norm": 0.29117119312286377, "learning_rate": 9.109046096622779e-05, "loss": -0.1615, "step": 14000 }, { "epoch": 8.154831199068685, "grad_norm": 0.3713294565677643, "learning_rate": 9.107475330037069e-05, "loss": -0.1627, "step": 14010 }, { "epoch": 8.160651920838184, "grad_norm": 0.3731291592121124, "learning_rate": 9.105903315708806e-05, "loss": -0.1669, "step": 14020 }, { "epoch": 8.166472642607683, "grad_norm": 0.22542132437229156, "learning_rate": 9.104330054115524e-05, "loss": -0.1669, "step": 14030 }, { "epoch": 8.172293364377182, "grad_norm": 0.23728597164154053, "learning_rate": 9.102755545735141e-05, "loss": -0.1643, "step": 14040 }, { "epoch": 8.178114086146682, "grad_norm": 0.3214988708496094, "learning_rate": 9.10117979104595e-05, "loss": -0.1635, "step": 14050 }, { "epoch": 8.18393480791618, "grad_norm": 0.2963155210018158, "learning_rate": 9.099602790526624e-05, "loss": -0.1609, "step": 14060 }, { "epoch": 8.189755529685682, "grad_norm": 0.36467695236206055, "learning_rate": 9.098024544656212e-05, "loss": -0.1644, "step": 14070 }, { "epoch": 8.195576251455181, "grad_norm": 0.18309776484966278, "learning_rate": 9.096445053914148e-05, "loss": -0.1592, "step": 14080 }, { "epoch": 8.20139697322468, "grad_norm": 0.1871994137763977, "learning_rate": 9.094864318780236e-05, "loss": -0.1593, "step": 14090 }, { "epoch": 8.20721769499418, "grad_norm": 0.40359652042388916, "learning_rate": 9.093282339734663e-05, "loss": -0.1492, "step": 14100 }, { "epoch": 8.213038416763679, "grad_norm": 0.2636183500289917, "learning_rate": 9.091699117257992e-05, "loss": -0.1644, "step": 14110 }, { "epoch": 8.218859138533178, "grad_norm": 0.22861583530902863, "learning_rate": 9.090114651831163e-05, "loss": -0.1659, "step": 14120 }, { "epoch": 8.224679860302677, "grad_norm": 0.27151140570640564, "learning_rate": 9.088528943935497e-05, "loss": -0.1573, "step": 14130 }, { "epoch": 8.230500582072176, "grad_norm": 0.26123157143592834, "learning_rate": 9.086941994052689e-05, "loss": -0.1645, "step": 14140 }, { "epoch": 8.236321303841676, "grad_norm": 0.21025921404361725, "learning_rate": 9.085353802664813e-05, "loss": -0.1643, "step": 14150 }, { "epoch": 8.242142025611177, "grad_norm": 0.24165518581867218, "learning_rate": 9.08376437025432e-05, "loss": -0.1674, "step": 14160 }, { "epoch": 8.247962747380676, "grad_norm": 0.24304093420505524, "learning_rate": 9.082173697304035e-05, "loss": -0.1633, "step": 14170 }, { "epoch": 8.253783469150175, "grad_norm": 0.1862281709909439, "learning_rate": 9.080581784297166e-05, "loss": -0.1699, "step": 14180 }, { "epoch": 8.259604190919674, "grad_norm": 0.20361782610416412, "learning_rate": 9.078988631717291e-05, "loss": -0.1659, "step": 14190 }, { "epoch": 8.265424912689173, "grad_norm": 0.21054962277412415, "learning_rate": 9.077394240048369e-05, "loss": -0.1664, "step": 14200 }, { "epoch": 8.271245634458673, "grad_norm": 0.23256157338619232, "learning_rate": 9.075798609774736e-05, "loss": -0.1624, "step": 14210 }, { "epoch": 8.277066356228172, "grad_norm": 0.26467663049697876, "learning_rate": 9.0742017413811e-05, "loss": -0.1656, "step": 14220 }, { "epoch": 8.282887077997671, "grad_norm": 0.26641103625297546, "learning_rate": 9.072603635352548e-05, "loss": -0.1647, "step": 14230 }, { "epoch": 8.28870779976717, "grad_norm": 0.27719390392303467, "learning_rate": 9.071004292174541e-05, "loss": -0.1631, "step": 14240 }, { "epoch": 8.294528521536671, "grad_norm": 0.1488545686006546, "learning_rate": 9.06940371233292e-05, "loss": -0.1687, "step": 14250 }, { "epoch": 8.30034924330617, "grad_norm": 0.2042831927537918, "learning_rate": 9.067801896313898e-05, "loss": -0.1649, "step": 14260 }, { "epoch": 8.30616996507567, "grad_norm": 0.2635717988014221, "learning_rate": 9.066198844604064e-05, "loss": -0.1629, "step": 14270 }, { "epoch": 8.311990686845169, "grad_norm": 0.2261115461587906, "learning_rate": 9.06459455769038e-05, "loss": -0.1614, "step": 14280 }, { "epoch": 8.317811408614668, "grad_norm": 0.35501953959465027, "learning_rate": 9.062989036060193e-05, "loss": -0.1556, "step": 14290 }, { "epoch": 8.323632130384167, "grad_norm": 0.2503778040409088, "learning_rate": 9.061382280201212e-05, "loss": -0.1668, "step": 14300 }, { "epoch": 8.329452852153667, "grad_norm": 0.18284691870212555, "learning_rate": 9.059774290601528e-05, "loss": -0.157, "step": 14310 }, { "epoch": 8.335273573923166, "grad_norm": 0.2225686013698578, "learning_rate": 9.058165067749606e-05, "loss": -0.1647, "step": 14320 }, { "epoch": 8.341094295692667, "grad_norm": 0.30924609303474426, "learning_rate": 9.056554612134288e-05, "loss": -0.1623, "step": 14330 }, { "epoch": 8.346915017462166, "grad_norm": 0.2984679937362671, "learning_rate": 9.054942924244785e-05, "loss": -0.1651, "step": 14340 }, { "epoch": 8.352735739231665, "grad_norm": 0.24833226203918457, "learning_rate": 9.053330004570686e-05, "loss": -0.1631, "step": 14350 }, { "epoch": 8.358556461001164, "grad_norm": 0.2666308581829071, "learning_rate": 9.051715853601955e-05, "loss": -0.1631, "step": 14360 }, { "epoch": 8.364377182770664, "grad_norm": 0.19747444987297058, "learning_rate": 9.050100471828926e-05, "loss": -0.1594, "step": 14370 }, { "epoch": 8.370197904540163, "grad_norm": 0.2816561162471771, "learning_rate": 9.048483859742311e-05, "loss": -0.1598, "step": 14380 }, { "epoch": 8.376018626309662, "grad_norm": 0.30501383543014526, "learning_rate": 9.046866017833193e-05, "loss": -0.167, "step": 14390 }, { "epoch": 8.381839348079161, "grad_norm": 0.19543872773647308, "learning_rate": 9.045246946593029e-05, "loss": -0.1636, "step": 14400 }, { "epoch": 8.38766006984866, "grad_norm": 0.35576269030570984, "learning_rate": 9.043626646513652e-05, "loss": -0.1665, "step": 14410 }, { "epoch": 8.39348079161816, "grad_norm": 0.22707520425319672, "learning_rate": 9.042005118087267e-05, "loss": -0.1686, "step": 14420 }, { "epoch": 8.39930151338766, "grad_norm": 0.2056053727865219, "learning_rate": 9.040382361806448e-05, "loss": -0.159, "step": 14430 }, { "epoch": 8.40512223515716, "grad_norm": 0.2546921968460083, "learning_rate": 9.038758378164148e-05, "loss": -0.1566, "step": 14440 }, { "epoch": 8.41094295692666, "grad_norm": 0.14845238626003265, "learning_rate": 9.037133167653691e-05, "loss": -0.1664, "step": 14450 }, { "epoch": 8.416763678696158, "grad_norm": 0.2980784773826599, "learning_rate": 9.035506730768771e-05, "loss": -0.1642, "step": 14460 }, { "epoch": 8.422584400465658, "grad_norm": 0.2663375437259674, "learning_rate": 9.033879068003458e-05, "loss": -0.1659, "step": 14470 }, { "epoch": 8.428405122235157, "grad_norm": 0.21866793930530548, "learning_rate": 9.032250179852193e-05, "loss": -0.1646, "step": 14480 }, { "epoch": 8.434225844004656, "grad_norm": 0.2808361351490021, "learning_rate": 9.030620066809787e-05, "loss": -0.1655, "step": 14490 }, { "epoch": 8.440046565774155, "grad_norm": 0.24698860943317413, "learning_rate": 9.028988729371428e-05, "loss": -0.1665, "step": 14500 }, { "epoch": 8.445867287543656, "grad_norm": 0.23383869230747223, "learning_rate": 9.027356168032673e-05, "loss": -0.1635, "step": 14510 }, { "epoch": 8.451688009313155, "grad_norm": 0.28915390372276306, "learning_rate": 9.02572238328945e-05, "loss": -0.1635, "step": 14520 }, { "epoch": 8.457508731082655, "grad_norm": 0.2741990089416504, "learning_rate": 9.02408737563806e-05, "loss": -0.1642, "step": 14530 }, { "epoch": 8.463329452852154, "grad_norm": 0.18862777948379517, "learning_rate": 9.022451145575174e-05, "loss": -0.1652, "step": 14540 }, { "epoch": 8.469150174621653, "grad_norm": 0.32106176018714905, "learning_rate": 9.02081369359784e-05, "loss": -0.1656, "step": 14550 }, { "epoch": 8.474970896391152, "grad_norm": 0.23990514874458313, "learning_rate": 9.019175020203465e-05, "loss": -0.1681, "step": 14560 }, { "epoch": 8.480791618160652, "grad_norm": 0.2537248730659485, "learning_rate": 9.017535125889842e-05, "loss": -0.1678, "step": 14570 }, { "epoch": 8.48661233993015, "grad_norm": 0.18963927030563354, "learning_rate": 9.015894011155124e-05, "loss": -0.164, "step": 14580 }, { "epoch": 8.49243306169965, "grad_norm": 0.28129246830940247, "learning_rate": 9.014251676497838e-05, "loss": -0.1594, "step": 14590 }, { "epoch": 8.498253783469151, "grad_norm": 0.18377579748630524, "learning_rate": 9.012608122416884e-05, "loss": -0.1636, "step": 14600 }, { "epoch": 8.50407450523865, "grad_norm": 0.1926756203174591, "learning_rate": 9.010963349411529e-05, "loss": -0.1613, "step": 14610 }, { "epoch": 8.50989522700815, "grad_norm": 0.16439324617385864, "learning_rate": 9.00931735798141e-05, "loss": -0.1624, "step": 14620 }, { "epoch": 8.515715948777649, "grad_norm": 0.27869710326194763, "learning_rate": 9.00767014862654e-05, "loss": -0.1667, "step": 14630 }, { "epoch": 8.521536670547148, "grad_norm": 0.23701272904872894, "learning_rate": 9.006021721847295e-05, "loss": -0.1655, "step": 14640 }, { "epoch": 8.527357392316647, "grad_norm": 0.2982766032218933, "learning_rate": 9.004372078144423e-05, "loss": -0.1658, "step": 14650 }, { "epoch": 8.533178114086146, "grad_norm": 0.2235952466726303, "learning_rate": 9.002721218019043e-05, "loss": -0.1639, "step": 14660 }, { "epoch": 8.538998835855645, "grad_norm": 0.20706525444984436, "learning_rate": 9.001069141972642e-05, "loss": -0.1677, "step": 14670 }, { "epoch": 8.544819557625146, "grad_norm": 0.24743136763572693, "learning_rate": 8.99941585050708e-05, "loss": -0.1667, "step": 14680 }, { "epoch": 8.550640279394646, "grad_norm": 0.19147343933582306, "learning_rate": 8.997761344124578e-05, "loss": -0.1659, "step": 14690 }, { "epoch": 8.556461001164145, "grad_norm": 0.2562982141971588, "learning_rate": 8.996105623327737e-05, "loss": -0.1667, "step": 14700 }, { "epoch": 8.562281722933644, "grad_norm": 0.21158954501152039, "learning_rate": 8.994448688619517e-05, "loss": -0.1669, "step": 14710 }, { "epoch": 8.568102444703143, "grad_norm": 0.24282631278038025, "learning_rate": 8.992790540503253e-05, "loss": -0.1655, "step": 14720 }, { "epoch": 8.573923166472643, "grad_norm": 0.2144221067428589, "learning_rate": 8.991131179482648e-05, "loss": -0.1668, "step": 14730 }, { "epoch": 8.579743888242142, "grad_norm": 0.21581779420375824, "learning_rate": 8.989470606061768e-05, "loss": -0.1638, "step": 14740 }, { "epoch": 8.585564610011641, "grad_norm": 0.27363020181655884, "learning_rate": 8.987808820745056e-05, "loss": -0.1632, "step": 14750 }, { "epoch": 8.59138533178114, "grad_norm": 0.2995285391807556, "learning_rate": 8.986145824037315e-05, "loss": -0.167, "step": 14760 }, { "epoch": 8.59720605355064, "grad_norm": 0.19579742848873138, "learning_rate": 8.984481616443721e-05, "loss": -0.1588, "step": 14770 }, { "epoch": 8.60302677532014, "grad_norm": 0.17691384255886078, "learning_rate": 8.982816198469815e-05, "loss": -0.1668, "step": 14780 }, { "epoch": 8.60884749708964, "grad_norm": 0.22987110912799835, "learning_rate": 8.98114957062151e-05, "loss": -0.1633, "step": 14790 }, { "epoch": 8.614668218859139, "grad_norm": 0.28858283162117004, "learning_rate": 8.97948173340508e-05, "loss": -0.1667, "step": 14800 }, { "epoch": 8.620488940628638, "grad_norm": 0.37715327739715576, "learning_rate": 8.977812687327172e-05, "loss": -0.1697, "step": 14810 }, { "epoch": 8.626309662398137, "grad_norm": 0.2100522369146347, "learning_rate": 8.976142432894798e-05, "loss": -0.1644, "step": 14820 }, { "epoch": 8.632130384167636, "grad_norm": 0.16457590460777283, "learning_rate": 8.974470970615336e-05, "loss": -0.1633, "step": 14830 }, { "epoch": 8.637951105937136, "grad_norm": 0.3034711480140686, "learning_rate": 8.972798300996534e-05, "loss": -0.1631, "step": 14840 }, { "epoch": 8.643771827706635, "grad_norm": 0.19434870779514313, "learning_rate": 8.971124424546504e-05, "loss": -0.1668, "step": 14850 }, { "epoch": 8.649592549476136, "grad_norm": 0.1984349936246872, "learning_rate": 8.969449341773724e-05, "loss": -0.1636, "step": 14860 }, { "epoch": 8.655413271245635, "grad_norm": 0.2524275779724121, "learning_rate": 8.967773053187042e-05, "loss": -0.1609, "step": 14870 }, { "epoch": 8.661233993015134, "grad_norm": 0.23002521693706512, "learning_rate": 8.966095559295668e-05, "loss": -0.1667, "step": 14880 }, { "epoch": 8.667054714784634, "grad_norm": 0.22758518159389496, "learning_rate": 8.964416860609184e-05, "loss": -0.1668, "step": 14890 }, { "epoch": 8.672875436554133, "grad_norm": 0.18563856184482574, "learning_rate": 8.962736957637532e-05, "loss": -0.169, "step": 14900 }, { "epoch": 8.678696158323632, "grad_norm": 0.23760005831718445, "learning_rate": 8.96105585089102e-05, "loss": -0.1694, "step": 14910 }, { "epoch": 8.684516880093131, "grad_norm": 0.22134247422218323, "learning_rate": 8.959373540880329e-05, "loss": -0.157, "step": 14920 }, { "epoch": 8.69033760186263, "grad_norm": 0.1552427113056183, "learning_rate": 8.957690028116495e-05, "loss": -0.165, "step": 14930 }, { "epoch": 8.69615832363213, "grad_norm": 0.22052232921123505, "learning_rate": 8.956005313110928e-05, "loss": -0.166, "step": 14940 }, { "epoch": 8.70197904540163, "grad_norm": 0.2325296849012375, "learning_rate": 8.9543193963754e-05, "loss": -0.1627, "step": 14950 }, { "epoch": 8.70779976717113, "grad_norm": 0.22153137624263763, "learning_rate": 8.952632278422048e-05, "loss": -0.1683, "step": 14960 }, { "epoch": 8.713620488940629, "grad_norm": 0.19736407697200775, "learning_rate": 8.95094395976337e-05, "loss": -0.1622, "step": 14970 }, { "epoch": 8.719441210710128, "grad_norm": 0.29964885115623474, "learning_rate": 8.949254440912239e-05, "loss": -0.1645, "step": 14980 }, { "epoch": 8.725261932479627, "grad_norm": 0.17866471409797668, "learning_rate": 8.94756372238188e-05, "loss": -0.1661, "step": 14990 }, { "epoch": 8.731082654249127, "grad_norm": 0.3786064684391022, "learning_rate": 8.945871804685892e-05, "loss": -0.1654, "step": 15000 }, { "epoch": 8.736903376018626, "grad_norm": 0.19610248506069183, "learning_rate": 8.944178688338236e-05, "loss": -0.1684, "step": 15010 }, { "epoch": 8.742724097788125, "grad_norm": 0.25958630442619324, "learning_rate": 8.942484373853233e-05, "loss": -0.1698, "step": 15020 }, { "epoch": 8.748544819557626, "grad_norm": 0.30485549569129944, "learning_rate": 8.940788861745572e-05, "loss": -0.1625, "step": 15030 }, { "epoch": 8.754365541327125, "grad_norm": 0.2630361318588257, "learning_rate": 8.939092152530308e-05, "loss": -0.1652, "step": 15040 }, { "epoch": 8.760186263096625, "grad_norm": 0.3740026652812958, "learning_rate": 8.937394246722853e-05, "loss": -0.1668, "step": 15050 }, { "epoch": 8.766006984866124, "grad_norm": 0.16534829139709473, "learning_rate": 8.935695144838984e-05, "loss": -0.161, "step": 15060 }, { "epoch": 8.771827706635623, "grad_norm": 0.3375758230686188, "learning_rate": 8.933994847394849e-05, "loss": -0.1656, "step": 15070 }, { "epoch": 8.777648428405122, "grad_norm": 0.1981835514307022, "learning_rate": 8.932293354906949e-05, "loss": -0.1689, "step": 15080 }, { "epoch": 8.783469150174621, "grad_norm": 0.21235531568527222, "learning_rate": 8.930590667892153e-05, "loss": -0.1647, "step": 15090 }, { "epoch": 8.78928987194412, "grad_norm": 0.24701261520385742, "learning_rate": 8.928886786867696e-05, "loss": -0.167, "step": 15100 }, { "epoch": 8.79511059371362, "grad_norm": 0.22552922368049622, "learning_rate": 8.927181712351168e-05, "loss": -0.1699, "step": 15110 }, { "epoch": 8.800931315483119, "grad_norm": 0.20846489071846008, "learning_rate": 8.925475444860527e-05, "loss": -0.1666, "step": 15120 }, { "epoch": 8.80675203725262, "grad_norm": 0.27238979935646057, "learning_rate": 8.923767984914092e-05, "loss": -0.1696, "step": 15130 }, { "epoch": 8.81257275902212, "grad_norm": 0.25099727511405945, "learning_rate": 8.922059333030545e-05, "loss": -0.1603, "step": 15140 }, { "epoch": 8.818393480791618, "grad_norm": 0.3129509687423706, "learning_rate": 8.920349489728928e-05, "loss": -0.1661, "step": 15150 }, { "epoch": 8.824214202561118, "grad_norm": 0.2642534375190735, "learning_rate": 8.918638455528646e-05, "loss": -0.1607, "step": 15160 }, { "epoch": 8.830034924330617, "grad_norm": 0.2245689183473587, "learning_rate": 8.916926230949468e-05, "loss": -0.1658, "step": 15170 }, { "epoch": 8.835855646100116, "grad_norm": 0.22597767412662506, "learning_rate": 8.915212816511522e-05, "loss": -0.1658, "step": 15180 }, { "epoch": 8.841676367869615, "grad_norm": 0.20421981811523438, "learning_rate": 8.913498212735296e-05, "loss": -0.1644, "step": 15190 }, { "epoch": 8.847497089639115, "grad_norm": 0.18841031193733215, "learning_rate": 8.911782420141643e-05, "loss": -0.1656, "step": 15200 }, { "epoch": 8.853317811408616, "grad_norm": 0.24606403708457947, "learning_rate": 8.910065439251775e-05, "loss": -0.1707, "step": 15210 }, { "epoch": 8.859138533178115, "grad_norm": 0.2743602395057678, "learning_rate": 8.908347270587268e-05, "loss": -0.1657, "step": 15220 }, { "epoch": 8.864959254947614, "grad_norm": 0.2063777595758438, "learning_rate": 8.906627914670054e-05, "loss": -0.1648, "step": 15230 }, { "epoch": 8.870779976717113, "grad_norm": 0.17924965918064117, "learning_rate": 8.904907372022427e-05, "loss": -0.1668, "step": 15240 }, { "epoch": 8.876600698486612, "grad_norm": 0.25503236055374146, "learning_rate": 8.903185643167042e-05, "loss": -0.1656, "step": 15250 }, { "epoch": 8.882421420256112, "grad_norm": 0.2107582986354828, "learning_rate": 8.901462728626919e-05, "loss": -0.1657, "step": 15260 }, { "epoch": 8.88824214202561, "grad_norm": 0.26941990852355957, "learning_rate": 8.899738628925429e-05, "loss": -0.1634, "step": 15270 }, { "epoch": 8.89406286379511, "grad_norm": 0.3149419128894806, "learning_rate": 8.898013344586312e-05, "loss": -0.1677, "step": 15280 }, { "epoch": 8.89988358556461, "grad_norm": 0.2806655466556549, "learning_rate": 8.896286876133661e-05, "loss": -0.1603, "step": 15290 }, { "epoch": 8.90570430733411, "grad_norm": 0.21358054876327515, "learning_rate": 8.894559224091933e-05, "loss": -0.1671, "step": 15300 }, { "epoch": 8.91152502910361, "grad_norm": 0.27718526124954224, "learning_rate": 8.892830388985942e-05, "loss": -0.1649, "step": 15310 }, { "epoch": 8.917345750873109, "grad_norm": 0.2084423005580902, "learning_rate": 8.891100371340864e-05, "loss": -0.1673, "step": 15320 }, { "epoch": 8.923166472642608, "grad_norm": 0.2136237770318985, "learning_rate": 8.889369171682231e-05, "loss": -0.1641, "step": 15330 }, { "epoch": 8.928987194412107, "grad_norm": 0.26184895634651184, "learning_rate": 8.887636790535936e-05, "loss": -0.1679, "step": 15340 }, { "epoch": 8.934807916181606, "grad_norm": 0.12485957890748978, "learning_rate": 8.885903228428231e-05, "loss": -0.1655, "step": 15350 }, { "epoch": 8.940628637951106, "grad_norm": 0.27203816175460815, "learning_rate": 8.884168485885727e-05, "loss": -0.1676, "step": 15360 }, { "epoch": 8.946449359720605, "grad_norm": 0.19030189514160156, "learning_rate": 8.882432563435393e-05, "loss": -0.1657, "step": 15370 }, { "epoch": 8.952270081490104, "grad_norm": 0.20540980994701385, "learning_rate": 8.880695461604556e-05, "loss": -0.1688, "step": 15380 }, { "epoch": 8.958090803259605, "grad_norm": 0.30234917998313904, "learning_rate": 8.878957180920901e-05, "loss": -0.1685, "step": 15390 }, { "epoch": 8.963911525029104, "grad_norm": 0.376111775636673, "learning_rate": 8.877217721912473e-05, "loss": -0.1651, "step": 15400 }, { "epoch": 8.969732246798603, "grad_norm": 0.20627763867378235, "learning_rate": 8.875477085107673e-05, "loss": -0.1657, "step": 15410 }, { "epoch": 8.975552968568103, "grad_norm": 0.3069206476211548, "learning_rate": 8.87373527103526e-05, "loss": -0.1631, "step": 15420 }, { "epoch": 8.981373690337602, "grad_norm": 0.18132103979587555, "learning_rate": 8.871992280224353e-05, "loss": -0.1681, "step": 15430 }, { "epoch": 8.987194412107101, "grad_norm": 0.218421071767807, "learning_rate": 8.870248113204422e-05, "loss": -0.1668, "step": 15440 }, { "epoch": 8.9930151338766, "grad_norm": 0.18205060064792633, "learning_rate": 8.868502770505306e-05, "loss": -0.1647, "step": 15450 }, { "epoch": 8.9988358556461, "grad_norm": 0.20293915271759033, "learning_rate": 8.86675625265719e-05, "loss": -0.1625, "step": 15460 }, { "epoch": 9.004656577415599, "grad_norm": 0.2435125708580017, "learning_rate": 8.865008560190618e-05, "loss": -0.1695, "step": 15470 }, { "epoch": 9.0104772991851, "grad_norm": 0.1972782164812088, "learning_rate": 8.863259693636496e-05, "loss": -0.1645, "step": 15480 }, { "epoch": 9.016298020954599, "grad_norm": 0.20255674421787262, "learning_rate": 8.861509653526083e-05, "loss": -0.165, "step": 15490 }, { "epoch": 9.022118742724098, "grad_norm": 0.23696884512901306, "learning_rate": 8.859758440390993e-05, "loss": -0.1657, "step": 15500 }, { "epoch": 9.027939464493597, "grad_norm": 0.18869662284851074, "learning_rate": 8.858006054763202e-05, "loss": -0.1665, "step": 15510 }, { "epoch": 9.033760186263097, "grad_norm": 0.23101885616779327, "learning_rate": 8.856252497175035e-05, "loss": -0.1611, "step": 15520 }, { "epoch": 9.039580908032596, "grad_norm": 0.12122911214828491, "learning_rate": 8.854497768159178e-05, "loss": -0.17, "step": 15530 }, { "epoch": 9.045401629802095, "grad_norm": 0.33453798294067383, "learning_rate": 8.852741868248671e-05, "loss": -0.1662, "step": 15540 }, { "epoch": 9.051222351571594, "grad_norm": 0.32955503463745117, "learning_rate": 8.85098479797691e-05, "loss": -0.1627, "step": 15550 }, { "epoch": 9.057043073341095, "grad_norm": 0.24184547364711761, "learning_rate": 8.849226557877646e-05, "loss": -0.1648, "step": 15560 }, { "epoch": 9.062863795110594, "grad_norm": 0.21345071494579315, "learning_rate": 8.84746714848499e-05, "loss": -0.1657, "step": 15570 }, { "epoch": 9.068684516880094, "grad_norm": 0.20814667642116547, "learning_rate": 8.845706570333397e-05, "loss": -0.1705, "step": 15580 }, { "epoch": 9.074505238649593, "grad_norm": 0.20709460973739624, "learning_rate": 8.84394482395769e-05, "loss": -0.1657, "step": 15590 }, { "epoch": 9.080325960419092, "grad_norm": 0.25484955310821533, "learning_rate": 8.842181909893038e-05, "loss": -0.1687, "step": 15600 }, { "epoch": 9.086146682188591, "grad_norm": 0.1371774971485138, "learning_rate": 8.840417828674969e-05, "loss": -0.1643, "step": 15610 }, { "epoch": 9.09196740395809, "grad_norm": 0.22785943746566772, "learning_rate": 8.838652580839364e-05, "loss": -0.165, "step": 15620 }, { "epoch": 9.09778812572759, "grad_norm": 0.30361104011535645, "learning_rate": 8.836886166922458e-05, "loss": -0.1562, "step": 15630 }, { "epoch": 9.103608847497089, "grad_norm": 0.18639947474002838, "learning_rate": 8.835118587460844e-05, "loss": -0.1686, "step": 15640 }, { "epoch": 9.10942956926659, "grad_norm": 0.2906993329524994, "learning_rate": 8.83334984299146e-05, "loss": -0.1611, "step": 15650 }, { "epoch": 9.115250291036089, "grad_norm": 0.2620888650417328, "learning_rate": 8.83157993405161e-05, "loss": -0.1548, "step": 15660 }, { "epoch": 9.121071012805588, "grad_norm": 0.21441559493541718, "learning_rate": 8.829808861178943e-05, "loss": -0.1643, "step": 15670 }, { "epoch": 9.126891734575088, "grad_norm": 0.2074766904115677, "learning_rate": 8.828036624911464e-05, "loss": -0.161, "step": 15680 }, { "epoch": 9.132712456344587, "grad_norm": 0.21331869065761566, "learning_rate": 8.826263225787532e-05, "loss": -0.1666, "step": 15690 }, { "epoch": 9.138533178114086, "grad_norm": 0.24721577763557434, "learning_rate": 8.824488664345858e-05, "loss": -0.1669, "step": 15700 }, { "epoch": 9.144353899883585, "grad_norm": 0.2952183485031128, "learning_rate": 8.822712941125508e-05, "loss": -0.1701, "step": 15710 }, { "epoch": 9.150174621653084, "grad_norm": 0.3291685879230499, "learning_rate": 8.820936056665898e-05, "loss": -0.1649, "step": 15720 }, { "epoch": 9.155995343422584, "grad_norm": 0.2355552762746811, "learning_rate": 8.819158011506801e-05, "loss": -0.1648, "step": 15730 }, { "epoch": 9.161816065192085, "grad_norm": 0.2847606837749481, "learning_rate": 8.81737880618834e-05, "loss": -0.1659, "step": 15740 }, { "epoch": 9.167636786961584, "grad_norm": 0.2570565342903137, "learning_rate": 8.815598441250987e-05, "loss": -0.1632, "step": 15750 }, { "epoch": 9.173457508731083, "grad_norm": 0.25859135389328003, "learning_rate": 8.813816917235576e-05, "loss": -0.1658, "step": 15760 }, { "epoch": 9.179278230500582, "grad_norm": 0.2810298800468445, "learning_rate": 8.812034234683282e-05, "loss": -0.164, "step": 15770 }, { "epoch": 9.185098952270081, "grad_norm": 0.23131205141544342, "learning_rate": 8.810250394135637e-05, "loss": -0.1654, "step": 15780 }, { "epoch": 9.19091967403958, "grad_norm": 0.192098468542099, "learning_rate": 8.808465396134529e-05, "loss": -0.1652, "step": 15790 }, { "epoch": 9.19674039580908, "grad_norm": 0.22277721762657166, "learning_rate": 8.806679241222189e-05, "loss": -0.1691, "step": 15800 }, { "epoch": 9.202561117578579, "grad_norm": 0.26722127199172974, "learning_rate": 8.804891929941203e-05, "loss": -0.1639, "step": 15810 }, { "epoch": 9.208381839348078, "grad_norm": 0.19726455211639404, "learning_rate": 8.803103462834514e-05, "loss": -0.1638, "step": 15820 }, { "epoch": 9.21420256111758, "grad_norm": 0.1991128772497177, "learning_rate": 8.801313840445408e-05, "loss": -0.1663, "step": 15830 }, { "epoch": 9.220023282887079, "grad_norm": 0.21199455857276917, "learning_rate": 8.799523063317524e-05, "loss": -0.1679, "step": 15840 }, { "epoch": 9.225844004656578, "grad_norm": 0.15656958520412445, "learning_rate": 8.797731131994854e-05, "loss": -0.1666, "step": 15850 }, { "epoch": 9.231664726426077, "grad_norm": 0.28040611743927, "learning_rate": 8.795938047021739e-05, "loss": -0.1603, "step": 15860 }, { "epoch": 9.237485448195576, "grad_norm": 0.22053486108779907, "learning_rate": 8.794143808942872e-05, "loss": -0.166, "step": 15870 }, { "epoch": 9.243306169965075, "grad_norm": 0.2027774453163147, "learning_rate": 8.792348418303296e-05, "loss": -0.1686, "step": 15880 }, { "epoch": 9.249126891734575, "grad_norm": 0.17902491986751556, "learning_rate": 8.790551875648398e-05, "loss": -0.1629, "step": 15890 }, { "epoch": 9.254947613504074, "grad_norm": 0.2024834305047989, "learning_rate": 8.788754181523926e-05, "loss": -0.1614, "step": 15900 }, { "epoch": 9.260768335273575, "grad_norm": 0.27250441908836365, "learning_rate": 8.78695533647597e-05, "loss": -0.1626, "step": 15910 }, { "epoch": 9.266589057043074, "grad_norm": 0.27964362502098083, "learning_rate": 8.785155341050972e-05, "loss": -0.1587, "step": 15920 }, { "epoch": 9.272409778812573, "grad_norm": 0.2848932445049286, "learning_rate": 8.783354195795721e-05, "loss": -0.1612, "step": 15930 }, { "epoch": 9.278230500582072, "grad_norm": 0.25866249203681946, "learning_rate": 8.78155190125736e-05, "loss": -0.1634, "step": 15940 }, { "epoch": 9.284051222351572, "grad_norm": 0.23689241707324982, "learning_rate": 8.779748457983378e-05, "loss": -0.1603, "step": 15950 }, { "epoch": 9.28987194412107, "grad_norm": 0.30350130796432495, "learning_rate": 8.777943866521612e-05, "loss": -0.1591, "step": 15960 }, { "epoch": 9.29569266589057, "grad_norm": 0.21967527270317078, "learning_rate": 8.77613812742025e-05, "loss": -0.1669, "step": 15970 }, { "epoch": 9.30151338766007, "grad_norm": 0.2645910084247589, "learning_rate": 8.774331241227829e-05, "loss": -0.1599, "step": 15980 }, { "epoch": 9.307334109429569, "grad_norm": 0.257649302482605, "learning_rate": 8.772523208493232e-05, "loss": -0.1672, "step": 15990 }, { "epoch": 9.31315483119907, "grad_norm": 0.16504307091236115, "learning_rate": 8.770714029765692e-05, "loss": -0.1644, "step": 16000 }, { "epoch": 9.318975552968569, "grad_norm": 0.26129502058029175, "learning_rate": 8.768903705594789e-05, "loss": -0.1634, "step": 16010 }, { "epoch": 9.324796274738068, "grad_norm": 0.25308313965797424, "learning_rate": 8.767092236530453e-05, "loss": -0.165, "step": 16020 }, { "epoch": 9.330616996507567, "grad_norm": 0.26346999406814575, "learning_rate": 8.76527962312296e-05, "loss": -0.1623, "step": 16030 }, { "epoch": 9.336437718277066, "grad_norm": 0.19960075616836548, "learning_rate": 8.763465865922934e-05, "loss": -0.1683, "step": 16040 }, { "epoch": 9.342258440046566, "grad_norm": 0.2647537291049957, "learning_rate": 8.761650965481347e-05, "loss": -0.1642, "step": 16050 }, { "epoch": 9.348079161816065, "grad_norm": 0.12618529796600342, "learning_rate": 8.759834922349516e-05, "loss": -0.1656, "step": 16060 }, { "epoch": 9.353899883585564, "grad_norm": 0.25632745027542114, "learning_rate": 8.758017737079108e-05, "loss": -0.1663, "step": 16070 }, { "epoch": 9.359720605355063, "grad_norm": 0.16681529581546783, "learning_rate": 8.756199410222137e-05, "loss": -0.1721, "step": 16080 }, { "epoch": 9.365541327124564, "grad_norm": 0.19103074073791504, "learning_rate": 8.754379942330963e-05, "loss": -0.1676, "step": 16090 }, { "epoch": 9.371362048894063, "grad_norm": 0.20265696942806244, "learning_rate": 8.75255933395829e-05, "loss": -0.1688, "step": 16100 }, { "epoch": 9.377182770663563, "grad_norm": 0.25347498059272766, "learning_rate": 8.750737585657171e-05, "loss": -0.1658, "step": 16110 }, { "epoch": 9.383003492433062, "grad_norm": 0.22702272236347198, "learning_rate": 8.748914697981008e-05, "loss": -0.1665, "step": 16120 }, { "epoch": 9.388824214202561, "grad_norm": 0.17253082990646362, "learning_rate": 8.747090671483542e-05, "loss": -0.163, "step": 16130 }, { "epoch": 9.39464493597206, "grad_norm": 0.17048227787017822, "learning_rate": 8.745265506718869e-05, "loss": -0.172, "step": 16140 }, { "epoch": 9.40046565774156, "grad_norm": 0.12013593316078186, "learning_rate": 8.74343920424142e-05, "loss": -0.1682, "step": 16150 }, { "epoch": 9.406286379511059, "grad_norm": 0.26527589559555054, "learning_rate": 8.741611764605982e-05, "loss": -0.1662, "step": 16160 }, { "epoch": 9.412107101280558, "grad_norm": 0.1896485537290573, "learning_rate": 8.739783188367682e-05, "loss": -0.1678, "step": 16170 }, { "epoch": 9.417927823050059, "grad_norm": 0.38984304666519165, "learning_rate": 8.737953476081991e-05, "loss": -0.1695, "step": 16180 }, { "epoch": 9.423748544819558, "grad_norm": 0.24559526145458221, "learning_rate": 8.73612262830473e-05, "loss": -0.1674, "step": 16190 }, { "epoch": 9.429569266589057, "grad_norm": 0.2838663160800934, "learning_rate": 8.734290645592061e-05, "loss": -0.1703, "step": 16200 }, { "epoch": 9.435389988358557, "grad_norm": 0.20907852053642273, "learning_rate": 8.732457528500493e-05, "loss": -0.1678, "step": 16210 }, { "epoch": 9.441210710128056, "grad_norm": 0.3074318468570709, "learning_rate": 8.730623277586875e-05, "loss": -0.1661, "step": 16220 }, { "epoch": 9.447031431897555, "grad_norm": 0.1864089071750641, "learning_rate": 8.72878789340841e-05, "loss": -0.1668, "step": 16230 }, { "epoch": 9.452852153667054, "grad_norm": 0.19197824597358704, "learning_rate": 8.726951376522635e-05, "loss": -0.164, "step": 16240 }, { "epoch": 9.458672875436553, "grad_norm": 0.3732004165649414, "learning_rate": 8.725113727487435e-05, "loss": -0.1677, "step": 16250 }, { "epoch": 9.464493597206054, "grad_norm": 0.16302506625652313, "learning_rate": 8.723274946861042e-05, "loss": -0.1704, "step": 16260 }, { "epoch": 9.470314318975554, "grad_norm": 0.286629319190979, "learning_rate": 8.721435035202026e-05, "loss": -0.1693, "step": 16270 }, { "epoch": 9.476135040745053, "grad_norm": 0.21908894181251526, "learning_rate": 8.719593993069306e-05, "loss": -0.1563, "step": 16280 }, { "epoch": 9.481955762514552, "grad_norm": 0.2841387391090393, "learning_rate": 8.717751821022139e-05, "loss": -0.1646, "step": 16290 }, { "epoch": 9.487776484284051, "grad_norm": 0.24221868813037872, "learning_rate": 8.715908519620134e-05, "loss": -0.1662, "step": 16300 }, { "epoch": 9.49359720605355, "grad_norm": 0.214243546128273, "learning_rate": 8.71406408942323e-05, "loss": -0.1652, "step": 16310 }, { "epoch": 9.49941792782305, "grad_norm": 0.1878916472196579, "learning_rate": 8.712218530991723e-05, "loss": -0.1682, "step": 16320 }, { "epoch": 9.505238649592549, "grad_norm": 0.27211660146713257, "learning_rate": 8.710371844886241e-05, "loss": -0.1684, "step": 16330 }, { "epoch": 9.511059371362048, "grad_norm": 0.33663731813430786, "learning_rate": 8.708524031667758e-05, "loss": -0.1651, "step": 16340 }, { "epoch": 9.516880093131547, "grad_norm": 0.2265361249446869, "learning_rate": 8.706675091897592e-05, "loss": -0.1637, "step": 16350 }, { "epoch": 9.522700814901048, "grad_norm": 0.16085068881511688, "learning_rate": 8.704825026137404e-05, "loss": -0.1644, "step": 16360 }, { "epoch": 9.528521536670548, "grad_norm": 0.2614370286464691, "learning_rate": 8.702973834949192e-05, "loss": -0.1663, "step": 16370 }, { "epoch": 9.534342258440047, "grad_norm": 0.2616744339466095, "learning_rate": 8.701121518895301e-05, "loss": -0.1679, "step": 16380 }, { "epoch": 9.540162980209546, "grad_norm": 0.291604608297348, "learning_rate": 8.699268078538414e-05, "loss": -0.1643, "step": 16390 }, { "epoch": 9.545983701979045, "grad_norm": 0.2093329131603241, "learning_rate": 8.69741351444156e-05, "loss": -0.1716, "step": 16400 }, { "epoch": 9.551804423748544, "grad_norm": 0.26900532841682434, "learning_rate": 8.695557827168101e-05, "loss": -0.1684, "step": 16410 }, { "epoch": 9.557625145518044, "grad_norm": 0.21500176191329956, "learning_rate": 8.693701017281753e-05, "loss": -0.1641, "step": 16420 }, { "epoch": 9.563445867287543, "grad_norm": 0.3018999993801117, "learning_rate": 8.691843085346563e-05, "loss": -0.1594, "step": 16430 }, { "epoch": 9.569266589057044, "grad_norm": 0.2467811107635498, "learning_rate": 8.689984031926919e-05, "loss": -0.1696, "step": 16440 }, { "epoch": 9.575087310826543, "grad_norm": 0.2710178792476654, "learning_rate": 8.688123857587555e-05, "loss": -0.1654, "step": 16450 }, { "epoch": 9.580908032596042, "grad_norm": 0.3939068615436554, "learning_rate": 8.686262562893544e-05, "loss": -0.165, "step": 16460 }, { "epoch": 9.586728754365542, "grad_norm": 0.3141782879829407, "learning_rate": 8.684400148410294e-05, "loss": -0.169, "step": 16470 }, { "epoch": 9.59254947613504, "grad_norm": 0.15799161791801453, "learning_rate": 8.682536614703562e-05, "loss": -0.1676, "step": 16480 }, { "epoch": 9.59837019790454, "grad_norm": 0.3481397032737732, "learning_rate": 8.680671962339437e-05, "loss": -0.1677, "step": 16490 }, { "epoch": 9.60419091967404, "grad_norm": 0.19428113102912903, "learning_rate": 8.678806191884352e-05, "loss": -0.1673, "step": 16500 }, { "epoch": 9.610011641443538, "grad_norm": 0.2428073137998581, "learning_rate": 8.67693930390508e-05, "loss": -0.1677, "step": 16510 }, { "epoch": 9.615832363213038, "grad_norm": 0.32973915338516235, "learning_rate": 8.67507129896873e-05, "loss": -0.166, "step": 16520 }, { "epoch": 9.621653084982539, "grad_norm": 0.27015218138694763, "learning_rate": 8.673202177642757e-05, "loss": -0.162, "step": 16530 }, { "epoch": 9.627473806752038, "grad_norm": 0.14152099192142487, "learning_rate": 8.671331940494945e-05, "loss": -0.1597, "step": 16540 }, { "epoch": 9.633294528521537, "grad_norm": 0.25966429710388184, "learning_rate": 8.669460588093427e-05, "loss": -0.1645, "step": 16550 }, { "epoch": 9.639115250291036, "grad_norm": 0.3047940731048584, "learning_rate": 8.667588121006667e-05, "loss": -0.1695, "step": 16560 }, { "epoch": 9.644935972060535, "grad_norm": 0.27526354789733887, "learning_rate": 8.665714539803475e-05, "loss": -0.1613, "step": 16570 }, { "epoch": 9.650756693830035, "grad_norm": 0.2085220217704773, "learning_rate": 8.663839845052993e-05, "loss": -0.1702, "step": 16580 }, { "epoch": 9.656577415599534, "grad_norm": 0.27921921014785767, "learning_rate": 8.661964037324703e-05, "loss": -0.1655, "step": 16590 }, { "epoch": 9.662398137369033, "grad_norm": 0.16873124241828918, "learning_rate": 8.660087117188427e-05, "loss": -0.1729, "step": 16600 }, { "epoch": 9.668218859138534, "grad_norm": 0.2387617826461792, "learning_rate": 8.658209085214325e-05, "loss": -0.1643, "step": 16610 }, { "epoch": 9.674039580908033, "grad_norm": 0.25191816687583923, "learning_rate": 8.656329941972891e-05, "loss": -0.1588, "step": 16620 }, { "epoch": 9.679860302677533, "grad_norm": 0.18587568402290344, "learning_rate": 8.654449688034963e-05, "loss": -0.1644, "step": 16630 }, { "epoch": 9.685681024447032, "grad_norm": 0.2798614799976349, "learning_rate": 8.652568323971706e-05, "loss": -0.1639, "step": 16640 }, { "epoch": 9.691501746216531, "grad_norm": 0.3153642416000366, "learning_rate": 8.650685850354636e-05, "loss": -0.1666, "step": 16650 }, { "epoch": 9.69732246798603, "grad_norm": 0.20884688198566437, "learning_rate": 8.648802267755593e-05, "loss": -0.1664, "step": 16660 }, { "epoch": 9.70314318975553, "grad_norm": 0.19041024148464203, "learning_rate": 8.646917576746764e-05, "loss": -0.1666, "step": 16670 }, { "epoch": 9.708963911525029, "grad_norm": 0.2767574191093445, "learning_rate": 8.645031777900666e-05, "loss": -0.1694, "step": 16680 }, { "epoch": 9.714784633294528, "grad_norm": 0.2807285785675049, "learning_rate": 8.643144871790154e-05, "loss": -0.1624, "step": 16690 }, { "epoch": 9.720605355064027, "grad_norm": 0.17106680572032928, "learning_rate": 8.641256858988424e-05, "loss": -0.1675, "step": 16700 }, { "epoch": 9.726426076833528, "grad_norm": 0.25357750058174133, "learning_rate": 8.639367740069e-05, "loss": -0.1701, "step": 16710 }, { "epoch": 9.732246798603027, "grad_norm": 0.26342469453811646, "learning_rate": 8.63747751560575e-05, "loss": -0.1691, "step": 16720 }, { "epoch": 9.738067520372526, "grad_norm": 0.20358677208423615, "learning_rate": 8.635586186172871e-05, "loss": -0.1707, "step": 16730 }, { "epoch": 9.743888242142026, "grad_norm": 0.23705540597438812, "learning_rate": 8.633693752344902e-05, "loss": -0.1718, "step": 16740 }, { "epoch": 9.749708963911525, "grad_norm": 0.2475881576538086, "learning_rate": 8.631800214696713e-05, "loss": -0.1688, "step": 16750 }, { "epoch": 9.755529685681024, "grad_norm": 0.1397639960050583, "learning_rate": 8.629905573803511e-05, "loss": -0.168, "step": 16760 }, { "epoch": 9.761350407450523, "grad_norm": 0.1732625961303711, "learning_rate": 8.628009830240839e-05, "loss": -0.1677, "step": 16770 }, { "epoch": 9.767171129220023, "grad_norm": 0.15530139207839966, "learning_rate": 8.626112984584571e-05, "loss": -0.1673, "step": 16780 }, { "epoch": 9.772991850989523, "grad_norm": 0.15548935532569885, "learning_rate": 8.62421503741092e-05, "loss": -0.1702, "step": 16790 }, { "epoch": 9.778812572759023, "grad_norm": 0.18450340628623962, "learning_rate": 8.622315989296432e-05, "loss": -0.1742, "step": 16800 }, { "epoch": 9.784633294528522, "grad_norm": 0.2007347047328949, "learning_rate": 8.62041584081799e-05, "loss": -0.1696, "step": 16810 }, { "epoch": 9.790454016298021, "grad_norm": 0.20290586352348328, "learning_rate": 8.618514592552807e-05, "loss": -0.1696, "step": 16820 }, { "epoch": 9.79627473806752, "grad_norm": 0.13410967588424683, "learning_rate": 8.616612245078431e-05, "loss": -0.1705, "step": 16830 }, { "epoch": 9.80209545983702, "grad_norm": 0.2741197943687439, "learning_rate": 8.614708798972746e-05, "loss": -0.1672, "step": 16840 }, { "epoch": 9.807916181606519, "grad_norm": 0.16539794206619263, "learning_rate": 8.61280425481397e-05, "loss": -0.1714, "step": 16850 }, { "epoch": 9.813736903376018, "grad_norm": 0.29271602630615234, "learning_rate": 8.61089861318065e-05, "loss": -0.165, "step": 16860 }, { "epoch": 9.819557625145517, "grad_norm": 0.15705186128616333, "learning_rate": 8.608991874651673e-05, "loss": -0.1729, "step": 16870 }, { "epoch": 9.825378346915018, "grad_norm": 0.1814744919538498, "learning_rate": 8.607084039806255e-05, "loss": -0.1691, "step": 16880 }, { "epoch": 9.831199068684517, "grad_norm": 0.22647075355052948, "learning_rate": 8.605175109223944e-05, "loss": -0.1707, "step": 16890 }, { "epoch": 9.837019790454017, "grad_norm": 0.24212463200092316, "learning_rate": 8.603265083484624e-05, "loss": -0.169, "step": 16900 }, { "epoch": 9.842840512223516, "grad_norm": 0.1674526333808899, "learning_rate": 8.60135396316851e-05, "loss": -0.1705, "step": 16910 }, { "epoch": 9.848661233993015, "grad_norm": 0.17378757894039154, "learning_rate": 8.599441748856152e-05, "loss": -0.1723, "step": 16920 }, { "epoch": 9.854481955762514, "grad_norm": 0.2548559904098511, "learning_rate": 8.597528441128427e-05, "loss": -0.1691, "step": 16930 }, { "epoch": 9.860302677532014, "grad_norm": 0.20306070148944855, "learning_rate": 8.595614040566549e-05, "loss": -0.1672, "step": 16940 }, { "epoch": 9.866123399301513, "grad_norm": 0.24669016897678375, "learning_rate": 8.593698547752063e-05, "loss": -0.1683, "step": 16950 }, { "epoch": 9.871944121071014, "grad_norm": 0.24467410147190094, "learning_rate": 8.591781963266843e-05, "loss": -0.1555, "step": 16960 }, { "epoch": 9.877764842840513, "grad_norm": 0.2976756989955902, "learning_rate": 8.5898642876931e-05, "loss": -0.1687, "step": 16970 }, { "epoch": 9.883585564610012, "grad_norm": 0.21104194223880768, "learning_rate": 8.587945521613369e-05, "loss": -0.1594, "step": 16980 }, { "epoch": 9.889406286379511, "grad_norm": 0.24729566276073456, "learning_rate": 8.586025665610524e-05, "loss": -0.1714, "step": 16990 }, { "epoch": 9.89522700814901, "grad_norm": 0.24295111000537872, "learning_rate": 8.584104720267765e-05, "loss": -0.1673, "step": 17000 }, { "epoch": 9.90104772991851, "grad_norm": 0.1458570957183838, "learning_rate": 8.582182686168625e-05, "loss": -0.1679, "step": 17010 }, { "epoch": 9.906868451688009, "grad_norm": 0.26635876297950745, "learning_rate": 8.580259563896967e-05, "loss": -0.1664, "step": 17020 }, { "epoch": 9.912689173457508, "grad_norm": 0.273337721824646, "learning_rate": 8.578335354036983e-05, "loss": -0.1675, "step": 17030 }, { "epoch": 9.918509895227007, "grad_norm": 0.20252466201782227, "learning_rate": 8.576410057173201e-05, "loss": -0.1666, "step": 17040 }, { "epoch": 9.924330616996507, "grad_norm": 0.2774480879306793, "learning_rate": 8.574483673890474e-05, "loss": -0.168, "step": 17050 }, { "epoch": 9.930151338766008, "grad_norm": 0.24067945778369904, "learning_rate": 8.572556204773983e-05, "loss": -0.1697, "step": 17060 }, { "epoch": 9.935972060535507, "grad_norm": 0.22593192756175995, "learning_rate": 8.570627650409246e-05, "loss": -0.1684, "step": 17070 }, { "epoch": 9.941792782305006, "grad_norm": 0.15588951110839844, "learning_rate": 8.568698011382107e-05, "loss": -0.1703, "step": 17080 }, { "epoch": 9.947613504074505, "grad_norm": 0.27614548802375793, "learning_rate": 8.566767288278738e-05, "loss": -0.1624, "step": 17090 }, { "epoch": 9.953434225844005, "grad_norm": 0.17979545891284943, "learning_rate": 8.56483548168564e-05, "loss": -0.1682, "step": 17100 }, { "epoch": 9.959254947613504, "grad_norm": 0.2594098448753357, "learning_rate": 8.562902592189648e-05, "loss": -0.1683, "step": 17110 }, { "epoch": 9.965075669383003, "grad_norm": 0.1363094598054886, "learning_rate": 8.560968620377921e-05, "loss": -0.1707, "step": 17120 }, { "epoch": 9.970896391152502, "grad_norm": 0.16968323290348053, "learning_rate": 8.559033566837951e-05, "loss": -0.1654, "step": 17130 }, { "epoch": 9.976717112922003, "grad_norm": 0.20715029537677765, "learning_rate": 8.557097432157551e-05, "loss": -0.1661, "step": 17140 }, { "epoch": 9.982537834691502, "grad_norm": 0.16091488301753998, "learning_rate": 8.555160216924872e-05, "loss": -0.171, "step": 17150 }, { "epoch": 9.988358556461002, "grad_norm": 0.26855745911598206, "learning_rate": 8.55322192172839e-05, "loss": -0.1701, "step": 17160 }, { "epoch": 9.9941792782305, "grad_norm": 0.12628397345542908, "learning_rate": 8.551282547156902e-05, "loss": -0.1719, "step": 17170 }, { "epoch": 10.0, "grad_norm": 0.25493404269218445, "learning_rate": 8.549342093799544e-05, "loss": -0.1666, "step": 17180 }, { "epoch": 10.0058207217695, "grad_norm": 0.16605761647224426, "learning_rate": 8.547400562245773e-05, "loss": -0.1662, "step": 17190 }, { "epoch": 10.011641443538998, "grad_norm": 0.2047155648469925, "learning_rate": 8.545457953085374e-05, "loss": -0.1666, "step": 17200 }, { "epoch": 10.017462165308498, "grad_norm": 0.15024420619010925, "learning_rate": 8.543514266908463e-05, "loss": -0.1683, "step": 17210 }, { "epoch": 10.023282887077997, "grad_norm": 0.19510561227798462, "learning_rate": 8.541569504305478e-05, "loss": -0.1661, "step": 17220 }, { "epoch": 10.029103608847498, "grad_norm": 0.1447337120771408, "learning_rate": 8.539623665867187e-05, "loss": -0.1668, "step": 17230 }, { "epoch": 10.034924330616997, "grad_norm": 0.22807267308235168, "learning_rate": 8.537676752184685e-05, "loss": -0.1635, "step": 17240 }, { "epoch": 10.040745052386496, "grad_norm": 0.25671419501304626, "learning_rate": 8.53572876384939e-05, "loss": -0.1671, "step": 17250 }, { "epoch": 10.046565774155995, "grad_norm": 0.22477535903453827, "learning_rate": 8.533779701453056e-05, "loss": -0.1694, "step": 17260 }, { "epoch": 10.052386495925495, "grad_norm": 0.23700359463691711, "learning_rate": 8.53182956558775e-05, "loss": -0.1679, "step": 17270 }, { "epoch": 10.058207217694994, "grad_norm": 0.160465270280838, "learning_rate": 8.529878356845877e-05, "loss": -0.1707, "step": 17280 }, { "epoch": 10.064027939464493, "grad_norm": 0.24114516377449036, "learning_rate": 8.527926075820158e-05, "loss": -0.1664, "step": 17290 }, { "epoch": 10.069848661233992, "grad_norm": 0.26750171184539795, "learning_rate": 8.525972723103648e-05, "loss": -0.1683, "step": 17300 }, { "epoch": 10.075669383003492, "grad_norm": 0.19361314177513123, "learning_rate": 8.524018299289722e-05, "loss": -0.1691, "step": 17310 }, { "epoch": 10.081490104772993, "grad_norm": 0.32490959763526917, "learning_rate": 8.522062804972083e-05, "loss": -0.1677, "step": 17320 }, { "epoch": 10.087310826542492, "grad_norm": 0.2041608989238739, "learning_rate": 8.520106240744759e-05, "loss": -0.1681, "step": 17330 }, { "epoch": 10.093131548311991, "grad_norm": 0.22964006662368774, "learning_rate": 8.518148607202102e-05, "loss": -0.165, "step": 17340 }, { "epoch": 10.09895227008149, "grad_norm": 0.22103677690029144, "learning_rate": 8.51618990493879e-05, "loss": -0.1659, "step": 17350 }, { "epoch": 10.10477299185099, "grad_norm": 0.22885356843471527, "learning_rate": 8.514230134549823e-05, "loss": -0.1621, "step": 17360 }, { "epoch": 10.110593713620489, "grad_norm": 0.2451721876859665, "learning_rate": 8.51226929663053e-05, "loss": -0.1687, "step": 17370 }, { "epoch": 10.116414435389988, "grad_norm": 0.3274253308773041, "learning_rate": 8.51030739177656e-05, "loss": -0.1673, "step": 17380 }, { "epoch": 10.122235157159487, "grad_norm": 0.1665295660495758, "learning_rate": 8.508344420583889e-05, "loss": -0.1656, "step": 17390 }, { "epoch": 10.128055878928988, "grad_norm": 0.24531055986881256, "learning_rate": 8.506380383648816e-05, "loss": -0.1689, "step": 17400 }, { "epoch": 10.133876600698487, "grad_norm": 0.21890881657600403, "learning_rate": 8.504415281567963e-05, "loss": -0.1705, "step": 17410 }, { "epoch": 10.139697322467986, "grad_norm": 0.2730824649333954, "learning_rate": 8.502449114938275e-05, "loss": -0.1715, "step": 17420 }, { "epoch": 10.145518044237486, "grad_norm": 0.2822173237800598, "learning_rate": 8.500481884357025e-05, "loss": -0.1673, "step": 17430 }, { "epoch": 10.151338766006985, "grad_norm": 0.25890418887138367, "learning_rate": 8.498513590421801e-05, "loss": -0.169, "step": 17440 }, { "epoch": 10.157159487776484, "grad_norm": 0.2392275482416153, "learning_rate": 8.496544233730522e-05, "loss": -0.1655, "step": 17450 }, { "epoch": 10.162980209545983, "grad_norm": 0.18674920499324799, "learning_rate": 8.494573814881426e-05, "loss": -0.1702, "step": 17460 }, { "epoch": 10.168800931315483, "grad_norm": 0.19182927906513214, "learning_rate": 8.492602334473074e-05, "loss": -0.1708, "step": 17470 }, { "epoch": 10.174621653084982, "grad_norm": 0.33115771412849426, "learning_rate": 8.49062979310435e-05, "loss": -0.163, "step": 17480 }, { "epoch": 10.180442374854483, "grad_norm": 0.16433683037757874, "learning_rate": 8.488656191374458e-05, "loss": -0.1645, "step": 17490 }, { "epoch": 10.186263096623982, "grad_norm": 0.27004483342170715, "learning_rate": 8.48668152988293e-05, "loss": -0.1708, "step": 17500 }, { "epoch": 10.192083818393481, "grad_norm": 0.19518671929836273, "learning_rate": 8.484705809229612e-05, "loss": -0.169, "step": 17510 }, { "epoch": 10.19790454016298, "grad_norm": 0.22596831619739532, "learning_rate": 8.482729030014677e-05, "loss": -0.1635, "step": 17520 }, { "epoch": 10.20372526193248, "grad_norm": 0.25491878390312195, "learning_rate": 8.48075119283862e-05, "loss": -0.1681, "step": 17530 }, { "epoch": 10.209545983701979, "grad_norm": 0.2570585608482361, "learning_rate": 8.478772298302254e-05, "loss": -0.1673, "step": 17540 }, { "epoch": 10.215366705471478, "grad_norm": 0.28403908014297485, "learning_rate": 8.476792347006716e-05, "loss": -0.1618, "step": 17550 }, { "epoch": 10.221187427240977, "grad_norm": 0.31573930382728577, "learning_rate": 8.474811339553462e-05, "loss": -0.1676, "step": 17560 }, { "epoch": 10.227008149010477, "grad_norm": 0.2495722770690918, "learning_rate": 8.47282927654427e-05, "loss": -0.1594, "step": 17570 }, { "epoch": 10.232828870779977, "grad_norm": 0.22850154340267181, "learning_rate": 8.470846158581238e-05, "loss": -0.1711, "step": 17580 }, { "epoch": 10.238649592549477, "grad_norm": 0.17709393799304962, "learning_rate": 8.468861986266787e-05, "loss": -0.1691, "step": 17590 }, { "epoch": 10.244470314318976, "grad_norm": 0.186215341091156, "learning_rate": 8.466876760203654e-05, "loss": -0.1699, "step": 17600 }, { "epoch": 10.250291036088475, "grad_norm": 0.1840776950120926, "learning_rate": 8.464890480994898e-05, "loss": -0.1635, "step": 17610 }, { "epoch": 10.256111757857974, "grad_norm": 0.14089691638946533, "learning_rate": 8.462903149243899e-05, "loss": -0.1698, "step": 17620 }, { "epoch": 10.261932479627474, "grad_norm": 0.17150476574897766, "learning_rate": 8.460914765554357e-05, "loss": -0.1723, "step": 17630 }, { "epoch": 10.267753201396973, "grad_norm": 0.19723521173000336, "learning_rate": 8.458925330530288e-05, "loss": -0.1582, "step": 17640 }, { "epoch": 10.273573923166472, "grad_norm": 0.23644231259822845, "learning_rate": 8.456934844776032e-05, "loss": -0.1691, "step": 17650 }, { "epoch": 10.279394644935971, "grad_norm": 0.18569229543209076, "learning_rate": 8.454943308896246e-05, "loss": -0.1671, "step": 17660 }, { "epoch": 10.285215366705472, "grad_norm": 0.15280331671237946, "learning_rate": 8.452950723495905e-05, "loss": -0.1712, "step": 17670 }, { "epoch": 10.291036088474971, "grad_norm": 0.1613529473543167, "learning_rate": 8.450957089180303e-05, "loss": -0.1693, "step": 17680 }, { "epoch": 10.29685681024447, "grad_norm": 0.2912985682487488, "learning_rate": 8.448962406555055e-05, "loss": -0.1664, "step": 17690 }, { "epoch": 10.30267753201397, "grad_norm": 0.2251737415790558, "learning_rate": 8.446966676226093e-05, "loss": -0.1629, "step": 17700 }, { "epoch": 10.308498253783469, "grad_norm": 0.21887582540512085, "learning_rate": 8.444969898799667e-05, "loss": -0.1674, "step": 17710 }, { "epoch": 10.314318975552968, "grad_norm": 0.298465371131897, "learning_rate": 8.442972074882343e-05, "loss": -0.1692, "step": 17720 }, { "epoch": 10.320139697322467, "grad_norm": 0.21477146446704865, "learning_rate": 8.44097320508101e-05, "loss": -0.1685, "step": 17730 }, { "epoch": 10.325960419091967, "grad_norm": 0.2353433519601822, "learning_rate": 8.43897329000287e-05, "loss": -0.172, "step": 17740 }, { "epoch": 10.331781140861466, "grad_norm": 0.23485825955867767, "learning_rate": 8.436972330255448e-05, "loss": -0.1671, "step": 17750 }, { "epoch": 10.337601862630967, "grad_norm": 0.19708697497844696, "learning_rate": 8.434970326446579e-05, "loss": -0.1653, "step": 17760 }, { "epoch": 10.343422584400466, "grad_norm": 0.16549259424209595, "learning_rate": 8.432967279184418e-05, "loss": -0.1705, "step": 17770 }, { "epoch": 10.349243306169965, "grad_norm": 0.21828265488147736, "learning_rate": 8.430963189077441e-05, "loss": -0.1699, "step": 17780 }, { "epoch": 10.355064027939465, "grad_norm": 0.24500873684883118, "learning_rate": 8.428958056734437e-05, "loss": -0.1704, "step": 17790 }, { "epoch": 10.360884749708964, "grad_norm": 0.2833419442176819, "learning_rate": 8.426951882764513e-05, "loss": -0.1601, "step": 17800 }, { "epoch": 10.366705471478463, "grad_norm": 0.13559921085834503, "learning_rate": 8.424944667777089e-05, "loss": -0.1722, "step": 17810 }, { "epoch": 10.372526193247962, "grad_norm": 0.23035408556461334, "learning_rate": 8.422936412381905e-05, "loss": -0.1683, "step": 17820 }, { "epoch": 10.378346915017461, "grad_norm": 0.26507917046546936, "learning_rate": 8.420927117189017e-05, "loss": -0.1684, "step": 17830 }, { "epoch": 10.384167636786962, "grad_norm": 0.19036874175071716, "learning_rate": 8.418916782808795e-05, "loss": -0.167, "step": 17840 }, { "epoch": 10.389988358556462, "grad_norm": 0.1706628054380417, "learning_rate": 8.416905409851926e-05, "loss": -0.1683, "step": 17850 }, { "epoch": 10.39580908032596, "grad_norm": 0.16272006928920746, "learning_rate": 8.41489299892941e-05, "loss": -0.1714, "step": 17860 }, { "epoch": 10.40162980209546, "grad_norm": 0.16875997185707092, "learning_rate": 8.412879550652566e-05, "loss": -0.1701, "step": 17870 }, { "epoch": 10.40745052386496, "grad_norm": 0.3124374449253082, "learning_rate": 8.410865065633029e-05, "loss": -0.1694, "step": 17880 }, { "epoch": 10.413271245634458, "grad_norm": 0.27757349610328674, "learning_rate": 8.408849544482742e-05, "loss": -0.1664, "step": 17890 }, { "epoch": 10.419091967403958, "grad_norm": 0.32878363132476807, "learning_rate": 8.406832987813968e-05, "loss": -0.1667, "step": 17900 }, { "epoch": 10.424912689173457, "grad_norm": 0.2966049611568451, "learning_rate": 8.404815396239286e-05, "loss": -0.1665, "step": 17910 }, { "epoch": 10.430733410942956, "grad_norm": 0.21782036125659943, "learning_rate": 8.402796770371587e-05, "loss": -0.1654, "step": 17920 }, { "epoch": 10.436554132712457, "grad_norm": 0.23957964777946472, "learning_rate": 8.400777110824071e-05, "loss": -0.1709, "step": 17930 }, { "epoch": 10.442374854481956, "grad_norm": 0.3404388427734375, "learning_rate": 8.398756418210263e-05, "loss": -0.168, "step": 17940 }, { "epoch": 10.448195576251456, "grad_norm": 0.20335116982460022, "learning_rate": 8.396734693143993e-05, "loss": -0.17, "step": 17950 }, { "epoch": 10.454016298020955, "grad_norm": 0.21960243582725525, "learning_rate": 8.39471193623941e-05, "loss": -0.1612, "step": 17960 }, { "epoch": 10.459837019790454, "grad_norm": 0.3161000907421112, "learning_rate": 8.392688148110974e-05, "loss": -0.1672, "step": 17970 }, { "epoch": 10.465657741559953, "grad_norm": 0.2242903858423233, "learning_rate": 8.390663329373456e-05, "loss": -0.1689, "step": 17980 }, { "epoch": 10.471478463329452, "grad_norm": 0.24068410694599152, "learning_rate": 8.388637480641944e-05, "loss": -0.1648, "step": 17990 }, { "epoch": 10.477299185098952, "grad_norm": 0.3171709477901459, "learning_rate": 8.386610602531837e-05, "loss": -0.1653, "step": 18000 }, { "epoch": 10.48311990686845, "grad_norm": 0.2774703800678253, "learning_rate": 8.384582695658847e-05, "loss": -0.1686, "step": 18010 }, { "epoch": 10.488940628637952, "grad_norm": 0.16137556731700897, "learning_rate": 8.382553760638999e-05, "loss": -0.172, "step": 18020 }, { "epoch": 10.494761350407451, "grad_norm": 0.2494068592786789, "learning_rate": 8.380523798088631e-05, "loss": -0.1721, "step": 18030 }, { "epoch": 10.50058207217695, "grad_norm": 0.13273778557777405, "learning_rate": 8.378492808624389e-05, "loss": -0.1684, "step": 18040 }, { "epoch": 10.50640279394645, "grad_norm": 0.250379353761673, "learning_rate": 8.376460792863237e-05, "loss": -0.165, "step": 18050 }, { "epoch": 10.512223515715949, "grad_norm": 0.19368170201778412, "learning_rate": 8.374427751422444e-05, "loss": -0.174, "step": 18060 }, { "epoch": 10.518044237485448, "grad_norm": 0.1786690652370453, "learning_rate": 8.3723936849196e-05, "loss": -0.1706, "step": 18070 }, { "epoch": 10.523864959254947, "grad_norm": 0.16531147062778473, "learning_rate": 8.370358593972595e-05, "loss": -0.1696, "step": 18080 }, { "epoch": 10.529685681024446, "grad_norm": 0.19023151695728302, "learning_rate": 8.36832247919964e-05, "loss": -0.1722, "step": 18090 }, { "epoch": 10.535506402793946, "grad_norm": 0.1845029890537262, "learning_rate": 8.36628534121925e-05, "loss": -0.1691, "step": 18100 }, { "epoch": 10.541327124563447, "grad_norm": 0.23262527585029602, "learning_rate": 8.364247180650254e-05, "loss": -0.1648, "step": 18110 }, { "epoch": 10.547147846332946, "grad_norm": 0.22220809757709503, "learning_rate": 8.362207998111794e-05, "loss": -0.1736, "step": 18120 }, { "epoch": 10.552968568102445, "grad_norm": 0.17086927592754364, "learning_rate": 8.360167794223318e-05, "loss": -0.1691, "step": 18130 }, { "epoch": 10.558789289871944, "grad_norm": 0.2700657546520233, "learning_rate": 8.358126569604586e-05, "loss": -0.1664, "step": 18140 }, { "epoch": 10.564610011641443, "grad_norm": 0.15295596420764923, "learning_rate": 8.356084324875668e-05, "loss": -0.1669, "step": 18150 }, { "epoch": 10.570430733410943, "grad_norm": 0.27174872159957886, "learning_rate": 8.354041060656945e-05, "loss": -0.1718, "step": 18160 }, { "epoch": 10.576251455180442, "grad_norm": 0.20117269456386566, "learning_rate": 8.351996777569106e-05, "loss": -0.1716, "step": 18170 }, { "epoch": 10.582072176949941, "grad_norm": 0.21733924746513367, "learning_rate": 8.349951476233148e-05, "loss": -0.1704, "step": 18180 }, { "epoch": 10.587892898719442, "grad_norm": 0.2225423902273178, "learning_rate": 8.347905157270386e-05, "loss": -0.171, "step": 18190 }, { "epoch": 10.593713620488941, "grad_norm": 0.17068862915039062, "learning_rate": 8.345857821302432e-05, "loss": -0.167, "step": 18200 }, { "epoch": 10.59953434225844, "grad_norm": 0.255231648683548, "learning_rate": 8.343809468951213e-05, "loss": -0.17, "step": 18210 }, { "epoch": 10.60535506402794, "grad_norm": 0.15086381137371063, "learning_rate": 8.341760100838965e-05, "loss": -0.1662, "step": 18220 }, { "epoch": 10.611175785797439, "grad_norm": 0.2547335624694824, "learning_rate": 8.339709717588233e-05, "loss": -0.1662, "step": 18230 }, { "epoch": 10.616996507566938, "grad_norm": 0.2971756160259247, "learning_rate": 8.33765831982187e-05, "loss": -0.1692, "step": 18240 }, { "epoch": 10.622817229336437, "grad_norm": 0.14478707313537598, "learning_rate": 8.335605908163035e-05, "loss": -0.1737, "step": 18250 }, { "epoch": 10.628637951105937, "grad_norm": 0.23123791813850403, "learning_rate": 8.333552483235196e-05, "loss": -0.1733, "step": 18260 }, { "epoch": 10.634458672875436, "grad_norm": 0.15960803627967834, "learning_rate": 8.33149804566213e-05, "loss": -0.1693, "step": 18270 }, { "epoch": 10.640279394644937, "grad_norm": 0.2852303683757782, "learning_rate": 8.329442596067921e-05, "loss": -0.1673, "step": 18280 }, { "epoch": 10.646100116414436, "grad_norm": 0.24949179589748383, "learning_rate": 8.32738613507696e-05, "loss": -0.1621, "step": 18290 }, { "epoch": 10.651920838183935, "grad_norm": 0.27603456377983093, "learning_rate": 8.325328663313946e-05, "loss": -0.166, "step": 18300 }, { "epoch": 10.657741559953434, "grad_norm": 0.312129408121109, "learning_rate": 8.323270181403884e-05, "loss": -0.169, "step": 18310 }, { "epoch": 10.663562281722934, "grad_norm": 0.18321508169174194, "learning_rate": 8.321210689972086e-05, "loss": -0.1684, "step": 18320 }, { "epoch": 10.669383003492433, "grad_norm": 0.2749752998352051, "learning_rate": 8.319150189644174e-05, "loss": -0.1686, "step": 18330 }, { "epoch": 10.675203725261932, "grad_norm": 0.19340063631534576, "learning_rate": 8.31708868104607e-05, "loss": -0.1609, "step": 18340 }, { "epoch": 10.681024447031431, "grad_norm": 0.17626817524433136, "learning_rate": 8.315026164804007e-05, "loss": -0.1673, "step": 18350 }, { "epoch": 10.68684516880093, "grad_norm": 0.2727813422679901, "learning_rate": 8.312962641544524e-05, "loss": -0.1672, "step": 18360 }, { "epoch": 10.692665890570431, "grad_norm": 0.2694284915924072, "learning_rate": 8.310898111894465e-05, "loss": -0.1718, "step": 18370 }, { "epoch": 10.69848661233993, "grad_norm": 0.19812877476215363, "learning_rate": 8.308832576480977e-05, "loss": -0.1716, "step": 18380 }, { "epoch": 10.70430733410943, "grad_norm": 0.3242059350013733, "learning_rate": 8.306766035931519e-05, "loss": -0.1625, "step": 18390 }, { "epoch": 10.71012805587893, "grad_norm": 0.21574395895004272, "learning_rate": 8.304698490873847e-05, "loss": -0.1707, "step": 18400 }, { "epoch": 10.715948777648428, "grad_norm": 0.2809971272945404, "learning_rate": 8.30262994193603e-05, "loss": -0.1675, "step": 18410 }, { "epoch": 10.721769499417928, "grad_norm": 0.3048672378063202, "learning_rate": 8.300560389746438e-05, "loss": -0.1698, "step": 18420 }, { "epoch": 10.727590221187427, "grad_norm": 0.31448179483413696, "learning_rate": 8.298489834933745e-05, "loss": -0.1656, "step": 18430 }, { "epoch": 10.733410942956926, "grad_norm": 0.21667659282684326, "learning_rate": 8.296418278126934e-05, "loss": -0.1655, "step": 18440 }, { "epoch": 10.739231664726425, "grad_norm": 0.1629268229007721, "learning_rate": 8.294345719955284e-05, "loss": -0.1676, "step": 18450 }, { "epoch": 10.745052386495926, "grad_norm": 0.2497188150882721, "learning_rate": 8.29227216104839e-05, "loss": -0.17, "step": 18460 }, { "epoch": 10.750873108265425, "grad_norm": 0.15397994220256805, "learning_rate": 8.290197602036137e-05, "loss": -0.1671, "step": 18470 }, { "epoch": 10.756693830034925, "grad_norm": 0.2929149866104126, "learning_rate": 8.288122043548725e-05, "loss": -0.1731, "step": 18480 }, { "epoch": 10.762514551804424, "grad_norm": 0.13401095569133759, "learning_rate": 8.286045486216657e-05, "loss": -0.164, "step": 18490 }, { "epoch": 10.768335273573923, "grad_norm": 0.2653178870677948, "learning_rate": 8.283967930670733e-05, "loss": -0.1689, "step": 18500 }, { "epoch": 10.774155995343422, "grad_norm": 0.2684502601623535, "learning_rate": 8.281889377542058e-05, "loss": -0.1683, "step": 18510 }, { "epoch": 10.779976717112921, "grad_norm": 0.2765365242958069, "learning_rate": 8.279809827462045e-05, "loss": -0.1697, "step": 18520 }, { "epoch": 10.78579743888242, "grad_norm": 0.268265962600708, "learning_rate": 8.277729281062402e-05, "loss": -0.169, "step": 18530 }, { "epoch": 10.791618160651922, "grad_norm": 0.16258534789085388, "learning_rate": 8.27564773897515e-05, "loss": -0.1714, "step": 18540 }, { "epoch": 10.797438882421421, "grad_norm": 0.16429932415485382, "learning_rate": 8.273565201832602e-05, "loss": -0.1736, "step": 18550 }, { "epoch": 10.80325960419092, "grad_norm": 0.3556058406829834, "learning_rate": 8.27148167026738e-05, "loss": -0.1716, "step": 18560 }, { "epoch": 10.80908032596042, "grad_norm": 0.2708013951778412, "learning_rate": 8.269397144912405e-05, "loss": -0.1691, "step": 18570 }, { "epoch": 10.814901047729919, "grad_norm": 0.1624993085861206, "learning_rate": 8.267311626400899e-05, "loss": -0.1721, "step": 18580 }, { "epoch": 10.820721769499418, "grad_norm": 0.21717381477355957, "learning_rate": 8.26522511536639e-05, "loss": -0.1663, "step": 18590 }, { "epoch": 10.826542491268917, "grad_norm": 0.22356215119361877, "learning_rate": 8.263137612442706e-05, "loss": -0.1698, "step": 18600 }, { "epoch": 10.832363213038416, "grad_norm": 0.2642636299133301, "learning_rate": 8.261049118263971e-05, "loss": -0.1723, "step": 18610 }, { "epoch": 10.838183934807915, "grad_norm": 0.2343587428331375, "learning_rate": 8.258959633464619e-05, "loss": -0.1665, "step": 18620 }, { "epoch": 10.844004656577416, "grad_norm": 0.2306452840566635, "learning_rate": 8.256869158679377e-05, "loss": -0.1701, "step": 18630 }, { "epoch": 10.849825378346916, "grad_norm": 0.11867993324995041, "learning_rate": 8.254777694543278e-05, "loss": -0.1709, "step": 18640 }, { "epoch": 10.855646100116415, "grad_norm": 0.16335274279117584, "learning_rate": 8.252685241691651e-05, "loss": -0.1703, "step": 18650 }, { "epoch": 10.861466821885914, "grad_norm": 0.22889214754104614, "learning_rate": 8.250591800760133e-05, "loss": -0.1602, "step": 18660 }, { "epoch": 10.867287543655413, "grad_norm": 0.3222566246986389, "learning_rate": 8.248497372384649e-05, "loss": -0.1643, "step": 18670 }, { "epoch": 10.873108265424912, "grad_norm": 0.26144057512283325, "learning_rate": 8.246401957201437e-05, "loss": -0.1673, "step": 18680 }, { "epoch": 10.878928987194412, "grad_norm": 0.1974111795425415, "learning_rate": 8.244305555847027e-05, "loss": -0.1678, "step": 18690 }, { "epoch": 10.884749708963911, "grad_norm": 0.2812807559967041, "learning_rate": 8.24220816895825e-05, "loss": -0.1707, "step": 18700 }, { "epoch": 10.89057043073341, "grad_norm": 0.17214514315128326, "learning_rate": 8.240109797172237e-05, "loss": -0.1671, "step": 18710 }, { "epoch": 10.896391152502911, "grad_norm": 0.2551290690898895, "learning_rate": 8.238010441126416e-05, "loss": -0.1651, "step": 18720 }, { "epoch": 10.90221187427241, "grad_norm": 0.326762318611145, "learning_rate": 8.23591010145852e-05, "loss": -0.1637, "step": 18730 }, { "epoch": 10.90803259604191, "grad_norm": 0.2758174538612366, "learning_rate": 8.233808778806571e-05, "loss": -0.1661, "step": 18740 }, { "epoch": 10.913853317811409, "grad_norm": 0.27452656626701355, "learning_rate": 8.231706473808903e-05, "loss": -0.1682, "step": 18750 }, { "epoch": 10.919674039580908, "grad_norm": 0.23754844069480896, "learning_rate": 8.229603187104133e-05, "loss": -0.1665, "step": 18760 }, { "epoch": 10.925494761350407, "grad_norm": 0.24167852103710175, "learning_rate": 8.22749891933119e-05, "loss": -0.1671, "step": 18770 }, { "epoch": 10.931315483119906, "grad_norm": 0.19239825010299683, "learning_rate": 8.225393671129291e-05, "loss": -0.1654, "step": 18780 }, { "epoch": 10.937136204889406, "grad_norm": 0.1614493876695633, "learning_rate": 8.223287443137957e-05, "loss": -0.1702, "step": 18790 }, { "epoch": 10.942956926658905, "grad_norm": 0.1473512351512909, "learning_rate": 8.221180235997004e-05, "loss": -0.1717, "step": 18800 }, { "epoch": 10.948777648428406, "grad_norm": 0.16288208961486816, "learning_rate": 8.219072050346544e-05, "loss": -0.1697, "step": 18810 }, { "epoch": 10.954598370197905, "grad_norm": 0.21725639700889587, "learning_rate": 8.216962886826992e-05, "loss": -0.1684, "step": 18820 }, { "epoch": 10.960419091967404, "grad_norm": 0.2328362911939621, "learning_rate": 8.214852746079054e-05, "loss": -0.1663, "step": 18830 }, { "epoch": 10.966239813736903, "grad_norm": 0.20836786925792694, "learning_rate": 8.212741628743732e-05, "loss": -0.17, "step": 18840 }, { "epoch": 10.972060535506403, "grad_norm": 0.21911455690860748, "learning_rate": 8.210629535462333e-05, "loss": -0.1696, "step": 18850 }, { "epoch": 10.977881257275902, "grad_norm": 0.2308933436870575, "learning_rate": 8.208516466876453e-05, "loss": -0.1694, "step": 18860 }, { "epoch": 10.983701979045401, "grad_norm": 0.23259177803993225, "learning_rate": 8.206402423627986e-05, "loss": -0.1735, "step": 18870 }, { "epoch": 10.9895227008149, "grad_norm": 0.25223222374916077, "learning_rate": 8.204287406359124e-05, "loss": -0.1614, "step": 18880 }, { "epoch": 10.995343422584401, "grad_norm": 0.2921522855758667, "learning_rate": 8.20217141571235e-05, "loss": -0.1681, "step": 18890 }, { "epoch": 11.0011641443539, "grad_norm": 0.20235949754714966, "learning_rate": 8.200054452330449e-05, "loss": -0.1732, "step": 18900 }, { "epoch": 11.0069848661234, "grad_norm": 0.17887860536575317, "learning_rate": 8.197936516856499e-05, "loss": -0.1681, "step": 18910 }, { "epoch": 11.012805587892899, "grad_norm": 0.3216020166873932, "learning_rate": 8.195817609933871e-05, "loss": -0.1697, "step": 18920 }, { "epoch": 11.018626309662398, "grad_norm": 0.26186713576316833, "learning_rate": 8.193697732206233e-05, "loss": -0.1687, "step": 18930 }, { "epoch": 11.024447031431897, "grad_norm": 0.28612086176872253, "learning_rate": 8.19157688431755e-05, "loss": -0.1667, "step": 18940 }, { "epoch": 11.030267753201397, "grad_norm": 0.2621441185474396, "learning_rate": 8.189455066912077e-05, "loss": -0.1676, "step": 18950 }, { "epoch": 11.036088474970896, "grad_norm": 0.2694191634654999, "learning_rate": 8.187332280634369e-05, "loss": -0.165, "step": 18960 }, { "epoch": 11.041909196740395, "grad_norm": 0.2651559114456177, "learning_rate": 8.18520852612927e-05, "loss": -0.1671, "step": 18970 }, { "epoch": 11.047729918509896, "grad_norm": 0.18902882933616638, "learning_rate": 8.183083804041921e-05, "loss": -0.1643, "step": 18980 }, { "epoch": 11.053550640279395, "grad_norm": 0.22702080011367798, "learning_rate": 8.180958115017757e-05, "loss": -0.1713, "step": 18990 }, { "epoch": 11.059371362048894, "grad_norm": 0.33309483528137207, "learning_rate": 8.178831459702505e-05, "loss": -0.1627, "step": 19000 }, { "epoch": 11.065192083818394, "grad_norm": 0.2516144812107086, "learning_rate": 8.17670383874219e-05, "loss": -0.1659, "step": 19010 }, { "epoch": 11.071012805587893, "grad_norm": 0.1840551495552063, "learning_rate": 8.174575252783124e-05, "loss": -0.1697, "step": 19020 }, { "epoch": 11.076833527357392, "grad_norm": 0.24317099153995514, "learning_rate": 8.172445702471914e-05, "loss": -0.1625, "step": 19030 }, { "epoch": 11.082654249126891, "grad_norm": 0.2416529804468155, "learning_rate": 8.170315188455466e-05, "loss": -0.1666, "step": 19040 }, { "epoch": 11.08847497089639, "grad_norm": 0.3091711103916168, "learning_rate": 8.168183711380969e-05, "loss": -0.1679, "step": 19050 }, { "epoch": 11.09429569266589, "grad_norm": 0.3012310266494751, "learning_rate": 8.166051271895913e-05, "loss": -0.1679, "step": 19060 }, { "epoch": 11.10011641443539, "grad_norm": 0.2027960866689682, "learning_rate": 8.163917870648075e-05, "loss": -0.1638, "step": 19070 }, { "epoch": 11.10593713620489, "grad_norm": 0.2890065610408783, "learning_rate": 8.161783508285526e-05, "loss": -0.1703, "step": 19080 }, { "epoch": 11.11175785797439, "grad_norm": 0.2410547286272049, "learning_rate": 8.159648185456628e-05, "loss": -0.1667, "step": 19090 }, { "epoch": 11.117578579743888, "grad_norm": 0.23107266426086426, "learning_rate": 8.157511902810038e-05, "loss": -0.1649, "step": 19100 }, { "epoch": 11.123399301513388, "grad_norm": 0.2780216336250305, "learning_rate": 8.155374660994701e-05, "loss": -0.1679, "step": 19110 }, { "epoch": 11.129220023282887, "grad_norm": 0.1534576714038849, "learning_rate": 8.153236460659857e-05, "loss": -0.1655, "step": 19120 }, { "epoch": 11.135040745052386, "grad_norm": 0.29930099844932556, "learning_rate": 8.151097302455031e-05, "loss": -0.171, "step": 19130 }, { "epoch": 11.140861466821885, "grad_norm": 0.17322440445423126, "learning_rate": 8.148957187030044e-05, "loss": -0.1645, "step": 19140 }, { "epoch": 11.146682188591384, "grad_norm": 0.17047163844108582, "learning_rate": 8.146816115035006e-05, "loss": -0.1681, "step": 19150 }, { "epoch": 11.152502910360885, "grad_norm": 0.2570835053920746, "learning_rate": 8.14467408712032e-05, "loss": -0.1644, "step": 19160 }, { "epoch": 11.158323632130385, "grad_norm": 0.23798133432865143, "learning_rate": 8.142531103936678e-05, "loss": -0.1696, "step": 19170 }, { "epoch": 11.164144353899884, "grad_norm": 0.1893618404865265, "learning_rate": 8.14038716613506e-05, "loss": -0.1745, "step": 19180 }, { "epoch": 11.169965075669383, "grad_norm": 0.19679774343967438, "learning_rate": 8.138242274366736e-05, "loss": -0.1692, "step": 19190 }, { "epoch": 11.175785797438882, "grad_norm": 0.21947066485881805, "learning_rate": 8.136096429283271e-05, "loss": -0.1681, "step": 19200 }, { "epoch": 11.181606519208382, "grad_norm": 0.2805846333503723, "learning_rate": 8.133949631536515e-05, "loss": -0.1667, "step": 19210 }, { "epoch": 11.18742724097788, "grad_norm": 0.24552594125270844, "learning_rate": 8.131801881778607e-05, "loss": -0.1623, "step": 19220 }, { "epoch": 11.19324796274738, "grad_norm": 0.2521964907646179, "learning_rate": 8.129653180661978e-05, "loss": -0.1679, "step": 19230 }, { "epoch": 11.199068684516881, "grad_norm": 0.2447437345981598, "learning_rate": 8.127503528839346e-05, "loss": -0.1669, "step": 19240 }, { "epoch": 11.20488940628638, "grad_norm": 0.1775577962398529, "learning_rate": 8.125352926963721e-05, "loss": -0.1619, "step": 19250 }, { "epoch": 11.21071012805588, "grad_norm": 0.3124282658100128, "learning_rate": 8.123201375688395e-05, "loss": -0.1688, "step": 19260 }, { "epoch": 11.216530849825379, "grad_norm": 0.257306843996048, "learning_rate": 8.121048875666954e-05, "loss": -0.169, "step": 19270 }, { "epoch": 11.222351571594878, "grad_norm": 0.17344658076763153, "learning_rate": 8.118895427553274e-05, "loss": -0.1726, "step": 19280 }, { "epoch": 11.228172293364377, "grad_norm": 0.22317615151405334, "learning_rate": 8.116741032001511e-05, "loss": -0.1656, "step": 19290 }, { "epoch": 11.233993015133876, "grad_norm": 0.28576868772506714, "learning_rate": 8.114585689666114e-05, "loss": -0.1676, "step": 19300 }, { "epoch": 11.239813736903375, "grad_norm": 0.18741825222969055, "learning_rate": 8.112429401201821e-05, "loss": -0.173, "step": 19310 }, { "epoch": 11.245634458672875, "grad_norm": 0.1915220469236374, "learning_rate": 8.110272167263656e-05, "loss": -0.1686, "step": 19320 }, { "epoch": 11.251455180442376, "grad_norm": 0.15793855488300323, "learning_rate": 8.108113988506929e-05, "loss": -0.1693, "step": 19330 }, { "epoch": 11.257275902211875, "grad_norm": 0.18166540563106537, "learning_rate": 8.105954865587235e-05, "loss": -0.1697, "step": 19340 }, { "epoch": 11.263096623981374, "grad_norm": 0.25339776277542114, "learning_rate": 8.103794799160463e-05, "loss": -0.1705, "step": 19350 }, { "epoch": 11.268917345750873, "grad_norm": 0.2532235383987427, "learning_rate": 8.101633789882781e-05, "loss": -0.165, "step": 19360 }, { "epoch": 11.274738067520373, "grad_norm": 0.23207247257232666, "learning_rate": 8.099471838410648e-05, "loss": -0.1718, "step": 19370 }, { "epoch": 11.280558789289872, "grad_norm": 0.18689797818660736, "learning_rate": 8.097308945400806e-05, "loss": -0.1707, "step": 19380 }, { "epoch": 11.286379511059371, "grad_norm": 0.18331731855869293, "learning_rate": 8.095145111510288e-05, "loss": -0.1717, "step": 19390 }, { "epoch": 11.29220023282887, "grad_norm": 0.190359964966774, "learning_rate": 8.092980337396406e-05, "loss": -0.1739, "step": 19400 }, { "epoch": 11.29802095459837, "grad_norm": 0.20120298862457275, "learning_rate": 8.090814623716763e-05, "loss": -0.1689, "step": 19410 }, { "epoch": 11.30384167636787, "grad_norm": 0.2094402015209198, "learning_rate": 8.088647971129246e-05, "loss": -0.1692, "step": 19420 }, { "epoch": 11.30966239813737, "grad_norm": 0.15001097321510315, "learning_rate": 8.086480380292026e-05, "loss": -0.169, "step": 19430 }, { "epoch": 11.315483119906869, "grad_norm": 0.22142277657985687, "learning_rate": 8.084311851863562e-05, "loss": -0.1709, "step": 19440 }, { "epoch": 11.321303841676368, "grad_norm": 0.29899531602859497, "learning_rate": 8.082142386502591e-05, "loss": -0.1703, "step": 19450 }, { "epoch": 11.327124563445867, "grad_norm": 0.2635643780231476, "learning_rate": 8.079971984868145e-05, "loss": -0.1658, "step": 19460 }, { "epoch": 11.332945285215366, "grad_norm": 0.25262585282325745, "learning_rate": 8.077800647619532e-05, "loss": -0.1674, "step": 19470 }, { "epoch": 11.338766006984866, "grad_norm": 0.15870510041713715, "learning_rate": 8.075628375416345e-05, "loss": -0.1724, "step": 19480 }, { "epoch": 11.344586728754365, "grad_norm": 0.26188716292381287, "learning_rate": 8.073455168918464e-05, "loss": -0.1681, "step": 19490 }, { "epoch": 11.350407450523864, "grad_norm": 0.19317977130413055, "learning_rate": 8.071281028786055e-05, "loss": -0.1703, "step": 19500 }, { "epoch": 11.356228172293365, "grad_norm": 0.23541414737701416, "learning_rate": 8.069105955679562e-05, "loss": -0.1709, "step": 19510 }, { "epoch": 11.362048894062864, "grad_norm": 0.24106985330581665, "learning_rate": 8.066929950259713e-05, "loss": -0.1677, "step": 19520 }, { "epoch": 11.367869615832364, "grad_norm": 0.20354525744915009, "learning_rate": 8.064753013187522e-05, "loss": -0.1711, "step": 19530 }, { "epoch": 11.373690337601863, "grad_norm": 0.19187188148498535, "learning_rate": 8.062575145124289e-05, "loss": -0.1712, "step": 19540 }, { "epoch": 11.379511059371362, "grad_norm": 0.17336969077587128, "learning_rate": 8.060396346731587e-05, "loss": -0.1699, "step": 19550 }, { "epoch": 11.385331781140861, "grad_norm": 0.24326065182685852, "learning_rate": 8.058216618671281e-05, "loss": -0.1729, "step": 19560 }, { "epoch": 11.39115250291036, "grad_norm": 0.1653679758310318, "learning_rate": 8.056035961605514e-05, "loss": -0.172, "step": 19570 }, { "epoch": 11.39697322467986, "grad_norm": 0.27018600702285767, "learning_rate": 8.05385437619671e-05, "loss": -0.1646, "step": 19580 }, { "epoch": 11.40279394644936, "grad_norm": 0.16477514803409576, "learning_rate": 8.05167186310758e-05, "loss": -0.174, "step": 19590 }, { "epoch": 11.40861466821886, "grad_norm": 0.18121051788330078, "learning_rate": 8.049488423001113e-05, "loss": -0.1717, "step": 19600 }, { "epoch": 11.414435389988359, "grad_norm": 0.317001610994339, "learning_rate": 8.047304056540581e-05, "loss": -0.1682, "step": 19610 }, { "epoch": 11.420256111757858, "grad_norm": 0.3444148004055023, "learning_rate": 8.045118764389534e-05, "loss": -0.1685, "step": 19620 }, { "epoch": 11.426076833527357, "grad_norm": 0.13857905566692352, "learning_rate": 8.042932547211809e-05, "loss": -0.1738, "step": 19630 }, { "epoch": 11.431897555296857, "grad_norm": 0.20435787737369537, "learning_rate": 8.04074540567152e-05, "loss": -0.1693, "step": 19640 }, { "epoch": 11.437718277066356, "grad_norm": 0.3113619089126587, "learning_rate": 8.038557340433063e-05, "loss": -0.1602, "step": 19650 }, { "epoch": 11.443538998835855, "grad_norm": 0.19085533916950226, "learning_rate": 8.036368352161115e-05, "loss": -0.17, "step": 19660 }, { "epoch": 11.449359720605354, "grad_norm": 0.27451035380363464, "learning_rate": 8.034178441520633e-05, "loss": -0.1692, "step": 19670 }, { "epoch": 11.455180442374855, "grad_norm": 0.16299372911453247, "learning_rate": 8.031987609176852e-05, "loss": -0.1658, "step": 19680 }, { "epoch": 11.461001164144355, "grad_norm": 0.15208184719085693, "learning_rate": 8.02979585579529e-05, "loss": -0.1697, "step": 19690 }, { "epoch": 11.466821885913854, "grad_norm": 0.16371959447860718, "learning_rate": 8.027603182041745e-05, "loss": -0.1641, "step": 19700 }, { "epoch": 11.472642607683353, "grad_norm": 0.14045967161655426, "learning_rate": 8.025409588582292e-05, "loss": -0.1655, "step": 19710 }, { "epoch": 11.478463329452852, "grad_norm": 0.33595532178878784, "learning_rate": 8.023215076083288e-05, "loss": -0.164, "step": 19720 }, { "epoch": 11.484284051222351, "grad_norm": 0.22768861055374146, "learning_rate": 8.021019645211367e-05, "loss": -0.1687, "step": 19730 }, { "epoch": 11.49010477299185, "grad_norm": 0.19044293463230133, "learning_rate": 8.018823296633441e-05, "loss": -0.1667, "step": 19740 }, { "epoch": 11.49592549476135, "grad_norm": 0.14282697439193726, "learning_rate": 8.016626031016708e-05, "loss": -0.163, "step": 19750 }, { "epoch": 11.501746216530849, "grad_norm": 0.16730362176895142, "learning_rate": 8.014427849028636e-05, "loss": -0.17, "step": 19760 }, { "epoch": 11.50756693830035, "grad_norm": 0.19306127727031708, "learning_rate": 8.012228751336974e-05, "loss": -0.1687, "step": 19770 }, { "epoch": 11.51338766006985, "grad_norm": 0.2994961142539978, "learning_rate": 8.01002873860975e-05, "loss": -0.1618, "step": 19780 }, { "epoch": 11.519208381839348, "grad_norm": 0.25361964106559753, "learning_rate": 8.00782781151527e-05, "loss": -0.1693, "step": 19790 }, { "epoch": 11.525029103608848, "grad_norm": 0.1771877110004425, "learning_rate": 8.005625970722119e-05, "loss": -0.1681, "step": 19800 }, { "epoch": 11.530849825378347, "grad_norm": 0.23163726925849915, "learning_rate": 8.003423216899158e-05, "loss": -0.174, "step": 19810 }, { "epoch": 11.536670547147846, "grad_norm": 0.15821857750415802, "learning_rate": 8.001219550715522e-05, "loss": -0.1658, "step": 19820 }, { "epoch": 11.542491268917345, "grad_norm": 0.1840433031320572, "learning_rate": 7.999014972840632e-05, "loss": -0.1625, "step": 19830 }, { "epoch": 11.548311990686845, "grad_norm": 0.17680470645427704, "learning_rate": 7.996809483944174e-05, "loss": -0.1713, "step": 19840 }, { "epoch": 11.554132712456344, "grad_norm": 0.1986510157585144, "learning_rate": 7.994603084696124e-05, "loss": -0.1725, "step": 19850 }, { "epoch": 11.559953434225845, "grad_norm": 0.1810416877269745, "learning_rate": 7.992395775766724e-05, "loss": -0.1729, "step": 19860 }, { "epoch": 11.565774155995344, "grad_norm": 0.16662615537643433, "learning_rate": 7.990187557826497e-05, "loss": -0.1641, "step": 19870 }, { "epoch": 11.571594877764843, "grad_norm": 0.15667851269245148, "learning_rate": 7.987978431546242e-05, "loss": -0.1697, "step": 19880 }, { "epoch": 11.577415599534342, "grad_norm": 0.23174837231636047, "learning_rate": 7.985768397597031e-05, "loss": -0.1682, "step": 19890 }, { "epoch": 11.583236321303842, "grad_norm": 0.23283347487449646, "learning_rate": 7.983557456650216e-05, "loss": -0.1679, "step": 19900 }, { "epoch": 11.58905704307334, "grad_norm": 0.3099663257598877, "learning_rate": 7.981345609377422e-05, "loss": -0.1689, "step": 19910 }, { "epoch": 11.59487776484284, "grad_norm": 0.20657463371753693, "learning_rate": 7.97913285645055e-05, "loss": -0.1695, "step": 19920 }, { "epoch": 11.60069848661234, "grad_norm": 0.3310626149177551, "learning_rate": 7.976919198541776e-05, "loss": -0.1706, "step": 19930 }, { "epoch": 11.60651920838184, "grad_norm": 0.27814361453056335, "learning_rate": 7.974704636323548e-05, "loss": -0.1671, "step": 19940 }, { "epoch": 11.61233993015134, "grad_norm": 0.12619437277317047, "learning_rate": 7.972489170468597e-05, "loss": -0.1741, "step": 19950 }, { "epoch": 11.618160651920839, "grad_norm": 0.18098771572113037, "learning_rate": 7.970272801649918e-05, "loss": -0.1692, "step": 19960 }, { "epoch": 11.623981373690338, "grad_norm": 0.20979225635528564, "learning_rate": 7.96805553054079e-05, "loss": -0.1682, "step": 19970 }, { "epoch": 11.629802095459837, "grad_norm": 0.20850937068462372, "learning_rate": 7.965837357814756e-05, "loss": -0.1723, "step": 19980 }, { "epoch": 11.635622817229336, "grad_norm": 0.2330608069896698, "learning_rate": 7.963618284145643e-05, "loss": -0.1677, "step": 19990 }, { "epoch": 11.641443538998836, "grad_norm": 0.27927204966545105, "learning_rate": 7.961398310207544e-05, "loss": -0.1667, "step": 20000 }, { "epoch": 11.647264260768335, "grad_norm": 0.2064618468284607, "learning_rate": 7.95917743667483e-05, "loss": -0.1696, "step": 20010 }, { "epoch": 11.653084982537834, "grad_norm": 0.1695016473531723, "learning_rate": 7.956955664222144e-05, "loss": -0.1689, "step": 20020 }, { "epoch": 11.658905704307333, "grad_norm": 0.18226559460163116, "learning_rate": 7.954732993524399e-05, "loss": -0.1702, "step": 20030 }, { "epoch": 11.664726426076834, "grad_norm": 0.22755958139896393, "learning_rate": 7.952509425256786e-05, "loss": -0.1703, "step": 20040 }, { "epoch": 11.670547147846333, "grad_norm": 0.19713671505451202, "learning_rate": 7.950284960094767e-05, "loss": -0.1696, "step": 20050 }, { "epoch": 11.676367869615833, "grad_norm": 0.21630948781967163, "learning_rate": 7.948059598714076e-05, "loss": -0.1689, "step": 20060 }, { "epoch": 11.682188591385332, "grad_norm": 0.26954248547554016, "learning_rate": 7.945833341790717e-05, "loss": -0.1667, "step": 20070 }, { "epoch": 11.688009313154831, "grad_norm": 0.19062219560146332, "learning_rate": 7.94360619000097e-05, "loss": -0.1663, "step": 20080 }, { "epoch": 11.69383003492433, "grad_norm": 0.15570349991321564, "learning_rate": 7.941378144021381e-05, "loss": -0.1708, "step": 20090 }, { "epoch": 11.69965075669383, "grad_norm": 0.15013635158538818, "learning_rate": 7.939149204528777e-05, "loss": -0.1713, "step": 20100 }, { "epoch": 11.705471478463329, "grad_norm": 0.27505016326904297, "learning_rate": 7.936919372200246e-05, "loss": -0.1678, "step": 20110 }, { "epoch": 11.71129220023283, "grad_norm": 0.15394115447998047, "learning_rate": 7.934688647713158e-05, "loss": -0.1709, "step": 20120 }, { "epoch": 11.717112922002329, "grad_norm": 0.21855542063713074, "learning_rate": 7.932457031745143e-05, "loss": -0.1692, "step": 20130 }, { "epoch": 11.722933643771828, "grad_norm": 0.19852453470230103, "learning_rate": 7.930224524974108e-05, "loss": -0.1679, "step": 20140 }, { "epoch": 11.728754365541327, "grad_norm": 0.1670646220445633, "learning_rate": 7.927991128078232e-05, "loss": -0.1721, "step": 20150 }, { "epoch": 11.734575087310827, "grad_norm": 0.14806927740573883, "learning_rate": 7.925756841735958e-05, "loss": -0.1713, "step": 20160 }, { "epoch": 11.740395809080326, "grad_norm": 0.23623178899288177, "learning_rate": 7.923521666626008e-05, "loss": -0.1724, "step": 20170 }, { "epoch": 11.746216530849825, "grad_norm": 0.2200116217136383, "learning_rate": 7.921285603427366e-05, "loss": -0.1727, "step": 20180 }, { "epoch": 11.752037252619324, "grad_norm": 0.24086834490299225, "learning_rate": 7.91904865281929e-05, "loss": -0.17, "step": 20190 }, { "epoch": 11.757857974388823, "grad_norm": 0.30423587560653687, "learning_rate": 7.916810815481307e-05, "loss": -0.1714, "step": 20200 }, { "epoch": 11.763678696158324, "grad_norm": 0.20136041939258575, "learning_rate": 7.914572092093211e-05, "loss": -0.171, "step": 20210 }, { "epoch": 11.769499417927824, "grad_norm": 0.1880744993686676, "learning_rate": 7.912332483335068e-05, "loss": -0.1716, "step": 20220 }, { "epoch": 11.775320139697323, "grad_norm": 0.2340647578239441, "learning_rate": 7.910091989887213e-05, "loss": -0.1728, "step": 20230 }, { "epoch": 11.781140861466822, "grad_norm": 0.22037047147750854, "learning_rate": 7.907850612430248e-05, "loss": -0.1702, "step": 20240 }, { "epoch": 11.786961583236321, "grad_norm": 0.17724336683750153, "learning_rate": 7.905608351645044e-05, "loss": -0.1744, "step": 20250 }, { "epoch": 11.79278230500582, "grad_norm": 0.19799315929412842, "learning_rate": 7.90336520821274e-05, "loss": -0.1704, "step": 20260 }, { "epoch": 11.79860302677532, "grad_norm": 0.0997978150844574, "learning_rate": 7.901121182814746e-05, "loss": -0.1724, "step": 20270 }, { "epoch": 11.804423748544819, "grad_norm": 0.22107020020484924, "learning_rate": 7.898876276132736e-05, "loss": -0.1715, "step": 20280 }, { "epoch": 11.81024447031432, "grad_norm": 0.18039807677268982, "learning_rate": 7.896630488848654e-05, "loss": -0.1699, "step": 20290 }, { "epoch": 11.81606519208382, "grad_norm": 0.18127432465553284, "learning_rate": 7.89438382164471e-05, "loss": -0.1712, "step": 20300 }, { "epoch": 11.821885913853318, "grad_norm": 0.19620750844478607, "learning_rate": 7.892136275203383e-05, "loss": -0.1714, "step": 20310 }, { "epoch": 11.827706635622818, "grad_norm": 0.22580833733081818, "learning_rate": 7.889887850207418e-05, "loss": -0.1712, "step": 20320 }, { "epoch": 11.833527357392317, "grad_norm": 0.19803400337696075, "learning_rate": 7.887638547339827e-05, "loss": -0.1705, "step": 20330 }, { "epoch": 11.839348079161816, "grad_norm": 0.22640962898731232, "learning_rate": 7.885388367283891e-05, "loss": -0.171, "step": 20340 }, { "epoch": 11.845168800931315, "grad_norm": 0.2696293294429779, "learning_rate": 7.88313731072315e-05, "loss": -0.171, "step": 20350 }, { "epoch": 11.850989522700814, "grad_norm": 0.24334125220775604, "learning_rate": 7.88088537834142e-05, "loss": -0.1711, "step": 20360 }, { "epoch": 11.856810244470314, "grad_norm": 0.26673972606658936, "learning_rate": 7.878632570822778e-05, "loss": -0.1716, "step": 20370 }, { "epoch": 11.862630966239813, "grad_norm": 0.16595405340194702, "learning_rate": 7.876378888851567e-05, "loss": -0.1676, "step": 20380 }, { "epoch": 11.868451688009314, "grad_norm": 0.25724831223487854, "learning_rate": 7.874124333112396e-05, "loss": -0.166, "step": 20390 }, { "epoch": 11.874272409778813, "grad_norm": 0.24640220403671265, "learning_rate": 7.871868904290138e-05, "loss": -0.1711, "step": 20400 }, { "epoch": 11.880093131548312, "grad_norm": 0.2353174090385437, "learning_rate": 7.869612603069935e-05, "loss": -0.1673, "step": 20410 }, { "epoch": 11.885913853317811, "grad_norm": 0.2302115261554718, "learning_rate": 7.867355430137192e-05, "loss": -0.1723, "step": 20420 }, { "epoch": 11.89173457508731, "grad_norm": 0.19367030262947083, "learning_rate": 7.865097386177577e-05, "loss": -0.1746, "step": 20430 }, { "epoch": 11.89755529685681, "grad_norm": 0.26950889825820923, "learning_rate": 7.862838471877023e-05, "loss": -0.169, "step": 20440 }, { "epoch": 11.90337601862631, "grad_norm": 0.21699711680412292, "learning_rate": 7.860578687921731e-05, "loss": -0.1714, "step": 20450 }, { "epoch": 11.909196740395808, "grad_norm": 0.2568325996398926, "learning_rate": 7.858318034998164e-05, "loss": -0.1712, "step": 20460 }, { "epoch": 11.91501746216531, "grad_norm": 0.12751692533493042, "learning_rate": 7.856056513793046e-05, "loss": -0.1744, "step": 20470 }, { "epoch": 11.920838183934809, "grad_norm": 0.1787475347518921, "learning_rate": 7.85379412499337e-05, "loss": -0.171, "step": 20480 }, { "epoch": 11.926658905704308, "grad_norm": 0.13275296986103058, "learning_rate": 7.851530869286389e-05, "loss": -0.1712, "step": 20490 }, { "epoch": 11.932479627473807, "grad_norm": 0.22483734786510468, "learning_rate": 7.849266747359619e-05, "loss": -0.1706, "step": 20500 }, { "epoch": 11.938300349243306, "grad_norm": 0.21637149155139923, "learning_rate": 7.847001759900843e-05, "loss": -0.1741, "step": 20510 }, { "epoch": 11.944121071012805, "grad_norm": 0.23624728620052338, "learning_rate": 7.844735907598102e-05, "loss": -0.1659, "step": 20520 }, { "epoch": 11.949941792782305, "grad_norm": 0.20167413353919983, "learning_rate": 7.842469191139703e-05, "loss": -0.1711, "step": 20530 }, { "epoch": 11.955762514551804, "grad_norm": 0.15723834931850433, "learning_rate": 7.840201611214215e-05, "loss": -0.1698, "step": 20540 }, { "epoch": 11.961583236321303, "grad_norm": 0.20900651812553406, "learning_rate": 7.837933168510469e-05, "loss": -0.1638, "step": 20550 }, { "epoch": 11.967403958090804, "grad_norm": 0.27698391675949097, "learning_rate": 7.835663863717559e-05, "loss": -0.1665, "step": 20560 }, { "epoch": 11.973224679860303, "grad_norm": 0.299365758895874, "learning_rate": 7.833393697524838e-05, "loss": -0.1655, "step": 20570 }, { "epoch": 11.979045401629802, "grad_norm": 0.2611566483974457, "learning_rate": 7.831122670621922e-05, "loss": -0.1677, "step": 20580 }, { "epoch": 11.984866123399302, "grad_norm": 0.15385518968105316, "learning_rate": 7.82885078369869e-05, "loss": -0.1704, "step": 20590 }, { "epoch": 11.990686845168801, "grad_norm": 0.16863654553890228, "learning_rate": 7.826578037445283e-05, "loss": -0.1734, "step": 20600 }, { "epoch": 11.9965075669383, "grad_norm": 0.2153935730457306, "learning_rate": 7.824304432552097e-05, "loss": -0.1735, "step": 20610 }, { "epoch": 12.0023282887078, "grad_norm": 0.16959676146507263, "learning_rate": 7.822029969709798e-05, "loss": -0.1687, "step": 20620 }, { "epoch": 12.008149010477299, "grad_norm": 0.10592757910490036, "learning_rate": 7.819754649609306e-05, "loss": -0.1649, "step": 20630 }, { "epoch": 12.013969732246798, "grad_norm": 0.13903486728668213, "learning_rate": 7.817478472941802e-05, "loss": -0.1686, "step": 20640 }, { "epoch": 12.019790454016299, "grad_norm": 0.169545516371727, "learning_rate": 7.815201440398727e-05, "loss": -0.175, "step": 20650 }, { "epoch": 12.025611175785798, "grad_norm": 0.1966988444328308, "learning_rate": 7.812923552671789e-05, "loss": -0.1684, "step": 20660 }, { "epoch": 12.031431897555297, "grad_norm": 0.1608911007642746, "learning_rate": 7.810644810452945e-05, "loss": -0.1735, "step": 20670 }, { "epoch": 12.037252619324796, "grad_norm": 0.2663988173007965, "learning_rate": 7.808365214434417e-05, "loss": -0.1693, "step": 20680 }, { "epoch": 12.043073341094296, "grad_norm": 0.15064002573490143, "learning_rate": 7.80608476530869e-05, "loss": -0.1714, "step": 20690 }, { "epoch": 12.048894062863795, "grad_norm": 0.14299838244915009, "learning_rate": 7.8038034637685e-05, "loss": -0.1745, "step": 20700 }, { "epoch": 12.054714784633294, "grad_norm": 0.18565437197685242, "learning_rate": 7.801521310506848e-05, "loss": -0.1737, "step": 20710 }, { "epoch": 12.060535506402793, "grad_norm": 0.19546881318092346, "learning_rate": 7.799238306216994e-05, "loss": -0.1758, "step": 20720 }, { "epoch": 12.066356228172292, "grad_norm": 0.12570224702358246, "learning_rate": 7.796954451592448e-05, "loss": -0.1703, "step": 20730 }, { "epoch": 12.072176949941793, "grad_norm": 0.23569625616073608, "learning_rate": 7.794669747326992e-05, "loss": -0.1705, "step": 20740 }, { "epoch": 12.077997671711293, "grad_norm": 0.24181635677814484, "learning_rate": 7.792384194114654e-05, "loss": -0.1724, "step": 20750 }, { "epoch": 12.083818393480792, "grad_norm": 0.2549026608467102, "learning_rate": 7.790097792649729e-05, "loss": -0.1676, "step": 20760 }, { "epoch": 12.089639115250291, "grad_norm": 0.23899224400520325, "learning_rate": 7.787810543626762e-05, "loss": -0.1703, "step": 20770 }, { "epoch": 12.09545983701979, "grad_norm": 0.30497586727142334, "learning_rate": 7.785522447740558e-05, "loss": -0.1719, "step": 20780 }, { "epoch": 12.10128055878929, "grad_norm": 0.24982966482639313, "learning_rate": 7.783233505686182e-05, "loss": -0.1632, "step": 20790 }, { "epoch": 12.107101280558789, "grad_norm": 0.17170658707618713, "learning_rate": 7.780943718158955e-05, "loss": -0.1738, "step": 20800 }, { "epoch": 12.112922002328288, "grad_norm": 0.18466010689735413, "learning_rate": 7.778653085854453e-05, "loss": -0.1691, "step": 20810 }, { "epoch": 12.118742724097789, "grad_norm": 0.19157662987709045, "learning_rate": 7.77636160946851e-05, "loss": -0.1719, "step": 20820 }, { "epoch": 12.124563445867288, "grad_norm": 0.2657836973667145, "learning_rate": 7.774069289697215e-05, "loss": -0.1681, "step": 20830 }, { "epoch": 12.130384167636787, "grad_norm": 0.12979015707969666, "learning_rate": 7.771776127236913e-05, "loss": -0.1731, "step": 20840 }, { "epoch": 12.136204889406287, "grad_norm": 0.16401410102844238, "learning_rate": 7.769482122784212e-05, "loss": -0.1729, "step": 20850 }, { "epoch": 12.142025611175786, "grad_norm": 0.2874735891819, "learning_rate": 7.767187277035963e-05, "loss": -0.1721, "step": 20860 }, { "epoch": 12.147846332945285, "grad_norm": 0.24162885546684265, "learning_rate": 7.764891590689285e-05, "loss": -0.1686, "step": 20870 }, { "epoch": 12.153667054714784, "grad_norm": 0.14213576912879944, "learning_rate": 7.762595064441542e-05, "loss": -0.1692, "step": 20880 }, { "epoch": 12.159487776484283, "grad_norm": 0.3717765212059021, "learning_rate": 7.760297698990362e-05, "loss": -0.1718, "step": 20890 }, { "epoch": 12.165308498253783, "grad_norm": 0.20832160115242004, "learning_rate": 7.757999495033623e-05, "loss": -0.1697, "step": 20900 }, { "epoch": 12.171129220023284, "grad_norm": 0.14062410593032837, "learning_rate": 7.755700453269456e-05, "loss": -0.1767, "step": 20910 }, { "epoch": 12.176949941792783, "grad_norm": 0.18054670095443726, "learning_rate": 7.753400574396254e-05, "loss": -0.1701, "step": 20920 }, { "epoch": 12.182770663562282, "grad_norm": 0.3309914171695709, "learning_rate": 7.751099859112655e-05, "loss": -0.1686, "step": 20930 }, { "epoch": 12.188591385331781, "grad_norm": 0.17338702082633972, "learning_rate": 7.748798308117557e-05, "loss": -0.1735, "step": 20940 }, { "epoch": 12.19441210710128, "grad_norm": 0.22434355318546295, "learning_rate": 7.746495922110112e-05, "loss": -0.1726, "step": 20950 }, { "epoch": 12.20023282887078, "grad_norm": 0.14891378581523895, "learning_rate": 7.744192701789723e-05, "loss": -0.1718, "step": 20960 }, { "epoch": 12.206053550640279, "grad_norm": 0.20215263962745667, "learning_rate": 7.741888647856046e-05, "loss": -0.1725, "step": 20970 }, { "epoch": 12.211874272409778, "grad_norm": 0.21785402297973633, "learning_rate": 7.739583761008994e-05, "loss": -0.1672, "step": 20980 }, { "epoch": 12.217694994179277, "grad_norm": 0.1996198147535324, "learning_rate": 7.73727804194873e-05, "loss": -0.1712, "step": 20990 }, { "epoch": 12.223515715948778, "grad_norm": 0.24582690000534058, "learning_rate": 7.734971491375671e-05, "loss": -0.1719, "step": 21000 }, { "epoch": 12.229336437718278, "grad_norm": 0.272693395614624, "learning_rate": 7.732664109990485e-05, "loss": -0.1708, "step": 21010 }, { "epoch": 12.235157159487777, "grad_norm": 0.26877525448799133, "learning_rate": 7.730355898494095e-05, "loss": -0.1628, "step": 21020 }, { "epoch": 12.240977881257276, "grad_norm": 0.221077099442482, "learning_rate": 7.728046857587673e-05, "loss": -0.1672, "step": 21030 }, { "epoch": 12.246798603026775, "grad_norm": 0.24263769388198853, "learning_rate": 7.725736987972647e-05, "loss": -0.1703, "step": 21040 }, { "epoch": 12.252619324796274, "grad_norm": 0.30044886469841003, "learning_rate": 7.723426290350691e-05, "loss": -0.1688, "step": 21050 }, { "epoch": 12.258440046565774, "grad_norm": 0.2262112945318222, "learning_rate": 7.721114765423736e-05, "loss": -0.1671, "step": 21060 }, { "epoch": 12.264260768335273, "grad_norm": 0.22270157933235168, "learning_rate": 7.718802413893963e-05, "loss": -0.173, "step": 21070 }, { "epoch": 12.270081490104772, "grad_norm": 0.22479675710201263, "learning_rate": 7.716489236463802e-05, "loss": -0.1698, "step": 21080 }, { "epoch": 12.275902211874273, "grad_norm": 0.19307886064052582, "learning_rate": 7.714175233835936e-05, "loss": -0.1731, "step": 21090 }, { "epoch": 12.281722933643772, "grad_norm": 0.29259055852890015, "learning_rate": 7.711860406713299e-05, "loss": -0.1723, "step": 21100 }, { "epoch": 12.287543655413272, "grad_norm": 0.20881609618663788, "learning_rate": 7.70954475579907e-05, "loss": -0.1724, "step": 21110 }, { "epoch": 12.29336437718277, "grad_norm": 0.21355192363262177, "learning_rate": 7.707228281796688e-05, "loss": -0.1697, "step": 21120 }, { "epoch": 12.29918509895227, "grad_norm": 0.2691132128238678, "learning_rate": 7.704910985409833e-05, "loss": -0.17, "step": 21130 }, { "epoch": 12.30500582072177, "grad_norm": 0.21978391706943512, "learning_rate": 7.702592867342439e-05, "loss": -0.1697, "step": 21140 }, { "epoch": 12.310826542491268, "grad_norm": 0.231549933552742, "learning_rate": 7.700273928298691e-05, "loss": -0.1709, "step": 21150 }, { "epoch": 12.316647264260768, "grad_norm": 0.2832668125629425, "learning_rate": 7.697954168983021e-05, "loss": -0.1709, "step": 21160 }, { "epoch": 12.322467986030269, "grad_norm": 0.19434428215026855, "learning_rate": 7.695633590100109e-05, "loss": -0.1696, "step": 21170 }, { "epoch": 12.328288707799768, "grad_norm": 0.2189367413520813, "learning_rate": 7.693312192354886e-05, "loss": -0.17, "step": 21180 }, { "epoch": 12.334109429569267, "grad_norm": 0.16399487853050232, "learning_rate": 7.690989976452532e-05, "loss": -0.1724, "step": 21190 }, { "epoch": 12.339930151338766, "grad_norm": 0.15024837851524353, "learning_rate": 7.688666943098475e-05, "loss": -0.1717, "step": 21200 }, { "epoch": 12.345750873108265, "grad_norm": 0.14667071402072906, "learning_rate": 7.686343092998389e-05, "loss": -0.1723, "step": 21210 }, { "epoch": 12.351571594877765, "grad_norm": 0.23672398924827576, "learning_rate": 7.684018426858202e-05, "loss": -0.1707, "step": 21220 }, { "epoch": 12.357392316647264, "grad_norm": 0.17926350235939026, "learning_rate": 7.681692945384084e-05, "loss": -0.1719, "step": 21230 }, { "epoch": 12.363213038416763, "grad_norm": 0.19499222934246063, "learning_rate": 7.679366649282456e-05, "loss": -0.1747, "step": 21240 }, { "epoch": 12.369033760186262, "grad_norm": 0.2500455379486084, "learning_rate": 7.677039539259983e-05, "loss": -0.1703, "step": 21250 }, { "epoch": 12.374854481955763, "grad_norm": 0.17885275185108185, "learning_rate": 7.674711616023581e-05, "loss": -0.1719, "step": 21260 }, { "epoch": 12.380675203725263, "grad_norm": 0.15814121067523956, "learning_rate": 7.672382880280413e-05, "loss": -0.1707, "step": 21270 }, { "epoch": 12.386495925494762, "grad_norm": 0.21945124864578247, "learning_rate": 7.670053332737885e-05, "loss": -0.1713, "step": 21280 }, { "epoch": 12.392316647264261, "grad_norm": 0.17065951228141785, "learning_rate": 7.667722974103654e-05, "loss": -0.1736, "step": 21290 }, { "epoch": 12.39813736903376, "grad_norm": 0.24911333620548248, "learning_rate": 7.66539180508562e-05, "loss": -0.1676, "step": 21300 }, { "epoch": 12.40395809080326, "grad_norm": 0.2571590840816498, "learning_rate": 7.663059826391932e-05, "loss": -0.1671, "step": 21310 }, { "epoch": 12.409778812572759, "grad_norm": 0.2113342136144638, "learning_rate": 7.660727038730981e-05, "loss": -0.1698, "step": 21320 }, { "epoch": 12.415599534342258, "grad_norm": 0.25472065806388855, "learning_rate": 7.65839344281141e-05, "loss": -0.1686, "step": 21330 }, { "epoch": 12.421420256111757, "grad_norm": 0.25762805342674255, "learning_rate": 7.656059039342101e-05, "loss": -0.1718, "step": 21340 }, { "epoch": 12.427240977881258, "grad_norm": 0.27611151337623596, "learning_rate": 7.653723829032187e-05, "loss": -0.1683, "step": 21350 }, { "epoch": 12.433061699650757, "grad_norm": 0.24299517273902893, "learning_rate": 7.65138781259104e-05, "loss": -0.1641, "step": 21360 }, { "epoch": 12.438882421420256, "grad_norm": 0.3121216893196106, "learning_rate": 7.649050990728279e-05, "loss": -0.1723, "step": 21370 }, { "epoch": 12.444703143189756, "grad_norm": 0.23146966099739075, "learning_rate": 7.646713364153774e-05, "loss": -0.1678, "step": 21380 }, { "epoch": 12.450523864959255, "grad_norm": 0.24595341086387634, "learning_rate": 7.64437493357763e-05, "loss": -0.1695, "step": 21390 }, { "epoch": 12.456344586728754, "grad_norm": 0.14067304134368896, "learning_rate": 7.642035699710202e-05, "loss": -0.1712, "step": 21400 }, { "epoch": 12.462165308498253, "grad_norm": 0.1651693433523178, "learning_rate": 7.639695663262089e-05, "loss": -0.1737, "step": 21410 }, { "epoch": 12.467986030267753, "grad_norm": 0.16865940392017365, "learning_rate": 7.637354824944128e-05, "loss": -0.1628, "step": 21420 }, { "epoch": 12.473806752037252, "grad_norm": 0.2698113024234772, "learning_rate": 7.635013185467408e-05, "loss": -0.1732, "step": 21430 }, { "epoch": 12.479627473806753, "grad_norm": 0.2551887333393097, "learning_rate": 7.632670745543256e-05, "loss": -0.172, "step": 21440 }, { "epoch": 12.485448195576252, "grad_norm": 0.22588825225830078, "learning_rate": 7.630327505883242e-05, "loss": -0.1704, "step": 21450 }, { "epoch": 12.491268917345751, "grad_norm": 0.17695359885692596, "learning_rate": 7.627983467199182e-05, "loss": -0.1732, "step": 21460 }, { "epoch": 12.49708963911525, "grad_norm": 0.15563906729221344, "learning_rate": 7.625638630203132e-05, "loss": -0.1692, "step": 21470 }, { "epoch": 12.50291036088475, "grad_norm": 0.20819534361362457, "learning_rate": 7.623292995607394e-05, "loss": -0.1704, "step": 21480 }, { "epoch": 12.508731082654249, "grad_norm": 0.2106199860572815, "learning_rate": 7.620946564124507e-05, "loss": -0.1694, "step": 21490 }, { "epoch": 12.514551804423748, "grad_norm": 0.2352447360754013, "learning_rate": 7.618599336467256e-05, "loss": -0.1724, "step": 21500 }, { "epoch": 12.520372526193247, "grad_norm": 0.16738392412662506, "learning_rate": 7.616251313348666e-05, "loss": -0.1718, "step": 21510 }, { "epoch": 12.526193247962748, "grad_norm": 0.27976202964782715, "learning_rate": 7.613902495482005e-05, "loss": -0.1683, "step": 21520 }, { "epoch": 12.532013969732247, "grad_norm": 0.20664285123348236, "learning_rate": 7.611552883580784e-05, "loss": -0.1734, "step": 21530 }, { "epoch": 12.537834691501747, "grad_norm": 0.15693198144435883, "learning_rate": 7.609202478358748e-05, "loss": -0.1709, "step": 21540 }, { "epoch": 12.543655413271246, "grad_norm": 0.2200556993484497, "learning_rate": 7.606851280529895e-05, "loss": -0.1713, "step": 21550 }, { "epoch": 12.549476135040745, "grad_norm": 0.166641503572464, "learning_rate": 7.604499290808449e-05, "loss": -0.1724, "step": 21560 }, { "epoch": 12.555296856810244, "grad_norm": 0.17264746129512787, "learning_rate": 7.602146509908888e-05, "loss": -0.172, "step": 21570 }, { "epoch": 12.561117578579744, "grad_norm": 0.238009974360466, "learning_rate": 7.599792938545921e-05, "loss": -0.1665, "step": 21580 }, { "epoch": 12.566938300349243, "grad_norm": 0.27419745922088623, "learning_rate": 7.597438577434506e-05, "loss": -0.1706, "step": 21590 }, { "epoch": 12.572759022118742, "grad_norm": 0.1876625418663025, "learning_rate": 7.595083427289831e-05, "loss": -0.1734, "step": 21600 }, { "epoch": 12.578579743888243, "grad_norm": 0.2602040767669678, "learning_rate": 7.59272748882733e-05, "loss": -0.1742, "step": 21610 }, { "epoch": 12.584400465657742, "grad_norm": 0.12041919678449631, "learning_rate": 7.590370762762675e-05, "loss": -0.1719, "step": 21620 }, { "epoch": 12.590221187427241, "grad_norm": 0.2296297550201416, "learning_rate": 7.588013249811777e-05, "loss": -0.1671, "step": 21630 }, { "epoch": 12.59604190919674, "grad_norm": 0.09902723133563995, "learning_rate": 7.585654950690786e-05, "loss": -0.1728, "step": 21640 }, { "epoch": 12.60186263096624, "grad_norm": 0.2476573884487152, "learning_rate": 7.583295866116091e-05, "loss": -0.1692, "step": 21650 }, { "epoch": 12.607683352735739, "grad_norm": 0.17431087791919708, "learning_rate": 7.580935996804321e-05, "loss": -0.1685, "step": 21660 }, { "epoch": 12.613504074505238, "grad_norm": 0.28369054198265076, "learning_rate": 7.57857534347234e-05, "loss": -0.1705, "step": 21670 }, { "epoch": 12.619324796274737, "grad_norm": 0.2029622197151184, "learning_rate": 7.576213906837254e-05, "loss": -0.1718, "step": 21680 }, { "epoch": 12.625145518044237, "grad_norm": 0.19394545257091522, "learning_rate": 7.573851687616403e-05, "loss": -0.1695, "step": 21690 }, { "epoch": 12.630966239813738, "grad_norm": 0.20384646952152252, "learning_rate": 7.571488686527368e-05, "loss": -0.1737, "step": 21700 }, { "epoch": 12.636786961583237, "grad_norm": 0.16647766530513763, "learning_rate": 7.569124904287968e-05, "loss": -0.1754, "step": 21710 }, { "epoch": 12.642607683352736, "grad_norm": 0.179524227976799, "learning_rate": 7.566760341616254e-05, "loss": -0.1718, "step": 21720 }, { "epoch": 12.648428405122235, "grad_norm": 0.25321638584136963, "learning_rate": 7.564394999230519e-05, "loss": -0.1704, "step": 21730 }, { "epoch": 12.654249126891735, "grad_norm": 0.15782274305820465, "learning_rate": 7.562028877849294e-05, "loss": -0.1743, "step": 21740 }, { "epoch": 12.660069848661234, "grad_norm": 0.2424970269203186, "learning_rate": 7.559661978191341e-05, "loss": -0.1717, "step": 21750 }, { "epoch": 12.665890570430733, "grad_norm": 0.19081422686576843, "learning_rate": 7.557294300975664e-05, "loss": -0.1746, "step": 21760 }, { "epoch": 12.671711292200232, "grad_norm": 0.1487026810646057, "learning_rate": 7.554925846921499e-05, "loss": -0.1695, "step": 21770 }, { "epoch": 12.677532013969731, "grad_norm": 0.22731304168701172, "learning_rate": 7.552556616748321e-05, "loss": -0.1731, "step": 21780 }, { "epoch": 12.683352735739232, "grad_norm": 0.24843385815620422, "learning_rate": 7.550186611175838e-05, "loss": -0.1671, "step": 21790 }, { "epoch": 12.689173457508732, "grad_norm": 0.18901507556438446, "learning_rate": 7.547815830923998e-05, "loss": -0.1751, "step": 21800 }, { "epoch": 12.69499417927823, "grad_norm": 0.29953089356422424, "learning_rate": 7.54544427671298e-05, "loss": -0.172, "step": 21810 }, { "epoch": 12.70081490104773, "grad_norm": 0.18666090071201324, "learning_rate": 7.543071949263198e-05, "loss": -0.1732, "step": 21820 }, { "epoch": 12.70663562281723, "grad_norm": 0.155965194106102, "learning_rate": 7.540698849295305e-05, "loss": -0.1765, "step": 21830 }, { "epoch": 12.712456344586728, "grad_norm": 0.14758433401584625, "learning_rate": 7.538324977530183e-05, "loss": -0.1739, "step": 21840 }, { "epoch": 12.718277066356228, "grad_norm": 0.305056095123291, "learning_rate": 7.535950334688955e-05, "loss": -0.1685, "step": 21850 }, { "epoch": 12.724097788125727, "grad_norm": 0.13768672943115234, "learning_rate": 7.533574921492972e-05, "loss": -0.1726, "step": 21860 }, { "epoch": 12.729918509895228, "grad_norm": 0.22325082123279572, "learning_rate": 7.531198738663824e-05, "loss": -0.1758, "step": 21870 }, { "epoch": 12.735739231664727, "grad_norm": 0.19107580184936523, "learning_rate": 7.528821786923333e-05, "loss": -0.1719, "step": 21880 }, { "epoch": 12.741559953434226, "grad_norm": 0.17727726697921753, "learning_rate": 7.52644406699355e-05, "loss": -0.1698, "step": 21890 }, { "epoch": 12.747380675203726, "grad_norm": 0.1496322900056839, "learning_rate": 7.524065579596766e-05, "loss": -0.1758, "step": 21900 }, { "epoch": 12.753201396973225, "grad_norm": 0.1777542531490326, "learning_rate": 7.521686325455506e-05, "loss": -0.176, "step": 21910 }, { "epoch": 12.759022118742724, "grad_norm": 0.18707652390003204, "learning_rate": 7.51930630529252e-05, "loss": -0.1718, "step": 21920 }, { "epoch": 12.764842840512223, "grad_norm": 0.20773561298847198, "learning_rate": 7.516925519830797e-05, "loss": -0.1723, "step": 21930 }, { "epoch": 12.770663562281722, "grad_norm": 0.25772908329963684, "learning_rate": 7.514543969793557e-05, "loss": -0.1706, "step": 21940 }, { "epoch": 12.776484284051222, "grad_norm": 0.32183727622032166, "learning_rate": 7.512161655904251e-05, "loss": -0.1704, "step": 21950 }, { "epoch": 12.782305005820723, "grad_norm": 0.2017184942960739, "learning_rate": 7.509778578886563e-05, "loss": -0.1713, "step": 21960 }, { "epoch": 12.788125727590222, "grad_norm": 0.18155233561992645, "learning_rate": 7.507394739464412e-05, "loss": -0.1725, "step": 21970 }, { "epoch": 12.793946449359721, "grad_norm": 0.20798787474632263, "learning_rate": 7.50501013836194e-05, "loss": -0.1672, "step": 21980 }, { "epoch": 12.79976717112922, "grad_norm": 0.1949644684791565, "learning_rate": 7.50262477630353e-05, "loss": -0.1688, "step": 21990 }, { "epoch": 12.80558789289872, "grad_norm": 0.1568857729434967, "learning_rate": 7.500238654013794e-05, "loss": -0.1684, "step": 22000 }, { "epoch": 12.811408614668219, "grad_norm": 0.24938036501407623, "learning_rate": 7.497851772217566e-05, "loss": -0.1729, "step": 22010 }, { "epoch": 12.817229336437718, "grad_norm": 0.17973892390727997, "learning_rate": 7.495464131639924e-05, "loss": -0.1746, "step": 22020 }, { "epoch": 12.823050058207217, "grad_norm": 0.15105991065502167, "learning_rate": 7.493075733006166e-05, "loss": -0.1733, "step": 22030 }, { "epoch": 12.828870779976716, "grad_norm": 0.19466528296470642, "learning_rate": 7.490686577041828e-05, "loss": -0.1711, "step": 22040 }, { "epoch": 12.834691501746217, "grad_norm": 0.18970251083374023, "learning_rate": 7.488296664472668e-05, "loss": -0.1681, "step": 22050 }, { "epoch": 12.840512223515717, "grad_norm": 0.19175833463668823, "learning_rate": 7.485905996024682e-05, "loss": -0.1726, "step": 22060 }, { "epoch": 12.846332945285216, "grad_norm": 0.31560081243515015, "learning_rate": 7.483514572424093e-05, "loss": -0.1747, "step": 22070 }, { "epoch": 12.852153667054715, "grad_norm": 0.1988903284072876, "learning_rate": 7.481122394397349e-05, "loss": -0.1733, "step": 22080 }, { "epoch": 12.857974388824214, "grad_norm": 0.229895681142807, "learning_rate": 7.478729462671131e-05, "loss": -0.1749, "step": 22090 }, { "epoch": 12.863795110593713, "grad_norm": 0.27898702025413513, "learning_rate": 7.47633577797235e-05, "loss": -0.1669, "step": 22100 }, { "epoch": 12.869615832363213, "grad_norm": 0.26959967613220215, "learning_rate": 7.473941341028144e-05, "loss": -0.1728, "step": 22110 }, { "epoch": 12.875436554132712, "grad_norm": 0.18122059106826782, "learning_rate": 7.471546152565879e-05, "loss": -0.1755, "step": 22120 }, { "epoch": 12.881257275902211, "grad_norm": 0.14361558854579926, "learning_rate": 7.46915021331315e-05, "loss": -0.1685, "step": 22130 }, { "epoch": 12.887077997671712, "grad_norm": 0.16159562766551971, "learning_rate": 7.466753523997778e-05, "loss": -0.1715, "step": 22140 }, { "epoch": 12.892898719441211, "grad_norm": 0.25378817319869995, "learning_rate": 7.464356085347819e-05, "loss": -0.1705, "step": 22150 }, { "epoch": 12.89871944121071, "grad_norm": 0.1854284256696701, "learning_rate": 7.461957898091548e-05, "loss": -0.1702, "step": 22160 }, { "epoch": 12.90454016298021, "grad_norm": 0.22824391722679138, "learning_rate": 7.459558962957473e-05, "loss": -0.1723, "step": 22170 }, { "epoch": 12.910360884749709, "grad_norm": 0.21251128613948822, "learning_rate": 7.457159280674326e-05, "loss": -0.1724, "step": 22180 }, { "epoch": 12.916181606519208, "grad_norm": 0.17452841997146606, "learning_rate": 7.454758851971066e-05, "loss": -0.1737, "step": 22190 }, { "epoch": 12.922002328288707, "grad_norm": 0.19964689016342163, "learning_rate": 7.45235767757688e-05, "loss": -0.1698, "step": 22200 }, { "epoch": 12.927823050058207, "grad_norm": 0.1545131355524063, "learning_rate": 7.449955758221183e-05, "loss": -0.1635, "step": 22210 }, { "epoch": 12.933643771827708, "grad_norm": 0.1342148631811142, "learning_rate": 7.447553094633615e-05, "loss": -0.172, "step": 22220 }, { "epoch": 12.939464493597207, "grad_norm": 0.2029929906129837, "learning_rate": 7.445149687544039e-05, "loss": -0.1727, "step": 22230 }, { "epoch": 12.945285215366706, "grad_norm": 0.2236425131559372, "learning_rate": 7.44274553768255e-05, "loss": -0.1716, "step": 22240 }, { "epoch": 12.951105937136205, "grad_norm": 0.17313139140605927, "learning_rate": 7.440340645779464e-05, "loss": -0.1735, "step": 22250 }, { "epoch": 12.956926658905704, "grad_norm": 0.10227436572313309, "learning_rate": 7.437935012565322e-05, "loss": -0.1708, "step": 22260 }, { "epoch": 12.962747380675204, "grad_norm": 0.23369772732257843, "learning_rate": 7.435528638770893e-05, "loss": -0.1692, "step": 22270 }, { "epoch": 12.968568102444703, "grad_norm": 0.14372795820236206, "learning_rate": 7.433121525127171e-05, "loss": -0.1676, "step": 22280 }, { "epoch": 12.974388824214202, "grad_norm": 0.21356689929962158, "learning_rate": 7.430713672365371e-05, "loss": -0.1588, "step": 22290 }, { "epoch": 12.980209545983701, "grad_norm": 0.1722528040409088, "learning_rate": 7.428305081216938e-05, "loss": -0.1726, "step": 22300 }, { "epoch": 12.9860302677532, "grad_norm": 0.23320642113685608, "learning_rate": 7.425895752413536e-05, "loss": -0.1729, "step": 22310 }, { "epoch": 12.991850989522701, "grad_norm": 0.14893485605716705, "learning_rate": 7.423485686687057e-05, "loss": -0.1671, "step": 22320 }, { "epoch": 12.9976717112922, "grad_norm": 0.1715293824672699, "learning_rate": 7.421074884769616e-05, "loss": -0.1726, "step": 22330 }, { "epoch": 13.0034924330617, "grad_norm": 0.16102413833141327, "learning_rate": 7.418663347393548e-05, "loss": -0.169, "step": 22340 }, { "epoch": 13.009313154831199, "grad_norm": 0.19643409550189972, "learning_rate": 7.416251075291418e-05, "loss": -0.1703, "step": 22350 }, { "epoch": 13.015133876600698, "grad_norm": 0.1835690140724182, "learning_rate": 7.413838069196007e-05, "loss": -0.1753, "step": 22360 }, { "epoch": 13.020954598370198, "grad_norm": 0.14547422528266907, "learning_rate": 7.411424329840324e-05, "loss": -0.1708, "step": 22370 }, { "epoch": 13.026775320139697, "grad_norm": 0.15975989401340485, "learning_rate": 7.409009857957601e-05, "loss": -0.1652, "step": 22380 }, { "epoch": 13.032596041909196, "grad_norm": 0.22136594355106354, "learning_rate": 7.40659465428129e-05, "loss": -0.1681, "step": 22390 }, { "epoch": 13.038416763678697, "grad_norm": 0.20021235942840576, "learning_rate": 7.404178719545063e-05, "loss": -0.1688, "step": 22400 }, { "epoch": 13.044237485448196, "grad_norm": 0.24376839399337769, "learning_rate": 7.401762054482822e-05, "loss": -0.1645, "step": 22410 }, { "epoch": 13.050058207217695, "grad_norm": 0.19294647872447968, "learning_rate": 7.39934465982868e-05, "loss": -0.1704, "step": 22420 }, { "epoch": 13.055878928987195, "grad_norm": 0.2832930386066437, "learning_rate": 7.396926536316984e-05, "loss": -0.1734, "step": 22430 }, { "epoch": 13.061699650756694, "grad_norm": 0.22521458566188812, "learning_rate": 7.394507684682293e-05, "loss": -0.1683, "step": 22440 }, { "epoch": 13.067520372526193, "grad_norm": 0.2733934819698334, "learning_rate": 7.392088105659393e-05, "loss": -0.1716, "step": 22450 }, { "epoch": 13.073341094295692, "grad_norm": 0.21534954011440277, "learning_rate": 7.389667799983284e-05, "loss": -0.1681, "step": 22460 }, { "epoch": 13.079161816065191, "grad_norm": 0.22780676186084747, "learning_rate": 7.387246768389193e-05, "loss": -0.1713, "step": 22470 }, { "epoch": 13.08498253783469, "grad_norm": 0.27511900663375854, "learning_rate": 7.384825011612563e-05, "loss": -0.1718, "step": 22480 }, { "epoch": 13.090803259604192, "grad_norm": 0.24828407168388367, "learning_rate": 7.382402530389066e-05, "loss": -0.1636, "step": 22490 }, { "epoch": 13.09662398137369, "grad_norm": 0.20946109294891357, "learning_rate": 7.379979325454582e-05, "loss": -0.1707, "step": 22500 }, { "epoch": 13.10244470314319, "grad_norm": 0.23711352050304413, "learning_rate": 7.37755539754522e-05, "loss": -0.1725, "step": 22510 }, { "epoch": 13.10826542491269, "grad_norm": 0.14373472332954407, "learning_rate": 7.375130747397302e-05, "loss": -0.1738, "step": 22520 }, { "epoch": 13.114086146682189, "grad_norm": 0.18181349337100983, "learning_rate": 7.372705375747377e-05, "loss": -0.1734, "step": 22530 }, { "epoch": 13.119906868451688, "grad_norm": 0.30135050415992737, "learning_rate": 7.370279283332205e-05, "loss": -0.1713, "step": 22540 }, { "epoch": 13.125727590221187, "grad_norm": 0.204696387052536, "learning_rate": 7.36785247088877e-05, "loss": -0.1716, "step": 22550 }, { "epoch": 13.131548311990686, "grad_norm": 0.2369365692138672, "learning_rate": 7.365424939154275e-05, "loss": -0.1722, "step": 22560 }, { "epoch": 13.137369033760187, "grad_norm": 0.1684751957654953, "learning_rate": 7.362996688866138e-05, "loss": -0.172, "step": 22570 }, { "epoch": 13.143189755529686, "grad_norm": 0.2395264059305191, "learning_rate": 7.360567720761999e-05, "loss": -0.1712, "step": 22580 }, { "epoch": 13.149010477299186, "grad_norm": 0.22920192778110504, "learning_rate": 7.358138035579711e-05, "loss": -0.1701, "step": 22590 }, { "epoch": 13.154831199068685, "grad_norm": 0.189186230301857, "learning_rate": 7.355707634057354e-05, "loss": -0.1689, "step": 22600 }, { "epoch": 13.160651920838184, "grad_norm": 0.1578788012266159, "learning_rate": 7.353276516933215e-05, "loss": -0.1766, "step": 22610 }, { "epoch": 13.166472642607683, "grad_norm": 0.15297886729240417, "learning_rate": 7.350844684945806e-05, "loss": -0.1686, "step": 22620 }, { "epoch": 13.172293364377182, "grad_norm": 0.2548614740371704, "learning_rate": 7.348412138833851e-05, "loss": -0.1725, "step": 22630 }, { "epoch": 13.178114086146682, "grad_norm": 0.15916293859481812, "learning_rate": 7.345978879336295e-05, "loss": -0.1734, "step": 22640 }, { "epoch": 13.18393480791618, "grad_norm": 0.11746558547019958, "learning_rate": 7.343544907192296e-05, "loss": -0.1702, "step": 22650 }, { "epoch": 13.189755529685682, "grad_norm": 0.1733592301607132, "learning_rate": 7.341110223141235e-05, "loss": -0.1697, "step": 22660 }, { "epoch": 13.195576251455181, "grad_norm": 0.3119220435619354, "learning_rate": 7.3386748279227e-05, "loss": -0.171, "step": 22670 }, { "epoch": 13.20139697322468, "grad_norm": 0.23215968906879425, "learning_rate": 7.336238722276501e-05, "loss": -0.1656, "step": 22680 }, { "epoch": 13.20721769499418, "grad_norm": 0.3094214200973511, "learning_rate": 7.333801906942663e-05, "loss": -0.1725, "step": 22690 }, { "epoch": 13.213038416763679, "grad_norm": 0.13321553170681, "learning_rate": 7.331364382661428e-05, "loss": -0.1697, "step": 22700 }, { "epoch": 13.218859138533178, "grad_norm": 0.16922487318515778, "learning_rate": 7.328926150173248e-05, "loss": -0.1735, "step": 22710 }, { "epoch": 13.224679860302677, "grad_norm": 0.3363385796546936, "learning_rate": 7.326487210218795e-05, "loss": -0.1738, "step": 22720 }, { "epoch": 13.230500582072176, "grad_norm": 0.2550869584083557, "learning_rate": 7.324047563538955e-05, "loss": -0.1637, "step": 22730 }, { "epoch": 13.236321303841676, "grad_norm": 0.1370771825313568, "learning_rate": 7.321607210874828e-05, "loss": -0.1709, "step": 22740 }, { "epoch": 13.242142025611177, "grad_norm": 0.19722136855125427, "learning_rate": 7.31916615296773e-05, "loss": -0.1737, "step": 22750 }, { "epoch": 13.247962747380676, "grad_norm": 0.14751487970352173, "learning_rate": 7.316724390559188e-05, "loss": -0.173, "step": 22760 }, { "epoch": 13.253783469150175, "grad_norm": 0.18051524460315704, "learning_rate": 7.314281924390946e-05, "loss": -0.178, "step": 22770 }, { "epoch": 13.259604190919674, "grad_norm": 0.13044783473014832, "learning_rate": 7.311838755204959e-05, "loss": -0.1715, "step": 22780 }, { "epoch": 13.265424912689173, "grad_norm": 0.2286159247159958, "learning_rate": 7.3093948837434e-05, "loss": -0.1723, "step": 22790 }, { "epoch": 13.271245634458673, "grad_norm": 0.2651482820510864, "learning_rate": 7.306950310748651e-05, "loss": -0.1746, "step": 22800 }, { "epoch": 13.277066356228172, "grad_norm": 0.2597445249557495, "learning_rate": 7.304505036963311e-05, "loss": -0.1694, "step": 22810 }, { "epoch": 13.282887077997671, "grad_norm": 0.19425246119499207, "learning_rate": 7.302059063130186e-05, "loss": -0.1726, "step": 22820 }, { "epoch": 13.28870779976717, "grad_norm": 0.15793977677822113, "learning_rate": 7.2996123899923e-05, "loss": -0.1744, "step": 22830 }, { "epoch": 13.294528521536671, "grad_norm": 0.26710423827171326, "learning_rate": 7.297165018292886e-05, "loss": -0.1691, "step": 22840 }, { "epoch": 13.30034924330617, "grad_norm": 0.1296713799238205, "learning_rate": 7.294716948775396e-05, "loss": -0.1743, "step": 22850 }, { "epoch": 13.30616996507567, "grad_norm": 0.24804022908210754, "learning_rate": 7.292268182183484e-05, "loss": -0.1711, "step": 22860 }, { "epoch": 13.311990686845169, "grad_norm": 0.22321750223636627, "learning_rate": 7.28981871926102e-05, "loss": -0.1754, "step": 22870 }, { "epoch": 13.317811408614668, "grad_norm": 0.18563657999038696, "learning_rate": 7.28736856075209e-05, "loss": -0.1696, "step": 22880 }, { "epoch": 13.323632130384167, "grad_norm": 0.19446276128292084, "learning_rate": 7.284917707400985e-05, "loss": -0.1757, "step": 22890 }, { "epoch": 13.329452852153667, "grad_norm": 0.18277952075004578, "learning_rate": 7.282466159952212e-05, "loss": -0.169, "step": 22900 }, { "epoch": 13.335273573923166, "grad_norm": 0.19935676455497742, "learning_rate": 7.280013919150483e-05, "loss": -0.1698, "step": 22910 }, { "epoch": 13.341094295692667, "grad_norm": 0.21340268850326538, "learning_rate": 7.277560985740728e-05, "loss": -0.1607, "step": 22920 }, { "epoch": 13.346915017462166, "grad_norm": 0.1847635954618454, "learning_rate": 7.275107360468079e-05, "loss": -0.1701, "step": 22930 }, { "epoch": 13.352735739231665, "grad_norm": 0.1522154062986374, "learning_rate": 7.272653044077885e-05, "loss": -0.1717, "step": 22940 }, { "epoch": 13.358556461001164, "grad_norm": 0.21442927420139313, "learning_rate": 7.270198037315703e-05, "loss": -0.1688, "step": 22950 }, { "epoch": 13.364377182770664, "grad_norm": 0.22261211276054382, "learning_rate": 7.267742340927297e-05, "loss": -0.1738, "step": 22960 }, { "epoch": 13.370197904540163, "grad_norm": 0.18816529214382172, "learning_rate": 7.265285955658645e-05, "loss": -0.1768, "step": 22970 }, { "epoch": 13.376018626309662, "grad_norm": 0.11659689247608185, "learning_rate": 7.26282888225593e-05, "loss": -0.1724, "step": 22980 }, { "epoch": 13.381839348079161, "grad_norm": 0.2118854820728302, "learning_rate": 7.260371121465548e-05, "loss": -0.1744, "step": 22990 }, { "epoch": 13.38766006984866, "grad_norm": 0.24277687072753906, "learning_rate": 7.2579126740341e-05, "loss": -0.1679, "step": 23000 }, { "epoch": 13.39348079161816, "grad_norm": 0.18349884450435638, "learning_rate": 7.2554535407084e-05, "loss": -0.1733, "step": 23010 }, { "epoch": 13.39930151338766, "grad_norm": 0.17465513944625854, "learning_rate": 7.252993722235464e-05, "loss": -0.1718, "step": 23020 }, { "epoch": 13.40512223515716, "grad_norm": 0.20303863286972046, "learning_rate": 7.250533219362523e-05, "loss": -0.175, "step": 23030 }, { "epoch": 13.41094295692666, "grad_norm": 0.29893791675567627, "learning_rate": 7.248072032837012e-05, "loss": -0.1672, "step": 23040 }, { "epoch": 13.416763678696158, "grad_norm": 0.207825168967247, "learning_rate": 7.245610163406575e-05, "loss": -0.1699, "step": 23050 }, { "epoch": 13.422584400465658, "grad_norm": 0.1901099681854248, "learning_rate": 7.243147611819061e-05, "loss": -0.1707, "step": 23060 }, { "epoch": 13.428405122235157, "grad_norm": 0.2572406828403473, "learning_rate": 7.240684378822531e-05, "loss": -0.171, "step": 23070 }, { "epoch": 13.434225844004656, "grad_norm": 0.2153395116329193, "learning_rate": 7.238220465165248e-05, "loss": -0.172, "step": 23080 }, { "epoch": 13.440046565774155, "grad_norm": 0.3048163056373596, "learning_rate": 7.235755871595684e-05, "loss": -0.1689, "step": 23090 }, { "epoch": 13.445867287543656, "grad_norm": 0.1959233582019806, "learning_rate": 7.233290598862517e-05, "loss": -0.1699, "step": 23100 }, { "epoch": 13.451688009313155, "grad_norm": 0.23059804737567902, "learning_rate": 7.230824647714635e-05, "loss": -0.1715, "step": 23110 }, { "epoch": 13.457508731082655, "grad_norm": 0.24568015336990356, "learning_rate": 7.228358018901124e-05, "loss": -0.1734, "step": 23120 }, { "epoch": 13.463329452852154, "grad_norm": 0.3029194474220276, "learning_rate": 7.225890713171286e-05, "loss": -0.1703, "step": 23130 }, { "epoch": 13.469150174621653, "grad_norm": 0.12852603197097778, "learning_rate": 7.223422731274618e-05, "loss": -0.1753, "step": 23140 }, { "epoch": 13.474970896391152, "grad_norm": 0.17007164657115936, "learning_rate": 7.220954073960832e-05, "loss": -0.1737, "step": 23150 }, { "epoch": 13.480791618160652, "grad_norm": 0.2791992425918579, "learning_rate": 7.218484741979838e-05, "loss": -0.1727, "step": 23160 }, { "epoch": 13.48661233993015, "grad_norm": 0.2177504003047943, "learning_rate": 7.216014736081756e-05, "loss": -0.168, "step": 23170 }, { "epoch": 13.49243306169965, "grad_norm": 0.14704522490501404, "learning_rate": 7.213544057016906e-05, "loss": -0.1738, "step": 23180 }, { "epoch": 13.498253783469151, "grad_norm": 0.21209798753261566, "learning_rate": 7.211072705535819e-05, "loss": -0.1735, "step": 23190 }, { "epoch": 13.50407450523865, "grad_norm": 0.21768134832382202, "learning_rate": 7.208600682389224e-05, "loss": -0.1738, "step": 23200 }, { "epoch": 13.50989522700815, "grad_norm": 0.22862748801708221, "learning_rate": 7.206127988328055e-05, "loss": -0.1721, "step": 23210 }, { "epoch": 13.515715948777649, "grad_norm": 0.22222402691841125, "learning_rate": 7.203654624103453e-05, "loss": -0.1746, "step": 23220 }, { "epoch": 13.521536670547148, "grad_norm": 0.22650198638439178, "learning_rate": 7.201180590466761e-05, "loss": -0.1721, "step": 23230 }, { "epoch": 13.527357392316647, "grad_norm": 0.23950150609016418, "learning_rate": 7.198705888169523e-05, "loss": -0.1716, "step": 23240 }, { "epoch": 13.533178114086146, "grad_norm": 0.20791442692279816, "learning_rate": 7.196230517963491e-05, "loss": -0.1755, "step": 23250 }, { "epoch": 13.538998835855645, "grad_norm": 0.15397411584854126, "learning_rate": 7.193754480600615e-05, "loss": -0.1747, "step": 23260 }, { "epoch": 13.544819557625146, "grad_norm": 0.20801450312137604, "learning_rate": 7.19127777683305e-05, "loss": -0.1669, "step": 23270 }, { "epoch": 13.550640279394646, "grad_norm": 0.19478341937065125, "learning_rate": 7.188800407413156e-05, "loss": -0.1781, "step": 23280 }, { "epoch": 13.556461001164145, "grad_norm": 0.19652540981769562, "learning_rate": 7.186322373093489e-05, "loss": -0.1708, "step": 23290 }, { "epoch": 13.562281722933644, "grad_norm": 0.20293089747428894, "learning_rate": 7.18384367462681e-05, "loss": -0.1755, "step": 23300 }, { "epoch": 13.568102444703143, "grad_norm": 0.18551470339298248, "learning_rate": 7.181364312766085e-05, "loss": -0.1733, "step": 23310 }, { "epoch": 13.573923166472643, "grad_norm": 0.2436571717262268, "learning_rate": 7.178884288264477e-05, "loss": -0.1762, "step": 23320 }, { "epoch": 13.579743888242142, "grad_norm": 0.22750991582870483, "learning_rate": 7.176403601875353e-05, "loss": -0.1702, "step": 23330 }, { "epoch": 13.585564610011641, "grad_norm": 0.2610931396484375, "learning_rate": 7.173922254352279e-05, "loss": -0.1728, "step": 23340 }, { "epoch": 13.59138533178114, "grad_norm": 0.16343554854393005, "learning_rate": 7.171440246449024e-05, "loss": -0.1745, "step": 23350 }, { "epoch": 13.59720605355064, "grad_norm": 0.19387298822402954, "learning_rate": 7.168957578919555e-05, "loss": -0.1728, "step": 23360 }, { "epoch": 13.60302677532014, "grad_norm": 0.15193571150302887, "learning_rate": 7.16647425251804e-05, "loss": -0.1737, "step": 23370 }, { "epoch": 13.60884749708964, "grad_norm": 0.115907683968544, "learning_rate": 7.163990267998852e-05, "loss": -0.1729, "step": 23380 }, { "epoch": 13.614668218859139, "grad_norm": 0.1724177896976471, "learning_rate": 7.161505626116556e-05, "loss": -0.1704, "step": 23390 }, { "epoch": 13.620488940628638, "grad_norm": 0.2153671532869339, "learning_rate": 7.159020327625923e-05, "loss": -0.1732, "step": 23400 }, { "epoch": 13.626309662398137, "grad_norm": 0.16668640077114105, "learning_rate": 7.15653437328192e-05, "loss": -0.1652, "step": 23410 }, { "epoch": 13.632130384167636, "grad_norm": 0.13220958411693573, "learning_rate": 7.154047763839713e-05, "loss": -0.1728, "step": 23420 }, { "epoch": 13.637951105937136, "grad_norm": 0.21366754174232483, "learning_rate": 7.15156050005467e-05, "loss": -0.1734, "step": 23430 }, { "epoch": 13.643771827706635, "grad_norm": 0.23259930312633514, "learning_rate": 7.149072582682357e-05, "loss": -0.174, "step": 23440 }, { "epoch": 13.649592549476136, "grad_norm": 0.1938934028148651, "learning_rate": 7.146584012478535e-05, "loss": -0.1747, "step": 23450 }, { "epoch": 13.655413271245635, "grad_norm": 0.1785568743944168, "learning_rate": 7.144094790199169e-05, "loss": -0.1724, "step": 23460 }, { "epoch": 13.661233993015134, "grad_norm": 0.20849201083183289, "learning_rate": 7.141604916600415e-05, "loss": -0.1743, "step": 23470 }, { "epoch": 13.667054714784634, "grad_norm": 0.15577076375484467, "learning_rate": 7.139114392438635e-05, "loss": -0.1758, "step": 23480 }, { "epoch": 13.672875436554133, "grad_norm": 0.1116483137011528, "learning_rate": 7.136623218470382e-05, "loss": -0.1736, "step": 23490 }, { "epoch": 13.678696158323632, "grad_norm": 0.22290244698524475, "learning_rate": 7.13413139545241e-05, "loss": -0.1731, "step": 23500 }, { "epoch": 13.684516880093131, "grad_norm": 0.187583789229393, "learning_rate": 7.131638924141668e-05, "loss": -0.173, "step": 23510 }, { "epoch": 13.69033760186263, "grad_norm": 0.19248422980308533, "learning_rate": 7.129145805295304e-05, "loss": -0.1716, "step": 23520 }, { "epoch": 13.69615832363213, "grad_norm": 0.12162961810827255, "learning_rate": 7.126652039670661e-05, "loss": -0.1779, "step": 23530 }, { "epoch": 13.70197904540163, "grad_norm": 0.1925746649503708, "learning_rate": 7.124157628025278e-05, "loss": -0.1727, "step": 23540 }, { "epoch": 13.70779976717113, "grad_norm": 0.1756446659564972, "learning_rate": 7.121662571116894e-05, "loss": -0.1752, "step": 23550 }, { "epoch": 13.713620488940629, "grad_norm": 0.17455589771270752, "learning_rate": 7.119166869703441e-05, "loss": -0.1764, "step": 23560 }, { "epoch": 13.719441210710128, "grad_norm": 0.21377980709075928, "learning_rate": 7.116670524543044e-05, "loss": -0.1694, "step": 23570 }, { "epoch": 13.725261932479627, "grad_norm": 0.22247987985610962, "learning_rate": 7.114173536394032e-05, "loss": -0.1749, "step": 23580 }, { "epoch": 13.731082654249127, "grad_norm": 0.17234846949577332, "learning_rate": 7.111675906014917e-05, "loss": -0.17, "step": 23590 }, { "epoch": 13.736903376018626, "grad_norm": 0.17098966240882874, "learning_rate": 7.109177634164421e-05, "loss": -0.175, "step": 23600 }, { "epoch": 13.742724097788125, "grad_norm": 0.19044123589992523, "learning_rate": 7.106678721601449e-05, "loss": -0.1771, "step": 23610 }, { "epoch": 13.748544819557626, "grad_norm": 0.21517950296401978, "learning_rate": 7.104179169085103e-05, "loss": -0.1745, "step": 23620 }, { "epoch": 13.754365541327125, "grad_norm": 0.269553542137146, "learning_rate": 7.101678977374683e-05, "loss": -0.1693, "step": 23630 }, { "epoch": 13.760186263096625, "grad_norm": 0.15665733814239502, "learning_rate": 7.099178147229685e-05, "loss": -0.172, "step": 23640 }, { "epoch": 13.766006984866124, "grad_norm": 0.16360414028167725, "learning_rate": 7.096676679409789e-05, "loss": -0.1697, "step": 23650 }, { "epoch": 13.771827706635623, "grad_norm": 0.18412698805332184, "learning_rate": 7.094174574674877e-05, "loss": -0.1723, "step": 23660 }, { "epoch": 13.777648428405122, "grad_norm": 0.20162199437618256, "learning_rate": 7.091671833785025e-05, "loss": -0.1749, "step": 23670 }, { "epoch": 13.783469150174621, "grad_norm": 0.21130463480949402, "learning_rate": 7.089168457500493e-05, "loss": -0.173, "step": 23680 }, { "epoch": 13.78928987194412, "grad_norm": 0.2117837816476822, "learning_rate": 7.086664446581747e-05, "loss": -0.1679, "step": 23690 }, { "epoch": 13.79511059371362, "grad_norm": 0.1868174523115158, "learning_rate": 7.084159801789438e-05, "loss": -0.1729, "step": 23700 }, { "epoch": 13.800931315483119, "grad_norm": 0.19903875887393951, "learning_rate": 7.081654523884411e-05, "loss": -0.1712, "step": 23710 }, { "epoch": 13.80675203725262, "grad_norm": 0.17958977818489075, "learning_rate": 7.0791486136277e-05, "loss": -0.1684, "step": 23720 }, { "epoch": 13.81257275902212, "grad_norm": 0.20863057672977448, "learning_rate": 7.07664207178054e-05, "loss": -0.1687, "step": 23730 }, { "epoch": 13.818393480791618, "grad_norm": 0.23865404725074768, "learning_rate": 7.074134899104345e-05, "loss": -0.1702, "step": 23740 }, { "epoch": 13.824214202561118, "grad_norm": 0.20894186198711395, "learning_rate": 7.071627096360735e-05, "loss": -0.1749, "step": 23750 }, { "epoch": 13.830034924330617, "grad_norm": 0.223727747797966, "learning_rate": 7.069118664311511e-05, "loss": -0.1745, "step": 23760 }, { "epoch": 13.835855646100116, "grad_norm": 0.1567213386297226, "learning_rate": 7.06660960371867e-05, "loss": -0.1766, "step": 23770 }, { "epoch": 13.841676367869615, "grad_norm": 0.2644517421722412, "learning_rate": 7.064099915344396e-05, "loss": -0.1666, "step": 23780 }, { "epoch": 13.847497089639115, "grad_norm": 0.26133283972740173, "learning_rate": 7.061589599951066e-05, "loss": -0.173, "step": 23790 }, { "epoch": 13.853317811408616, "grad_norm": 0.1568761020898819, "learning_rate": 7.05907865830125e-05, "loss": -0.1706, "step": 23800 }, { "epoch": 13.859138533178115, "grad_norm": 0.16806232929229736, "learning_rate": 7.056567091157703e-05, "loss": -0.1729, "step": 23810 }, { "epoch": 13.864959254947614, "grad_norm": 0.18229903280735016, "learning_rate": 7.054054899283375e-05, "loss": -0.171, "step": 23820 }, { "epoch": 13.870779976717113, "grad_norm": 0.2017216831445694, "learning_rate": 7.051542083441403e-05, "loss": -0.1717, "step": 23830 }, { "epoch": 13.876600698486612, "grad_norm": 0.18080180883407593, "learning_rate": 7.049028644395113e-05, "loss": -0.1732, "step": 23840 }, { "epoch": 13.882421420256112, "grad_norm": 0.18684464693069458, "learning_rate": 7.046514582908024e-05, "loss": -0.1752, "step": 23850 }, { "epoch": 13.88824214202561, "grad_norm": 0.23673053085803986, "learning_rate": 7.043999899743838e-05, "loss": -0.1742, "step": 23860 }, { "epoch": 13.89406286379511, "grad_norm": 0.18123869597911835, "learning_rate": 7.041484595666451e-05, "loss": -0.1698, "step": 23870 }, { "epoch": 13.89988358556461, "grad_norm": 0.18813660740852356, "learning_rate": 7.038968671439948e-05, "loss": -0.1732, "step": 23880 }, { "epoch": 13.90570430733411, "grad_norm": 0.2137608528137207, "learning_rate": 7.036452127828596e-05, "loss": -0.1685, "step": 23890 }, { "epoch": 13.91152502910361, "grad_norm": 0.19583532214164734, "learning_rate": 7.033934965596859e-05, "loss": -0.1762, "step": 23900 }, { "epoch": 13.917345750873109, "grad_norm": 0.200936958193779, "learning_rate": 7.031417185509381e-05, "loss": -0.174, "step": 23910 }, { "epoch": 13.923166472642608, "grad_norm": 0.20410972833633423, "learning_rate": 7.028898788331e-05, "loss": -0.1714, "step": 23920 }, { "epoch": 13.928987194412107, "grad_norm": 0.12134608626365662, "learning_rate": 7.026379774826736e-05, "loss": -0.1647, "step": 23930 }, { "epoch": 13.934807916181606, "grad_norm": 0.21012118458747864, "learning_rate": 7.0238601457618e-05, "loss": -0.1751, "step": 23940 }, { "epoch": 13.940628637951106, "grad_norm": 0.17676353454589844, "learning_rate": 7.02133990190159e-05, "loss": -0.1742, "step": 23950 }, { "epoch": 13.946449359720605, "grad_norm": 0.17625615000724792, "learning_rate": 7.018819044011687e-05, "loss": -0.1727, "step": 23960 }, { "epoch": 13.952270081490104, "grad_norm": 0.2113378793001175, "learning_rate": 7.016297572857863e-05, "loss": -0.1752, "step": 23970 }, { "epoch": 13.958090803259605, "grad_norm": 0.11539555341005325, "learning_rate": 7.013775489206072e-05, "loss": -0.1735, "step": 23980 }, { "epoch": 13.963911525029104, "grad_norm": 0.14329661428928375, "learning_rate": 7.01125279382246e-05, "loss": -0.1774, "step": 23990 }, { "epoch": 13.969732246798603, "grad_norm": 0.18468216061592102, "learning_rate": 7.008729487473351e-05, "loss": -0.1727, "step": 24000 }, { "epoch": 13.975552968568103, "grad_norm": 0.29593223333358765, "learning_rate": 7.006205570925263e-05, "loss": -0.1702, "step": 24010 }, { "epoch": 13.981373690337602, "grad_norm": 0.23273511230945587, "learning_rate": 7.003681044944892e-05, "loss": -0.1713, "step": 24020 }, { "epoch": 13.987194412107101, "grad_norm": 0.1808663308620453, "learning_rate": 7.001155910299126e-05, "loss": -0.1738, "step": 24030 }, { "epoch": 13.9930151338766, "grad_norm": 0.2319166511297226, "learning_rate": 6.99863016775503e-05, "loss": -0.1671, "step": 24040 }, { "epoch": 13.9988358556461, "grad_norm": 0.25137022137641907, "learning_rate": 6.996103818079859e-05, "loss": -0.1716, "step": 24050 }, { "epoch": 14.004656577415599, "grad_norm": 0.222672700881958, "learning_rate": 6.993576862041054e-05, "loss": -0.1711, "step": 24060 }, { "epoch": 14.0104772991851, "grad_norm": 0.20976360142230988, "learning_rate": 6.991049300406235e-05, "loss": -0.1754, "step": 24070 }, { "epoch": 14.016298020954599, "grad_norm": 0.195891335606575, "learning_rate": 6.988521133943209e-05, "loss": -0.1744, "step": 24080 }, { "epoch": 14.022118742724098, "grad_norm": 0.23414207994937897, "learning_rate": 6.985992363419966e-05, "loss": -0.1711, "step": 24090 }, { "epoch": 14.027939464493597, "grad_norm": 0.22730229794979095, "learning_rate": 6.983462989604682e-05, "loss": -0.1717, "step": 24100 }, { "epoch": 14.033760186263097, "grad_norm": 0.1796470731496811, "learning_rate": 6.980933013265709e-05, "loss": -0.1753, "step": 24110 }, { "epoch": 14.039580908032596, "grad_norm": 0.18339715898036957, "learning_rate": 6.978402435171592e-05, "loss": -0.1642, "step": 24120 }, { "epoch": 14.045401629802095, "grad_norm": 0.20906682312488556, "learning_rate": 6.975871256091052e-05, "loss": -0.1724, "step": 24130 }, { "epoch": 14.051222351571594, "grad_norm": 0.1987801045179367, "learning_rate": 6.973339476792995e-05, "loss": -0.1744, "step": 24140 }, { "epoch": 14.057043073341095, "grad_norm": 0.18677936494350433, "learning_rate": 6.970807098046505e-05, "loss": -0.1749, "step": 24150 }, { "epoch": 14.062863795110594, "grad_norm": 0.14486612379550934, "learning_rate": 6.968274120620858e-05, "loss": -0.1738, "step": 24160 }, { "epoch": 14.068684516880094, "grad_norm": 0.16513796150684357, "learning_rate": 6.965740545285499e-05, "loss": -0.1777, "step": 24170 }, { "epoch": 14.074505238649593, "grad_norm": 0.2060367316007614, "learning_rate": 6.963206372810068e-05, "loss": -0.1754, "step": 24180 }, { "epoch": 14.080325960419092, "grad_norm": 0.2564123272895813, "learning_rate": 6.960671603964375e-05, "loss": -0.1677, "step": 24190 }, { "epoch": 14.086146682188591, "grad_norm": 0.223873108625412, "learning_rate": 6.958136239518418e-05, "loss": -0.1725, "step": 24200 }, { "epoch": 14.09196740395809, "grad_norm": 0.17652159929275513, "learning_rate": 6.955600280242371e-05, "loss": -0.1739, "step": 24210 }, { "epoch": 14.09778812572759, "grad_norm": 0.2304995059967041, "learning_rate": 6.953063726906596e-05, "loss": -0.1707, "step": 24220 }, { "epoch": 14.103608847497089, "grad_norm": 0.22056087851524353, "learning_rate": 6.950526580281626e-05, "loss": -0.1725, "step": 24230 }, { "epoch": 14.10942956926659, "grad_norm": 0.17648760974407196, "learning_rate": 6.947988841138184e-05, "loss": -0.1698, "step": 24240 }, { "epoch": 14.115250291036089, "grad_norm": 0.19936951994895935, "learning_rate": 6.945450510247165e-05, "loss": -0.1741, "step": 24250 }, { "epoch": 14.121071012805588, "grad_norm": 0.14427800476551056, "learning_rate": 6.942911588379647e-05, "loss": -0.1727, "step": 24260 }, { "epoch": 14.126891734575088, "grad_norm": 0.14786171913146973, "learning_rate": 6.940372076306888e-05, "loss": -0.1723, "step": 24270 }, { "epoch": 14.132712456344587, "grad_norm": 0.18100029230117798, "learning_rate": 6.937831974800326e-05, "loss": -0.1734, "step": 24280 }, { "epoch": 14.138533178114086, "grad_norm": 0.18246859312057495, "learning_rate": 6.935291284631574e-05, "loss": -0.1692, "step": 24290 }, { "epoch": 14.144353899883585, "grad_norm": 0.17869213223457336, "learning_rate": 6.932750006572428e-05, "loss": -0.1758, "step": 24300 }, { "epoch": 14.150174621653084, "grad_norm": 0.1774594783782959, "learning_rate": 6.930208141394863e-05, "loss": -0.1579, "step": 24310 }, { "epoch": 14.155995343422584, "grad_norm": 0.1464976817369461, "learning_rate": 6.927665689871026e-05, "loss": -0.1746, "step": 24320 }, { "epoch": 14.161816065192085, "grad_norm": 0.29833275079727173, "learning_rate": 6.925122652773253e-05, "loss": -0.1747, "step": 24330 }, { "epoch": 14.167636786961584, "grad_norm": 0.13407671451568604, "learning_rate": 6.922579030874046e-05, "loss": -0.1751, "step": 24340 }, { "epoch": 14.173457508731083, "grad_norm": 0.240641251206398, "learning_rate": 6.920034824946093e-05, "loss": -0.1694, "step": 24350 }, { "epoch": 14.179278230500582, "grad_norm": 0.12193863093852997, "learning_rate": 6.917490035762255e-05, "loss": -0.1712, "step": 24360 }, { "epoch": 14.185098952270081, "grad_norm": 0.2991715669631958, "learning_rate": 6.914944664095573e-05, "loss": -0.1687, "step": 24370 }, { "epoch": 14.19091967403958, "grad_norm": 0.18120646476745605, "learning_rate": 6.912398710719264e-05, "loss": -0.1739, "step": 24380 }, { "epoch": 14.19674039580908, "grad_norm": 0.15051548182964325, "learning_rate": 6.90985217640672e-05, "loss": -0.1781, "step": 24390 }, { "epoch": 14.202561117578579, "grad_norm": 0.22661663591861725, "learning_rate": 6.90730506193151e-05, "loss": -0.1738, "step": 24400 }, { "epoch": 14.208381839348078, "grad_norm": 0.21501973271369934, "learning_rate": 6.904757368067384e-05, "loss": -0.1709, "step": 24410 }, { "epoch": 14.21420256111758, "grad_norm": 0.22533369064331055, "learning_rate": 6.90220909558826e-05, "loss": -0.1687, "step": 24420 }, { "epoch": 14.220023282887079, "grad_norm": 0.16298837959766388, "learning_rate": 6.899660245268237e-05, "loss": -0.1735, "step": 24430 }, { "epoch": 14.225844004656578, "grad_norm": 0.18144410848617554, "learning_rate": 6.897110817881592e-05, "loss": -0.1712, "step": 24440 }, { "epoch": 14.231664726426077, "grad_norm": 0.1900012493133545, "learning_rate": 6.894560814202769e-05, "loss": -0.1708, "step": 24450 }, { "epoch": 14.237485448195576, "grad_norm": 0.1656332165002823, "learning_rate": 6.892010235006394e-05, "loss": -0.1688, "step": 24460 }, { "epoch": 14.243306169965075, "grad_norm": 0.15526555478572845, "learning_rate": 6.889459081067264e-05, "loss": -0.1774, "step": 24470 }, { "epoch": 14.249126891734575, "grad_norm": 0.18214039504528046, "learning_rate": 6.886907353160356e-05, "loss": -0.1724, "step": 24480 }, { "epoch": 14.254947613504074, "grad_norm": 0.21490105986595154, "learning_rate": 6.884355052060814e-05, "loss": -0.1726, "step": 24490 }, { "epoch": 14.260768335273575, "grad_norm": 0.25254514813423157, "learning_rate": 6.88180217854396e-05, "loss": -0.1725, "step": 24500 }, { "epoch": 14.266589057043074, "grad_norm": 0.1506630927324295, "learning_rate": 6.87924873338529e-05, "loss": -0.1695, "step": 24510 }, { "epoch": 14.272409778812573, "grad_norm": 0.2972383201122284, "learning_rate": 6.876694717360475e-05, "loss": -0.1727, "step": 24520 }, { "epoch": 14.278230500582072, "grad_norm": 0.23036521673202515, "learning_rate": 6.874140131245355e-05, "loss": -0.1738, "step": 24530 }, { "epoch": 14.284051222351572, "grad_norm": 0.2638525664806366, "learning_rate": 6.871584975815948e-05, "loss": -0.169, "step": 24540 }, { "epoch": 14.28987194412107, "grad_norm": 0.21722012758255005, "learning_rate": 6.86902925184844e-05, "loss": -0.1746, "step": 24550 }, { "epoch": 14.29569266589057, "grad_norm": 0.1886986941099167, "learning_rate": 6.866472960119195e-05, "loss": -0.1702, "step": 24560 }, { "epoch": 14.30151338766007, "grad_norm": 0.14510607719421387, "learning_rate": 6.863916101404748e-05, "loss": -0.1742, "step": 24570 }, { "epoch": 14.307334109429569, "grad_norm": 0.114688441157341, "learning_rate": 6.8613586764818e-05, "loss": -0.175, "step": 24580 }, { "epoch": 14.31315483119907, "grad_norm": 0.15349318087100983, "learning_rate": 6.858800686127233e-05, "loss": -0.1758, "step": 24590 }, { "epoch": 14.318975552968569, "grad_norm": 0.1643262505531311, "learning_rate": 6.856242131118097e-05, "loss": -0.1748, "step": 24600 }, { "epoch": 14.324796274738068, "grad_norm": 0.2190718799829483, "learning_rate": 6.853683012231614e-05, "loss": -0.1743, "step": 24610 }, { "epoch": 14.330616996507567, "grad_norm": 0.23091179132461548, "learning_rate": 6.851123330245173e-05, "loss": -0.1741, "step": 24620 }, { "epoch": 14.336437718277066, "grad_norm": 0.2048763781785965, "learning_rate": 6.848563085936343e-05, "loss": -0.175, "step": 24630 }, { "epoch": 14.342258440046566, "grad_norm": 0.19869373738765717, "learning_rate": 6.846002280082853e-05, "loss": -0.1745, "step": 24640 }, { "epoch": 14.348079161816065, "grad_norm": 0.17415569722652435, "learning_rate": 6.843440913462614e-05, "loss": -0.1744, "step": 24650 }, { "epoch": 14.353899883585564, "grad_norm": 0.284613698720932, "learning_rate": 6.840878986853698e-05, "loss": -0.1735, "step": 24660 }, { "epoch": 14.359720605355063, "grad_norm": 0.202666774392128, "learning_rate": 6.838316501034352e-05, "loss": -0.1725, "step": 24670 }, { "epoch": 14.365541327124564, "grad_norm": 0.11404483765363693, "learning_rate": 6.83575345678299e-05, "loss": -0.1769, "step": 24680 }, { "epoch": 14.371362048894063, "grad_norm": 0.29311269521713257, "learning_rate": 6.833189854878196e-05, "loss": -0.1734, "step": 24690 }, { "epoch": 14.377182770663563, "grad_norm": 0.2106212079524994, "learning_rate": 6.83062569609873e-05, "loss": -0.1721, "step": 24700 }, { "epoch": 14.383003492433062, "grad_norm": 0.21470727026462555, "learning_rate": 6.828060981223512e-05, "loss": -0.1751, "step": 24710 }, { "epoch": 14.388824214202561, "grad_norm": 0.14923614263534546, "learning_rate": 6.825495711031634e-05, "loss": -0.1733, "step": 24720 }, { "epoch": 14.39464493597206, "grad_norm": 0.16040927171707153, "learning_rate": 6.822929886302359e-05, "loss": -0.1761, "step": 24730 }, { "epoch": 14.40046565774156, "grad_norm": 0.13183583319187164, "learning_rate": 6.820363507815116e-05, "loss": -0.1778, "step": 24740 }, { "epoch": 14.406286379511059, "grad_norm": 0.24555058777332306, "learning_rate": 6.817796576349501e-05, "loss": -0.1705, "step": 24750 }, { "epoch": 14.412107101280558, "grad_norm": 0.19275160133838654, "learning_rate": 6.815229092685285e-05, "loss": -0.1687, "step": 24760 }, { "epoch": 14.417927823050059, "grad_norm": 0.22933219373226166, "learning_rate": 6.812661057602399e-05, "loss": -0.1757, "step": 24770 }, { "epoch": 14.423748544819558, "grad_norm": 0.14625400304794312, "learning_rate": 6.810092471880943e-05, "loss": -0.1729, "step": 24780 }, { "epoch": 14.429569266589057, "grad_norm": 0.285205602645874, "learning_rate": 6.807523336301187e-05, "loss": -0.1718, "step": 24790 }, { "epoch": 14.435389988358557, "grad_norm": 0.21384498476982117, "learning_rate": 6.804953651643566e-05, "loss": -0.1713, "step": 24800 }, { "epoch": 14.441210710128056, "grad_norm": 0.1834525614976883, "learning_rate": 6.802383418688685e-05, "loss": -0.1717, "step": 24810 }, { "epoch": 14.447031431897555, "grad_norm": 0.2654552757740021, "learning_rate": 6.799812638217309e-05, "loss": -0.1709, "step": 24820 }, { "epoch": 14.452852153667054, "grad_norm": 0.19655899703502655, "learning_rate": 6.797241311010373e-05, "loss": -0.1728, "step": 24830 }, { "epoch": 14.458672875436553, "grad_norm": 0.1958548128604889, "learning_rate": 6.794669437848982e-05, "loss": -0.1702, "step": 24840 }, { "epoch": 14.464493597206054, "grad_norm": 0.14788641035556793, "learning_rate": 6.792097019514402e-05, "loss": -0.1724, "step": 24850 }, { "epoch": 14.470314318975554, "grad_norm": 0.19645379483699799, "learning_rate": 6.789524056788064e-05, "loss": -0.1709, "step": 24860 }, { "epoch": 14.476135040745053, "grad_norm": 0.2028084695339203, "learning_rate": 6.786950550451567e-05, "loss": -0.1743, "step": 24870 }, { "epoch": 14.481955762514552, "grad_norm": 0.18801936507225037, "learning_rate": 6.784376501286676e-05, "loss": -0.1721, "step": 24880 }, { "epoch": 14.487776484284051, "grad_norm": 0.1808944195508957, "learning_rate": 6.781801910075316e-05, "loss": -0.1766, "step": 24890 }, { "epoch": 14.49359720605355, "grad_norm": 0.19444715976715088, "learning_rate": 6.779226777599581e-05, "loss": -0.175, "step": 24900 }, { "epoch": 14.49941792782305, "grad_norm": 0.17025747895240784, "learning_rate": 6.776651104641729e-05, "loss": -0.173, "step": 24910 }, { "epoch": 14.505238649592549, "grad_norm": 0.28110599517822266, "learning_rate": 6.774074891984183e-05, "loss": -0.1719, "step": 24920 }, { "epoch": 14.511059371362048, "grad_norm": 0.2489691823720932, "learning_rate": 6.771498140409526e-05, "loss": -0.1709, "step": 24930 }, { "epoch": 14.516880093131547, "grad_norm": 0.1745544970035553, "learning_rate": 6.768920850700506e-05, "loss": -0.1763, "step": 24940 }, { "epoch": 14.522700814901048, "grad_norm": 0.16430284082889557, "learning_rate": 6.766343023640039e-05, "loss": -0.1687, "step": 24950 }, { "epoch": 14.528521536670548, "grad_norm": 0.15080104768276215, "learning_rate": 6.763764660011198e-05, "loss": -0.1763, "step": 24960 }, { "epoch": 14.534342258440047, "grad_norm": 0.2031201273202896, "learning_rate": 6.761185760597223e-05, "loss": -0.175, "step": 24970 }, { "epoch": 14.540162980209546, "grad_norm": 0.2231852412223816, "learning_rate": 6.758606326181515e-05, "loss": -0.1737, "step": 24980 }, { "epoch": 14.545983701979045, "grad_norm": 0.22220773994922638, "learning_rate": 6.75602635754764e-05, "loss": -0.1768, "step": 24990 }, { "epoch": 14.551804423748544, "grad_norm": 0.21946080029010773, "learning_rate": 6.75344585547932e-05, "loss": -0.1739, "step": 25000 }, { "epoch": 14.557625145518044, "grad_norm": 0.22862361371517181, "learning_rate": 6.750864820760449e-05, "loss": -0.1763, "step": 25010 }, { "epoch": 14.563445867287543, "grad_norm": 0.24635787308216095, "learning_rate": 6.748283254175072e-05, "loss": -0.1735, "step": 25020 }, { "epoch": 14.569266589057044, "grad_norm": 0.19931963086128235, "learning_rate": 6.745701156507404e-05, "loss": -0.1753, "step": 25030 }, { "epoch": 14.575087310826543, "grad_norm": 0.15435393154621124, "learning_rate": 6.743118528541818e-05, "loss": -0.1761, "step": 25040 }, { "epoch": 14.580908032596042, "grad_norm": 0.15423357486724854, "learning_rate": 6.740535371062846e-05, "loss": -0.1744, "step": 25050 }, { "epoch": 14.586728754365542, "grad_norm": 0.15276233851909637, "learning_rate": 6.737951684855185e-05, "loss": -0.1769, "step": 25060 }, { "epoch": 14.59254947613504, "grad_norm": 0.2705651521682739, "learning_rate": 6.735367470703691e-05, "loss": -0.1756, "step": 25070 }, { "epoch": 14.59837019790454, "grad_norm": 0.21647898852825165, "learning_rate": 6.732782729393379e-05, "loss": -0.1743, "step": 25080 }, { "epoch": 14.60419091967404, "grad_norm": 0.18118929862976074, "learning_rate": 6.730197461709425e-05, "loss": -0.175, "step": 25090 }, { "epoch": 14.610011641443538, "grad_norm": 0.18531055748462677, "learning_rate": 6.727611668437164e-05, "loss": -0.1709, "step": 25100 }, { "epoch": 14.615832363213038, "grad_norm": 0.14119406044483185, "learning_rate": 6.725025350362094e-05, "loss": -0.1741, "step": 25110 }, { "epoch": 14.621653084982539, "grad_norm": 0.17411676049232483, "learning_rate": 6.72243850826987e-05, "loss": -0.1768, "step": 25120 }, { "epoch": 14.627473806752038, "grad_norm": 0.25846317410469055, "learning_rate": 6.719851142946305e-05, "loss": -0.1749, "step": 25130 }, { "epoch": 14.633294528521537, "grad_norm": 0.3021990656852722, "learning_rate": 6.717263255177372e-05, "loss": -0.1681, "step": 25140 }, { "epoch": 14.639115250291036, "grad_norm": 0.17305956780910492, "learning_rate": 6.714674845749205e-05, "loss": -0.1749, "step": 25150 }, { "epoch": 14.644935972060535, "grad_norm": 0.20250444114208221, "learning_rate": 6.712085915448092e-05, "loss": -0.173, "step": 25160 }, { "epoch": 14.650756693830035, "grad_norm": 0.335670530796051, "learning_rate": 6.709496465060486e-05, "loss": -0.1727, "step": 25170 }, { "epoch": 14.656577415599534, "grad_norm": 0.19413620233535767, "learning_rate": 6.706906495372987e-05, "loss": -0.175, "step": 25180 }, { "epoch": 14.662398137369033, "grad_norm": 0.20583245158195496, "learning_rate": 6.704316007172365e-05, "loss": -0.1754, "step": 25190 }, { "epoch": 14.668218859138534, "grad_norm": 0.26598745584487915, "learning_rate": 6.701725001245539e-05, "loss": -0.1713, "step": 25200 }, { "epoch": 14.674039580908033, "grad_norm": 0.20074784755706787, "learning_rate": 6.699133478379588e-05, "loss": -0.173, "step": 25210 }, { "epoch": 14.679860302677533, "grad_norm": 0.15089687705039978, "learning_rate": 6.69654143936175e-05, "loss": -0.1724, "step": 25220 }, { "epoch": 14.685681024447032, "grad_norm": 0.2645115852355957, "learning_rate": 6.693948884979419e-05, "loss": -0.1706, "step": 25230 }, { "epoch": 14.691501746216531, "grad_norm": 0.15198543667793274, "learning_rate": 6.691355816020142e-05, "loss": -0.176, "step": 25240 }, { "epoch": 14.69732246798603, "grad_norm": 0.13521289825439453, "learning_rate": 6.688762233271624e-05, "loss": -0.174, "step": 25250 }, { "epoch": 14.70314318975553, "grad_norm": 0.21663494408130646, "learning_rate": 6.68616813752173e-05, "loss": -0.1707, "step": 25260 }, { "epoch": 14.708963911525029, "grad_norm": 0.1790519803762436, "learning_rate": 6.683573529558477e-05, "loss": -0.1741, "step": 25270 }, { "epoch": 14.714784633294528, "grad_norm": 0.1881546825170517, "learning_rate": 6.680978410170037e-05, "loss": -0.1739, "step": 25280 }, { "epoch": 14.720605355064027, "grad_norm": 0.20593313872814178, "learning_rate": 6.678382780144741e-05, "loss": -0.1719, "step": 25290 }, { "epoch": 14.726426076833528, "grad_norm": 0.1673659384250641, "learning_rate": 6.675786640271071e-05, "loss": -0.1765, "step": 25300 }, { "epoch": 14.732246798603027, "grad_norm": 0.17263354361057281, "learning_rate": 6.673189991337665e-05, "loss": -0.1755, "step": 25310 }, { "epoch": 14.738067520372526, "grad_norm": 0.2207842767238617, "learning_rate": 6.670592834133317e-05, "loss": -0.1717, "step": 25320 }, { "epoch": 14.743888242142026, "grad_norm": 0.20774489641189575, "learning_rate": 6.667995169446979e-05, "loss": -0.1761, "step": 25330 }, { "epoch": 14.749708963911525, "grad_norm": 0.21196100115776062, "learning_rate": 6.665396998067747e-05, "loss": -0.1707, "step": 25340 }, { "epoch": 14.755529685681024, "grad_norm": 0.22508756816387177, "learning_rate": 6.66279832078488e-05, "loss": -0.1772, "step": 25350 }, { "epoch": 14.761350407450523, "grad_norm": 0.1962384432554245, "learning_rate": 6.660199138387786e-05, "loss": -0.176, "step": 25360 }, { "epoch": 14.767171129220023, "grad_norm": 0.25208351016044617, "learning_rate": 6.65759945166603e-05, "loss": -0.1705, "step": 25370 }, { "epoch": 14.772991850989523, "grad_norm": 0.1964457780122757, "learning_rate": 6.654999261409326e-05, "loss": -0.17, "step": 25380 }, { "epoch": 14.778812572759023, "grad_norm": 0.14411978423595428, "learning_rate": 6.652398568407544e-05, "loss": -0.174, "step": 25390 }, { "epoch": 14.784633294528522, "grad_norm": 0.1181168481707573, "learning_rate": 6.649797373450707e-05, "loss": -0.1716, "step": 25400 }, { "epoch": 14.790454016298021, "grad_norm": 0.16617465019226074, "learning_rate": 6.647195677328988e-05, "loss": -0.1767, "step": 25410 }, { "epoch": 14.79627473806752, "grad_norm": 0.14945964515209198, "learning_rate": 6.644593480832712e-05, "loss": -0.1763, "step": 25420 }, { "epoch": 14.80209545983702, "grad_norm": 0.2066841870546341, "learning_rate": 6.641990784752363e-05, "loss": -0.1727, "step": 25430 }, { "epoch": 14.807916181606519, "grad_norm": 0.3086516857147217, "learning_rate": 6.639387589878566e-05, "loss": -0.1738, "step": 25440 }, { "epoch": 14.813736903376018, "grad_norm": 0.2218942791223526, "learning_rate": 6.636783897002103e-05, "loss": -0.1759, "step": 25450 }, { "epoch": 14.819557625145517, "grad_norm": 0.18574099242687225, "learning_rate": 6.63417970691391e-05, "loss": -0.1728, "step": 25460 }, { "epoch": 14.825378346915018, "grad_norm": 0.24018453061580658, "learning_rate": 6.63157502040507e-05, "loss": -0.1768, "step": 25470 }, { "epoch": 14.831199068684517, "grad_norm": 0.16075213253498077, "learning_rate": 6.628969838266819e-05, "loss": -0.1722, "step": 25480 }, { "epoch": 14.837019790454017, "grad_norm": 0.1918426752090454, "learning_rate": 6.626364161290541e-05, "loss": -0.1761, "step": 25490 }, { "epoch": 14.842840512223516, "grad_norm": 0.1951223909854889, "learning_rate": 6.623757990267774e-05, "loss": -0.1737, "step": 25500 }, { "epoch": 14.848661233993015, "grad_norm": 0.16390077769756317, "learning_rate": 6.621151325990201e-05, "loss": -0.175, "step": 25510 }, { "epoch": 14.854481955762514, "grad_norm": 0.19476094841957092, "learning_rate": 6.618544169249657e-05, "loss": -0.1745, "step": 25520 }, { "epoch": 14.860302677532014, "grad_norm": 0.1510647088289261, "learning_rate": 6.615936520838133e-05, "loss": -0.1729, "step": 25530 }, { "epoch": 14.866123399301513, "grad_norm": 0.1181366890668869, "learning_rate": 6.613328381547759e-05, "loss": -0.1783, "step": 25540 }, { "epoch": 14.871944121071014, "grad_norm": 0.2813461720943451, "learning_rate": 6.610719752170821e-05, "loss": -0.164, "step": 25550 }, { "epoch": 14.877764842840513, "grad_norm": 0.19493769109249115, "learning_rate": 6.60811063349975e-05, "loss": -0.1737, "step": 25560 }, { "epoch": 14.883585564610012, "grad_norm": 0.21408593654632568, "learning_rate": 6.605501026327127e-05, "loss": -0.1735, "step": 25570 }, { "epoch": 14.889406286379511, "grad_norm": 0.13138534128665924, "learning_rate": 6.602890931445685e-05, "loss": -0.1681, "step": 25580 }, { "epoch": 14.89522700814901, "grad_norm": 0.2324838787317276, "learning_rate": 6.6002803496483e-05, "loss": -0.1705, "step": 25590 }, { "epoch": 14.90104772991851, "grad_norm": 0.21300239861011505, "learning_rate": 6.597669281727997e-05, "loss": -0.1741, "step": 25600 }, { "epoch": 14.906868451688009, "grad_norm": 0.1796691119670868, "learning_rate": 6.595057728477949e-05, "loss": -0.1712, "step": 25610 }, { "epoch": 14.912689173457508, "grad_norm": 0.2815582752227783, "learning_rate": 6.59244569069148e-05, "loss": -0.1727, "step": 25620 }, { "epoch": 14.918509895227007, "grad_norm": 0.18018878996372223, "learning_rate": 6.589833169162054e-05, "loss": -0.1735, "step": 25630 }, { "epoch": 14.924330616996507, "grad_norm": 0.22165614366531372, "learning_rate": 6.587220164683291e-05, "loss": -0.1734, "step": 25640 }, { "epoch": 14.930151338766008, "grad_norm": 0.2091432362794876, "learning_rate": 6.58460667804895e-05, "loss": -0.175, "step": 25650 }, { "epoch": 14.935972060535507, "grad_norm": 0.21899457275867462, "learning_rate": 6.581992710052938e-05, "loss": -0.1719, "step": 25660 }, { "epoch": 14.941792782305006, "grad_norm": 0.21086086332798004, "learning_rate": 6.579378261489311e-05, "loss": -0.1731, "step": 25670 }, { "epoch": 14.947613504074505, "grad_norm": 0.16586856544017792, "learning_rate": 6.576763333152268e-05, "loss": -0.1748, "step": 25680 }, { "epoch": 14.953434225844005, "grad_norm": 0.1621132344007492, "learning_rate": 6.574147925836159e-05, "loss": -0.1767, "step": 25690 }, { "epoch": 14.959254947613504, "grad_norm": 0.18368297815322876, "learning_rate": 6.571532040335472e-05, "loss": -0.1748, "step": 25700 }, { "epoch": 14.965075669383003, "grad_norm": 0.2165188193321228, "learning_rate": 6.568915677444845e-05, "loss": -0.1752, "step": 25710 }, { "epoch": 14.970896391152502, "grad_norm": 0.15971164405345917, "learning_rate": 6.56629883795906e-05, "loss": -0.1716, "step": 25720 }, { "epoch": 14.976717112922003, "grad_norm": 0.1622537523508072, "learning_rate": 6.563681522673043e-05, "loss": -0.1741, "step": 25730 }, { "epoch": 14.982537834691502, "grad_norm": 0.2614172399044037, "learning_rate": 6.561063732381867e-05, "loss": -0.1731, "step": 25740 }, { "epoch": 14.988358556461002, "grad_norm": 0.16483791172504425, "learning_rate": 6.558445467880745e-05, "loss": -0.1757, "step": 25750 }, { "epoch": 14.9941792782305, "grad_norm": 0.3287380039691925, "learning_rate": 6.55582672996504e-05, "loss": -0.1722, "step": 25760 }, { "epoch": 15.0, "grad_norm": 0.15594571828842163, "learning_rate": 6.553207519430253e-05, "loss": -0.1759, "step": 25770 }, { "epoch": 15.0058207217695, "grad_norm": 0.1314963400363922, "learning_rate": 6.550587837072032e-05, "loss": -0.1738, "step": 25780 }, { "epoch": 15.011641443538998, "grad_norm": 0.1582624912261963, "learning_rate": 6.547967683686166e-05, "loss": -0.1734, "step": 25790 }, { "epoch": 15.017462165308498, "grad_norm": 0.20335829257965088, "learning_rate": 6.545347060068591e-05, "loss": -0.1778, "step": 25800 }, { "epoch": 15.023282887077997, "grad_norm": 0.2522447109222412, "learning_rate": 6.542725967015382e-05, "loss": -0.1735, "step": 25810 }, { "epoch": 15.029103608847498, "grad_norm": 0.2558622360229492, "learning_rate": 6.540104405322757e-05, "loss": -0.1746, "step": 25820 }, { "epoch": 15.034924330616997, "grad_norm": 0.16482259333133698, "learning_rate": 6.537482375787077e-05, "loss": -0.1698, "step": 25830 }, { "epoch": 15.040745052386496, "grad_norm": 0.20499376952648163, "learning_rate": 6.534859879204845e-05, "loss": -0.1725, "step": 25840 }, { "epoch": 15.046565774155995, "grad_norm": 0.1363297402858734, "learning_rate": 6.532236916372709e-05, "loss": -0.1751, "step": 25850 }, { "epoch": 15.052386495925495, "grad_norm": 0.24329055845737457, "learning_rate": 6.529613488087454e-05, "loss": -0.1742, "step": 25860 }, { "epoch": 15.058207217694994, "grad_norm": 0.19798074662685394, "learning_rate": 6.526989595146009e-05, "loss": -0.177, "step": 25870 }, { "epoch": 15.064027939464493, "grad_norm": 0.18550366163253784, "learning_rate": 6.524365238345441e-05, "loss": -0.1763, "step": 25880 }, { "epoch": 15.069848661233992, "grad_norm": 0.2118711769580841, "learning_rate": 6.521740418482964e-05, "loss": -0.1749, "step": 25890 }, { "epoch": 15.075669383003492, "grad_norm": 0.1345231533050537, "learning_rate": 6.519115136355925e-05, "loss": -0.1761, "step": 25900 }, { "epoch": 15.081490104772993, "grad_norm": 0.18095868825912476, "learning_rate": 6.51648939276182e-05, "loss": -0.1736, "step": 25910 }, { "epoch": 15.087310826542492, "grad_norm": 0.22829267382621765, "learning_rate": 6.513863188498277e-05, "loss": -0.1738, "step": 25920 }, { "epoch": 15.093131548311991, "grad_norm": 0.17368373274803162, "learning_rate": 6.511236524363068e-05, "loss": -0.175, "step": 25930 }, { "epoch": 15.09895227008149, "grad_norm": 0.13030390441417694, "learning_rate": 6.508609401154104e-05, "loss": -0.173, "step": 25940 }, { "epoch": 15.10477299185099, "grad_norm": 0.1772412210702896, "learning_rate": 6.505981819669439e-05, "loss": -0.1747, "step": 25950 }, { "epoch": 15.110593713620489, "grad_norm": 0.1470426619052887, "learning_rate": 6.503353780707258e-05, "loss": -0.1725, "step": 25960 }, { "epoch": 15.116414435389988, "grad_norm": 0.18318985402584076, "learning_rate": 6.500725285065895e-05, "loss": -0.1738, "step": 25970 }, { "epoch": 15.122235157159487, "grad_norm": 0.24383170902729034, "learning_rate": 6.498096333543813e-05, "loss": -0.1719, "step": 25980 }, { "epoch": 15.128055878928988, "grad_norm": 0.16373717784881592, "learning_rate": 6.49546692693962e-05, "loss": -0.1765, "step": 25990 }, { "epoch": 15.133876600698487, "grad_norm": 0.30537283420562744, "learning_rate": 6.492837066052059e-05, "loss": -0.1705, "step": 26000 }, { "epoch": 15.139697322467986, "grad_norm": 0.20327740907669067, "learning_rate": 6.490206751680014e-05, "loss": -0.1706, "step": 26010 }, { "epoch": 15.145518044237486, "grad_norm": 0.16069041192531586, "learning_rate": 6.487575984622505e-05, "loss": -0.172, "step": 26020 }, { "epoch": 15.151338766006985, "grad_norm": 0.1347699910402298, "learning_rate": 6.484944765678689e-05, "loss": -0.1703, "step": 26030 }, { "epoch": 15.157159487776484, "grad_norm": 0.22580809891223907, "learning_rate": 6.482313095647861e-05, "loss": -0.1752, "step": 26040 }, { "epoch": 15.162980209545983, "grad_norm": 0.2152622491121292, "learning_rate": 6.479680975329451e-05, "loss": -0.1711, "step": 26050 }, { "epoch": 15.168800931315483, "grad_norm": 0.18453913927078247, "learning_rate": 6.477048405523031e-05, "loss": -0.1757, "step": 26060 }, { "epoch": 15.174621653084982, "grad_norm": 0.25956490635871887, "learning_rate": 6.474415387028304e-05, "loss": -0.1735, "step": 26070 }, { "epoch": 15.180442374854483, "grad_norm": 0.15498489141464233, "learning_rate": 6.471781920645114e-05, "loss": -0.1764, "step": 26080 }, { "epoch": 15.186263096623982, "grad_norm": 0.24350126087665558, "learning_rate": 6.469148007173434e-05, "loss": -0.1757, "step": 26090 }, { "epoch": 15.192083818393481, "grad_norm": 0.21948695182800293, "learning_rate": 6.466513647413381e-05, "loss": -0.1734, "step": 26100 }, { "epoch": 15.19790454016298, "grad_norm": 0.24417614936828613, "learning_rate": 6.463878842165203e-05, "loss": -0.1737, "step": 26110 }, { "epoch": 15.20372526193248, "grad_norm": 0.13860002160072327, "learning_rate": 6.461243592229286e-05, "loss": -0.1707, "step": 26120 }, { "epoch": 15.209545983701979, "grad_norm": 0.18019255995750427, "learning_rate": 6.458607898406146e-05, "loss": -0.178, "step": 26130 }, { "epoch": 15.215366705471478, "grad_norm": 0.18672458827495575, "learning_rate": 6.455971761496439e-05, "loss": -0.1765, "step": 26140 }, { "epoch": 15.221187427240977, "grad_norm": 0.15812218189239502, "learning_rate": 6.453335182300953e-05, "loss": -0.1768, "step": 26150 }, { "epoch": 15.227008149010477, "grad_norm": 0.2561328411102295, "learning_rate": 6.450698161620612e-05, "loss": -0.1761, "step": 26160 }, { "epoch": 15.232828870779977, "grad_norm": 0.18061211705207825, "learning_rate": 6.448060700256473e-05, "loss": -0.1711, "step": 26170 }, { "epoch": 15.238649592549477, "grad_norm": 0.17592652142047882, "learning_rate": 6.445422799009726e-05, "loss": -0.1772, "step": 26180 }, { "epoch": 15.244470314318976, "grad_norm": 0.280459463596344, "learning_rate": 6.442784458681699e-05, "loss": -0.1744, "step": 26190 }, { "epoch": 15.250291036088475, "grad_norm": 0.23736199736595154, "learning_rate": 6.440145680073847e-05, "loss": -0.1765, "step": 26200 }, { "epoch": 15.256111757857974, "grad_norm": 0.10786401480436325, "learning_rate": 6.437506463987762e-05, "loss": -0.1739, "step": 26210 }, { "epoch": 15.261932479627474, "grad_norm": 0.11333847790956497, "learning_rate": 6.434866811225168e-05, "loss": -0.176, "step": 26220 }, { "epoch": 15.267753201396973, "grad_norm": 0.15517593920230865, "learning_rate": 6.432226722587923e-05, "loss": -0.1733, "step": 26230 }, { "epoch": 15.273573923166472, "grad_norm": 0.2854684591293335, "learning_rate": 6.429586198878015e-05, "loss": -0.1738, "step": 26240 }, { "epoch": 15.279394644935971, "grad_norm": 0.15738719701766968, "learning_rate": 6.426945240897566e-05, "loss": -0.1676, "step": 26250 }, { "epoch": 15.285215366705472, "grad_norm": 0.17013481259346008, "learning_rate": 6.424303849448829e-05, "loss": -0.1748, "step": 26260 }, { "epoch": 15.291036088474971, "grad_norm": 0.2443515807390213, "learning_rate": 6.42166202533419e-05, "loss": -0.175, "step": 26270 }, { "epoch": 15.29685681024447, "grad_norm": 0.22877666354179382, "learning_rate": 6.419019769356164e-05, "loss": -0.1752, "step": 26280 }, { "epoch": 15.30267753201397, "grad_norm": 0.25555577874183655, "learning_rate": 6.416377082317398e-05, "loss": -0.1743, "step": 26290 }, { "epoch": 15.308498253783469, "grad_norm": 0.17571821808815002, "learning_rate": 6.413733965020674e-05, "loss": -0.1753, "step": 26300 }, { "epoch": 15.314318975552968, "grad_norm": 0.19105611741542816, "learning_rate": 6.411090418268896e-05, "loss": -0.1781, "step": 26310 }, { "epoch": 15.320139697322467, "grad_norm": 0.218561589717865, "learning_rate": 6.408446442865109e-05, "loss": -0.1772, "step": 26320 }, { "epoch": 15.325960419091967, "grad_norm": 0.2226828634738922, "learning_rate": 6.405802039612479e-05, "loss": -0.1756, "step": 26330 }, { "epoch": 15.331781140861466, "grad_norm": 0.21463342010974884, "learning_rate": 6.403157209314308e-05, "loss": -0.173, "step": 26340 }, { "epoch": 15.337601862630967, "grad_norm": 0.2906154990196228, "learning_rate": 6.400511952774024e-05, "loss": -0.1742, "step": 26350 }, { "epoch": 15.343422584400466, "grad_norm": 0.1964346468448639, "learning_rate": 6.397866270795187e-05, "loss": -0.1765, "step": 26360 }, { "epoch": 15.349243306169965, "grad_norm": 0.13857810199260712, "learning_rate": 6.395220164181489e-05, "loss": -0.1763, "step": 26370 }, { "epoch": 15.355064027939465, "grad_norm": 0.16877417266368866, "learning_rate": 6.39257363373674e-05, "loss": -0.1756, "step": 26380 }, { "epoch": 15.360884749708964, "grad_norm": 0.16239288449287415, "learning_rate": 6.389926680264892e-05, "loss": -0.1723, "step": 26390 }, { "epoch": 15.366705471478463, "grad_norm": 0.19833903014659882, "learning_rate": 6.387279304570017e-05, "loss": -0.1699, "step": 26400 }, { "epoch": 15.372526193247962, "grad_norm": 0.16072462499141693, "learning_rate": 6.384631507456319e-05, "loss": -0.1755, "step": 26410 }, { "epoch": 15.378346915017461, "grad_norm": 0.20670677721500397, "learning_rate": 6.381983289728126e-05, "loss": -0.1749, "step": 26420 }, { "epoch": 15.384167636786962, "grad_norm": 0.20042696595191956, "learning_rate": 6.3793346521899e-05, "loss": -0.1735, "step": 26430 }, { "epoch": 15.389988358556462, "grad_norm": 0.19461417198181152, "learning_rate": 6.376685595646226e-05, "loss": -0.1715, "step": 26440 }, { "epoch": 15.39580908032596, "grad_norm": 0.1703818440437317, "learning_rate": 6.374036120901816e-05, "loss": -0.1709, "step": 26450 }, { "epoch": 15.40162980209546, "grad_norm": 0.23504602909088135, "learning_rate": 6.371386228761514e-05, "loss": -0.1753, "step": 26460 }, { "epoch": 15.40745052386496, "grad_norm": 0.2102624475955963, "learning_rate": 6.368735920030283e-05, "loss": -0.1698, "step": 26470 }, { "epoch": 15.413271245634458, "grad_norm": 0.12558403611183167, "learning_rate": 6.366085195513218e-05, "loss": -0.1739, "step": 26480 }, { "epoch": 15.419091967403958, "grad_norm": 0.20127587020397186, "learning_rate": 6.363434056015543e-05, "loss": -0.1685, "step": 26490 }, { "epoch": 15.424912689173457, "grad_norm": 0.1334630697965622, "learning_rate": 6.360782502342599e-05, "loss": -0.1753, "step": 26500 }, { "epoch": 15.430733410942956, "grad_norm": 0.19133290648460388, "learning_rate": 6.358130535299862e-05, "loss": -0.1711, "step": 26510 }, { "epoch": 15.436554132712457, "grad_norm": 0.2362821400165558, "learning_rate": 6.355478155692926e-05, "loss": -0.1726, "step": 26520 }, { "epoch": 15.442374854481956, "grad_norm": 0.2193259447813034, "learning_rate": 6.352825364327517e-05, "loss": -0.1733, "step": 26530 }, { "epoch": 15.448195576251456, "grad_norm": 0.20689187943935394, "learning_rate": 6.350172162009482e-05, "loss": -0.1659, "step": 26540 }, { "epoch": 15.454016298020955, "grad_norm": 0.278729647397995, "learning_rate": 6.347518549544793e-05, "loss": -0.1715, "step": 26550 }, { "epoch": 15.459837019790454, "grad_norm": 0.2367287278175354, "learning_rate": 6.344864527739547e-05, "loss": -0.1761, "step": 26560 }, { "epoch": 15.465657741559953, "grad_norm": 0.20113302767276764, "learning_rate": 6.342210097399966e-05, "loss": -0.1745, "step": 26570 }, { "epoch": 15.471478463329452, "grad_norm": 0.14329786598682404, "learning_rate": 6.339555259332398e-05, "loss": -0.1697, "step": 26580 }, { "epoch": 15.477299185098952, "grad_norm": 0.20057110488414764, "learning_rate": 6.33690001434331e-05, "loss": -0.1676, "step": 26590 }, { "epoch": 15.48311990686845, "grad_norm": 0.23832528293132782, "learning_rate": 6.334244363239296e-05, "loss": -0.1724, "step": 26600 }, { "epoch": 15.488940628637952, "grad_norm": 0.1301276683807373, "learning_rate": 6.331588306827073e-05, "loss": -0.1764, "step": 26610 }, { "epoch": 15.494761350407451, "grad_norm": 0.14611132442951202, "learning_rate": 6.328931845913483e-05, "loss": -0.176, "step": 26620 }, { "epoch": 15.50058207217695, "grad_norm": 0.19424474239349365, "learning_rate": 6.326274981305484e-05, "loss": -0.1757, "step": 26630 }, { "epoch": 15.50640279394645, "grad_norm": 0.17858116328716278, "learning_rate": 6.323617713810166e-05, "loss": -0.1726, "step": 26640 }, { "epoch": 15.512223515715949, "grad_norm": 0.13252463936805725, "learning_rate": 6.320960044234734e-05, "loss": -0.1758, "step": 26650 }, { "epoch": 15.518044237485448, "grad_norm": 0.18888163566589355, "learning_rate": 6.318301973386518e-05, "loss": -0.1715, "step": 26660 }, { "epoch": 15.523864959254947, "grad_norm": 0.35866934061050415, "learning_rate": 6.315643502072971e-05, "loss": -0.174, "step": 26670 }, { "epoch": 15.529685681024446, "grad_norm": 0.1646261066198349, "learning_rate": 6.312984631101667e-05, "loss": -0.1769, "step": 26680 }, { "epoch": 15.535506402793946, "grad_norm": 0.21092599630355835, "learning_rate": 6.310325361280297e-05, "loss": -0.1759, "step": 26690 }, { "epoch": 15.541327124563447, "grad_norm": 0.15883541107177734, "learning_rate": 6.30766569341668e-05, "loss": -0.1767, "step": 26700 }, { "epoch": 15.547147846332946, "grad_norm": 0.1429925262928009, "learning_rate": 6.305005628318753e-05, "loss": -0.1728, "step": 26710 }, { "epoch": 15.552968568102445, "grad_norm": 0.1415383666753769, "learning_rate": 6.302345166794572e-05, "loss": -0.1764, "step": 26720 }, { "epoch": 15.558789289871944, "grad_norm": 0.17015132308006287, "learning_rate": 6.299684309652316e-05, "loss": -0.175, "step": 26730 }, { "epoch": 15.564610011641443, "grad_norm": 0.17225050926208496, "learning_rate": 6.297023057700283e-05, "loss": -0.1782, "step": 26740 }, { "epoch": 15.570430733410943, "grad_norm": 0.16041375696659088, "learning_rate": 6.294361411746891e-05, "loss": -0.1762, "step": 26750 }, { "epoch": 15.576251455180442, "grad_norm": 0.17932583391666412, "learning_rate": 6.291699372600677e-05, "loss": -0.1729, "step": 26760 }, { "epoch": 15.582072176949941, "grad_norm": 0.21052969992160797, "learning_rate": 6.2890369410703e-05, "loss": -0.1723, "step": 26770 }, { "epoch": 15.587892898719442, "grad_norm": 0.21728633344173431, "learning_rate": 6.286374117964534e-05, "loss": -0.1775, "step": 26780 }, { "epoch": 15.593713620488941, "grad_norm": 0.13521461188793182, "learning_rate": 6.283710904092277e-05, "loss": -0.1771, "step": 26790 }, { "epoch": 15.59953434225844, "grad_norm": 0.18669675290584564, "learning_rate": 6.281047300262542e-05, "loss": -0.1771, "step": 26800 }, { "epoch": 15.60535506402794, "grad_norm": 0.1715828776359558, "learning_rate": 6.278383307284461e-05, "loss": -0.1754, "step": 26810 }, { "epoch": 15.611175785797439, "grad_norm": 0.13049408793449402, "learning_rate": 6.275718925967284e-05, "loss": -0.1749, "step": 26820 }, { "epoch": 15.616996507566938, "grad_norm": 0.2228919118642807, "learning_rate": 6.273054157120382e-05, "loss": -0.1736, "step": 26830 }, { "epoch": 15.622817229336437, "grad_norm": 0.21137261390686035, "learning_rate": 6.270389001553238e-05, "loss": -0.1723, "step": 26840 }, { "epoch": 15.628637951105937, "grad_norm": 0.216111958026886, "learning_rate": 6.26772346007546e-05, "loss": -0.1706, "step": 26850 }, { "epoch": 15.634458672875436, "grad_norm": 0.19987443089485168, "learning_rate": 6.265057533496767e-05, "loss": -0.1729, "step": 26860 }, { "epoch": 15.640279394644937, "grad_norm": 0.19764527678489685, "learning_rate": 6.262391222626997e-05, "loss": -0.1721, "step": 26870 }, { "epoch": 15.646100116414436, "grad_norm": 0.16626374423503876, "learning_rate": 6.259724528276106e-05, "loss": -0.1716, "step": 26880 }, { "epoch": 15.651920838183935, "grad_norm": 0.2529068887233734, "learning_rate": 6.257057451254162e-05, "loss": -0.1744, "step": 26890 }, { "epoch": 15.657741559953434, "grad_norm": 0.15352468192577362, "learning_rate": 6.254389992371357e-05, "loss": -0.1744, "step": 26900 }, { "epoch": 15.663562281722934, "grad_norm": 0.1362028270959854, "learning_rate": 6.25172215243799e-05, "loss": -0.1746, "step": 26910 }, { "epoch": 15.669383003492433, "grad_norm": 0.12991981208324432, "learning_rate": 6.249053932264486e-05, "loss": -0.1749, "step": 26920 }, { "epoch": 15.675203725261932, "grad_norm": 0.1469849795103073, "learning_rate": 6.246385332661376e-05, "loss": -0.1739, "step": 26930 }, { "epoch": 15.681024447031431, "grad_norm": 0.16231924295425415, "learning_rate": 6.24371635443931e-05, "loss": -0.173, "step": 26940 }, { "epoch": 15.68684516880093, "grad_norm": 0.13976924121379852, "learning_rate": 6.241046998409054e-05, "loss": -0.1783, "step": 26950 }, { "epoch": 15.692665890570431, "grad_norm": 0.19315414130687714, "learning_rate": 6.238377265381489e-05, "loss": -0.1751, "step": 26960 }, { "epoch": 15.69848661233993, "grad_norm": 0.14701275527477264, "learning_rate": 6.235707156167607e-05, "loss": -0.1697, "step": 26970 }, { "epoch": 15.70430733410943, "grad_norm": 0.23265251517295837, "learning_rate": 6.233036671578519e-05, "loss": -0.1651, "step": 26980 }, { "epoch": 15.71012805587893, "grad_norm": 0.14863042533397675, "learning_rate": 6.230365812425445e-05, "loss": -0.1747, "step": 26990 }, { "epoch": 15.715948777648428, "grad_norm": 0.18898998200893402, "learning_rate": 6.227694579519724e-05, "loss": -0.1709, "step": 27000 }, { "epoch": 15.721769499417928, "grad_norm": 0.10620572417974472, "learning_rate": 6.225022973672805e-05, "loss": -0.1736, "step": 27010 }, { "epoch": 15.727590221187427, "grad_norm": 0.26919540762901306, "learning_rate": 6.222350995696253e-05, "loss": -0.1678, "step": 27020 }, { "epoch": 15.733410942956926, "grad_norm": 0.2183120846748352, "learning_rate": 6.21967864640174e-05, "loss": -0.1763, "step": 27030 }, { "epoch": 15.739231664726425, "grad_norm": 0.28772810101509094, "learning_rate": 6.217005926601059e-05, "loss": -0.1773, "step": 27040 }, { "epoch": 15.745052386495926, "grad_norm": 0.24202635884284973, "learning_rate": 6.214332837106111e-05, "loss": -0.1774, "step": 27050 }, { "epoch": 15.750873108265425, "grad_norm": 0.16611617803573608, "learning_rate": 6.21165937872891e-05, "loss": -0.1749, "step": 27060 }, { "epoch": 15.756693830034925, "grad_norm": 0.2773790955543518, "learning_rate": 6.208985552281582e-05, "loss": -0.173, "step": 27070 }, { "epoch": 15.762514551804424, "grad_norm": 0.18215034902095795, "learning_rate": 6.206311358576364e-05, "loss": -0.1656, "step": 27080 }, { "epoch": 15.768335273573923, "grad_norm": 0.19431576132774353, "learning_rate": 6.203636798425608e-05, "loss": -0.1753, "step": 27090 }, { "epoch": 15.774155995343422, "grad_norm": 0.250496506690979, "learning_rate": 6.20096187264177e-05, "loss": -0.1712, "step": 27100 }, { "epoch": 15.779976717112921, "grad_norm": 0.23363614082336426, "learning_rate": 6.198286582037425e-05, "loss": -0.1753, "step": 27110 }, { "epoch": 15.78579743888242, "grad_norm": 0.18620577454566956, "learning_rate": 6.195610927425256e-05, "loss": -0.1723, "step": 27120 }, { "epoch": 15.791618160651922, "grad_norm": 0.1893618106842041, "learning_rate": 6.192934909618056e-05, "loss": -0.1779, "step": 27130 }, { "epoch": 15.797438882421421, "grad_norm": 0.1490909457206726, "learning_rate": 6.190258529428728e-05, "loss": -0.1771, "step": 27140 }, { "epoch": 15.80325960419092, "grad_norm": 0.0863681435585022, "learning_rate": 6.187581787670285e-05, "loss": -0.1689, "step": 27150 }, { "epoch": 15.80908032596042, "grad_norm": 0.11724822223186493, "learning_rate": 6.184904685155852e-05, "loss": -0.1733, "step": 27160 }, { "epoch": 15.814901047729919, "grad_norm": 0.2558000981807709, "learning_rate": 6.18222722269866e-05, "loss": -0.1705, "step": 27170 }, { "epoch": 15.820721769499418, "grad_norm": 0.1696695238351822, "learning_rate": 6.179549401112053e-05, "loss": -0.1785, "step": 27180 }, { "epoch": 15.826542491268917, "grad_norm": 0.16447371244430542, "learning_rate": 6.176871221209482e-05, "loss": -0.1746, "step": 27190 }, { "epoch": 15.832363213038416, "grad_norm": 0.13308553397655487, "learning_rate": 6.174192683804508e-05, "loss": -0.1757, "step": 27200 }, { "epoch": 15.838183934807915, "grad_norm": 0.11661705374717712, "learning_rate": 6.1715137897108e-05, "loss": -0.1765, "step": 27210 }, { "epoch": 15.844004656577416, "grad_norm": 0.10715445131063461, "learning_rate": 6.168834539742134e-05, "loss": -0.1707, "step": 27220 }, { "epoch": 15.849825378346916, "grad_norm": 0.2503073811531067, "learning_rate": 6.166154934712397e-05, "loss": -0.172, "step": 27230 }, { "epoch": 15.855646100116415, "grad_norm": 0.21116670966148376, "learning_rate": 6.163474975435581e-05, "loss": -0.1772, "step": 27240 }, { "epoch": 15.861466821885914, "grad_norm": 0.26246485114097595, "learning_rate": 6.160794662725787e-05, "loss": -0.1687, "step": 27250 }, { "epoch": 15.867287543655413, "grad_norm": 0.23508048057556152, "learning_rate": 6.158113997397222e-05, "loss": -0.1732, "step": 27260 }, { "epoch": 15.873108265424912, "grad_norm": 0.10229132324457169, "learning_rate": 6.155432980264205e-05, "loss": -0.1748, "step": 27270 }, { "epoch": 15.878928987194412, "grad_norm": 0.09195268154144287, "learning_rate": 6.152751612141156e-05, "loss": -0.171, "step": 27280 }, { "epoch": 15.884749708963911, "grad_norm": 0.11904300004243851, "learning_rate": 6.150069893842602e-05, "loss": -0.1749, "step": 27290 }, { "epoch": 15.89057043073341, "grad_norm": 0.13833652436733246, "learning_rate": 6.147387826183182e-05, "loss": -0.1772, "step": 27300 }, { "epoch": 15.896391152502911, "grad_norm": 0.2205265760421753, "learning_rate": 6.144705409977635e-05, "loss": -0.1685, "step": 27310 }, { "epoch": 15.90221187427241, "grad_norm": 0.17357264459133148, "learning_rate": 6.142022646040808e-05, "loss": -0.1762, "step": 27320 }, { "epoch": 15.90803259604191, "grad_norm": 0.1790962815284729, "learning_rate": 6.139339535187653e-05, "loss": -0.1749, "step": 27330 }, { "epoch": 15.913853317811409, "grad_norm": 0.22738081216812134, "learning_rate": 6.136656078233232e-05, "loss": -0.1783, "step": 27340 }, { "epoch": 15.919674039580908, "grad_norm": 0.2502976655960083, "learning_rate": 6.133972275992707e-05, "loss": -0.1718, "step": 27350 }, { "epoch": 15.925494761350407, "grad_norm": 0.2599133849143982, "learning_rate": 6.131288129281342e-05, "loss": -0.1727, "step": 27360 }, { "epoch": 15.931315483119906, "grad_norm": 0.21131929755210876, "learning_rate": 6.128603638914516e-05, "loss": -0.1754, "step": 27370 }, { "epoch": 15.937136204889406, "grad_norm": 0.1813148707151413, "learning_rate": 6.125918805707704e-05, "loss": -0.1754, "step": 27380 }, { "epoch": 15.942956926658905, "grad_norm": 0.16923773288726807, "learning_rate": 6.123233630476485e-05, "loss": -0.1751, "step": 27390 }, { "epoch": 15.948777648428406, "grad_norm": 0.17271985113620758, "learning_rate": 6.120548114036547e-05, "loss": -0.1761, "step": 27400 }, { "epoch": 15.954598370197905, "grad_norm": 0.2507558763027191, "learning_rate": 6.117862257203679e-05, "loss": -0.1736, "step": 27410 }, { "epoch": 15.960419091967404, "grad_norm": 0.2754637598991394, "learning_rate": 6.115176060793771e-05, "loss": -0.1703, "step": 27420 }, { "epoch": 15.966239813736903, "grad_norm": 0.19638778269290924, "learning_rate": 6.112489525622822e-05, "loss": -0.1779, "step": 27430 }, { "epoch": 15.972060535506403, "grad_norm": 0.2481566071510315, "learning_rate": 6.109802652506928e-05, "loss": -0.1744, "step": 27440 }, { "epoch": 15.977881257275902, "grad_norm": 0.23161491751670837, "learning_rate": 6.107115442262291e-05, "loss": -0.1754, "step": 27450 }, { "epoch": 15.983701979045401, "grad_norm": 0.17320077121257782, "learning_rate": 6.104427895705214e-05, "loss": -0.1728, "step": 27460 }, { "epoch": 15.9895227008149, "grad_norm": 0.1472579836845398, "learning_rate": 6.101740013652103e-05, "loss": -0.1798, "step": 27470 }, { "epoch": 15.995343422584401, "grad_norm": 0.13004198670387268, "learning_rate": 6.099051796919465e-05, "loss": -0.1725, "step": 27480 }, { "epoch": 16.0011641443539, "grad_norm": 0.1384580135345459, "learning_rate": 6.096363246323911e-05, "loss": -0.1724, "step": 27490 }, { "epoch": 16.0069848661234, "grad_norm": 0.2276305854320526, "learning_rate": 6.0936743626821504e-05, "loss": -0.1734, "step": 27500 }, { "epoch": 16.0128055878929, "grad_norm": 0.21953240036964417, "learning_rate": 6.090985146810996e-05, "loss": -0.1764, "step": 27510 }, { "epoch": 16.018626309662398, "grad_norm": 0.15183915197849274, "learning_rate": 6.088295599527357e-05, "loss": -0.1753, "step": 27520 }, { "epoch": 16.024447031431897, "grad_norm": 0.22590585052967072, "learning_rate": 6.085605721648252e-05, "loss": -0.1756, "step": 27530 }, { "epoch": 16.030267753201397, "grad_norm": 0.2546223998069763, "learning_rate": 6.082915513990792e-05, "loss": -0.1741, "step": 27540 }, { "epoch": 16.036088474970896, "grad_norm": 0.13669267296791077, "learning_rate": 6.080224977372192e-05, "loss": -0.1757, "step": 27550 }, { "epoch": 16.041909196740395, "grad_norm": 0.23386941850185394, "learning_rate": 6.0775341126097666e-05, "loss": -0.1742, "step": 27560 }, { "epoch": 16.047729918509894, "grad_norm": 0.15413537621498108, "learning_rate": 6.074842920520926e-05, "loss": -0.1732, "step": 27570 }, { "epoch": 16.053550640279393, "grad_norm": 0.21180236339569092, "learning_rate": 6.072151401923186e-05, "loss": -0.1741, "step": 27580 }, { "epoch": 16.059371362048893, "grad_norm": 0.23991844058036804, "learning_rate": 6.069459557634159e-05, "loss": -0.1726, "step": 27590 }, { "epoch": 16.065192083818392, "grad_norm": 0.2657960057258606, "learning_rate": 6.066767388471557e-05, "loss": -0.1727, "step": 27600 }, { "epoch": 16.07101280558789, "grad_norm": 0.2405424565076828, "learning_rate": 6.064074895253188e-05, "loss": -0.1732, "step": 27610 }, { "epoch": 16.076833527357394, "grad_norm": 0.1463184654712677, "learning_rate": 6.061382078796961e-05, "loss": -0.1719, "step": 27620 }, { "epoch": 16.082654249126893, "grad_norm": 0.19322480261325836, "learning_rate": 6.0586889399208814e-05, "loss": -0.1751, "step": 27630 }, { "epoch": 16.088474970896392, "grad_norm": 0.19059674441814423, "learning_rate": 6.0559954794430565e-05, "loss": -0.1704, "step": 27640 }, { "epoch": 16.09429569266589, "grad_norm": 0.1863642930984497, "learning_rate": 6.053301698181687e-05, "loss": -0.1715, "step": 27650 }, { "epoch": 16.10011641443539, "grad_norm": 0.20026381313800812, "learning_rate": 6.0506075969550725e-05, "loss": -0.1747, "step": 27660 }, { "epoch": 16.10593713620489, "grad_norm": 0.14173565804958344, "learning_rate": 6.047913176581609e-05, "loss": -0.1758, "step": 27670 }, { "epoch": 16.11175785797439, "grad_norm": 0.14439615607261658, "learning_rate": 6.0452184378797904e-05, "loss": -0.176, "step": 27680 }, { "epoch": 16.11757857974389, "grad_norm": 0.11581498384475708, "learning_rate": 6.042523381668209e-05, "loss": -0.1748, "step": 27690 }, { "epoch": 16.123399301513388, "grad_norm": 0.22414812445640564, "learning_rate": 6.03982800876555e-05, "loss": -0.1773, "step": 27700 }, { "epoch": 16.129220023282887, "grad_norm": 0.2143026888370514, "learning_rate": 6.0371323199905975e-05, "loss": -0.1687, "step": 27710 }, { "epoch": 16.135040745052386, "grad_norm": 0.19531412422657013, "learning_rate": 6.03443631616223e-05, "loss": -0.1738, "step": 27720 }, { "epoch": 16.140861466821885, "grad_norm": 0.1447213888168335, "learning_rate": 6.031739998099421e-05, "loss": -0.1777, "step": 27730 }, { "epoch": 16.146682188591384, "grad_norm": 0.13547728955745697, "learning_rate": 6.029043366621243e-05, "loss": -0.1779, "step": 27740 }, { "epoch": 16.152502910360884, "grad_norm": 0.19608362019062042, "learning_rate": 6.0263464225468615e-05, "loss": -0.1726, "step": 27750 }, { "epoch": 16.158323632130383, "grad_norm": 0.1743670254945755, "learning_rate": 6.023649166695534e-05, "loss": -0.1781, "step": 27760 }, { "epoch": 16.164144353899882, "grad_norm": 0.17028728127479553, "learning_rate": 6.0209515998866186e-05, "loss": -0.1743, "step": 27770 }, { "epoch": 16.16996507566938, "grad_norm": 0.139681875705719, "learning_rate": 6.018253722939563e-05, "loss": -0.1754, "step": 27780 }, { "epoch": 16.175785797438884, "grad_norm": 0.24066650867462158, "learning_rate": 6.015555536673914e-05, "loss": -0.1734, "step": 27790 }, { "epoch": 16.181606519208383, "grad_norm": 0.20176921784877777, "learning_rate": 6.0128570419093054e-05, "loss": -0.171, "step": 27800 }, { "epoch": 16.187427240977883, "grad_norm": 0.17549479007720947, "learning_rate": 6.010158239465471e-05, "loss": -0.1775, "step": 27810 }, { "epoch": 16.19324796274738, "grad_norm": 0.19493408501148224, "learning_rate": 6.007459130162235e-05, "loss": -0.1734, "step": 27820 }, { "epoch": 16.19906868451688, "grad_norm": 0.1404920369386673, "learning_rate": 6.004759714819516e-05, "loss": -0.1738, "step": 27830 }, { "epoch": 16.20488940628638, "grad_norm": 0.20851722359657288, "learning_rate": 6.002059994257323e-05, "loss": -0.1708, "step": 27840 }, { "epoch": 16.21071012805588, "grad_norm": 0.12937521934509277, "learning_rate": 5.999359969295764e-05, "loss": -0.1747, "step": 27850 }, { "epoch": 16.21653084982538, "grad_norm": 0.09871012717485428, "learning_rate": 5.9966596407550314e-05, "loss": -0.1779, "step": 27860 }, { "epoch": 16.222351571594878, "grad_norm": 0.24503356218338013, "learning_rate": 5.993959009455416e-05, "loss": -0.1723, "step": 27870 }, { "epoch": 16.228172293364377, "grad_norm": 0.26040932536125183, "learning_rate": 5.991258076217298e-05, "loss": -0.1764, "step": 27880 }, { "epoch": 16.233993015133876, "grad_norm": 0.16748815774917603, "learning_rate": 5.988556841861147e-05, "loss": -0.1726, "step": 27890 }, { "epoch": 16.239813736903375, "grad_norm": 0.20671014487743378, "learning_rate": 5.985855307207531e-05, "loss": -0.1726, "step": 27900 }, { "epoch": 16.245634458672875, "grad_norm": 0.15935279428958893, "learning_rate": 5.9831534730771e-05, "loss": -0.177, "step": 27910 }, { "epoch": 16.251455180442374, "grad_norm": 0.1620977818965912, "learning_rate": 5.980451340290605e-05, "loss": -0.1758, "step": 27920 }, { "epoch": 16.257275902211873, "grad_norm": 0.18710048496723175, "learning_rate": 5.97774890966888e-05, "loss": -0.179, "step": 27930 }, { "epoch": 16.263096623981372, "grad_norm": 0.167991504073143, "learning_rate": 5.975046182032851e-05, "loss": -0.1769, "step": 27940 }, { "epoch": 16.26891734575087, "grad_norm": 0.17100711166858673, "learning_rate": 5.972343158203537e-05, "loss": -0.1756, "step": 27950 }, { "epoch": 16.274738067520374, "grad_norm": 0.1395028829574585, "learning_rate": 5.969639839002045e-05, "loss": -0.1766, "step": 27960 }, { "epoch": 16.280558789289874, "grad_norm": 0.151261106133461, "learning_rate": 5.966936225249572e-05, "loss": -0.1732, "step": 27970 }, { "epoch": 16.286379511059373, "grad_norm": 0.14500422775745392, "learning_rate": 5.9642323177674044e-05, "loss": -0.1785, "step": 27980 }, { "epoch": 16.292200232828872, "grad_norm": 0.14496390521526337, "learning_rate": 5.9615281173769154e-05, "loss": -0.1767, "step": 27990 }, { "epoch": 16.29802095459837, "grad_norm": 0.13000860810279846, "learning_rate": 5.958823624899574e-05, "loss": -0.1775, "step": 28000 }, { "epoch": 16.30384167636787, "grad_norm": 0.14046798646450043, "learning_rate": 5.956118841156933e-05, "loss": -0.1757, "step": 28010 }, { "epoch": 16.30966239813737, "grad_norm": 0.07558560371398926, "learning_rate": 5.953413766970631e-05, "loss": -0.175, "step": 28020 }, { "epoch": 16.31548311990687, "grad_norm": 0.19468247890472412, "learning_rate": 5.9507084031624e-05, "loss": -0.1744, "step": 28030 }, { "epoch": 16.321303841676368, "grad_norm": 0.2444208413362503, "learning_rate": 5.948002750554058e-05, "loss": -0.1728, "step": 28040 }, { "epoch": 16.327124563445867, "grad_norm": 0.1645945906639099, "learning_rate": 5.9452968099675124e-05, "loss": -0.1768, "step": 28050 }, { "epoch": 16.332945285215366, "grad_norm": 0.2252202332019806, "learning_rate": 5.9425905822247527e-05, "loss": -0.1743, "step": 28060 }, { "epoch": 16.338766006984866, "grad_norm": 0.2224763035774231, "learning_rate": 5.939884068147864e-05, "loss": -0.1749, "step": 28070 }, { "epoch": 16.344586728754365, "grad_norm": 0.1683221310377121, "learning_rate": 5.937177268559011e-05, "loss": -0.1758, "step": 28080 }, { "epoch": 16.350407450523864, "grad_norm": 0.20433975756168365, "learning_rate": 5.934470184280448e-05, "loss": -0.1773, "step": 28090 }, { "epoch": 16.356228172293363, "grad_norm": 0.2214336097240448, "learning_rate": 5.931762816134516e-05, "loss": -0.176, "step": 28100 }, { "epoch": 16.362048894062863, "grad_norm": 0.21345500648021698, "learning_rate": 5.9290551649436434e-05, "loss": -0.1767, "step": 28110 }, { "epoch": 16.36786961583236, "grad_norm": 0.2499108612537384, "learning_rate": 5.9263472315303416e-05, "loss": -0.1755, "step": 28120 }, { "epoch": 16.37369033760186, "grad_norm": 0.23938842117786407, "learning_rate": 5.9236390167172096e-05, "loss": -0.1749, "step": 28130 }, { "epoch": 16.379511059371364, "grad_norm": 0.1731214076280594, "learning_rate": 5.920930521326932e-05, "loss": -0.1738, "step": 28140 }, { "epoch": 16.385331781140863, "grad_norm": 0.161411851644516, "learning_rate": 5.918221746182276e-05, "loss": -0.1748, "step": 28150 }, { "epoch": 16.391152502910362, "grad_norm": 0.1249956265091896, "learning_rate": 5.9155126921061e-05, "loss": -0.1761, "step": 28160 }, { "epoch": 16.39697322467986, "grad_norm": 0.16133350133895874, "learning_rate": 5.91280335992134e-05, "loss": -0.1765, "step": 28170 }, { "epoch": 16.40279394644936, "grad_norm": 0.1870051920413971, "learning_rate": 5.91009375045102e-05, "loss": -0.1743, "step": 28180 }, { "epoch": 16.40861466821886, "grad_norm": 0.19635683298110962, "learning_rate": 5.9073838645182476e-05, "loss": -0.1785, "step": 28190 }, { "epoch": 16.41443538998836, "grad_norm": 0.2168024629354477, "learning_rate": 5.904673702946217e-05, "loss": -0.1736, "step": 28200 }, { "epoch": 16.42025611175786, "grad_norm": 0.16391514241695404, "learning_rate": 5.9019632665582004e-05, "loss": -0.1765, "step": 28210 }, { "epoch": 16.426076833527357, "grad_norm": 0.21996811032295227, "learning_rate": 5.899252556177559e-05, "loss": -0.1763, "step": 28220 }, { "epoch": 16.431897555296857, "grad_norm": 0.11387787014245987, "learning_rate": 5.896541572627735e-05, "loss": -0.1751, "step": 28230 }, { "epoch": 16.437718277066356, "grad_norm": 0.14982064068317413, "learning_rate": 5.893830316732253e-05, "loss": -0.1767, "step": 28240 }, { "epoch": 16.443538998835855, "grad_norm": 0.15793882310390472, "learning_rate": 5.8911187893147214e-05, "loss": -0.1724, "step": 28250 }, { "epoch": 16.449359720605354, "grad_norm": 0.22242403030395508, "learning_rate": 5.888406991198828e-05, "loss": -0.1746, "step": 28260 }, { "epoch": 16.455180442374854, "grad_norm": 0.15541468560695648, "learning_rate": 5.885694923208349e-05, "loss": -0.17, "step": 28270 }, { "epoch": 16.461001164144353, "grad_norm": 0.23071807622909546, "learning_rate": 5.882982586167138e-05, "loss": -0.1749, "step": 28280 }, { "epoch": 16.466821885913852, "grad_norm": 0.09663324058055878, "learning_rate": 5.880269980899131e-05, "loss": -0.1775, "step": 28290 }, { "epoch": 16.47264260768335, "grad_norm": 0.13042190670967102, "learning_rate": 5.8775571082283465e-05, "loss": -0.1765, "step": 28300 }, { "epoch": 16.47846332945285, "grad_norm": 0.14836783707141876, "learning_rate": 5.8748439689788824e-05, "loss": -0.174, "step": 28310 }, { "epoch": 16.484284051222353, "grad_norm": 0.10916794836521149, "learning_rate": 5.87213056397492e-05, "loss": -0.176, "step": 28320 }, { "epoch": 16.490104772991852, "grad_norm": 0.13049273192882538, "learning_rate": 5.869416894040719e-05, "loss": -0.1723, "step": 28330 }, { "epoch": 16.49592549476135, "grad_norm": 0.12981683015823364, "learning_rate": 5.866702960000621e-05, "loss": -0.1758, "step": 28340 }, { "epoch": 16.50174621653085, "grad_norm": 0.1688581258058548, "learning_rate": 5.863988762679048e-05, "loss": -0.1743, "step": 28350 }, { "epoch": 16.50756693830035, "grad_norm": 0.24083462357521057, "learning_rate": 5.8612743029005e-05, "loss": -0.1756, "step": 28360 }, { "epoch": 16.51338766006985, "grad_norm": 0.18631264567375183, "learning_rate": 5.858559581489561e-05, "loss": -0.1763, "step": 28370 }, { "epoch": 16.51920838183935, "grad_norm": 0.11700030416250229, "learning_rate": 5.85584459927089e-05, "loss": -0.1749, "step": 28380 }, { "epoch": 16.525029103608848, "grad_norm": 0.11984679847955704, "learning_rate": 5.853129357069227e-05, "loss": -0.1772, "step": 28390 }, { "epoch": 16.530849825378347, "grad_norm": 0.145372673869133, "learning_rate": 5.8504138557093913e-05, "loss": -0.1757, "step": 28400 }, { "epoch": 16.536670547147846, "grad_norm": 0.1806095838546753, "learning_rate": 5.8476980960162784e-05, "loss": -0.1764, "step": 28410 }, { "epoch": 16.542491268917345, "grad_norm": 0.18651992082595825, "learning_rate": 5.844982078814868e-05, "loss": -0.1753, "step": 28420 }, { "epoch": 16.548311990686845, "grad_norm": 0.26289132237434387, "learning_rate": 5.842265804930211e-05, "loss": -0.1726, "step": 28430 }, { "epoch": 16.554132712456344, "grad_norm": 0.23003752529621124, "learning_rate": 5.839549275187444e-05, "loss": -0.1778, "step": 28440 }, { "epoch": 16.559953434225843, "grad_norm": 0.1954324096441269, "learning_rate": 5.836832490411771e-05, "loss": -0.175, "step": 28450 }, { "epoch": 16.565774155995342, "grad_norm": 0.1739429384469986, "learning_rate": 5.834115451428485e-05, "loss": -0.1727, "step": 28460 }, { "epoch": 16.57159487776484, "grad_norm": 0.12231942266225815, "learning_rate": 5.831398159062946e-05, "loss": -0.1764, "step": 28470 }, { "epoch": 16.57741559953434, "grad_norm": 0.2228018045425415, "learning_rate": 5.828680614140599e-05, "loss": -0.1771, "step": 28480 }, { "epoch": 16.583236321303843, "grad_norm": 0.21333171427249908, "learning_rate": 5.825962817486962e-05, "loss": -0.1751, "step": 28490 }, { "epoch": 16.589057043073343, "grad_norm": 0.20026598870754242, "learning_rate": 5.823244769927629e-05, "loss": -0.1767, "step": 28500 }, { "epoch": 16.594877764842842, "grad_norm": 0.22088024020195007, "learning_rate": 5.8205264722882716e-05, "loss": -0.1708, "step": 28510 }, { "epoch": 16.60069848661234, "grad_norm": 0.1960967481136322, "learning_rate": 5.817807925394636e-05, "loss": -0.1781, "step": 28520 }, { "epoch": 16.60651920838184, "grad_norm": 0.15127307176589966, "learning_rate": 5.815089130072546e-05, "loss": -0.1759, "step": 28530 }, { "epoch": 16.61233993015134, "grad_norm": 0.1836855262517929, "learning_rate": 5.8123700871479e-05, "loss": -0.178, "step": 28540 }, { "epoch": 16.61816065192084, "grad_norm": 0.13448862731456757, "learning_rate": 5.809650797446671e-05, "loss": -0.1765, "step": 28550 }, { "epoch": 16.623981373690338, "grad_norm": 0.1281631886959076, "learning_rate": 5.806931261794907e-05, "loss": -0.174, "step": 28560 }, { "epoch": 16.629802095459837, "grad_norm": 0.23516924679279327, "learning_rate": 5.804211481018731e-05, "loss": -0.179, "step": 28570 }, { "epoch": 16.635622817229336, "grad_norm": 0.12157844752073288, "learning_rate": 5.801491455944341e-05, "loss": -0.1758, "step": 28580 }, { "epoch": 16.641443538998836, "grad_norm": 0.11041021347045898, "learning_rate": 5.79877118739801e-05, "loss": -0.1787, "step": 28590 }, { "epoch": 16.647264260768335, "grad_norm": 0.15791232883930206, "learning_rate": 5.7960506762060816e-05, "loss": -0.1747, "step": 28600 }, { "epoch": 16.653084982537834, "grad_norm": 0.16377583146095276, "learning_rate": 5.793329923194977e-05, "loss": -0.178, "step": 28610 }, { "epoch": 16.658905704307333, "grad_norm": 0.1328994482755661, "learning_rate": 5.790608929191187e-05, "loss": -0.1779, "step": 28620 }, { "epoch": 16.664726426076832, "grad_norm": 0.14685849845409393, "learning_rate": 5.78788769502128e-05, "loss": -0.1726, "step": 28630 }, { "epoch": 16.67054714784633, "grad_norm": 0.1484871208667755, "learning_rate": 5.785166221511894e-05, "loss": -0.1755, "step": 28640 }, { "epoch": 16.67636786961583, "grad_norm": 0.13721925020217896, "learning_rate": 5.7824445094897415e-05, "loss": -0.1715, "step": 28650 }, { "epoch": 16.682188591385334, "grad_norm": 0.1647256463766098, "learning_rate": 5.7797225597816065e-05, "loss": -0.176, "step": 28660 }, { "epoch": 16.688009313154833, "grad_norm": 0.24827899038791656, "learning_rate": 5.777000373214345e-05, "loss": -0.1774, "step": 28670 }, { "epoch": 16.693830034924332, "grad_norm": 0.1658962368965149, "learning_rate": 5.774277950614885e-05, "loss": -0.1705, "step": 28680 }, { "epoch": 16.69965075669383, "grad_norm": 0.17826277017593384, "learning_rate": 5.771555292810227e-05, "loss": -0.175, "step": 28690 }, { "epoch": 16.70547147846333, "grad_norm": 0.1987934559583664, "learning_rate": 5.768832400627444e-05, "loss": -0.1766, "step": 28700 }, { "epoch": 16.71129220023283, "grad_norm": 0.25411152839660645, "learning_rate": 5.7661092748936775e-05, "loss": -0.1747, "step": 28710 }, { "epoch": 16.71711292200233, "grad_norm": 0.14634066820144653, "learning_rate": 5.76338591643614e-05, "loss": -0.1719, "step": 28720 }, { "epoch": 16.722933643771828, "grad_norm": 0.1758599430322647, "learning_rate": 5.760662326082118e-05, "loss": -0.1721, "step": 28730 }, { "epoch": 16.728754365541327, "grad_norm": 0.1588016003370285, "learning_rate": 5.757938504658965e-05, "loss": -0.1776, "step": 28740 }, { "epoch": 16.734575087310827, "grad_norm": 0.11636929214000702, "learning_rate": 5.755214452994107e-05, "loss": -0.1764, "step": 28750 }, { "epoch": 16.740395809080326, "grad_norm": 0.12241901457309723, "learning_rate": 5.752490171915039e-05, "loss": -0.1703, "step": 28760 }, { "epoch": 16.746216530849825, "grad_norm": 0.21472761034965515, "learning_rate": 5.749765662249324e-05, "loss": -0.1736, "step": 28770 }, { "epoch": 16.752037252619324, "grad_norm": 0.1667221486568451, "learning_rate": 5.747040924824596e-05, "loss": -0.1805, "step": 28780 }, { "epoch": 16.757857974388823, "grad_norm": 0.20381340384483337, "learning_rate": 5.7443159604685613e-05, "loss": -0.1773, "step": 28790 }, { "epoch": 16.763678696158323, "grad_norm": 0.13158951699733734, "learning_rate": 5.74159077000899e-05, "loss": -0.1744, "step": 28800 }, { "epoch": 16.769499417927822, "grad_norm": 0.2126237154006958, "learning_rate": 5.7388653542737235e-05, "loss": -0.1754, "step": 28810 }, { "epoch": 16.77532013969732, "grad_norm": 0.13495561480522156, "learning_rate": 5.736139714090672e-05, "loss": -0.1738, "step": 28820 }, { "epoch": 16.78114086146682, "grad_norm": 0.2022373378276825, "learning_rate": 5.73341385028781e-05, "loss": -0.1756, "step": 28830 }, { "epoch": 16.78696158323632, "grad_norm": 0.13154925405979156, "learning_rate": 5.7306877636931855e-05, "loss": -0.1777, "step": 28840 }, { "epoch": 16.792782305005822, "grad_norm": 0.18890824913978577, "learning_rate": 5.7279614551349125e-05, "loss": -0.1774, "step": 28850 }, { "epoch": 16.79860302677532, "grad_norm": 0.21443021297454834, "learning_rate": 5.725234925441169e-05, "loss": -0.179, "step": 28860 }, { "epoch": 16.80442374854482, "grad_norm": 0.19665376842021942, "learning_rate": 5.7225081754402044e-05, "loss": -0.1777, "step": 28870 }, { "epoch": 16.81024447031432, "grad_norm": 0.19419962167739868, "learning_rate": 5.7197812059603326e-05, "loss": -0.177, "step": 28880 }, { "epoch": 16.81606519208382, "grad_norm": 0.23447832465171814, "learning_rate": 5.717054017829934e-05, "loss": -0.1738, "step": 28890 }, { "epoch": 16.82188591385332, "grad_norm": 0.20063185691833496, "learning_rate": 5.7143266118774584e-05, "loss": -0.168, "step": 28900 }, { "epoch": 16.827706635622818, "grad_norm": 0.19849364459514618, "learning_rate": 5.711598988931418e-05, "loss": -0.1769, "step": 28910 }, { "epoch": 16.833527357392317, "grad_norm": 0.31132492423057556, "learning_rate": 5.7088711498203954e-05, "loss": -0.1765, "step": 28920 }, { "epoch": 16.839348079161816, "grad_norm": 0.2048233151435852, "learning_rate": 5.706143095373033e-05, "loss": -0.1719, "step": 28930 }, { "epoch": 16.845168800931315, "grad_norm": 0.2025100737810135, "learning_rate": 5.703414826418042e-05, "loss": -0.1762, "step": 28940 }, { "epoch": 16.850989522700814, "grad_norm": 0.1762828230857849, "learning_rate": 5.7006863437842007e-05, "loss": -0.175, "step": 28950 }, { "epoch": 16.856810244470314, "grad_norm": 0.19054938852787018, "learning_rate": 5.697957648300348e-05, "loss": -0.1702, "step": 28960 }, { "epoch": 16.862630966239813, "grad_norm": 0.20504987239837646, "learning_rate": 5.695228740795391e-05, "loss": -0.1748, "step": 28970 }, { "epoch": 16.868451688009312, "grad_norm": 0.19928225874900818, "learning_rate": 5.6924996220982985e-05, "loss": -0.1731, "step": 28980 }, { "epoch": 16.87427240977881, "grad_norm": 0.24749884009361267, "learning_rate": 5.6897702930381045e-05, "loss": -0.1759, "step": 28990 }, { "epoch": 16.88009313154831, "grad_norm": 0.20896300673484802, "learning_rate": 5.687040754443908e-05, "loss": -0.1783, "step": 29000 }, { "epoch": 16.88591385331781, "grad_norm": 0.18576784431934357, "learning_rate": 5.6843110071448725e-05, "loss": -0.1748, "step": 29010 }, { "epoch": 16.891734575087312, "grad_norm": 0.1833566427230835, "learning_rate": 5.6815810519702194e-05, "loss": -0.1764, "step": 29020 }, { "epoch": 16.89755529685681, "grad_norm": 0.1310645490884781, "learning_rate": 5.6788508897492396e-05, "loss": -0.1777, "step": 29030 }, { "epoch": 16.90337601862631, "grad_norm": 0.2356804460287094, "learning_rate": 5.676120521311282e-05, "loss": -0.1747, "step": 29040 }, { "epoch": 16.90919674039581, "grad_norm": 0.1058092713356018, "learning_rate": 5.6733899474857634e-05, "loss": -0.1745, "step": 29050 }, { "epoch": 16.91501746216531, "grad_norm": 0.19692707061767578, "learning_rate": 5.670659169102157e-05, "loss": -0.1762, "step": 29060 }, { "epoch": 16.92083818393481, "grad_norm": 0.2825106084346771, "learning_rate": 5.6679281869900044e-05, "loss": -0.1718, "step": 29070 }, { "epoch": 16.926658905704308, "grad_norm": 0.21678195893764496, "learning_rate": 5.6651970019789045e-05, "loss": -0.1735, "step": 29080 }, { "epoch": 16.932479627473807, "grad_norm": 0.18565888702869415, "learning_rate": 5.662465614898519e-05, "loss": -0.177, "step": 29090 }, { "epoch": 16.938300349243306, "grad_norm": 0.13087978959083557, "learning_rate": 5.6597340265785695e-05, "loss": -0.1695, "step": 29100 }, { "epoch": 16.944121071012805, "grad_norm": 0.19609683752059937, "learning_rate": 5.657002237848843e-05, "loss": -0.1764, "step": 29110 }, { "epoch": 16.949941792782305, "grad_norm": 0.2134268581867218, "learning_rate": 5.654270249539183e-05, "loss": -0.1779, "step": 29120 }, { "epoch": 16.955762514551804, "grad_norm": 0.19688314199447632, "learning_rate": 5.651538062479498e-05, "loss": -0.1709, "step": 29130 }, { "epoch": 16.961583236321303, "grad_norm": 0.17892062664031982, "learning_rate": 5.648805677499751e-05, "loss": -0.1755, "step": 29140 }, { "epoch": 16.967403958090802, "grad_norm": 0.15739832818508148, "learning_rate": 5.646073095429969e-05, "loss": -0.1741, "step": 29150 }, { "epoch": 16.9732246798603, "grad_norm": 0.17315207421779633, "learning_rate": 5.643340317100241e-05, "loss": -0.1758, "step": 29160 }, { "epoch": 16.9790454016298, "grad_norm": 0.21784564852714539, "learning_rate": 5.64060734334071e-05, "loss": -0.1735, "step": 29170 }, { "epoch": 16.9848661233993, "grad_norm": 0.12923327088356018, "learning_rate": 5.637874174981583e-05, "loss": -0.1772, "step": 29180 }, { "epoch": 16.990686845168803, "grad_norm": 0.1559409350156784, "learning_rate": 5.635140812853124e-05, "loss": -0.1761, "step": 29190 }, { "epoch": 16.996507566938302, "grad_norm": 0.1257299780845642, "learning_rate": 5.6324072577856544e-05, "loss": -0.1762, "step": 29200 }, { "epoch": 17.0023282887078, "grad_norm": 0.18117965757846832, "learning_rate": 5.629673510609559e-05, "loss": -0.1769, "step": 29210 }, { "epoch": 17.0081490104773, "grad_norm": 0.10135411471128464, "learning_rate": 5.626939572155276e-05, "loss": -0.1733, "step": 29220 }, { "epoch": 17.0139697322468, "grad_norm": 0.1532469540834427, "learning_rate": 5.6242054432533054e-05, "loss": -0.1779, "step": 29230 }, { "epoch": 17.0197904540163, "grad_norm": 0.16192039847373962, "learning_rate": 5.621471124734201e-05, "loss": -0.1751, "step": 29240 }, { "epoch": 17.025611175785798, "grad_norm": 0.2864427864551544, "learning_rate": 5.6187366174285794e-05, "loss": -0.1668, "step": 29250 }, { "epoch": 17.031431897555297, "grad_norm": 0.19649170339107513, "learning_rate": 5.616001922167109e-05, "loss": -0.1748, "step": 29260 }, { "epoch": 17.037252619324796, "grad_norm": 0.19048118591308594, "learning_rate": 5.61326703978052e-05, "loss": -0.1746, "step": 29270 }, { "epoch": 17.043073341094296, "grad_norm": 0.330770343542099, "learning_rate": 5.6105319710995964e-05, "loss": -0.1761, "step": 29280 }, { "epoch": 17.048894062863795, "grad_norm": 0.117287777364254, "learning_rate": 5.60779671695518e-05, "loss": -0.1764, "step": 29290 }, { "epoch": 17.054714784633294, "grad_norm": 0.15541936457157135, "learning_rate": 5.6050612781781684e-05, "loss": -0.1773, "step": 29300 }, { "epoch": 17.060535506402793, "grad_norm": 0.18289001286029816, "learning_rate": 5.602325655599516e-05, "loss": -0.1775, "step": 29310 }, { "epoch": 17.066356228172292, "grad_norm": 0.2714783251285553, "learning_rate": 5.599589850050234e-05, "loss": -0.1773, "step": 29320 }, { "epoch": 17.07217694994179, "grad_norm": 0.14510613679885864, "learning_rate": 5.5968538623613874e-05, "loss": -0.178, "step": 29330 }, { "epoch": 17.07799767171129, "grad_norm": 0.1774420589208603, "learning_rate": 5.594117693364095e-05, "loss": -0.177, "step": 29340 }, { "epoch": 17.08381839348079, "grad_norm": 0.11438102275133133, "learning_rate": 5.591381343889535e-05, "loss": -0.1753, "step": 29350 }, { "epoch": 17.08963911525029, "grad_norm": 0.11981002986431122, "learning_rate": 5.5886448147689355e-05, "loss": -0.1747, "step": 29360 }, { "epoch": 17.095459837019792, "grad_norm": 0.1552090048789978, "learning_rate": 5.585908106833585e-05, "loss": -0.1776, "step": 29370 }, { "epoch": 17.10128055878929, "grad_norm": 0.10193344950675964, "learning_rate": 5.5831712209148226e-05, "loss": -0.177, "step": 29380 }, { "epoch": 17.10710128055879, "grad_norm": 0.13844841718673706, "learning_rate": 5.58043415784404e-05, "loss": -0.1741, "step": 29390 }, { "epoch": 17.11292200232829, "grad_norm": 0.11890485882759094, "learning_rate": 5.577696918452686e-05, "loss": -0.1784, "step": 29400 }, { "epoch": 17.11874272409779, "grad_norm": 0.1253238320350647, "learning_rate": 5.5749595035722604e-05, "loss": -0.1617, "step": 29410 }, { "epoch": 17.124563445867288, "grad_norm": 0.16089004278182983, "learning_rate": 5.5722219140343193e-05, "loss": -0.1719, "step": 29420 }, { "epoch": 17.130384167636787, "grad_norm": 0.14441828429698944, "learning_rate": 5.56948415067047e-05, "loss": -0.1765, "step": 29430 }, { "epoch": 17.136204889406287, "grad_norm": 0.23450323939323425, "learning_rate": 5.5667462143123704e-05, "loss": -0.1719, "step": 29440 }, { "epoch": 17.142025611175786, "grad_norm": 0.21521060168743134, "learning_rate": 5.564008105791737e-05, "loss": -0.1777, "step": 29450 }, { "epoch": 17.147846332945285, "grad_norm": 0.19669654965400696, "learning_rate": 5.5612698259403316e-05, "loss": -0.1731, "step": 29460 }, { "epoch": 17.153667054714784, "grad_norm": 0.15814116597175598, "learning_rate": 5.5585313755899724e-05, "loss": -0.1721, "step": 29470 }, { "epoch": 17.159487776484283, "grad_norm": 0.20943723618984222, "learning_rate": 5.5557927555725285e-05, "loss": -0.1709, "step": 29480 }, { "epoch": 17.165308498253783, "grad_norm": 0.12675341963768005, "learning_rate": 5.55305396671992e-05, "loss": -0.1776, "step": 29490 }, { "epoch": 17.171129220023282, "grad_norm": 0.20000365376472473, "learning_rate": 5.55031500986412e-05, "loss": -0.1725, "step": 29500 }, { "epoch": 17.17694994179278, "grad_norm": 0.23241311311721802, "learning_rate": 5.547575885837149e-05, "loss": -0.1747, "step": 29510 }, { "epoch": 17.18277066356228, "grad_norm": 0.10176507383584976, "learning_rate": 5.5448365954710825e-05, "loss": -0.176, "step": 29520 }, { "epoch": 17.18859138533178, "grad_norm": 0.10380352288484573, "learning_rate": 5.5420971395980446e-05, "loss": -0.1703, "step": 29530 }, { "epoch": 17.194412107101282, "grad_norm": 0.2441292554140091, "learning_rate": 5.539357519050209e-05, "loss": -0.1721, "step": 29540 }, { "epoch": 17.20023282887078, "grad_norm": 0.14824078977108002, "learning_rate": 5.536617734659799e-05, "loss": -0.1715, "step": 29550 }, { "epoch": 17.20605355064028, "grad_norm": 0.2004268318414688, "learning_rate": 5.533877787259091e-05, "loss": -0.1766, "step": 29560 }, { "epoch": 17.21187427240978, "grad_norm": 0.1164257749915123, "learning_rate": 5.5311376776804044e-05, "loss": -0.1746, "step": 29570 }, { "epoch": 17.21769499417928, "grad_norm": 0.21544183790683746, "learning_rate": 5.528397406756118e-05, "loss": -0.1732, "step": 29580 }, { "epoch": 17.22351571594878, "grad_norm": 0.1612721085548401, "learning_rate": 5.525656975318652e-05, "loss": -0.1781, "step": 29590 }, { "epoch": 17.229336437718278, "grad_norm": 0.10673724859952927, "learning_rate": 5.522916384200474e-05, "loss": -0.1798, "step": 29600 }, { "epoch": 17.235157159487777, "grad_norm": 0.15047021210193634, "learning_rate": 5.520175634234106e-05, "loss": -0.1765, "step": 29610 }, { "epoch": 17.240977881257276, "grad_norm": 0.1782883256673813, "learning_rate": 5.517434726252113e-05, "loss": -0.1777, "step": 29620 }, { "epoch": 17.246798603026775, "grad_norm": 0.1575494408607483, "learning_rate": 5.514693661087113e-05, "loss": -0.1782, "step": 29630 }, { "epoch": 17.252619324796274, "grad_norm": 0.1737419068813324, "learning_rate": 5.511952439571769e-05, "loss": -0.1775, "step": 29640 }, { "epoch": 17.258440046565774, "grad_norm": 0.11933741718530655, "learning_rate": 5.509211062538791e-05, "loss": -0.1788, "step": 29650 }, { "epoch": 17.264260768335273, "grad_norm": 0.1583406627178192, "learning_rate": 5.506469530820939e-05, "loss": -0.1733, "step": 29660 }, { "epoch": 17.270081490104772, "grad_norm": 0.18835929036140442, "learning_rate": 5.503727845251014e-05, "loss": -0.1757, "step": 29670 }, { "epoch": 17.27590221187427, "grad_norm": 0.238698348402977, "learning_rate": 5.50098600666187e-05, "loss": -0.1754, "step": 29680 }, { "epoch": 17.28172293364377, "grad_norm": 0.18920402228832245, "learning_rate": 5.498244015886406e-05, "loss": -0.1759, "step": 29690 }, { "epoch": 17.28754365541327, "grad_norm": 0.13574130833148956, "learning_rate": 5.495501873757565e-05, "loss": -0.1792, "step": 29700 }, { "epoch": 17.29336437718277, "grad_norm": 0.13605017960071564, "learning_rate": 5.492759581108336e-05, "loss": -0.177, "step": 29710 }, { "epoch": 17.29918509895227, "grad_norm": 0.1555427759885788, "learning_rate": 5.490017138771759e-05, "loss": -0.1759, "step": 29720 }, { "epoch": 17.30500582072177, "grad_norm": 0.12545257806777954, "learning_rate": 5.487274547580912e-05, "loss": -0.1758, "step": 29730 }, { "epoch": 17.31082654249127, "grad_norm": 0.25058382749557495, "learning_rate": 5.484531808368923e-05, "loss": -0.1772, "step": 29740 }, { "epoch": 17.31664726426077, "grad_norm": 0.15438595414161682, "learning_rate": 5.4817889219689656e-05, "loss": -0.1769, "step": 29750 }, { "epoch": 17.32246798603027, "grad_norm": 0.25136199593544006, "learning_rate": 5.4790458892142536e-05, "loss": -0.178, "step": 29760 }, { "epoch": 17.328288707799768, "grad_norm": 0.2536737322807312, "learning_rate": 5.476302710938048e-05, "loss": -0.1784, "step": 29770 }, { "epoch": 17.334109429569267, "grad_norm": 0.1819898933172226, "learning_rate": 5.473559387973657e-05, "loss": -0.1756, "step": 29780 }, { "epoch": 17.339930151338766, "grad_norm": 0.20984448492527008, "learning_rate": 5.470815921154425e-05, "loss": -0.1738, "step": 29790 }, { "epoch": 17.345750873108265, "grad_norm": 0.17977435886859894, "learning_rate": 5.468072311313749e-05, "loss": -0.179, "step": 29800 }, { "epoch": 17.351571594877765, "grad_norm": 0.14950419962406158, "learning_rate": 5.465328559285063e-05, "loss": -0.1761, "step": 29810 }, { "epoch": 17.357392316647264, "grad_norm": 0.18023693561553955, "learning_rate": 5.462584665901849e-05, "loss": -0.1774, "step": 29820 }, { "epoch": 17.363213038416763, "grad_norm": 0.2617327868938446, "learning_rate": 5.4598406319976235e-05, "loss": -0.1736, "step": 29830 }, { "epoch": 17.369033760186262, "grad_norm": 0.2410895675420761, "learning_rate": 5.457096458405958e-05, "loss": -0.1687, "step": 29840 }, { "epoch": 17.37485448195576, "grad_norm": 0.12307751178741455, "learning_rate": 5.454352145960457e-05, "loss": -0.1765, "step": 29850 }, { "epoch": 17.38067520372526, "grad_norm": 0.1067681536078453, "learning_rate": 5.4516076954947715e-05, "loss": -0.1776, "step": 29860 }, { "epoch": 17.38649592549476, "grad_norm": 0.4138246476650238, "learning_rate": 5.448863107842591e-05, "loss": -0.1729, "step": 29870 }, { "epoch": 17.39231664726426, "grad_norm": 0.1820836365222931, "learning_rate": 5.446118383837651e-05, "loss": -0.1748, "step": 29880 }, { "epoch": 17.398137369033762, "grad_norm": 0.18760333955287933, "learning_rate": 5.443373524313722e-05, "loss": -0.1724, "step": 29890 }, { "epoch": 17.40395809080326, "grad_norm": 0.24421392381191254, "learning_rate": 5.440628530104626e-05, "loss": -0.1772, "step": 29900 }, { "epoch": 17.40977881257276, "grad_norm": 0.19738024473190308, "learning_rate": 5.4378834020442146e-05, "loss": -0.178, "step": 29910 }, { "epoch": 17.41559953434226, "grad_norm": 0.17660802602767944, "learning_rate": 5.4351381409663884e-05, "loss": -0.177, "step": 29920 }, { "epoch": 17.42142025611176, "grad_norm": 0.17913338541984558, "learning_rate": 5.432392747705084e-05, "loss": -0.1771, "step": 29930 }, { "epoch": 17.427240977881258, "grad_norm": 0.21101221442222595, "learning_rate": 5.429647223094278e-05, "loss": -0.178, "step": 29940 }, { "epoch": 17.433061699650757, "grad_norm": 0.31543105840682983, "learning_rate": 5.4269015679679924e-05, "loss": -0.1753, "step": 29950 }, { "epoch": 17.438882421420256, "grad_norm": 0.1413743793964386, "learning_rate": 5.424155783160281e-05, "loss": -0.1784, "step": 29960 }, { "epoch": 17.444703143189756, "grad_norm": 0.2004459649324417, "learning_rate": 5.4214098695052415e-05, "loss": -0.173, "step": 29970 }, { "epoch": 17.450523864959255, "grad_norm": 0.23296096920967102, "learning_rate": 5.418663827837012e-05, "loss": -0.1744, "step": 29980 }, { "epoch": 17.456344586728754, "grad_norm": 0.15724647045135498, "learning_rate": 5.415917658989763e-05, "loss": -0.179, "step": 29990 }, { "epoch": 17.462165308498253, "grad_norm": 0.20601876080036163, "learning_rate": 5.413171363797713e-05, "loss": -0.1761, "step": 30000 }, { "epoch": 17.467986030267753, "grad_norm": 0.17476770281791687, "learning_rate": 5.4104249430951116e-05, "loss": -0.1774, "step": 30010 }, { "epoch": 17.47380675203725, "grad_norm": 0.20526915788650513, "learning_rate": 5.4076783977162494e-05, "loss": -0.1769, "step": 30020 }, { "epoch": 17.47962747380675, "grad_norm": 0.22096025943756104, "learning_rate": 5.4049317284954525e-05, "loss": -0.1781, "step": 30030 }, { "epoch": 17.48544819557625, "grad_norm": 0.1330428570508957, "learning_rate": 5.4021849362670884e-05, "loss": -0.1777, "step": 30040 }, { "epoch": 17.49126891734575, "grad_norm": 0.2490643858909607, "learning_rate": 5.3994380218655604e-05, "loss": -0.1717, "step": 30050 }, { "epoch": 17.49708963911525, "grad_norm": 0.16579122841358185, "learning_rate": 5.396690986125309e-05, "loss": -0.1765, "step": 30060 }, { "epoch": 17.50291036088475, "grad_norm": 0.13887451589107513, "learning_rate": 5.3939438298808075e-05, "loss": -0.1765, "step": 30070 }, { "epoch": 17.50873108265425, "grad_norm": 0.20129458606243134, "learning_rate": 5.3911965539665744e-05, "loss": -0.1732, "step": 30080 }, { "epoch": 17.51455180442375, "grad_norm": 0.13520315289497375, "learning_rate": 5.388449159217156e-05, "loss": -0.1767, "step": 30090 }, { "epoch": 17.52037252619325, "grad_norm": 0.1192411407828331, "learning_rate": 5.3857016464671385e-05, "loss": -0.1766, "step": 30100 }, { "epoch": 17.52619324796275, "grad_norm": 0.08798696845769882, "learning_rate": 5.382954016551146e-05, "loss": -0.1793, "step": 30110 }, { "epoch": 17.532013969732247, "grad_norm": 0.1776745766401291, "learning_rate": 5.380206270303835e-05, "loss": -0.1767, "step": 30120 }, { "epoch": 17.537834691501747, "grad_norm": 0.13387322425842285, "learning_rate": 5.377458408559897e-05, "loss": -0.1762, "step": 30130 }, { "epoch": 17.543655413271246, "grad_norm": 0.20014606416225433, "learning_rate": 5.374710432154061e-05, "loss": -0.1771, "step": 30140 }, { "epoch": 17.549476135040745, "grad_norm": 0.14802058041095734, "learning_rate": 5.3719623419210886e-05, "loss": -0.1777, "step": 30150 }, { "epoch": 17.555296856810244, "grad_norm": 0.22174249589443207, "learning_rate": 5.3692141386957786e-05, "loss": -0.1786, "step": 30160 }, { "epoch": 17.561117578579744, "grad_norm": 0.1655428409576416, "learning_rate": 5.3664658233129616e-05, "loss": -0.1746, "step": 30170 }, { "epoch": 17.566938300349243, "grad_norm": 0.13416555523872375, "learning_rate": 5.363717396607504e-05, "loss": -0.1778, "step": 30180 }, { "epoch": 17.572759022118742, "grad_norm": 0.2305540144443512, "learning_rate": 5.360968859414305e-05, "loss": -0.176, "step": 30190 }, { "epoch": 17.57857974388824, "grad_norm": 0.16939009726047516, "learning_rate": 5.358220212568295e-05, "loss": -0.1776, "step": 30200 }, { "epoch": 17.58440046565774, "grad_norm": 0.25012680888175964, "learning_rate": 5.355471456904444e-05, "loss": -0.1766, "step": 30210 }, { "epoch": 17.59022118742724, "grad_norm": 0.3050757050514221, "learning_rate": 5.3527225932577495e-05, "loss": -0.1688, "step": 30220 }, { "epoch": 17.59604190919674, "grad_norm": 0.18040123581886292, "learning_rate": 5.349973622463246e-05, "loss": -0.1732, "step": 30230 }, { "epoch": 17.601862630966238, "grad_norm": 0.22528643906116486, "learning_rate": 5.3472245453559956e-05, "loss": -0.1778, "step": 30240 }, { "epoch": 17.60768335273574, "grad_norm": 0.22252948582172394, "learning_rate": 5.3444753627710955e-05, "loss": -0.1747, "step": 30250 }, { "epoch": 17.61350407450524, "grad_norm": 0.23753203451633453, "learning_rate": 5.341726075543676e-05, "loss": -0.1765, "step": 30260 }, { "epoch": 17.61932479627474, "grad_norm": 0.20993858575820923, "learning_rate": 5.338976684508898e-05, "loss": -0.1779, "step": 30270 }, { "epoch": 17.62514551804424, "grad_norm": 0.1308596283197403, "learning_rate": 5.336227190501953e-05, "loss": -0.1791, "step": 30280 }, { "epoch": 17.630966239813738, "grad_norm": 0.14443475008010864, "learning_rate": 5.3334775943580664e-05, "loss": -0.1745, "step": 30290 }, { "epoch": 17.636786961583237, "grad_norm": 0.1548006236553192, "learning_rate": 5.330727896912491e-05, "loss": -0.1786, "step": 30300 }, { "epoch": 17.642607683352736, "grad_norm": 0.17254716157913208, "learning_rate": 5.327978099000511e-05, "loss": -0.1777, "step": 30310 }, { "epoch": 17.648428405122235, "grad_norm": 0.1917858123779297, "learning_rate": 5.3252282014574465e-05, "loss": -0.177, "step": 30320 }, { "epoch": 17.654249126891735, "grad_norm": 0.15362557768821716, "learning_rate": 5.322478205118641e-05, "loss": -0.1801, "step": 30330 }, { "epoch": 17.660069848661234, "grad_norm": 0.14653658866882324, "learning_rate": 5.3197281108194704e-05, "loss": -0.1746, "step": 30340 }, { "epoch": 17.665890570430733, "grad_norm": 0.19157783687114716, "learning_rate": 5.316977919395342e-05, "loss": -0.1797, "step": 30350 }, { "epoch": 17.671711292200232, "grad_norm": 0.18170547485351562, "learning_rate": 5.314227631681691e-05, "loss": -0.1769, "step": 30360 }, { "epoch": 17.67753201396973, "grad_norm": 0.255908340215683, "learning_rate": 5.311477248513982e-05, "loss": -0.1768, "step": 30370 }, { "epoch": 17.68335273573923, "grad_norm": 0.1953708529472351, "learning_rate": 5.30872677072771e-05, "loss": -0.1751, "step": 30380 }, { "epoch": 17.68917345750873, "grad_norm": 0.1609024852514267, "learning_rate": 5.3059761991583954e-05, "loss": -0.175, "step": 30390 }, { "epoch": 17.69499417927823, "grad_norm": 0.08917511999607086, "learning_rate": 5.303225534641592e-05, "loss": -0.1762, "step": 30400 }, { "epoch": 17.70081490104773, "grad_norm": 0.08905752748250961, "learning_rate": 5.300474778012875e-05, "loss": -0.1779, "step": 30410 }, { "epoch": 17.70663562281723, "grad_norm": 0.1360703855752945, "learning_rate": 5.297723930107855e-05, "loss": -0.1784, "step": 30420 }, { "epoch": 17.71245634458673, "grad_norm": 0.13371849060058594, "learning_rate": 5.294972991762167e-05, "loss": -0.1758, "step": 30430 }, { "epoch": 17.71827706635623, "grad_norm": 0.21259626746177673, "learning_rate": 5.292221963811472e-05, "loss": -0.1771, "step": 30440 }, { "epoch": 17.72409778812573, "grad_norm": 0.11781587451696396, "learning_rate": 5.28947084709146e-05, "loss": -0.1777, "step": 30450 }, { "epoch": 17.729918509895228, "grad_norm": 0.1771814525127411, "learning_rate": 5.2867196424378465e-05, "loss": -0.1779, "step": 30460 }, { "epoch": 17.735739231664727, "grad_norm": 0.20502567291259766, "learning_rate": 5.2839683506863765e-05, "loss": -0.1735, "step": 30470 }, { "epoch": 17.741559953434226, "grad_norm": 0.21156860888004303, "learning_rate": 5.281216972672821e-05, "loss": -0.1767, "step": 30480 }, { "epoch": 17.747380675203726, "grad_norm": 0.1469791978597641, "learning_rate": 5.278465509232973e-05, "loss": -0.1721, "step": 30490 }, { "epoch": 17.753201396973225, "grad_norm": 0.22584335505962372, "learning_rate": 5.275713961202655e-05, "loss": -0.1769, "step": 30500 }, { "epoch": 17.759022118742724, "grad_norm": 0.2720491886138916, "learning_rate": 5.2729623294177165e-05, "loss": -0.1789, "step": 30510 }, { "epoch": 17.764842840512223, "grad_norm": 0.19602760672569275, "learning_rate": 5.270210614714028e-05, "loss": -0.1763, "step": 30520 }, { "epoch": 17.770663562281722, "grad_norm": 0.22960634529590607, "learning_rate": 5.267458817927491e-05, "loss": -0.1741, "step": 30530 }, { "epoch": 17.77648428405122, "grad_norm": 0.16788123548030853, "learning_rate": 5.264706939894026e-05, "loss": -0.1777, "step": 30540 }, { "epoch": 17.78230500582072, "grad_norm": 0.1062951534986496, "learning_rate": 5.261954981449584e-05, "loss": -0.1772, "step": 30550 }, { "epoch": 17.78812572759022, "grad_norm": 0.12787948548793793, "learning_rate": 5.2592029434301324e-05, "loss": -0.1777, "step": 30560 }, { "epoch": 17.79394644935972, "grad_norm": 0.20872044563293457, "learning_rate": 5.256450826671672e-05, "loss": -0.1717, "step": 30570 }, { "epoch": 17.79976717112922, "grad_norm": 0.1808810979127884, "learning_rate": 5.253698632010221e-05, "loss": -0.1766, "step": 30580 }, { "epoch": 17.80558789289872, "grad_norm": 0.1285277009010315, "learning_rate": 5.2509463602818246e-05, "loss": -0.1755, "step": 30590 }, { "epoch": 17.81140861466822, "grad_norm": 0.13389018177986145, "learning_rate": 5.248194012322549e-05, "loss": -0.1774, "step": 30600 }, { "epoch": 17.81722933643772, "grad_norm": 0.13190612196922302, "learning_rate": 5.245441588968486e-05, "loss": -0.1756, "step": 30610 }, { "epoch": 17.82305005820722, "grad_norm": 0.15947188436985016, "learning_rate": 5.242689091055748e-05, "loss": -0.1736, "step": 30620 }, { "epoch": 17.828870779976718, "grad_norm": 0.16507238149642944, "learning_rate": 5.239936519420473e-05, "loss": -0.1729, "step": 30630 }, { "epoch": 17.834691501746217, "grad_norm": 0.22593151032924652, "learning_rate": 5.2371838748988175e-05, "loss": -0.1731, "step": 30640 }, { "epoch": 17.840512223515717, "grad_norm": 0.23526281118392944, "learning_rate": 5.234431158326965e-05, "loss": -0.1725, "step": 30650 }, { "epoch": 17.846332945285216, "grad_norm": 0.2852305471897125, "learning_rate": 5.231678370541115e-05, "loss": -0.1747, "step": 30660 }, { "epoch": 17.852153667054715, "grad_norm": 0.1834164261817932, "learning_rate": 5.228925512377495e-05, "loss": -0.1756, "step": 30670 }, { "epoch": 17.857974388824214, "grad_norm": 0.23370268940925598, "learning_rate": 5.2261725846723465e-05, "loss": -0.1755, "step": 30680 }, { "epoch": 17.863795110593713, "grad_norm": 0.14316673576831818, "learning_rate": 5.22341958826194e-05, "loss": -0.1765, "step": 30690 }, { "epoch": 17.869615832363213, "grad_norm": 0.13202723860740662, "learning_rate": 5.22066652398256e-05, "loss": -0.1764, "step": 30700 }, { "epoch": 17.875436554132712, "grad_norm": 0.27100929617881775, "learning_rate": 5.2179133926705185e-05, "loss": -0.1767, "step": 30710 }, { "epoch": 17.88125727590221, "grad_norm": 0.11600127816200256, "learning_rate": 5.215160195162141e-05, "loss": -0.1775, "step": 30720 }, { "epoch": 17.88707799767171, "grad_norm": 0.13702407479286194, "learning_rate": 5.212406932293776e-05, "loss": -0.1792, "step": 30730 }, { "epoch": 17.89289871944121, "grad_norm": 0.2408655285835266, "learning_rate": 5.209653604901795e-05, "loss": -0.1769, "step": 30740 }, { "epoch": 17.89871944121071, "grad_norm": 0.1494527906179428, "learning_rate": 5.206900213822584e-05, "loss": -0.1777, "step": 30750 }, { "epoch": 17.904540162980208, "grad_norm": 0.12217523157596588, "learning_rate": 5.204146759892551e-05, "loss": -0.1765, "step": 30760 }, { "epoch": 17.91036088474971, "grad_norm": 0.21163587272167206, "learning_rate": 5.2013932439481216e-05, "loss": -0.1775, "step": 30770 }, { "epoch": 17.91618160651921, "grad_norm": 0.2306758463382721, "learning_rate": 5.198639666825743e-05, "loss": -0.1692, "step": 30780 }, { "epoch": 17.92200232828871, "grad_norm": 0.154427632689476, "learning_rate": 5.195886029361877e-05, "loss": -0.1758, "step": 30790 }, { "epoch": 17.92782305005821, "grad_norm": 0.20586244761943817, "learning_rate": 5.193132332393009e-05, "loss": -0.1708, "step": 30800 }, { "epoch": 17.933643771827708, "grad_norm": 0.13554218411445618, "learning_rate": 5.1903785767556376e-05, "loss": -0.1769, "step": 30810 }, { "epoch": 17.939464493597207, "grad_norm": 0.2011566460132599, "learning_rate": 5.187624763286282e-05, "loss": -0.1784, "step": 30820 }, { "epoch": 17.945285215366706, "grad_norm": 0.1944594383239746, "learning_rate": 5.184870892821475e-05, "loss": -0.1716, "step": 30830 }, { "epoch": 17.951105937136205, "grad_norm": 0.16575530171394348, "learning_rate": 5.182116966197773e-05, "loss": -0.178, "step": 30840 }, { "epoch": 17.956926658905704, "grad_norm": 0.20080211758613586, "learning_rate": 5.1793629842517466e-05, "loss": -0.1779, "step": 30850 }, { "epoch": 17.962747380675204, "grad_norm": 0.15203967690467834, "learning_rate": 5.17660894781998e-05, "loss": -0.1773, "step": 30860 }, { "epoch": 17.968568102444703, "grad_norm": 0.17016184329986572, "learning_rate": 5.173854857739079e-05, "loss": -0.1758, "step": 30870 }, { "epoch": 17.974388824214202, "grad_norm": 0.1608765870332718, "learning_rate": 5.171100714845661e-05, "loss": -0.1785, "step": 30880 }, { "epoch": 17.9802095459837, "grad_norm": 0.13107353448867798, "learning_rate": 5.1683465199763646e-05, "loss": -0.1717, "step": 30890 }, { "epoch": 17.9860302677532, "grad_norm": 0.21530024707317352, "learning_rate": 5.16559227396784e-05, "loss": -0.1799, "step": 30900 }, { "epoch": 17.9918509895227, "grad_norm": 0.16488490998744965, "learning_rate": 5.1628379776567556e-05, "loss": -0.1764, "step": 30910 }, { "epoch": 17.9976717112922, "grad_norm": 0.14782999455928802, "learning_rate": 5.160083631879792e-05, "loss": -0.1765, "step": 30920 }, { "epoch": 18.003492433061698, "grad_norm": 0.1566154509782791, "learning_rate": 5.1573292374736484e-05, "loss": -0.1771, "step": 30930 }, { "epoch": 18.009313154831197, "grad_norm": 0.19625268876552582, "learning_rate": 5.1545747952750356e-05, "loss": -0.177, "step": 30940 }, { "epoch": 18.0151338766007, "grad_norm": 0.1403869241476059, "learning_rate": 5.151820306120682e-05, "loss": -0.1772, "step": 30950 }, { "epoch": 18.0209545983702, "grad_norm": 0.16386470198631287, "learning_rate": 5.149065770847328e-05, "loss": -0.1791, "step": 30960 }, { "epoch": 18.0267753201397, "grad_norm": 0.22572031617164612, "learning_rate": 5.1463111902917297e-05, "loss": -0.1747, "step": 30970 }, { "epoch": 18.032596041909198, "grad_norm": 0.2510806918144226, "learning_rate": 5.143556565290654e-05, "loss": -0.1732, "step": 30980 }, { "epoch": 18.038416763678697, "grad_norm": 0.21028514206409454, "learning_rate": 5.140801896680882e-05, "loss": -0.1772, "step": 30990 }, { "epoch": 18.044237485448196, "grad_norm": 0.193561851978302, "learning_rate": 5.1380471852992144e-05, "loss": -0.1774, "step": 31000 }, { "epoch": 18.050058207217695, "grad_norm": 0.08337424695491791, "learning_rate": 5.135292431982457e-05, "loss": -0.1766, "step": 31010 }, { "epoch": 18.055878928987195, "grad_norm": 0.15442420542240143, "learning_rate": 5.1325376375674294e-05, "loss": -0.1784, "step": 31020 }, { "epoch": 18.061699650756694, "grad_norm": 0.14207963645458221, "learning_rate": 5.129782802890968e-05, "loss": -0.1771, "step": 31030 }, { "epoch": 18.067520372526193, "grad_norm": 0.14060631394386292, "learning_rate": 5.127027928789916e-05, "loss": -0.1776, "step": 31040 }, { "epoch": 18.073341094295692, "grad_norm": 0.15153786540031433, "learning_rate": 5.124273016101135e-05, "loss": -0.1656, "step": 31050 }, { "epoch": 18.07916181606519, "grad_norm": 0.18578369915485382, "learning_rate": 5.121518065661492e-05, "loss": -0.1762, "step": 31060 }, { "epoch": 18.08498253783469, "grad_norm": 0.13740985095500946, "learning_rate": 5.11876307830787e-05, "loss": -0.1761, "step": 31070 }, { "epoch": 18.09080325960419, "grad_norm": 0.2014651745557785, "learning_rate": 5.1160080548771596e-05, "loss": -0.1732, "step": 31080 }, { "epoch": 18.09662398137369, "grad_norm": 0.1486228108406067, "learning_rate": 5.1132529962062656e-05, "loss": -0.177, "step": 31090 }, { "epoch": 18.10244470314319, "grad_norm": 0.1828269213438034, "learning_rate": 5.110497903132101e-05, "loss": -0.1768, "step": 31100 }, { "epoch": 18.108265424912688, "grad_norm": 0.16743171215057373, "learning_rate": 5.107742776491592e-05, "loss": -0.1786, "step": 31110 }, { "epoch": 18.11408614668219, "grad_norm": 0.15701469779014587, "learning_rate": 5.104987617121673e-05, "loss": -0.1771, "step": 31120 }, { "epoch": 18.11990686845169, "grad_norm": 0.13618692755699158, "learning_rate": 5.102232425859287e-05, "loss": -0.1779, "step": 31130 }, { "epoch": 18.12572759022119, "grad_norm": 0.14876577258110046, "learning_rate": 5.09947720354139e-05, "loss": -0.1779, "step": 31140 }, { "epoch": 18.131548311990688, "grad_norm": 0.1458103209733963, "learning_rate": 5.096721951004942e-05, "loss": -0.1791, "step": 31150 }, { "epoch": 18.137369033760187, "grad_norm": 0.16525766253471375, "learning_rate": 5.0939666690869227e-05, "loss": -0.1774, "step": 31160 }, { "epoch": 18.143189755529686, "grad_norm": 0.20875979959964752, "learning_rate": 5.0912113586243096e-05, "loss": -0.1767, "step": 31170 }, { "epoch": 18.149010477299186, "grad_norm": 0.18514259159564972, "learning_rate": 5.0884560204540935e-05, "loss": -0.1755, "step": 31180 }, { "epoch": 18.154831199068685, "grad_norm": 0.1757805496454239, "learning_rate": 5.0857006554132736e-05, "loss": -0.1767, "step": 31190 }, { "epoch": 18.160651920838184, "grad_norm": 0.08435310423374176, "learning_rate": 5.0829452643388575e-05, "loss": -0.1806, "step": 31200 }, { "epoch": 18.166472642607683, "grad_norm": 0.11832931637763977, "learning_rate": 5.08018984806786e-05, "loss": -0.1803, "step": 31210 }, { "epoch": 18.172293364377182, "grad_norm": 0.1541094332933426, "learning_rate": 5.0774344074373036e-05, "loss": -0.1755, "step": 31220 }, { "epoch": 18.17811408614668, "grad_norm": 0.25724920630455017, "learning_rate": 5.07467894328422e-05, "loss": -0.1766, "step": 31230 }, { "epoch": 18.18393480791618, "grad_norm": 0.2269328087568283, "learning_rate": 5.0719234564456454e-05, "loss": -0.1756, "step": 31240 }, { "epoch": 18.18975552968568, "grad_norm": 0.25365930795669556, "learning_rate": 5.0691679477586216e-05, "loss": -0.1798, "step": 31250 }, { "epoch": 18.19557625145518, "grad_norm": 0.24546052515506744, "learning_rate": 5.0664124180602035e-05, "loss": -0.1759, "step": 31260 }, { "epoch": 18.20139697322468, "grad_norm": 0.15408353507518768, "learning_rate": 5.063656868187447e-05, "loss": -0.1783, "step": 31270 }, { "epoch": 18.207217694994178, "grad_norm": 0.18073996901512146, "learning_rate": 5.060901298977413e-05, "loss": -0.1763, "step": 31280 }, { "epoch": 18.213038416763677, "grad_norm": 0.3504268229007721, "learning_rate": 5.0581457112671725e-05, "loss": -0.1683, "step": 31290 }, { "epoch": 18.21885913853318, "grad_norm": 0.1570201963186264, "learning_rate": 5.0553901058938016e-05, "loss": -0.1782, "step": 31300 }, { "epoch": 18.22467986030268, "grad_norm": 0.18254302442073822, "learning_rate": 5.052634483694377e-05, "loss": -0.1789, "step": 31310 }, { "epoch": 18.230500582072178, "grad_norm": 0.16155481338500977, "learning_rate": 5.049878845505988e-05, "loss": -0.1736, "step": 31320 }, { "epoch": 18.236321303841677, "grad_norm": 0.09943871200084686, "learning_rate": 5.047123192165721e-05, "loss": -0.1782, "step": 31330 }, { "epoch": 18.242142025611177, "grad_norm": 0.15937085449695587, "learning_rate": 5.0443675245106735e-05, "loss": -0.1781, "step": 31340 }, { "epoch": 18.247962747380676, "grad_norm": 0.2071017324924469, "learning_rate": 5.0416118433779426e-05, "loss": -0.1789, "step": 31350 }, { "epoch": 18.253783469150175, "grad_norm": 0.11679933965206146, "learning_rate": 5.038856149604633e-05, "loss": -0.1777, "step": 31360 }, { "epoch": 18.259604190919674, "grad_norm": 0.11332769691944122, "learning_rate": 5.03610044402785e-05, "loss": -0.178, "step": 31370 }, { "epoch": 18.265424912689173, "grad_norm": 0.25866463780403137, "learning_rate": 5.033344727484707e-05, "loss": -0.1742, "step": 31380 }, { "epoch": 18.271245634458673, "grad_norm": 0.161311075091362, "learning_rate": 5.030589000812315e-05, "loss": -0.1774, "step": 31390 }, { "epoch": 18.277066356228172, "grad_norm": 0.14692817628383636, "learning_rate": 5.027833264847793e-05, "loss": -0.1783, "step": 31400 }, { "epoch": 18.28288707799767, "grad_norm": 0.22265233099460602, "learning_rate": 5.025077520428258e-05, "loss": -0.1734, "step": 31410 }, { "epoch": 18.28870779976717, "grad_norm": 0.23267699778079987, "learning_rate": 5.022321768390837e-05, "loss": -0.1792, "step": 31420 }, { "epoch": 18.29452852153667, "grad_norm": 0.2604605257511139, "learning_rate": 5.0195660095726516e-05, "loss": -0.1751, "step": 31430 }, { "epoch": 18.30034924330617, "grad_norm": 0.14862118661403656, "learning_rate": 5.016810244810829e-05, "loss": -0.1785, "step": 31440 }, { "epoch": 18.306169965075668, "grad_norm": 0.3060447871685028, "learning_rate": 5.0140544749424976e-05, "loss": -0.1768, "step": 31450 }, { "epoch": 18.311990686845167, "grad_norm": 0.2108098864555359, "learning_rate": 5.0112987008047874e-05, "loss": -0.1783, "step": 31460 }, { "epoch": 18.31781140861467, "grad_norm": 0.23004022240638733, "learning_rate": 5.008542923234831e-05, "loss": -0.1759, "step": 31470 }, { "epoch": 18.32363213038417, "grad_norm": 0.2056947648525238, "learning_rate": 5.00578714306976e-05, "loss": -0.176, "step": 31480 }, { "epoch": 18.32945285215367, "grad_norm": 0.18372972309589386, "learning_rate": 5.0030313611467084e-05, "loss": -0.1772, "step": 31490 }, { "epoch": 18.335273573923168, "grad_norm": 0.1860833764076233, "learning_rate": 5.0002755783028074e-05, "loss": -0.1775, "step": 31500 }, { "epoch": 18.341094295692667, "grad_norm": 0.25491538643836975, "learning_rate": 4.997519795375194e-05, "loss": -0.176, "step": 31510 }, { "epoch": 18.346915017462166, "grad_norm": 0.16566260159015656, "learning_rate": 4.9947640132010016e-05, "loss": -0.176, "step": 31520 }, { "epoch": 18.352735739231665, "grad_norm": 0.20243297517299652, "learning_rate": 4.9920082326173625e-05, "loss": -0.1768, "step": 31530 }, { "epoch": 18.358556461001164, "grad_norm": 0.3262699842453003, "learning_rate": 4.9892524544614114e-05, "loss": -0.1749, "step": 31540 }, { "epoch": 18.364377182770664, "grad_norm": 0.18126897513866425, "learning_rate": 4.986496679570283e-05, "loss": -0.1777, "step": 31550 }, { "epoch": 18.370197904540163, "grad_norm": 0.14755091071128845, "learning_rate": 4.983740908781105e-05, "loss": -0.1755, "step": 31560 }, { "epoch": 18.376018626309662, "grad_norm": 0.14569061994552612, "learning_rate": 4.9809851429310116e-05, "loss": -0.1798, "step": 31570 }, { "epoch": 18.38183934807916, "grad_norm": 0.12679053843021393, "learning_rate": 4.9782293828571275e-05, "loss": -0.1737, "step": 31580 }, { "epoch": 18.38766006984866, "grad_norm": 0.1766003966331482, "learning_rate": 4.9754736293965846e-05, "loss": -0.1779, "step": 31590 }, { "epoch": 18.39348079161816, "grad_norm": 0.1411529779434204, "learning_rate": 4.972717883386502e-05, "loss": -0.1738, "step": 31600 }, { "epoch": 18.39930151338766, "grad_norm": 0.19642820954322815, "learning_rate": 4.9699621456640075e-05, "loss": -0.1767, "step": 31610 }, { "epoch": 18.405122235157158, "grad_norm": 0.2390287071466446, "learning_rate": 4.9672064170662214e-05, "loss": -0.1776, "step": 31620 }, { "epoch": 18.410942956926657, "grad_norm": 0.22992601990699768, "learning_rate": 4.9644506984302583e-05, "loss": -0.1719, "step": 31630 }, { "epoch": 18.416763678696157, "grad_norm": 0.22839541733264923, "learning_rate": 4.9616949905932356e-05, "loss": -0.1768, "step": 31640 }, { "epoch": 18.42258440046566, "grad_norm": 0.1682366281747818, "learning_rate": 4.9589392943922615e-05, "loss": -0.1768, "step": 31650 }, { "epoch": 18.42840512223516, "grad_norm": 0.16231968998908997, "learning_rate": 4.956183610664447e-05, "loss": -0.1761, "step": 31660 }, { "epoch": 18.434225844004658, "grad_norm": 0.14778104424476624, "learning_rate": 4.9534279402468945e-05, "loss": -0.1793, "step": 31670 }, { "epoch": 18.440046565774157, "grad_norm": 0.15217769145965576, "learning_rate": 4.9506722839767036e-05, "loss": -0.1767, "step": 31680 }, { "epoch": 18.445867287543656, "grad_norm": 0.13544991612434387, "learning_rate": 4.947916642690972e-05, "loss": -0.1693, "step": 31690 }, { "epoch": 18.451688009313155, "grad_norm": 0.14668801426887512, "learning_rate": 4.9451610172267874e-05, "loss": -0.1804, "step": 31700 }, { "epoch": 18.457508731082655, "grad_norm": 0.19101913273334503, "learning_rate": 4.9424054084212376e-05, "loss": -0.1782, "step": 31710 }, { "epoch": 18.463329452852154, "grad_norm": 0.08271446824073792, "learning_rate": 4.939649817111407e-05, "loss": -0.1802, "step": 31720 }, { "epoch": 18.469150174621653, "grad_norm": 0.1515059471130371, "learning_rate": 4.936894244134365e-05, "loss": -0.1776, "step": 31730 }, { "epoch": 18.474970896391152, "grad_norm": 0.15658268332481384, "learning_rate": 4.9341386903271886e-05, "loss": -0.1789, "step": 31740 }, { "epoch": 18.48079161816065, "grad_norm": 0.19302190840244293, "learning_rate": 4.931383156526936e-05, "loss": -0.1722, "step": 31750 }, { "epoch": 18.48661233993015, "grad_norm": 0.1778925210237503, "learning_rate": 4.92862764357067e-05, "loss": -0.1761, "step": 31760 }, { "epoch": 18.49243306169965, "grad_norm": 0.14960815012454987, "learning_rate": 4.925872152295443e-05, "loss": -0.1804, "step": 31770 }, { "epoch": 18.49825378346915, "grad_norm": 0.1806621551513672, "learning_rate": 4.923116683538296e-05, "loss": -0.1763, "step": 31780 }, { "epoch": 18.50407450523865, "grad_norm": 0.17769791185855865, "learning_rate": 4.920361238136273e-05, "loss": -0.1761, "step": 31790 }, { "epoch": 18.509895227008148, "grad_norm": 0.14552544057369232, "learning_rate": 4.9176058169264014e-05, "loss": -0.1752, "step": 31800 }, { "epoch": 18.515715948777647, "grad_norm": 0.10810855776071548, "learning_rate": 4.9148504207457074e-05, "loss": -0.1739, "step": 31810 }, { "epoch": 18.52153667054715, "grad_norm": 0.1585332602262497, "learning_rate": 4.912095050431208e-05, "loss": -0.1737, "step": 31820 }, { "epoch": 18.52735739231665, "grad_norm": 0.09326449781656265, "learning_rate": 4.909339706819911e-05, "loss": -0.1754, "step": 31830 }, { "epoch": 18.533178114086148, "grad_norm": 0.18293575942516327, "learning_rate": 4.906584390748819e-05, "loss": -0.1771, "step": 31840 }, { "epoch": 18.538998835855647, "grad_norm": 0.2436792254447937, "learning_rate": 4.9038291030549195e-05, "loss": -0.1642, "step": 31850 }, { "epoch": 18.544819557625146, "grad_norm": 0.1590101420879364, "learning_rate": 4.9010738445751995e-05, "loss": -0.1766, "step": 31860 }, { "epoch": 18.550640279394646, "grad_norm": 0.14319923520088196, "learning_rate": 4.8983186161466364e-05, "loss": -0.1801, "step": 31870 }, { "epoch": 18.556461001164145, "grad_norm": 0.21154868602752686, "learning_rate": 4.89556341860619e-05, "loss": -0.174, "step": 31880 }, { "epoch": 18.562281722933644, "grad_norm": 0.19041644036769867, "learning_rate": 4.892808252790822e-05, "loss": -0.176, "step": 31890 }, { "epoch": 18.568102444703143, "grad_norm": 0.22722885012626648, "learning_rate": 4.890053119537475e-05, "loss": -0.1774, "step": 31900 }, { "epoch": 18.573923166472643, "grad_norm": 0.14763177931308746, "learning_rate": 4.887298019683087e-05, "loss": -0.1749, "step": 31910 }, { "epoch": 18.57974388824214, "grad_norm": 0.19836397469043732, "learning_rate": 4.884542954064587e-05, "loss": -0.1757, "step": 31920 }, { "epoch": 18.58556461001164, "grad_norm": 0.14624109864234924, "learning_rate": 4.881787923518887e-05, "loss": -0.1753, "step": 31930 }, { "epoch": 18.59138533178114, "grad_norm": 0.1225014328956604, "learning_rate": 4.879032928882896e-05, "loss": -0.179, "step": 31940 }, { "epoch": 18.59720605355064, "grad_norm": 0.1810390204191208, "learning_rate": 4.876277970993505e-05, "loss": -0.179, "step": 31950 }, { "epoch": 18.60302677532014, "grad_norm": 0.15547478199005127, "learning_rate": 4.873523050687602e-05, "loss": -0.1737, "step": 31960 }, { "epoch": 18.608847497089638, "grad_norm": 0.11249440163373947, "learning_rate": 4.870768168802056e-05, "loss": -0.1789, "step": 31970 }, { "epoch": 18.614668218859137, "grad_norm": 0.15336060523986816, "learning_rate": 4.868013326173728e-05, "loss": -0.1772, "step": 31980 }, { "epoch": 18.620488940628636, "grad_norm": 0.1338673233985901, "learning_rate": 4.865258523639468e-05, "loss": -0.178, "step": 31990 }, { "epoch": 18.62630966239814, "grad_norm": 0.16613301634788513, "learning_rate": 4.862503762036109e-05, "loss": -0.1785, "step": 32000 }, { "epoch": 18.63213038416764, "grad_norm": 0.15750622749328613, "learning_rate": 4.859749042200478e-05, "loss": -0.1774, "step": 32010 }, { "epoch": 18.637951105937137, "grad_norm": 0.13440357148647308, "learning_rate": 4.856994364969384e-05, "loss": -0.1777, "step": 32020 }, { "epoch": 18.643771827706637, "grad_norm": 0.12854447960853577, "learning_rate": 4.854239731179625e-05, "loss": -0.1804, "step": 32030 }, { "epoch": 18.649592549476136, "grad_norm": 0.16620388627052307, "learning_rate": 4.85148514166799e-05, "loss": -0.1784, "step": 32040 }, { "epoch": 18.655413271245635, "grad_norm": 0.10998906195163727, "learning_rate": 4.8487305972712456e-05, "loss": -0.1778, "step": 32050 }, { "epoch": 18.661233993015134, "grad_norm": 0.11163543909788132, "learning_rate": 4.8459760988261526e-05, "loss": -0.1807, "step": 32060 }, { "epoch": 18.667054714784634, "grad_norm": 0.17368999123573303, "learning_rate": 4.843221647169453e-05, "loss": -0.1773, "step": 32070 }, { "epoch": 18.672875436554133, "grad_norm": 0.19974598288536072, "learning_rate": 4.840467243137878e-05, "loss": -0.1765, "step": 32080 }, { "epoch": 18.678696158323632, "grad_norm": 0.19076701998710632, "learning_rate": 4.837712887568143e-05, "loss": -0.176, "step": 32090 }, { "epoch": 18.68451688009313, "grad_norm": 0.15582695603370667, "learning_rate": 4.8349585812969464e-05, "loss": -0.1775, "step": 32100 }, { "epoch": 18.69033760186263, "grad_norm": 0.1499674916267395, "learning_rate": 4.8322043251609775e-05, "loss": -0.1783, "step": 32110 }, { "epoch": 18.69615832363213, "grad_norm": 0.20454397797584534, "learning_rate": 4.8294501199969015e-05, "loss": -0.1778, "step": 32120 }, { "epoch": 18.70197904540163, "grad_norm": 0.2111423909664154, "learning_rate": 4.826695966641376e-05, "loss": -0.1786, "step": 32130 }, { "epoch": 18.707799767171128, "grad_norm": 0.22476395964622498, "learning_rate": 4.823941865931043e-05, "loss": -0.1747, "step": 32140 }, { "epoch": 18.713620488940627, "grad_norm": 0.16507455706596375, "learning_rate": 4.82118781870252e-05, "loss": -0.1739, "step": 32150 }, { "epoch": 18.719441210710126, "grad_norm": 0.3267502784729004, "learning_rate": 4.8184338257924185e-05, "loss": -0.1751, "step": 32160 }, { "epoch": 18.725261932479626, "grad_norm": 0.1843174546957016, "learning_rate": 4.815679888037324e-05, "loss": -0.1762, "step": 32170 }, { "epoch": 18.73108265424913, "grad_norm": 0.2157042920589447, "learning_rate": 4.8129260062738135e-05, "loss": -0.1793, "step": 32180 }, { "epoch": 18.736903376018628, "grad_norm": 0.2915855348110199, "learning_rate": 4.810172181338445e-05, "loss": -0.1741, "step": 32190 }, { "epoch": 18.742724097788127, "grad_norm": 0.17437274754047394, "learning_rate": 4.807418414067753e-05, "loss": -0.1794, "step": 32200 }, { "epoch": 18.748544819557626, "grad_norm": 0.11697546392679214, "learning_rate": 4.804664705298264e-05, "loss": -0.1757, "step": 32210 }, { "epoch": 18.754365541327125, "grad_norm": 0.1802394539117813, "learning_rate": 4.80191105586648e-05, "loss": -0.1788, "step": 32220 }, { "epoch": 18.760186263096625, "grad_norm": 0.21676857769489288, "learning_rate": 4.799157466608886e-05, "loss": -0.1781, "step": 32230 }, { "epoch": 18.766006984866124, "grad_norm": 0.07078775763511658, "learning_rate": 4.796403938361951e-05, "loss": -0.176, "step": 32240 }, { "epoch": 18.771827706635623, "grad_norm": 0.14871370792388916, "learning_rate": 4.793650471962123e-05, "loss": -0.1785, "step": 32250 }, { "epoch": 18.777648428405122, "grad_norm": 0.13668851554393768, "learning_rate": 4.790897068245835e-05, "loss": -0.179, "step": 32260 }, { "epoch": 18.78346915017462, "grad_norm": 0.15853184461593628, "learning_rate": 4.7881437280494954e-05, "loss": -0.1756, "step": 32270 }, { "epoch": 18.78928987194412, "grad_norm": 0.19043394923210144, "learning_rate": 4.7853904522094965e-05, "loss": -0.1767, "step": 32280 }, { "epoch": 18.79511059371362, "grad_norm": 0.17811943590641022, "learning_rate": 4.782637241562215e-05, "loss": -0.1755, "step": 32290 }, { "epoch": 18.80093131548312, "grad_norm": 0.2585314214229584, "learning_rate": 4.779884096943997e-05, "loss": -0.1785, "step": 32300 }, { "epoch": 18.80675203725262, "grad_norm": 0.143362894654274, "learning_rate": 4.777131019191182e-05, "loss": -0.1717, "step": 32310 }, { "epoch": 18.812572759022117, "grad_norm": 0.17025689780712128, "learning_rate": 4.774378009140076e-05, "loss": -0.1768, "step": 32320 }, { "epoch": 18.818393480791617, "grad_norm": 0.16907653212547302, "learning_rate": 4.7716250676269735e-05, "loss": -0.1775, "step": 32330 }, { "epoch": 18.824214202561116, "grad_norm": 0.17456863820552826, "learning_rate": 4.7688721954881485e-05, "loss": -0.1796, "step": 32340 }, { "epoch": 18.83003492433062, "grad_norm": 0.15954619646072388, "learning_rate": 4.7661193935598446e-05, "loss": -0.1731, "step": 32350 }, { "epoch": 18.835855646100118, "grad_norm": 0.12061836570501328, "learning_rate": 4.763366662678296e-05, "loss": -0.178, "step": 32360 }, { "epoch": 18.841676367869617, "grad_norm": 0.2637629210948944, "learning_rate": 4.7606140036797064e-05, "loss": -0.1744, "step": 32370 }, { "epoch": 18.847497089639116, "grad_norm": 0.12356079369783401, "learning_rate": 4.7578614174002614e-05, "loss": -0.1798, "step": 32380 }, { "epoch": 18.853317811408616, "grad_norm": 0.17625851929187775, "learning_rate": 4.755108904676125e-05, "loss": -0.1794, "step": 32390 }, { "epoch": 18.859138533178115, "grad_norm": 0.1437457948923111, "learning_rate": 4.752356466343436e-05, "loss": -0.1731, "step": 32400 }, { "epoch": 18.864959254947614, "grad_norm": 0.12575840950012207, "learning_rate": 4.7496041032383174e-05, "loss": -0.179, "step": 32410 }, { "epoch": 18.870779976717113, "grad_norm": 0.1429259032011032, "learning_rate": 4.746851816196858e-05, "loss": -0.1782, "step": 32420 }, { "epoch": 18.876600698486612, "grad_norm": 0.1391216218471527, "learning_rate": 4.744099606055135e-05, "loss": -0.1795, "step": 32430 }, { "epoch": 18.88242142025611, "grad_norm": 0.16622619330883026, "learning_rate": 4.741347473649193e-05, "loss": -0.1747, "step": 32440 }, { "epoch": 18.88824214202561, "grad_norm": 0.1728973090648651, "learning_rate": 4.738595419815058e-05, "loss": -0.1805, "step": 32450 }, { "epoch": 18.89406286379511, "grad_norm": 0.1477382332086563, "learning_rate": 4.7358434453887365e-05, "loss": -0.1744, "step": 32460 }, { "epoch": 18.89988358556461, "grad_norm": 0.14145353436470032, "learning_rate": 4.7330915512061976e-05, "loss": -0.1782, "step": 32470 }, { "epoch": 18.90570430733411, "grad_norm": 0.11336037516593933, "learning_rate": 4.730339738103402e-05, "loss": -0.1773, "step": 32480 }, { "epoch": 18.911525029103608, "grad_norm": 0.1351793110370636, "learning_rate": 4.727588006916271e-05, "loss": -0.175, "step": 32490 }, { "epoch": 18.917345750873107, "grad_norm": 0.17384086549282074, "learning_rate": 4.724836358480711e-05, "loss": -0.1793, "step": 32500 }, { "epoch": 18.923166472642606, "grad_norm": 0.20608530938625336, "learning_rate": 4.722084793632601e-05, "loss": -0.176, "step": 32510 }, { "epoch": 18.92898719441211, "grad_norm": 0.20876988768577576, "learning_rate": 4.719333313207792e-05, "loss": -0.1753, "step": 32520 }, { "epoch": 18.934807916181608, "grad_norm": 0.19034498929977417, "learning_rate": 4.716581918042114e-05, "loss": -0.176, "step": 32530 }, { "epoch": 18.940628637951107, "grad_norm": 0.11770520359277725, "learning_rate": 4.7138306089713636e-05, "loss": -0.1793, "step": 32540 }, { "epoch": 18.946449359720607, "grad_norm": 0.1904144585132599, "learning_rate": 4.7110793868313183e-05, "loss": -0.1798, "step": 32550 }, { "epoch": 18.952270081490106, "grad_norm": 0.21249863505363464, "learning_rate": 4.708328252457729e-05, "loss": -0.1767, "step": 32560 }, { "epoch": 18.958090803259605, "grad_norm": 0.13445596396923065, "learning_rate": 4.7055772066863135e-05, "loss": -0.1707, "step": 32570 }, { "epoch": 18.963911525029104, "grad_norm": 0.12428935617208481, "learning_rate": 4.702826250352771e-05, "loss": -0.1801, "step": 32580 }, { "epoch": 18.969732246798603, "grad_norm": 0.11496039479970932, "learning_rate": 4.7000753842927653e-05, "loss": -0.1763, "step": 32590 }, { "epoch": 18.975552968568103, "grad_norm": 0.14635615050792694, "learning_rate": 4.6973246093419384e-05, "loss": -0.1766, "step": 32600 }, { "epoch": 18.981373690337602, "grad_norm": 0.12984439730644226, "learning_rate": 4.694573926335906e-05, "loss": -0.1744, "step": 32610 }, { "epoch": 18.9871944121071, "grad_norm": 0.15449149906635284, "learning_rate": 4.6918233361102476e-05, "loss": -0.1781, "step": 32620 }, { "epoch": 18.9930151338766, "grad_norm": 0.1989513337612152, "learning_rate": 4.689072839500525e-05, "loss": -0.1761, "step": 32630 }, { "epoch": 18.9988358556461, "grad_norm": 0.22326058149337769, "learning_rate": 4.6863224373422635e-05, "loss": -0.1796, "step": 32640 }, { "epoch": 19.0046565774156, "grad_norm": 0.09698976576328278, "learning_rate": 4.683572130470962e-05, "loss": -0.1787, "step": 32650 }, { "epoch": 19.010477299185098, "grad_norm": 0.2793565094470978, "learning_rate": 4.680821919722094e-05, "loss": -0.1724, "step": 32660 }, { "epoch": 19.016298020954597, "grad_norm": 0.11840222775936127, "learning_rate": 4.6780718059310975e-05, "loss": -0.1759, "step": 32670 }, { "epoch": 19.022118742724096, "grad_norm": 0.182786762714386, "learning_rate": 4.675321789933389e-05, "loss": -0.1805, "step": 32680 }, { "epoch": 19.027939464493596, "grad_norm": 0.18805207312107086, "learning_rate": 4.6725718725643464e-05, "loss": -0.1801, "step": 32690 }, { "epoch": 19.0337601862631, "grad_norm": 0.2611725628376007, "learning_rate": 4.669822054659323e-05, "loss": -0.1747, "step": 32700 }, { "epoch": 19.039580908032598, "grad_norm": 0.1331735998392105, "learning_rate": 4.667072337053644e-05, "loss": -0.176, "step": 32710 }, { "epoch": 19.045401629802097, "grad_norm": 0.14089533686637878, "learning_rate": 4.6643227205825965e-05, "loss": -0.1761, "step": 32720 }, { "epoch": 19.051222351571596, "grad_norm": 0.2468424141407013, "learning_rate": 4.6615732060814454e-05, "loss": -0.1783, "step": 32730 }, { "epoch": 19.057043073341095, "grad_norm": 0.149046391248703, "learning_rate": 4.658823794385417e-05, "loss": -0.1786, "step": 32740 }, { "epoch": 19.062863795110594, "grad_norm": 0.16231052577495575, "learning_rate": 4.6560744863297115e-05, "loss": -0.1756, "step": 32750 }, { "epoch": 19.068684516880094, "grad_norm": 0.1867128610610962, "learning_rate": 4.653325282749498e-05, "loss": -0.1764, "step": 32760 }, { "epoch": 19.074505238649593, "grad_norm": 0.2596605718135834, "learning_rate": 4.6505761844799075e-05, "loss": -0.1775, "step": 32770 }, { "epoch": 19.080325960419092, "grad_norm": 0.2523977756500244, "learning_rate": 4.647827192356048e-05, "loss": -0.1766, "step": 32780 }, { "epoch": 19.08614668218859, "grad_norm": 0.18554805219173431, "learning_rate": 4.645078307212989e-05, "loss": -0.1754, "step": 32790 }, { "epoch": 19.09196740395809, "grad_norm": 0.15468816459178925, "learning_rate": 4.642329529885768e-05, "loss": -0.179, "step": 32800 }, { "epoch": 19.09778812572759, "grad_norm": 0.2519650459289551, "learning_rate": 4.639580861209393e-05, "loss": -0.1751, "step": 32810 }, { "epoch": 19.10360884749709, "grad_norm": 0.1531447172164917, "learning_rate": 4.636832302018835e-05, "loss": -0.1734, "step": 32820 }, { "epoch": 19.109429569266588, "grad_norm": 0.10013777017593384, "learning_rate": 4.6340838531490365e-05, "loss": -0.1801, "step": 32830 }, { "epoch": 19.115250291036087, "grad_norm": 0.19268956780433655, "learning_rate": 4.6313355154349e-05, "loss": -0.1782, "step": 32840 }, { "epoch": 19.121071012805587, "grad_norm": 0.14074911177158356, "learning_rate": 4.6285872897113025e-05, "loss": -0.1795, "step": 32850 }, { "epoch": 19.126891734575086, "grad_norm": 0.07754050195217133, "learning_rate": 4.625839176813077e-05, "loss": -0.1798, "step": 32860 }, { "epoch": 19.132712456344585, "grad_norm": 0.2034909427165985, "learning_rate": 4.623091177575031e-05, "loss": -0.1792, "step": 32870 }, { "epoch": 19.138533178114088, "grad_norm": 0.1545657217502594, "learning_rate": 4.620343292831936e-05, "loss": -0.1774, "step": 32880 }, { "epoch": 19.144353899883587, "grad_norm": 0.17085939645767212, "learning_rate": 4.6175955234185206e-05, "loss": -0.1776, "step": 32890 }, { "epoch": 19.150174621653086, "grad_norm": 0.20666131377220154, "learning_rate": 4.614847870169492e-05, "loss": -0.1797, "step": 32900 }, { "epoch": 19.155995343422585, "grad_norm": 0.16890758275985718, "learning_rate": 4.612100333919509e-05, "loss": -0.1788, "step": 32910 }, { "epoch": 19.161816065192085, "grad_norm": 0.20714755356311798, "learning_rate": 4.609352915503202e-05, "loss": -0.1767, "step": 32920 }, { "epoch": 19.167636786961584, "grad_norm": 0.1710047423839569, "learning_rate": 4.606605615755166e-05, "loss": -0.1793, "step": 32930 }, { "epoch": 19.173457508731083, "grad_norm": 0.11412503570318222, "learning_rate": 4.6038584355099576e-05, "loss": -0.1759, "step": 32940 }, { "epoch": 19.179278230500582, "grad_norm": 0.17295219004154205, "learning_rate": 4.6011113756020964e-05, "loss": -0.1776, "step": 32950 }, { "epoch": 19.18509895227008, "grad_norm": 0.15587648749351501, "learning_rate": 4.598364436866066e-05, "loss": -0.1775, "step": 32960 }, { "epoch": 19.19091967403958, "grad_norm": 0.21155256032943726, "learning_rate": 4.595617620136316e-05, "loss": -0.175, "step": 32970 }, { "epoch": 19.19674039580908, "grad_norm": 0.1686936765909195, "learning_rate": 4.592870926247257e-05, "loss": -0.1765, "step": 32980 }, { "epoch": 19.20256111757858, "grad_norm": 0.17635269463062286, "learning_rate": 4.5901243560332594e-05, "loss": -0.1777, "step": 32990 }, { "epoch": 19.20838183934808, "grad_norm": 0.17807775735855103, "learning_rate": 4.587377910328662e-05, "loss": -0.1779, "step": 33000 }, { "epoch": 19.214202561117578, "grad_norm": 0.16863256692886353, "learning_rate": 4.5846315899677586e-05, "loss": -0.1804, "step": 33010 }, { "epoch": 19.220023282887077, "grad_norm": 0.1715802550315857, "learning_rate": 4.5818853957848114e-05, "loss": -0.1777, "step": 33020 }, { "epoch": 19.225844004656576, "grad_norm": 0.19803540408611298, "learning_rate": 4.579139328614043e-05, "loss": -0.1793, "step": 33030 }, { "epoch": 19.231664726426075, "grad_norm": 0.126821830868721, "learning_rate": 4.576393389289633e-05, "loss": -0.1775, "step": 33040 }, { "epoch": 19.237485448195578, "grad_norm": 0.12155468016862869, "learning_rate": 4.573647578645728e-05, "loss": -0.1766, "step": 33050 }, { "epoch": 19.243306169965077, "grad_norm": 0.1484432816505432, "learning_rate": 4.57090189751643e-05, "loss": -0.1821, "step": 33060 }, { "epoch": 19.249126891734576, "grad_norm": 0.14719940721988678, "learning_rate": 4.568156346735806e-05, "loss": -0.1778, "step": 33070 }, { "epoch": 19.254947613504076, "grad_norm": 0.09288860857486725, "learning_rate": 4.565410927137882e-05, "loss": -0.1806, "step": 33080 }, { "epoch": 19.260768335273575, "grad_norm": 0.15967366099357605, "learning_rate": 4.562665639556644e-05, "loss": -0.1754, "step": 33090 }, { "epoch": 19.266589057043074, "grad_norm": 0.19345934689044952, "learning_rate": 4.559920484826037e-05, "loss": -0.1791, "step": 33100 }, { "epoch": 19.272409778812573, "grad_norm": 0.17917805910110474, "learning_rate": 4.5571754637799665e-05, "loss": -0.1769, "step": 33110 }, { "epoch": 19.278230500582072, "grad_norm": 0.16416333615779877, "learning_rate": 4.554430577252298e-05, "loss": -0.1797, "step": 33120 }, { "epoch": 19.28405122235157, "grad_norm": 0.1327831745147705, "learning_rate": 4.551685826076858e-05, "loss": -0.1782, "step": 33130 }, { "epoch": 19.28987194412107, "grad_norm": 0.14444562792778015, "learning_rate": 4.5489412110874246e-05, "loss": -0.1795, "step": 33140 }, { "epoch": 19.29569266589057, "grad_norm": 0.16800329089164734, "learning_rate": 4.5461967331177444e-05, "loss": -0.175, "step": 33150 }, { "epoch": 19.30151338766007, "grad_norm": 0.23162879049777985, "learning_rate": 4.5434523930015115e-05, "loss": -0.1772, "step": 33160 }, { "epoch": 19.30733410942957, "grad_norm": 0.16263695061206818, "learning_rate": 4.540708191572388e-05, "loss": -0.1774, "step": 33170 }, { "epoch": 19.313154831199068, "grad_norm": 0.19444742798805237, "learning_rate": 4.537964129663991e-05, "loss": -0.1812, "step": 33180 }, { "epoch": 19.318975552968567, "grad_norm": 0.17076048254966736, "learning_rate": 4.535220208109889e-05, "loss": -0.1774, "step": 33190 }, { "epoch": 19.324796274738066, "grad_norm": 0.15196186304092407, "learning_rate": 4.5324764277436194e-05, "loss": -0.1809, "step": 33200 }, { "epoch": 19.330616996507565, "grad_norm": 0.1333421915769577, "learning_rate": 4.529732789398664e-05, "loss": -0.1789, "step": 33210 }, { "epoch": 19.336437718277068, "grad_norm": 0.12476270645856857, "learning_rate": 4.526989293908472e-05, "loss": -0.1754, "step": 33220 }, { "epoch": 19.342258440046567, "grad_norm": 0.14051660895347595, "learning_rate": 4.524245942106442e-05, "loss": -0.1784, "step": 33230 }, { "epoch": 19.348079161816067, "grad_norm": 0.22315581142902374, "learning_rate": 4.5215027348259345e-05, "loss": -0.1756, "step": 33240 }, { "epoch": 19.353899883585566, "grad_norm": 0.1493973433971405, "learning_rate": 4.5187596729002616e-05, "loss": -0.178, "step": 33250 }, { "epoch": 19.359720605355065, "grad_norm": 0.12302917242050171, "learning_rate": 4.516016757162693e-05, "loss": -0.1792, "step": 33260 }, { "epoch": 19.365541327124564, "grad_norm": 0.14546893537044525, "learning_rate": 4.513273988446457e-05, "loss": -0.179, "step": 33270 }, { "epoch": 19.371362048894063, "grad_norm": 0.1345517784357071, "learning_rate": 4.5105313675847296e-05, "loss": -0.1787, "step": 33280 }, { "epoch": 19.377182770663563, "grad_norm": 0.13318495452404022, "learning_rate": 4.5077888954106495e-05, "loss": -0.1789, "step": 33290 }, { "epoch": 19.383003492433062, "grad_norm": 0.15007245540618896, "learning_rate": 4.505046572757309e-05, "loss": -0.1771, "step": 33300 }, { "epoch": 19.38882421420256, "grad_norm": 0.1530643254518509, "learning_rate": 4.502304400457749e-05, "loss": -0.1766, "step": 33310 }, { "epoch": 19.39464493597206, "grad_norm": 0.17206533253192902, "learning_rate": 4.499562379344973e-05, "loss": -0.1797, "step": 33320 }, { "epoch": 19.40046565774156, "grad_norm": 0.17858336865901947, "learning_rate": 4.4968205102519306e-05, "loss": -0.1781, "step": 33330 }, { "epoch": 19.40628637951106, "grad_norm": 0.18238595128059387, "learning_rate": 4.494078794011532e-05, "loss": -0.1782, "step": 33340 }, { "epoch": 19.412107101280558, "grad_norm": 0.17428278923034668, "learning_rate": 4.491337231456639e-05, "loss": -0.1776, "step": 33350 }, { "epoch": 19.417927823050057, "grad_norm": 0.1281193494796753, "learning_rate": 4.4885958234200634e-05, "loss": -0.1794, "step": 33360 }, { "epoch": 19.423748544819556, "grad_norm": 0.15388800203800201, "learning_rate": 4.485854570734575e-05, "loss": -0.1792, "step": 33370 }, { "epoch": 19.429569266589056, "grad_norm": 0.2100384682416916, "learning_rate": 4.483113474232891e-05, "loss": -0.1761, "step": 33380 }, { "epoch": 19.435389988358555, "grad_norm": 0.1439625322818756, "learning_rate": 4.480372534747688e-05, "loss": -0.1781, "step": 33390 }, { "epoch": 19.441210710128058, "grad_norm": 0.21162568032741547, "learning_rate": 4.477631753111588e-05, "loss": -0.1727, "step": 33400 }, { "epoch": 19.447031431897557, "grad_norm": 0.15691544115543365, "learning_rate": 4.4748911301571686e-05, "loss": -0.1785, "step": 33410 }, { "epoch": 19.452852153667056, "grad_norm": 0.13841618597507477, "learning_rate": 4.472150666716961e-05, "loss": -0.1796, "step": 33420 }, { "epoch": 19.458672875436555, "grad_norm": 0.5389226078987122, "learning_rate": 4.469410363623442e-05, "loss": -0.1798, "step": 33430 }, { "epoch": 19.464493597206054, "grad_norm": 0.1657775193452835, "learning_rate": 4.466670221709044e-05, "loss": -0.1721, "step": 33440 }, { "epoch": 19.470314318975554, "grad_norm": 0.17276915907859802, "learning_rate": 4.463930241806154e-05, "loss": -0.1764, "step": 33450 }, { "epoch": 19.476135040745053, "grad_norm": 0.15194685757160187, "learning_rate": 4.4611904247471006e-05, "loss": -0.1799, "step": 33460 }, { "epoch": 19.481955762514552, "grad_norm": 0.17925690114498138, "learning_rate": 4.458450771364171e-05, "loss": -0.1779, "step": 33470 }, { "epoch": 19.48777648428405, "grad_norm": 0.10327086597681046, "learning_rate": 4.4557112824895965e-05, "loss": -0.1783, "step": 33480 }, { "epoch": 19.49359720605355, "grad_norm": 0.1682688444852829, "learning_rate": 4.452971958955563e-05, "loss": -0.1782, "step": 33490 }, { "epoch": 19.49941792782305, "grad_norm": 0.16409021615982056, "learning_rate": 4.450232801594208e-05, "loss": -0.1786, "step": 33500 }, { "epoch": 19.50523864959255, "grad_norm": 0.19975918531417847, "learning_rate": 4.447493811237609e-05, "loss": -0.1785, "step": 33510 }, { "epoch": 19.511059371362048, "grad_norm": 0.1753537803888321, "learning_rate": 4.444754988717804e-05, "loss": -0.1769, "step": 33520 }, { "epoch": 19.516880093131547, "grad_norm": 0.17288240790367126, "learning_rate": 4.442016334866771e-05, "loss": -0.1735, "step": 33530 }, { "epoch": 19.522700814901047, "grad_norm": 0.1250142604112625, "learning_rate": 4.4392778505164445e-05, "loss": -0.1794, "step": 33540 }, { "epoch": 19.528521536670546, "grad_norm": 0.17063859105110168, "learning_rate": 4.436539536498702e-05, "loss": -0.1806, "step": 33550 }, { "epoch": 19.534342258440045, "grad_norm": 0.1493106186389923, "learning_rate": 4.433801393645369e-05, "loss": -0.1778, "step": 33560 }, { "epoch": 19.540162980209544, "grad_norm": 0.2735649049282074, "learning_rate": 4.431063422788226e-05, "loss": -0.1754, "step": 33570 }, { "epoch": 19.545983701979047, "grad_norm": 0.19772480428218842, "learning_rate": 4.428325624758991e-05, "loss": -0.179, "step": 33580 }, { "epoch": 19.551804423748546, "grad_norm": 0.2817606031894684, "learning_rate": 4.4255880003893366e-05, "loss": -0.1757, "step": 33590 }, { "epoch": 19.557625145518045, "grad_norm": 0.1758049726486206, "learning_rate": 4.422850550510884e-05, "loss": -0.1788, "step": 33600 }, { "epoch": 19.563445867287545, "grad_norm": 0.19817659258842468, "learning_rate": 4.4201132759551934e-05, "loss": -0.1756, "step": 33610 }, { "epoch": 19.569266589057044, "grad_norm": 0.16780155897140503, "learning_rate": 4.4173761775537804e-05, "loss": -0.1754, "step": 33620 }, { "epoch": 19.575087310826543, "grad_norm": 0.1657928079366684, "learning_rate": 4.414639256138099e-05, "loss": -0.1765, "step": 33630 }, { "epoch": 19.580908032596042, "grad_norm": 0.1785205453634262, "learning_rate": 4.411902512539557e-05, "loss": -0.1782, "step": 33640 }, { "epoch": 19.58672875436554, "grad_norm": 0.16643130779266357, "learning_rate": 4.4091659475895044e-05, "loss": -0.1805, "step": 33650 }, { "epoch": 19.59254947613504, "grad_norm": 0.14526765048503876, "learning_rate": 4.406429562119235e-05, "loss": -0.1784, "step": 33660 }, { "epoch": 19.59837019790454, "grad_norm": 0.1450069546699524, "learning_rate": 4.4036933569599945e-05, "loss": -0.1776, "step": 33670 }, { "epoch": 19.60419091967404, "grad_norm": 0.13580827414989471, "learning_rate": 4.400957332942965e-05, "loss": -0.1788, "step": 33680 }, { "epoch": 19.61001164144354, "grad_norm": 0.1829906702041626, "learning_rate": 4.3982214908992844e-05, "loss": -0.1785, "step": 33690 }, { "epoch": 19.615832363213038, "grad_norm": 0.17796918749809265, "learning_rate": 4.3954858316600235e-05, "loss": -0.1791, "step": 33700 }, { "epoch": 19.621653084982537, "grad_norm": 0.19287188351154327, "learning_rate": 4.392750356056205e-05, "loss": -0.1787, "step": 33710 }, { "epoch": 19.627473806752036, "grad_norm": 0.1391097605228424, "learning_rate": 4.390015064918798e-05, "loss": -0.1787, "step": 33720 }, { "epoch": 19.633294528521535, "grad_norm": 0.16115424036979675, "learning_rate": 4.387279959078705e-05, "loss": -0.1778, "step": 33730 }, { "epoch": 19.639115250291034, "grad_norm": 0.11337075382471085, "learning_rate": 4.384545039366786e-05, "loss": -0.1794, "step": 33740 }, { "epoch": 19.644935972060537, "grad_norm": 0.15666437149047852, "learning_rate": 4.381810306613831e-05, "loss": -0.1748, "step": 33750 }, { "epoch": 19.650756693830036, "grad_norm": 0.13254967331886292, "learning_rate": 4.3790757616505826e-05, "loss": -0.1767, "step": 33760 }, { "epoch": 19.656577415599536, "grad_norm": 0.1613733172416687, "learning_rate": 4.376341405307725e-05, "loss": -0.1751, "step": 33770 }, { "epoch": 19.662398137369035, "grad_norm": 0.10594093054533005, "learning_rate": 4.37360723841588e-05, "loss": -0.1764, "step": 33780 }, { "epoch": 19.668218859138534, "grad_norm": 0.23954974114894867, "learning_rate": 4.370873261805619e-05, "loss": -0.1744, "step": 33790 }, { "epoch": 19.674039580908033, "grad_norm": 0.3350273668766022, "learning_rate": 4.368139476307449e-05, "loss": -0.1738, "step": 33800 }, { "epoch": 19.679860302677533, "grad_norm": 0.21062159538269043, "learning_rate": 4.365405882751822e-05, "loss": -0.177, "step": 33810 }, { "epoch": 19.68568102444703, "grad_norm": 0.16697253286838531, "learning_rate": 4.3626724819691326e-05, "loss": -0.1781, "step": 33820 }, { "epoch": 19.69150174621653, "grad_norm": 0.16397346556186676, "learning_rate": 4.359939274789715e-05, "loss": -0.1792, "step": 33830 }, { "epoch": 19.69732246798603, "grad_norm": 0.15343374013900757, "learning_rate": 4.357206262043848e-05, "loss": -0.1757, "step": 33840 }, { "epoch": 19.70314318975553, "grad_norm": 0.17860692739486694, "learning_rate": 4.354473444561745e-05, "loss": -0.1807, "step": 33850 }, { "epoch": 19.70896391152503, "grad_norm": 0.2019733041524887, "learning_rate": 4.3517408231735644e-05, "loss": -0.1769, "step": 33860 }, { "epoch": 19.714784633294528, "grad_norm": 0.18235716223716736, "learning_rate": 4.3490083987094086e-05, "loss": -0.1791, "step": 33870 }, { "epoch": 19.720605355064027, "grad_norm": 0.19020988047122955, "learning_rate": 4.34627617199931e-05, "loss": -0.1746, "step": 33880 }, { "epoch": 19.726426076833526, "grad_norm": 0.2317945808172226, "learning_rate": 4.3435441438732526e-05, "loss": -0.1813, "step": 33890 }, { "epoch": 19.732246798603025, "grad_norm": 0.08912000060081482, "learning_rate": 4.340812315161149e-05, "loss": -0.1788, "step": 33900 }, { "epoch": 19.738067520372525, "grad_norm": 0.31675979495048523, "learning_rate": 4.338080686692859e-05, "loss": -0.1737, "step": 33910 }, { "epoch": 19.743888242142027, "grad_norm": 0.16744031012058258, "learning_rate": 4.3353492592981816e-05, "loss": -0.1799, "step": 33920 }, { "epoch": 19.749708963911527, "grad_norm": 0.1665118932723999, "learning_rate": 4.3326180338068485e-05, "loss": -0.173, "step": 33930 }, { "epoch": 19.755529685681026, "grad_norm": 0.10127709060907364, "learning_rate": 4.3298870110485356e-05, "loss": -0.1793, "step": 33940 }, { "epoch": 19.761350407450525, "grad_norm": 0.13098235428333282, "learning_rate": 4.3271561918528567e-05, "loss": -0.1794, "step": 33950 }, { "epoch": 19.767171129220024, "grad_norm": 0.13598813116550446, "learning_rate": 4.324425577049359e-05, "loss": -0.1794, "step": 33960 }, { "epoch": 19.772991850989523, "grad_norm": 0.10720666497945786, "learning_rate": 4.321695167467535e-05, "loss": -0.1765, "step": 33970 }, { "epoch": 19.778812572759023, "grad_norm": 0.1948181539773941, "learning_rate": 4.3189649639368093e-05, "loss": -0.1733, "step": 33980 }, { "epoch": 19.784633294528522, "grad_norm": 0.10863646864891052, "learning_rate": 4.316234967286547e-05, "loss": -0.1783, "step": 33990 }, { "epoch": 19.79045401629802, "grad_norm": 0.24648326635360718, "learning_rate": 4.313505178346046e-05, "loss": -0.1714, "step": 34000 }, { "epoch": 19.79627473806752, "grad_norm": 0.16175930202007294, "learning_rate": 4.3107755979445465e-05, "loss": -0.1774, "step": 34010 }, { "epoch": 19.80209545983702, "grad_norm": 0.10061930865049362, "learning_rate": 4.308046226911224e-05, "loss": -0.1791, "step": 34020 }, { "epoch": 19.80791618160652, "grad_norm": 0.11932136118412018, "learning_rate": 4.305317066075185e-05, "loss": -0.1763, "step": 34030 }, { "epoch": 19.813736903376018, "grad_norm": 0.24821138381958008, "learning_rate": 4.302588116265482e-05, "loss": -0.1774, "step": 34040 }, { "epoch": 19.819557625145517, "grad_norm": 0.13678312301635742, "learning_rate": 4.299859378311094e-05, "loss": -0.1769, "step": 34050 }, { "epoch": 19.825378346915016, "grad_norm": 0.16883663833141327, "learning_rate": 4.2971308530409424e-05, "loss": -0.1797, "step": 34060 }, { "epoch": 19.831199068684516, "grad_norm": 0.1390017718076706, "learning_rate": 4.2944025412838765e-05, "loss": -0.18, "step": 34070 }, { "epoch": 19.837019790454015, "grad_norm": 0.12936589121818542, "learning_rate": 4.291674443868689e-05, "loss": -0.1791, "step": 34080 }, { "epoch": 19.842840512223514, "grad_norm": 0.10790955275297165, "learning_rate": 4.288946561624104e-05, "loss": -0.1781, "step": 34090 }, { "epoch": 19.848661233993017, "grad_norm": 0.10122943669557571, "learning_rate": 4.2862188953787794e-05, "loss": -0.1795, "step": 34100 }, { "epoch": 19.854481955762516, "grad_norm": 0.20103618502616882, "learning_rate": 4.283491445961308e-05, "loss": -0.1785, "step": 34110 }, { "epoch": 19.860302677532015, "grad_norm": 0.19769291579723358, "learning_rate": 4.2807642142002155e-05, "loss": -0.1771, "step": 34120 }, { "epoch": 19.866123399301514, "grad_norm": 0.1809293031692505, "learning_rate": 4.278037200923966e-05, "loss": -0.18, "step": 34130 }, { "epoch": 19.871944121071014, "grad_norm": 0.17442283034324646, "learning_rate": 4.275310406960953e-05, "loss": -0.1718, "step": 34140 }, { "epoch": 19.877764842840513, "grad_norm": 0.17167752981185913, "learning_rate": 4.272583833139502e-05, "loss": -0.1756, "step": 34150 }, { "epoch": 19.883585564610012, "grad_norm": 0.19997864961624146, "learning_rate": 4.2698574802878794e-05, "loss": -0.177, "step": 34160 }, { "epoch": 19.88940628637951, "grad_norm": 0.18596436083316803, "learning_rate": 4.2671313492342734e-05, "loss": -0.1804, "step": 34170 }, { "epoch": 19.89522700814901, "grad_norm": 0.21950730681419373, "learning_rate": 4.264405440806813e-05, "loss": -0.1793, "step": 34180 }, { "epoch": 19.90104772991851, "grad_norm": 0.16271184384822845, "learning_rate": 4.26167975583356e-05, "loss": -0.1722, "step": 34190 }, { "epoch": 19.90686845168801, "grad_norm": 0.1400596648454666, "learning_rate": 4.2589542951425e-05, "loss": -0.1809, "step": 34200 }, { "epoch": 19.912689173457508, "grad_norm": 0.21560007333755493, "learning_rate": 4.2562290595615615e-05, "loss": -0.177, "step": 34210 }, { "epoch": 19.918509895227007, "grad_norm": 0.19745296239852905, "learning_rate": 4.2535040499185946e-05, "loss": -0.1743, "step": 34220 }, { "epoch": 19.924330616996507, "grad_norm": 0.23693718016147614, "learning_rate": 4.250779267041387e-05, "loss": -0.1767, "step": 34230 }, { "epoch": 19.930151338766006, "grad_norm": 0.19208797812461853, "learning_rate": 4.248054711757657e-05, "loss": -0.178, "step": 34240 }, { "epoch": 19.935972060535505, "grad_norm": 0.11607041209936142, "learning_rate": 4.245330384895052e-05, "loss": -0.1754, "step": 34250 }, { "epoch": 19.941792782305004, "grad_norm": 0.14399316906929016, "learning_rate": 4.242606287281151e-05, "loss": -0.1742, "step": 34260 }, { "epoch": 19.947613504074504, "grad_norm": 0.1837504655122757, "learning_rate": 4.2398824197434595e-05, "loss": -0.1722, "step": 34270 }, { "epoch": 19.953434225844006, "grad_norm": 0.12465173751115799, "learning_rate": 4.23715878310942e-05, "loss": -0.1793, "step": 34280 }, { "epoch": 19.959254947613505, "grad_norm": 0.18423469364643097, "learning_rate": 4.234435378206402e-05, "loss": -0.1789, "step": 34290 }, { "epoch": 19.965075669383005, "grad_norm": 0.15227611362934113, "learning_rate": 4.2317122058617006e-05, "loss": -0.1774, "step": 34300 }, { "epoch": 19.970896391152504, "grad_norm": 0.13460762798786163, "learning_rate": 4.2289892669025485e-05, "loss": -0.177, "step": 34310 }, { "epoch": 19.976717112922003, "grad_norm": 0.24492651224136353, "learning_rate": 4.226266562156097e-05, "loss": -0.1765, "step": 34320 }, { "epoch": 19.982537834691502, "grad_norm": 0.1517304927110672, "learning_rate": 4.223544092449435e-05, "loss": -0.1785, "step": 34330 }, { "epoch": 19.988358556461, "grad_norm": 0.12573805451393127, "learning_rate": 4.2208218586095784e-05, "loss": -0.1785, "step": 34340 }, { "epoch": 19.9941792782305, "grad_norm": 0.13597281277179718, "learning_rate": 4.218099861463466e-05, "loss": -0.1793, "step": 34350 }, { "epoch": 20.0, "grad_norm": 0.1083405539393425, "learning_rate": 4.215378101837972e-05, "loss": -0.176, "step": 34360 }, { "epoch": 20.0058207217695, "grad_norm": 0.11273769289255142, "learning_rate": 4.2126565805598937e-05, "loss": -0.1787, "step": 34370 }, { "epoch": 20.011641443539, "grad_norm": 0.2616119384765625, "learning_rate": 4.209935298455957e-05, "loss": -0.1795, "step": 34380 }, { "epoch": 20.017462165308498, "grad_norm": 0.2172195464372635, "learning_rate": 4.207214256352817e-05, "loss": -0.1802, "step": 34390 }, { "epoch": 20.023282887077997, "grad_norm": 0.14100149273872375, "learning_rate": 4.2044934550770524e-05, "loss": -0.1752, "step": 34400 }, { "epoch": 20.029103608847496, "grad_norm": 0.17435288429260254, "learning_rate": 4.201772895455174e-05, "loss": -0.174, "step": 34410 }, { "epoch": 20.034924330616995, "grad_norm": 0.11745970696210861, "learning_rate": 4.199052578313613e-05, "loss": -0.1761, "step": 34420 }, { "epoch": 20.040745052386495, "grad_norm": 0.17307054996490479, "learning_rate": 4.1963325044787294e-05, "loss": -0.176, "step": 34430 }, { "epoch": 20.046565774155994, "grad_norm": 0.2043539136648178, "learning_rate": 4.193612674776814e-05, "loss": -0.1793, "step": 34440 }, { "epoch": 20.052386495925496, "grad_norm": 0.1144176721572876, "learning_rate": 4.1908930900340745e-05, "loss": -0.1781, "step": 34450 }, { "epoch": 20.058207217694996, "grad_norm": 0.1561337113380432, "learning_rate": 4.1881737510766536e-05, "loss": -0.1794, "step": 34460 }, { "epoch": 20.064027939464495, "grad_norm": 0.12703624367713928, "learning_rate": 4.185454658730609e-05, "loss": -0.1803, "step": 34470 }, { "epoch": 20.069848661233994, "grad_norm": 0.1846790462732315, "learning_rate": 4.1827358138219355e-05, "loss": -0.1768, "step": 34480 }, { "epoch": 20.075669383003493, "grad_norm": 0.12877053022384644, "learning_rate": 4.1800172171765404e-05, "loss": -0.1711, "step": 34490 }, { "epoch": 20.081490104772993, "grad_norm": 0.10569977760314941, "learning_rate": 4.177298869620264e-05, "loss": -0.1799, "step": 34500 }, { "epoch": 20.087310826542492, "grad_norm": 0.14604619145393372, "learning_rate": 4.1745807719788705e-05, "loss": -0.1756, "step": 34510 }, { "epoch": 20.09313154831199, "grad_norm": 0.15975618362426758, "learning_rate": 4.1718629250780445e-05, "loss": -0.1784, "step": 34520 }, { "epoch": 20.09895227008149, "grad_norm": 0.12720990180969238, "learning_rate": 4.1691453297433956e-05, "loss": -0.1785, "step": 34530 }, { "epoch": 20.10477299185099, "grad_norm": 0.2607690989971161, "learning_rate": 4.166427986800457e-05, "loss": -0.1785, "step": 34540 }, { "epoch": 20.11059371362049, "grad_norm": 0.12304055690765381, "learning_rate": 4.163710897074688e-05, "loss": -0.1809, "step": 34550 }, { "epoch": 20.116414435389988, "grad_norm": 0.1410612314939499, "learning_rate": 4.1609940613914686e-05, "loss": -0.1729, "step": 34560 }, { "epoch": 20.122235157159487, "grad_norm": 0.1699950098991394, "learning_rate": 4.1582774805760996e-05, "loss": -0.1766, "step": 34570 }, { "epoch": 20.128055878928986, "grad_norm": 0.1813599169254303, "learning_rate": 4.155561155453809e-05, "loss": -0.1782, "step": 34580 }, { "epoch": 20.133876600698486, "grad_norm": 0.20878851413726807, "learning_rate": 4.15284508684974e-05, "loss": -0.1762, "step": 34590 }, { "epoch": 20.139697322467985, "grad_norm": 0.2525637745857239, "learning_rate": 4.1501292755889675e-05, "loss": -0.1738, "step": 34600 }, { "epoch": 20.145518044237484, "grad_norm": 0.2823667526245117, "learning_rate": 4.1474137224964833e-05, "loss": -0.1768, "step": 34610 }, { "epoch": 20.151338766006983, "grad_norm": 0.22932671010494232, "learning_rate": 4.144698428397197e-05, "loss": -0.174, "step": 34620 }, { "epoch": 20.157159487776486, "grad_norm": 0.19641174376010895, "learning_rate": 4.1419833941159466e-05, "loss": -0.1808, "step": 34630 }, { "epoch": 20.162980209545985, "grad_norm": 0.12346027046442032, "learning_rate": 4.1392686204774846e-05, "loss": -0.1789, "step": 34640 }, { "epoch": 20.168800931315484, "grad_norm": 0.2074824422597885, "learning_rate": 4.13655410830649e-05, "loss": -0.1794, "step": 34650 }, { "epoch": 20.174621653084984, "grad_norm": 0.2080565094947815, "learning_rate": 4.1338398584275594e-05, "loss": -0.1737, "step": 34660 }, { "epoch": 20.180442374854483, "grad_norm": 0.13583624362945557, "learning_rate": 4.1311258716652104e-05, "loss": -0.1768, "step": 34670 }, { "epoch": 20.186263096623982, "grad_norm": 0.13091744482517242, "learning_rate": 4.128412148843881e-05, "loss": -0.1796, "step": 34680 }, { "epoch": 20.19208381839348, "grad_norm": 0.23468396067619324, "learning_rate": 4.125698690787926e-05, "loss": -0.1758, "step": 34690 }, { "epoch": 20.19790454016298, "grad_norm": 0.11420794576406479, "learning_rate": 4.1229854983216245e-05, "loss": -0.1766, "step": 34700 }, { "epoch": 20.20372526193248, "grad_norm": 0.1952512115240097, "learning_rate": 4.120272572269175e-05, "loss": -0.1756, "step": 34710 }, { "epoch": 20.20954598370198, "grad_norm": 0.13169585168361664, "learning_rate": 4.117559913454687e-05, "loss": -0.1765, "step": 34720 }, { "epoch": 20.215366705471478, "grad_norm": 0.1377296894788742, "learning_rate": 4.114847522702201e-05, "loss": -0.1788, "step": 34730 }, { "epoch": 20.221187427240977, "grad_norm": 0.09080784022808075, "learning_rate": 4.112135400835664e-05, "loss": -0.1818, "step": 34740 }, { "epoch": 20.227008149010477, "grad_norm": 0.13207316398620605, "learning_rate": 4.109423548678949e-05, "loss": -0.1812, "step": 34750 }, { "epoch": 20.232828870779976, "grad_norm": 0.1429678499698639, "learning_rate": 4.106711967055848e-05, "loss": -0.1775, "step": 34760 }, { "epoch": 20.238649592549475, "grad_norm": 0.1386813372373581, "learning_rate": 4.1040006567900636e-05, "loss": -0.1795, "step": 34770 }, { "epoch": 20.244470314318974, "grad_norm": 0.2100551277399063, "learning_rate": 4.101289618705224e-05, "loss": -0.1807, "step": 34780 }, { "epoch": 20.250291036088473, "grad_norm": 0.12292113155126572, "learning_rate": 4.0985788536248675e-05, "loss": -0.181, "step": 34790 }, { "epoch": 20.256111757857976, "grad_norm": 0.17412178218364716, "learning_rate": 4.095868362372454e-05, "loss": -0.1806, "step": 34800 }, { "epoch": 20.261932479627475, "grad_norm": 0.19064410030841827, "learning_rate": 4.0931581457713614e-05, "loss": -0.1789, "step": 34810 }, { "epoch": 20.267753201396975, "grad_norm": 0.13639777898788452, "learning_rate": 4.09044820464488e-05, "loss": -0.1757, "step": 34820 }, { "epoch": 20.273573923166474, "grad_norm": 0.21044673025608063, "learning_rate": 4.087738539816219e-05, "loss": -0.1777, "step": 34830 }, { "epoch": 20.279394644935973, "grad_norm": 0.2740745544433594, "learning_rate": 4.085029152108501e-05, "loss": -0.175, "step": 34840 }, { "epoch": 20.285215366705472, "grad_norm": 0.223317950963974, "learning_rate": 4.0823200423447714e-05, "loss": -0.1767, "step": 34850 }, { "epoch": 20.29103608847497, "grad_norm": 0.1238710954785347, "learning_rate": 4.079611211347981e-05, "loss": -0.1744, "step": 34860 }, { "epoch": 20.29685681024447, "grad_norm": 0.219783216714859, "learning_rate": 4.076902659941002e-05, "loss": -0.1795, "step": 34870 }, { "epoch": 20.30267753201397, "grad_norm": 0.16305464506149292, "learning_rate": 4.074194388946624e-05, "loss": -0.1794, "step": 34880 }, { "epoch": 20.30849825378347, "grad_norm": 0.21093139052391052, "learning_rate": 4.071486399187545e-05, "loss": -0.1782, "step": 34890 }, { "epoch": 20.31431897555297, "grad_norm": 0.15297754108905792, "learning_rate": 4.0687786914863836e-05, "loss": -0.1808, "step": 34900 }, { "epoch": 20.320139697322467, "grad_norm": 0.14892998337745667, "learning_rate": 4.0660712666656666e-05, "loss": -0.1803, "step": 34910 }, { "epoch": 20.325960419091967, "grad_norm": 0.2626892626285553, "learning_rate": 4.0633641255478394e-05, "loss": -0.1755, "step": 34920 }, { "epoch": 20.331781140861466, "grad_norm": 0.16166438162326813, "learning_rate": 4.0606572689552624e-05, "loss": -0.1808, "step": 34930 }, { "epoch": 20.337601862630965, "grad_norm": 0.18028883635997772, "learning_rate": 4.0579506977102036e-05, "loss": -0.1763, "step": 34940 }, { "epoch": 20.343422584400464, "grad_norm": 0.24187569320201874, "learning_rate": 4.055244412634849e-05, "loss": -0.1784, "step": 34950 }, { "epoch": 20.349243306169964, "grad_norm": 0.179996058344841, "learning_rate": 4.052538414551298e-05, "loss": -0.1763, "step": 34960 }, { "epoch": 20.355064027939463, "grad_norm": 0.13689404726028442, "learning_rate": 4.0498327042815596e-05, "loss": -0.1782, "step": 34970 }, { "epoch": 20.360884749708966, "grad_norm": 0.1939067244529724, "learning_rate": 4.047127282647559e-05, "loss": -0.1803, "step": 34980 }, { "epoch": 20.366705471478465, "grad_norm": 0.151253804564476, "learning_rate": 4.04442215047113e-05, "loss": -0.1816, "step": 34990 }, { "epoch": 20.372526193247964, "grad_norm": 0.15671102702617645, "learning_rate": 4.041717308574023e-05, "loss": -0.1819, "step": 35000 }, { "epoch": 20.378346915017463, "grad_norm": 0.1974450647830963, "learning_rate": 4.039012757777893e-05, "loss": -0.1796, "step": 35010 }, { "epoch": 20.384167636786962, "grad_norm": 0.10228865593671799, "learning_rate": 4.036308498904314e-05, "loss": -0.1818, "step": 35020 }, { "epoch": 20.38998835855646, "grad_norm": 0.11245217174291611, "learning_rate": 4.033604532774771e-05, "loss": -0.1768, "step": 35030 }, { "epoch": 20.39580908032596, "grad_norm": 0.16270768642425537, "learning_rate": 4.030900860210652e-05, "loss": -0.1803, "step": 35040 }, { "epoch": 20.40162980209546, "grad_norm": 0.10069756954908371, "learning_rate": 4.028197482033266e-05, "loss": -0.1779, "step": 35050 }, { "epoch": 20.40745052386496, "grad_norm": 0.2845250964164734, "learning_rate": 4.0254943990638246e-05, "loss": -0.1782, "step": 35060 }, { "epoch": 20.41327124563446, "grad_norm": 0.21358954906463623, "learning_rate": 4.022791612123454e-05, "loss": -0.1726, "step": 35070 }, { "epoch": 20.419091967403958, "grad_norm": 0.13289138674736023, "learning_rate": 4.020089122033192e-05, "loss": -0.1773, "step": 35080 }, { "epoch": 20.424912689173457, "grad_norm": 0.14366619288921356, "learning_rate": 4.01738692961398e-05, "loss": -0.1705, "step": 35090 }, { "epoch": 20.430733410942956, "grad_norm": 0.15902495384216309, "learning_rate": 4.014685035686675e-05, "loss": -0.181, "step": 35100 }, { "epoch": 20.436554132712455, "grad_norm": 0.0986863300204277, "learning_rate": 4.011983441072039e-05, "loss": -0.1797, "step": 35110 }, { "epoch": 20.442374854481955, "grad_norm": 0.23244830965995789, "learning_rate": 4.0092821465907485e-05, "loss": -0.1756, "step": 35120 }, { "epoch": 20.448195576251454, "grad_norm": 0.2033403366804123, "learning_rate": 4.006581153063383e-05, "loss": -0.1791, "step": 35130 }, { "epoch": 20.454016298020953, "grad_norm": 0.15582598745822906, "learning_rate": 4.003880461310432e-05, "loss": -0.1779, "step": 35140 }, { "epoch": 20.459837019790456, "grad_norm": 0.4009537100791931, "learning_rate": 4.001180072152298e-05, "loss": -0.1761, "step": 35150 }, { "epoch": 20.465657741559955, "grad_norm": 0.23468175530433655, "learning_rate": 3.998479986409285e-05, "loss": -0.1773, "step": 35160 }, { "epoch": 20.471478463329454, "grad_norm": 0.14531627297401428, "learning_rate": 3.995780204901607e-05, "loss": -0.1799, "step": 35170 }, { "epoch": 20.477299185098953, "grad_norm": 0.13890929520130157, "learning_rate": 3.993080728449391e-05, "loss": -0.18, "step": 35180 }, { "epoch": 20.483119906868453, "grad_norm": 0.14115385711193085, "learning_rate": 3.990381557872661e-05, "loss": -0.1801, "step": 35190 }, { "epoch": 20.488940628637952, "grad_norm": 0.09514370560646057, "learning_rate": 3.987682693991359e-05, "loss": -0.1798, "step": 35200 }, { "epoch": 20.49476135040745, "grad_norm": 0.12326201796531677, "learning_rate": 3.9849841376253226e-05, "loss": -0.1815, "step": 35210 }, { "epoch": 20.50058207217695, "grad_norm": 0.15169085562229156, "learning_rate": 3.982285889594306e-05, "loss": -0.1759, "step": 35220 }, { "epoch": 20.50640279394645, "grad_norm": 0.18947748839855194, "learning_rate": 3.9795879507179665e-05, "loss": -0.1787, "step": 35230 }, { "epoch": 20.51222351571595, "grad_norm": 0.13931556046009064, "learning_rate": 3.9768903218158634e-05, "loss": -0.1797, "step": 35240 }, { "epoch": 20.518044237485448, "grad_norm": 0.16458255052566528, "learning_rate": 3.974193003707468e-05, "loss": -0.1794, "step": 35250 }, { "epoch": 20.523864959254947, "grad_norm": 0.16803953051567078, "learning_rate": 3.971495997212152e-05, "loss": -0.1785, "step": 35260 }, { "epoch": 20.529685681024446, "grad_norm": 0.2916143238544464, "learning_rate": 3.9687993031491985e-05, "loss": -0.1788, "step": 35270 }, { "epoch": 20.535506402793946, "grad_norm": 0.1854170262813568, "learning_rate": 3.966102922337787e-05, "loss": -0.1767, "step": 35280 }, { "epoch": 20.541327124563445, "grad_norm": 0.12321842461824417, "learning_rate": 3.963406855597009e-05, "loss": -0.1801, "step": 35290 }, { "epoch": 20.547147846332944, "grad_norm": 0.15452095866203308, "learning_rate": 3.960711103745861e-05, "loss": -0.1784, "step": 35300 }, { "epoch": 20.552968568102443, "grad_norm": 0.3250149190425873, "learning_rate": 3.958015667603237e-05, "loss": -0.177, "step": 35310 }, { "epoch": 20.558789289871942, "grad_norm": 0.18295933306217194, "learning_rate": 3.955320547987943e-05, "loss": -0.1787, "step": 35320 }, { "epoch": 20.564610011641445, "grad_norm": 0.30690592527389526, "learning_rate": 3.952625745718681e-05, "loss": -0.1742, "step": 35330 }, { "epoch": 20.570430733410944, "grad_norm": 0.13853363692760468, "learning_rate": 3.949931261614064e-05, "loss": -0.177, "step": 35340 }, { "epoch": 20.576251455180444, "grad_norm": 0.190125972032547, "learning_rate": 3.947237096492605e-05, "loss": -0.1803, "step": 35350 }, { "epoch": 20.582072176949943, "grad_norm": 0.1388597935438156, "learning_rate": 3.944543251172719e-05, "loss": -0.1786, "step": 35360 }, { "epoch": 20.587892898719442, "grad_norm": 0.25916025042533875, "learning_rate": 3.941849726472725e-05, "loss": -0.1804, "step": 35370 }, { "epoch": 20.59371362048894, "grad_norm": 0.17670145630836487, "learning_rate": 3.939156523210846e-05, "loss": -0.1793, "step": 35380 }, { "epoch": 20.59953434225844, "grad_norm": 0.13301733136177063, "learning_rate": 3.9364636422052046e-05, "loss": -0.1752, "step": 35390 }, { "epoch": 20.60535506402794, "grad_norm": 0.1715395599603653, "learning_rate": 3.933771084273828e-05, "loss": -0.1741, "step": 35400 }, { "epoch": 20.61117578579744, "grad_norm": 0.24151834845542908, "learning_rate": 3.931078850234643e-05, "loss": -0.1756, "step": 35410 }, { "epoch": 20.616996507566938, "grad_norm": 0.1541953980922699, "learning_rate": 3.928386940905483e-05, "loss": -0.1794, "step": 35420 }, { "epoch": 20.622817229336437, "grad_norm": 0.10089017450809479, "learning_rate": 3.925695357104073e-05, "loss": -0.1803, "step": 35430 }, { "epoch": 20.628637951105937, "grad_norm": 0.2455669641494751, "learning_rate": 3.923004099648049e-05, "loss": -0.1757, "step": 35440 }, { "epoch": 20.634458672875436, "grad_norm": 0.16185262799263, "learning_rate": 3.920313169354944e-05, "loss": -0.1791, "step": 35450 }, { "epoch": 20.640279394644935, "grad_norm": 0.11399251222610474, "learning_rate": 3.9176225670421897e-05, "loss": -0.1737, "step": 35460 }, { "epoch": 20.646100116414434, "grad_norm": 0.17526686191558838, "learning_rate": 3.9149322935271224e-05, "loss": -0.178, "step": 35470 }, { "epoch": 20.651920838183933, "grad_norm": 0.23236246407032013, "learning_rate": 3.9122423496269725e-05, "loss": -0.1768, "step": 35480 }, { "epoch": 20.657741559953433, "grad_norm": 0.15964210033416748, "learning_rate": 3.909552736158877e-05, "loss": -0.1781, "step": 35490 }, { "epoch": 20.663562281722932, "grad_norm": 0.15323857963085175, "learning_rate": 3.90686345393987e-05, "loss": -0.1807, "step": 35500 }, { "epoch": 20.669383003492435, "grad_norm": 0.15636146068572998, "learning_rate": 3.9041745037868816e-05, "loss": -0.1786, "step": 35510 }, { "epoch": 20.675203725261934, "grad_norm": 0.19476526975631714, "learning_rate": 3.9014858865167465e-05, "loss": -0.1743, "step": 35520 }, { "epoch": 20.681024447031433, "grad_norm": 0.126918762922287, "learning_rate": 3.8987976029461935e-05, "loss": -0.1732, "step": 35530 }, { "epoch": 20.686845168800932, "grad_norm": 0.1462058424949646, "learning_rate": 3.896109653891853e-05, "loss": -0.1784, "step": 35540 }, { "epoch": 20.69266589057043, "grad_norm": 0.11803322285413742, "learning_rate": 3.893422040170254e-05, "loss": -0.1808, "step": 35550 }, { "epoch": 20.69848661233993, "grad_norm": 0.15091748535633087, "learning_rate": 3.8907347625978207e-05, "loss": -0.175, "step": 35560 }, { "epoch": 20.70430733410943, "grad_norm": 0.19283710420131683, "learning_rate": 3.88804782199088e-05, "loss": -0.1754, "step": 35570 }, { "epoch": 20.71012805587893, "grad_norm": 0.2455177754163742, "learning_rate": 3.8853612191656495e-05, "loss": -0.1811, "step": 35580 }, { "epoch": 20.71594877764843, "grad_norm": 0.13431154191493988, "learning_rate": 3.88267495493825e-05, "loss": -0.177, "step": 35590 }, { "epoch": 20.721769499417928, "grad_norm": 0.14657637476921082, "learning_rate": 3.8799890301247004e-05, "loss": -0.1755, "step": 35600 }, { "epoch": 20.727590221187427, "grad_norm": 0.18539056181907654, "learning_rate": 3.8773034455409096e-05, "loss": -0.1788, "step": 35610 }, { "epoch": 20.733410942956926, "grad_norm": 0.1620948165655136, "learning_rate": 3.8746182020026904e-05, "loss": -0.1796, "step": 35620 }, { "epoch": 20.739231664726425, "grad_norm": 0.14043667912483215, "learning_rate": 3.871933300325745e-05, "loss": -0.1805, "step": 35630 }, { "epoch": 20.745052386495924, "grad_norm": 0.3108139634132385, "learning_rate": 3.869248741325679e-05, "loss": -0.1781, "step": 35640 }, { "epoch": 20.750873108265424, "grad_norm": 0.13634052872657776, "learning_rate": 3.866564525817992e-05, "loss": -0.1795, "step": 35650 }, { "epoch": 20.756693830034923, "grad_norm": 0.18439018726348877, "learning_rate": 3.8638806546180725e-05, "loss": -0.1788, "step": 35660 }, { "epoch": 20.762514551804422, "grad_norm": 0.15005391836166382, "learning_rate": 3.861197128541213e-05, "loss": -0.1772, "step": 35670 }, { "epoch": 20.768335273573925, "grad_norm": 0.13978300988674164, "learning_rate": 3.858513948402599e-05, "loss": -0.1759, "step": 35680 }, { "epoch": 20.774155995343424, "grad_norm": 0.20613951981067657, "learning_rate": 3.8558311150173077e-05, "loss": -0.1699, "step": 35690 }, { "epoch": 20.779976717112923, "grad_norm": 0.22815771400928497, "learning_rate": 3.853148629200312e-05, "loss": -0.176, "step": 35700 }, { "epoch": 20.785797438882422, "grad_norm": 0.15528546273708344, "learning_rate": 3.850466491766482e-05, "loss": -0.1778, "step": 35710 }, { "epoch": 20.79161816065192, "grad_norm": 0.16231122612953186, "learning_rate": 3.847784703530583e-05, "loss": -0.1809, "step": 35720 }, { "epoch": 20.79743888242142, "grad_norm": 0.07145806401968002, "learning_rate": 3.845103265307266e-05, "loss": -0.1797, "step": 35730 }, { "epoch": 20.80325960419092, "grad_norm": 0.14941932260990143, "learning_rate": 3.842422177911086e-05, "loss": -0.1806, "step": 35740 }, { "epoch": 20.80908032596042, "grad_norm": 0.1419716328382492, "learning_rate": 3.8397414421564826e-05, "loss": -0.1767, "step": 35750 }, { "epoch": 20.81490104772992, "grad_norm": 0.2460295855998993, "learning_rate": 3.8370610588577935e-05, "loss": -0.1772, "step": 35760 }, { "epoch": 20.820721769499418, "grad_norm": 0.13897021114826202, "learning_rate": 3.834381028829251e-05, "loss": -0.1811, "step": 35770 }, { "epoch": 20.826542491268917, "grad_norm": 0.14582788944244385, "learning_rate": 3.8317013528849745e-05, "loss": -0.1786, "step": 35780 }, { "epoch": 20.832363213038416, "grad_norm": 0.11740929633378983, "learning_rate": 3.8290220318389815e-05, "loss": -0.1764, "step": 35790 }, { "epoch": 20.838183934807915, "grad_norm": 0.08300530910491943, "learning_rate": 3.8263430665051746e-05, "loss": -0.1816, "step": 35800 }, { "epoch": 20.844004656577415, "grad_norm": 0.1626703292131424, "learning_rate": 3.8236644576973554e-05, "loss": -0.1802, "step": 35810 }, { "epoch": 20.849825378346914, "grad_norm": 0.1311126947402954, "learning_rate": 3.820986206229217e-05, "loss": -0.1793, "step": 35820 }, { "epoch": 20.855646100116413, "grad_norm": 0.15115198493003845, "learning_rate": 3.8183083129143384e-05, "loss": -0.1804, "step": 35830 }, { "epoch": 20.861466821885912, "grad_norm": 0.13852252066135406, "learning_rate": 3.815630778566193e-05, "loss": -0.1689, "step": 35840 }, { "epoch": 20.867287543655415, "grad_norm": 0.13661649823188782, "learning_rate": 3.812953603998145e-05, "loss": -0.1761, "step": 35850 }, { "epoch": 20.873108265424914, "grad_norm": 0.14976783096790314, "learning_rate": 3.8102767900234504e-05, "loss": -0.1775, "step": 35860 }, { "epoch": 20.878928987194413, "grad_norm": 0.11553827673196793, "learning_rate": 3.807600337455256e-05, "loss": -0.1776, "step": 35870 }, { "epoch": 20.884749708963913, "grad_norm": 0.19058817625045776, "learning_rate": 3.804924247106593e-05, "loss": -0.1762, "step": 35880 }, { "epoch": 20.890570430733412, "grad_norm": 0.1292882114648819, "learning_rate": 3.8022485197903925e-05, "loss": -0.1793, "step": 35890 }, { "epoch": 20.89639115250291, "grad_norm": 0.12289566546678543, "learning_rate": 3.799573156319464e-05, "loss": -0.1793, "step": 35900 }, { "epoch": 20.90221187427241, "grad_norm": 0.2009572833776474, "learning_rate": 3.796898157506515e-05, "loss": -0.1775, "step": 35910 }, { "epoch": 20.90803259604191, "grad_norm": 0.1286146491765976, "learning_rate": 3.794223524164143e-05, "loss": -0.1795, "step": 35920 }, { "epoch": 20.91385331781141, "grad_norm": 0.20357663929462433, "learning_rate": 3.7915492571048245e-05, "loss": -0.1758, "step": 35930 }, { "epoch": 20.919674039580908, "grad_norm": 0.1936464011669159, "learning_rate": 3.788875357140937e-05, "loss": -0.1706, "step": 35940 }, { "epoch": 20.925494761350407, "grad_norm": 0.14233183860778809, "learning_rate": 3.786201825084736e-05, "loss": -0.1766, "step": 35950 }, { "epoch": 20.931315483119906, "grad_norm": 0.14083583652973175, "learning_rate": 3.783528661748372e-05, "loss": -0.1802, "step": 35960 }, { "epoch": 20.937136204889406, "grad_norm": 0.19201453030109406, "learning_rate": 3.780855867943882e-05, "loss": -0.1743, "step": 35970 }, { "epoch": 20.942956926658905, "grad_norm": 0.17592433094978333, "learning_rate": 3.778183444483189e-05, "loss": -0.1801, "step": 35980 }, { "epoch": 20.948777648428404, "grad_norm": 0.22894501686096191, "learning_rate": 3.775511392178108e-05, "loss": -0.178, "step": 35990 }, { "epoch": 20.954598370197903, "grad_norm": 0.17901240289211273, "learning_rate": 3.772839711840332e-05, "loss": -0.1749, "step": 36000 }, { "epoch": 20.960419091967402, "grad_norm": 0.22982046008110046, "learning_rate": 3.7701684042814515e-05, "loss": -0.1762, "step": 36010 }, { "epoch": 20.9662398137369, "grad_norm": 0.16592223942279816, "learning_rate": 3.76749747031294e-05, "loss": -0.179, "step": 36020 }, { "epoch": 20.972060535506404, "grad_norm": 0.14871327579021454, "learning_rate": 3.764826910746152e-05, "loss": -0.1713, "step": 36030 }, { "epoch": 20.977881257275904, "grad_norm": 0.11697038263082504, "learning_rate": 3.762156726392338e-05, "loss": -0.1807, "step": 36040 }, { "epoch": 20.983701979045403, "grad_norm": 0.1147325187921524, "learning_rate": 3.759486918062625e-05, "loss": -0.1745, "step": 36050 }, { "epoch": 20.989522700814902, "grad_norm": 0.13572491705417633, "learning_rate": 3.756817486568033e-05, "loss": -0.1774, "step": 36060 }, { "epoch": 20.9953434225844, "grad_norm": 0.1990443468093872, "learning_rate": 3.7541484327194654e-05, "loss": -0.1793, "step": 36070 }, { "epoch": 21.0011641443539, "grad_norm": 0.10205939412117004, "learning_rate": 3.751479757327707e-05, "loss": -0.1765, "step": 36080 }, { "epoch": 21.0069848661234, "grad_norm": 0.23101209104061127, "learning_rate": 3.7488114612034345e-05, "loss": -0.177, "step": 36090 }, { "epoch": 21.0128055878929, "grad_norm": 0.20393061637878418, "learning_rate": 3.7461435451572044e-05, "loss": -0.1765, "step": 36100 }, { "epoch": 21.018626309662398, "grad_norm": 0.06230197101831436, "learning_rate": 3.743476009999459e-05, "loss": -0.1746, "step": 36110 }, { "epoch": 21.024447031431897, "grad_norm": 0.14019158482551575, "learning_rate": 3.7408088565405245e-05, "loss": -0.1796, "step": 36120 }, { "epoch": 21.030267753201397, "grad_norm": 0.12092211097478867, "learning_rate": 3.738142085590612e-05, "loss": -0.1767, "step": 36130 }, { "epoch": 21.036088474970896, "grad_norm": 0.18112246692180634, "learning_rate": 3.7354756979598194e-05, "loss": -0.1784, "step": 36140 }, { "epoch": 21.041909196740395, "grad_norm": 0.1295139044523239, "learning_rate": 3.7328096944581187e-05, "loss": -0.1794, "step": 36150 }, { "epoch": 21.047729918509894, "grad_norm": 0.162873774766922, "learning_rate": 3.730144075895377e-05, "loss": -0.1778, "step": 36160 }, { "epoch": 21.053550640279393, "grad_norm": 0.12233775854110718, "learning_rate": 3.727478843081335e-05, "loss": -0.1799, "step": 36170 }, { "epoch": 21.059371362048893, "grad_norm": 0.2113703042268753, "learning_rate": 3.72481399682562e-05, "loss": -0.1798, "step": 36180 }, { "epoch": 21.065192083818392, "grad_norm": 0.15658588707447052, "learning_rate": 3.722149537937747e-05, "loss": -0.1767, "step": 36190 }, { "epoch": 21.07101280558789, "grad_norm": 0.17121796309947968, "learning_rate": 3.7194854672271015e-05, "loss": -0.1812, "step": 36200 }, { "epoch": 21.076833527357394, "grad_norm": 0.11793479323387146, "learning_rate": 3.7168217855029644e-05, "loss": -0.1786, "step": 36210 }, { "epoch": 21.082654249126893, "grad_norm": 0.12322156876325607, "learning_rate": 3.7141584935744856e-05, "loss": -0.1774, "step": 36220 }, { "epoch": 21.088474970896392, "grad_norm": 0.15953992307186127, "learning_rate": 3.7114955922507055e-05, "loss": -0.1805, "step": 36230 }, { "epoch": 21.09429569266589, "grad_norm": 0.10896117240190506, "learning_rate": 3.708833082340545e-05, "loss": -0.1764, "step": 36240 }, { "epoch": 21.10011641443539, "grad_norm": 0.14711833000183105, "learning_rate": 3.7061709646528034e-05, "loss": -0.1815, "step": 36250 }, { "epoch": 21.10593713620489, "grad_norm": 0.13829943537712097, "learning_rate": 3.7035092399961604e-05, "loss": -0.1793, "step": 36260 }, { "epoch": 21.11175785797439, "grad_norm": 0.1370743066072464, "learning_rate": 3.700847909179177e-05, "loss": -0.1788, "step": 36270 }, { "epoch": 21.11757857974389, "grad_norm": 0.1508667916059494, "learning_rate": 3.698186973010297e-05, "loss": -0.1726, "step": 36280 }, { "epoch": 21.123399301513388, "grad_norm": 0.09617247432470322, "learning_rate": 3.695526432297844e-05, "loss": -0.1797, "step": 36290 }, { "epoch": 21.129220023282887, "grad_norm": 0.23788411915302277, "learning_rate": 3.692866287850017e-05, "loss": -0.1802, "step": 36300 }, { "epoch": 21.135040745052386, "grad_norm": 0.13201971352100372, "learning_rate": 3.6902065404749006e-05, "loss": -0.1794, "step": 36310 }, { "epoch": 21.140861466821885, "grad_norm": 0.11541817337274551, "learning_rate": 3.6875471909804516e-05, "loss": -0.1764, "step": 36320 }, { "epoch": 21.146682188591384, "grad_norm": 0.09528110176324844, "learning_rate": 3.6848882401745135e-05, "loss": -0.1827, "step": 36330 }, { "epoch": 21.152502910360884, "grad_norm": 0.12666986882686615, "learning_rate": 3.682229688864806e-05, "loss": -0.1784, "step": 36340 }, { "epoch": 21.158323632130383, "grad_norm": 0.09888942539691925, "learning_rate": 3.6795715378589235e-05, "loss": -0.1817, "step": 36350 }, { "epoch": 21.164144353899882, "grad_norm": 0.13995547592639923, "learning_rate": 3.676913787964345e-05, "loss": -0.1761, "step": 36360 }, { "epoch": 21.16996507566938, "grad_norm": 0.15816910564899445, "learning_rate": 3.674256439988423e-05, "loss": -0.1772, "step": 36370 }, { "epoch": 21.175785797438884, "grad_norm": 0.24077627062797546, "learning_rate": 3.6715994947383904e-05, "loss": -0.1777, "step": 36380 }, { "epoch": 21.181606519208383, "grad_norm": 0.10550013929605484, "learning_rate": 3.668942953021357e-05, "loss": -0.1817, "step": 36390 }, { "epoch": 21.187427240977883, "grad_norm": 0.10577505826950073, "learning_rate": 3.66628681564431e-05, "loss": -0.1806, "step": 36400 }, { "epoch": 21.19324796274738, "grad_norm": 0.2264135777950287, "learning_rate": 3.663631083414114e-05, "loss": -0.1818, "step": 36410 }, { "epoch": 21.19906868451688, "grad_norm": 0.17725402116775513, "learning_rate": 3.660975757137509e-05, "loss": -0.1798, "step": 36420 }, { "epoch": 21.20488940628638, "grad_norm": 0.1643381267786026, "learning_rate": 3.658320837621114e-05, "loss": -0.1795, "step": 36430 }, { "epoch": 21.21071012805588, "grad_norm": 0.18883731961250305, "learning_rate": 3.655666325671426e-05, "loss": -0.178, "step": 36440 }, { "epoch": 21.21653084982538, "grad_norm": 0.13211101293563843, "learning_rate": 3.65301222209481e-05, "loss": -0.1795, "step": 36450 }, { "epoch": 21.222351571594878, "grad_norm": 0.15636563301086426, "learning_rate": 3.650358527697519e-05, "loss": -0.1746, "step": 36460 }, { "epoch": 21.228172293364377, "grad_norm": 0.16690444946289062, "learning_rate": 3.64770524328567e-05, "loss": -0.1787, "step": 36470 }, { "epoch": 21.233993015133876, "grad_norm": 0.16964378952980042, "learning_rate": 3.645052369665265e-05, "loss": -0.1789, "step": 36480 }, { "epoch": 21.239813736903375, "grad_norm": 0.11196733266115189, "learning_rate": 3.6423999076421724e-05, "loss": -0.1779, "step": 36490 }, { "epoch": 21.245634458672875, "grad_norm": 0.1329805999994278, "learning_rate": 3.639747858022142e-05, "loss": -0.1777, "step": 36500 }, { "epoch": 21.251455180442374, "grad_norm": 0.1505051851272583, "learning_rate": 3.637096221610799e-05, "loss": -0.179, "step": 36510 }, { "epoch": 21.257275902211873, "grad_norm": 0.1106114387512207, "learning_rate": 3.634444999213638e-05, "loss": -0.1819, "step": 36520 }, { "epoch": 21.263096623981372, "grad_norm": 0.25660863518714905, "learning_rate": 3.6317941916360296e-05, "loss": -0.1773, "step": 36530 }, { "epoch": 21.26891734575087, "grad_norm": 0.1648140400648117, "learning_rate": 3.629143799683221e-05, "loss": -0.1805, "step": 36540 }, { "epoch": 21.274738067520374, "grad_norm": 0.13443708419799805, "learning_rate": 3.626493824160331e-05, "loss": -0.1795, "step": 36550 }, { "epoch": 21.280558789289874, "grad_norm": 0.23252740502357483, "learning_rate": 3.623844265872352e-05, "loss": -0.1825, "step": 36560 }, { "epoch": 21.286379511059373, "grad_norm": 0.20706817507743835, "learning_rate": 3.621195125624149e-05, "loss": -0.1789, "step": 36570 }, { "epoch": 21.292200232828872, "grad_norm": 0.22824019193649292, "learning_rate": 3.618546404220463e-05, "loss": -0.1771, "step": 36580 }, { "epoch": 21.29802095459837, "grad_norm": 0.14433249831199646, "learning_rate": 3.615898102465903e-05, "loss": -0.1773, "step": 36590 }, { "epoch": 21.30384167636787, "grad_norm": 0.1713522970676422, "learning_rate": 3.6132502211649544e-05, "loss": -0.1779, "step": 36600 }, { "epoch": 21.30966239813737, "grad_norm": 0.12255793809890747, "learning_rate": 3.610602761121975e-05, "loss": -0.1796, "step": 36610 }, { "epoch": 21.31548311990687, "grad_norm": 0.07923822104930878, "learning_rate": 3.6079557231411897e-05, "loss": -0.1775, "step": 36620 }, { "epoch": 21.321303841676368, "grad_norm": 0.11102455109357834, "learning_rate": 3.6053091080267035e-05, "loss": -0.1816, "step": 36630 }, { "epoch": 21.327124563445867, "grad_norm": 0.23419278860092163, "learning_rate": 3.602662916582483e-05, "loss": -0.1808, "step": 36640 }, { "epoch": 21.332945285215366, "grad_norm": 0.19105064868927002, "learning_rate": 3.600017149612375e-05, "loss": -0.1786, "step": 36650 }, { "epoch": 21.338766006984866, "grad_norm": 0.20322081446647644, "learning_rate": 3.5973718079200935e-05, "loss": -0.1781, "step": 36660 }, { "epoch": 21.344586728754365, "grad_norm": 0.15922842919826508, "learning_rate": 3.5947268923092216e-05, "loss": -0.179, "step": 36670 }, { "epoch": 21.350407450523864, "grad_norm": 0.13650193810462952, "learning_rate": 3.592082403583216e-05, "loss": -0.1788, "step": 36680 }, { "epoch": 21.356228172293363, "grad_norm": 0.10030801594257355, "learning_rate": 3.5894383425454004e-05, "loss": -0.18, "step": 36690 }, { "epoch": 21.362048894062863, "grad_norm": 0.10376578569412231, "learning_rate": 3.586794709998975e-05, "loss": -0.1828, "step": 36700 }, { "epoch": 21.36786961583236, "grad_norm": 0.1237216666340828, "learning_rate": 3.584151506747002e-05, "loss": -0.1802, "step": 36710 }, { "epoch": 21.37369033760186, "grad_norm": 0.19985540211200714, "learning_rate": 3.581508733592418e-05, "loss": -0.1781, "step": 36720 }, { "epoch": 21.379511059371364, "grad_norm": 0.12786096334457397, "learning_rate": 3.5788663913380297e-05, "loss": -0.1786, "step": 36730 }, { "epoch": 21.385331781140863, "grad_norm": 0.1750558465719223, "learning_rate": 3.576224480786506e-05, "loss": -0.1734, "step": 36740 }, { "epoch": 21.391152502910362, "grad_norm": 0.10112489014863968, "learning_rate": 3.573583002740393e-05, "loss": -0.1791, "step": 36750 }, { "epoch": 21.39697322467986, "grad_norm": 0.23473481833934784, "learning_rate": 3.570941958002103e-05, "loss": -0.1756, "step": 36760 }, { "epoch": 21.40279394644936, "grad_norm": 0.16491810977458954, "learning_rate": 3.568301347373912e-05, "loss": -0.18, "step": 36770 }, { "epoch": 21.40861466821886, "grad_norm": 0.13397198915481567, "learning_rate": 3.5656611716579726e-05, "loss": -0.1795, "step": 36780 }, { "epoch": 21.41443538998836, "grad_norm": 0.10360664874315262, "learning_rate": 3.5630214316562946e-05, "loss": -0.1703, "step": 36790 }, { "epoch": 21.42025611175786, "grad_norm": 0.08460299670696259, "learning_rate": 3.560382128170766e-05, "loss": -0.181, "step": 36800 }, { "epoch": 21.426076833527357, "grad_norm": 0.11267755925655365, "learning_rate": 3.5577432620031374e-05, "loss": -0.1746, "step": 36810 }, { "epoch": 21.431897555296857, "grad_norm": 0.1753121018409729, "learning_rate": 3.5551048339550216e-05, "loss": -0.1786, "step": 36820 }, { "epoch": 21.437718277066356, "grad_norm": 0.11108968406915665, "learning_rate": 3.55246684482791e-05, "loss": -0.1776, "step": 36830 }, { "epoch": 21.443538998835855, "grad_norm": 0.1660737842321396, "learning_rate": 3.5498292954231496e-05, "loss": -0.1788, "step": 36840 }, { "epoch": 21.449359720605354, "grad_norm": 0.18619322776794434, "learning_rate": 3.54719218654196e-05, "loss": -0.1791, "step": 36850 }, { "epoch": 21.455180442374854, "grad_norm": 0.15424473583698273, "learning_rate": 3.544555518985425e-05, "loss": -0.1796, "step": 36860 }, { "epoch": 21.461001164144353, "grad_norm": 0.13130086660385132, "learning_rate": 3.541919293554494e-05, "loss": -0.1799, "step": 36870 }, { "epoch": 21.466821885913852, "grad_norm": 0.18191984295845032, "learning_rate": 3.539283511049985e-05, "loss": -0.178, "step": 36880 }, { "epoch": 21.47264260768335, "grad_norm": 0.1961999237537384, "learning_rate": 3.5366481722725755e-05, "loss": -0.1792, "step": 36890 }, { "epoch": 21.47846332945285, "grad_norm": 0.1629655957221985, "learning_rate": 3.534013278022816e-05, "loss": -0.1801, "step": 36900 }, { "epoch": 21.484284051222353, "grad_norm": 0.2408684939146042, "learning_rate": 3.531378829101113e-05, "loss": -0.1763, "step": 36910 }, { "epoch": 21.490104772991852, "grad_norm": 0.16159069538116455, "learning_rate": 3.528744826307746e-05, "loss": -0.1796, "step": 36920 }, { "epoch": 21.49592549476135, "grad_norm": 0.19337843358516693, "learning_rate": 3.5261112704428554e-05, "loss": -0.1788, "step": 36930 }, { "epoch": 21.50174621653085, "grad_norm": 0.17488062381744385, "learning_rate": 3.523478162306443e-05, "loss": -0.1813, "step": 36940 }, { "epoch": 21.50756693830035, "grad_norm": 0.1749117076396942, "learning_rate": 3.520845502698381e-05, "loss": -0.1809, "step": 36950 }, { "epoch": 21.51338766006985, "grad_norm": 0.17407365143299103, "learning_rate": 3.5182132924184005e-05, "loss": -0.1794, "step": 36960 }, { "epoch": 21.51920838183935, "grad_norm": 0.13115379214286804, "learning_rate": 3.5155815322660966e-05, "loss": -0.1768, "step": 36970 }, { "epoch": 21.525029103608848, "grad_norm": 0.167379692196846, "learning_rate": 3.512950223040931e-05, "loss": -0.179, "step": 36980 }, { "epoch": 21.530849825378347, "grad_norm": 0.19439266622066498, "learning_rate": 3.5103193655422216e-05, "loss": -0.1793, "step": 36990 }, { "epoch": 21.536670547147846, "grad_norm": 0.11990475654602051, "learning_rate": 3.5076889605691596e-05, "loss": -0.179, "step": 37000 }, { "epoch": 21.542491268917345, "grad_norm": 0.15328866243362427, "learning_rate": 3.505059008920787e-05, "loss": -0.177, "step": 37010 }, { "epoch": 21.548311990686845, "grad_norm": 0.2254042774438858, "learning_rate": 3.502429511396016e-05, "loss": -0.1748, "step": 37020 }, { "epoch": 21.554132712456344, "grad_norm": 0.15437239408493042, "learning_rate": 3.4998004687936196e-05, "loss": -0.1781, "step": 37030 }, { "epoch": 21.559953434225843, "grad_norm": 0.1656959354877472, "learning_rate": 3.497171881912229e-05, "loss": -0.1817, "step": 37040 }, { "epoch": 21.565774155995342, "grad_norm": 0.11220508068799973, "learning_rate": 3.494543751550342e-05, "loss": -0.1789, "step": 37050 }, { "epoch": 21.57159487776484, "grad_norm": 0.1770806908607483, "learning_rate": 3.491916078506313e-05, "loss": -0.1774, "step": 37060 }, { "epoch": 21.57741559953434, "grad_norm": 0.13214580714702606, "learning_rate": 3.489288863578361e-05, "loss": -0.1741, "step": 37070 }, { "epoch": 21.583236321303843, "grad_norm": 0.20547406375408173, "learning_rate": 3.4866621075645646e-05, "loss": -0.1756, "step": 37080 }, { "epoch": 21.589057043073343, "grad_norm": 0.18852964043617249, "learning_rate": 3.4840358112628614e-05, "loss": -0.1788, "step": 37090 }, { "epoch": 21.594877764842842, "grad_norm": 0.22916871309280396, "learning_rate": 3.481409975471053e-05, "loss": -0.1819, "step": 37100 }, { "epoch": 21.60069848661234, "grad_norm": 0.13586993515491486, "learning_rate": 3.4787846009867986e-05, "loss": -0.1754, "step": 37110 }, { "epoch": 21.60651920838184, "grad_norm": 0.15255185961723328, "learning_rate": 3.476159688607615e-05, "loss": -0.1775, "step": 37120 }, { "epoch": 21.61233993015134, "grad_norm": 0.16311582922935486, "learning_rate": 3.4735352391308854e-05, "loss": -0.1806, "step": 37130 }, { "epoch": 21.61816065192084, "grad_norm": 0.08854464441537857, "learning_rate": 3.4709112533538446e-05, "loss": -0.1773, "step": 37140 }, { "epoch": 21.623981373690338, "grad_norm": 0.15706902742385864, "learning_rate": 3.4682877320735934e-05, "loss": -0.1831, "step": 37150 }, { "epoch": 21.629802095459837, "grad_norm": 0.1162324920296669, "learning_rate": 3.465664676087085e-05, "loss": -0.1759, "step": 37160 }, { "epoch": 21.635622817229336, "grad_norm": 0.15587309002876282, "learning_rate": 3.463042086191136e-05, "loss": -0.1771, "step": 37170 }, { "epoch": 21.641443538998836, "grad_norm": 0.20635652542114258, "learning_rate": 3.460419963182423e-05, "loss": -0.1764, "step": 37180 }, { "epoch": 21.647264260768335, "grad_norm": 0.13057425618171692, "learning_rate": 3.457798307857473e-05, "loss": -0.1779, "step": 37190 }, { "epoch": 21.653084982537834, "grad_norm": 0.25394436717033386, "learning_rate": 3.455177121012678e-05, "loss": -0.1775, "step": 37200 }, { "epoch": 21.658905704307333, "grad_norm": 0.16892549395561218, "learning_rate": 3.452556403444285e-05, "loss": -0.1782, "step": 37210 }, { "epoch": 21.664726426076832, "grad_norm": 0.22835205495357513, "learning_rate": 3.4499361559483975e-05, "loss": -0.1801, "step": 37220 }, { "epoch": 21.67054714784633, "grad_norm": 0.17822898924350739, "learning_rate": 3.44731637932098e-05, "loss": -0.181, "step": 37230 }, { "epoch": 21.67636786961583, "grad_norm": 0.17613042891025543, "learning_rate": 3.44469707435785e-05, "loss": -0.1791, "step": 37240 }, { "epoch": 21.682188591385334, "grad_norm": 0.16126900911331177, "learning_rate": 3.4420782418546835e-05, "loss": -0.1798, "step": 37250 }, { "epoch": 21.688009313154833, "grad_norm": 0.13100145757198334, "learning_rate": 3.439459882607012e-05, "loss": -0.1787, "step": 37260 }, { "epoch": 21.693830034924332, "grad_norm": 0.15490855276584625, "learning_rate": 3.436841997410225e-05, "loss": -0.1817, "step": 37270 }, { "epoch": 21.69965075669383, "grad_norm": 0.09858963638544083, "learning_rate": 3.434224587059567e-05, "loss": -0.1802, "step": 37280 }, { "epoch": 21.70547147846333, "grad_norm": 0.0871671587228775, "learning_rate": 3.431607652350136e-05, "loss": -0.1792, "step": 37290 }, { "epoch": 21.71129220023283, "grad_norm": 0.10456562042236328, "learning_rate": 3.428991194076891e-05, "loss": -0.1806, "step": 37300 }, { "epoch": 21.71711292200233, "grad_norm": 0.1380661576986313, "learning_rate": 3.4263752130346394e-05, "loss": -0.1802, "step": 37310 }, { "epoch": 21.722933643771828, "grad_norm": 0.2195664346218109, "learning_rate": 3.4237597100180515e-05, "loss": -0.1794, "step": 37320 }, { "epoch": 21.728754365541327, "grad_norm": 0.19225165247917175, "learning_rate": 3.4211446858216427e-05, "loss": -0.1794, "step": 37330 }, { "epoch": 21.734575087310827, "grad_norm": 0.21895933151245117, "learning_rate": 3.4185301412397915e-05, "loss": -0.1684, "step": 37340 }, { "epoch": 21.740395809080326, "grad_norm": 0.07265221327543259, "learning_rate": 3.415916077066729e-05, "loss": -0.176, "step": 37350 }, { "epoch": 21.746216530849825, "grad_norm": 0.16661907732486725, "learning_rate": 3.413302494096535e-05, "loss": -0.1819, "step": 37360 }, { "epoch": 21.752037252619324, "grad_norm": 0.10626280307769775, "learning_rate": 3.410689393123151e-05, "loss": -0.178, "step": 37370 }, { "epoch": 21.757857974388823, "grad_norm": 0.23687709867954254, "learning_rate": 3.408076774940364e-05, "loss": -0.1794, "step": 37380 }, { "epoch": 21.763678696158323, "grad_norm": 0.21129438281059265, "learning_rate": 3.40546464034182e-05, "loss": -0.1791, "step": 37390 }, { "epoch": 21.769499417927822, "grad_norm": 0.08967669308185577, "learning_rate": 3.4028529901210185e-05, "loss": -0.1746, "step": 37400 }, { "epoch": 21.77532013969732, "grad_norm": 0.11244101822376251, "learning_rate": 3.4002418250713086e-05, "loss": -0.1759, "step": 37410 }, { "epoch": 21.78114086146682, "grad_norm": 0.16287213563919067, "learning_rate": 3.3976311459858936e-05, "loss": -0.1809, "step": 37420 }, { "epoch": 21.78696158323632, "grad_norm": 0.2500605881214142, "learning_rate": 3.395020953657826e-05, "loss": -0.1796, "step": 37430 }, { "epoch": 21.792782305005822, "grad_norm": 0.11504446715116501, "learning_rate": 3.3924112488800165e-05, "loss": -0.181, "step": 37440 }, { "epoch": 21.79860302677532, "grad_norm": 0.11527135968208313, "learning_rate": 3.389802032445225e-05, "loss": -0.1796, "step": 37450 }, { "epoch": 21.80442374854482, "grad_norm": 0.1548144817352295, "learning_rate": 3.38719330514606e-05, "loss": -0.18, "step": 37460 }, { "epoch": 21.81024447031432, "grad_norm": 0.13529227674007416, "learning_rate": 3.3845850677749866e-05, "loss": -0.1784, "step": 37470 }, { "epoch": 21.81606519208382, "grad_norm": 0.22716794908046722, "learning_rate": 3.3819773211243157e-05, "loss": -0.1806, "step": 37480 }, { "epoch": 21.82188591385332, "grad_norm": 0.12086214870214462, "learning_rate": 3.379370065986213e-05, "loss": -0.1818, "step": 37490 }, { "epoch": 21.827706635622818, "grad_norm": 0.16541357338428497, "learning_rate": 3.3767633031526955e-05, "loss": -0.181, "step": 37500 }, { "epoch": 21.833527357392317, "grad_norm": 0.13575740158557892, "learning_rate": 3.374157033415626e-05, "loss": -0.1821, "step": 37510 }, { "epoch": 21.839348079161816, "grad_norm": 0.16395078599452972, "learning_rate": 3.371551257566723e-05, "loss": -0.1801, "step": 37520 }, { "epoch": 21.845168800931315, "grad_norm": 0.13311073184013367, "learning_rate": 3.36894597639755e-05, "loss": -0.1784, "step": 37530 }, { "epoch": 21.850989522700814, "grad_norm": 0.15202347934246063, "learning_rate": 3.366341190699523e-05, "loss": -0.1791, "step": 37540 }, { "epoch": 21.856810244470314, "grad_norm": 0.25877460837364197, "learning_rate": 3.36373690126391e-05, "loss": -0.1809, "step": 37550 }, { "epoch": 21.862630966239813, "grad_norm": 0.22811247408390045, "learning_rate": 3.3611331088818234e-05, "loss": -0.1767, "step": 37560 }, { "epoch": 21.868451688009312, "grad_norm": 0.09302698075771332, "learning_rate": 3.3585298143442265e-05, "loss": -0.1779, "step": 37570 }, { "epoch": 21.87427240977881, "grad_norm": 0.12340329587459564, "learning_rate": 3.35592701844193e-05, "loss": -0.1783, "step": 37580 }, { "epoch": 21.88009313154831, "grad_norm": 0.13530133664608002, "learning_rate": 3.353324721965596e-05, "loss": -0.1781, "step": 37590 }, { "epoch": 21.88591385331781, "grad_norm": 0.20114706456661224, "learning_rate": 3.350722925705736e-05, "loss": -0.1808, "step": 37600 }, { "epoch": 21.891734575087312, "grad_norm": 0.15363608300685883, "learning_rate": 3.348121630452703e-05, "loss": -0.1762, "step": 37610 }, { "epoch": 21.89755529685681, "grad_norm": 0.15107819437980652, "learning_rate": 3.3455208369967044e-05, "loss": -0.1793, "step": 37620 }, { "epoch": 21.90337601862631, "grad_norm": 0.17955343425273895, "learning_rate": 3.34292054612779e-05, "loss": -0.1801, "step": 37630 }, { "epoch": 21.90919674039581, "grad_norm": 0.23471181094646454, "learning_rate": 3.340320758635861e-05, "loss": -0.1772, "step": 37640 }, { "epoch": 21.91501746216531, "grad_norm": 0.19985169172286987, "learning_rate": 3.337721475310666e-05, "loss": -0.1797, "step": 37650 }, { "epoch": 21.92083818393481, "grad_norm": 0.14043031632900238, "learning_rate": 3.335122696941795e-05, "loss": -0.1798, "step": 37660 }, { "epoch": 21.926658905704308, "grad_norm": 0.24096140265464783, "learning_rate": 3.332524424318692e-05, "loss": -0.1802, "step": 37670 }, { "epoch": 21.932479627473807, "grad_norm": 0.09852484613656998, "learning_rate": 3.32992665823064e-05, "loss": -0.1813, "step": 37680 }, { "epoch": 21.938300349243306, "grad_norm": 0.1146775335073471, "learning_rate": 3.327329399466774e-05, "loss": -0.1776, "step": 37690 }, { "epoch": 21.944121071012805, "grad_norm": 0.17027179896831512, "learning_rate": 3.324732648816072e-05, "loss": -0.1804, "step": 37700 }, { "epoch": 21.949941792782305, "grad_norm": 0.2543598711490631, "learning_rate": 3.322136407067358e-05, "loss": -0.1815, "step": 37710 }, { "epoch": 21.955762514551804, "grad_norm": 0.180500328540802, "learning_rate": 3.3195406750093036e-05, "loss": -0.1799, "step": 37720 }, { "epoch": 21.961583236321303, "grad_norm": 0.16598989069461823, "learning_rate": 3.3169454534304205e-05, "loss": -0.1803, "step": 37730 }, { "epoch": 21.967403958090802, "grad_norm": 0.1565598100423813, "learning_rate": 3.3143507431190725e-05, "loss": -0.178, "step": 37740 }, { "epoch": 21.9732246798603, "grad_norm": 0.1716998815536499, "learning_rate": 3.311756544863459e-05, "loss": -0.1809, "step": 37750 }, { "epoch": 21.9790454016298, "grad_norm": 0.15196624398231506, "learning_rate": 3.309162859451633e-05, "loss": -0.1761, "step": 37760 }, { "epoch": 21.9848661233993, "grad_norm": 0.18691684305667877, "learning_rate": 3.306569687671487e-05, "loss": -0.181, "step": 37770 }, { "epoch": 21.990686845168803, "grad_norm": 0.16904380917549133, "learning_rate": 3.303977030310756e-05, "loss": -0.1807, "step": 37780 }, { "epoch": 21.996507566938302, "grad_norm": 0.19601093232631683, "learning_rate": 3.3013848881570245e-05, "loss": -0.1807, "step": 37790 }, { "epoch": 22.0023282887078, "grad_norm": 0.13368159532546997, "learning_rate": 3.298793261997712e-05, "loss": -0.1749, "step": 37800 }, { "epoch": 22.0081490104773, "grad_norm": 0.13462597131729126, "learning_rate": 3.2962021526200893e-05, "loss": -0.1799, "step": 37810 }, { "epoch": 22.0139697322468, "grad_norm": 0.16006126999855042, "learning_rate": 3.293611560811268e-05, "loss": -0.179, "step": 37820 }, { "epoch": 22.0197904540163, "grad_norm": 0.17019964754581451, "learning_rate": 3.291021487358199e-05, "loss": -0.1766, "step": 37830 }, { "epoch": 22.025611175785798, "grad_norm": 0.17579732835292816, "learning_rate": 3.28843193304768e-05, "loss": -0.1757, "step": 37840 }, { "epoch": 22.031431897555297, "grad_norm": 0.1996077597141266, "learning_rate": 3.2858428986663456e-05, "loss": -0.1774, "step": 37850 }, { "epoch": 22.037252619324796, "grad_norm": 0.13154888153076172, "learning_rate": 3.283254385000681e-05, "loss": -0.1813, "step": 37860 }, { "epoch": 22.043073341094296, "grad_norm": 0.13777302205562592, "learning_rate": 3.2806663928370076e-05, "loss": -0.1808, "step": 37870 }, { "epoch": 22.048894062863795, "grad_norm": 0.16244366765022278, "learning_rate": 3.278078922961485e-05, "loss": -0.1804, "step": 37880 }, { "epoch": 22.054714784633294, "grad_norm": 0.14704522490501404, "learning_rate": 3.275491976160123e-05, "loss": -0.181, "step": 37890 }, { "epoch": 22.060535506402793, "grad_norm": 0.10539941489696503, "learning_rate": 3.2729055532187645e-05, "loss": -0.1815, "step": 37900 }, { "epoch": 22.066356228172292, "grad_norm": 0.25761300325393677, "learning_rate": 3.270319654923097e-05, "loss": -0.1777, "step": 37910 }, { "epoch": 22.07217694994179, "grad_norm": 0.15337412059307098, "learning_rate": 3.2677342820586506e-05, "loss": -0.1797, "step": 37920 }, { "epoch": 22.07799767171129, "grad_norm": 0.2627057731151581, "learning_rate": 3.2651494354107905e-05, "loss": -0.1787, "step": 37930 }, { "epoch": 22.08381839348079, "grad_norm": 0.1344236582517624, "learning_rate": 3.2625651157647266e-05, "loss": -0.1813, "step": 37940 }, { "epoch": 22.08963911525029, "grad_norm": 0.0779733657836914, "learning_rate": 3.259981323905505e-05, "loss": -0.1792, "step": 37950 }, { "epoch": 22.095459837019792, "grad_norm": 0.11382563412189484, "learning_rate": 3.257398060618014e-05, "loss": -0.1806, "step": 37960 }, { "epoch": 22.10128055878929, "grad_norm": 0.16119341552257538, "learning_rate": 3.254815326686983e-05, "loss": -0.1824, "step": 37970 }, { "epoch": 22.10710128055879, "grad_norm": 0.1180141493678093, "learning_rate": 3.2522331228969774e-05, "loss": -0.1828, "step": 37980 }, { "epoch": 22.11292200232829, "grad_norm": 0.19183757901191711, "learning_rate": 3.2496514500324006e-05, "loss": -0.18, "step": 37990 }, { "epoch": 22.11874272409779, "grad_norm": 0.06817994266748428, "learning_rate": 3.247070308877498e-05, "loss": -0.1822, "step": 38000 }, { "epoch": 22.124563445867288, "grad_norm": 0.21291962265968323, "learning_rate": 3.2444897002163515e-05, "loss": -0.1787, "step": 38010 }, { "epoch": 22.130384167636787, "grad_norm": 0.15501520037651062, "learning_rate": 3.241909624832885e-05, "loss": -0.1811, "step": 38020 }, { "epoch": 22.136204889406287, "grad_norm": 0.10048498958349228, "learning_rate": 3.239330083510852e-05, "loss": -0.1777, "step": 38030 }, { "epoch": 22.142025611175786, "grad_norm": 0.10289847105741501, "learning_rate": 3.236751077033855e-05, "loss": -0.1806, "step": 38040 }, { "epoch": 22.147846332945285, "grad_norm": 0.19533102214336395, "learning_rate": 3.234172606185322e-05, "loss": -0.1782, "step": 38050 }, { "epoch": 22.153667054714784, "grad_norm": 0.2021837681531906, "learning_rate": 3.231594671748528e-05, "loss": -0.1776, "step": 38060 }, { "epoch": 22.159487776484283, "grad_norm": 0.12362988293170929, "learning_rate": 3.2290172745065815e-05, "loss": -0.182, "step": 38070 }, { "epoch": 22.165308498253783, "grad_norm": 0.13565827906131744, "learning_rate": 3.226440415242426e-05, "loss": -0.1798, "step": 38080 }, { "epoch": 22.171129220023282, "grad_norm": 0.1461605578660965, "learning_rate": 3.223864094738846e-05, "loss": -0.1803, "step": 38090 }, { "epoch": 22.17694994179278, "grad_norm": 0.13735798001289368, "learning_rate": 3.221288313778456e-05, "loss": -0.1821, "step": 38100 }, { "epoch": 22.18277066356228, "grad_norm": 0.09508714824914932, "learning_rate": 3.2187130731437125e-05, "loss": -0.1817, "step": 38110 }, { "epoch": 22.18859138533178, "grad_norm": 0.1024748757481575, "learning_rate": 3.216138373616905e-05, "loss": -0.1797, "step": 38120 }, { "epoch": 22.194412107101282, "grad_norm": 0.11018886417150497, "learning_rate": 3.21356421598016e-05, "loss": -0.1794, "step": 38130 }, { "epoch": 22.20023282887078, "grad_norm": 0.182918518781662, "learning_rate": 3.210990601015438e-05, "loss": -0.1781, "step": 38140 }, { "epoch": 22.20605355064028, "grad_norm": 0.1822367161512375, "learning_rate": 3.208417529504535e-05, "loss": -0.1794, "step": 38150 }, { "epoch": 22.21187427240978, "grad_norm": 0.22851255536079407, "learning_rate": 3.205845002229084e-05, "loss": -0.1777, "step": 38160 }, { "epoch": 22.21769499417928, "grad_norm": 0.20001022517681122, "learning_rate": 3.203273019970547e-05, "loss": -0.1819, "step": 38170 }, { "epoch": 22.22351571594878, "grad_norm": 0.19799824059009552, "learning_rate": 3.200701583510227e-05, "loss": -0.1786, "step": 38180 }, { "epoch": 22.229336437718278, "grad_norm": 0.12155726552009583, "learning_rate": 3.198130693629261e-05, "loss": -0.1792, "step": 38190 }, { "epoch": 22.235157159487777, "grad_norm": 0.16350144147872925, "learning_rate": 3.195560351108612e-05, "loss": -0.1786, "step": 38200 }, { "epoch": 22.240977881257276, "grad_norm": 0.14592468738555908, "learning_rate": 3.1929905567290865e-05, "loss": -0.1819, "step": 38210 }, { "epoch": 22.246798603026775, "grad_norm": 0.1331716775894165, "learning_rate": 3.1904213112713164e-05, "loss": -0.1809, "step": 38220 }, { "epoch": 22.252619324796274, "grad_norm": 0.16018304228782654, "learning_rate": 3.187852615515774e-05, "loss": -0.1783, "step": 38230 }, { "epoch": 22.258440046565774, "grad_norm": 0.15787656605243683, "learning_rate": 3.1852844702427606e-05, "loss": -0.1827, "step": 38240 }, { "epoch": 22.264260768335273, "grad_norm": 0.17042887210845947, "learning_rate": 3.18271687623241e-05, "loss": -0.1795, "step": 38250 }, { "epoch": 22.270081490104772, "grad_norm": 0.18658772110939026, "learning_rate": 3.1801498342646896e-05, "loss": -0.1777, "step": 38260 }, { "epoch": 22.27590221187427, "grad_norm": 0.2291410267353058, "learning_rate": 3.177583345119398e-05, "loss": -0.1781, "step": 38270 }, { "epoch": 22.28172293364377, "grad_norm": 0.225569486618042, "learning_rate": 3.17501740957617e-05, "loss": -0.1809, "step": 38280 }, { "epoch": 22.28754365541327, "grad_norm": 0.13873407244682312, "learning_rate": 3.172452028414467e-05, "loss": -0.1798, "step": 38290 }, { "epoch": 22.29336437718277, "grad_norm": 0.22429294884204865, "learning_rate": 3.169887202413583e-05, "loss": -0.1798, "step": 38300 }, { "epoch": 22.29918509895227, "grad_norm": 0.2065238654613495, "learning_rate": 3.167322932352646e-05, "loss": -0.1788, "step": 38310 }, { "epoch": 22.30500582072177, "grad_norm": 0.10009051859378815, "learning_rate": 3.164759219010613e-05, "loss": -0.1796, "step": 38320 }, { "epoch": 22.31082654249127, "grad_norm": 0.16905245184898376, "learning_rate": 3.1621960631662725e-05, "loss": -0.179, "step": 38330 }, { "epoch": 22.31664726426077, "grad_norm": 0.18333940207958221, "learning_rate": 3.159633465598245e-05, "loss": -0.1776, "step": 38340 }, { "epoch": 22.32246798603027, "grad_norm": 0.20980191230773926, "learning_rate": 3.1570714270849767e-05, "loss": -0.1802, "step": 38350 }, { "epoch": 22.328288707799768, "grad_norm": 0.14611202478408813, "learning_rate": 3.1545099484047516e-05, "loss": -0.18, "step": 38360 }, { "epoch": 22.334109429569267, "grad_norm": 0.13588973879814148, "learning_rate": 3.151949030335674e-05, "loss": -0.1818, "step": 38370 }, { "epoch": 22.339930151338766, "grad_norm": 0.0739699974656105, "learning_rate": 3.149388673655687e-05, "loss": -0.1793, "step": 38380 }, { "epoch": 22.345750873108265, "grad_norm": 0.18938444554805756, "learning_rate": 3.146828879142559e-05, "loss": -0.1783, "step": 38390 }, { "epoch": 22.351571594877765, "grad_norm": 0.15330708026885986, "learning_rate": 3.1442696475738866e-05, "loss": -0.1811, "step": 38400 }, { "epoch": 22.357392316647264, "grad_norm": 0.1545536071062088, "learning_rate": 3.141710979727098e-05, "loss": -0.182, "step": 38410 }, { "epoch": 22.363213038416763, "grad_norm": 0.11230305582284927, "learning_rate": 3.139152876379447e-05, "loss": -0.1812, "step": 38420 }, { "epoch": 22.369033760186262, "grad_norm": 0.18837594985961914, "learning_rate": 3.1365953383080214e-05, "loss": -0.1803, "step": 38430 }, { "epoch": 22.37485448195576, "grad_norm": 0.24184109270572662, "learning_rate": 3.134038366289731e-05, "loss": -0.1811, "step": 38440 }, { "epoch": 22.38067520372526, "grad_norm": 0.14844398200511932, "learning_rate": 3.131481961101317e-05, "loss": -0.1773, "step": 38450 }, { "epoch": 22.38649592549476, "grad_norm": 0.13017280399799347, "learning_rate": 3.128926123519349e-05, "loss": -0.179, "step": 38460 }, { "epoch": 22.39231664726426, "grad_norm": 0.15497133135795593, "learning_rate": 3.1263708543202194e-05, "loss": -0.1826, "step": 38470 }, { "epoch": 22.398137369033762, "grad_norm": 0.21946725249290466, "learning_rate": 3.123816154280155e-05, "loss": -0.1769, "step": 38480 }, { "epoch": 22.40395809080326, "grad_norm": 0.1598946899175644, "learning_rate": 3.121262024175207e-05, "loss": -0.1793, "step": 38490 }, { "epoch": 22.40977881257276, "grad_norm": 0.23427702486515045, "learning_rate": 3.118708464781248e-05, "loss": -0.18, "step": 38500 }, { "epoch": 22.41559953434226, "grad_norm": 0.11762836575508118, "learning_rate": 3.116155476873987e-05, "loss": -0.1797, "step": 38510 }, { "epoch": 22.42142025611176, "grad_norm": 0.11588333547115326, "learning_rate": 3.11360306122895e-05, "loss": -0.1805, "step": 38520 }, { "epoch": 22.427240977881258, "grad_norm": 0.1951257735490799, "learning_rate": 3.1110512186214975e-05, "loss": -0.1791, "step": 38530 }, { "epoch": 22.433061699650757, "grad_norm": 0.1724172681570053, "learning_rate": 3.1084999498268095e-05, "loss": -0.1809, "step": 38540 }, { "epoch": 22.438882421420256, "grad_norm": 0.17598742246627808, "learning_rate": 3.1059492556198934e-05, "loss": -0.1779, "step": 38550 }, { "epoch": 22.444703143189756, "grad_norm": 0.17949579656124115, "learning_rate": 3.103399136775586e-05, "loss": -0.1771, "step": 38560 }, { "epoch": 22.450523864959255, "grad_norm": 0.13296866416931152, "learning_rate": 3.100849594068541e-05, "loss": -0.1787, "step": 38570 }, { "epoch": 22.456344586728754, "grad_norm": 0.15537412464618683, "learning_rate": 3.0983006282732484e-05, "loss": -0.1795, "step": 38580 }, { "epoch": 22.462165308498253, "grad_norm": 0.1507127285003662, "learning_rate": 3.0957522401640116e-05, "loss": -0.1804, "step": 38590 }, { "epoch": 22.467986030267753, "grad_norm": 0.12384460121393204, "learning_rate": 3.0932044305149645e-05, "loss": -0.1814, "step": 38600 }, { "epoch": 22.47380675203725, "grad_norm": 0.14474377036094666, "learning_rate": 3.090657200100068e-05, "loss": -0.1805, "step": 38610 }, { "epoch": 22.47962747380675, "grad_norm": 0.16128601133823395, "learning_rate": 3.088110549693099e-05, "loss": -0.18, "step": 38620 }, { "epoch": 22.48544819557625, "grad_norm": 0.13709662854671478, "learning_rate": 3.085564480067667e-05, "loss": -0.181, "step": 38630 }, { "epoch": 22.49126891734575, "grad_norm": 0.15228396654129028, "learning_rate": 3.0830189919971955e-05, "loss": -0.1811, "step": 38640 }, { "epoch": 22.49708963911525, "grad_norm": 0.20182853937149048, "learning_rate": 3.080474086254939e-05, "loss": -0.1796, "step": 38650 }, { "epoch": 22.50291036088475, "grad_norm": 0.1398334503173828, "learning_rate": 3.077929763613975e-05, "loss": -0.1812, "step": 38660 }, { "epoch": 22.50873108265425, "grad_norm": 0.09157701581716537, "learning_rate": 3.075386024847198e-05, "loss": -0.179, "step": 38670 }, { "epoch": 22.51455180442375, "grad_norm": 0.10701644420623779, "learning_rate": 3.072842870727331e-05, "loss": -0.1811, "step": 38680 }, { "epoch": 22.52037252619325, "grad_norm": 0.1071184054017067, "learning_rate": 3.070300302026916e-05, "loss": -0.1806, "step": 38690 }, { "epoch": 22.52619324796275, "grad_norm": 0.1778586208820343, "learning_rate": 3.067758319518318e-05, "loss": -0.176, "step": 38700 }, { "epoch": 22.532013969732247, "grad_norm": 0.1590312272310257, "learning_rate": 3.065216923973725e-05, "loss": -0.1812, "step": 38710 }, { "epoch": 22.537834691501747, "grad_norm": 0.12661615014076233, "learning_rate": 3.062676116165145e-05, "loss": -0.1819, "step": 38720 }, { "epoch": 22.543655413271246, "grad_norm": 0.20756492018699646, "learning_rate": 3.06013589686441e-05, "loss": -0.181, "step": 38730 }, { "epoch": 22.549476135040745, "grad_norm": 0.17190060019493103, "learning_rate": 3.05759626684317e-05, "loss": -0.1804, "step": 38740 }, { "epoch": 22.555296856810244, "grad_norm": 0.14288589358329773, "learning_rate": 3.055057226872896e-05, "loss": -0.1797, "step": 38750 }, { "epoch": 22.561117578579744, "grad_norm": 0.1068987101316452, "learning_rate": 3.052518777724887e-05, "loss": -0.1796, "step": 38760 }, { "epoch": 22.566938300349243, "grad_norm": 0.09376437216997147, "learning_rate": 3.04998092017025e-05, "loss": -0.1815, "step": 38770 }, { "epoch": 22.572759022118742, "grad_norm": 0.1461574286222458, "learning_rate": 3.0474436549799246e-05, "loss": -0.177, "step": 38780 }, { "epoch": 22.57857974388824, "grad_norm": 0.18213044106960297, "learning_rate": 3.044906982924661e-05, "loss": -0.1811, "step": 38790 }, { "epoch": 22.58440046565774, "grad_norm": 0.11072446405887604, "learning_rate": 3.0423709047750337e-05, "loss": -0.1821, "step": 38800 }, { "epoch": 22.59022118742724, "grad_norm": 0.17017242312431335, "learning_rate": 3.03983542130144e-05, "loss": -0.1798, "step": 38810 }, { "epoch": 22.59604190919674, "grad_norm": 0.17943094670772552, "learning_rate": 3.0373005332740877e-05, "loss": -0.1778, "step": 38820 }, { "epoch": 22.601862630966238, "grad_norm": 0.19168484210968018, "learning_rate": 3.034766241463013e-05, "loss": -0.1804, "step": 38830 }, { "epoch": 22.60768335273574, "grad_norm": 0.133731871843338, "learning_rate": 3.032232546638064e-05, "loss": -0.1816, "step": 38840 }, { "epoch": 22.61350407450524, "grad_norm": 0.15245629847049713, "learning_rate": 3.0296994495689114e-05, "loss": -0.1809, "step": 38850 }, { "epoch": 22.61932479627474, "grad_norm": 0.11241897940635681, "learning_rate": 3.0271669510250444e-05, "loss": -0.1802, "step": 38860 }, { "epoch": 22.62514551804424, "grad_norm": 0.11212421208620071, "learning_rate": 3.024635051775766e-05, "loss": -0.1813, "step": 38870 }, { "epoch": 22.630966239813738, "grad_norm": 0.17047974467277527, "learning_rate": 3.022103752590205e-05, "loss": -0.1762, "step": 38880 }, { "epoch": 22.636786961583237, "grad_norm": 0.19512362778186798, "learning_rate": 3.0195730542372992e-05, "loss": -0.1785, "step": 38890 }, { "epoch": 22.642607683352736, "grad_norm": 0.15197685360908508, "learning_rate": 3.0170429574858084e-05, "loss": -0.1825, "step": 38900 }, { "epoch": 22.648428405122235, "grad_norm": 0.11604340374469757, "learning_rate": 3.0145134631043127e-05, "loss": -0.1809, "step": 38910 }, { "epoch": 22.654249126891735, "grad_norm": 0.12790796160697937, "learning_rate": 3.0119845718612018e-05, "loss": -0.1819, "step": 38920 }, { "epoch": 22.660069848661234, "grad_norm": 0.2169187068939209, "learning_rate": 3.009456284524688e-05, "loss": -0.1786, "step": 38930 }, { "epoch": 22.665890570430733, "grad_norm": 0.14218248426914215, "learning_rate": 3.0069286018627967e-05, "loss": -0.1799, "step": 38940 }, { "epoch": 22.671711292200232, "grad_norm": 0.18093542754650116, "learning_rate": 3.0044015246433743e-05, "loss": -0.1768, "step": 38950 }, { "epoch": 22.67753201396973, "grad_norm": 0.1541113704442978, "learning_rate": 3.0018750536340755e-05, "loss": -0.1765, "step": 38960 }, { "epoch": 22.68335273573923, "grad_norm": 0.14784225821495056, "learning_rate": 2.999349189602378e-05, "loss": -0.1739, "step": 38970 }, { "epoch": 22.68917345750873, "grad_norm": 0.15810662508010864, "learning_rate": 2.9968239333155733e-05, "loss": -0.1799, "step": 38980 }, { "epoch": 22.69499417927823, "grad_norm": 0.14182773232460022, "learning_rate": 2.994299285540767e-05, "loss": -0.1806, "step": 38990 }, { "epoch": 22.70081490104773, "grad_norm": 0.19134856760501862, "learning_rate": 2.9917752470448813e-05, "loss": -0.18, "step": 39000 }, { "epoch": 22.70663562281723, "grad_norm": 0.12289730459451675, "learning_rate": 2.9892518185946495e-05, "loss": -0.1812, "step": 39010 }, { "epoch": 22.71245634458673, "grad_norm": 0.1355304718017578, "learning_rate": 2.986729000956624e-05, "loss": -0.1788, "step": 39020 }, { "epoch": 22.71827706635623, "grad_norm": 0.12690117955207825, "learning_rate": 2.9842067948971736e-05, "loss": -0.1797, "step": 39030 }, { "epoch": 22.72409778812573, "grad_norm": 0.11268749833106995, "learning_rate": 2.9816852011824727e-05, "loss": -0.1818, "step": 39040 }, { "epoch": 22.729918509895228, "grad_norm": 0.11882347613573074, "learning_rate": 2.979164220578519e-05, "loss": -0.1805, "step": 39050 }, { "epoch": 22.735739231664727, "grad_norm": 0.13525891304016113, "learning_rate": 2.9766438538511165e-05, "loss": -0.1788, "step": 39060 }, { "epoch": 22.741559953434226, "grad_norm": 0.19826075434684753, "learning_rate": 2.9741241017658873e-05, "loss": -0.1806, "step": 39070 }, { "epoch": 22.747380675203726, "grad_norm": 0.203539177775383, "learning_rate": 2.971604965088267e-05, "loss": -0.1806, "step": 39080 }, { "epoch": 22.753201396973225, "grad_norm": 0.1455954909324646, "learning_rate": 2.9690864445835008e-05, "loss": -0.1811, "step": 39090 }, { "epoch": 22.759022118742724, "grad_norm": 0.11437104642391205, "learning_rate": 2.966568541016651e-05, "loss": -0.1735, "step": 39100 }, { "epoch": 22.764842840512223, "grad_norm": 0.2303394377231598, "learning_rate": 2.9640512551525867e-05, "loss": -0.1773, "step": 39110 }, { "epoch": 22.770663562281722, "grad_norm": 0.17556235194206238, "learning_rate": 2.961534587755995e-05, "loss": -0.1797, "step": 39120 }, { "epoch": 22.77648428405122, "grad_norm": 0.15134210884571075, "learning_rate": 2.959018539591375e-05, "loss": -0.1789, "step": 39130 }, { "epoch": 22.78230500582072, "grad_norm": 0.11176595836877823, "learning_rate": 2.9565031114230325e-05, "loss": -0.179, "step": 39140 }, { "epoch": 22.78812572759022, "grad_norm": 0.20301039516925812, "learning_rate": 2.9539883040150895e-05, "loss": -0.179, "step": 39150 }, { "epoch": 22.79394644935972, "grad_norm": 0.10786998271942139, "learning_rate": 2.9514741181314774e-05, "loss": -0.1784, "step": 39160 }, { "epoch": 22.79976717112922, "grad_norm": 0.13359999656677246, "learning_rate": 2.94896055453594e-05, "loss": -0.1826, "step": 39170 }, { "epoch": 22.80558789289872, "grad_norm": 0.15303950011730194, "learning_rate": 2.9464476139920332e-05, "loss": -0.1795, "step": 39180 }, { "epoch": 22.81140861466822, "grad_norm": 0.13708379864692688, "learning_rate": 2.9439352972631186e-05, "loss": -0.1822, "step": 39190 }, { "epoch": 22.81722933643772, "grad_norm": 0.1174401342868805, "learning_rate": 2.9414236051123757e-05, "loss": -0.179, "step": 39200 }, { "epoch": 22.82305005820722, "grad_norm": 0.13986124098300934, "learning_rate": 2.938912538302785e-05, "loss": -0.1819, "step": 39210 }, { "epoch": 22.828870779976718, "grad_norm": 0.1369343250989914, "learning_rate": 2.9364020975971464e-05, "loss": -0.1816, "step": 39220 }, { "epoch": 22.834691501746217, "grad_norm": 0.16162893176078796, "learning_rate": 2.9338922837580657e-05, "loss": -0.1819, "step": 39230 }, { "epoch": 22.840512223515717, "grad_norm": 0.2208833545446396, "learning_rate": 2.931383097547955e-05, "loss": -0.1794, "step": 39240 }, { "epoch": 22.846332945285216, "grad_norm": 0.1514851599931717, "learning_rate": 2.928874539729043e-05, "loss": -0.1789, "step": 39250 }, { "epoch": 22.852153667054715, "grad_norm": 0.1441149264574051, "learning_rate": 2.926366611063358e-05, "loss": -0.1816, "step": 39260 }, { "epoch": 22.857974388824214, "grad_norm": 0.09933782368898392, "learning_rate": 2.9238593123127463e-05, "loss": -0.1805, "step": 39270 }, { "epoch": 22.863795110593713, "grad_norm": 0.14838317036628723, "learning_rate": 2.9213526442388583e-05, "loss": -0.1812, "step": 39280 }, { "epoch": 22.869615832363213, "grad_norm": 0.09368664771318436, "learning_rate": 2.9188466076031545e-05, "loss": -0.1812, "step": 39290 }, { "epoch": 22.875436554132712, "grad_norm": 0.1294187754392624, "learning_rate": 2.9163412031669012e-05, "loss": -0.1815, "step": 39300 }, { "epoch": 22.88125727590221, "grad_norm": 0.0925646424293518, "learning_rate": 2.913836431691175e-05, "loss": -0.1785, "step": 39310 }, { "epoch": 22.88707799767171, "grad_norm": 0.1206774041056633, "learning_rate": 2.9113322939368583e-05, "loss": -0.1787, "step": 39320 }, { "epoch": 22.89289871944121, "grad_norm": 0.1624816507101059, "learning_rate": 2.9088287906646427e-05, "loss": -0.1774, "step": 39330 }, { "epoch": 22.89871944121071, "grad_norm": 0.2248918116092682, "learning_rate": 2.906325922635024e-05, "loss": -0.1788, "step": 39340 }, { "epoch": 22.904540162980208, "grad_norm": 0.16874252259731293, "learning_rate": 2.903823690608313e-05, "loss": -0.1817, "step": 39350 }, { "epoch": 22.91036088474971, "grad_norm": 0.10438760370016098, "learning_rate": 2.9013220953446174e-05, "loss": -0.1806, "step": 39360 }, { "epoch": 22.91618160651921, "grad_norm": 0.12050212919712067, "learning_rate": 2.8988211376038564e-05, "loss": -0.1826, "step": 39370 }, { "epoch": 22.92200232828871, "grad_norm": 0.10785871744155884, "learning_rate": 2.8963208181457564e-05, "loss": -0.1791, "step": 39380 }, { "epoch": 22.92782305005821, "grad_norm": 0.10956567525863647, "learning_rate": 2.8938211377298453e-05, "loss": -0.1739, "step": 39390 }, { "epoch": 22.933643771827708, "grad_norm": 0.23009257018566132, "learning_rate": 2.8913220971154652e-05, "loss": -0.1738, "step": 39400 }, { "epoch": 22.939464493597207, "grad_norm": 0.08457411080598831, "learning_rate": 2.888823697061753e-05, "loss": -0.1818, "step": 39410 }, { "epoch": 22.945285215366706, "grad_norm": 0.10277331620454788, "learning_rate": 2.8863259383276618e-05, "loss": -0.1795, "step": 39420 }, { "epoch": 22.951105937136205, "grad_norm": 0.15960122644901276, "learning_rate": 2.8838288216719395e-05, "loss": -0.1778, "step": 39430 }, { "epoch": 22.956926658905704, "grad_norm": 0.10041821748018265, "learning_rate": 2.8813323478531484e-05, "loss": -0.1815, "step": 39440 }, { "epoch": 22.962747380675204, "grad_norm": 0.24057090282440186, "learning_rate": 2.8788365176296496e-05, "loss": -0.1771, "step": 39450 }, { "epoch": 22.968568102444703, "grad_norm": 0.1273084580898285, "learning_rate": 2.876341331759611e-05, "loss": -0.1832, "step": 39460 }, { "epoch": 22.974388824214202, "grad_norm": 0.06345430761575699, "learning_rate": 2.8738467910010036e-05, "loss": -0.176, "step": 39470 }, { "epoch": 22.9802095459837, "grad_norm": 0.223594531416893, "learning_rate": 2.8713528961116032e-05, "loss": -0.1801, "step": 39480 }, { "epoch": 22.9860302677532, "grad_norm": 0.1809658706188202, "learning_rate": 2.8688596478489875e-05, "loss": -0.1771, "step": 39490 }, { "epoch": 22.9918509895227, "grad_norm": 0.12212841957807541, "learning_rate": 2.8663670469705434e-05, "loss": -0.1813, "step": 39500 }, { "epoch": 22.9976717112922, "grad_norm": 0.14039112627506256, "learning_rate": 2.8638750942334546e-05, "loss": -0.176, "step": 39510 }, { "epoch": 23.003492433061698, "grad_norm": 0.15385763347148895, "learning_rate": 2.8613837903947115e-05, "loss": -0.18, "step": 39520 }, { "epoch": 23.009313154831197, "grad_norm": 0.17372949421405792, "learning_rate": 2.858893136211106e-05, "loss": -0.1774, "step": 39530 }, { "epoch": 23.0151338766007, "grad_norm": 0.22021129727363586, "learning_rate": 2.8564031324392315e-05, "loss": -0.1786, "step": 39540 }, { "epoch": 23.0209545983702, "grad_norm": 0.11618846654891968, "learning_rate": 2.85391377983549e-05, "loss": -0.1798, "step": 39550 }, { "epoch": 23.0267753201397, "grad_norm": 0.2220974862575531, "learning_rate": 2.851425079156075e-05, "loss": -0.1769, "step": 39560 }, { "epoch": 23.032596041909198, "grad_norm": 0.179677814245224, "learning_rate": 2.848937031156994e-05, "loss": -0.1799, "step": 39570 }, { "epoch": 23.038416763678697, "grad_norm": 0.20471453666687012, "learning_rate": 2.846449636594044e-05, "loss": -0.1777, "step": 39580 }, { "epoch": 23.044237485448196, "grad_norm": 0.12168356776237488, "learning_rate": 2.843962896222836e-05, "loss": -0.181, "step": 39590 }, { "epoch": 23.050058207217695, "grad_norm": 0.11090419441461563, "learning_rate": 2.8414768107987722e-05, "loss": -0.1793, "step": 39600 }, { "epoch": 23.055878928987195, "grad_norm": 0.20273452997207642, "learning_rate": 2.838991381077061e-05, "loss": -0.1786, "step": 39610 }, { "epoch": 23.061699650756694, "grad_norm": 0.14834189414978027, "learning_rate": 2.83650660781271e-05, "loss": -0.1812, "step": 39620 }, { "epoch": 23.067520372526193, "grad_norm": 0.09666693210601807, "learning_rate": 2.8340224917605285e-05, "loss": -0.1818, "step": 39630 }, { "epoch": 23.073341094295692, "grad_norm": 0.1763438582420349, "learning_rate": 2.831539033675122e-05, "loss": -0.1791, "step": 39640 }, { "epoch": 23.07916181606519, "grad_norm": 0.13493716716766357, "learning_rate": 2.8290562343109038e-05, "loss": -0.1749, "step": 39650 }, { "epoch": 23.08498253783469, "grad_norm": 0.08156983554363251, "learning_rate": 2.826574094422082e-05, "loss": -0.1812, "step": 39660 }, { "epoch": 23.09080325960419, "grad_norm": 0.09577780216932297, "learning_rate": 2.8240926147626645e-05, "loss": -0.1772, "step": 39670 }, { "epoch": 23.09662398137369, "grad_norm": 0.15598462522029877, "learning_rate": 2.8216117960864586e-05, "loss": -0.1777, "step": 39680 }, { "epoch": 23.10244470314319, "grad_norm": 0.22996821999549866, "learning_rate": 2.8191316391470703e-05, "loss": -0.1786, "step": 39690 }, { "epoch": 23.108265424912688, "grad_norm": 0.08620649576187134, "learning_rate": 2.816652144697911e-05, "loss": -0.1759, "step": 39700 }, { "epoch": 23.11408614668219, "grad_norm": 0.12767940759658813, "learning_rate": 2.8141733134921783e-05, "loss": -0.1779, "step": 39710 }, { "epoch": 23.11990686845169, "grad_norm": 0.21940171718597412, "learning_rate": 2.811695146282884e-05, "loss": -0.1783, "step": 39720 }, { "epoch": 23.12572759022119, "grad_norm": 0.1812172532081604, "learning_rate": 2.8092176438228212e-05, "loss": -0.1793, "step": 39730 }, { "epoch": 23.131548311990688, "grad_norm": 0.17148876190185547, "learning_rate": 2.806740806864598e-05, "loss": -0.1786, "step": 39740 }, { "epoch": 23.137369033760187, "grad_norm": 0.1153540387749672, "learning_rate": 2.804264636160604e-05, "loss": -0.1801, "step": 39750 }, { "epoch": 23.143189755529686, "grad_norm": 0.09155658632516861, "learning_rate": 2.8017891324630402e-05, "loss": -0.1823, "step": 39760 }, { "epoch": 23.149010477299186, "grad_norm": 0.13679462671279907, "learning_rate": 2.7993142965238976e-05, "loss": -0.1788, "step": 39770 }, { "epoch": 23.154831199068685, "grad_norm": 0.1264064759016037, "learning_rate": 2.7968401290949665e-05, "loss": -0.1799, "step": 39780 }, { "epoch": 23.160651920838184, "grad_norm": 0.11782736331224442, "learning_rate": 2.7943666309278328e-05, "loss": -0.1803, "step": 39790 }, { "epoch": 23.166472642607683, "grad_norm": 0.10606862604618073, "learning_rate": 2.7918938027738783e-05, "loss": -0.1793, "step": 39800 }, { "epoch": 23.172293364377182, "grad_norm": 0.12047348916530609, "learning_rate": 2.789421645384287e-05, "loss": -0.1829, "step": 39810 }, { "epoch": 23.17811408614668, "grad_norm": 0.1881258636713028, "learning_rate": 2.786950159510032e-05, "loss": -0.1785, "step": 39820 }, { "epoch": 23.18393480791618, "grad_norm": 0.1430751532316208, "learning_rate": 2.7844793459018876e-05, "loss": -0.1808, "step": 39830 }, { "epoch": 23.18975552968568, "grad_norm": 0.16838155686855316, "learning_rate": 2.7820092053104195e-05, "loss": -0.1812, "step": 39840 }, { "epoch": 23.19557625145518, "grad_norm": 0.08762761950492859, "learning_rate": 2.7795397384859933e-05, "loss": -0.1821, "step": 39850 }, { "epoch": 23.20139697322468, "grad_norm": 0.12195619195699692, "learning_rate": 2.7770709461787638e-05, "loss": -0.1805, "step": 39860 }, { "epoch": 23.207217694994178, "grad_norm": 0.13655367493629456, "learning_rate": 2.7746028291386915e-05, "loss": -0.1807, "step": 39870 }, { "epoch": 23.213038416763677, "grad_norm": 0.1514480859041214, "learning_rate": 2.772135388115519e-05, "loss": -0.1796, "step": 39880 }, { "epoch": 23.21885913853318, "grad_norm": 0.13348837196826935, "learning_rate": 2.7696686238587945e-05, "loss": -0.1808, "step": 39890 }, { "epoch": 23.22467986030268, "grad_norm": 0.1544710397720337, "learning_rate": 2.7672025371178505e-05, "loss": -0.1787, "step": 39900 }, { "epoch": 23.230500582072178, "grad_norm": 0.17072127759456635, "learning_rate": 2.7647371286418238e-05, "loss": -0.1802, "step": 39910 }, { "epoch": 23.236321303841677, "grad_norm": 0.10674627125263214, "learning_rate": 2.762272399179639e-05, "loss": -0.1793, "step": 39920 }, { "epoch": 23.242142025611177, "grad_norm": 0.09376343339681625, "learning_rate": 2.7598083494800154e-05, "loss": -0.1819, "step": 39930 }, { "epoch": 23.247962747380676, "grad_norm": 0.18173140287399292, "learning_rate": 2.7573449802914664e-05, "loss": -0.1787, "step": 39940 }, { "epoch": 23.253783469150175, "grad_norm": 0.14002414047718048, "learning_rate": 2.7548822923622964e-05, "loss": -0.1829, "step": 39950 }, { "epoch": 23.259604190919674, "grad_norm": 0.12108604609966278, "learning_rate": 2.752420286440609e-05, "loss": -0.181, "step": 39960 }, { "epoch": 23.265424912689173, "grad_norm": 0.08750016242265701, "learning_rate": 2.749958963274295e-05, "loss": -0.1795, "step": 39970 }, { "epoch": 23.271245634458673, "grad_norm": 0.19255152344703674, "learning_rate": 2.747498323611039e-05, "loss": -0.1777, "step": 39980 }, { "epoch": 23.277066356228172, "grad_norm": 0.049030520021915436, "learning_rate": 2.7450383681983184e-05, "loss": -0.1814, "step": 39990 }, { "epoch": 23.28288707799767, "grad_norm": 0.1469237208366394, "learning_rate": 2.742579097783403e-05, "loss": -0.1811, "step": 40000 }, { "epoch": 23.28870779976717, "grad_norm": 0.1502283215522766, "learning_rate": 2.7401205131133512e-05, "loss": -0.9328, "step": 40010 }, { "epoch": 23.29452852153667, "grad_norm": 0.1746407002210617, "learning_rate": 2.7376626149350238e-05, "loss": -0.9295, "step": 40020 }, { "epoch": 23.30034924330617, "grad_norm": 0.09931579977273941, "learning_rate": 2.735205403995056e-05, "loss": -0.9318, "step": 40030 }, { "epoch": 23.306169965075668, "grad_norm": 0.15117590129375458, "learning_rate": 2.7327488810398917e-05, "loss": -0.9294, "step": 40040 }, { "epoch": 23.311990686845167, "grad_norm": 0.12957648932933807, "learning_rate": 2.7302930468157507e-05, "loss": -0.9296, "step": 40050 }, { "epoch": 23.31781140861467, "grad_norm": 0.2293930947780609, "learning_rate": 2.727837902068655e-05, "loss": -0.9309, "step": 40060 }, { "epoch": 23.32363213038417, "grad_norm": 0.18462367355823517, "learning_rate": 2.7253834475444123e-05, "loss": -0.9324, "step": 40070 }, { "epoch": 23.32945285215367, "grad_norm": 0.2542872130870819, "learning_rate": 2.7229296839886204e-05, "loss": -0.931, "step": 40080 }, { "epoch": 23.335273573923168, "grad_norm": 0.12415321171283722, "learning_rate": 2.720476612146668e-05, "loss": -0.9329, "step": 40090 }, { "epoch": 23.341094295692667, "grad_norm": 0.1561252921819687, "learning_rate": 2.7180242327637317e-05, "loss": -0.9298, "step": 40100 }, { "epoch": 23.346915017462166, "grad_norm": 0.18051189184188843, "learning_rate": 2.7155725465847826e-05, "loss": -0.9256, "step": 40110 }, { "epoch": 23.352735739231665, "grad_norm": 0.10094253718852997, "learning_rate": 2.713121554354578e-05, "loss": -0.9297, "step": 40120 }, { "epoch": 23.358556461001164, "grad_norm": 0.1284315437078476, "learning_rate": 2.7106712568176628e-05, "loss": -0.9317, "step": 40130 }, { "epoch": 23.364377182770664, "grad_norm": 0.20146936178207397, "learning_rate": 2.708221654718374e-05, "loss": -0.9319, "step": 40140 }, { "epoch": 23.370197904540163, "grad_norm": 0.1717757284641266, "learning_rate": 2.7057727488008357e-05, "loss": -0.9336, "step": 40150 }, { "epoch": 23.376018626309662, "grad_norm": 0.10615427792072296, "learning_rate": 2.703324539808961e-05, "loss": -0.9322, "step": 40160 }, { "epoch": 23.38183934807916, "grad_norm": 0.12698039412498474, "learning_rate": 2.7008770284864505e-05, "loss": -0.9327, "step": 40170 }, { "epoch": 23.38766006984866, "grad_norm": 0.15306949615478516, "learning_rate": 2.6984302155767916e-05, "loss": -0.9299, "step": 40180 }, { "epoch": 23.39348079161816, "grad_norm": 0.21178291738033295, "learning_rate": 2.6959841018232683e-05, "loss": -0.9295, "step": 40190 }, { "epoch": 23.39930151338766, "grad_norm": 0.18015490472316742, "learning_rate": 2.693538687968937e-05, "loss": -0.9328, "step": 40200 }, { "epoch": 23.405122235157158, "grad_norm": 0.14626258611679077, "learning_rate": 2.6910939747566556e-05, "loss": -0.927, "step": 40210 }, { "epoch": 23.410942956926657, "grad_norm": 0.11153054982423782, "learning_rate": 2.6886499629290607e-05, "loss": -0.935, "step": 40220 }, { "epoch": 23.416763678696157, "grad_norm": 0.10653960704803467, "learning_rate": 2.6862066532285802e-05, "loss": -0.9331, "step": 40230 }, { "epoch": 23.42258440046566, "grad_norm": 0.16880249977111816, "learning_rate": 2.6837640463974262e-05, "loss": -0.931, "step": 40240 }, { "epoch": 23.42840512223516, "grad_norm": 0.1813185214996338, "learning_rate": 2.681322143177596e-05, "loss": -0.9322, "step": 40250 }, { "epoch": 23.434225844004658, "grad_norm": 0.17065900564193726, "learning_rate": 2.678880944310882e-05, "loss": -0.9322, "step": 40260 }, { "epoch": 23.440046565774157, "grad_norm": 0.147530198097229, "learning_rate": 2.6764404505388474e-05, "loss": -0.9296, "step": 40270 }, { "epoch": 23.445867287543656, "grad_norm": 0.12859633564949036, "learning_rate": 2.6740006626028558e-05, "loss": -0.9276, "step": 40280 }, { "epoch": 23.451688009313155, "grad_norm": 0.09839943796396255, "learning_rate": 2.671561581244048e-05, "loss": -0.9329, "step": 40290 }, { "epoch": 23.457508731082655, "grad_norm": 0.10493113845586777, "learning_rate": 2.6691232072033536e-05, "loss": -0.9292, "step": 40300 }, { "epoch": 23.463329452852154, "grad_norm": 0.15071558952331543, "learning_rate": 2.6666855412214852e-05, "loss": -0.9314, "step": 40310 }, { "epoch": 23.469150174621653, "grad_norm": 0.13618344068527222, "learning_rate": 2.664248584038942e-05, "loss": -0.9301, "step": 40320 }, { "epoch": 23.474970896391152, "grad_norm": 0.17548561096191406, "learning_rate": 2.6618123363960047e-05, "loss": -0.9341, "step": 40330 }, { "epoch": 23.48079161816065, "grad_norm": 0.17425361275672913, "learning_rate": 2.659376799032748e-05, "loss": -0.9304, "step": 40340 }, { "epoch": 23.48661233993015, "grad_norm": 0.1457197070121765, "learning_rate": 2.6569419726890145e-05, "loss": -0.9286, "step": 40350 }, { "epoch": 23.49243306169965, "grad_norm": 0.16862453520298004, "learning_rate": 2.654507858104447e-05, "loss": -0.9308, "step": 40360 }, { "epoch": 23.49825378346915, "grad_norm": 0.14091171324253082, "learning_rate": 2.652074456018463e-05, "loss": -0.9334, "step": 40370 }, { "epoch": 23.50407450523865, "grad_norm": 0.14579647779464722, "learning_rate": 2.6496417671702646e-05, "loss": -0.9302, "step": 40380 }, { "epoch": 23.509895227008148, "grad_norm": 0.161260187625885, "learning_rate": 2.6472097922988427e-05, "loss": -0.9348, "step": 40390 }, { "epoch": 23.515715948777647, "grad_norm": 0.15203092992305756, "learning_rate": 2.6447785321429607e-05, "loss": -0.9223, "step": 40400 }, { "epoch": 23.52153667054715, "grad_norm": 0.15945549309253693, "learning_rate": 2.6423479874411784e-05, "loss": -0.927, "step": 40410 }, { "epoch": 23.52735739231665, "grad_norm": 0.2488812804222107, "learning_rate": 2.6399181589318234e-05, "loss": -0.9317, "step": 40420 }, { "epoch": 23.533178114086148, "grad_norm": 0.23832717537879944, "learning_rate": 2.6374890473530188e-05, "loss": -0.934, "step": 40430 }, { "epoch": 23.538998835855647, "grad_norm": 0.19048038125038147, "learning_rate": 2.635060653442664e-05, "loss": -0.9307, "step": 40440 }, { "epoch": 23.544819557625146, "grad_norm": 0.17602573335170746, "learning_rate": 2.6326329779384395e-05, "loss": -0.9324, "step": 40450 }, { "epoch": 23.550640279394646, "grad_norm": 0.189097598195076, "learning_rate": 2.63020602157781e-05, "loss": -0.9332, "step": 40460 }, { "epoch": 23.556461001164145, "grad_norm": 0.2771202325820923, "learning_rate": 2.62777978509802e-05, "loss": -0.9315, "step": 40470 }, { "epoch": 23.562281722933644, "grad_norm": 0.18951314687728882, "learning_rate": 2.6253542692360954e-05, "loss": -0.93, "step": 40480 }, { "epoch": 23.568102444703143, "grad_norm": 0.14853273332118988, "learning_rate": 2.6229294747288458e-05, "loss": -0.9346, "step": 40490 }, { "epoch": 23.573923166472643, "grad_norm": 0.13221286237239838, "learning_rate": 2.6205054023128596e-05, "loss": -0.9328, "step": 40500 }, { "epoch": 23.57974388824214, "grad_norm": 0.11313170194625854, "learning_rate": 2.6180820527245043e-05, "loss": -0.9293, "step": 40510 }, { "epoch": 23.58556461001164, "grad_norm": 0.16222617030143738, "learning_rate": 2.6156594266999313e-05, "loss": -0.934, "step": 40520 }, { "epoch": 23.59138533178114, "grad_norm": 0.260695219039917, "learning_rate": 2.6132375249750672e-05, "loss": -0.9292, "step": 40530 }, { "epoch": 23.59720605355064, "grad_norm": 0.1799003779888153, "learning_rate": 2.6108163482856286e-05, "loss": -0.9326, "step": 40540 }, { "epoch": 23.60302677532014, "grad_norm": 0.2042316347360611, "learning_rate": 2.6083958973670964e-05, "loss": -0.9319, "step": 40550 }, { "epoch": 23.608847497089638, "grad_norm": 0.14854300022125244, "learning_rate": 2.6059761729547483e-05, "loss": -0.9335, "step": 40560 }, { "epoch": 23.614668218859137, "grad_norm": 0.16481861472129822, "learning_rate": 2.603557175783624e-05, "loss": -0.9332, "step": 40570 }, { "epoch": 23.620488940628636, "grad_norm": 0.10923541337251663, "learning_rate": 2.601138906588559e-05, "loss": -0.933, "step": 40580 }, { "epoch": 23.62630966239814, "grad_norm": 0.1698487401008606, "learning_rate": 2.598721366104152e-05, "loss": -0.9306, "step": 40590 }, { "epoch": 23.63213038416764, "grad_norm": 0.12036171555519104, "learning_rate": 2.5963045550647945e-05, "loss": -0.928, "step": 40600 }, { "epoch": 23.637951105937137, "grad_norm": 0.1713400036096573, "learning_rate": 2.5938884742046466e-05, "loss": -0.9325, "step": 40610 }, { "epoch": 23.643771827706637, "grad_norm": 0.1262817084789276, "learning_rate": 2.5914731242576507e-05, "loss": -0.9329, "step": 40620 }, { "epoch": 23.649592549476136, "grad_norm": 0.10375324636697769, "learning_rate": 2.5890585059575268e-05, "loss": -0.9322, "step": 40630 }, { "epoch": 23.655413271245635, "grad_norm": 0.14080409705638885, "learning_rate": 2.5866446200377688e-05, "loss": -0.9335, "step": 40640 }, { "epoch": 23.661233993015134, "grad_norm": 0.17580446600914001, "learning_rate": 2.5842314672316566e-05, "loss": -0.9317, "step": 40650 }, { "epoch": 23.667054714784634, "grad_norm": 0.1129738911986351, "learning_rate": 2.581819048272239e-05, "loss": -0.9321, "step": 40660 }, { "epoch": 23.672875436554133, "grad_norm": 0.14628614485263824, "learning_rate": 2.5794073638923478e-05, "loss": -0.929, "step": 40670 }, { "epoch": 23.678696158323632, "grad_norm": 0.16107772290706635, "learning_rate": 2.576996414824586e-05, "loss": -0.9223, "step": 40680 }, { "epoch": 23.68451688009313, "grad_norm": 0.21390952169895172, "learning_rate": 2.574586201801339e-05, "loss": -0.9319, "step": 40690 }, { "epoch": 23.69033760186263, "grad_norm": 0.18637730181217194, "learning_rate": 2.572176725554762e-05, "loss": -0.9321, "step": 40700 }, { "epoch": 23.69615832363213, "grad_norm": 0.19755689799785614, "learning_rate": 2.5697679868167966e-05, "loss": -0.9314, "step": 40710 }, { "epoch": 23.70197904540163, "grad_norm": 0.20693571865558624, "learning_rate": 2.5673599863191468e-05, "loss": -0.93, "step": 40720 }, { "epoch": 23.707799767171128, "grad_norm": 0.11639253050088882, "learning_rate": 2.564952724793306e-05, "loss": -0.9347, "step": 40730 }, { "epoch": 23.713620488940627, "grad_norm": 0.08191929757595062, "learning_rate": 2.5625462029705306e-05, "loss": -0.9319, "step": 40740 }, { "epoch": 23.719441210710126, "grad_norm": 0.142026886343956, "learning_rate": 2.5601404215818624e-05, "loss": -0.9328, "step": 40750 }, { "epoch": 23.725261932479626, "grad_norm": 0.1198999285697937, "learning_rate": 2.5577353813581144e-05, "loss": -0.9338, "step": 40760 }, { "epoch": 23.73108265424913, "grad_norm": 0.16424010694026947, "learning_rate": 2.5553310830298733e-05, "loss": -0.9309, "step": 40770 }, { "epoch": 23.736903376018628, "grad_norm": 0.10905163735151291, "learning_rate": 2.5529275273275012e-05, "loss": -0.9328, "step": 40780 }, { "epoch": 23.742724097788127, "grad_norm": 0.16238246858119965, "learning_rate": 2.550524714981133e-05, "loss": -0.9363, "step": 40790 }, { "epoch": 23.748544819557626, "grad_norm": 0.1568821668624878, "learning_rate": 2.5481226467206837e-05, "loss": -0.9356, "step": 40800 }, { "epoch": 23.754365541327125, "grad_norm": 0.11932084709405899, "learning_rate": 2.5457213232758365e-05, "loss": -0.9346, "step": 40810 }, { "epoch": 23.760186263096625, "grad_norm": 0.1440417766571045, "learning_rate": 2.5433207453760498e-05, "loss": -0.9269, "step": 40820 }, { "epoch": 23.766006984866124, "grad_norm": 0.1555541753768921, "learning_rate": 2.5409209137505552e-05, "loss": -0.9333, "step": 40830 }, { "epoch": 23.771827706635623, "grad_norm": 0.11440987139940262, "learning_rate": 2.5385218291283597e-05, "loss": -0.932, "step": 40840 }, { "epoch": 23.777648428405122, "grad_norm": 0.14598596096038818, "learning_rate": 2.5361234922382383e-05, "loss": -0.9311, "step": 40850 }, { "epoch": 23.78346915017462, "grad_norm": 0.12442485988140106, "learning_rate": 2.533725903808749e-05, "loss": -0.9333, "step": 40860 }, { "epoch": 23.78928987194412, "grad_norm": 0.1470082849264145, "learning_rate": 2.5313290645682085e-05, "loss": -0.9317, "step": 40870 }, { "epoch": 23.79511059371362, "grad_norm": 0.13498397171497345, "learning_rate": 2.52893297524472e-05, "loss": -0.9275, "step": 40880 }, { "epoch": 23.80093131548312, "grad_norm": 0.13960906863212585, "learning_rate": 2.526537636566145e-05, "loss": -0.9324, "step": 40890 }, { "epoch": 23.80675203725262, "grad_norm": 0.12751737236976624, "learning_rate": 2.5241430492601305e-05, "loss": -0.9311, "step": 40900 }, { "epoch": 23.812572759022117, "grad_norm": 0.13518697023391724, "learning_rate": 2.5217492140540867e-05, "loss": -0.932, "step": 40910 }, { "epoch": 23.818393480791617, "grad_norm": 0.09494370967149734, "learning_rate": 2.5193561316751967e-05, "loss": -0.9319, "step": 40920 }, { "epoch": 23.824214202561116, "grad_norm": 0.11111778765916824, "learning_rate": 2.516963802850416e-05, "loss": -0.9307, "step": 40930 }, { "epoch": 23.83003492433062, "grad_norm": 0.12439421564340591, "learning_rate": 2.5145722283064698e-05, "loss": -0.9321, "step": 40940 }, { "epoch": 23.835855646100118, "grad_norm": 0.157551109790802, "learning_rate": 2.5121814087698602e-05, "loss": -0.9325, "step": 40950 }, { "epoch": 23.841676367869617, "grad_norm": 0.18854102492332458, "learning_rate": 2.509791344966848e-05, "loss": -0.9338, "step": 40960 }, { "epoch": 23.847497089639116, "grad_norm": 0.10968922078609467, "learning_rate": 2.5074020376234768e-05, "loss": -0.9343, "step": 40970 }, { "epoch": 23.853317811408616, "grad_norm": 0.15771009027957916, "learning_rate": 2.5050134874655534e-05, "loss": -0.9349, "step": 40980 }, { "epoch": 23.859138533178115, "grad_norm": 0.14923451840877533, "learning_rate": 2.5026256952186566e-05, "loss": -0.9335, "step": 40990 }, { "epoch": 23.864959254947614, "grad_norm": 0.15660111606121063, "learning_rate": 2.5002386616081335e-05, "loss": -0.932, "step": 41000 }, { "epoch": 23.870779976717113, "grad_norm": 0.12755775451660156, "learning_rate": 2.497852387359103e-05, "loss": -0.9226, "step": 41010 }, { "epoch": 23.876600698486612, "grad_norm": 0.15237724781036377, "learning_rate": 2.4954668731964496e-05, "loss": -0.9341, "step": 41020 }, { "epoch": 23.88242142025611, "grad_norm": 0.10781808197498322, "learning_rate": 2.4930821198448364e-05, "loss": -0.9327, "step": 41030 }, { "epoch": 23.88824214202561, "grad_norm": 0.17084762454032898, "learning_rate": 2.4906981280286796e-05, "loss": -0.9317, "step": 41040 }, { "epoch": 23.89406286379511, "grad_norm": 0.0828867107629776, "learning_rate": 2.488314898472179e-05, "loss": -0.9312, "step": 41050 }, { "epoch": 23.89988358556461, "grad_norm": 0.13924430310726166, "learning_rate": 2.485932431899295e-05, "loss": -0.9355, "step": 41060 }, { "epoch": 23.90570430733411, "grad_norm": 0.13067130744457245, "learning_rate": 2.4835507290337584e-05, "loss": -0.9318, "step": 41070 }, { "epoch": 23.911525029103608, "grad_norm": 0.12667255103588104, "learning_rate": 2.4811697905990672e-05, "loss": -0.9336, "step": 41080 }, { "epoch": 23.917345750873107, "grad_norm": 0.10771898180246353, "learning_rate": 2.4787896173184854e-05, "loss": -0.9368, "step": 41090 }, { "epoch": 23.923166472642606, "grad_norm": 0.12129296362400055, "learning_rate": 2.4764102099150534e-05, "loss": -0.9267, "step": 41100 }, { "epoch": 23.92898719441211, "grad_norm": 0.11921743303537369, "learning_rate": 2.4740315691115644e-05, "loss": -0.931, "step": 41110 }, { "epoch": 23.934807916181608, "grad_norm": 0.09713809192180634, "learning_rate": 2.4716536956305918e-05, "loss": -0.9281, "step": 41120 }, { "epoch": 23.940628637951107, "grad_norm": 0.19608063995838165, "learning_rate": 2.4692765901944697e-05, "loss": -0.9328, "step": 41130 }, { "epoch": 23.946449359720607, "grad_norm": 0.1760420799255371, "learning_rate": 2.4669002535253e-05, "loss": -0.9305, "step": 41140 }, { "epoch": 23.952270081490106, "grad_norm": 0.1598728448152542, "learning_rate": 2.46452468634495e-05, "loss": -0.9304, "step": 41150 }, { "epoch": 23.958090803259605, "grad_norm": 0.18850471079349518, "learning_rate": 2.462149889375055e-05, "loss": -0.931, "step": 41160 }, { "epoch": 23.963911525029104, "grad_norm": 0.1283911168575287, "learning_rate": 2.459775863337014e-05, "loss": -0.9335, "step": 41170 }, { "epoch": 23.969732246798603, "grad_norm": 0.11372324824333191, "learning_rate": 2.4574026089519985e-05, "loss": -0.9316, "step": 41180 }, { "epoch": 23.975552968568103, "grad_norm": 0.18317902088165283, "learning_rate": 2.4550301269409333e-05, "loss": -0.9303, "step": 41190 }, { "epoch": 23.981373690337602, "grad_norm": 0.18521668016910553, "learning_rate": 2.4526584180245216e-05, "loss": -0.9313, "step": 41200 }, { "epoch": 23.9871944121071, "grad_norm": 0.14931945502758026, "learning_rate": 2.4502874829232236e-05, "loss": -0.9305, "step": 41210 }, { "epoch": 23.9930151338766, "grad_norm": 0.16949671506881714, "learning_rate": 2.447917322357267e-05, "loss": -0.9336, "step": 41220 }, { "epoch": 23.9988358556461, "grad_norm": 0.15950915217399597, "learning_rate": 2.4455479370466443e-05, "loss": -0.9321, "step": 41230 }, { "epoch": 24.0046565774156, "grad_norm": 0.16737763583660126, "learning_rate": 2.4431793277111097e-05, "loss": -0.9344, "step": 41240 }, { "epoch": 24.010477299185098, "grad_norm": 0.2768317461013794, "learning_rate": 2.4408114950701905e-05, "loss": -0.9355, "step": 41250 }, { "epoch": 24.016298020954597, "grad_norm": 0.12093380838632584, "learning_rate": 2.4384444398431634e-05, "loss": -0.9337, "step": 41260 }, { "epoch": 24.022118742724096, "grad_norm": 0.15439409017562866, "learning_rate": 2.4360781627490837e-05, "loss": -0.934, "step": 41270 }, { "epoch": 24.027939464493596, "grad_norm": 0.0984434261918068, "learning_rate": 2.433712664506762e-05, "loss": -0.9334, "step": 41280 }, { "epoch": 24.0337601862631, "grad_norm": 0.20506656169891357, "learning_rate": 2.431347945834774e-05, "loss": -0.9337, "step": 41290 }, { "epoch": 24.039580908032598, "grad_norm": 0.14046251773834229, "learning_rate": 2.428984007451458e-05, "loss": -0.9296, "step": 41300 }, { "epoch": 24.045401629802097, "grad_norm": 0.14586186408996582, "learning_rate": 2.426620850074917e-05, "loss": -0.9326, "step": 41310 }, { "epoch": 24.051222351571596, "grad_norm": 0.12157060205936432, "learning_rate": 2.424258474423014e-05, "loss": -0.9293, "step": 41320 }, { "epoch": 24.057043073341095, "grad_norm": 0.15040655434131622, "learning_rate": 2.421896881213382e-05, "loss": -0.933, "step": 41330 }, { "epoch": 24.062863795110594, "grad_norm": 0.23395372927188873, "learning_rate": 2.419536071163402e-05, "loss": -0.9346, "step": 41340 }, { "epoch": 24.068684516880094, "grad_norm": 0.16976459324359894, "learning_rate": 2.417176044990233e-05, "loss": -0.9352, "step": 41350 }, { "epoch": 24.074505238649593, "grad_norm": 0.12868419289588928, "learning_rate": 2.4148168034107855e-05, "loss": -0.9357, "step": 41360 }, { "epoch": 24.080325960419092, "grad_norm": 0.20220686495304108, "learning_rate": 2.4124583471417355e-05, "loss": -0.9318, "step": 41370 }, { "epoch": 24.08614668218859, "grad_norm": 0.10612045973539352, "learning_rate": 2.41010067689952e-05, "loss": -0.9315, "step": 41380 }, { "epoch": 24.09196740395809, "grad_norm": 0.1523117572069168, "learning_rate": 2.4077437934003338e-05, "loss": -0.932, "step": 41390 }, { "epoch": 24.09778812572759, "grad_norm": 0.09094405919313431, "learning_rate": 2.405387697360143e-05, "loss": -0.931, "step": 41400 }, { "epoch": 24.10360884749709, "grad_norm": 0.16226302087306976, "learning_rate": 2.4030323894946595e-05, "loss": -0.9332, "step": 41410 }, { "epoch": 24.109429569266588, "grad_norm": 0.1375781148672104, "learning_rate": 2.40067787051937e-05, "loss": -0.9334, "step": 41420 }, { "epoch": 24.115250291036087, "grad_norm": 0.09540752321481705, "learning_rate": 2.3983241411495087e-05, "loss": -0.9338, "step": 41430 }, { "epoch": 24.121071012805587, "grad_norm": 0.2535514235496521, "learning_rate": 2.3959712021000823e-05, "loss": -0.9312, "step": 41440 }, { "epoch": 24.126891734575086, "grad_norm": 0.11999431997537613, "learning_rate": 2.3936190540858495e-05, "loss": -0.9319, "step": 41450 }, { "epoch": 24.132712456344585, "grad_norm": 0.1711619794368744, "learning_rate": 2.39126769782133e-05, "loss": -0.9319, "step": 41460 }, { "epoch": 24.138533178114088, "grad_norm": 0.13903141021728516, "learning_rate": 2.388917134020805e-05, "loss": -0.9316, "step": 41470 }, { "epoch": 24.144353899883587, "grad_norm": 0.15970152616500854, "learning_rate": 2.3865673633983128e-05, "loss": -0.9337, "step": 41480 }, { "epoch": 24.150174621653086, "grad_norm": 0.256903737783432, "learning_rate": 2.3842183866676492e-05, "loss": -0.9339, "step": 41490 }, { "epoch": 24.155995343422585, "grad_norm": 0.13393913209438324, "learning_rate": 2.381870204542377e-05, "loss": -0.9345, "step": 41500 }, { "epoch": 24.161816065192085, "grad_norm": 0.17082451283931732, "learning_rate": 2.379522817735808e-05, "loss": -0.9359, "step": 41510 }, { "epoch": 24.167636786961584, "grad_norm": 0.1696610003709793, "learning_rate": 2.377176226961018e-05, "loss": -0.9319, "step": 41520 }, { "epoch": 24.173457508731083, "grad_norm": 0.0836690291762352, "learning_rate": 2.3748304329308384e-05, "loss": -0.9343, "step": 41530 }, { "epoch": 24.179278230500582, "grad_norm": 0.21850481629371643, "learning_rate": 2.372485436357858e-05, "loss": -0.9306, "step": 41540 }, { "epoch": 24.18509895227008, "grad_norm": 0.17139337956905365, "learning_rate": 2.3701412379544296e-05, "loss": -0.9312, "step": 41550 }, { "epoch": 24.19091967403958, "grad_norm": 0.07848071306943893, "learning_rate": 2.367797838432653e-05, "loss": -0.932, "step": 41560 }, { "epoch": 24.19674039580908, "grad_norm": 0.17874816060066223, "learning_rate": 2.3654552385043967e-05, "loss": -0.9302, "step": 41570 }, { "epoch": 24.20256111757858, "grad_norm": 0.13283437490463257, "learning_rate": 2.3631134388812742e-05, "loss": -0.9309, "step": 41580 }, { "epoch": 24.20838183934808, "grad_norm": 0.16923004388809204, "learning_rate": 2.3607724402746684e-05, "loss": -0.9227, "step": 41590 }, { "epoch": 24.214202561117578, "grad_norm": 0.10829882323741913, "learning_rate": 2.35843224339571e-05, "loss": -0.9331, "step": 41600 }, { "epoch": 24.220023282887077, "grad_norm": 0.1054624542593956, "learning_rate": 2.3560928489552897e-05, "loss": -0.9356, "step": 41610 }, { "epoch": 24.225844004656576, "grad_norm": 0.1407937854528427, "learning_rate": 2.353754257664053e-05, "loss": -0.9306, "step": 41620 }, { "epoch": 24.231664726426075, "grad_norm": 0.10003021359443665, "learning_rate": 2.3514164702324037e-05, "loss": -0.9309, "step": 41630 }, { "epoch": 24.237485448195578, "grad_norm": 0.138984814286232, "learning_rate": 2.3490794873704963e-05, "loss": -0.9337, "step": 41640 }, { "epoch": 24.243306169965077, "grad_norm": 0.17005003988742828, "learning_rate": 2.3467433097882496e-05, "loss": -0.936, "step": 41650 }, { "epoch": 24.249126891734576, "grad_norm": 0.11254660785198212, "learning_rate": 2.34440793819533e-05, "loss": -0.9328, "step": 41660 }, { "epoch": 24.254947613504076, "grad_norm": 0.11018196493387222, "learning_rate": 2.3420733733011617e-05, "loss": -0.9362, "step": 41670 }, { "epoch": 24.260768335273575, "grad_norm": 0.16048450767993927, "learning_rate": 2.3397396158149243e-05, "loss": -0.9332, "step": 41680 }, { "epoch": 24.266589057043074, "grad_norm": 0.10407495498657227, "learning_rate": 2.3374066664455498e-05, "loss": -0.9321, "step": 41690 }, { "epoch": 24.272409778812573, "grad_norm": 0.12221153825521469, "learning_rate": 2.3350745259017315e-05, "loss": -0.9299, "step": 41700 }, { "epoch": 24.278230500582072, "grad_norm": 0.17573943734169006, "learning_rate": 2.332743194891906e-05, "loss": -0.9322, "step": 41710 }, { "epoch": 24.28405122235157, "grad_norm": 0.11466671526432037, "learning_rate": 2.330412674124276e-05, "loss": -0.9344, "step": 41720 }, { "epoch": 24.28987194412107, "grad_norm": 0.0849723070859909, "learning_rate": 2.328082964306786e-05, "loss": -0.9355, "step": 41730 }, { "epoch": 24.29569266589057, "grad_norm": 0.07494799047708511, "learning_rate": 2.325754066147145e-05, "loss": -0.9342, "step": 41740 }, { "epoch": 24.30151338766007, "grad_norm": 0.0791093036532402, "learning_rate": 2.32342598035281e-05, "loss": -0.9351, "step": 41750 }, { "epoch": 24.30733410942957, "grad_norm": 0.12797996401786804, "learning_rate": 2.321098707630991e-05, "loss": -0.9299, "step": 41760 }, { "epoch": 24.313154831199068, "grad_norm": 0.1585051417350769, "learning_rate": 2.318772248688652e-05, "loss": -0.9368, "step": 41770 }, { "epoch": 24.318975552968567, "grad_norm": 0.19377192854881287, "learning_rate": 2.3164466042325107e-05, "loss": -0.9335, "step": 41780 }, { "epoch": 24.324796274738066, "grad_norm": 0.14988689124584198, "learning_rate": 2.3141217749690353e-05, "loss": -0.9331, "step": 41790 }, { "epoch": 24.330616996507565, "grad_norm": 0.11193683743476868, "learning_rate": 2.3117977616044466e-05, "loss": -0.9355, "step": 41800 }, { "epoch": 24.336437718277068, "grad_norm": 0.19873245060443878, "learning_rate": 2.309474564844722e-05, "loss": -0.9341, "step": 41810 }, { "epoch": 24.342258440046567, "grad_norm": 0.09031086415052414, "learning_rate": 2.307152185395585e-05, "loss": -0.9327, "step": 41820 }, { "epoch": 24.348079161816067, "grad_norm": 0.13299262523651123, "learning_rate": 2.3048306239625144e-05, "loss": -0.9331, "step": 41830 }, { "epoch": 24.353899883585566, "grad_norm": 0.17237018048763275, "learning_rate": 2.3025098812507378e-05, "loss": -0.9294, "step": 41840 }, { "epoch": 24.359720605355065, "grad_norm": 0.24533888697624207, "learning_rate": 2.3001899579652366e-05, "loss": -0.9344, "step": 41850 }, { "epoch": 24.365541327124564, "grad_norm": 0.12165407091379166, "learning_rate": 2.2978708548107393e-05, "loss": -0.9346, "step": 41860 }, { "epoch": 24.371362048894063, "grad_norm": 0.198603555560112, "learning_rate": 2.2955525724917348e-05, "loss": -0.9352, "step": 41870 }, { "epoch": 24.377182770663563, "grad_norm": 0.18353693187236786, "learning_rate": 2.2932351117124477e-05, "loss": -0.9329, "step": 41880 }, { "epoch": 24.383003492433062, "grad_norm": 0.10940875858068466, "learning_rate": 2.29091847317687e-05, "loss": -0.933, "step": 41890 }, { "epoch": 24.38882421420256, "grad_norm": 0.13013172149658203, "learning_rate": 2.2886026575887277e-05, "loss": -0.933, "step": 41900 }, { "epoch": 24.39464493597206, "grad_norm": 0.18237589299678802, "learning_rate": 2.2862876656515094e-05, "loss": -0.9299, "step": 41910 }, { "epoch": 24.40046565774156, "grad_norm": 0.19763979315757751, "learning_rate": 2.2839734980684464e-05, "loss": -0.9332, "step": 41920 }, { "epoch": 24.40628637951106, "grad_norm": 0.1279846429824829, "learning_rate": 2.281660155542522e-05, "loss": -0.9294, "step": 41930 }, { "epoch": 24.412107101280558, "grad_norm": 0.18913182616233826, "learning_rate": 2.279347638776469e-05, "loss": -0.9324, "step": 41940 }, { "epoch": 24.417927823050057, "grad_norm": 0.19422884285449982, "learning_rate": 2.2770359484727665e-05, "loss": -0.9303, "step": 41950 }, { "epoch": 24.423748544819556, "grad_norm": 0.1539289504289627, "learning_rate": 2.27472508533365e-05, "loss": -0.9347, "step": 41960 }, { "epoch": 24.429569266589056, "grad_norm": 0.1483580321073532, "learning_rate": 2.2724150500610948e-05, "loss": -0.9275, "step": 41970 }, { "epoch": 24.435389988358555, "grad_norm": 0.15373191237449646, "learning_rate": 2.2701058433568302e-05, "loss": -0.931, "step": 41980 }, { "epoch": 24.441210710128058, "grad_norm": 0.15466199815273285, "learning_rate": 2.2677974659223318e-05, "loss": -0.9275, "step": 41990 }, { "epoch": 24.447031431897557, "grad_norm": 0.2038755565881729, "learning_rate": 2.2654899184588235e-05, "loss": -0.9284, "step": 42000 }, { "epoch": 24.452852153667056, "grad_norm": 0.09681922942399979, "learning_rate": 2.2631832016672756e-05, "loss": -0.9366, "step": 42010 }, { "epoch": 24.458672875436555, "grad_norm": 0.22193194925785065, "learning_rate": 2.2608773162484127e-05, "loss": -0.9328, "step": 42020 }, { "epoch": 24.464493597206054, "grad_norm": 0.15837371349334717, "learning_rate": 2.2585722629026958e-05, "loss": -0.9361, "step": 42030 }, { "epoch": 24.470314318975554, "grad_norm": 0.13654227554798126, "learning_rate": 2.2562680423303457e-05, "loss": -0.9341, "step": 42040 }, { "epoch": 24.476135040745053, "grad_norm": 0.10718697309494019, "learning_rate": 2.2539646552313165e-05, "loss": -0.9293, "step": 42050 }, { "epoch": 24.481955762514552, "grad_norm": 0.11818736046552658, "learning_rate": 2.251662102305322e-05, "loss": -0.9344, "step": 42060 }, { "epoch": 24.48777648428405, "grad_norm": 0.10457932949066162, "learning_rate": 2.2493603842518152e-05, "loss": -0.9357, "step": 42070 }, { "epoch": 24.49359720605355, "grad_norm": 0.16361050307750702, "learning_rate": 2.2470595017699974e-05, "loss": -0.9357, "step": 42080 }, { "epoch": 24.49941792782305, "grad_norm": 0.184603750705719, "learning_rate": 2.244759455558816e-05, "loss": -0.9322, "step": 42090 }, { "epoch": 24.50523864959255, "grad_norm": 0.11372027546167374, "learning_rate": 2.2424602463169614e-05, "loss": -0.9329, "step": 42100 }, { "epoch": 24.511059371362048, "grad_norm": 0.0984286293387413, "learning_rate": 2.2401618747428776e-05, "loss": -0.9331, "step": 42110 }, { "epoch": 24.516880093131547, "grad_norm": 0.1497403383255005, "learning_rate": 2.237864341534747e-05, "loss": -0.9328, "step": 42120 }, { "epoch": 24.522700814901047, "grad_norm": 0.09630220383405685, "learning_rate": 2.2355676473904998e-05, "loss": -0.9323, "step": 42130 }, { "epoch": 24.528521536670546, "grad_norm": 0.08495738357305527, "learning_rate": 2.2332717930078108e-05, "loss": -0.9348, "step": 42140 }, { "epoch": 24.534342258440045, "grad_norm": 0.11426421999931335, "learning_rate": 2.2309767790840992e-05, "loss": -0.9298, "step": 42150 }, { "epoch": 24.540162980209544, "grad_norm": 0.11603926122188568, "learning_rate": 2.228682606316529e-05, "loss": -0.933, "step": 42160 }, { "epoch": 24.545983701979047, "grad_norm": 0.18281787633895874, "learning_rate": 2.2263892754020138e-05, "loss": -0.9336, "step": 42170 }, { "epoch": 24.551804423748546, "grad_norm": 0.14123688638210297, "learning_rate": 2.2240967870372004e-05, "loss": -0.935, "step": 42180 }, { "epoch": 24.557625145518045, "grad_norm": 0.10678714513778687, "learning_rate": 2.2218051419184933e-05, "loss": -0.9311, "step": 42190 }, { "epoch": 24.563445867287545, "grad_norm": 0.1665489375591278, "learning_rate": 2.219514340742026e-05, "loss": -0.9328, "step": 42200 }, { "epoch": 24.569266589057044, "grad_norm": 0.22548608481884003, "learning_rate": 2.2172243842036898e-05, "loss": -0.9343, "step": 42210 }, { "epoch": 24.575087310826543, "grad_norm": 0.1560571789741516, "learning_rate": 2.2149352729991107e-05, "loss": -0.9324, "step": 42220 }, { "epoch": 24.580908032596042, "grad_norm": 0.09272485971450806, "learning_rate": 2.2126470078236605e-05, "loss": -0.9324, "step": 42230 }, { "epoch": 24.58672875436554, "grad_norm": 0.1531885266304016, "learning_rate": 2.2103595893724533e-05, "loss": -0.936, "step": 42240 }, { "epoch": 24.59254947613504, "grad_norm": 0.09961236268281937, "learning_rate": 2.208073018340345e-05, "loss": -0.935, "step": 42250 }, { "epoch": 24.59837019790454, "grad_norm": 0.1437329351902008, "learning_rate": 2.2057872954219405e-05, "loss": -0.9312, "step": 42260 }, { "epoch": 24.60419091967404, "grad_norm": 0.2032371163368225, "learning_rate": 2.203502421311575e-05, "loss": -0.9291, "step": 42270 }, { "epoch": 24.61001164144354, "grad_norm": 0.2759249210357666, "learning_rate": 2.2012183967033388e-05, "loss": -0.936, "step": 42280 }, { "epoch": 24.615832363213038, "grad_norm": 0.14663495123386383, "learning_rate": 2.198935222291056e-05, "loss": -0.9305, "step": 42290 }, { "epoch": 24.621653084982537, "grad_norm": 0.11424126476049423, "learning_rate": 2.1966528987682948e-05, "loss": -0.9337, "step": 42300 }, { "epoch": 24.627473806752036, "grad_norm": 0.1998155117034912, "learning_rate": 2.194371426828365e-05, "loss": -0.9297, "step": 42310 }, { "epoch": 24.633294528521535, "grad_norm": 0.14569245278835297, "learning_rate": 2.192090807164317e-05, "loss": -0.9327, "step": 42320 }, { "epoch": 24.639115250291034, "grad_norm": 0.11236929148435593, "learning_rate": 2.1898110404689422e-05, "loss": -0.9345, "step": 42330 }, { "epoch": 24.644935972060537, "grad_norm": 0.12375032901763916, "learning_rate": 2.1875321274347776e-05, "loss": -0.9318, "step": 42340 }, { "epoch": 24.650756693830036, "grad_norm": 0.08431604504585266, "learning_rate": 2.18525406875409e-05, "loss": -0.9332, "step": 42350 }, { "epoch": 24.656577415599536, "grad_norm": 0.19050902128219604, "learning_rate": 2.1829768651188997e-05, "loss": -0.935, "step": 42360 }, { "epoch": 24.662398137369035, "grad_norm": 0.21061812341213226, "learning_rate": 2.180700517220958e-05, "loss": -0.937, "step": 42370 }, { "epoch": 24.668218859138534, "grad_norm": 0.18228578567504883, "learning_rate": 2.1784250257517603e-05, "loss": -0.9348, "step": 42380 }, { "epoch": 24.674039580908033, "grad_norm": 0.17568059265613556, "learning_rate": 2.1761503914025406e-05, "loss": -0.9332, "step": 42390 }, { "epoch": 24.679860302677533, "grad_norm": 0.09909545630216599, "learning_rate": 2.1738766148642705e-05, "loss": -0.9326, "step": 42400 }, { "epoch": 24.68568102444703, "grad_norm": 0.11652502417564392, "learning_rate": 2.1716036968276683e-05, "loss": -0.9347, "step": 42410 }, { "epoch": 24.69150174621653, "grad_norm": 0.08801187574863434, "learning_rate": 2.1693316379831808e-05, "loss": -0.9359, "step": 42420 }, { "epoch": 24.69732246798603, "grad_norm": 0.1596788465976715, "learning_rate": 2.1670604390210037e-05, "loss": -0.9329, "step": 42430 }, { "epoch": 24.70314318975553, "grad_norm": 0.12967129051685333, "learning_rate": 2.1647901006310656e-05, "loss": -0.9332, "step": 42440 }, { "epoch": 24.70896391152503, "grad_norm": 0.14628838002681732, "learning_rate": 2.1625206235030353e-05, "loss": -0.9345, "step": 42450 }, { "epoch": 24.714784633294528, "grad_norm": 0.1622862070798874, "learning_rate": 2.160252008326321e-05, "loss": -0.9306, "step": 42460 }, { "epoch": 24.720605355064027, "grad_norm": 0.09034367650747299, "learning_rate": 2.157984255790067e-05, "loss": -0.933, "step": 42470 }, { "epoch": 24.726426076833526, "grad_norm": 0.17744101583957672, "learning_rate": 2.1557173665831553e-05, "loss": -0.9344, "step": 42480 }, { "epoch": 24.732246798603025, "grad_norm": 0.18263371288776398, "learning_rate": 2.153451341394212e-05, "loss": -0.9319, "step": 42490 }, { "epoch": 24.738067520372525, "grad_norm": 0.12386653572320938, "learning_rate": 2.151186180911589e-05, "loss": -0.9348, "step": 42500 }, { "epoch": 24.743888242142027, "grad_norm": 0.11100191622972488, "learning_rate": 2.1489218858233877e-05, "loss": -0.9349, "step": 42510 }, { "epoch": 24.749708963911527, "grad_norm": 0.1427709013223648, "learning_rate": 2.1466584568174392e-05, "loss": -0.933, "step": 42520 }, { "epoch": 24.755529685681026, "grad_norm": 0.2131630778312683, "learning_rate": 2.1443958945813132e-05, "loss": -0.9308, "step": 42530 }, { "epoch": 24.761350407450525, "grad_norm": 0.1649397611618042, "learning_rate": 2.1421341998023163e-05, "loss": -0.9303, "step": 42540 }, { "epoch": 24.767171129220024, "grad_norm": 0.16790930926799774, "learning_rate": 2.139873373167491e-05, "loss": -0.9315, "step": 42550 }, { "epoch": 24.772991850989523, "grad_norm": 0.11387084424495697, "learning_rate": 2.13761341536362e-05, "loss": -0.9339, "step": 42560 }, { "epoch": 24.778812572759023, "grad_norm": 0.20350335538387299, "learning_rate": 2.1353543270772136e-05, "loss": -0.9337, "step": 42570 }, { "epoch": 24.784633294528522, "grad_norm": 0.09431295096874237, "learning_rate": 2.1330961089945297e-05, "loss": -0.9341, "step": 42580 }, { "epoch": 24.79045401629802, "grad_norm": 0.1250840723514557, "learning_rate": 2.130838761801548e-05, "loss": -0.9292, "step": 42590 }, { "epoch": 24.79627473806752, "grad_norm": 0.1326438933610916, "learning_rate": 2.1285822861839966e-05, "loss": -0.932, "step": 42600 }, { "epoch": 24.80209545983702, "grad_norm": 0.10201260447502136, "learning_rate": 2.126326682827331e-05, "loss": -0.9347, "step": 42610 }, { "epoch": 24.80791618160652, "grad_norm": 0.15232525765895844, "learning_rate": 2.124071952416744e-05, "loss": -0.9352, "step": 42620 }, { "epoch": 24.813736903376018, "grad_norm": 0.10149480402469635, "learning_rate": 2.1218180956371634e-05, "loss": -0.9362, "step": 42630 }, { "epoch": 24.819557625145517, "grad_norm": 0.10009554028511047, "learning_rate": 2.119565113173252e-05, "loss": -0.9305, "step": 42640 }, { "epoch": 24.825378346915016, "grad_norm": 0.11294353008270264, "learning_rate": 2.1173130057094033e-05, "loss": -0.9343, "step": 42650 }, { "epoch": 24.831199068684516, "grad_norm": 0.1683598905801773, "learning_rate": 2.115061773929753e-05, "loss": -0.9368, "step": 42660 }, { "epoch": 24.837019790454015, "grad_norm": 0.24854923784732819, "learning_rate": 2.1128114185181623e-05, "loss": -0.9337, "step": 42670 }, { "epoch": 24.842840512223514, "grad_norm": 0.13201163709163666, "learning_rate": 2.1105619401582317e-05, "loss": -0.9344, "step": 42680 }, { "epoch": 24.848661233993017, "grad_norm": 0.12848219275474548, "learning_rate": 2.1083133395332928e-05, "loss": -0.9369, "step": 42690 }, { "epoch": 24.854481955762516, "grad_norm": 0.10339096188545227, "learning_rate": 2.1060656173264082e-05, "loss": -0.9338, "step": 42700 }, { "epoch": 24.860302677532015, "grad_norm": 0.1490083634853363, "learning_rate": 2.103818774220383e-05, "loss": -0.9274, "step": 42710 }, { "epoch": 24.866123399301514, "grad_norm": 0.17850199341773987, "learning_rate": 2.1015728108977412e-05, "loss": -0.9302, "step": 42720 }, { "epoch": 24.871944121071014, "grad_norm": 0.21082104742527008, "learning_rate": 2.0993277280407548e-05, "loss": -0.9297, "step": 42730 }, { "epoch": 24.877764842840513, "grad_norm": 0.09102898091077805, "learning_rate": 2.0970835263314132e-05, "loss": -0.9357, "step": 42740 }, { "epoch": 24.883585564610012, "grad_norm": 0.1055712103843689, "learning_rate": 2.094840206451451e-05, "loss": -0.9346, "step": 42750 }, { "epoch": 24.88940628637951, "grad_norm": 0.12635253369808197, "learning_rate": 2.0925977690823273e-05, "loss": -0.9342, "step": 42760 }, { "epoch": 24.89522700814901, "grad_norm": 0.1297311633825302, "learning_rate": 2.0903562149052364e-05, "loss": -0.934, "step": 42770 }, { "epoch": 24.90104772991851, "grad_norm": 0.14525654911994934, "learning_rate": 2.0881155446011025e-05, "loss": -0.936, "step": 42780 }, { "epoch": 24.90686845168801, "grad_norm": 0.2015853226184845, "learning_rate": 2.0858757588505823e-05, "loss": -0.9316, "step": 42790 }, { "epoch": 24.912689173457508, "grad_norm": 0.12956412136554718, "learning_rate": 2.0836368583340622e-05, "loss": -0.9353, "step": 42800 }, { "epoch": 24.918509895227007, "grad_norm": 0.10040853917598724, "learning_rate": 2.081398843731664e-05, "loss": -0.9343, "step": 42810 }, { "epoch": 24.924330616996507, "grad_norm": 0.1756938397884369, "learning_rate": 2.0791617157232357e-05, "loss": -0.9291, "step": 42820 }, { "epoch": 24.930151338766006, "grad_norm": 0.11596644669771194, "learning_rate": 2.0769254749883576e-05, "loss": -0.9322, "step": 42830 }, { "epoch": 24.935972060535505, "grad_norm": 0.18071889877319336, "learning_rate": 2.0746901222063415e-05, "loss": -0.931, "step": 42840 }, { "epoch": 24.941792782305004, "grad_norm": 0.14904795587062836, "learning_rate": 2.072455658056226e-05, "loss": -0.934, "step": 42850 }, { "epoch": 24.947613504074504, "grad_norm": 0.1564406007528305, "learning_rate": 2.0702220832167873e-05, "loss": -0.9333, "step": 42860 }, { "epoch": 24.953434225844006, "grad_norm": 0.11781924962997437, "learning_rate": 2.0679893983665205e-05, "loss": -0.9342, "step": 42870 }, { "epoch": 24.959254947613505, "grad_norm": 0.1516544669866562, "learning_rate": 2.0657576041836622e-05, "loss": -0.9291, "step": 42880 }, { "epoch": 24.965075669383005, "grad_norm": 0.1209065169095993, "learning_rate": 2.0635267013461666e-05, "loss": -0.9261, "step": 42890 }, { "epoch": 24.970896391152504, "grad_norm": 0.1927257776260376, "learning_rate": 2.061296690531728e-05, "loss": -0.9305, "step": 42900 }, { "epoch": 24.976717112922003, "grad_norm": 0.1191258355975151, "learning_rate": 2.0590675724177622e-05, "loss": -0.9346, "step": 42910 }, { "epoch": 24.982537834691502, "grad_norm": 0.11326752603054047, "learning_rate": 2.0568393476814167e-05, "loss": -0.9349, "step": 42920 }, { "epoch": 24.988358556461, "grad_norm": 0.17493733763694763, "learning_rate": 2.0546120169995685e-05, "loss": -0.9347, "step": 42930 }, { "epoch": 24.9941792782305, "grad_norm": 0.14051516354084015, "learning_rate": 2.0523855810488214e-05, "loss": -0.9334, "step": 42940 }, { "epoch": 25.0, "grad_norm": 0.15606215596199036, "learning_rate": 2.050160040505505e-05, "loss": -0.9341, "step": 42950 }, { "epoch": 25.0058207217695, "grad_norm": 0.15472139418125153, "learning_rate": 2.0479353960456843e-05, "loss": -0.9332, "step": 42960 }, { "epoch": 25.011641443539, "grad_norm": 0.1484919637441635, "learning_rate": 2.0457116483451456e-05, "loss": -0.9346, "step": 42970 }, { "epoch": 25.017462165308498, "grad_norm": 0.21441620588302612, "learning_rate": 2.0434887980794043e-05, "loss": -0.9328, "step": 42980 }, { "epoch": 25.023282887077997, "grad_norm": 0.1492176055908203, "learning_rate": 2.0412668459237043e-05, "loss": -0.9321, "step": 42990 }, { "epoch": 25.029103608847496, "grad_norm": 0.14815224707126617, "learning_rate": 2.039045792553016e-05, "loss": -0.9339, "step": 43000 }, { "epoch": 25.034924330616995, "grad_norm": 0.12140603363513947, "learning_rate": 2.036825638642036e-05, "loss": -0.9322, "step": 43010 }, { "epoch": 25.040745052386495, "grad_norm": 0.18052950501441956, "learning_rate": 2.0346063848651868e-05, "loss": -0.9318, "step": 43020 }, { "epoch": 25.046565774155994, "grad_norm": 0.15335530042648315, "learning_rate": 2.0323880318966254e-05, "loss": -0.9353, "step": 43030 }, { "epoch": 25.052386495925496, "grad_norm": 0.09977996349334717, "learning_rate": 2.030170580410221e-05, "loss": -0.9337, "step": 43040 }, { "epoch": 25.058207217694996, "grad_norm": 0.16959843039512634, "learning_rate": 2.0279540310795837e-05, "loss": -0.9356, "step": 43050 }, { "epoch": 25.064027939464495, "grad_norm": 0.08206640183925629, "learning_rate": 2.0257383845780365e-05, "loss": -0.9363, "step": 43060 }, { "epoch": 25.069848661233994, "grad_norm": 0.1783669888973236, "learning_rate": 2.0235236415786384e-05, "loss": -0.9325, "step": 43070 }, { "epoch": 25.075669383003493, "grad_norm": 0.08444041013717651, "learning_rate": 2.021309802754169e-05, "loss": -0.9359, "step": 43080 }, { "epoch": 25.081490104772993, "grad_norm": 0.1007680743932724, "learning_rate": 2.0190968687771332e-05, "loss": -0.9369, "step": 43090 }, { "epoch": 25.087310826542492, "grad_norm": 0.2522331476211548, "learning_rate": 2.016884840319763e-05, "loss": -0.9319, "step": 43100 }, { "epoch": 25.09313154831199, "grad_norm": 0.24933882057666779, "learning_rate": 2.0146737180540122e-05, "loss": -0.9327, "step": 43110 }, { "epoch": 25.09895227008149, "grad_norm": 0.09892365336418152, "learning_rate": 2.012463502651564e-05, "loss": -0.9357, "step": 43120 }, { "epoch": 25.10477299185099, "grad_norm": 0.08166685700416565, "learning_rate": 2.0102541947838228e-05, "loss": -0.9351, "step": 43130 }, { "epoch": 25.11059371362049, "grad_norm": 0.10750516504049301, "learning_rate": 2.0080457951219173e-05, "loss": -0.9358, "step": 43140 }, { "epoch": 25.116414435389988, "grad_norm": 0.13020628690719604, "learning_rate": 2.0058383043367017e-05, "loss": -0.9313, "step": 43150 }, { "epoch": 25.122235157159487, "grad_norm": 0.17886632680892944, "learning_rate": 2.0036317230987528e-05, "loss": -0.9349, "step": 43160 }, { "epoch": 25.128055878928986, "grad_norm": 0.15908263623714447, "learning_rate": 2.0014260520783696e-05, "loss": -0.9299, "step": 43170 }, { "epoch": 25.133876600698486, "grad_norm": 0.21445804834365845, "learning_rate": 1.9992212919455834e-05, "loss": -0.9332, "step": 43180 }, { "epoch": 25.139697322467985, "grad_norm": 0.12991787493228912, "learning_rate": 1.9970174433701333e-05, "loss": -0.9312, "step": 43190 }, { "epoch": 25.145518044237484, "grad_norm": 0.18943846225738525, "learning_rate": 1.9948145070214992e-05, "loss": -0.9375, "step": 43200 }, { "epoch": 25.151338766006983, "grad_norm": 0.1047687754034996, "learning_rate": 1.9926124835688663e-05, "loss": -0.9316, "step": 43210 }, { "epoch": 25.157159487776486, "grad_norm": 0.19184699654579163, "learning_rate": 1.9904113736811576e-05, "loss": -0.934, "step": 43220 }, { "epoch": 25.162980209545985, "grad_norm": 0.12530504167079926, "learning_rate": 1.9882111780270096e-05, "loss": -0.9327, "step": 43230 }, { "epoch": 25.168800931315484, "grad_norm": 0.15306217968463898, "learning_rate": 1.986011897274784e-05, "loss": -0.9339, "step": 43240 }, { "epoch": 25.174621653084984, "grad_norm": 0.11935041099786758, "learning_rate": 1.983813532092565e-05, "loss": -0.9313, "step": 43250 }, { "epoch": 25.180442374854483, "grad_norm": 0.22453728318214417, "learning_rate": 1.981616083148155e-05, "loss": -0.9351, "step": 43260 }, { "epoch": 25.186263096623982, "grad_norm": 0.17640715837478638, "learning_rate": 1.9794195511090845e-05, "loss": -0.9357, "step": 43270 }, { "epoch": 25.19208381839348, "grad_norm": 0.17528149485588074, "learning_rate": 1.977223936642601e-05, "loss": -0.9346, "step": 43280 }, { "epoch": 25.19790454016298, "grad_norm": 0.13917973637580872, "learning_rate": 1.975029240415674e-05, "loss": -0.9347, "step": 43290 }, { "epoch": 25.20372526193248, "grad_norm": 0.09949143975973129, "learning_rate": 1.9728354630949936e-05, "loss": -0.9311, "step": 43300 }, { "epoch": 25.20954598370198, "grad_norm": 0.09774322807788849, "learning_rate": 1.9706426053469716e-05, "loss": -0.9326, "step": 43310 }, { "epoch": 25.215366705471478, "grad_norm": 0.131150484085083, "learning_rate": 1.9684506678377396e-05, "loss": -0.9335, "step": 43320 }, { "epoch": 25.221187427240977, "grad_norm": 0.23015855252742767, "learning_rate": 1.9662596512331544e-05, "loss": -0.9361, "step": 43330 }, { "epoch": 25.227008149010477, "grad_norm": 0.12390369921922684, "learning_rate": 1.964069556198782e-05, "loss": -0.9335, "step": 43340 }, { "epoch": 25.232828870779976, "grad_norm": 0.09586004167795181, "learning_rate": 1.9618803833999232e-05, "loss": -0.9316, "step": 43350 }, { "epoch": 25.238649592549475, "grad_norm": 0.1641145646572113, "learning_rate": 1.9596921335015838e-05, "loss": -0.9359, "step": 43360 }, { "epoch": 25.244470314318974, "grad_norm": 0.14998595416545868, "learning_rate": 1.957504807168501e-05, "loss": -0.9325, "step": 43370 }, { "epoch": 25.250291036088473, "grad_norm": 0.13774679601192474, "learning_rate": 1.9553184050651253e-05, "loss": -0.9361, "step": 43380 }, { "epoch": 25.256111757857976, "grad_norm": 0.13243690133094788, "learning_rate": 1.953132927855628e-05, "loss": -0.9318, "step": 43390 }, { "epoch": 25.261932479627475, "grad_norm": 0.16178786754608154, "learning_rate": 1.9509483762038995e-05, "loss": -0.9344, "step": 43400 }, { "epoch": 25.267753201396975, "grad_norm": 0.13048996031284332, "learning_rate": 1.9487647507735467e-05, "loss": -0.9326, "step": 43410 }, { "epoch": 25.273573923166474, "grad_norm": 0.13075454533100128, "learning_rate": 1.9465820522279032e-05, "loss": -0.9348, "step": 43420 }, { "epoch": 25.279394644935973, "grad_norm": 0.1256438046693802, "learning_rate": 1.9444002812300078e-05, "loss": -0.9308, "step": 43430 }, { "epoch": 25.285215366705472, "grad_norm": 0.19641335308551788, "learning_rate": 1.94221943844263e-05, "loss": -0.9363, "step": 43440 }, { "epoch": 25.29103608847497, "grad_norm": 0.16785146296024323, "learning_rate": 1.9400395245282515e-05, "loss": -0.9342, "step": 43450 }, { "epoch": 25.29685681024447, "grad_norm": 0.24969905614852905, "learning_rate": 1.937860540149071e-05, "loss": -0.9341, "step": 43460 }, { "epoch": 25.30267753201397, "grad_norm": 0.19482634961605072, "learning_rate": 1.9356824859670082e-05, "loss": -0.9346, "step": 43470 }, { "epoch": 25.30849825378347, "grad_norm": 0.09594132751226425, "learning_rate": 1.9335053626436967e-05, "loss": -0.9331, "step": 43480 }, { "epoch": 25.31431897555297, "grad_norm": 0.129851832985878, "learning_rate": 1.9313291708404885e-05, "loss": -0.9329, "step": 43490 }, { "epoch": 25.320139697322467, "grad_norm": 0.1084897443652153, "learning_rate": 1.9291539112184587e-05, "loss": -0.9329, "step": 43500 }, { "epoch": 25.325960419091967, "grad_norm": 0.10846779495477676, "learning_rate": 1.9269795844383854e-05, "loss": -0.9353, "step": 43510 }, { "epoch": 25.331781140861466, "grad_norm": 0.11871333420276642, "learning_rate": 1.9248061911607777e-05, "loss": -0.9355, "step": 43520 }, { "epoch": 25.337601862630965, "grad_norm": 0.13560859858989716, "learning_rate": 1.9226337320458538e-05, "loss": -0.9339, "step": 43530 }, { "epoch": 25.343422584400464, "grad_norm": 0.10014659911394119, "learning_rate": 1.9204622077535488e-05, "loss": -0.9356, "step": 43540 }, { "epoch": 25.349243306169964, "grad_norm": 0.16855430603027344, "learning_rate": 1.9182916189435147e-05, "loss": -0.9347, "step": 43550 }, { "epoch": 25.355064027939463, "grad_norm": 0.15748251974582672, "learning_rate": 1.916121966275117e-05, "loss": -0.9324, "step": 43560 }, { "epoch": 25.360884749708966, "grad_norm": 0.0805199146270752, "learning_rate": 1.9139532504074443e-05, "loss": -0.9356, "step": 43570 }, { "epoch": 25.366705471478465, "grad_norm": 0.22868068516254425, "learning_rate": 1.9117854719992885e-05, "loss": -0.9358, "step": 43580 }, { "epoch": 25.372526193247964, "grad_norm": 0.18190477788448334, "learning_rate": 1.9096186317091687e-05, "loss": -0.9366, "step": 43590 }, { "epoch": 25.378346915017463, "grad_norm": 0.12614305317401886, "learning_rate": 1.9074527301953116e-05, "loss": -0.934, "step": 43600 }, { "epoch": 25.384167636786962, "grad_norm": 0.16553789377212524, "learning_rate": 1.9052877681156607e-05, "loss": -0.9324, "step": 43610 }, { "epoch": 25.38998835855646, "grad_norm": 0.23103947937488556, "learning_rate": 1.903123746127875e-05, "loss": -0.9314, "step": 43620 }, { "epoch": 25.39580908032596, "grad_norm": 0.08445896953344345, "learning_rate": 1.900960664889327e-05, "loss": -0.935, "step": 43630 }, { "epoch": 25.40162980209546, "grad_norm": 0.11099570244550705, "learning_rate": 1.8987985250571015e-05, "loss": -0.9361, "step": 43640 }, { "epoch": 25.40745052386496, "grad_norm": 0.27598458528518677, "learning_rate": 1.8966373272880054e-05, "loss": -0.9319, "step": 43650 }, { "epoch": 25.41327124563446, "grad_norm": 0.19741690158843994, "learning_rate": 1.8944770722385462e-05, "loss": -0.9335, "step": 43660 }, { "epoch": 25.419091967403958, "grad_norm": 0.2181212455034256, "learning_rate": 1.8923177605649576e-05, "loss": -0.9344, "step": 43670 }, { "epoch": 25.424912689173457, "grad_norm": 0.233905628323555, "learning_rate": 1.8901593929231802e-05, "loss": -0.9335, "step": 43680 }, { "epoch": 25.430733410942956, "grad_norm": 0.11272606253623962, "learning_rate": 1.8880019699688684e-05, "loss": -0.9338, "step": 43690 }, { "epoch": 25.436554132712455, "grad_norm": 0.17944930493831635, "learning_rate": 1.8858454923573904e-05, "loss": -0.9327, "step": 43700 }, { "epoch": 25.442374854481955, "grad_norm": 0.2210463285446167, "learning_rate": 1.8836899607438253e-05, "loss": -0.9341, "step": 43710 }, { "epoch": 25.448195576251454, "grad_norm": 0.12322226166725159, "learning_rate": 1.8815353757829723e-05, "loss": -0.9319, "step": 43720 }, { "epoch": 25.454016298020953, "grad_norm": 0.12843598425388336, "learning_rate": 1.879381738129331e-05, "loss": -0.9335, "step": 43730 }, { "epoch": 25.459837019790456, "grad_norm": 0.1819077730178833, "learning_rate": 1.8772290484371236e-05, "loss": -0.9351, "step": 43740 }, { "epoch": 25.465657741559955, "grad_norm": 0.19535282254219055, "learning_rate": 1.8750773073602795e-05, "loss": -0.9354, "step": 43750 }, { "epoch": 25.471478463329454, "grad_norm": 0.12637737393379211, "learning_rate": 1.8729265155524405e-05, "loss": -0.9352, "step": 43760 }, { "epoch": 25.477299185098953, "grad_norm": 0.1384631246328354, "learning_rate": 1.8707766736669607e-05, "loss": -0.9342, "step": 43770 }, { "epoch": 25.483119906868453, "grad_norm": 0.11552099883556366, "learning_rate": 1.8686277823569055e-05, "loss": -0.9345, "step": 43780 }, { "epoch": 25.488940628637952, "grad_norm": 0.07316696643829346, "learning_rate": 1.8664798422750484e-05, "loss": -0.9379, "step": 43790 }, { "epoch": 25.49476135040745, "grad_norm": 0.17264628410339355, "learning_rate": 1.8643328540738832e-05, "loss": -0.9344, "step": 43800 }, { "epoch": 25.50058207217695, "grad_norm": 0.12920081615447998, "learning_rate": 1.862186818405601e-05, "loss": -0.9339, "step": 43810 }, { "epoch": 25.50640279394645, "grad_norm": 0.09738942235708237, "learning_rate": 1.8600417359221156e-05, "loss": -0.9344, "step": 43820 }, { "epoch": 25.51222351571595, "grad_norm": 0.10648404806852341, "learning_rate": 1.8578976072750454e-05, "loss": -0.9365, "step": 43830 }, { "epoch": 25.518044237485448, "grad_norm": 0.14233805239200592, "learning_rate": 1.8557544331157194e-05, "loss": -0.9356, "step": 43840 }, { "epoch": 25.523864959254947, "grad_norm": 0.18133245408535004, "learning_rate": 1.8536122140951785e-05, "loss": -0.9297, "step": 43850 }, { "epoch": 25.529685681024446, "grad_norm": 0.18897108733654022, "learning_rate": 1.8514709508641688e-05, "loss": -0.9333, "step": 43860 }, { "epoch": 25.535506402793946, "grad_norm": 0.22214578092098236, "learning_rate": 1.8493306440731555e-05, "loss": -0.9333, "step": 43870 }, { "epoch": 25.541327124563445, "grad_norm": 0.24190956354141235, "learning_rate": 1.8471912943723013e-05, "loss": -0.9338, "step": 43880 }, { "epoch": 25.547147846332944, "grad_norm": 0.12545651197433472, "learning_rate": 1.8450529024114894e-05, "loss": -0.936, "step": 43890 }, { "epoch": 25.552968568102443, "grad_norm": 0.16402758657932281, "learning_rate": 1.842915468840301e-05, "loss": -0.9326, "step": 43900 }, { "epoch": 25.558789289871942, "grad_norm": 0.1306902915239334, "learning_rate": 1.840778994308037e-05, "loss": -0.9336, "step": 43910 }, { "epoch": 25.564610011641445, "grad_norm": 0.21692128479480743, "learning_rate": 1.8386434794637004e-05, "loss": -0.9377, "step": 43920 }, { "epoch": 25.570430733410944, "grad_norm": 0.13826122879981995, "learning_rate": 1.8365089249560034e-05, "loss": -0.937, "step": 43930 }, { "epoch": 25.576251455180444, "grad_norm": 0.22021472454071045, "learning_rate": 1.8343753314333683e-05, "loss": -0.9336, "step": 43940 }, { "epoch": 25.582072176949943, "grad_norm": 0.2696615755558014, "learning_rate": 1.8322426995439236e-05, "loss": -0.9325, "step": 43950 }, { "epoch": 25.587892898719442, "grad_norm": 0.11381670087575912, "learning_rate": 1.8301110299355058e-05, "loss": -0.9263, "step": 43960 }, { "epoch": 25.59371362048894, "grad_norm": 0.20616978406906128, "learning_rate": 1.8279803232556625e-05, "loss": -0.9328, "step": 43970 }, { "epoch": 25.59953434225844, "grad_norm": 0.1058255210518837, "learning_rate": 1.8258505801516444e-05, "loss": -0.9341, "step": 43980 }, { "epoch": 25.60535506402794, "grad_norm": 0.17999406158924103, "learning_rate": 1.8237218012704117e-05, "loss": -0.9344, "step": 43990 }, { "epoch": 25.61117578579744, "grad_norm": 0.12909618020057678, "learning_rate": 1.821593987258631e-05, "loss": -0.9341, "step": 44000 }, { "epoch": 25.616996507566938, "grad_norm": 0.16016073524951935, "learning_rate": 1.8194671387626744e-05, "loss": -0.9335, "step": 44010 }, { "epoch": 25.622817229336437, "grad_norm": 0.08912975341081619, "learning_rate": 1.8173412564286276e-05, "loss": -0.9334, "step": 44020 }, { "epoch": 25.628637951105937, "grad_norm": 0.12298882007598877, "learning_rate": 1.8152163409022697e-05, "loss": -0.9295, "step": 44030 }, { "epoch": 25.634458672875436, "grad_norm": 0.10086125880479813, "learning_rate": 1.8130923928291023e-05, "loss": -0.9341, "step": 44040 }, { "epoch": 25.640279394644935, "grad_norm": 0.24649614095687866, "learning_rate": 1.8109694128543163e-05, "loss": -0.933, "step": 44050 }, { "epoch": 25.646100116414434, "grad_norm": 0.16623111069202423, "learning_rate": 1.8088474016228237e-05, "loss": -0.935, "step": 44060 }, { "epoch": 25.651920838183933, "grad_norm": 0.18118228018283844, "learning_rate": 1.8067263597792328e-05, "loss": -0.9357, "step": 44070 }, { "epoch": 25.657741559953433, "grad_norm": 0.1489943265914917, "learning_rate": 1.80460628796786e-05, "loss": -0.9345, "step": 44080 }, { "epoch": 25.663562281722932, "grad_norm": 0.18003550171852112, "learning_rate": 1.8024871868327276e-05, "loss": -0.9366, "step": 44090 }, { "epoch": 25.669383003492435, "grad_norm": 0.11043475568294525, "learning_rate": 1.8003690570175608e-05, "loss": -0.9351, "step": 44100 }, { "epoch": 25.675203725261934, "grad_norm": 0.12593084573745728, "learning_rate": 1.7982518991657943e-05, "loss": -0.9347, "step": 44110 }, { "epoch": 25.681024447031433, "grad_norm": 0.09820708632469177, "learning_rate": 1.7961357139205643e-05, "loss": -0.9358, "step": 44120 }, { "epoch": 25.686845168800932, "grad_norm": 0.1870216727256775, "learning_rate": 1.7940205019247108e-05, "loss": -0.9362, "step": 44130 }, { "epoch": 25.69266589057043, "grad_norm": 0.2042965292930603, "learning_rate": 1.79190626382078e-05, "loss": -0.9346, "step": 44140 }, { "epoch": 25.69848661233993, "grad_norm": 0.1702173352241516, "learning_rate": 1.7897930002510215e-05, "loss": -0.9318, "step": 44150 }, { "epoch": 25.70430733410943, "grad_norm": 0.14804022014141083, "learning_rate": 1.787680711857387e-05, "loss": -0.9332, "step": 44160 }, { "epoch": 25.71012805587893, "grad_norm": 0.11666128784418106, "learning_rate": 1.7855693992815398e-05, "loss": -0.9347, "step": 44170 }, { "epoch": 25.71594877764843, "grad_norm": 0.13606202602386475, "learning_rate": 1.7834590631648328e-05, "loss": -0.9352, "step": 44180 }, { "epoch": 25.721769499417928, "grad_norm": 0.20363783836364746, "learning_rate": 1.7813497041483384e-05, "loss": -0.9344, "step": 44190 }, { "epoch": 25.727590221187427, "grad_norm": 0.2497156709432602, "learning_rate": 1.779241322872817e-05, "loss": -0.9354, "step": 44200 }, { "epoch": 25.733410942956926, "grad_norm": 0.14790454506874084, "learning_rate": 1.777133919978744e-05, "loss": -0.9354, "step": 44210 }, { "epoch": 25.739231664726425, "grad_norm": 0.1443951278924942, "learning_rate": 1.7750274961062912e-05, "loss": -0.9302, "step": 44220 }, { "epoch": 25.745052386495924, "grad_norm": 0.23307493329048157, "learning_rate": 1.772922051895335e-05, "loss": -0.9364, "step": 44230 }, { "epoch": 25.750873108265424, "grad_norm": 0.08746974915266037, "learning_rate": 1.770817587985453e-05, "loss": -0.9373, "step": 44240 }, { "epoch": 25.756693830034923, "grad_norm": 0.14698857069015503, "learning_rate": 1.7687141050159246e-05, "loss": -0.9359, "step": 44250 }, { "epoch": 25.762514551804422, "grad_norm": 0.13189494609832764, "learning_rate": 1.7666116036257375e-05, "loss": -0.9371, "step": 44260 }, { "epoch": 25.768335273573925, "grad_norm": 0.07820049673318863, "learning_rate": 1.764510084453569e-05, "loss": -0.9367, "step": 44270 }, { "epoch": 25.774155995343424, "grad_norm": 0.13793472945690155, "learning_rate": 1.76240954813781e-05, "loss": -0.9336, "step": 44280 }, { "epoch": 25.779976717112923, "grad_norm": 0.10690619796514511, "learning_rate": 1.7603099953165476e-05, "loss": -0.9361, "step": 44290 }, { "epoch": 25.785797438882422, "grad_norm": 0.1496933102607727, "learning_rate": 1.7582114266275683e-05, "loss": -0.9341, "step": 44300 }, { "epoch": 25.79161816065192, "grad_norm": 0.08150621503591537, "learning_rate": 1.756113842708364e-05, "loss": -0.9337, "step": 44310 }, { "epoch": 25.79743888242142, "grad_norm": 0.09234417229890823, "learning_rate": 1.7540172441961245e-05, "loss": -0.9353, "step": 44320 }, { "epoch": 25.80325960419092, "grad_norm": 0.0936778336763382, "learning_rate": 1.7519216317277387e-05, "loss": -0.9353, "step": 44330 }, { "epoch": 25.80908032596042, "grad_norm": 0.1819676011800766, "learning_rate": 1.7498270059398046e-05, "loss": -0.9339, "step": 44340 }, { "epoch": 25.81490104772992, "grad_norm": 0.1533130407333374, "learning_rate": 1.7477333674686062e-05, "loss": -0.9346, "step": 44350 }, { "epoch": 25.820721769499418, "grad_norm": 0.20680977404117584, "learning_rate": 1.745640716950142e-05, "loss": -0.9353, "step": 44360 }, { "epoch": 25.826542491268917, "grad_norm": 0.22686652839183807, "learning_rate": 1.7435490550201017e-05, "loss": -0.934, "step": 44370 }, { "epoch": 25.832363213038416, "grad_norm": 0.08930793404579163, "learning_rate": 1.7414583823138762e-05, "loss": -0.9385, "step": 44380 }, { "epoch": 25.838183934807915, "grad_norm": 0.10883930325508118, "learning_rate": 1.739368699466558e-05, "loss": -0.9344, "step": 44390 }, { "epoch": 25.844004656577415, "grad_norm": 0.11118926107883453, "learning_rate": 1.737280007112935e-05, "loss": -0.9354, "step": 44400 }, { "epoch": 25.849825378346914, "grad_norm": 0.10188456624746323, "learning_rate": 1.735192305887502e-05, "loss": -0.9376, "step": 44410 }, { "epoch": 25.855646100116413, "grad_norm": 0.11218623071908951, "learning_rate": 1.733105596424441e-05, "loss": -0.9352, "step": 44420 }, { "epoch": 25.861466821885912, "grad_norm": 0.10810297727584839, "learning_rate": 1.7310198793576437e-05, "loss": -0.9306, "step": 44430 }, { "epoch": 25.867287543655415, "grad_norm": 0.11695084720849991, "learning_rate": 1.7289351553206952e-05, "loss": -0.934, "step": 44440 }, { "epoch": 25.873108265424914, "grad_norm": 0.15050579607486725, "learning_rate": 1.7268514249468788e-05, "loss": -0.9315, "step": 44450 }, { "epoch": 25.878928987194413, "grad_norm": 0.13440579175949097, "learning_rate": 1.7247686888691765e-05, "loss": -0.9354, "step": 44460 }, { "epoch": 25.884749708963913, "grad_norm": 0.12883424758911133, "learning_rate": 1.7226869477202694e-05, "loss": -0.9357, "step": 44470 }, { "epoch": 25.890570430733412, "grad_norm": 0.17646551132202148, "learning_rate": 1.7206062021325336e-05, "loss": -0.9306, "step": 44480 }, { "epoch": 25.89639115250291, "grad_norm": 0.1381625235080719, "learning_rate": 1.7185264527380502e-05, "loss": -0.934, "step": 44490 }, { "epoch": 25.90221187427241, "grad_norm": 0.1739361584186554, "learning_rate": 1.716447700168584e-05, "loss": -0.9353, "step": 44500 }, { "epoch": 25.90803259604191, "grad_norm": 0.1603054702281952, "learning_rate": 1.714369945055611e-05, "loss": -0.9308, "step": 44510 }, { "epoch": 25.91385331781141, "grad_norm": 0.10693114995956421, "learning_rate": 1.7122931880302968e-05, "loss": -0.9372, "step": 44520 }, { "epoch": 25.919674039580908, "grad_norm": 0.23245908319950104, "learning_rate": 1.710217429723505e-05, "loss": -0.9336, "step": 44530 }, { "epoch": 25.925494761350407, "grad_norm": 0.1196913868188858, "learning_rate": 1.7081426707657972e-05, "loss": -0.9326, "step": 44540 }, { "epoch": 25.931315483119906, "grad_norm": 0.12461499869823456, "learning_rate": 1.7060689117874275e-05, "loss": -0.932, "step": 44550 }, { "epoch": 25.937136204889406, "grad_norm": 0.11041166633367538, "learning_rate": 1.703996153418354e-05, "loss": -0.9345, "step": 44560 }, { "epoch": 25.942956926658905, "grad_norm": 0.1313466876745224, "learning_rate": 1.7019243962882205e-05, "loss": -0.9367, "step": 44570 }, { "epoch": 25.948777648428404, "grad_norm": 0.10024883598089218, "learning_rate": 1.6998536410263754e-05, "loss": -0.9367, "step": 44580 }, { "epoch": 25.954598370197903, "grad_norm": 0.1344425529241562, "learning_rate": 1.6977838882618596e-05, "loss": -0.9355, "step": 44590 }, { "epoch": 25.960419091967402, "grad_norm": 0.20990295708179474, "learning_rate": 1.6957151386234088e-05, "loss": -0.9357, "step": 44600 }, { "epoch": 25.9662398137369, "grad_norm": 0.09781515598297119, "learning_rate": 1.6936473927394536e-05, "loss": -0.9358, "step": 44610 }, { "epoch": 25.972060535506404, "grad_norm": 0.1069989949464798, "learning_rate": 1.6915806512381222e-05, "loss": -0.9336, "step": 44620 }, { "epoch": 25.977881257275904, "grad_norm": 0.1333031803369522, "learning_rate": 1.6895149147472344e-05, "loss": -0.9341, "step": 44630 }, { "epoch": 25.983701979045403, "grad_norm": 0.1695823073387146, "learning_rate": 1.6874501838943073e-05, "loss": -0.933, "step": 44640 }, { "epoch": 25.989522700814902, "grad_norm": 0.20349881052970886, "learning_rate": 1.6853864593065506e-05, "loss": -0.9355, "step": 44650 }, { "epoch": 25.9953434225844, "grad_norm": 0.1773354560136795, "learning_rate": 1.683323741610871e-05, "loss": -0.9343, "step": 44660 }, { "epoch": 26.0011641443539, "grad_norm": 0.1993999183177948, "learning_rate": 1.6812620314338674e-05, "loss": -0.9324, "step": 44670 }, { "epoch": 26.0069848661234, "grad_norm": 0.15082822740077972, "learning_rate": 1.6792013294018326e-05, "loss": -0.9359, "step": 44680 }, { "epoch": 26.0128055878929, "grad_norm": 0.07388532161712646, "learning_rate": 1.6771416361407526e-05, "loss": -0.9343, "step": 44690 }, { "epoch": 26.018626309662398, "grad_norm": 0.16183491051197052, "learning_rate": 1.675082952276308e-05, "loss": -0.9366, "step": 44700 }, { "epoch": 26.024447031431897, "grad_norm": 0.22394753992557526, "learning_rate": 1.6730252784338757e-05, "loss": -0.9341, "step": 44710 }, { "epoch": 26.030267753201397, "grad_norm": 0.11271888762712479, "learning_rate": 1.6709686152385166e-05, "loss": -0.9347, "step": 44720 }, { "epoch": 26.036088474970896, "grad_norm": 0.1785217821598053, "learning_rate": 1.668912963314998e-05, "loss": -0.9339, "step": 44730 }, { "epoch": 26.041909196740395, "grad_norm": 0.13400574028491974, "learning_rate": 1.6668583232877653e-05, "loss": -0.9333, "step": 44740 }, { "epoch": 26.047729918509894, "grad_norm": 0.12115117907524109, "learning_rate": 1.6648046957809698e-05, "loss": -0.9324, "step": 44750 }, { "epoch": 26.053550640279393, "grad_norm": 0.10946536809206009, "learning_rate": 1.6627520814184462e-05, "loss": -0.9332, "step": 44760 }, { "epoch": 26.059371362048893, "grad_norm": 0.11491274833679199, "learning_rate": 1.660700480823726e-05, "loss": -0.9352, "step": 44770 }, { "epoch": 26.065192083818392, "grad_norm": 0.13994760811328888, "learning_rate": 1.65864989462003e-05, "loss": -0.9355, "step": 44780 }, { "epoch": 26.07101280558789, "grad_norm": 0.15030166506767273, "learning_rate": 1.656600323430273e-05, "loss": -0.9325, "step": 44790 }, { "epoch": 26.076833527357394, "grad_norm": 0.16861005127429962, "learning_rate": 1.654551767877059e-05, "loss": -0.9361, "step": 44800 }, { "epoch": 26.082654249126893, "grad_norm": 0.1466304212808609, "learning_rate": 1.6525042285826874e-05, "loss": -0.9309, "step": 44810 }, { "epoch": 26.088474970896392, "grad_norm": 0.28785622119903564, "learning_rate": 1.6504577061691468e-05, "loss": -0.9304, "step": 44820 }, { "epoch": 26.09429569266589, "grad_norm": 0.1569855809211731, "learning_rate": 1.6484122012581143e-05, "loss": -0.9331, "step": 44830 }, { "epoch": 26.10011641443539, "grad_norm": 0.10378258675336838, "learning_rate": 1.6463677144709623e-05, "loss": -0.9356, "step": 44840 }, { "epoch": 26.10593713620489, "grad_norm": 0.0992962047457695, "learning_rate": 1.6443242464287493e-05, "loss": -0.9357, "step": 44850 }, { "epoch": 26.11175785797439, "grad_norm": 0.08711462467908859, "learning_rate": 1.642281797752232e-05, "loss": -0.9356, "step": 44860 }, { "epoch": 26.11757857974389, "grad_norm": 0.14872093498706818, "learning_rate": 1.6402403690618456e-05, "loss": -0.9353, "step": 44870 }, { "epoch": 26.123399301513388, "grad_norm": 0.07404684275388718, "learning_rate": 1.6381999609777295e-05, "loss": -0.9372, "step": 44880 }, { "epoch": 26.129220023282887, "grad_norm": 0.14209292829036713, "learning_rate": 1.6361605741196983e-05, "loss": -0.9246, "step": 44890 }, { "epoch": 26.135040745052386, "grad_norm": 0.1964363306760788, "learning_rate": 1.63412220910727e-05, "loss": -0.9326, "step": 44900 }, { "epoch": 26.140861466821885, "grad_norm": 0.06173272058367729, "learning_rate": 1.6320848665596433e-05, "loss": -0.9328, "step": 44910 }, { "epoch": 26.146682188591384, "grad_norm": 0.11998110264539719, "learning_rate": 1.6300485470957095e-05, "loss": -0.9319, "step": 44920 }, { "epoch": 26.152502910360884, "grad_norm": 0.12328352779150009, "learning_rate": 1.6280132513340483e-05, "loss": -0.9352, "step": 44930 }, { "epoch": 26.158323632130383, "grad_norm": 0.1131834164261818, "learning_rate": 1.62597897989293e-05, "loss": -0.9367, "step": 44940 }, { "epoch": 26.164144353899882, "grad_norm": 0.11377294361591339, "learning_rate": 1.623945733390309e-05, "loss": -0.9344, "step": 44950 }, { "epoch": 26.16996507566938, "grad_norm": 0.1169891208410263, "learning_rate": 1.6219135124438374e-05, "loss": -0.9356, "step": 44960 }, { "epoch": 26.175785797438884, "grad_norm": 0.10898012667894363, "learning_rate": 1.6198823176708465e-05, "loss": -0.9374, "step": 44970 }, { "epoch": 26.181606519208383, "grad_norm": 0.21181006729602814, "learning_rate": 1.6178521496883613e-05, "loss": -0.9312, "step": 44980 }, { "epoch": 26.187427240977883, "grad_norm": 0.2008611559867859, "learning_rate": 1.6158230091130926e-05, "loss": -0.9359, "step": 44990 }, { "epoch": 26.19324796274738, "grad_norm": 0.10610105842351913, "learning_rate": 1.613794896561438e-05, "loss": -0.9367, "step": 45000 }, { "epoch": 26.19906868451688, "grad_norm": 0.2177945226430893, "learning_rate": 1.6117678126494894e-05, "loss": -0.9349, "step": 45010 }, { "epoch": 26.20488940628638, "grad_norm": 0.15271998941898346, "learning_rate": 1.6097417579930153e-05, "loss": -0.9286, "step": 45020 }, { "epoch": 26.21071012805588, "grad_norm": 0.16686800122261047, "learning_rate": 1.6077167332074834e-05, "loss": -0.9322, "step": 45030 }, { "epoch": 26.21653084982538, "grad_norm": 0.16145211458206177, "learning_rate": 1.605692738908037e-05, "loss": -0.9354, "step": 45040 }, { "epoch": 26.222351571594878, "grad_norm": 0.13805466890335083, "learning_rate": 1.6036697757095176e-05, "loss": -0.9334, "step": 45050 }, { "epoch": 26.228172293364377, "grad_norm": 0.11090385168790817, "learning_rate": 1.6016478442264428e-05, "loss": -0.9342, "step": 45060 }, { "epoch": 26.233993015133876, "grad_norm": 0.11534208059310913, "learning_rate": 1.599626945073026e-05, "loss": -0.936, "step": 45070 }, { "epoch": 26.239813736903375, "grad_norm": 0.180705726146698, "learning_rate": 1.597607078863162e-05, "loss": -0.9361, "step": 45080 }, { "epoch": 26.245634458672875, "grad_norm": 0.12216658145189285, "learning_rate": 1.595588246210432e-05, "loss": -0.9355, "step": 45090 }, { "epoch": 26.251455180442374, "grad_norm": 0.11762382090091705, "learning_rate": 1.5935704477281048e-05, "loss": -0.9375, "step": 45100 }, { "epoch": 26.257275902211873, "grad_norm": 0.13628479838371277, "learning_rate": 1.5915536840291323e-05, "loss": -0.9366, "step": 45110 }, { "epoch": 26.263096623981372, "grad_norm": 0.08347539603710175, "learning_rate": 1.5895379557261576e-05, "loss": -0.9369, "step": 45120 }, { "epoch": 26.26891734575087, "grad_norm": 0.23010411858558655, "learning_rate": 1.5875232634315033e-05, "loss": -0.9342, "step": 45130 }, { "epoch": 26.274738067520374, "grad_norm": 0.15350934863090515, "learning_rate": 1.5855096077571812e-05, "loss": -0.9324, "step": 45140 }, { "epoch": 26.280558789289874, "grad_norm": 0.1644039750099182, "learning_rate": 1.5834969893148855e-05, "loss": -0.9348, "step": 45150 }, { "epoch": 26.286379511059373, "grad_norm": 0.137131929397583, "learning_rate": 1.581485408715997e-05, "loss": -0.9364, "step": 45160 }, { "epoch": 26.292200232828872, "grad_norm": 0.09890823066234589, "learning_rate": 1.5794748665715785e-05, "loss": -0.9346, "step": 45170 }, { "epoch": 26.29802095459837, "grad_norm": 0.14635929465293884, "learning_rate": 1.5774653634923857e-05, "loss": -0.9374, "step": 45180 }, { "epoch": 26.30384167636787, "grad_norm": 0.10405567288398743, "learning_rate": 1.575456900088845e-05, "loss": -0.9363, "step": 45190 }, { "epoch": 26.30966239813737, "grad_norm": 0.07423227280378342, "learning_rate": 1.5734494769710816e-05, "loss": -0.9321, "step": 45200 }, { "epoch": 26.31548311990687, "grad_norm": 0.18683110177516937, "learning_rate": 1.5714430947488912e-05, "loss": -0.9378, "step": 45210 }, { "epoch": 26.321303841676368, "grad_norm": 0.1859297752380371, "learning_rate": 1.5694377540317645e-05, "loss": -0.9335, "step": 45220 }, { "epoch": 26.327124563445867, "grad_norm": 0.21224786341190338, "learning_rate": 1.5674334554288694e-05, "loss": -0.9369, "step": 45230 }, { "epoch": 26.332945285215366, "grad_norm": 0.1357458382844925, "learning_rate": 1.5654301995490582e-05, "loss": -0.9357, "step": 45240 }, { "epoch": 26.338766006984866, "grad_norm": 0.16361483931541443, "learning_rate": 1.5634279870008685e-05, "loss": -0.9377, "step": 45250 }, { "epoch": 26.344586728754365, "grad_norm": 0.07900457829236984, "learning_rate": 1.5614268183925174e-05, "loss": -0.9358, "step": 45260 }, { "epoch": 26.350407450523864, "grad_norm": 0.08725795894861221, "learning_rate": 1.5594266943319097e-05, "loss": -0.9287, "step": 45270 }, { "epoch": 26.356228172293363, "grad_norm": 0.1357833445072174, "learning_rate": 1.5574276154266294e-05, "loss": -0.9373, "step": 45280 }, { "epoch": 26.362048894062863, "grad_norm": 0.10885200649499893, "learning_rate": 1.5554295822839437e-05, "loss": -0.9362, "step": 45290 }, { "epoch": 26.36786961583236, "grad_norm": 0.14606481790542603, "learning_rate": 1.5534325955108025e-05, "loss": -0.9373, "step": 45300 }, { "epoch": 26.37369033760186, "grad_norm": 0.07114507257938385, "learning_rate": 1.5514366557138373e-05, "loss": -0.936, "step": 45310 }, { "epoch": 26.379511059371364, "grad_norm": 0.10060346871614456, "learning_rate": 1.5494417634993602e-05, "loss": -0.9336, "step": 45320 }, { "epoch": 26.385331781140863, "grad_norm": 0.18659360706806183, "learning_rate": 1.547447919473372e-05, "loss": -0.9322, "step": 45330 }, { "epoch": 26.391152502910362, "grad_norm": 0.11927664279937744, "learning_rate": 1.5454551242415434e-05, "loss": -0.9374, "step": 45340 }, { "epoch": 26.39697322467986, "grad_norm": 0.13990052044391632, "learning_rate": 1.543463378409239e-05, "loss": -0.9359, "step": 45350 }, { "epoch": 26.40279394644936, "grad_norm": 0.10414054244756699, "learning_rate": 1.541472682581493e-05, "loss": -0.9285, "step": 45360 }, { "epoch": 26.40861466821886, "grad_norm": 0.13431230187416077, "learning_rate": 1.5394830373630298e-05, "loss": -0.936, "step": 45370 }, { "epoch": 26.41443538998836, "grad_norm": 0.18780528008937836, "learning_rate": 1.5374944433582506e-05, "loss": -0.9343, "step": 45380 }, { "epoch": 26.42025611175786, "grad_norm": 0.14389269053936005, "learning_rate": 1.5355069011712375e-05, "loss": -0.934, "step": 45390 }, { "epoch": 26.426076833527357, "grad_norm": 0.2056005448102951, "learning_rate": 1.5335204114057526e-05, "loss": -0.9285, "step": 45400 }, { "epoch": 26.431897555296857, "grad_norm": 0.12393411993980408, "learning_rate": 1.5315349746652387e-05, "loss": -0.9348, "step": 45410 }, { "epoch": 26.437718277066356, "grad_norm": 0.13510891795158386, "learning_rate": 1.5295505915528212e-05, "loss": -0.936, "step": 45420 }, { "epoch": 26.443538998835855, "grad_norm": 0.14096248149871826, "learning_rate": 1.5275672626713024e-05, "loss": -0.932, "step": 45430 }, { "epoch": 26.449359720605354, "grad_norm": 0.12790653109550476, "learning_rate": 1.5255849886231643e-05, "loss": -0.9337, "step": 45440 }, { "epoch": 26.455180442374854, "grad_norm": 0.09651829302310944, "learning_rate": 1.523603770010571e-05, "loss": -0.931, "step": 45450 }, { "epoch": 26.461001164144353, "grad_norm": 0.22208595275878906, "learning_rate": 1.521623607435363e-05, "loss": -0.9366, "step": 45460 }, { "epoch": 26.466821885913852, "grad_norm": 0.14081458747386932, "learning_rate": 1.5196445014990612e-05, "loss": -0.9349, "step": 45470 }, { "epoch": 26.47264260768335, "grad_norm": 0.12210070341825485, "learning_rate": 1.5176664528028672e-05, "loss": -0.9356, "step": 45480 }, { "epoch": 26.47846332945285, "grad_norm": 0.09013782441616058, "learning_rate": 1.5156894619476574e-05, "loss": -0.935, "step": 45490 }, { "epoch": 26.484284051222353, "grad_norm": 0.11508670449256897, "learning_rate": 1.5137135295339938e-05, "loss": -0.9316, "step": 45500 }, { "epoch": 26.490104772991852, "grad_norm": 0.09706952422857285, "learning_rate": 1.5117386561621073e-05, "loss": -0.9362, "step": 45510 }, { "epoch": 26.49592549476135, "grad_norm": 0.12411504983901978, "learning_rate": 1.5097648424319167e-05, "loss": -0.9346, "step": 45520 }, { "epoch": 26.50174621653085, "grad_norm": 0.14304432272911072, "learning_rate": 1.5077920889430119e-05, "loss": -0.934, "step": 45530 }, { "epoch": 26.50756693830035, "grad_norm": 0.22927169501781464, "learning_rate": 1.5058203962946644e-05, "loss": -0.9341, "step": 45540 }, { "epoch": 26.51338766006985, "grad_norm": 0.14749416708946228, "learning_rate": 1.503849765085822e-05, "loss": -0.9365, "step": 45550 }, { "epoch": 26.51920838183935, "grad_norm": 0.11162085086107254, "learning_rate": 1.501880195915109e-05, "loss": -0.9325, "step": 45560 }, { "epoch": 26.525029103608848, "grad_norm": 0.14596424996852875, "learning_rate": 1.499911689380833e-05, "loss": -0.9299, "step": 45570 }, { "epoch": 26.530849825378347, "grad_norm": 0.09477996081113815, "learning_rate": 1.4979442460809683e-05, "loss": -0.9384, "step": 45580 }, { "epoch": 26.536670547147846, "grad_norm": 0.1046941950917244, "learning_rate": 1.4959778666131763e-05, "loss": -0.9351, "step": 45590 }, { "epoch": 26.542491268917345, "grad_norm": 0.1805019974708557, "learning_rate": 1.4940125515747905e-05, "loss": -0.9349, "step": 45600 }, { "epoch": 26.548311990686845, "grad_norm": 0.11514927446842194, "learning_rate": 1.4920483015628211e-05, "loss": -0.9348, "step": 45610 }, { "epoch": 26.554132712456344, "grad_norm": 0.14309288561344147, "learning_rate": 1.490085117173956e-05, "loss": -0.9334, "step": 45620 }, { "epoch": 26.559953434225843, "grad_norm": 0.12807445228099823, "learning_rate": 1.488122999004558e-05, "loss": -0.9375, "step": 45630 }, { "epoch": 26.565774155995342, "grad_norm": 0.1333373486995697, "learning_rate": 1.486161947650666e-05, "loss": -0.9331, "step": 45640 }, { "epoch": 26.57159487776484, "grad_norm": 0.1349499225616455, "learning_rate": 1.4842019637079995e-05, "loss": -0.9352, "step": 45650 }, { "epoch": 26.57741559953434, "grad_norm": 0.21016491949558258, "learning_rate": 1.482243047771944e-05, "loss": -0.9357, "step": 45660 }, { "epoch": 26.583236321303843, "grad_norm": 0.11265542358160019, "learning_rate": 1.4802852004375712e-05, "loss": -0.9367, "step": 45670 }, { "epoch": 26.589057043073343, "grad_norm": 0.11115086823701859, "learning_rate": 1.4783284222996218e-05, "loss": -0.9353, "step": 45680 }, { "epoch": 26.594877764842842, "grad_norm": 0.07308612018823624, "learning_rate": 1.4763727139525135e-05, "loss": -0.9371, "step": 45690 }, { "epoch": 26.60069848661234, "grad_norm": 0.09977754950523376, "learning_rate": 1.4744180759903392e-05, "loss": -0.9361, "step": 45700 }, { "epoch": 26.60651920838184, "grad_norm": 0.1957748532295227, "learning_rate": 1.4724645090068635e-05, "loss": -0.934, "step": 45710 }, { "epoch": 26.61233993015134, "grad_norm": 0.11684589087963104, "learning_rate": 1.4705120135955341e-05, "loss": -0.9367, "step": 45720 }, { "epoch": 26.61816065192084, "grad_norm": 0.12339306622743607, "learning_rate": 1.4685605903494614e-05, "loss": -0.935, "step": 45730 }, { "epoch": 26.623981373690338, "grad_norm": 0.2098325490951538, "learning_rate": 1.46661023986144e-05, "loss": -0.934, "step": 45740 }, { "epoch": 26.629802095459837, "grad_norm": 0.1645570993423462, "learning_rate": 1.4646609627239344e-05, "loss": -0.9323, "step": 45750 }, { "epoch": 26.635622817229336, "grad_norm": 0.09780345112085342, "learning_rate": 1.4627127595290835e-05, "loss": -0.9338, "step": 45760 }, { "epoch": 26.641443538998836, "grad_norm": 0.15948791801929474, "learning_rate": 1.460765630868699e-05, "loss": -0.9366, "step": 45770 }, { "epoch": 26.647264260768335, "grad_norm": 0.14848092198371887, "learning_rate": 1.4588195773342678e-05, "loss": -0.9333, "step": 45780 }, { "epoch": 26.653084982537834, "grad_norm": 0.12979286909103394, "learning_rate": 1.4568745995169485e-05, "loss": -0.9368, "step": 45790 }, { "epoch": 26.658905704307333, "grad_norm": 0.178396537899971, "learning_rate": 1.4549306980075778e-05, "loss": -0.9355, "step": 45800 }, { "epoch": 26.664726426076832, "grad_norm": 0.10501090437173843, "learning_rate": 1.4529878733966557e-05, "loss": -0.9367, "step": 45810 }, { "epoch": 26.67054714784633, "grad_norm": 0.08715013414621353, "learning_rate": 1.4510461262743658e-05, "loss": -0.9327, "step": 45820 }, { "epoch": 26.67636786961583, "grad_norm": 0.15235169231891632, "learning_rate": 1.4491054572305585e-05, "loss": -0.9324, "step": 45830 }, { "epoch": 26.682188591385334, "grad_norm": 0.13894720375537872, "learning_rate": 1.4471658668547566e-05, "loss": -0.9375, "step": 45840 }, { "epoch": 26.688009313154833, "grad_norm": 0.2031213343143463, "learning_rate": 1.4452273557361579e-05, "loss": -0.9337, "step": 45850 }, { "epoch": 26.693830034924332, "grad_norm": 0.2125617265701294, "learning_rate": 1.4432899244636282e-05, "loss": -0.9377, "step": 45860 }, { "epoch": 26.69965075669383, "grad_norm": 0.13251860439777374, "learning_rate": 1.4413535736257134e-05, "loss": -0.9359, "step": 45870 }, { "epoch": 26.70547147846333, "grad_norm": 0.13893243670463562, "learning_rate": 1.439418303810619e-05, "loss": -0.937, "step": 45880 }, { "epoch": 26.71129220023283, "grad_norm": 0.20054438710212708, "learning_rate": 1.4374841156062352e-05, "loss": -0.9344, "step": 45890 }, { "epoch": 26.71711292200233, "grad_norm": 0.17861081659793854, "learning_rate": 1.4355510096001112e-05, "loss": -0.9367, "step": 45900 }, { "epoch": 26.722933643771828, "grad_norm": 0.1284267157316208, "learning_rate": 1.4336189863794786e-05, "loss": -0.9346, "step": 45910 }, { "epoch": 26.728754365541327, "grad_norm": 0.13676974177360535, "learning_rate": 1.4316880465312327e-05, "loss": -0.9365, "step": 45920 }, { "epoch": 26.734575087310827, "grad_norm": 0.10657631605863571, "learning_rate": 1.4297581906419426e-05, "loss": -0.9363, "step": 45930 }, { "epoch": 26.740395809080326, "grad_norm": 0.11923947185277939, "learning_rate": 1.4278294192978475e-05, "loss": -0.9379, "step": 45940 }, { "epoch": 26.746216530849825, "grad_norm": 0.22254857420921326, "learning_rate": 1.4259017330848574e-05, "loss": -0.9361, "step": 45950 }, { "epoch": 26.752037252619324, "grad_norm": 0.0845443457365036, "learning_rate": 1.4239751325885498e-05, "loss": -0.937, "step": 45960 }, { "epoch": 26.757857974388823, "grad_norm": 0.10015898197889328, "learning_rate": 1.4220496183941795e-05, "loss": -0.9372, "step": 45970 }, { "epoch": 26.763678696158323, "grad_norm": 0.2841353714466095, "learning_rate": 1.4201251910866648e-05, "loss": -0.9353, "step": 45980 }, { "epoch": 26.769499417927822, "grad_norm": 0.16383320093154907, "learning_rate": 1.4182018512505957e-05, "loss": -0.9357, "step": 45990 }, { "epoch": 26.77532013969732, "grad_norm": 0.17548856139183044, "learning_rate": 1.4162795994702327e-05, "loss": -0.9338, "step": 46000 }, { "epoch": 26.78114086146682, "grad_norm": 0.14898231625556946, "learning_rate": 1.4143584363295032e-05, "loss": -0.9347, "step": 46010 }, { "epoch": 26.78696158323632, "grad_norm": 0.11930838972330093, "learning_rate": 1.4124383624120101e-05, "loss": -0.935, "step": 46020 }, { "epoch": 26.792782305005822, "grad_norm": 0.2500593066215515, "learning_rate": 1.4105193783010151e-05, "loss": -0.9332, "step": 46030 }, { "epoch": 26.79860302677532, "grad_norm": 0.13947990536689758, "learning_rate": 1.4086014845794621e-05, "loss": -0.9352, "step": 46040 }, { "epoch": 26.80442374854482, "grad_norm": 0.1440248191356659, "learning_rate": 1.4066846818299489e-05, "loss": -0.9359, "step": 46050 }, { "epoch": 26.81024447031432, "grad_norm": 0.09003128111362457, "learning_rate": 1.4047689706347555e-05, "loss": -0.9341, "step": 46060 }, { "epoch": 26.81606519208382, "grad_norm": 0.2164662778377533, "learning_rate": 1.402854351575822e-05, "loss": -0.9343, "step": 46070 }, { "epoch": 26.82188591385332, "grad_norm": 0.10622787475585938, "learning_rate": 1.4009408252347588e-05, "loss": -0.9359, "step": 46080 }, { "epoch": 26.827706635622818, "grad_norm": 0.1275077611207962, "learning_rate": 1.399028392192846e-05, "loss": -0.9343, "step": 46090 }, { "epoch": 26.833527357392317, "grad_norm": 0.08104334771633148, "learning_rate": 1.397117053031029e-05, "loss": -0.9312, "step": 46100 }, { "epoch": 26.839348079161816, "grad_norm": 0.13584357500076294, "learning_rate": 1.3952068083299213e-05, "loss": -0.9361, "step": 46110 }, { "epoch": 26.845168800931315, "grad_norm": 0.17503076791763306, "learning_rate": 1.3932976586698082e-05, "loss": -0.9352, "step": 46120 }, { "epoch": 26.850989522700814, "grad_norm": 0.1208733469247818, "learning_rate": 1.3913896046306363e-05, "loss": -0.9375, "step": 46130 }, { "epoch": 26.856810244470314, "grad_norm": 0.16373197734355927, "learning_rate": 1.389482646792023e-05, "loss": -0.9304, "step": 46140 }, { "epoch": 26.862630966239813, "grad_norm": 0.14532312750816345, "learning_rate": 1.387576785733251e-05, "loss": -0.9329, "step": 46150 }, { "epoch": 26.868451688009312, "grad_norm": 0.1555541604757309, "learning_rate": 1.3856720220332703e-05, "loss": -0.9297, "step": 46160 }, { "epoch": 26.87427240977881, "grad_norm": 0.16531722247600555, "learning_rate": 1.383768356270701e-05, "loss": -0.9361, "step": 46170 }, { "epoch": 26.88009313154831, "grad_norm": 0.19427330791950226, "learning_rate": 1.3818657890238207e-05, "loss": -0.9343, "step": 46180 }, { "epoch": 26.88591385331781, "grad_norm": 0.16101421415805817, "learning_rate": 1.3799643208705859e-05, "loss": -0.9364, "step": 46190 }, { "epoch": 26.891734575087312, "grad_norm": 0.1320715844631195, "learning_rate": 1.3780639523886058e-05, "loss": -0.9301, "step": 46200 }, { "epoch": 26.89755529685681, "grad_norm": 0.12471801042556763, "learning_rate": 1.3761646841551668e-05, "loss": -0.9339, "step": 46210 }, { "epoch": 26.90337601862631, "grad_norm": 0.09649226069450378, "learning_rate": 1.3742665167472146e-05, "loss": -0.9364, "step": 46220 }, { "epoch": 26.90919674039581, "grad_norm": 0.1860319972038269, "learning_rate": 1.372369450741363e-05, "loss": -0.9343, "step": 46230 }, { "epoch": 26.91501746216531, "grad_norm": 0.0840318351984024, "learning_rate": 1.3704734867138901e-05, "loss": -0.937, "step": 46240 }, { "epoch": 26.92083818393481, "grad_norm": 0.09392165392637253, "learning_rate": 1.36857862524074e-05, "loss": -0.9356, "step": 46250 }, { "epoch": 26.926658905704308, "grad_norm": 0.14209674298763275, "learning_rate": 1.3666848668975213e-05, "loss": -0.9321, "step": 46260 }, { "epoch": 26.932479627473807, "grad_norm": 0.15075460076332092, "learning_rate": 1.3647922122595063e-05, "loss": -0.9364, "step": 46270 }, { "epoch": 26.938300349243306, "grad_norm": 0.12723512947559357, "learning_rate": 1.3629006619016366e-05, "loss": -0.9299, "step": 46280 }, { "epoch": 26.944121071012805, "grad_norm": 0.10830626636743546, "learning_rate": 1.3610102163985139e-05, "loss": -0.935, "step": 46290 }, { "epoch": 26.949941792782305, "grad_norm": 0.17076809704303741, "learning_rate": 1.3591208763244057e-05, "loss": -0.9349, "step": 46300 }, { "epoch": 26.955762514551804, "grad_norm": 0.08239228278398514, "learning_rate": 1.3572326422532428e-05, "loss": -0.931, "step": 46310 }, { "epoch": 26.961583236321303, "grad_norm": 0.16005606949329376, "learning_rate": 1.355345514758622e-05, "loss": -0.9356, "step": 46320 }, { "epoch": 26.967403958090802, "grad_norm": 0.15125302970409393, "learning_rate": 1.3534594944138007e-05, "loss": -0.9339, "step": 46330 }, { "epoch": 26.9732246798603, "grad_norm": 0.21529582142829895, "learning_rate": 1.3515745817917069e-05, "loss": -0.9348, "step": 46340 }, { "epoch": 26.9790454016298, "grad_norm": 0.15618662536144257, "learning_rate": 1.3496907774649208e-05, "loss": -0.9367, "step": 46350 }, { "epoch": 26.9848661233993, "grad_norm": 0.07845944911241531, "learning_rate": 1.3478080820056987e-05, "loss": -0.9327, "step": 46360 }, { "epoch": 26.990686845168803, "grad_norm": 0.08709315955638885, "learning_rate": 1.3459264959859474e-05, "loss": -0.9362, "step": 46370 }, { "epoch": 26.996507566938302, "grad_norm": 0.1613200604915619, "learning_rate": 1.3440460199772487e-05, "loss": -0.9364, "step": 46380 }, { "epoch": 27.0023282887078, "grad_norm": 0.2585001587867737, "learning_rate": 1.3421666545508382e-05, "loss": -0.9352, "step": 46390 }, { "epoch": 27.0081490104773, "grad_norm": 0.12435885518789291, "learning_rate": 1.3402884002776194e-05, "loss": -0.9363, "step": 46400 }, { "epoch": 27.0139697322468, "grad_norm": 0.15240521728992462, "learning_rate": 1.3384112577281555e-05, "loss": -0.9344, "step": 46410 }, { "epoch": 27.0197904540163, "grad_norm": 0.13507838547229767, "learning_rate": 1.3365352274726711e-05, "loss": -0.9337, "step": 46420 }, { "epoch": 27.025611175785798, "grad_norm": 0.17862658202648163, "learning_rate": 1.3346603100810578e-05, "loss": -0.9323, "step": 46430 }, { "epoch": 27.031431897555297, "grad_norm": 0.13321833312511444, "learning_rate": 1.3327865061228645e-05, "loss": -0.9369, "step": 46440 }, { "epoch": 27.037252619324796, "grad_norm": 0.11980454623699188, "learning_rate": 1.330913816167304e-05, "loss": -0.9274, "step": 46450 }, { "epoch": 27.043073341094296, "grad_norm": 0.15809811651706696, "learning_rate": 1.3290422407832492e-05, "loss": -0.9334, "step": 46460 }, { "epoch": 27.048894062863795, "grad_norm": 0.1279197782278061, "learning_rate": 1.3271717805392354e-05, "loss": -0.9352, "step": 46470 }, { "epoch": 27.054714784633294, "grad_norm": 0.09550513327121735, "learning_rate": 1.3253024360034582e-05, "loss": -0.9357, "step": 46480 }, { "epoch": 27.060535506402793, "grad_norm": 0.0805693119764328, "learning_rate": 1.323434207743779e-05, "loss": -0.9317, "step": 46490 }, { "epoch": 27.066356228172292, "grad_norm": 0.12632739543914795, "learning_rate": 1.3215670963277105e-05, "loss": -0.9334, "step": 46500 }, { "epoch": 27.07217694994179, "grad_norm": 0.07718442380428314, "learning_rate": 1.3197011023224376e-05, "loss": -0.9394, "step": 46510 }, { "epoch": 27.07799767171129, "grad_norm": 0.2483242154121399, "learning_rate": 1.3178362262947941e-05, "loss": -0.9294, "step": 46520 }, { "epoch": 27.08381839348079, "grad_norm": 0.1292409896850586, "learning_rate": 1.3159724688112845e-05, "loss": -0.9348, "step": 46530 }, { "epoch": 27.08963911525029, "grad_norm": 0.2081037014722824, "learning_rate": 1.3141098304380683e-05, "loss": -0.9367, "step": 46540 }, { "epoch": 27.095459837019792, "grad_norm": 0.11760297417640686, "learning_rate": 1.3122483117409651e-05, "loss": -0.9334, "step": 46550 }, { "epoch": 27.10128055878929, "grad_norm": 0.32095420360565186, "learning_rate": 1.3103879132854552e-05, "loss": -0.9325, "step": 46560 }, { "epoch": 27.10710128055879, "grad_norm": 0.13511280715465546, "learning_rate": 1.3085286356366771e-05, "loss": -0.9337, "step": 46570 }, { "epoch": 27.11292200232829, "grad_norm": 0.102131687104702, "learning_rate": 1.3066704793594337e-05, "loss": -0.9346, "step": 46580 }, { "epoch": 27.11874272409779, "grad_norm": 0.17466257512569427, "learning_rate": 1.3048134450181816e-05, "loss": -0.9359, "step": 46590 }, { "epoch": 27.124563445867288, "grad_norm": 0.14442938566207886, "learning_rate": 1.3029575331770394e-05, "loss": -0.934, "step": 46600 }, { "epoch": 27.130384167636787, "grad_norm": 0.21211472153663635, "learning_rate": 1.3011027443997837e-05, "loss": -0.9374, "step": 46610 }, { "epoch": 27.136204889406287, "grad_norm": 0.14850202202796936, "learning_rate": 1.2992490792498507e-05, "loss": -0.9347, "step": 46620 }, { "epoch": 27.142025611175786, "grad_norm": 0.14633601903915405, "learning_rate": 1.297396538290333e-05, "loss": -0.9339, "step": 46630 }, { "epoch": 27.147846332945285, "grad_norm": 0.1394307166337967, "learning_rate": 1.2955451220839888e-05, "loss": -0.9382, "step": 46640 }, { "epoch": 27.153667054714784, "grad_norm": 0.19916392862796783, "learning_rate": 1.2936948311932223e-05, "loss": -0.9335, "step": 46650 }, { "epoch": 27.159487776484283, "grad_norm": 0.10808055102825165, "learning_rate": 1.2918456661801104e-05, "loss": -0.9384, "step": 46660 }, { "epoch": 27.165308498253783, "grad_norm": 0.09808054566383362, "learning_rate": 1.2899976276063736e-05, "loss": -0.9351, "step": 46670 }, { "epoch": 27.171129220023282, "grad_norm": 0.1287054717540741, "learning_rate": 1.2881507160334022e-05, "loss": -0.9337, "step": 46680 }, { "epoch": 27.17694994179278, "grad_norm": 0.10550316423177719, "learning_rate": 1.286304932022238e-05, "loss": -0.9373, "step": 46690 }, { "epoch": 27.18277066356228, "grad_norm": 0.1716678887605667, "learning_rate": 1.2844602761335806e-05, "loss": -0.9334, "step": 46700 }, { "epoch": 27.18859138533178, "grad_norm": 0.11007659882307053, "learning_rate": 1.2826167489277885e-05, "loss": -0.9364, "step": 46710 }, { "epoch": 27.194412107101282, "grad_norm": 0.17772063612937927, "learning_rate": 1.2807743509648745e-05, "loss": -0.938, "step": 46720 }, { "epoch": 27.20023282887078, "grad_norm": 0.11154142767190933, "learning_rate": 1.2789330828045149e-05, "loss": -0.9337, "step": 46730 }, { "epoch": 27.20605355064028, "grad_norm": 0.08174966275691986, "learning_rate": 1.2770929450060332e-05, "loss": -0.9317, "step": 46740 }, { "epoch": 27.21187427240978, "grad_norm": 0.1299540400505066, "learning_rate": 1.2752539381284184e-05, "loss": -0.9323, "step": 46750 }, { "epoch": 27.21769499417928, "grad_norm": 0.1725776046514511, "learning_rate": 1.273416062730311e-05, "loss": -0.9354, "step": 46760 }, { "epoch": 27.22351571594878, "grad_norm": 0.10506072640419006, "learning_rate": 1.2715793193700088e-05, "loss": -0.9357, "step": 46770 }, { "epoch": 27.229336437718278, "grad_norm": 0.08075789362192154, "learning_rate": 1.2697437086054664e-05, "loss": -0.9309, "step": 46780 }, { "epoch": 27.235157159487777, "grad_norm": 0.1898321956396103, "learning_rate": 1.2679092309942937e-05, "loss": -0.9311, "step": 46790 }, { "epoch": 27.240977881257276, "grad_norm": 0.12097009271383286, "learning_rate": 1.266075887093755e-05, "loss": -0.9382, "step": 46800 }, { "epoch": 27.246798603026775, "grad_norm": 0.20731967687606812, "learning_rate": 1.2642436774607757e-05, "loss": -0.9343, "step": 46810 }, { "epoch": 27.252619324796274, "grad_norm": 0.128164142370224, "learning_rate": 1.2624126026519278e-05, "loss": -0.9389, "step": 46820 }, { "epoch": 27.258440046565774, "grad_norm": 0.12265448272228241, "learning_rate": 1.2605826632234474e-05, "loss": -0.9359, "step": 46830 }, { "epoch": 27.264260768335273, "grad_norm": 0.08823522180318832, "learning_rate": 1.2587538597312198e-05, "loss": -0.9366, "step": 46840 }, { "epoch": 27.270081490104772, "grad_norm": 0.08815411478281021, "learning_rate": 1.2569261927307884e-05, "loss": -0.931, "step": 46850 }, { "epoch": 27.27590221187427, "grad_norm": 0.10061539709568024, "learning_rate": 1.2550996627773493e-05, "loss": -0.9347, "step": 46860 }, { "epoch": 27.28172293364377, "grad_norm": 0.22792072594165802, "learning_rate": 1.2532742704257527e-05, "loss": -0.9334, "step": 46870 }, { "epoch": 27.28754365541327, "grad_norm": 0.12663593888282776, "learning_rate": 1.2514500162305087e-05, "loss": -0.9375, "step": 46880 }, { "epoch": 27.29336437718277, "grad_norm": 0.10397398471832275, "learning_rate": 1.2496269007457728e-05, "loss": -0.934, "step": 46890 }, { "epoch": 27.29918509895227, "grad_norm": 0.13426513969898224, "learning_rate": 1.2478049245253625e-05, "loss": -0.9375, "step": 46900 }, { "epoch": 27.30500582072177, "grad_norm": 0.14305545389652252, "learning_rate": 1.2459840881227459e-05, "loss": -0.9338, "step": 46910 }, { "epoch": 27.31082654249127, "grad_norm": 0.09502486139535904, "learning_rate": 1.2441643920910435e-05, "loss": -0.9349, "step": 46920 }, { "epoch": 27.31664726426077, "grad_norm": 0.08501165360212326, "learning_rate": 1.2423458369830322e-05, "loss": -0.9379, "step": 46930 }, { "epoch": 27.32246798603027, "grad_norm": 0.12090422958135605, "learning_rate": 1.2405284233511406e-05, "loss": -0.9331, "step": 46940 }, { "epoch": 27.328288707799768, "grad_norm": 0.12147846072912216, "learning_rate": 1.2387121517474487e-05, "loss": -0.9367, "step": 46950 }, { "epoch": 27.334109429569267, "grad_norm": 0.11219131201505661, "learning_rate": 1.2368970227236975e-05, "loss": -0.9348, "step": 46960 }, { "epoch": 27.339930151338766, "grad_norm": 0.15743152797222137, "learning_rate": 1.2350830368312688e-05, "loss": -0.936, "step": 46970 }, { "epoch": 27.345750873108265, "grad_norm": 0.1534235179424286, "learning_rate": 1.2332701946212083e-05, "loss": -0.9341, "step": 46980 }, { "epoch": 27.351571594877765, "grad_norm": 0.22071358561515808, "learning_rate": 1.2314584966442077e-05, "loss": -0.9338, "step": 46990 }, { "epoch": 27.357392316647264, "grad_norm": 0.07342888414859772, "learning_rate": 1.2296479434506136e-05, "loss": -0.9362, "step": 47000 }, { "epoch": 27.363213038416763, "grad_norm": 0.09361544251441956, "learning_rate": 1.2278385355904232e-05, "loss": -0.9382, "step": 47010 }, { "epoch": 27.369033760186262, "grad_norm": 0.1576872318983078, "learning_rate": 1.2260302736132867e-05, "loss": -0.9384, "step": 47020 }, { "epoch": 27.37485448195576, "grad_norm": 0.10419673472642899, "learning_rate": 1.2242231580685098e-05, "loss": -0.9339, "step": 47030 }, { "epoch": 27.38067520372526, "grad_norm": 0.1168067455291748, "learning_rate": 1.2224171895050413e-05, "loss": -0.9341, "step": 47040 }, { "epoch": 27.38649592549476, "grad_norm": 0.19829456508159637, "learning_rate": 1.2206123684714903e-05, "loss": -0.9356, "step": 47050 }, { "epoch": 27.39231664726426, "grad_norm": 0.19276471436023712, "learning_rate": 1.2188086955161132e-05, "loss": -0.9358, "step": 47060 }, { "epoch": 27.398137369033762, "grad_norm": 0.11624128371477127, "learning_rate": 1.2170061711868175e-05, "loss": -0.9357, "step": 47070 }, { "epoch": 27.40395809080326, "grad_norm": 0.11792868375778198, "learning_rate": 1.215204796031163e-05, "loss": -0.9355, "step": 47080 }, { "epoch": 27.40977881257276, "grad_norm": 0.10175003856420517, "learning_rate": 1.2134045705963599e-05, "loss": -0.9391, "step": 47090 }, { "epoch": 27.41559953434226, "grad_norm": 0.1321471929550171, "learning_rate": 1.2116054954292689e-05, "loss": -0.9367, "step": 47100 }, { "epoch": 27.42142025611176, "grad_norm": 0.09889103472232819, "learning_rate": 1.2098075710764011e-05, "loss": -0.9357, "step": 47110 }, { "epoch": 27.427240977881258, "grad_norm": 0.10617395490407944, "learning_rate": 1.2080107980839183e-05, "loss": -0.935, "step": 47120 }, { "epoch": 27.433061699650757, "grad_norm": 0.14091980457305908, "learning_rate": 1.2062151769976343e-05, "loss": -0.9355, "step": 47130 }, { "epoch": 27.438882421420256, "grad_norm": 0.09020212292671204, "learning_rate": 1.204420708363011e-05, "loss": -0.9366, "step": 47140 }, { "epoch": 27.444703143189756, "grad_norm": 0.08935444802045822, "learning_rate": 1.2026273927251597e-05, "loss": -0.9317, "step": 47150 }, { "epoch": 27.450523864959255, "grad_norm": 0.18572364747524261, "learning_rate": 1.2008352306288424e-05, "loss": -0.9355, "step": 47160 }, { "epoch": 27.456344586728754, "grad_norm": 0.14041262865066528, "learning_rate": 1.1990442226184695e-05, "loss": -0.9376, "step": 47170 }, { "epoch": 27.462165308498253, "grad_norm": 0.25085094571113586, "learning_rate": 1.1972543692381066e-05, "loss": -0.9349, "step": 47180 }, { "epoch": 27.467986030267753, "grad_norm": 0.17013108730316162, "learning_rate": 1.1954656710314576e-05, "loss": -0.9355, "step": 47190 }, { "epoch": 27.47380675203725, "grad_norm": 0.18727003037929535, "learning_rate": 1.1936781285418875e-05, "loss": -0.9363, "step": 47200 }, { "epoch": 27.47962747380675, "grad_norm": 0.13076505064964294, "learning_rate": 1.1918917423123993e-05, "loss": -0.935, "step": 47210 }, { "epoch": 27.48544819557625, "grad_norm": 0.1898912787437439, "learning_rate": 1.1901065128856537e-05, "loss": -0.933, "step": 47220 }, { "epoch": 27.49126891734575, "grad_norm": 0.13561826944351196, "learning_rate": 1.1883224408039551e-05, "loss": -0.9376, "step": 47230 }, { "epoch": 27.49708963911525, "grad_norm": 0.1448095589876175, "learning_rate": 1.1865395266092578e-05, "loss": -0.936, "step": 47240 }, { "epoch": 27.50291036088475, "grad_norm": 0.08067786693572998, "learning_rate": 1.1847577708431633e-05, "loss": -0.9354, "step": 47250 }, { "epoch": 27.50873108265425, "grad_norm": 0.12923558056354523, "learning_rate": 1.1829771740469225e-05, "loss": -0.9375, "step": 47260 }, { "epoch": 27.51455180442375, "grad_norm": 0.08333507180213928, "learning_rate": 1.1811977367614324e-05, "loss": -0.9382, "step": 47270 }, { "epoch": 27.52037252619325, "grad_norm": 0.13060836493968964, "learning_rate": 1.1794194595272412e-05, "loss": -0.9263, "step": 47280 }, { "epoch": 27.52619324796275, "grad_norm": 0.1447184681892395, "learning_rate": 1.1776423428845423e-05, "loss": -0.9366, "step": 47290 }, { "epoch": 27.532013969732247, "grad_norm": 0.12155622988939285, "learning_rate": 1.1758663873731756e-05, "loss": -0.9369, "step": 47300 }, { "epoch": 27.537834691501747, "grad_norm": 0.16523060202598572, "learning_rate": 1.1740915935326302e-05, "loss": -0.9334, "step": 47310 }, { "epoch": 27.543655413271246, "grad_norm": 0.1069270595908165, "learning_rate": 1.1723179619020396e-05, "loss": -0.9368, "step": 47320 }, { "epoch": 27.549476135040745, "grad_norm": 0.12940379977226257, "learning_rate": 1.1705454930201914e-05, "loss": -0.9353, "step": 47330 }, { "epoch": 27.555296856810244, "grad_norm": 0.09745872020721436, "learning_rate": 1.1687741874255087e-05, "loss": -0.932, "step": 47340 }, { "epoch": 27.561117578579744, "grad_norm": 0.13190150260925293, "learning_rate": 1.1670040456560728e-05, "loss": -0.9368, "step": 47350 }, { "epoch": 27.566938300349243, "grad_norm": 0.19656634330749512, "learning_rate": 1.1652350682496005e-05, "loss": -0.9335, "step": 47360 }, { "epoch": 27.572759022118742, "grad_norm": 0.093878373503685, "learning_rate": 1.163467255743465e-05, "loss": -0.9365, "step": 47370 }, { "epoch": 27.57857974388824, "grad_norm": 0.15094265341758728, "learning_rate": 1.1617006086746796e-05, "loss": -0.9346, "step": 47380 }, { "epoch": 27.58440046565774, "grad_norm": 0.12661106884479523, "learning_rate": 1.1599351275799047e-05, "loss": -0.9363, "step": 47390 }, { "epoch": 27.59022118742724, "grad_norm": 0.09168675541877747, "learning_rate": 1.1581708129954466e-05, "loss": -0.9365, "step": 47400 }, { "epoch": 27.59604190919674, "grad_norm": 0.1115710437297821, "learning_rate": 1.1564076654572587e-05, "loss": -0.9377, "step": 47410 }, { "epoch": 27.601862630966238, "grad_norm": 0.08452600240707397, "learning_rate": 1.1546456855009358e-05, "loss": -0.9379, "step": 47420 }, { "epoch": 27.60768335273574, "grad_norm": 0.12876984477043152, "learning_rate": 1.1528848736617248e-05, "loss": -0.9341, "step": 47430 }, { "epoch": 27.61350407450524, "grad_norm": 0.09114592522382736, "learning_rate": 1.1511252304745112e-05, "loss": -0.9366, "step": 47440 }, { "epoch": 27.61932479627474, "grad_norm": 0.14075422286987305, "learning_rate": 1.1493667564738297e-05, "loss": -0.923, "step": 47450 }, { "epoch": 27.62514551804424, "grad_norm": 0.07908499240875244, "learning_rate": 1.1476094521938574e-05, "loss": -0.9371, "step": 47460 }, { "epoch": 27.630966239813738, "grad_norm": 0.12373363971710205, "learning_rate": 1.1458533181684167e-05, "loss": -0.9314, "step": 47470 }, { "epoch": 27.636786961583237, "grad_norm": 0.09262668341398239, "learning_rate": 1.1440983549309753e-05, "loss": -0.937, "step": 47480 }, { "epoch": 27.642607683352736, "grad_norm": 0.11523168534040451, "learning_rate": 1.1423445630146434e-05, "loss": -0.9395, "step": 47490 }, { "epoch": 27.648428405122235, "grad_norm": 0.1372528225183487, "learning_rate": 1.1405919429521799e-05, "loss": -0.9323, "step": 47500 }, { "epoch": 27.654249126891735, "grad_norm": 0.12039178609848022, "learning_rate": 1.1388404952759802e-05, "loss": -0.9357, "step": 47510 }, { "epoch": 27.660069848661234, "grad_norm": 0.12578725814819336, "learning_rate": 1.1370902205180923e-05, "loss": -0.9353, "step": 47520 }, { "epoch": 27.665890570430733, "grad_norm": 0.11506897211074829, "learning_rate": 1.1353411192101987e-05, "loss": -0.9344, "step": 47530 }, { "epoch": 27.671711292200232, "grad_norm": 0.12433168292045593, "learning_rate": 1.133593191883634e-05, "loss": -0.9368, "step": 47540 }, { "epoch": 27.67753201396973, "grad_norm": 0.11640393733978271, "learning_rate": 1.1318464390693711e-05, "loss": -0.9369, "step": 47550 }, { "epoch": 27.68335273573923, "grad_norm": 0.09882389008998871, "learning_rate": 1.1301008612980257e-05, "loss": -0.9353, "step": 47560 }, { "epoch": 27.68917345750873, "grad_norm": 0.15993759036064148, "learning_rate": 1.128356459099863e-05, "loss": -0.9345, "step": 47570 }, { "epoch": 27.69499417927823, "grad_norm": 0.22500616312026978, "learning_rate": 1.1266132330047802e-05, "loss": -0.9358, "step": 47580 }, { "epoch": 27.70081490104773, "grad_norm": 0.07864271849393845, "learning_rate": 1.1248711835423281e-05, "loss": -0.933, "step": 47590 }, { "epoch": 27.70663562281723, "grad_norm": 0.09311271458864212, "learning_rate": 1.123130311241693e-05, "loss": -0.9368, "step": 47600 }, { "epoch": 27.71245634458673, "grad_norm": 0.09545977413654327, "learning_rate": 1.1213906166317068e-05, "loss": -0.9365, "step": 47610 }, { "epoch": 27.71827706635623, "grad_norm": 0.2775057256221771, "learning_rate": 1.1196521002408427e-05, "loss": -0.9319, "step": 47620 }, { "epoch": 27.72409778812573, "grad_norm": 0.09006386995315552, "learning_rate": 1.1179147625972159e-05, "loss": -0.9352, "step": 47630 }, { "epoch": 27.729918509895228, "grad_norm": 0.09248031675815582, "learning_rate": 1.1161786042285822e-05, "loss": -0.9385, "step": 47640 }, { "epoch": 27.735739231664727, "grad_norm": 0.14558258652687073, "learning_rate": 1.1144436256623447e-05, "loss": -0.9357, "step": 47650 }, { "epoch": 27.741559953434226, "grad_norm": 0.13219185173511505, "learning_rate": 1.1127098274255392e-05, "loss": -0.9358, "step": 47660 }, { "epoch": 27.747380675203726, "grad_norm": 0.10747206211090088, "learning_rate": 1.1109772100448512e-05, "loss": -0.9376, "step": 47670 }, { "epoch": 27.753201396973225, "grad_norm": 0.14688387513160706, "learning_rate": 1.1092457740466033e-05, "loss": -0.9353, "step": 47680 }, { "epoch": 27.759022118742724, "grad_norm": 0.1264665424823761, "learning_rate": 1.10751551995676e-05, "loss": -0.9296, "step": 47690 }, { "epoch": 27.764842840512223, "grad_norm": 0.15295173227787018, "learning_rate": 1.1057864483009262e-05, "loss": -0.9356, "step": 47700 }, { "epoch": 27.770663562281722, "grad_norm": 0.1462605893611908, "learning_rate": 1.1040585596043473e-05, "loss": -0.9339, "step": 47710 }, { "epoch": 27.77648428405122, "grad_norm": 0.12782487273216248, "learning_rate": 1.1023318543919148e-05, "loss": -0.936, "step": 47720 }, { "epoch": 27.78230500582072, "grad_norm": 0.17888997495174408, "learning_rate": 1.10060633318815e-05, "loss": -0.934, "step": 47730 }, { "epoch": 27.78812572759022, "grad_norm": 0.10906761139631271, "learning_rate": 1.0988819965172248e-05, "loss": -0.9358, "step": 47740 }, { "epoch": 27.79394644935972, "grad_norm": 0.2056194692850113, "learning_rate": 1.0971588449029462e-05, "loss": -0.9308, "step": 47750 }, { "epoch": 27.79976717112922, "grad_norm": 0.2123502790927887, "learning_rate": 1.095436878868762e-05, "loss": -0.9293, "step": 47760 }, { "epoch": 27.80558789289872, "grad_norm": 0.14177429676055908, "learning_rate": 1.0937160989377598e-05, "loss": -0.9357, "step": 47770 }, { "epoch": 27.81140861466822, "grad_norm": 0.08065170049667358, "learning_rate": 1.0919965056326676e-05, "loss": -0.9365, "step": 47780 }, { "epoch": 27.81722933643772, "grad_norm": 0.07044228911399841, "learning_rate": 1.0902780994758504e-05, "loss": -0.9368, "step": 47790 }, { "epoch": 27.82305005820722, "grad_norm": 0.1342020183801651, "learning_rate": 1.0885608809893193e-05, "loss": -0.9378, "step": 47800 }, { "epoch": 27.828870779976718, "grad_norm": 0.15029336512088776, "learning_rate": 1.0868448506947142e-05, "loss": -0.9377, "step": 47810 }, { "epoch": 27.834691501746217, "grad_norm": 0.21891477704048157, "learning_rate": 1.0851300091133243e-05, "loss": -0.9337, "step": 47820 }, { "epoch": 27.840512223515717, "grad_norm": 0.13810457289218903, "learning_rate": 1.083416356766071e-05, "loss": -0.9366, "step": 47830 }, { "epoch": 27.846332945285216, "grad_norm": 0.09559766948223114, "learning_rate": 1.0817038941735175e-05, "loss": -0.9335, "step": 47840 }, { "epoch": 27.852153667054715, "grad_norm": 0.14166142046451569, "learning_rate": 1.0799926218558642e-05, "loss": -0.9358, "step": 47850 }, { "epoch": 27.857974388824214, "grad_norm": 0.20136995613574982, "learning_rate": 1.0782825403329488e-05, "loss": -0.9358, "step": 47860 }, { "epoch": 27.863795110593713, "grad_norm": 0.18242131173610687, "learning_rate": 1.076573650124254e-05, "loss": -0.9337, "step": 47870 }, { "epoch": 27.869615832363213, "grad_norm": 0.09794682264328003, "learning_rate": 1.0748659517488891e-05, "loss": -0.9334, "step": 47880 }, { "epoch": 27.875436554132712, "grad_norm": 0.09833754599094391, "learning_rate": 1.0731594457256138e-05, "loss": -0.9357, "step": 47890 }, { "epoch": 27.88125727590221, "grad_norm": 0.07752703130245209, "learning_rate": 1.0714541325728139e-05, "loss": -0.9357, "step": 47900 }, { "epoch": 27.88707799767171, "grad_norm": 0.11148948967456818, "learning_rate": 1.0697500128085231e-05, "loss": -0.9378, "step": 47910 }, { "epoch": 27.89289871944121, "grad_norm": 0.13520799577236176, "learning_rate": 1.0680470869504055e-05, "loss": -0.9371, "step": 47920 }, { "epoch": 27.89871944121071, "grad_norm": 0.20044127106666565, "learning_rate": 1.066345355515766e-05, "loss": -0.9359, "step": 47930 }, { "epoch": 27.904540162980208, "grad_norm": 0.12611447274684906, "learning_rate": 1.0646448190215453e-05, "loss": -0.9395, "step": 47940 }, { "epoch": 27.91036088474971, "grad_norm": 0.13063271343708038, "learning_rate": 1.0629454779843217e-05, "loss": -0.9357, "step": 47950 }, { "epoch": 27.91618160651921, "grad_norm": 0.07053439319133759, "learning_rate": 1.0612473329203082e-05, "loss": -0.9376, "step": 47960 }, { "epoch": 27.92200232828871, "grad_norm": 0.19265025854110718, "learning_rate": 1.0595503843453596e-05, "loss": -0.9292, "step": 47970 }, { "epoch": 27.92782305005821, "grad_norm": 0.1277347356081009, "learning_rate": 1.0578546327749634e-05, "loss": -0.9329, "step": 47980 }, { "epoch": 27.933643771827708, "grad_norm": 0.08385191857814789, "learning_rate": 1.0561600787242425e-05, "loss": -0.935, "step": 47990 }, { "epoch": 27.939464493597207, "grad_norm": 0.06821504980325699, "learning_rate": 1.0544667227079591e-05, "loss": -0.9334, "step": 48000 }, { "epoch": 27.945285215366706, "grad_norm": 0.11841904371976852, "learning_rate": 1.0527745652405085e-05, "loss": -0.9348, "step": 48010 }, { "epoch": 27.951105937136205, "grad_norm": 0.0736529752612114, "learning_rate": 1.051083606835927e-05, "loss": -0.9359, "step": 48020 }, { "epoch": 27.956926658905704, "grad_norm": 0.13227030634880066, "learning_rate": 1.049393848007878e-05, "loss": -0.9348, "step": 48030 }, { "epoch": 27.962747380675204, "grad_norm": 0.235407754778862, "learning_rate": 1.0477052892696709e-05, "loss": -0.9334, "step": 48040 }, { "epoch": 27.968568102444703, "grad_norm": 0.10193449258804321, "learning_rate": 1.0460179311342394e-05, "loss": -0.9315, "step": 48050 }, { "epoch": 27.974388824214202, "grad_norm": 0.08715729415416718, "learning_rate": 1.0443317741141634e-05, "loss": -0.9371, "step": 48060 }, { "epoch": 27.9802095459837, "grad_norm": 0.13327284157276154, "learning_rate": 1.0426468187216514e-05, "loss": -0.9343, "step": 48070 }, { "epoch": 27.9860302677532, "grad_norm": 0.09007088094949722, "learning_rate": 1.0409630654685477e-05, "loss": -0.9386, "step": 48080 }, { "epoch": 27.9918509895227, "grad_norm": 0.08323445916175842, "learning_rate": 1.039280514866332e-05, "loss": -0.937, "step": 48090 }, { "epoch": 27.9976717112922, "grad_norm": 0.10889314860105515, "learning_rate": 1.0375991674261198e-05, "loss": -0.9355, "step": 48100 }, { "epoch": 28.003492433061698, "grad_norm": 0.07639646530151367, "learning_rate": 1.0359190236586575e-05, "loss": -0.9344, "step": 48110 }, { "epoch": 28.009313154831197, "grad_norm": 0.09868007153272629, "learning_rate": 1.0342400840743322e-05, "loss": -0.9372, "step": 48120 }, { "epoch": 28.0151338766007, "grad_norm": 0.28157004714012146, "learning_rate": 1.0325623491831593e-05, "loss": -0.9381, "step": 48130 }, { "epoch": 28.0209545983702, "grad_norm": 0.18366900086402893, "learning_rate": 1.0308858194947906e-05, "loss": -0.935, "step": 48140 }, { "epoch": 28.0267753201397, "grad_norm": 0.10995405912399292, "learning_rate": 1.0292104955185111e-05, "loss": -0.9323, "step": 48150 }, { "epoch": 28.032596041909198, "grad_norm": 0.11212825030088425, "learning_rate": 1.0275363777632396e-05, "loss": -0.9378, "step": 48160 }, { "epoch": 28.038416763678697, "grad_norm": 0.08148346841335297, "learning_rate": 1.0258634667375321e-05, "loss": -0.9308, "step": 48170 }, { "epoch": 28.044237485448196, "grad_norm": 0.18017105758190155, "learning_rate": 1.02419176294957e-05, "loss": -0.9376, "step": 48180 }, { "epoch": 28.050058207217695, "grad_norm": 0.07684989273548126, "learning_rate": 1.0225212669071782e-05, "loss": -0.9374, "step": 48190 }, { "epoch": 28.055878928987195, "grad_norm": 0.09716226905584335, "learning_rate": 1.0208519791178029e-05, "loss": -0.9382, "step": 48200 }, { "epoch": 28.061699650756694, "grad_norm": 0.21392804384231567, "learning_rate": 1.019183900088535e-05, "loss": -0.9368, "step": 48210 }, { "epoch": 28.067520372526193, "grad_norm": 0.18625286221504211, "learning_rate": 1.0175170303260906e-05, "loss": -0.9359, "step": 48220 }, { "epoch": 28.073341094295692, "grad_norm": 0.1238577589392662, "learning_rate": 1.0158513703368206e-05, "loss": -0.9355, "step": 48230 }, { "epoch": 28.07916181606519, "grad_norm": 0.10363657027482986, "learning_rate": 1.0141869206267095e-05, "loss": -0.9373, "step": 48240 }, { "epoch": 28.08498253783469, "grad_norm": 0.14242391288280487, "learning_rate": 1.0125236817013723e-05, "loss": -0.9349, "step": 48250 }, { "epoch": 28.09080325960419, "grad_norm": 0.10511238873004913, "learning_rate": 1.010861654066056e-05, "loss": -0.9374, "step": 48260 }, { "epoch": 28.09662398137369, "grad_norm": 0.1139792799949646, "learning_rate": 1.0092008382256434e-05, "loss": -0.936, "step": 48270 }, { "epoch": 28.10244470314319, "grad_norm": 0.18226255476474762, "learning_rate": 1.0075412346846458e-05, "loss": -0.9368, "step": 48280 }, { "epoch": 28.108265424912688, "grad_norm": 0.11589402705430984, "learning_rate": 1.0058828439472056e-05, "loss": -0.9373, "step": 48290 }, { "epoch": 28.11408614668219, "grad_norm": 0.14022283256053925, "learning_rate": 1.0042256665170996e-05, "loss": -0.9364, "step": 48300 }, { "epoch": 28.11990686845169, "grad_norm": 0.2265452742576599, "learning_rate": 1.0025697028977332e-05, "loss": -0.9351, "step": 48310 }, { "epoch": 28.12572759022119, "grad_norm": 0.10395557433366776, "learning_rate": 1.0009149535921454e-05, "loss": -0.9346, "step": 48320 }, { "epoch": 28.131548311990688, "grad_norm": 0.11649578809738159, "learning_rate": 9.992614191030031e-06, "loss": -0.939, "step": 48330 }, { "epoch": 28.137369033760187, "grad_norm": 0.14412324130535126, "learning_rate": 9.976090999326115e-06, "loss": -0.9343, "step": 48340 }, { "epoch": 28.143189755529686, "grad_norm": 0.08575454354286194, "learning_rate": 9.959579965828952e-06, "loss": -0.9391, "step": 48350 }, { "epoch": 28.149010477299186, "grad_norm": 0.09057649224996567, "learning_rate": 9.943081095554218e-06, "loss": -0.9367, "step": 48360 }, { "epoch": 28.154831199068685, "grad_norm": 0.1197955384850502, "learning_rate": 9.926594393513783e-06, "loss": -0.9341, "step": 48370 }, { "epoch": 28.160651920838184, "grad_norm": 0.10689648985862732, "learning_rate": 9.910119864715906e-06, "loss": -0.9361, "step": 48380 }, { "epoch": 28.166472642607683, "grad_norm": 0.07418028265237808, "learning_rate": 9.8936575141651e-06, "loss": -0.9377, "step": 48390 }, { "epoch": 28.172293364377182, "grad_norm": 0.18961015343666077, "learning_rate": 9.877207346862194e-06, "loss": -0.9345, "step": 48400 }, { "epoch": 28.17811408614668, "grad_norm": 0.14068341255187988, "learning_rate": 9.860769367804312e-06, "loss": -0.9347, "step": 48410 }, { "epoch": 28.18393480791618, "grad_norm": 0.09820258617401123, "learning_rate": 9.844343581984877e-06, "loss": -0.9364, "step": 48420 }, { "epoch": 28.18975552968568, "grad_norm": 0.21006186306476593, "learning_rate": 9.82792999439362e-06, "loss": -0.9359, "step": 48430 }, { "epoch": 28.19557625145518, "grad_norm": 0.19397765398025513, "learning_rate": 9.811528610016546e-06, "loss": -0.9369, "step": 48440 }, { "epoch": 28.20139697322468, "grad_norm": 0.1161760464310646, "learning_rate": 9.79513943383597e-06, "loss": -0.9341, "step": 48450 }, { "epoch": 28.207217694994178, "grad_norm": 0.12445014715194702, "learning_rate": 9.778762470830489e-06, "loss": -0.932, "step": 48460 }, { "epoch": 28.213038416763677, "grad_norm": 0.10706830769777298, "learning_rate": 9.762397725974982e-06, "loss": -0.9344, "step": 48470 }, { "epoch": 28.21885913853318, "grad_norm": 0.14801082015037537, "learning_rate": 9.746045204240622e-06, "loss": -0.9349, "step": 48480 }, { "epoch": 28.22467986030268, "grad_norm": 0.10673796385526657, "learning_rate": 9.729704910594917e-06, "loss": -0.9369, "step": 48490 }, { "epoch": 28.230500582072178, "grad_norm": 0.17853280901908875, "learning_rate": 9.713376850001554e-06, "loss": -0.9351, "step": 48500 }, { "epoch": 28.236321303841677, "grad_norm": 0.07766059041023254, "learning_rate": 9.697061027420622e-06, "loss": -0.9363, "step": 48510 }, { "epoch": 28.242142025611177, "grad_norm": 0.05588354542851448, "learning_rate": 9.680757447808385e-06, "loss": -0.935, "step": 48520 }, { "epoch": 28.247962747380676, "grad_norm": 0.2345523089170456, "learning_rate": 9.664466116117488e-06, "loss": -0.9376, "step": 48530 }, { "epoch": 28.253783469150175, "grad_norm": 0.12551943957805634, "learning_rate": 9.64818703729678e-06, "loss": -0.9357, "step": 48540 }, { "epoch": 28.259604190919674, "grad_norm": 0.17595048248767853, "learning_rate": 9.631920216291423e-06, "loss": -0.9376, "step": 48550 }, { "epoch": 28.265424912689173, "grad_norm": 0.13742664456367493, "learning_rate": 9.615665658042849e-06, "loss": -0.9353, "step": 48560 }, { "epoch": 28.271245634458673, "grad_norm": 0.18147246539592743, "learning_rate": 9.599423367488747e-06, "loss": -0.9346, "step": 48570 }, { "epoch": 28.277066356228172, "grad_norm": 0.16285037994384766, "learning_rate": 9.583193349563124e-06, "loss": -0.9354, "step": 48580 }, { "epoch": 28.28288707799767, "grad_norm": 0.15228335559368134, "learning_rate": 9.566975609196216e-06, "loss": -0.9347, "step": 48590 }, { "epoch": 28.28870779976717, "grad_norm": 0.11846761405467987, "learning_rate": 9.550770151314548e-06, "loss": -0.9374, "step": 48600 }, { "epoch": 28.29452852153667, "grad_norm": 0.12357566505670547, "learning_rate": 9.53457698084091e-06, "loss": -0.9364, "step": 48610 }, { "epoch": 28.30034924330617, "grad_norm": 0.06960710883140564, "learning_rate": 9.518396102694355e-06, "loss": -0.9389, "step": 48620 }, { "epoch": 28.306169965075668, "grad_norm": 0.08511938899755478, "learning_rate": 9.502227521790198e-06, "loss": -0.9325, "step": 48630 }, { "epoch": 28.311990686845167, "grad_norm": 0.13984078168869019, "learning_rate": 9.486071243040063e-06, "loss": -0.9329, "step": 48640 }, { "epoch": 28.31781140861467, "grad_norm": 0.1600600630044937, "learning_rate": 9.469927271351747e-06, "loss": -0.9357, "step": 48650 }, { "epoch": 28.32363213038417, "grad_norm": 0.10906491428613663, "learning_rate": 9.453795611629419e-06, "loss": -0.9381, "step": 48660 }, { "epoch": 28.32945285215367, "grad_norm": 0.20233212411403656, "learning_rate": 9.437676268773399e-06, "loss": -0.9375, "step": 48670 }, { "epoch": 28.335273573923168, "grad_norm": 0.17488908767700195, "learning_rate": 9.421569247680357e-06, "loss": -0.9383, "step": 48680 }, { "epoch": 28.341094295692667, "grad_norm": 0.105021171271801, "learning_rate": 9.40547455324316e-06, "loss": -0.9353, "step": 48690 }, { "epoch": 28.346915017462166, "grad_norm": 0.15229330956935883, "learning_rate": 9.389392190350965e-06, "loss": -0.9356, "step": 48700 }, { "epoch": 28.352735739231665, "grad_norm": 0.09236662834882736, "learning_rate": 9.373322163889153e-06, "loss": -0.934, "step": 48710 }, { "epoch": 28.358556461001164, "grad_norm": 0.2250857949256897, "learning_rate": 9.357264478739375e-06, "loss": -0.9371, "step": 48720 }, { "epoch": 28.364377182770664, "grad_norm": 0.08753598481416702, "learning_rate": 9.341219139779567e-06, "loss": -0.9387, "step": 48730 }, { "epoch": 28.370197904540163, "grad_norm": 0.0843726247549057, "learning_rate": 9.325186151883824e-06, "loss": -0.9398, "step": 48740 }, { "epoch": 28.376018626309662, "grad_norm": 0.101463682949543, "learning_rate": 9.30916551992258e-06, "loss": -0.9389, "step": 48750 }, { "epoch": 28.38183934807916, "grad_norm": 0.15171445906162262, "learning_rate": 9.293157248762479e-06, "loss": -0.9347, "step": 48760 }, { "epoch": 28.38766006984866, "grad_norm": 0.12796811759471893, "learning_rate": 9.2771613432664e-06, "loss": -0.9356, "step": 48770 }, { "epoch": 28.39348079161816, "grad_norm": 0.08902321010828018, "learning_rate": 9.261177808293481e-06, "loss": -0.9283, "step": 48780 }, { "epoch": 28.39930151338766, "grad_norm": 0.08618178963661194, "learning_rate": 9.245206648699096e-06, "loss": -0.938, "step": 48790 }, { "epoch": 28.405122235157158, "grad_norm": 0.1744702309370041, "learning_rate": 9.22924786933485e-06, "loss": -0.9342, "step": 48800 }, { "epoch": 28.410942956926657, "grad_norm": 0.1859487146139145, "learning_rate": 9.213301475048642e-06, "loss": -0.9377, "step": 48810 }, { "epoch": 28.416763678696157, "grad_norm": 0.11309627443552017, "learning_rate": 9.197367470684504e-06, "loss": -0.936, "step": 48820 }, { "epoch": 28.42258440046566, "grad_norm": 0.14344726502895355, "learning_rate": 9.181445861082816e-06, "loss": -0.9358, "step": 48830 }, { "epoch": 28.42840512223516, "grad_norm": 0.23067069053649902, "learning_rate": 9.16553665108012e-06, "loss": -0.9284, "step": 48840 }, { "epoch": 28.434225844004658, "grad_norm": 0.12682226300239563, "learning_rate": 9.149639845509223e-06, "loss": -0.9378, "step": 48850 }, { "epoch": 28.440046565774157, "grad_norm": 0.1720772534608841, "learning_rate": 9.133755449199144e-06, "loss": -0.9368, "step": 48860 }, { "epoch": 28.445867287543656, "grad_norm": 0.10810475796461105, "learning_rate": 9.117883466975135e-06, "loss": -0.9359, "step": 48870 }, { "epoch": 28.451688009313155, "grad_norm": 0.12535801529884338, "learning_rate": 9.10202390365873e-06, "loss": -0.9389, "step": 48880 }, { "epoch": 28.457508731082655, "grad_norm": 0.09255783259868622, "learning_rate": 9.086176764067583e-06, "loss": -0.9351, "step": 48890 }, { "epoch": 28.463329452852154, "grad_norm": 0.2155771553516388, "learning_rate": 9.070342053015684e-06, "loss": -0.937, "step": 48900 }, { "epoch": 28.469150174621653, "grad_norm": 0.18147847056388855, "learning_rate": 9.054519775313187e-06, "loss": -0.9356, "step": 48910 }, { "epoch": 28.474970896391152, "grad_norm": 0.07191275805234909, "learning_rate": 9.038709935766476e-06, "loss": -0.9382, "step": 48920 }, { "epoch": 28.48079161816065, "grad_norm": 0.10398237407207489, "learning_rate": 9.02291253917817e-06, "loss": -0.9335, "step": 48930 }, { "epoch": 28.48661233993015, "grad_norm": 0.1418694257736206, "learning_rate": 9.007127590347091e-06, "loss": -0.9356, "step": 48940 }, { "epoch": 28.49243306169965, "grad_norm": 0.24273958802223206, "learning_rate": 8.991355094068288e-06, "loss": -0.9366, "step": 48950 }, { "epoch": 28.49825378346915, "grad_norm": 0.0817200168967247, "learning_rate": 8.975595055133062e-06, "loss": -0.9361, "step": 48960 }, { "epoch": 28.50407450523865, "grad_norm": 0.12698149681091309, "learning_rate": 8.959847478328848e-06, "loss": -0.9366, "step": 48970 }, { "epoch": 28.509895227008148, "grad_norm": 0.11670562624931335, "learning_rate": 8.944112368439378e-06, "loss": -0.9375, "step": 48980 }, { "epoch": 28.515715948777647, "grad_norm": 0.09316731244325638, "learning_rate": 8.928389730244552e-06, "loss": -0.9388, "step": 48990 }, { "epoch": 28.52153667054715, "grad_norm": 0.09785845130681992, "learning_rate": 8.912679568520494e-06, "loss": -0.9348, "step": 49000 }, { "epoch": 28.52735739231665, "grad_norm": 0.13095390796661377, "learning_rate": 8.896981888039534e-06, "loss": -0.937, "step": 49010 }, { "epoch": 28.533178114086148, "grad_norm": 0.11757024377584457, "learning_rate": 8.881296693570201e-06, "loss": -0.9385, "step": 49020 }, { "epoch": 28.538998835855647, "grad_norm": 0.09585171192884445, "learning_rate": 8.865623989877281e-06, "loss": -0.9363, "step": 49030 }, { "epoch": 28.544819557625146, "grad_norm": 0.06532522290945053, "learning_rate": 8.849963781721681e-06, "loss": -0.9369, "step": 49040 }, { "epoch": 28.550640279394646, "grad_norm": 0.102845199406147, "learning_rate": 8.834316073860588e-06, "loss": -0.9365, "step": 49050 }, { "epoch": 28.556461001164145, "grad_norm": 0.1395825445652008, "learning_rate": 8.818680871047357e-06, "loss": -0.9339, "step": 49060 }, { "epoch": 28.562281722933644, "grad_norm": 0.08058172464370728, "learning_rate": 8.803058178031549e-06, "loss": -0.9373, "step": 49070 }, { "epoch": 28.568102444703143, "grad_norm": 0.10793484002351761, "learning_rate": 8.787447999558922e-06, "loss": -0.9352, "step": 49080 }, { "epoch": 28.573923166472643, "grad_norm": 0.10666542500257492, "learning_rate": 8.77185034037144e-06, "loss": -0.935, "step": 49090 }, { "epoch": 28.57974388824214, "grad_norm": 0.09935316443443298, "learning_rate": 8.756265205207259e-06, "loss": -0.9394, "step": 49100 }, { "epoch": 28.58556461001164, "grad_norm": 0.12928156554698944, "learning_rate": 8.740692598800732e-06, "loss": -0.9396, "step": 49110 }, { "epoch": 28.59138533178114, "grad_norm": 0.128725066781044, "learning_rate": 8.72513252588239e-06, "loss": -0.9367, "step": 49120 }, { "epoch": 28.59720605355064, "grad_norm": 0.11440110951662064, "learning_rate": 8.709584991178998e-06, "loss": -0.939, "step": 49130 }, { "epoch": 28.60302677532014, "grad_norm": 0.10238692164421082, "learning_rate": 8.694049999413479e-06, "loss": -0.937, "step": 49140 }, { "epoch": 28.608847497089638, "grad_norm": 0.15577809512615204, "learning_rate": 8.678527555304945e-06, "loss": -0.9378, "step": 49150 }, { "epoch": 28.614668218859137, "grad_norm": 0.08282480388879776, "learning_rate": 8.663017663568712e-06, "loss": -0.9381, "step": 49160 }, { "epoch": 28.620488940628636, "grad_norm": 0.14286312460899353, "learning_rate": 8.647520328916259e-06, "loss": -0.9351, "step": 49170 }, { "epoch": 28.62630966239814, "grad_norm": 0.08649265021085739, "learning_rate": 8.632035556055307e-06, "loss": -0.9332, "step": 49180 }, { "epoch": 28.63213038416764, "grad_norm": 0.18711768090724945, "learning_rate": 8.616563349689672e-06, "loss": -0.9259, "step": 49190 }, { "epoch": 28.637951105937137, "grad_norm": 0.14532004296779633, "learning_rate": 8.601103714519448e-06, "loss": -0.9354, "step": 49200 }, { "epoch": 28.643771827706637, "grad_norm": 0.1716323345899582, "learning_rate": 8.58565665524082e-06, "loss": -0.9373, "step": 49210 }, { "epoch": 28.649592549476136, "grad_norm": 0.10152114182710648, "learning_rate": 8.570222176546222e-06, "loss": -0.9361, "step": 49220 }, { "epoch": 28.655413271245635, "grad_norm": 0.1956663876771927, "learning_rate": 8.554800283124242e-06, "loss": -0.935, "step": 49230 }, { "epoch": 28.661233993015134, "grad_norm": 0.07459784299135208, "learning_rate": 8.539390979659639e-06, "loss": -0.9367, "step": 49240 }, { "epoch": 28.667054714784634, "grad_norm": 0.1311727613210678, "learning_rate": 8.523994270833352e-06, "loss": -0.9362, "step": 49250 }, { "epoch": 28.672875436554133, "grad_norm": 0.08122646063566208, "learning_rate": 8.5086101613225e-06, "loss": -0.9353, "step": 49260 }, { "epoch": 28.678696158323632, "grad_norm": 0.16342487931251526, "learning_rate": 8.493238655800346e-06, "loss": -0.9374, "step": 49270 }, { "epoch": 28.68451688009313, "grad_norm": 0.11515693366527557, "learning_rate": 8.47787975893638e-06, "loss": -0.9362, "step": 49280 }, { "epoch": 28.69033760186263, "grad_norm": 0.09079621732234955, "learning_rate": 8.462533475396211e-06, "loss": -0.9364, "step": 49290 }, { "epoch": 28.69615832363213, "grad_norm": 0.1417325884103775, "learning_rate": 8.447199809841643e-06, "loss": -0.9374, "step": 49300 }, { "epoch": 28.70197904540163, "grad_norm": 0.15714886784553528, "learning_rate": 8.431878766930635e-06, "loss": -0.9356, "step": 49310 }, { "epoch": 28.707799767171128, "grad_norm": 0.12020458281040192, "learning_rate": 8.416570351317304e-06, "loss": -0.9371, "step": 49320 }, { "epoch": 28.713620488940627, "grad_norm": 0.0888902097940445, "learning_rate": 8.401274567651973e-06, "loss": -0.9365, "step": 49330 }, { "epoch": 28.719441210710126, "grad_norm": 0.09641752392053604, "learning_rate": 8.385991420581058e-06, "loss": -0.9368, "step": 49340 }, { "epoch": 28.725261932479626, "grad_norm": 0.1125888079404831, "learning_rate": 8.370720914747215e-06, "loss": -0.9363, "step": 49350 }, { "epoch": 28.73108265424913, "grad_norm": 0.1274133026599884, "learning_rate": 8.355463054789181e-06, "loss": -0.9363, "step": 49360 }, { "epoch": 28.736903376018628, "grad_norm": 0.20246663689613342, "learning_rate": 8.340217845341919e-06, "loss": -0.9339, "step": 49370 }, { "epoch": 28.742724097788127, "grad_norm": 0.21376574039459229, "learning_rate": 8.324985291036514e-06, "loss": -0.9352, "step": 49380 }, { "epoch": 28.748544819557626, "grad_norm": 0.14183436334133148, "learning_rate": 8.309765396500213e-06, "loss": -0.937, "step": 49390 }, { "epoch": 28.754365541327125, "grad_norm": 0.09889157116413116, "learning_rate": 8.294558166356419e-06, "loss": -0.9403, "step": 49400 }, { "epoch": 28.760186263096625, "grad_norm": 0.08508223295211792, "learning_rate": 8.279363605224683e-06, "loss": -0.9391, "step": 49410 }, { "epoch": 28.766006984866124, "grad_norm": 0.1269424855709076, "learning_rate": 8.264181717720704e-06, "loss": -0.9381, "step": 49420 }, { "epoch": 28.771827706635623, "grad_norm": 0.14653229713439941, "learning_rate": 8.249012508456361e-06, "loss": -0.9315, "step": 49430 }, { "epoch": 28.777648428405122, "grad_norm": 0.0988946259021759, "learning_rate": 8.233855982039646e-06, "loss": -0.9366, "step": 49440 }, { "epoch": 28.78346915017462, "grad_norm": 0.1696433126926422, "learning_rate": 8.218712143074708e-06, "loss": -0.9377, "step": 49450 }, { "epoch": 28.78928987194412, "grad_norm": 0.07547357678413391, "learning_rate": 8.203580996161858e-06, "loss": -0.9395, "step": 49460 }, { "epoch": 28.79511059371362, "grad_norm": 0.19682642817497253, "learning_rate": 8.188462545897512e-06, "loss": -0.9352, "step": 49470 }, { "epoch": 28.80093131548312, "grad_norm": 0.12220530211925507, "learning_rate": 8.173356796874304e-06, "loss": -0.9367, "step": 49480 }, { "epoch": 28.80675203725262, "grad_norm": 0.08665665239095688, "learning_rate": 8.158263753680906e-06, "loss": -0.9352, "step": 49490 }, { "epoch": 28.812572759022117, "grad_norm": 0.12404076009988785, "learning_rate": 8.143183420902239e-06, "loss": -0.9369, "step": 49500 }, { "epoch": 28.818393480791617, "grad_norm": 0.13399170339107513, "learning_rate": 8.128115803119258e-06, "loss": -0.9378, "step": 49510 }, { "epoch": 28.824214202561116, "grad_norm": 0.1443783938884735, "learning_rate": 8.11306090490916e-06, "loss": -0.9392, "step": 49520 }, { "epoch": 28.83003492433062, "grad_norm": 0.0948430523276329, "learning_rate": 8.098018730845169e-06, "loss": -0.9396, "step": 49530 }, { "epoch": 28.835855646100118, "grad_norm": 0.10017208755016327, "learning_rate": 8.082989285496745e-06, "loss": -0.9386, "step": 49540 }, { "epoch": 28.841676367869617, "grad_norm": 0.10284726321697235, "learning_rate": 8.067972573429416e-06, "loss": -0.9364, "step": 49550 }, { "epoch": 28.847497089639116, "grad_norm": 0.18630656599998474, "learning_rate": 8.052968599204874e-06, "loss": -0.9366, "step": 49560 }, { "epoch": 28.853317811408616, "grad_norm": 0.15553736686706543, "learning_rate": 8.037977367380922e-06, "loss": -0.9389, "step": 49570 }, { "epoch": 28.859138533178115, "grad_norm": 0.1403537094593048, "learning_rate": 8.022998882511495e-06, "loss": -0.9358, "step": 49580 }, { "epoch": 28.864959254947614, "grad_norm": 0.10163412988185883, "learning_rate": 8.008033149146677e-06, "loss": -0.9345, "step": 49590 }, { "epoch": 28.870779976717113, "grad_norm": 0.16656649112701416, "learning_rate": 7.993080171832656e-06, "loss": -0.9334, "step": 49600 }, { "epoch": 28.876600698486612, "grad_norm": 0.11702661961317062, "learning_rate": 7.978139955111752e-06, "loss": -0.9393, "step": 49610 }, { "epoch": 28.88242142025611, "grad_norm": 0.0994417667388916, "learning_rate": 7.9632125035224e-06, "loss": -0.9362, "step": 49620 }, { "epoch": 28.88824214202561, "grad_norm": 0.13296976685523987, "learning_rate": 7.948297821599177e-06, "loss": -0.9382, "step": 49630 }, { "epoch": 28.89406286379511, "grad_norm": 0.060835834592580795, "learning_rate": 7.933395913872755e-06, "loss": -0.9391, "step": 49640 }, { "epoch": 28.89988358556461, "grad_norm": 0.16663183271884918, "learning_rate": 7.918506784869972e-06, "loss": -0.9362, "step": 49650 }, { "epoch": 28.90570430733411, "grad_norm": 0.10345620661973953, "learning_rate": 7.903630439113707e-06, "loss": -0.9373, "step": 49660 }, { "epoch": 28.911525029103608, "grad_norm": 0.15076448023319244, "learning_rate": 7.888766881123044e-06, "loss": -0.9371, "step": 49670 }, { "epoch": 28.917345750873107, "grad_norm": 0.10456418991088867, "learning_rate": 7.873916115413099e-06, "loss": -0.9396, "step": 49680 }, { "epoch": 28.923166472642606, "grad_norm": 0.11419312655925751, "learning_rate": 7.85907814649518e-06, "loss": -0.9286, "step": 49690 }, { "epoch": 28.92898719441211, "grad_norm": 0.08495960384607315, "learning_rate": 7.844252978876649e-06, "loss": -0.9368, "step": 49700 }, { "epoch": 28.934807916181608, "grad_norm": 0.1905825436115265, "learning_rate": 7.829440617061001e-06, "loss": -0.9342, "step": 49710 }, { "epoch": 28.940628637951107, "grad_norm": 0.07314437627792358, "learning_rate": 7.814641065547851e-06, "loss": -0.9342, "step": 49720 }, { "epoch": 28.946449359720607, "grad_norm": 0.08071978390216827, "learning_rate": 7.79985432883289e-06, "loss": -0.9405, "step": 49730 }, { "epoch": 28.952270081490106, "grad_norm": 0.07455985248088837, "learning_rate": 7.78508041140797e-06, "loss": -0.9388, "step": 49740 }, { "epoch": 28.958090803259605, "grad_norm": 0.19528929889202118, "learning_rate": 7.770319317760993e-06, "loss": -0.9353, "step": 49750 }, { "epoch": 28.963911525029104, "grad_norm": 0.09035547077655792, "learning_rate": 7.755571052376004e-06, "loss": -0.936, "step": 49760 }, { "epoch": 28.969732246798603, "grad_norm": 0.08567517250776291, "learning_rate": 7.740835619733128e-06, "loss": -0.9362, "step": 49770 }, { "epoch": 28.975552968568103, "grad_norm": 0.09703561663627625, "learning_rate": 7.726113024308601e-06, "loss": -0.9351, "step": 49780 }, { "epoch": 28.981373690337602, "grad_norm": 0.10467787832021713, "learning_rate": 7.711403270574746e-06, "loss": -0.9382, "step": 49790 }, { "epoch": 28.9871944121071, "grad_norm": 0.08219499886035919, "learning_rate": 7.696706363000039e-06, "loss": -0.9385, "step": 49800 }, { "epoch": 28.9930151338766, "grad_norm": 0.10016726702451706, "learning_rate": 7.682022306048959e-06, "loss": -0.9396, "step": 49810 }, { "epoch": 28.9988358556461, "grad_norm": 0.1595323085784912, "learning_rate": 7.667351104182186e-06, "loss": -0.9358, "step": 49820 }, { "epoch": 29.0046565774156, "grad_norm": 0.0666186735033989, "learning_rate": 7.652692761856395e-06, "loss": -0.939, "step": 49830 }, { "epoch": 29.010477299185098, "grad_norm": 0.12176519632339478, "learning_rate": 7.63804728352444e-06, "loss": -0.9376, "step": 49840 }, { "epoch": 29.016298020954597, "grad_norm": 0.18946094810962677, "learning_rate": 7.623414673635215e-06, "loss": -0.9381, "step": 49850 }, { "epoch": 29.022118742724096, "grad_norm": 0.32202672958374023, "learning_rate": 7.608794936633723e-06, "loss": -0.9326, "step": 49860 }, { "epoch": 29.027939464493596, "grad_norm": 0.165020152926445, "learning_rate": 7.594188076961056e-06, "loss": -0.9349, "step": 49870 }, { "epoch": 29.0337601862631, "grad_norm": 0.11661825329065323, "learning_rate": 7.579594099054382e-06, "loss": -0.9353, "step": 49880 }, { "epoch": 29.039580908032598, "grad_norm": 0.07862138003110886, "learning_rate": 7.565013007346983e-06, "loss": -0.9364, "step": 49890 }, { "epoch": 29.045401629802097, "grad_norm": 0.1306396722793579, "learning_rate": 7.5504448062682035e-06, "loss": -0.9376, "step": 49900 }, { "epoch": 29.051222351571596, "grad_norm": 0.13119328022003174, "learning_rate": 7.53588950024347e-06, "loss": -0.937, "step": 49910 }, { "epoch": 29.057043073341095, "grad_norm": 0.10958311706781387, "learning_rate": 7.5213470936943145e-06, "loss": -0.9392, "step": 49920 }, { "epoch": 29.062863795110594, "grad_norm": 0.09070287644863129, "learning_rate": 7.506817591038323e-06, "loss": -0.9385, "step": 49930 }, { "epoch": 29.068684516880094, "grad_norm": 0.08501184731721878, "learning_rate": 7.492300996689183e-06, "loss": -0.9294, "step": 49940 }, { "epoch": 29.074505238649593, "grad_norm": 0.18737643957138062, "learning_rate": 7.477797315056645e-06, "loss": -0.9377, "step": 49950 }, { "epoch": 29.080325960419092, "grad_norm": 0.15274296700954437, "learning_rate": 7.463306550546539e-06, "loss": -0.9381, "step": 49960 }, { "epoch": 29.08614668218859, "grad_norm": 0.14760954678058624, "learning_rate": 7.448828707560812e-06, "loss": -0.9363, "step": 49970 }, { "epoch": 29.09196740395809, "grad_norm": 0.1739044189453125, "learning_rate": 7.4343637904974e-06, "loss": -0.9368, "step": 49980 }, { "epoch": 29.09778812572759, "grad_norm": 0.09859549254179001, "learning_rate": 7.419911803750401e-06, "loss": -0.9372, "step": 49990 }, { "epoch": 29.10360884749709, "grad_norm": 0.2124466598033905, "learning_rate": 7.405472751709935e-06, "loss": -0.931, "step": 50000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }