{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 201340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.966722956193504e-05, "grad_norm": 3.921875, "learning_rate": 0.0008, "loss": 3.0588, "step": 1 }, { "epoch": 0.0004966722956193504, "grad_norm": 1.3125, "learning_rate": 0.0007999642395947154, "loss": 2.3153, "step": 10 }, { "epoch": 0.0009933445912387007, "grad_norm": 0.53125, "learning_rate": 0.0007999245058110659, "loss": 1.8684, "step": 20 }, { "epoch": 0.001490016886858051, "grad_norm": 0.53515625, "learning_rate": 0.0007998847720274163, "loss": 1.8397, "step": 30 }, { "epoch": 0.0019866891824774015, "grad_norm": 0.609375, "learning_rate": 0.0007998450382437668, "loss": 1.7934, "step": 40 }, { "epoch": 0.0024833614780967518, "grad_norm": 1.09375, "learning_rate": 0.0007998053044601173, "loss": 1.7761, "step": 50 }, { "epoch": 0.002980033773716102, "grad_norm": 0.419921875, "learning_rate": 0.0007997655706764677, "loss": 1.7596, "step": 60 }, { "epoch": 0.0034767060693354523, "grad_norm": 0.8671875, "learning_rate": 0.0007997258368928181, "loss": 1.7166, "step": 70 }, { "epoch": 0.003973378364954803, "grad_norm": 0.482421875, "learning_rate": 0.0007996861031091686, "loss": 1.6598, "step": 80 }, { "epoch": 0.004470050660574153, "grad_norm": 0.5234375, "learning_rate": 0.0007996463693255191, "loss": 1.6145, "step": 90 }, { "epoch": 0.0049667229561935035, "grad_norm": 0.53125, "learning_rate": 0.0007996066355418695, "loss": 1.5857, "step": 100 }, { "epoch": 0.005463395251812854, "grad_norm": 0.40625, "learning_rate": 0.00079956690175822, "loss": 1.5669, "step": 110 }, { "epoch": 0.005960067547432204, "grad_norm": 0.50390625, "learning_rate": 0.0007995271679745704, "loss": 1.5253, "step": 120 }, { "epoch": 0.006456739843051554, "grad_norm": 0.396484375, "learning_rate": 0.0007994874341909208, "loss": 1.5324, "step": 130 }, { "epoch": 0.006953412138670905, "grad_norm": 0.60546875, "learning_rate": 0.0007994477004072714, "loss": 1.4808, "step": 140 }, { "epoch": 0.007450084434290256, "grad_norm": 0.48046875, "learning_rate": 0.0007994079666236218, "loss": 1.4696, "step": 150 }, { "epoch": 0.007946756729909606, "grad_norm": 0.3984375, "learning_rate": 0.0007993682328399723, "loss": 1.45, "step": 160 }, { "epoch": 0.008443429025528956, "grad_norm": 0.408203125, "learning_rate": 0.0007993284990563226, "loss": 1.4663, "step": 170 }, { "epoch": 0.008940101321148307, "grad_norm": 0.44921875, "learning_rate": 0.0007992887652726731, "loss": 1.4412, "step": 180 }, { "epoch": 0.009436773616767657, "grad_norm": 0.427734375, "learning_rate": 0.0007992490314890237, "loss": 1.3908, "step": 190 }, { "epoch": 0.009933445912387007, "grad_norm": 0.51953125, "learning_rate": 0.000799209297705374, "loss": 1.3938, "step": 200 }, { "epoch": 0.010430118208006357, "grad_norm": 0.43359375, "learning_rate": 0.0007991695639217245, "loss": 1.3474, "step": 210 }, { "epoch": 0.010926790503625708, "grad_norm": 0.36328125, "learning_rate": 0.0007991298301380749, "loss": 1.3319, "step": 220 }, { "epoch": 0.011423462799245058, "grad_norm": 0.35546875, "learning_rate": 0.0007990900963544253, "loss": 1.3428, "step": 230 }, { "epoch": 0.011920135094864408, "grad_norm": 0.431640625, "learning_rate": 0.0007990503625707759, "loss": 1.3505, "step": 240 }, { "epoch": 0.012416807390483758, "grad_norm": 0.53125, "learning_rate": 0.0007990106287871263, "loss": 1.3445, "step": 250 }, { "epoch": 0.012913479686103109, "grad_norm": 0.345703125, "learning_rate": 0.0007989708950034767, "loss": 1.3059, "step": 260 }, { "epoch": 0.013410151981722459, "grad_norm": 0.318359375, "learning_rate": 0.0007989311612198272, "loss": 1.3184, "step": 270 }, { "epoch": 0.01390682427734181, "grad_norm": 0.359375, "learning_rate": 0.0007988914274361776, "loss": 1.2753, "step": 280 }, { "epoch": 0.01440349657296116, "grad_norm": 0.4140625, "learning_rate": 0.0007988516936525282, "loss": 1.2823, "step": 290 }, { "epoch": 0.014900168868580511, "grad_norm": 0.451171875, "learning_rate": 0.0007988119598688786, "loss": 1.3007, "step": 300 }, { "epoch": 0.015396841164199862, "grad_norm": 0.4296875, "learning_rate": 0.000798772226085229, "loss": 1.2959, "step": 310 }, { "epoch": 0.015893513459819212, "grad_norm": 0.3828125, "learning_rate": 0.0007987324923015795, "loss": 1.2481, "step": 320 }, { "epoch": 0.01639018575543856, "grad_norm": 0.53515625, "learning_rate": 0.0007986927585179299, "loss": 1.2824, "step": 330 }, { "epoch": 0.016886858051057912, "grad_norm": 0.412109375, "learning_rate": 0.0007986530247342804, "loss": 1.242, "step": 340 }, { "epoch": 0.01738353034667726, "grad_norm": 0.42578125, "learning_rate": 0.0007986132909506309, "loss": 1.2588, "step": 350 }, { "epoch": 0.017880202642296613, "grad_norm": 0.35546875, "learning_rate": 0.0007985735571669812, "loss": 1.2736, "step": 360 }, { "epoch": 0.01837687493791596, "grad_norm": 0.4765625, "learning_rate": 0.0007985338233833317, "loss": 1.2348, "step": 370 }, { "epoch": 0.018873547233535314, "grad_norm": 0.283203125, "learning_rate": 0.0007984940895996822, "loss": 1.2293, "step": 380 }, { "epoch": 0.019370219529154662, "grad_norm": 0.361328125, "learning_rate": 0.0007984543558160325, "loss": 1.1832, "step": 390 }, { "epoch": 0.019866891824774014, "grad_norm": 0.326171875, "learning_rate": 0.0007984146220323831, "loss": 1.2007, "step": 400 }, { "epoch": 0.020363564120393366, "grad_norm": 0.314453125, "learning_rate": 0.0007983748882487336, "loss": 1.2085, "step": 410 }, { "epoch": 0.020860236416012715, "grad_norm": 0.333984375, "learning_rate": 0.0007983351544650839, "loss": 1.2127, "step": 420 }, { "epoch": 0.021356908711632067, "grad_norm": 0.388671875, "learning_rate": 0.0007982954206814344, "loss": 1.1983, "step": 430 }, { "epoch": 0.021853581007251415, "grad_norm": 0.408203125, "learning_rate": 0.0007982556868977848, "loss": 1.2332, "step": 440 }, { "epoch": 0.022350253302870767, "grad_norm": 0.349609375, "learning_rate": 0.0007982159531141354, "loss": 1.1835, "step": 450 }, { "epoch": 0.022846925598490116, "grad_norm": 0.345703125, "learning_rate": 0.0007981762193304858, "loss": 1.21, "step": 460 }, { "epoch": 0.023343597894109468, "grad_norm": 0.3515625, "learning_rate": 0.0007981364855468362, "loss": 1.1797, "step": 470 }, { "epoch": 0.023840270189728816, "grad_norm": 0.34765625, "learning_rate": 0.0007980967517631867, "loss": 1.1599, "step": 480 }, { "epoch": 0.024336942485348168, "grad_norm": 0.318359375, "learning_rate": 0.0007980570179795371, "loss": 1.1898, "step": 490 }, { "epoch": 0.024833614780967517, "grad_norm": 0.291015625, "learning_rate": 0.0007980172841958876, "loss": 1.1622, "step": 500 }, { "epoch": 0.02533028707658687, "grad_norm": 0.373046875, "learning_rate": 0.0007979775504122381, "loss": 1.182, "step": 510 }, { "epoch": 0.025826959372206217, "grad_norm": 0.3203125, "learning_rate": 0.0007979378166285885, "loss": 1.1502, "step": 520 }, { "epoch": 0.02632363166782557, "grad_norm": 0.365234375, "learning_rate": 0.0007978980828449389, "loss": 1.1264, "step": 530 }, { "epoch": 0.026820303963444918, "grad_norm": 0.37890625, "learning_rate": 0.0007978583490612895, "loss": 1.1711, "step": 540 }, { "epoch": 0.02731697625906427, "grad_norm": 0.333984375, "learning_rate": 0.0007978186152776398, "loss": 1.1233, "step": 550 }, { "epoch": 0.02781364855468362, "grad_norm": 0.396484375, "learning_rate": 0.0007977788814939903, "loss": 1.1435, "step": 560 }, { "epoch": 0.02831032085030297, "grad_norm": 0.330078125, "learning_rate": 0.0007977391477103408, "loss": 1.1424, "step": 570 }, { "epoch": 0.02880699314592232, "grad_norm": 0.353515625, "learning_rate": 0.0007976994139266911, "loss": 1.1642, "step": 580 }, { "epoch": 0.02930366544154167, "grad_norm": 0.337890625, "learning_rate": 0.0007976596801430416, "loss": 1.11, "step": 590 }, { "epoch": 0.029800337737161023, "grad_norm": 0.33984375, "learning_rate": 0.0007976199463593922, "loss": 1.152, "step": 600 }, { "epoch": 0.03029701003278037, "grad_norm": 0.3203125, "learning_rate": 0.0007975802125757426, "loss": 1.1054, "step": 610 }, { "epoch": 0.030793682328399723, "grad_norm": 0.369140625, "learning_rate": 0.000797540478792093, "loss": 1.1078, "step": 620 }, { "epoch": 0.03129035462401907, "grad_norm": 0.333984375, "learning_rate": 0.0007975007450084434, "loss": 1.1475, "step": 630 }, { "epoch": 0.031787026919638424, "grad_norm": 0.296875, "learning_rate": 0.000797461011224794, "loss": 1.1248, "step": 640 }, { "epoch": 0.032283699215257776, "grad_norm": 0.349609375, "learning_rate": 0.0007974212774411444, "loss": 1.1052, "step": 650 }, { "epoch": 0.03278037151087712, "grad_norm": 0.3515625, "learning_rate": 0.0007973815436574948, "loss": 1.1349, "step": 660 }, { "epoch": 0.03327704380649647, "grad_norm": 0.337890625, "learning_rate": 0.0007973418098738453, "loss": 1.1056, "step": 670 }, { "epoch": 0.033773716102115825, "grad_norm": 0.396484375, "learning_rate": 0.0007973020760901957, "loss": 1.1263, "step": 680 }, { "epoch": 0.03427038839773518, "grad_norm": 0.30078125, "learning_rate": 0.0007972623423065461, "loss": 1.1334, "step": 690 }, { "epoch": 0.03476706069335452, "grad_norm": 0.3203125, "learning_rate": 0.0007972226085228967, "loss": 1.1171, "step": 700 }, { "epoch": 0.035263732988973874, "grad_norm": 0.33984375, "learning_rate": 0.0007971828747392471, "loss": 1.1007, "step": 710 }, { "epoch": 0.035760405284593226, "grad_norm": 0.326171875, "learning_rate": 0.0007971431409555975, "loss": 1.1092, "step": 720 }, { "epoch": 0.03625707758021258, "grad_norm": 0.35546875, "learning_rate": 0.000797103407171948, "loss": 1.1299, "step": 730 }, { "epoch": 0.03675374987583192, "grad_norm": 0.314453125, "learning_rate": 0.0007970636733882984, "loss": 1.1215, "step": 740 }, { "epoch": 0.037250422171451275, "grad_norm": 0.294921875, "learning_rate": 0.0007970239396046489, "loss": 1.0959, "step": 750 }, { "epoch": 0.03774709446707063, "grad_norm": 0.34765625, "learning_rate": 0.0007969842058209994, "loss": 1.0947, "step": 760 }, { "epoch": 0.03824376676268998, "grad_norm": 0.3984375, "learning_rate": 0.0007969444720373498, "loss": 1.0743, "step": 770 }, { "epoch": 0.038740439058309324, "grad_norm": 0.3125, "learning_rate": 0.0007969047382537002, "loss": 1.0936, "step": 780 }, { "epoch": 0.039237111353928676, "grad_norm": 0.306640625, "learning_rate": 0.0007968650044700507, "loss": 1.0967, "step": 790 }, { "epoch": 0.03973378364954803, "grad_norm": 0.314453125, "learning_rate": 0.0007968252706864012, "loss": 1.0531, "step": 800 }, { "epoch": 0.04023045594516738, "grad_norm": 0.2734375, "learning_rate": 0.0007967855369027516, "loss": 1.0668, "step": 810 }, { "epoch": 0.04072712824078673, "grad_norm": 0.294921875, "learning_rate": 0.000796745803119102, "loss": 1.0908, "step": 820 }, { "epoch": 0.04122380053640608, "grad_norm": 0.265625, "learning_rate": 0.0007967060693354525, "loss": 1.0599, "step": 830 }, { "epoch": 0.04172047283202543, "grad_norm": 0.29296875, "learning_rate": 0.0007966663355518029, "loss": 1.0601, "step": 840 }, { "epoch": 0.04221714512764478, "grad_norm": 0.28515625, "learning_rate": 0.0007966266017681534, "loss": 1.0303, "step": 850 }, { "epoch": 0.04271381742326413, "grad_norm": 0.271484375, "learning_rate": 0.0007965868679845039, "loss": 1.0721, "step": 860 }, { "epoch": 0.04321048971888348, "grad_norm": 0.322265625, "learning_rate": 0.0007965471342008543, "loss": 1.0544, "step": 870 }, { "epoch": 0.04370716201450283, "grad_norm": 0.32421875, "learning_rate": 0.0007965074004172047, "loss": 1.0499, "step": 880 }, { "epoch": 0.04420383431012218, "grad_norm": 0.326171875, "learning_rate": 0.0007964676666335552, "loss": 1.0578, "step": 890 }, { "epoch": 0.044700506605741534, "grad_norm": 0.3046875, "learning_rate": 0.0007964279328499057, "loss": 1.103, "step": 900 }, { "epoch": 0.04519717890136088, "grad_norm": 0.2734375, "learning_rate": 0.0007963881990662561, "loss": 1.0236, "step": 910 }, { "epoch": 0.04569385119698023, "grad_norm": 0.31640625, "learning_rate": 0.0007963484652826066, "loss": 1.0282, "step": 920 }, { "epoch": 0.04619052349259958, "grad_norm": 0.33984375, "learning_rate": 0.000796308731498957, "loss": 1.0686, "step": 930 }, { "epoch": 0.046687195788218935, "grad_norm": 0.291015625, "learning_rate": 0.0007962689977153074, "loss": 1.0468, "step": 940 }, { "epoch": 0.04718386808383828, "grad_norm": 0.31640625, "learning_rate": 0.000796229263931658, "loss": 1.0379, "step": 950 }, { "epoch": 0.04768054037945763, "grad_norm": 0.349609375, "learning_rate": 0.0007961895301480084, "loss": 1.0553, "step": 960 }, { "epoch": 0.048177212675076984, "grad_norm": 0.2890625, "learning_rate": 0.0007961497963643588, "loss": 1.0768, "step": 970 }, { "epoch": 0.048673884970696336, "grad_norm": 0.34765625, "learning_rate": 0.0007961100625807093, "loss": 1.0261, "step": 980 }, { "epoch": 0.04917055726631568, "grad_norm": 0.255859375, "learning_rate": 0.0007960703287970597, "loss": 1.0311, "step": 990 }, { "epoch": 0.04966722956193503, "grad_norm": 0.326171875, "learning_rate": 0.0007960305950134102, "loss": 1.0556, "step": 1000 }, { "epoch": 0.050163901857554385, "grad_norm": 0.28125, "learning_rate": 0.0007959908612297607, "loss": 1.034, "step": 1010 }, { "epoch": 0.05066057415317374, "grad_norm": 0.330078125, "learning_rate": 0.0007959511274461111, "loss": 1.0009, "step": 1020 }, { "epoch": 0.05115724644879309, "grad_norm": 0.310546875, "learning_rate": 0.0007959113936624615, "loss": 1.0109, "step": 1030 }, { "epoch": 0.051653918744412435, "grad_norm": 0.27734375, "learning_rate": 0.0007958716598788119, "loss": 1.0085, "step": 1040 }, { "epoch": 0.052150591040031787, "grad_norm": 0.27734375, "learning_rate": 0.0007958319260951625, "loss": 1.0898, "step": 1050 }, { "epoch": 0.05264726333565114, "grad_norm": 0.34765625, "learning_rate": 0.000795792192311513, "loss": 1.0379, "step": 1060 }, { "epoch": 0.05314393563127049, "grad_norm": 0.291015625, "learning_rate": 0.0007957524585278633, "loss": 0.9888, "step": 1070 }, { "epoch": 0.053640607926889836, "grad_norm": 0.275390625, "learning_rate": 0.0007957127247442138, "loss": 0.9989, "step": 1080 }, { "epoch": 0.05413728022250919, "grad_norm": 0.333984375, "learning_rate": 0.0007956729909605642, "loss": 1.046, "step": 1090 }, { "epoch": 0.05463395251812854, "grad_norm": 0.306640625, "learning_rate": 0.0007956332571769147, "loss": 0.9776, "step": 1100 }, { "epoch": 0.05513062481374789, "grad_norm": 0.3125, "learning_rate": 0.0007955935233932652, "loss": 1.0245, "step": 1110 }, { "epoch": 0.05562729710936724, "grad_norm": 0.302734375, "learning_rate": 0.0007955537896096156, "loss": 1.0557, "step": 1120 }, { "epoch": 0.05612396940498659, "grad_norm": 0.2734375, "learning_rate": 0.000795514055825966, "loss": 1.0043, "step": 1130 }, { "epoch": 0.05662064170060594, "grad_norm": 0.279296875, "learning_rate": 0.0007954743220423165, "loss": 0.9955, "step": 1140 }, { "epoch": 0.05711731399622529, "grad_norm": 0.328125, "learning_rate": 0.000795434588258667, "loss": 1.0221, "step": 1150 }, { "epoch": 0.05761398629184464, "grad_norm": 0.279296875, "learning_rate": 0.0007953948544750174, "loss": 0.9994, "step": 1160 }, { "epoch": 0.05811065858746399, "grad_norm": 0.275390625, "learning_rate": 0.0007953551206913679, "loss": 1.031, "step": 1170 }, { "epoch": 0.05860733088308334, "grad_norm": 0.279296875, "learning_rate": 0.0007953153869077183, "loss": 0.9968, "step": 1180 }, { "epoch": 0.059104003178702694, "grad_norm": 0.2734375, "learning_rate": 0.0007952756531240688, "loss": 1.0061, "step": 1190 }, { "epoch": 0.059600675474322046, "grad_norm": 0.298828125, "learning_rate": 0.0007952359193404193, "loss": 1.0162, "step": 1200 }, { "epoch": 0.06009734776994139, "grad_norm": 0.29296875, "learning_rate": 0.0007951961855567697, "loss": 0.9925, "step": 1210 }, { "epoch": 0.06059402006556074, "grad_norm": 0.271484375, "learning_rate": 0.0007951564517731202, "loss": 1.0197, "step": 1220 }, { "epoch": 0.061090692361180095, "grad_norm": 0.265625, "learning_rate": 0.0007951167179894705, "loss": 1.0029, "step": 1230 }, { "epoch": 0.06158736465679945, "grad_norm": 0.27734375, "learning_rate": 0.000795076984205821, "loss": 0.9652, "step": 1240 }, { "epoch": 0.06208403695241879, "grad_norm": 0.361328125, "learning_rate": 0.0007950372504221716, "loss": 1.0034, "step": 1250 }, { "epoch": 0.06258070924803814, "grad_norm": 0.2734375, "learning_rate": 0.0007949975166385219, "loss": 1.0125, "step": 1260 }, { "epoch": 0.0630773815436575, "grad_norm": 0.3203125, "learning_rate": 0.0007949577828548724, "loss": 0.999, "step": 1270 }, { "epoch": 0.06357405383927685, "grad_norm": 0.30078125, "learning_rate": 0.0007949180490712229, "loss": 0.9831, "step": 1280 }, { "epoch": 0.0640707261348962, "grad_norm": 0.30859375, "learning_rate": 0.0007948783152875732, "loss": 0.9889, "step": 1290 }, { "epoch": 0.06456739843051555, "grad_norm": 0.259765625, "learning_rate": 0.0007948385815039238, "loss": 0.9954, "step": 1300 }, { "epoch": 0.06506407072613489, "grad_norm": 0.40234375, "learning_rate": 0.0007947988477202742, "loss": 1.0029, "step": 1310 }, { "epoch": 0.06556074302175424, "grad_norm": 0.279296875, "learning_rate": 0.0007947591139366246, "loss": 0.9666, "step": 1320 }, { "epoch": 0.0660574153173736, "grad_norm": 0.31640625, "learning_rate": 0.0007947193801529751, "loss": 0.9921, "step": 1330 }, { "epoch": 0.06655408761299295, "grad_norm": 0.302734375, "learning_rate": 0.0007946796463693255, "loss": 0.9799, "step": 1340 }, { "epoch": 0.0670507599086123, "grad_norm": 0.279296875, "learning_rate": 0.0007946399125856761, "loss": 1.0026, "step": 1350 }, { "epoch": 0.06754743220423165, "grad_norm": 0.26171875, "learning_rate": 0.0007946001788020265, "loss": 0.9782, "step": 1360 }, { "epoch": 0.068044104499851, "grad_norm": 0.283203125, "learning_rate": 0.0007945604450183769, "loss": 1.0086, "step": 1370 }, { "epoch": 0.06854077679547035, "grad_norm": 0.271484375, "learning_rate": 0.0007945207112347274, "loss": 0.9763, "step": 1380 }, { "epoch": 0.0690374490910897, "grad_norm": 0.30078125, "learning_rate": 0.0007944809774510778, "loss": 1.0011, "step": 1390 }, { "epoch": 0.06953412138670904, "grad_norm": 0.267578125, "learning_rate": 0.0007944412436674283, "loss": 0.9751, "step": 1400 }, { "epoch": 0.0700307936823284, "grad_norm": 0.29296875, "learning_rate": 0.0007944015098837788, "loss": 1.0073, "step": 1410 }, { "epoch": 0.07052746597794775, "grad_norm": 0.287109375, "learning_rate": 0.0007943617761001291, "loss": 0.9551, "step": 1420 }, { "epoch": 0.0710241382735671, "grad_norm": 0.271484375, "learning_rate": 0.0007943220423164796, "loss": 0.9414, "step": 1430 }, { "epoch": 0.07152081056918645, "grad_norm": 0.3125, "learning_rate": 0.0007942823085328301, "loss": 0.9908, "step": 1440 }, { "epoch": 0.0720174828648058, "grad_norm": 0.294921875, "learning_rate": 0.0007942425747491804, "loss": 0.9451, "step": 1450 }, { "epoch": 0.07251415516042516, "grad_norm": 0.234375, "learning_rate": 0.000794202840965531, "loss": 0.9761, "step": 1460 }, { "epoch": 0.07301082745604451, "grad_norm": 0.322265625, "learning_rate": 0.0007941631071818815, "loss": 0.9389, "step": 1470 }, { "epoch": 0.07350749975166385, "grad_norm": 0.267578125, "learning_rate": 0.0007941233733982319, "loss": 0.9704, "step": 1480 }, { "epoch": 0.0740041720472832, "grad_norm": 0.283203125, "learning_rate": 0.0007940836396145823, "loss": 0.9748, "step": 1490 }, { "epoch": 0.07450084434290255, "grad_norm": 0.3046875, "learning_rate": 0.0007940439058309328, "loss": 0.982, "step": 1500 }, { "epoch": 0.0749975166385219, "grad_norm": 0.26953125, "learning_rate": 0.0007940041720472833, "loss": 0.9661, "step": 1510 }, { "epoch": 0.07549418893414125, "grad_norm": 0.2578125, "learning_rate": 0.0007939644382636337, "loss": 0.9424, "step": 1520 }, { "epoch": 0.0759908612297606, "grad_norm": 0.29296875, "learning_rate": 0.0007939247044799841, "loss": 0.9927, "step": 1530 }, { "epoch": 0.07648753352537996, "grad_norm": 0.2451171875, "learning_rate": 0.0007938849706963346, "loss": 0.9794, "step": 1540 }, { "epoch": 0.07698420582099931, "grad_norm": 0.333984375, "learning_rate": 0.000793845236912685, "loss": 0.9505, "step": 1550 }, { "epoch": 0.07748087811661865, "grad_norm": 0.296875, "learning_rate": 0.0007938055031290355, "loss": 0.9565, "step": 1560 }, { "epoch": 0.077977550412238, "grad_norm": 0.3046875, "learning_rate": 0.000793765769345386, "loss": 0.9998, "step": 1570 }, { "epoch": 0.07847422270785735, "grad_norm": 0.287109375, "learning_rate": 0.0007937260355617364, "loss": 0.9816, "step": 1580 }, { "epoch": 0.0789708950034767, "grad_norm": 0.30078125, "learning_rate": 0.0007936863017780868, "loss": 0.9523, "step": 1590 }, { "epoch": 0.07946756729909606, "grad_norm": 0.255859375, "learning_rate": 0.0007936465679944374, "loss": 0.9512, "step": 1600 }, { "epoch": 0.07996423959471541, "grad_norm": 0.26171875, "learning_rate": 0.0007936068342107878, "loss": 0.9186, "step": 1610 }, { "epoch": 0.08046091189033476, "grad_norm": 0.296875, "learning_rate": 0.0007935671004271382, "loss": 0.9518, "step": 1620 }, { "epoch": 0.08095758418595411, "grad_norm": 0.33203125, "learning_rate": 0.0007935273666434887, "loss": 0.9269, "step": 1630 }, { "epoch": 0.08145425648157346, "grad_norm": 0.25390625, "learning_rate": 0.0007934876328598391, "loss": 0.9667, "step": 1640 }, { "epoch": 0.0819509287771928, "grad_norm": 0.2578125, "learning_rate": 0.0007934478990761896, "loss": 0.947, "step": 1650 }, { "epoch": 0.08244760107281215, "grad_norm": 0.30859375, "learning_rate": 0.0007934081652925401, "loss": 0.9563, "step": 1660 }, { "epoch": 0.0829442733684315, "grad_norm": 0.28515625, "learning_rate": 0.0007933684315088905, "loss": 0.9397, "step": 1670 }, { "epoch": 0.08344094566405086, "grad_norm": 0.275390625, "learning_rate": 0.0007933286977252409, "loss": 0.965, "step": 1680 }, { "epoch": 0.08393761795967021, "grad_norm": 0.25, "learning_rate": 0.0007932889639415913, "loss": 0.9461, "step": 1690 }, { "epoch": 0.08443429025528956, "grad_norm": 0.26171875, "learning_rate": 0.0007932492301579419, "loss": 0.9537, "step": 1700 }, { "epoch": 0.08493096255090891, "grad_norm": 0.2392578125, "learning_rate": 0.0007932094963742923, "loss": 0.9146, "step": 1710 }, { "epoch": 0.08542763484652827, "grad_norm": 0.259765625, "learning_rate": 0.0007931697625906427, "loss": 0.9558, "step": 1720 }, { "epoch": 0.0859243071421476, "grad_norm": 0.263671875, "learning_rate": 0.0007931300288069932, "loss": 0.9614, "step": 1730 }, { "epoch": 0.08642097943776696, "grad_norm": 0.2412109375, "learning_rate": 0.0007930902950233436, "loss": 0.9227, "step": 1740 }, { "epoch": 0.08691765173338631, "grad_norm": 0.279296875, "learning_rate": 0.000793050561239694, "loss": 0.9533, "step": 1750 }, { "epoch": 0.08741432402900566, "grad_norm": 0.2373046875, "learning_rate": 0.0007930108274560446, "loss": 0.9488, "step": 1760 }, { "epoch": 0.08791099632462501, "grad_norm": 0.25390625, "learning_rate": 0.000792971093672395, "loss": 0.9396, "step": 1770 }, { "epoch": 0.08840766862024436, "grad_norm": 0.294921875, "learning_rate": 0.0007929313598887454, "loss": 0.9199, "step": 1780 }, { "epoch": 0.08890434091586372, "grad_norm": 0.267578125, "learning_rate": 0.0007928916261050959, "loss": 0.9593, "step": 1790 }, { "epoch": 0.08940101321148307, "grad_norm": 0.2490234375, "learning_rate": 0.0007928518923214463, "loss": 0.9639, "step": 1800 }, { "epoch": 0.08989768550710242, "grad_norm": 0.2578125, "learning_rate": 0.0007928121585377968, "loss": 0.9267, "step": 1810 }, { "epoch": 0.09039435780272176, "grad_norm": 0.287109375, "learning_rate": 0.0007927724247541473, "loss": 0.9629, "step": 1820 }, { "epoch": 0.09089103009834111, "grad_norm": 0.248046875, "learning_rate": 0.0007927326909704977, "loss": 0.9051, "step": 1830 }, { "epoch": 0.09138770239396046, "grad_norm": 0.25390625, "learning_rate": 0.0007926929571868481, "loss": 0.9445, "step": 1840 }, { "epoch": 0.09188437468957981, "grad_norm": 0.2490234375, "learning_rate": 0.0007926532234031987, "loss": 0.9441, "step": 1850 }, { "epoch": 0.09238104698519917, "grad_norm": 0.28515625, "learning_rate": 0.0007926134896195491, "loss": 0.9459, "step": 1860 }, { "epoch": 0.09287771928081852, "grad_norm": 0.263671875, "learning_rate": 0.0007925737558358995, "loss": 0.944, "step": 1870 }, { "epoch": 0.09337439157643787, "grad_norm": 0.2431640625, "learning_rate": 0.00079253402205225, "loss": 0.9664, "step": 1880 }, { "epoch": 0.09387106387205722, "grad_norm": 0.25390625, "learning_rate": 0.0007924942882686004, "loss": 0.9384, "step": 1890 }, { "epoch": 0.09436773616767656, "grad_norm": 0.25390625, "learning_rate": 0.0007924545544849508, "loss": 0.936, "step": 1900 }, { "epoch": 0.09486440846329591, "grad_norm": 0.259765625, "learning_rate": 0.0007924148207013013, "loss": 0.9484, "step": 1910 }, { "epoch": 0.09536108075891526, "grad_norm": 0.28125, "learning_rate": 0.0007923750869176518, "loss": 0.8862, "step": 1920 }, { "epoch": 0.09585775305453462, "grad_norm": 0.2412109375, "learning_rate": 0.0007923353531340023, "loss": 0.933, "step": 1930 }, { "epoch": 0.09635442535015397, "grad_norm": 0.220703125, "learning_rate": 0.0007922956193503526, "loss": 0.9179, "step": 1940 }, { "epoch": 0.09685109764577332, "grad_norm": 0.302734375, "learning_rate": 0.0007922558855667031, "loss": 0.9129, "step": 1950 }, { "epoch": 0.09734776994139267, "grad_norm": 0.259765625, "learning_rate": 0.0007922161517830536, "loss": 0.9578, "step": 1960 }, { "epoch": 0.09784444223701202, "grad_norm": 0.314453125, "learning_rate": 0.000792176417999404, "loss": 0.8939, "step": 1970 }, { "epoch": 0.09834111453263136, "grad_norm": 0.28515625, "learning_rate": 0.0007921366842157545, "loss": 0.9061, "step": 1980 }, { "epoch": 0.09883778682825071, "grad_norm": 0.271484375, "learning_rate": 0.0007920969504321049, "loss": 0.9174, "step": 1990 }, { "epoch": 0.09933445912387007, "grad_norm": 0.2373046875, "learning_rate": 0.0007920572166484553, "loss": 0.9491, "step": 2000 }, { "epoch": 0.09983113141948942, "grad_norm": 0.265625, "learning_rate": 0.0007920174828648059, "loss": 0.9257, "step": 2010 }, { "epoch": 0.10032780371510877, "grad_norm": 0.2578125, "learning_rate": 0.0007919777490811563, "loss": 0.8943, "step": 2020 }, { "epoch": 0.10082447601072812, "grad_norm": 0.25390625, "learning_rate": 0.0007919380152975067, "loss": 0.9324, "step": 2030 }, { "epoch": 0.10132114830634747, "grad_norm": 0.2373046875, "learning_rate": 0.0007918982815138572, "loss": 0.9209, "step": 2040 }, { "epoch": 0.10181782060196683, "grad_norm": 0.302734375, "learning_rate": 0.0007918585477302076, "loss": 0.9143, "step": 2050 }, { "epoch": 0.10231449289758618, "grad_norm": 0.28125, "learning_rate": 0.0007918188139465581, "loss": 0.8881, "step": 2060 }, { "epoch": 0.10281116519320552, "grad_norm": 0.255859375, "learning_rate": 0.0007917790801629086, "loss": 0.9361, "step": 2070 }, { "epoch": 0.10330783748882487, "grad_norm": 0.23828125, "learning_rate": 0.000791739346379259, "loss": 0.8759, "step": 2080 }, { "epoch": 0.10380450978444422, "grad_norm": 0.2421875, "learning_rate": 0.0007916996125956095, "loss": 0.9115, "step": 2090 }, { "epoch": 0.10430118208006357, "grad_norm": 0.25, "learning_rate": 0.0007916598788119598, "loss": 0.9272, "step": 2100 }, { "epoch": 0.10479785437568293, "grad_norm": 0.294921875, "learning_rate": 0.0007916201450283104, "loss": 0.9501, "step": 2110 }, { "epoch": 0.10529452667130228, "grad_norm": 0.2490234375, "learning_rate": 0.0007915804112446609, "loss": 0.8966, "step": 2120 }, { "epoch": 0.10579119896692163, "grad_norm": 0.2490234375, "learning_rate": 0.0007915406774610112, "loss": 0.9069, "step": 2130 }, { "epoch": 0.10628787126254098, "grad_norm": 0.267578125, "learning_rate": 0.0007915009436773617, "loss": 0.9063, "step": 2140 }, { "epoch": 0.10678454355816032, "grad_norm": 0.26171875, "learning_rate": 0.0007914612098937123, "loss": 0.9479, "step": 2150 }, { "epoch": 0.10728121585377967, "grad_norm": 0.23828125, "learning_rate": 0.0007914214761100626, "loss": 0.8811, "step": 2160 }, { "epoch": 0.10777788814939902, "grad_norm": 0.275390625, "learning_rate": 0.0007913817423264131, "loss": 0.9181, "step": 2170 }, { "epoch": 0.10827456044501838, "grad_norm": 0.275390625, "learning_rate": 0.0007913420085427635, "loss": 0.8811, "step": 2180 }, { "epoch": 0.10877123274063773, "grad_norm": 0.248046875, "learning_rate": 0.0007913022747591139, "loss": 0.8769, "step": 2190 }, { "epoch": 0.10926790503625708, "grad_norm": 0.275390625, "learning_rate": 0.0007912625409754644, "loss": 0.9298, "step": 2200 }, { "epoch": 0.10976457733187643, "grad_norm": 0.25390625, "learning_rate": 0.0007912228071918149, "loss": 0.8929, "step": 2210 }, { "epoch": 0.11026124962749578, "grad_norm": 0.296875, "learning_rate": 0.0007911830734081653, "loss": 0.9101, "step": 2220 }, { "epoch": 0.11075792192311514, "grad_norm": 0.234375, "learning_rate": 0.0007911433396245158, "loss": 0.9398, "step": 2230 }, { "epoch": 0.11125459421873447, "grad_norm": 0.2333984375, "learning_rate": 0.0007911036058408662, "loss": 0.9077, "step": 2240 }, { "epoch": 0.11175126651435383, "grad_norm": 0.294921875, "learning_rate": 0.0007910638720572167, "loss": 0.9285, "step": 2250 }, { "epoch": 0.11224793880997318, "grad_norm": 0.26953125, "learning_rate": 0.0007910241382735672, "loss": 0.8824, "step": 2260 }, { "epoch": 0.11274461110559253, "grad_norm": 0.2734375, "learning_rate": 0.0007909844044899176, "loss": 0.8869, "step": 2270 }, { "epoch": 0.11324128340121188, "grad_norm": 0.302734375, "learning_rate": 0.0007909446707062681, "loss": 0.9341, "step": 2280 }, { "epoch": 0.11373795569683123, "grad_norm": 0.240234375, "learning_rate": 0.0007909049369226184, "loss": 0.9054, "step": 2290 }, { "epoch": 0.11423462799245059, "grad_norm": 0.2578125, "learning_rate": 0.0007908652031389689, "loss": 0.91, "step": 2300 }, { "epoch": 0.11473130028806994, "grad_norm": 0.2353515625, "learning_rate": 0.0007908254693553195, "loss": 0.9317, "step": 2310 }, { "epoch": 0.11522797258368928, "grad_norm": 0.267578125, "learning_rate": 0.0007907857355716698, "loss": 0.9031, "step": 2320 }, { "epoch": 0.11572464487930863, "grad_norm": 0.2734375, "learning_rate": 0.0007907460017880203, "loss": 0.9089, "step": 2330 }, { "epoch": 0.11622131717492798, "grad_norm": 0.2578125, "learning_rate": 0.0007907062680043708, "loss": 0.9035, "step": 2340 }, { "epoch": 0.11671798947054733, "grad_norm": 0.29296875, "learning_rate": 0.0007906665342207211, "loss": 0.8807, "step": 2350 }, { "epoch": 0.11721466176616668, "grad_norm": 0.24609375, "learning_rate": 0.0007906268004370717, "loss": 0.8563, "step": 2360 }, { "epoch": 0.11771133406178604, "grad_norm": 0.2353515625, "learning_rate": 0.0007905870666534221, "loss": 0.8702, "step": 2370 }, { "epoch": 0.11820800635740539, "grad_norm": 0.2109375, "learning_rate": 0.0007905473328697726, "loss": 0.9268, "step": 2380 }, { "epoch": 0.11870467865302474, "grad_norm": 0.240234375, "learning_rate": 0.000790507599086123, "loss": 0.902, "step": 2390 }, { "epoch": 0.11920135094864409, "grad_norm": 0.26171875, "learning_rate": 0.0007904678653024734, "loss": 0.9036, "step": 2400 }, { "epoch": 0.11969802324426343, "grad_norm": 0.263671875, "learning_rate": 0.000790428131518824, "loss": 0.9041, "step": 2410 }, { "epoch": 0.12019469553988278, "grad_norm": 0.25390625, "learning_rate": 0.0007903883977351744, "loss": 0.8601, "step": 2420 }, { "epoch": 0.12069136783550213, "grad_norm": 0.27734375, "learning_rate": 0.0007903486639515248, "loss": 0.8791, "step": 2430 }, { "epoch": 0.12118804013112149, "grad_norm": 0.263671875, "learning_rate": 0.0007903089301678753, "loss": 0.9051, "step": 2440 }, { "epoch": 0.12168471242674084, "grad_norm": 0.255859375, "learning_rate": 0.0007902691963842257, "loss": 0.8775, "step": 2450 }, { "epoch": 0.12218138472236019, "grad_norm": 0.2255859375, "learning_rate": 0.0007902294626005762, "loss": 0.8495, "step": 2460 }, { "epoch": 0.12267805701797954, "grad_norm": 0.25, "learning_rate": 0.0007901897288169267, "loss": 0.9162, "step": 2470 }, { "epoch": 0.1231747293135989, "grad_norm": 0.259765625, "learning_rate": 0.000790149995033277, "loss": 0.8772, "step": 2480 }, { "epoch": 0.12367140160921823, "grad_norm": 0.2490234375, "learning_rate": 0.0007901102612496275, "loss": 0.8825, "step": 2490 }, { "epoch": 0.12416807390483758, "grad_norm": 0.251953125, "learning_rate": 0.000790070527465978, "loss": 0.9092, "step": 2500 }, { "epoch": 0.12466474620045694, "grad_norm": 0.263671875, "learning_rate": 0.0007900307936823284, "loss": 0.8917, "step": 2510 }, { "epoch": 0.1251614184960763, "grad_norm": 0.3125, "learning_rate": 0.0007899910598986789, "loss": 0.8767, "step": 2520 }, { "epoch": 0.12565809079169563, "grad_norm": 0.2353515625, "learning_rate": 0.0007899513261150294, "loss": 0.8803, "step": 2530 }, { "epoch": 0.126154763087315, "grad_norm": 0.2412109375, "learning_rate": 0.0007899115923313798, "loss": 0.895, "step": 2540 }, { "epoch": 0.12665143538293433, "grad_norm": 0.259765625, "learning_rate": 0.0007898718585477302, "loss": 0.9083, "step": 2550 }, { "epoch": 0.1271481076785537, "grad_norm": 0.240234375, "learning_rate": 0.0007898321247640807, "loss": 0.8967, "step": 2560 }, { "epoch": 0.12764477997417303, "grad_norm": 0.2373046875, "learning_rate": 0.0007897923909804312, "loss": 0.8881, "step": 2570 }, { "epoch": 0.1281414522697924, "grad_norm": 0.220703125, "learning_rate": 0.0007897526571967816, "loss": 0.9021, "step": 2580 }, { "epoch": 0.12863812456541174, "grad_norm": 0.306640625, "learning_rate": 0.000789712923413132, "loss": 0.8832, "step": 2590 }, { "epoch": 0.1291347968610311, "grad_norm": 0.255859375, "learning_rate": 0.0007896731896294825, "loss": 0.9154, "step": 2600 }, { "epoch": 0.12963146915665044, "grad_norm": 0.2255859375, "learning_rate": 0.000789633455845833, "loss": 0.8943, "step": 2610 }, { "epoch": 0.13012814145226978, "grad_norm": 0.248046875, "learning_rate": 0.0007895937220621834, "loss": 0.8884, "step": 2620 }, { "epoch": 0.13062481374788915, "grad_norm": 0.248046875, "learning_rate": 0.0007895539882785339, "loss": 0.8645, "step": 2630 }, { "epoch": 0.13112148604350848, "grad_norm": 0.26171875, "learning_rate": 0.0007895142544948843, "loss": 0.8769, "step": 2640 }, { "epoch": 0.13161815833912785, "grad_norm": 0.271484375, "learning_rate": 0.0007894745207112347, "loss": 0.9108, "step": 2650 }, { "epoch": 0.1321148306347472, "grad_norm": 0.267578125, "learning_rate": 0.0007894347869275853, "loss": 0.8442, "step": 2660 }, { "epoch": 0.13261150293036655, "grad_norm": 0.25, "learning_rate": 0.0007893950531439357, "loss": 0.8799, "step": 2670 }, { "epoch": 0.1331081752259859, "grad_norm": 0.2265625, "learning_rate": 0.0007893553193602861, "loss": 0.9038, "step": 2680 }, { "epoch": 0.13360484752160526, "grad_norm": 0.224609375, "learning_rate": 0.0007893155855766366, "loss": 0.8603, "step": 2690 }, { "epoch": 0.1341015198172246, "grad_norm": 0.22265625, "learning_rate": 0.000789275851792987, "loss": 0.8633, "step": 2700 }, { "epoch": 0.13459819211284393, "grad_norm": 0.234375, "learning_rate": 0.0007892361180093375, "loss": 0.8866, "step": 2710 }, { "epoch": 0.1350948644084633, "grad_norm": 0.25, "learning_rate": 0.000789196384225688, "loss": 0.873, "step": 2720 }, { "epoch": 0.13559153670408264, "grad_norm": 0.248046875, "learning_rate": 0.0007891566504420384, "loss": 0.85, "step": 2730 }, { "epoch": 0.136088208999702, "grad_norm": 0.23046875, "learning_rate": 0.0007891169166583888, "loss": 0.8394, "step": 2740 }, { "epoch": 0.13658488129532134, "grad_norm": 0.251953125, "learning_rate": 0.0007890771828747392, "loss": 0.8567, "step": 2750 }, { "epoch": 0.1370815535909407, "grad_norm": 0.265625, "learning_rate": 0.0007890374490910898, "loss": 0.9067, "step": 2760 }, { "epoch": 0.13757822588656005, "grad_norm": 0.25, "learning_rate": 0.0007889977153074402, "loss": 0.8816, "step": 2770 }, { "epoch": 0.1380748981821794, "grad_norm": 0.2265625, "learning_rate": 0.0007889579815237906, "loss": 0.8963, "step": 2780 }, { "epoch": 0.13857157047779875, "grad_norm": 0.251953125, "learning_rate": 0.0007889182477401411, "loss": 0.838, "step": 2790 }, { "epoch": 0.1390682427734181, "grad_norm": 0.212890625, "learning_rate": 0.0007888785139564915, "loss": 0.8586, "step": 2800 }, { "epoch": 0.13956491506903745, "grad_norm": 0.23046875, "learning_rate": 0.000788838780172842, "loss": 0.8659, "step": 2810 }, { "epoch": 0.1400615873646568, "grad_norm": 0.2421875, "learning_rate": 0.0007887990463891925, "loss": 0.9058, "step": 2820 }, { "epoch": 0.14055825966027616, "grad_norm": 0.228515625, "learning_rate": 0.0007887593126055429, "loss": 0.8714, "step": 2830 }, { "epoch": 0.1410549319558955, "grad_norm": 0.2294921875, "learning_rate": 0.0007887195788218933, "loss": 0.8725, "step": 2840 }, { "epoch": 0.14155160425151486, "grad_norm": 0.2353515625, "learning_rate": 0.0007886798450382438, "loss": 0.8797, "step": 2850 }, { "epoch": 0.1420482765471342, "grad_norm": 0.2294921875, "learning_rate": 0.0007886401112545943, "loss": 0.866, "step": 2860 }, { "epoch": 0.14254494884275354, "grad_norm": 0.2470703125, "learning_rate": 0.0007886003774709447, "loss": 0.8559, "step": 2870 }, { "epoch": 0.1430416211383729, "grad_norm": 0.25, "learning_rate": 0.0007885606436872952, "loss": 0.8683, "step": 2880 }, { "epoch": 0.14353829343399224, "grad_norm": 0.216796875, "learning_rate": 0.0007885209099036456, "loss": 0.8658, "step": 2890 }, { "epoch": 0.1440349657296116, "grad_norm": 0.234375, "learning_rate": 0.000788481176119996, "loss": 0.8778, "step": 2900 }, { "epoch": 0.14453163802523095, "grad_norm": 0.2255859375, "learning_rate": 0.0007884414423363466, "loss": 0.8682, "step": 2910 }, { "epoch": 0.1450283103208503, "grad_norm": 0.25390625, "learning_rate": 0.000788401708552697, "loss": 0.8524, "step": 2920 }, { "epoch": 0.14552498261646965, "grad_norm": 0.251953125, "learning_rate": 0.0007883619747690474, "loss": 0.8535, "step": 2930 }, { "epoch": 0.14602165491208902, "grad_norm": 0.216796875, "learning_rate": 0.0007883222409853979, "loss": 0.8414, "step": 2940 }, { "epoch": 0.14651832720770835, "grad_norm": 0.271484375, "learning_rate": 0.0007882825072017483, "loss": 0.8294, "step": 2950 }, { "epoch": 0.1470149995033277, "grad_norm": 0.22265625, "learning_rate": 0.0007882427734180987, "loss": 0.8701, "step": 2960 }, { "epoch": 0.14751167179894706, "grad_norm": 0.259765625, "learning_rate": 0.0007882030396344492, "loss": 0.8428, "step": 2970 }, { "epoch": 0.1480083440945664, "grad_norm": 0.2265625, "learning_rate": 0.0007881633058507997, "loss": 0.8341, "step": 2980 }, { "epoch": 0.14850501639018576, "grad_norm": 0.2578125, "learning_rate": 0.0007881235720671502, "loss": 0.839, "step": 2990 }, { "epoch": 0.1490016886858051, "grad_norm": 0.2333984375, "learning_rate": 0.0007880838382835005, "loss": 0.8262, "step": 3000 }, { "epoch": 0.14949836098142447, "grad_norm": 0.2265625, "learning_rate": 0.000788044104499851, "loss": 0.8626, "step": 3010 }, { "epoch": 0.1499950332770438, "grad_norm": 0.240234375, "learning_rate": 0.0007880043707162015, "loss": 0.8461, "step": 3020 }, { "epoch": 0.15049170557266317, "grad_norm": 0.208984375, "learning_rate": 0.0007879646369325519, "loss": 0.8441, "step": 3030 }, { "epoch": 0.1509883778682825, "grad_norm": 0.216796875, "learning_rate": 0.0007879249031489024, "loss": 0.8595, "step": 3040 }, { "epoch": 0.15148505016390185, "grad_norm": 0.265625, "learning_rate": 0.0007878851693652528, "loss": 0.877, "step": 3050 }, { "epoch": 0.1519817224595212, "grad_norm": 0.21875, "learning_rate": 0.0007878454355816032, "loss": 0.8823, "step": 3060 }, { "epoch": 0.15247839475514055, "grad_norm": 0.25390625, "learning_rate": 0.0007878057017979538, "loss": 0.8481, "step": 3070 }, { "epoch": 0.15297506705075992, "grad_norm": 0.23828125, "learning_rate": 0.0007877659680143042, "loss": 0.8838, "step": 3080 }, { "epoch": 0.15347173934637925, "grad_norm": 0.2392578125, "learning_rate": 0.0007877262342306546, "loss": 0.8341, "step": 3090 }, { "epoch": 0.15396841164199862, "grad_norm": 0.2412109375, "learning_rate": 0.0007876865004470051, "loss": 0.8893, "step": 3100 }, { "epoch": 0.15446508393761796, "grad_norm": 0.2412109375, "learning_rate": 0.0007876467666633555, "loss": 0.8442, "step": 3110 }, { "epoch": 0.1549617562332373, "grad_norm": 0.26171875, "learning_rate": 0.000787607032879706, "loss": 0.837, "step": 3120 }, { "epoch": 0.15545842852885666, "grad_norm": 0.2197265625, "learning_rate": 0.0007875672990960565, "loss": 0.8601, "step": 3130 }, { "epoch": 0.155955100824476, "grad_norm": 0.216796875, "learning_rate": 0.0007875275653124069, "loss": 0.8514, "step": 3140 }, { "epoch": 0.15645177312009537, "grad_norm": 0.2451171875, "learning_rate": 0.0007874878315287574, "loss": 0.886, "step": 3150 }, { "epoch": 0.1569484454157147, "grad_norm": 0.212890625, "learning_rate": 0.0007874480977451077, "loss": 0.8572, "step": 3160 }, { "epoch": 0.15744511771133407, "grad_norm": 0.2294921875, "learning_rate": 0.0007874083639614583, "loss": 0.862, "step": 3170 }, { "epoch": 0.1579417900069534, "grad_norm": 0.2138671875, "learning_rate": 0.0007873686301778088, "loss": 0.8112, "step": 3180 }, { "epoch": 0.15843846230257277, "grad_norm": 0.2470703125, "learning_rate": 0.0007873288963941591, "loss": 0.8726, "step": 3190 }, { "epoch": 0.1589351345981921, "grad_norm": 0.2197265625, "learning_rate": 0.0007872891626105096, "loss": 0.8618, "step": 3200 }, { "epoch": 0.15943180689381145, "grad_norm": 0.228515625, "learning_rate": 0.0007872494288268602, "loss": 0.8528, "step": 3210 }, { "epoch": 0.15992847918943082, "grad_norm": 0.24609375, "learning_rate": 0.0007872096950432105, "loss": 0.8395, "step": 3220 }, { "epoch": 0.16042515148505015, "grad_norm": 0.25, "learning_rate": 0.000787169961259561, "loss": 0.827, "step": 3230 }, { "epoch": 0.16092182378066952, "grad_norm": 0.232421875, "learning_rate": 0.0007871302274759114, "loss": 0.8478, "step": 3240 }, { "epoch": 0.16141849607628886, "grad_norm": 0.236328125, "learning_rate": 0.0007870904936922618, "loss": 0.8247, "step": 3250 }, { "epoch": 0.16191516837190822, "grad_norm": 0.2275390625, "learning_rate": 0.0007870507599086123, "loss": 0.8557, "step": 3260 }, { "epoch": 0.16241184066752756, "grad_norm": 0.2265625, "learning_rate": 0.0007870110261249628, "loss": 0.8532, "step": 3270 }, { "epoch": 0.16290851296314693, "grad_norm": 0.23828125, "learning_rate": 0.0007869712923413133, "loss": 0.8545, "step": 3280 }, { "epoch": 0.16340518525876627, "grad_norm": 0.2578125, "learning_rate": 0.0007869315585576637, "loss": 0.8675, "step": 3290 }, { "epoch": 0.1639018575543856, "grad_norm": 0.203125, "learning_rate": 0.0007868918247740141, "loss": 0.8209, "step": 3300 }, { "epoch": 0.16439852985000497, "grad_norm": 0.2265625, "learning_rate": 0.0007868520909903647, "loss": 0.8332, "step": 3310 }, { "epoch": 0.1648952021456243, "grad_norm": 0.212890625, "learning_rate": 0.0007868123572067151, "loss": 0.8373, "step": 3320 }, { "epoch": 0.16539187444124367, "grad_norm": 0.2333984375, "learning_rate": 0.0007867726234230655, "loss": 0.862, "step": 3330 }, { "epoch": 0.165888546736863, "grad_norm": 0.2060546875, "learning_rate": 0.000786732889639416, "loss": 0.8397, "step": 3340 }, { "epoch": 0.16638521903248238, "grad_norm": 0.248046875, "learning_rate": 0.0007866931558557663, "loss": 0.8448, "step": 3350 }, { "epoch": 0.16688189132810172, "grad_norm": 0.2294921875, "learning_rate": 0.0007866534220721168, "loss": 0.8382, "step": 3360 }, { "epoch": 0.16737856362372106, "grad_norm": 0.23046875, "learning_rate": 0.0007866136882884674, "loss": 0.837, "step": 3370 }, { "epoch": 0.16787523591934042, "grad_norm": 0.2294921875, "learning_rate": 0.0007865739545048177, "loss": 0.8321, "step": 3380 }, { "epoch": 0.16837190821495976, "grad_norm": 0.2255859375, "learning_rate": 0.0007865342207211682, "loss": 0.8347, "step": 3390 }, { "epoch": 0.16886858051057912, "grad_norm": 0.26953125, "learning_rate": 0.0007864944869375187, "loss": 0.879, "step": 3400 }, { "epoch": 0.16936525280619846, "grad_norm": 0.2021484375, "learning_rate": 0.000786454753153869, "loss": 0.8377, "step": 3410 }, { "epoch": 0.16986192510181783, "grad_norm": 0.2138671875, "learning_rate": 0.0007864150193702196, "loss": 0.7883, "step": 3420 }, { "epoch": 0.17035859739743717, "grad_norm": 0.228515625, "learning_rate": 0.00078637528558657, "loss": 0.8456, "step": 3430 }, { "epoch": 0.17085526969305653, "grad_norm": 0.26171875, "learning_rate": 0.0007863355518029205, "loss": 0.8309, "step": 3440 }, { "epoch": 0.17135194198867587, "grad_norm": 0.2314453125, "learning_rate": 0.0007862958180192709, "loss": 0.8715, "step": 3450 }, { "epoch": 0.1718486142842952, "grad_norm": 0.2197265625, "learning_rate": 0.0007862560842356213, "loss": 0.8108, "step": 3460 }, { "epoch": 0.17234528657991458, "grad_norm": 0.2216796875, "learning_rate": 0.0007862163504519719, "loss": 0.8348, "step": 3470 }, { "epoch": 0.1728419588755339, "grad_norm": 0.2138671875, "learning_rate": 0.0007861766166683223, "loss": 0.8579, "step": 3480 }, { "epoch": 0.17333863117115328, "grad_norm": 0.201171875, "learning_rate": 0.0007861368828846727, "loss": 0.8149, "step": 3490 }, { "epoch": 0.17383530346677262, "grad_norm": 0.2412109375, "learning_rate": 0.0007860971491010232, "loss": 0.8031, "step": 3500 }, { "epoch": 0.17433197576239198, "grad_norm": 0.2421875, "learning_rate": 0.0007860574153173736, "loss": 0.8666, "step": 3510 }, { "epoch": 0.17482864805801132, "grad_norm": 0.2060546875, "learning_rate": 0.0007860176815337241, "loss": 0.8271, "step": 3520 }, { "epoch": 0.1753253203536307, "grad_norm": 0.220703125, "learning_rate": 0.0007859779477500746, "loss": 0.8059, "step": 3530 }, { "epoch": 0.17582199264925003, "grad_norm": 0.2001953125, "learning_rate": 0.000785938213966425, "loss": 0.8332, "step": 3540 }, { "epoch": 0.17631866494486936, "grad_norm": 0.2412109375, "learning_rate": 0.0007858984801827754, "loss": 0.827, "step": 3550 }, { "epoch": 0.17681533724048873, "grad_norm": 0.23046875, "learning_rate": 0.000785858746399126, "loss": 0.8217, "step": 3560 }, { "epoch": 0.17731200953610807, "grad_norm": 0.2265625, "learning_rate": 0.0007858190126154763, "loss": 0.8426, "step": 3570 }, { "epoch": 0.17780868183172743, "grad_norm": 0.2294921875, "learning_rate": 0.0007857792788318268, "loss": 0.8383, "step": 3580 }, { "epoch": 0.17830535412734677, "grad_norm": 0.2060546875, "learning_rate": 0.0007857395450481773, "loss": 0.8361, "step": 3590 }, { "epoch": 0.17880202642296614, "grad_norm": 0.2060546875, "learning_rate": 0.0007856998112645277, "loss": 0.815, "step": 3600 }, { "epoch": 0.17929869871858548, "grad_norm": 0.2099609375, "learning_rate": 0.0007856600774808781, "loss": 0.822, "step": 3610 }, { "epoch": 0.17979537101420484, "grad_norm": 0.25390625, "learning_rate": 0.0007856203436972286, "loss": 0.8272, "step": 3620 }, { "epoch": 0.18029204330982418, "grad_norm": 0.251953125, "learning_rate": 0.0007855806099135791, "loss": 0.8239, "step": 3630 }, { "epoch": 0.18078871560544352, "grad_norm": 0.2138671875, "learning_rate": 0.0007855408761299295, "loss": 0.8254, "step": 3640 }, { "epoch": 0.18128538790106288, "grad_norm": 0.236328125, "learning_rate": 0.0007855011423462799, "loss": 0.8422, "step": 3650 }, { "epoch": 0.18178206019668222, "grad_norm": 0.26171875, "learning_rate": 0.0007854614085626304, "loss": 0.8412, "step": 3660 }, { "epoch": 0.1822787324923016, "grad_norm": 0.2373046875, "learning_rate": 0.0007854216747789809, "loss": 0.8247, "step": 3670 }, { "epoch": 0.18277540478792093, "grad_norm": 0.20703125, "learning_rate": 0.0007853819409953313, "loss": 0.8094, "step": 3680 }, { "epoch": 0.1832720770835403, "grad_norm": 0.267578125, "learning_rate": 0.0007853422072116818, "loss": 0.8558, "step": 3690 }, { "epoch": 0.18376874937915963, "grad_norm": 0.2333984375, "learning_rate": 0.0007853024734280322, "loss": 0.8463, "step": 3700 }, { "epoch": 0.18426542167477897, "grad_norm": 0.2412109375, "learning_rate": 0.0007852627396443826, "loss": 0.834, "step": 3710 }, { "epoch": 0.18476209397039833, "grad_norm": 0.2373046875, "learning_rate": 0.0007852230058607332, "loss": 0.8033, "step": 3720 }, { "epoch": 0.18525876626601767, "grad_norm": 0.1982421875, "learning_rate": 0.0007851832720770836, "loss": 0.8285, "step": 3730 }, { "epoch": 0.18575543856163704, "grad_norm": 0.2353515625, "learning_rate": 0.000785143538293434, "loss": 0.7968, "step": 3740 }, { "epoch": 0.18625211085725638, "grad_norm": 0.2451171875, "learning_rate": 0.0007851038045097845, "loss": 0.823, "step": 3750 }, { "epoch": 0.18674878315287574, "grad_norm": 0.2470703125, "learning_rate": 0.0007850640707261349, "loss": 0.8429, "step": 3760 }, { "epoch": 0.18724545544849508, "grad_norm": 0.2373046875, "learning_rate": 0.0007850243369424854, "loss": 0.8029, "step": 3770 }, { "epoch": 0.18774212774411445, "grad_norm": 0.216796875, "learning_rate": 0.0007849846031588359, "loss": 0.7974, "step": 3780 }, { "epoch": 0.18823880003973378, "grad_norm": 0.2119140625, "learning_rate": 0.0007849448693751863, "loss": 0.8167, "step": 3790 }, { "epoch": 0.18873547233535312, "grad_norm": 0.216796875, "learning_rate": 0.0007849051355915367, "loss": 0.8322, "step": 3800 }, { "epoch": 0.1892321446309725, "grad_norm": 0.23046875, "learning_rate": 0.0007848654018078872, "loss": 0.8332, "step": 3810 }, { "epoch": 0.18972881692659183, "grad_norm": 0.2041015625, "learning_rate": 0.0007848256680242377, "loss": 0.8058, "step": 3820 }, { "epoch": 0.1902254892222112, "grad_norm": 0.2197265625, "learning_rate": 0.0007847859342405881, "loss": 0.8551, "step": 3830 }, { "epoch": 0.19072216151783053, "grad_norm": 0.2099609375, "learning_rate": 0.0007847462004569385, "loss": 0.8265, "step": 3840 }, { "epoch": 0.1912188338134499, "grad_norm": 0.2421875, "learning_rate": 0.000784706466673289, "loss": 0.8347, "step": 3850 }, { "epoch": 0.19171550610906923, "grad_norm": 0.216796875, "learning_rate": 0.0007846667328896394, "loss": 0.803, "step": 3860 }, { "epoch": 0.1922121784046886, "grad_norm": 0.2275390625, "learning_rate": 0.0007846269991059899, "loss": 0.8361, "step": 3870 }, { "epoch": 0.19270885070030794, "grad_norm": 0.2392578125, "learning_rate": 0.0007845872653223404, "loss": 0.8125, "step": 3880 }, { "epoch": 0.19320552299592728, "grad_norm": 0.197265625, "learning_rate": 0.0007845475315386908, "loss": 0.8293, "step": 3890 }, { "epoch": 0.19370219529154664, "grad_norm": 0.205078125, "learning_rate": 0.0007845077977550412, "loss": 0.8149, "step": 3900 }, { "epoch": 0.19419886758716598, "grad_norm": 0.2265625, "learning_rate": 0.0007844680639713917, "loss": 0.8187, "step": 3910 }, { "epoch": 0.19469553988278535, "grad_norm": 0.251953125, "learning_rate": 0.0007844283301877422, "loss": 0.8111, "step": 3920 }, { "epoch": 0.19519221217840468, "grad_norm": 0.21875, "learning_rate": 0.0007843885964040926, "loss": 0.8085, "step": 3930 }, { "epoch": 0.19568888447402405, "grad_norm": 0.2294921875, "learning_rate": 0.0007843488626204431, "loss": 0.8489, "step": 3940 }, { "epoch": 0.1961855567696434, "grad_norm": 0.2041015625, "learning_rate": 0.0007843091288367935, "loss": 0.8231, "step": 3950 }, { "epoch": 0.19668222906526273, "grad_norm": 0.2109375, "learning_rate": 0.0007842693950531439, "loss": 0.8353, "step": 3960 }, { "epoch": 0.1971789013608821, "grad_norm": 0.2314453125, "learning_rate": 0.0007842296612694945, "loss": 0.8116, "step": 3970 }, { "epoch": 0.19767557365650143, "grad_norm": 0.2451171875, "learning_rate": 0.0007841899274858449, "loss": 0.8392, "step": 3980 }, { "epoch": 0.1981722459521208, "grad_norm": 0.21484375, "learning_rate": 0.0007841501937021953, "loss": 0.8145, "step": 3990 }, { "epoch": 0.19866891824774013, "grad_norm": 0.1953125, "learning_rate": 0.0007841104599185458, "loss": 0.8646, "step": 4000 }, { "epoch": 0.1991655905433595, "grad_norm": 0.2216796875, "learning_rate": 0.0007840707261348962, "loss": 0.792, "step": 4010 }, { "epoch": 0.19966226283897884, "grad_norm": 0.19921875, "learning_rate": 0.0007840309923512468, "loss": 0.7947, "step": 4020 }, { "epoch": 0.2001589351345982, "grad_norm": 0.216796875, "learning_rate": 0.0007839912585675971, "loss": 0.7848, "step": 4030 }, { "epoch": 0.20065560743021754, "grad_norm": 0.21875, "learning_rate": 0.0007839515247839476, "loss": 0.8112, "step": 4040 }, { "epoch": 0.20115227972583688, "grad_norm": 0.23046875, "learning_rate": 0.0007839117910002981, "loss": 0.8311, "step": 4050 }, { "epoch": 0.20164895202145625, "grad_norm": 0.2021484375, "learning_rate": 0.0007838720572166484, "loss": 0.7896, "step": 4060 }, { "epoch": 0.20214562431707558, "grad_norm": 0.21875, "learning_rate": 0.000783832323432999, "loss": 0.7951, "step": 4070 }, { "epoch": 0.20264229661269495, "grad_norm": 0.228515625, "learning_rate": 0.0007837925896493495, "loss": 0.757, "step": 4080 }, { "epoch": 0.2031389689083143, "grad_norm": 0.2158203125, "learning_rate": 0.0007837528558656998, "loss": 0.7988, "step": 4090 }, { "epoch": 0.20363564120393365, "grad_norm": 0.19140625, "learning_rate": 0.0007837131220820503, "loss": 0.8171, "step": 4100 }, { "epoch": 0.204132313499553, "grad_norm": 0.2275390625, "learning_rate": 0.0007836733882984007, "loss": 0.806, "step": 4110 }, { "epoch": 0.20462898579517236, "grad_norm": 0.197265625, "learning_rate": 0.0007836336545147512, "loss": 0.8052, "step": 4120 }, { "epoch": 0.2051256580907917, "grad_norm": 0.2080078125, "learning_rate": 0.0007835939207311017, "loss": 0.814, "step": 4130 }, { "epoch": 0.20562233038641103, "grad_norm": 0.2099609375, "learning_rate": 0.0007835541869474521, "loss": 0.8323, "step": 4140 }, { "epoch": 0.2061190026820304, "grad_norm": 0.1953125, "learning_rate": 0.0007835144531638025, "loss": 0.7967, "step": 4150 }, { "epoch": 0.20661567497764974, "grad_norm": 0.197265625, "learning_rate": 0.000783474719380153, "loss": 0.7966, "step": 4160 }, { "epoch": 0.2071123472732691, "grad_norm": 0.205078125, "learning_rate": 0.0007834349855965035, "loss": 0.8208, "step": 4170 }, { "epoch": 0.20760901956888844, "grad_norm": 0.23046875, "learning_rate": 0.000783395251812854, "loss": 0.8258, "step": 4180 }, { "epoch": 0.2081056918645078, "grad_norm": 0.251953125, "learning_rate": 0.0007833555180292044, "loss": 0.8061, "step": 4190 }, { "epoch": 0.20860236416012715, "grad_norm": 0.1953125, "learning_rate": 0.0007833157842455548, "loss": 0.824, "step": 4200 }, { "epoch": 0.2090990364557465, "grad_norm": 0.2236328125, "learning_rate": 0.0007832760504619053, "loss": 0.8106, "step": 4210 }, { "epoch": 0.20959570875136585, "grad_norm": 0.23046875, "learning_rate": 0.0007832363166782556, "loss": 0.8123, "step": 4220 }, { "epoch": 0.2100923810469852, "grad_norm": 0.2138671875, "learning_rate": 0.0007831965828946062, "loss": 0.863, "step": 4230 }, { "epoch": 0.21058905334260455, "grad_norm": 0.25, "learning_rate": 0.0007831568491109567, "loss": 0.8205, "step": 4240 }, { "epoch": 0.2110857256382239, "grad_norm": 0.2236328125, "learning_rate": 0.000783117115327307, "loss": 0.8325, "step": 4250 }, { "epoch": 0.21158239793384326, "grad_norm": 0.2412109375, "learning_rate": 0.0007830773815436575, "loss": 0.8257, "step": 4260 }, { "epoch": 0.2120790702294626, "grad_norm": 0.2041015625, "learning_rate": 0.0007830376477600081, "loss": 0.8101, "step": 4270 }, { "epoch": 0.21257574252508196, "grad_norm": 0.2197265625, "learning_rate": 0.0007829979139763584, "loss": 0.8094, "step": 4280 }, { "epoch": 0.2130724148207013, "grad_norm": 0.19921875, "learning_rate": 0.0007829581801927089, "loss": 0.8194, "step": 4290 }, { "epoch": 0.21356908711632064, "grad_norm": 0.234375, "learning_rate": 0.0007829184464090593, "loss": 0.8438, "step": 4300 }, { "epoch": 0.21406575941194, "grad_norm": 0.248046875, "learning_rate": 0.0007828787126254097, "loss": 0.8385, "step": 4310 }, { "epoch": 0.21456243170755934, "grad_norm": 0.1953125, "learning_rate": 0.0007828389788417603, "loss": 0.8205, "step": 4320 }, { "epoch": 0.2150591040031787, "grad_norm": 0.2099609375, "learning_rate": 0.0007827992450581107, "loss": 0.7999, "step": 4330 }, { "epoch": 0.21555577629879805, "grad_norm": 0.2099609375, "learning_rate": 0.0007827595112744612, "loss": 0.7847, "step": 4340 }, { "epoch": 0.2160524485944174, "grad_norm": 0.2001953125, "learning_rate": 0.0007827197774908116, "loss": 0.7887, "step": 4350 }, { "epoch": 0.21654912089003675, "grad_norm": 0.232421875, "learning_rate": 0.000782680043707162, "loss": 0.8135, "step": 4360 }, { "epoch": 0.21704579318565612, "grad_norm": 0.255859375, "learning_rate": 0.0007826403099235126, "loss": 0.8124, "step": 4370 }, { "epoch": 0.21754246548127545, "grad_norm": 0.208984375, "learning_rate": 0.000782600576139863, "loss": 0.7823, "step": 4380 }, { "epoch": 0.2180391377768948, "grad_norm": 0.1953125, "learning_rate": 0.0007825608423562134, "loss": 0.8115, "step": 4390 }, { "epoch": 0.21853581007251416, "grad_norm": 0.2001953125, "learning_rate": 0.0007825211085725639, "loss": 0.8017, "step": 4400 }, { "epoch": 0.2190324823681335, "grad_norm": 0.2236328125, "learning_rate": 0.0007824813747889143, "loss": 0.7821, "step": 4410 }, { "epoch": 0.21952915466375286, "grad_norm": 0.2041015625, "learning_rate": 0.0007824416410052647, "loss": 0.8001, "step": 4420 }, { "epoch": 0.2200258269593722, "grad_norm": 0.2158203125, "learning_rate": 0.0007824019072216153, "loss": 0.7993, "step": 4430 }, { "epoch": 0.22052249925499157, "grad_norm": 0.2099609375, "learning_rate": 0.0007823621734379656, "loss": 0.8269, "step": 4440 }, { "epoch": 0.2210191715506109, "grad_norm": 0.2451171875, "learning_rate": 0.0007823224396543161, "loss": 0.8173, "step": 4450 }, { "epoch": 0.22151584384623027, "grad_norm": 0.2177734375, "learning_rate": 0.0007822827058706666, "loss": 0.7898, "step": 4460 }, { "epoch": 0.2220125161418496, "grad_norm": 0.236328125, "learning_rate": 0.000782242972087017, "loss": 0.8034, "step": 4470 }, { "epoch": 0.22250918843746895, "grad_norm": 0.228515625, "learning_rate": 0.0007822032383033675, "loss": 0.7929, "step": 4480 }, { "epoch": 0.2230058607330883, "grad_norm": 0.2265625, "learning_rate": 0.0007821635045197179, "loss": 0.7875, "step": 4490 }, { "epoch": 0.22350253302870765, "grad_norm": 0.212890625, "learning_rate": 0.0007821237707360684, "loss": 0.7699, "step": 4500 }, { "epoch": 0.22399920532432702, "grad_norm": 0.2080078125, "learning_rate": 0.0007820840369524188, "loss": 0.815, "step": 4510 }, { "epoch": 0.22449587761994635, "grad_norm": 0.2060546875, "learning_rate": 0.0007820443031687692, "loss": 0.7915, "step": 4520 }, { "epoch": 0.22499254991556572, "grad_norm": 0.197265625, "learning_rate": 0.0007820045693851198, "loss": 0.8077, "step": 4530 }, { "epoch": 0.22548922221118506, "grad_norm": 0.2216796875, "learning_rate": 0.0007819648356014702, "loss": 0.7716, "step": 4540 }, { "epoch": 0.2259858945068044, "grad_norm": 0.2001953125, "learning_rate": 0.0007819251018178206, "loss": 0.7838, "step": 4550 }, { "epoch": 0.22648256680242376, "grad_norm": 0.2197265625, "learning_rate": 0.0007818853680341711, "loss": 0.8343, "step": 4560 }, { "epoch": 0.2269792390980431, "grad_norm": 0.2373046875, "learning_rate": 0.0007818456342505215, "loss": 0.7671, "step": 4570 }, { "epoch": 0.22747591139366247, "grad_norm": 0.2060546875, "learning_rate": 0.000781805900466872, "loss": 0.7987, "step": 4580 }, { "epoch": 0.2279725836892818, "grad_norm": 0.212890625, "learning_rate": 0.0007817661666832225, "loss": 0.7885, "step": 4590 }, { "epoch": 0.22846925598490117, "grad_norm": 0.208984375, "learning_rate": 0.0007817264328995729, "loss": 0.7943, "step": 4600 }, { "epoch": 0.2289659282805205, "grad_norm": 0.20703125, "learning_rate": 0.0007816866991159233, "loss": 0.8023, "step": 4610 }, { "epoch": 0.22946260057613987, "grad_norm": 0.2373046875, "learning_rate": 0.0007816469653322739, "loss": 0.7773, "step": 4620 }, { "epoch": 0.2299592728717592, "grad_norm": 0.232421875, "learning_rate": 0.0007816072315486243, "loss": 0.8065, "step": 4630 }, { "epoch": 0.23045594516737855, "grad_norm": 0.19921875, "learning_rate": 0.0007815674977649747, "loss": 0.7635, "step": 4640 }, { "epoch": 0.23095261746299792, "grad_norm": 0.1845703125, "learning_rate": 0.0007815277639813252, "loss": 0.7777, "step": 4650 }, { "epoch": 0.23144928975861725, "grad_norm": 0.201171875, "learning_rate": 0.0007814880301976756, "loss": 0.7775, "step": 4660 }, { "epoch": 0.23194596205423662, "grad_norm": 0.2265625, "learning_rate": 0.000781448296414026, "loss": 0.8366, "step": 4670 }, { "epoch": 0.23244263434985596, "grad_norm": 0.2060546875, "learning_rate": 0.0007814085626303766, "loss": 0.7759, "step": 4680 }, { "epoch": 0.23293930664547532, "grad_norm": 0.2060546875, "learning_rate": 0.000781368828846727, "loss": 0.7545, "step": 4690 }, { "epoch": 0.23343597894109466, "grad_norm": 0.205078125, "learning_rate": 0.0007813290950630774, "loss": 0.8055, "step": 4700 }, { "epoch": 0.23393265123671403, "grad_norm": 0.2021484375, "learning_rate": 0.0007812893612794278, "loss": 0.7886, "step": 4710 }, { "epoch": 0.23442932353233337, "grad_norm": 0.189453125, "learning_rate": 0.0007812496274957783, "loss": 0.8074, "step": 4720 }, { "epoch": 0.2349259958279527, "grad_norm": 0.2119140625, "learning_rate": 0.0007812098937121288, "loss": 0.8098, "step": 4730 }, { "epoch": 0.23542266812357207, "grad_norm": 0.1845703125, "learning_rate": 0.0007811701599284792, "loss": 0.8287, "step": 4740 }, { "epoch": 0.2359193404191914, "grad_norm": 0.2158203125, "learning_rate": 0.0007811304261448297, "loss": 0.7923, "step": 4750 }, { "epoch": 0.23641601271481077, "grad_norm": 0.216796875, "learning_rate": 0.0007810906923611801, "loss": 0.8065, "step": 4760 }, { "epoch": 0.2369126850104301, "grad_norm": 0.20703125, "learning_rate": 0.0007810509585775305, "loss": 0.7706, "step": 4770 }, { "epoch": 0.23740935730604948, "grad_norm": 0.208984375, "learning_rate": 0.0007810112247938811, "loss": 0.8026, "step": 4780 }, { "epoch": 0.23790602960166882, "grad_norm": 0.2294921875, "learning_rate": 0.0007809714910102315, "loss": 0.8084, "step": 4790 }, { "epoch": 0.23840270189728818, "grad_norm": 0.21875, "learning_rate": 0.0007809317572265819, "loss": 0.7945, "step": 4800 }, { "epoch": 0.23889937419290752, "grad_norm": 0.2138671875, "learning_rate": 0.0007808920234429324, "loss": 0.8042, "step": 4810 }, { "epoch": 0.23939604648852686, "grad_norm": 0.1953125, "learning_rate": 0.0007808522896592828, "loss": 0.7626, "step": 4820 }, { "epoch": 0.23989271878414622, "grad_norm": 0.193359375, "learning_rate": 0.0007808125558756333, "loss": 0.7616, "step": 4830 }, { "epoch": 0.24038939107976556, "grad_norm": 0.1962890625, "learning_rate": 0.0007807728220919838, "loss": 0.7974, "step": 4840 }, { "epoch": 0.24088606337538493, "grad_norm": 0.2060546875, "learning_rate": 0.0007807330883083342, "loss": 0.8034, "step": 4850 }, { "epoch": 0.24138273567100427, "grad_norm": 0.2216796875, "learning_rate": 0.0007806933545246846, "loss": 0.8325, "step": 4860 }, { "epoch": 0.24187940796662363, "grad_norm": 0.1962890625, "learning_rate": 0.0007806536207410351, "loss": 0.8055, "step": 4870 }, { "epoch": 0.24237608026224297, "grad_norm": 0.20703125, "learning_rate": 0.0007806138869573856, "loss": 0.7872, "step": 4880 }, { "epoch": 0.2428727525578623, "grad_norm": 0.2275390625, "learning_rate": 0.000780574153173736, "loss": 0.8107, "step": 4890 }, { "epoch": 0.24336942485348168, "grad_norm": 0.197265625, "learning_rate": 0.0007805344193900864, "loss": 0.7817, "step": 4900 }, { "epoch": 0.243866097149101, "grad_norm": 0.2021484375, "learning_rate": 0.0007804946856064369, "loss": 0.8126, "step": 4910 }, { "epoch": 0.24436276944472038, "grad_norm": 0.1875, "learning_rate": 0.0007804549518227874, "loss": 0.7746, "step": 4920 }, { "epoch": 0.24485944174033972, "grad_norm": 0.2119140625, "learning_rate": 0.0007804152180391378, "loss": 0.7759, "step": 4930 }, { "epoch": 0.24535611403595908, "grad_norm": 0.203125, "learning_rate": 0.0007803754842554883, "loss": 0.77, "step": 4940 }, { "epoch": 0.24585278633157842, "grad_norm": 0.181640625, "learning_rate": 0.0007803357504718388, "loss": 0.7883, "step": 4950 }, { "epoch": 0.2463494586271978, "grad_norm": 0.19921875, "learning_rate": 0.0007802960166881891, "loss": 0.7896, "step": 4960 }, { "epoch": 0.24684613092281713, "grad_norm": 0.208984375, "learning_rate": 0.0007802562829045396, "loss": 0.796, "step": 4970 }, { "epoch": 0.24734280321843646, "grad_norm": 0.1962890625, "learning_rate": 0.0007802165491208901, "loss": 0.8069, "step": 4980 }, { "epoch": 0.24783947551405583, "grad_norm": 0.220703125, "learning_rate": 0.0007801768153372405, "loss": 0.8263, "step": 4990 }, { "epoch": 0.24833614780967517, "grad_norm": 0.2275390625, "learning_rate": 0.000780137081553591, "loss": 0.7691, "step": 5000 }, { "epoch": 0.24883282010529453, "grad_norm": 0.18359375, "learning_rate": 0.0007800973477699414, "loss": 0.7552, "step": 5010 }, { "epoch": 0.24932949240091387, "grad_norm": 0.205078125, "learning_rate": 0.0007800576139862918, "loss": 0.7764, "step": 5020 }, { "epoch": 0.24982616469653324, "grad_norm": 0.197265625, "learning_rate": 0.0007800178802026424, "loss": 0.7935, "step": 5030 }, { "epoch": 0.2503228369921526, "grad_norm": 0.2255859375, "learning_rate": 0.0007799781464189928, "loss": 0.7723, "step": 5040 }, { "epoch": 0.2508195092877719, "grad_norm": 0.1982421875, "learning_rate": 0.0007799384126353432, "loss": 0.7674, "step": 5050 }, { "epoch": 0.25131618158339125, "grad_norm": 0.1962890625, "learning_rate": 0.0007798986788516937, "loss": 0.7987, "step": 5060 }, { "epoch": 0.25181285387901065, "grad_norm": 0.19921875, "learning_rate": 0.0007798589450680441, "loss": 0.8071, "step": 5070 }, { "epoch": 0.25230952617463, "grad_norm": 0.203125, "learning_rate": 0.0007798192112843947, "loss": 0.8022, "step": 5080 }, { "epoch": 0.2528061984702493, "grad_norm": 0.2255859375, "learning_rate": 0.000779779477500745, "loss": 0.7887, "step": 5090 }, { "epoch": 0.25330287076586866, "grad_norm": 0.20703125, "learning_rate": 0.0007797397437170955, "loss": 0.7665, "step": 5100 }, { "epoch": 0.25379954306148805, "grad_norm": 0.1982421875, "learning_rate": 0.000779700009933446, "loss": 0.8016, "step": 5110 }, { "epoch": 0.2542962153571074, "grad_norm": 0.21484375, "learning_rate": 0.0007796602761497963, "loss": 0.7871, "step": 5120 }, { "epoch": 0.25479288765272673, "grad_norm": 0.2021484375, "learning_rate": 0.0007796205423661469, "loss": 0.8054, "step": 5130 }, { "epoch": 0.25528955994834607, "grad_norm": 0.208984375, "learning_rate": 0.0007795808085824974, "loss": 0.7859, "step": 5140 }, { "epoch": 0.2557862322439654, "grad_norm": 0.1953125, "learning_rate": 0.0007795410747988477, "loss": 0.7812, "step": 5150 }, { "epoch": 0.2562829045395848, "grad_norm": 0.181640625, "learning_rate": 0.0007795013410151982, "loss": 0.7794, "step": 5160 }, { "epoch": 0.25677957683520414, "grad_norm": 0.23046875, "learning_rate": 0.0007794616072315486, "loss": 0.7664, "step": 5170 }, { "epoch": 0.2572762491308235, "grad_norm": 0.1904296875, "learning_rate": 0.000779421873447899, "loss": 0.7864, "step": 5180 }, { "epoch": 0.2577729214264428, "grad_norm": 0.255859375, "learning_rate": 0.0007793821396642496, "loss": 0.8203, "step": 5190 }, { "epoch": 0.2582695937220622, "grad_norm": 0.1796875, "learning_rate": 0.0007793424058806, "loss": 0.7712, "step": 5200 }, { "epoch": 0.25876626601768155, "grad_norm": 0.259765625, "learning_rate": 0.0007793026720969504, "loss": 0.7664, "step": 5210 }, { "epoch": 0.2592629383133009, "grad_norm": 0.1953125, "learning_rate": 0.0007792629383133009, "loss": 0.7916, "step": 5220 }, { "epoch": 0.2597596106089202, "grad_norm": 0.2197265625, "learning_rate": 0.0007792232045296514, "loss": 0.7596, "step": 5230 }, { "epoch": 0.26025628290453956, "grad_norm": 0.2255859375, "learning_rate": 0.0007791834707460019, "loss": 0.7672, "step": 5240 }, { "epoch": 0.26075295520015895, "grad_norm": 0.1982421875, "learning_rate": 0.0007791437369623523, "loss": 0.7835, "step": 5250 }, { "epoch": 0.2612496274957783, "grad_norm": 0.208984375, "learning_rate": 0.0007791040031787027, "loss": 0.7874, "step": 5260 }, { "epoch": 0.26174629979139763, "grad_norm": 0.21875, "learning_rate": 0.0007790642693950532, "loss": 0.7597, "step": 5270 }, { "epoch": 0.26224297208701697, "grad_norm": 0.185546875, "learning_rate": 0.0007790245356114037, "loss": 0.7825, "step": 5280 }, { "epoch": 0.26273964438263636, "grad_norm": 0.2177734375, "learning_rate": 0.0007789848018277541, "loss": 0.8065, "step": 5290 }, { "epoch": 0.2632363166782557, "grad_norm": 0.21484375, "learning_rate": 0.0007789450680441046, "loss": 0.7338, "step": 5300 }, { "epoch": 0.26373298897387504, "grad_norm": 0.1923828125, "learning_rate": 0.0007789053342604549, "loss": 0.769, "step": 5310 }, { "epoch": 0.2642296612694944, "grad_norm": 0.193359375, "learning_rate": 0.0007788656004768054, "loss": 0.776, "step": 5320 }, { "epoch": 0.2647263335651137, "grad_norm": 0.189453125, "learning_rate": 0.000778825866693156, "loss": 0.8244, "step": 5330 }, { "epoch": 0.2652230058607331, "grad_norm": 0.1962890625, "learning_rate": 0.0007787861329095063, "loss": 0.7883, "step": 5340 }, { "epoch": 0.26571967815635245, "grad_norm": 0.212890625, "learning_rate": 0.0007787463991258568, "loss": 0.7951, "step": 5350 }, { "epoch": 0.2662163504519718, "grad_norm": 0.2060546875, "learning_rate": 0.0007787066653422072, "loss": 0.8077, "step": 5360 }, { "epoch": 0.2667130227475911, "grad_norm": 0.197265625, "learning_rate": 0.0007786669315585577, "loss": 0.7914, "step": 5370 }, { "epoch": 0.2672096950432105, "grad_norm": 0.19140625, "learning_rate": 0.0007786271977749082, "loss": 0.7563, "step": 5380 }, { "epoch": 0.26770636733882985, "grad_norm": 0.224609375, "learning_rate": 0.0007785874639912586, "loss": 0.7683, "step": 5390 }, { "epoch": 0.2682030396344492, "grad_norm": 0.1845703125, "learning_rate": 0.0007785477302076091, "loss": 0.7949, "step": 5400 }, { "epoch": 0.26869971193006853, "grad_norm": 0.18359375, "learning_rate": 0.0007785079964239595, "loss": 0.8195, "step": 5410 }, { "epoch": 0.26919638422568787, "grad_norm": 0.1962890625, "learning_rate": 0.0007784682626403099, "loss": 0.7995, "step": 5420 }, { "epoch": 0.26969305652130726, "grad_norm": 0.173828125, "learning_rate": 0.0007784285288566605, "loss": 0.7776, "step": 5430 }, { "epoch": 0.2701897288169266, "grad_norm": 0.19921875, "learning_rate": 0.0007783887950730109, "loss": 0.7899, "step": 5440 }, { "epoch": 0.27068640111254594, "grad_norm": 0.2080078125, "learning_rate": 0.0007783490612893613, "loss": 0.8031, "step": 5450 }, { "epoch": 0.2711830734081653, "grad_norm": 0.1962890625, "learning_rate": 0.0007783093275057118, "loss": 0.749, "step": 5460 }, { "epoch": 0.27167974570378467, "grad_norm": 0.224609375, "learning_rate": 0.0007782695937220622, "loss": 0.7685, "step": 5470 }, { "epoch": 0.272176417999404, "grad_norm": 0.216796875, "learning_rate": 0.0007782298599384127, "loss": 0.765, "step": 5480 }, { "epoch": 0.27267309029502335, "grad_norm": 0.2001953125, "learning_rate": 0.0007781901261547632, "loss": 0.7843, "step": 5490 }, { "epoch": 0.2731697625906427, "grad_norm": 0.2080078125, "learning_rate": 0.0007781503923711135, "loss": 0.7801, "step": 5500 }, { "epoch": 0.273666434886262, "grad_norm": 0.1865234375, "learning_rate": 0.000778110658587464, "loss": 0.7614, "step": 5510 }, { "epoch": 0.2741631071818814, "grad_norm": 0.2119140625, "learning_rate": 0.0007780709248038145, "loss": 0.8162, "step": 5520 }, { "epoch": 0.27465977947750075, "grad_norm": 0.1875, "learning_rate": 0.000778031191020165, "loss": 0.7625, "step": 5530 }, { "epoch": 0.2751564517731201, "grad_norm": 0.1943359375, "learning_rate": 0.0007779914572365154, "loss": 0.7837, "step": 5540 }, { "epoch": 0.27565312406873943, "grad_norm": 0.1953125, "learning_rate": 0.0007779517234528659, "loss": 0.7613, "step": 5550 }, { "epoch": 0.2761497963643588, "grad_norm": 0.244140625, "learning_rate": 0.0007779119896692163, "loss": 0.7512, "step": 5560 }, { "epoch": 0.27664646865997816, "grad_norm": 0.1865234375, "learning_rate": 0.0007778722558855667, "loss": 0.7763, "step": 5570 }, { "epoch": 0.2771431409555975, "grad_norm": 0.1982421875, "learning_rate": 0.0007778325221019171, "loss": 0.7752, "step": 5580 }, { "epoch": 0.27763981325121684, "grad_norm": 0.177734375, "learning_rate": 0.0007777927883182677, "loss": 0.8011, "step": 5590 }, { "epoch": 0.2781364855468362, "grad_norm": 0.224609375, "learning_rate": 0.0007777530545346181, "loss": 0.7834, "step": 5600 }, { "epoch": 0.27863315784245557, "grad_norm": 0.2119140625, "learning_rate": 0.0007777133207509685, "loss": 0.8148, "step": 5610 }, { "epoch": 0.2791298301380749, "grad_norm": 0.1953125, "learning_rate": 0.000777673586967319, "loss": 0.7647, "step": 5620 }, { "epoch": 0.27962650243369425, "grad_norm": 0.193359375, "learning_rate": 0.0007776338531836695, "loss": 0.7446, "step": 5630 }, { "epoch": 0.2801231747293136, "grad_norm": 0.2001953125, "learning_rate": 0.0007775941194000199, "loss": 0.7964, "step": 5640 }, { "epoch": 0.2806198470249329, "grad_norm": 0.197265625, "learning_rate": 0.0007775543856163704, "loss": 0.7563, "step": 5650 }, { "epoch": 0.2811165193205523, "grad_norm": 0.1845703125, "learning_rate": 0.0007775146518327208, "loss": 0.7848, "step": 5660 }, { "epoch": 0.28161319161617165, "grad_norm": 0.244140625, "learning_rate": 0.0007774749180490712, "loss": 0.7805, "step": 5670 }, { "epoch": 0.282109863911791, "grad_norm": 0.205078125, "learning_rate": 0.0007774351842654218, "loss": 0.7383, "step": 5680 }, { "epoch": 0.28260653620741033, "grad_norm": 0.2021484375, "learning_rate": 0.0007773954504817722, "loss": 0.7549, "step": 5690 }, { "epoch": 0.2831032085030297, "grad_norm": 0.2177734375, "learning_rate": 0.0007773557166981226, "loss": 0.7672, "step": 5700 }, { "epoch": 0.28359988079864906, "grad_norm": 0.21484375, "learning_rate": 0.0007773159829144731, "loss": 0.7659, "step": 5710 }, { "epoch": 0.2840965530942684, "grad_norm": 0.1982421875, "learning_rate": 0.0007772762491308235, "loss": 0.7696, "step": 5720 }, { "epoch": 0.28459322538988774, "grad_norm": 0.220703125, "learning_rate": 0.000777236515347174, "loss": 0.7976, "step": 5730 }, { "epoch": 0.2850898976855071, "grad_norm": 0.1826171875, "learning_rate": 0.0007771967815635245, "loss": 0.7625, "step": 5740 }, { "epoch": 0.28558656998112647, "grad_norm": 0.2021484375, "learning_rate": 0.0007771570477798749, "loss": 0.8234, "step": 5750 }, { "epoch": 0.2860832422767458, "grad_norm": 0.244140625, "learning_rate": 0.0007771173139962253, "loss": 0.7991, "step": 5760 }, { "epoch": 0.28657991457236515, "grad_norm": 0.1767578125, "learning_rate": 0.0007770775802125757, "loss": 0.7791, "step": 5770 }, { "epoch": 0.2870765868679845, "grad_norm": 0.2119140625, "learning_rate": 0.0007770378464289263, "loss": 0.8082, "step": 5780 }, { "epoch": 0.2875732591636039, "grad_norm": 0.193359375, "learning_rate": 0.0007769981126452767, "loss": 0.7729, "step": 5790 }, { "epoch": 0.2880699314592232, "grad_norm": 0.2041015625, "learning_rate": 0.0007769583788616271, "loss": 0.7724, "step": 5800 }, { "epoch": 0.28856660375484255, "grad_norm": 0.19140625, "learning_rate": 0.0007769186450779776, "loss": 0.7698, "step": 5810 }, { "epoch": 0.2890632760504619, "grad_norm": 0.20703125, "learning_rate": 0.000776878911294328, "loss": 0.7587, "step": 5820 }, { "epoch": 0.28955994834608123, "grad_norm": 0.1982421875, "learning_rate": 0.0007768391775106784, "loss": 0.7634, "step": 5830 }, { "epoch": 0.2900566206417006, "grad_norm": 0.2080078125, "learning_rate": 0.000776799443727029, "loss": 0.7557, "step": 5840 }, { "epoch": 0.29055329293731996, "grad_norm": 0.177734375, "learning_rate": 0.0007767597099433794, "loss": 0.7405, "step": 5850 }, { "epoch": 0.2910499652329393, "grad_norm": 0.18359375, "learning_rate": 0.0007767199761597298, "loss": 0.7826, "step": 5860 }, { "epoch": 0.29154663752855864, "grad_norm": 0.1884765625, "learning_rate": 0.0007766802423760803, "loss": 0.7608, "step": 5870 }, { "epoch": 0.29204330982417803, "grad_norm": 0.2236328125, "learning_rate": 0.0007766405085924307, "loss": 0.739, "step": 5880 }, { "epoch": 0.29253998211979737, "grad_norm": 0.19921875, "learning_rate": 0.0007766007748087812, "loss": 0.7819, "step": 5890 }, { "epoch": 0.2930366544154167, "grad_norm": 0.19921875, "learning_rate": 0.0007765610410251317, "loss": 0.7584, "step": 5900 }, { "epoch": 0.29353332671103605, "grad_norm": 0.24609375, "learning_rate": 0.0007765213072414821, "loss": 0.7396, "step": 5910 }, { "epoch": 0.2940299990066554, "grad_norm": 0.205078125, "learning_rate": 0.0007764815734578325, "loss": 0.7617, "step": 5920 }, { "epoch": 0.2945266713022748, "grad_norm": 0.2080078125, "learning_rate": 0.000776441839674183, "loss": 0.7464, "step": 5930 }, { "epoch": 0.2950233435978941, "grad_norm": 0.1962890625, "learning_rate": 0.0007764021058905335, "loss": 0.757, "step": 5940 }, { "epoch": 0.29552001589351345, "grad_norm": 0.1767578125, "learning_rate": 0.0007763623721068839, "loss": 0.7461, "step": 5950 }, { "epoch": 0.2960166881891328, "grad_norm": 0.1923828125, "learning_rate": 0.0007763226383232343, "loss": 0.77, "step": 5960 }, { "epoch": 0.2965133604847522, "grad_norm": 0.181640625, "learning_rate": 0.0007762829045395848, "loss": 0.7571, "step": 5970 }, { "epoch": 0.2970100327803715, "grad_norm": 0.2119140625, "learning_rate": 0.0007762431707559354, "loss": 0.7764, "step": 5980 }, { "epoch": 0.29750670507599086, "grad_norm": 0.208984375, "learning_rate": 0.0007762034369722857, "loss": 0.7419, "step": 5990 }, { "epoch": 0.2980033773716102, "grad_norm": 0.1962890625, "learning_rate": 0.0007761637031886362, "loss": 0.7873, "step": 6000 }, { "epoch": 0.29850004966722954, "grad_norm": 0.2001953125, "learning_rate": 0.0007761239694049867, "loss": 0.7525, "step": 6010 }, { "epoch": 0.29899672196284893, "grad_norm": 0.1953125, "learning_rate": 0.000776084235621337, "loss": 0.7782, "step": 6020 }, { "epoch": 0.29949339425846827, "grad_norm": 0.2216796875, "learning_rate": 0.0007760445018376875, "loss": 0.7807, "step": 6030 }, { "epoch": 0.2999900665540876, "grad_norm": 0.1943359375, "learning_rate": 0.000776004768054038, "loss": 0.7586, "step": 6040 }, { "epoch": 0.30048673884970695, "grad_norm": 0.169921875, "learning_rate": 0.0007759650342703884, "loss": 0.7722, "step": 6050 }, { "epoch": 0.30098341114532634, "grad_norm": 0.181640625, "learning_rate": 0.0007759253004867389, "loss": 0.7659, "step": 6060 }, { "epoch": 0.3014800834409457, "grad_norm": 0.1787109375, "learning_rate": 0.0007758855667030893, "loss": 0.7636, "step": 6070 }, { "epoch": 0.301976755736565, "grad_norm": 0.1953125, "learning_rate": 0.0007758458329194397, "loss": 0.7604, "step": 6080 }, { "epoch": 0.30247342803218435, "grad_norm": 0.177734375, "learning_rate": 0.0007758060991357903, "loss": 0.7528, "step": 6090 }, { "epoch": 0.3029701003278037, "grad_norm": 0.189453125, "learning_rate": 0.0007757663653521407, "loss": 0.7645, "step": 6100 }, { "epoch": 0.3034667726234231, "grad_norm": 0.201171875, "learning_rate": 0.0007757266315684912, "loss": 0.7779, "step": 6110 }, { "epoch": 0.3039634449190424, "grad_norm": 0.2265625, "learning_rate": 0.0007756868977848416, "loss": 0.758, "step": 6120 }, { "epoch": 0.30446011721466176, "grad_norm": 0.189453125, "learning_rate": 0.000775647164001192, "loss": 0.7451, "step": 6130 }, { "epoch": 0.3049567895102811, "grad_norm": 0.1875, "learning_rate": 0.0007756074302175426, "loss": 0.7465, "step": 6140 }, { "epoch": 0.30545346180590044, "grad_norm": 0.1796875, "learning_rate": 0.0007755676964338929, "loss": 0.7755, "step": 6150 }, { "epoch": 0.30595013410151983, "grad_norm": 0.234375, "learning_rate": 0.0007755279626502434, "loss": 0.7573, "step": 6160 }, { "epoch": 0.30644680639713917, "grad_norm": 0.1728515625, "learning_rate": 0.0007754882288665939, "loss": 0.7536, "step": 6170 }, { "epoch": 0.3069434786927585, "grad_norm": 0.19140625, "learning_rate": 0.0007754484950829442, "loss": 0.7855, "step": 6180 }, { "epoch": 0.30744015098837785, "grad_norm": 0.1865234375, "learning_rate": 0.0007754087612992948, "loss": 0.756, "step": 6190 }, { "epoch": 0.30793682328399724, "grad_norm": 0.197265625, "learning_rate": 0.0007753690275156453, "loss": 0.7682, "step": 6200 }, { "epoch": 0.3084334955796166, "grad_norm": 0.1962890625, "learning_rate": 0.0007753292937319956, "loss": 0.7463, "step": 6210 }, { "epoch": 0.3089301678752359, "grad_norm": 0.2080078125, "learning_rate": 0.0007752895599483461, "loss": 0.7839, "step": 6220 }, { "epoch": 0.30942684017085526, "grad_norm": 0.2080078125, "learning_rate": 0.0007752498261646965, "loss": 0.7485, "step": 6230 }, { "epoch": 0.3099235124664746, "grad_norm": 0.1826171875, "learning_rate": 0.000775210092381047, "loss": 0.7452, "step": 6240 }, { "epoch": 0.310420184762094, "grad_norm": 0.19140625, "learning_rate": 0.0007751703585973975, "loss": 0.7546, "step": 6250 }, { "epoch": 0.3109168570577133, "grad_norm": 0.1875, "learning_rate": 0.0007751306248137479, "loss": 0.7653, "step": 6260 }, { "epoch": 0.31141352935333266, "grad_norm": 0.185546875, "learning_rate": 0.0007750908910300984, "loss": 0.7725, "step": 6270 }, { "epoch": 0.311910201648952, "grad_norm": 0.1806640625, "learning_rate": 0.0007750511572464488, "loss": 0.7692, "step": 6280 }, { "epoch": 0.3124068739445714, "grad_norm": 0.29296875, "learning_rate": 0.0007750114234627993, "loss": 0.7469, "step": 6290 }, { "epoch": 0.31290354624019073, "grad_norm": 0.2001953125, "learning_rate": 0.0007749716896791498, "loss": 0.7579, "step": 6300 }, { "epoch": 0.31340021853581007, "grad_norm": 0.197265625, "learning_rate": 0.0007749319558955002, "loss": 0.7455, "step": 6310 }, { "epoch": 0.3138968908314294, "grad_norm": 0.2158203125, "learning_rate": 0.0007748922221118506, "loss": 0.7563, "step": 6320 }, { "epoch": 0.31439356312704875, "grad_norm": 0.1875, "learning_rate": 0.0007748524883282011, "loss": 0.7535, "step": 6330 }, { "epoch": 0.31489023542266814, "grad_norm": 0.1962890625, "learning_rate": 0.0007748127545445516, "loss": 0.7547, "step": 6340 }, { "epoch": 0.3153869077182875, "grad_norm": 0.1826171875, "learning_rate": 0.000774773020760902, "loss": 0.7392, "step": 6350 }, { "epoch": 0.3158835800139068, "grad_norm": 0.189453125, "learning_rate": 0.0007747332869772525, "loss": 0.7651, "step": 6360 }, { "epoch": 0.31638025230952616, "grad_norm": 0.1796875, "learning_rate": 0.0007746935531936028, "loss": 0.7575, "step": 6370 }, { "epoch": 0.31687692460514555, "grad_norm": 0.21484375, "learning_rate": 0.0007746538194099533, "loss": 0.7446, "step": 6380 }, { "epoch": 0.3173735969007649, "grad_norm": 0.166015625, "learning_rate": 0.0007746140856263039, "loss": 0.7425, "step": 6390 }, { "epoch": 0.3178702691963842, "grad_norm": 0.1748046875, "learning_rate": 0.0007745743518426542, "loss": 0.7696, "step": 6400 }, { "epoch": 0.31836694149200356, "grad_norm": 0.216796875, "learning_rate": 0.0007745346180590047, "loss": 0.7632, "step": 6410 }, { "epoch": 0.3188636137876229, "grad_norm": 0.2138671875, "learning_rate": 0.0007744948842753551, "loss": 0.7838, "step": 6420 }, { "epoch": 0.3193602860832423, "grad_norm": 0.1943359375, "learning_rate": 0.0007744551504917056, "loss": 0.7685, "step": 6430 }, { "epoch": 0.31985695837886163, "grad_norm": 0.1689453125, "learning_rate": 0.0007744154167080561, "loss": 0.7652, "step": 6440 }, { "epoch": 0.32035363067448097, "grad_norm": 0.1806640625, "learning_rate": 0.0007743756829244065, "loss": 0.755, "step": 6450 }, { "epoch": 0.3208503029701003, "grad_norm": 0.197265625, "learning_rate": 0.000774335949140757, "loss": 0.7277, "step": 6460 }, { "epoch": 0.3213469752657197, "grad_norm": 0.19140625, "learning_rate": 0.0007742962153571074, "loss": 0.7414, "step": 6470 }, { "epoch": 0.32184364756133904, "grad_norm": 0.201171875, "learning_rate": 0.0007742564815734578, "loss": 0.7466, "step": 6480 }, { "epoch": 0.3223403198569584, "grad_norm": 0.1962890625, "learning_rate": 0.0007742167477898084, "loss": 0.7707, "step": 6490 }, { "epoch": 0.3228369921525777, "grad_norm": 0.166015625, "learning_rate": 0.0007741770140061588, "loss": 0.7345, "step": 6500 }, { "epoch": 0.32333366444819706, "grad_norm": 0.1923828125, "learning_rate": 0.0007741372802225092, "loss": 0.7477, "step": 6510 }, { "epoch": 0.32383033674381645, "grad_norm": 0.1923828125, "learning_rate": 0.0007740975464388597, "loss": 0.7523, "step": 6520 }, { "epoch": 0.3243270090394358, "grad_norm": 0.169921875, "learning_rate": 0.0007740578126552101, "loss": 0.7233, "step": 6530 }, { "epoch": 0.3248236813350551, "grad_norm": 0.17578125, "learning_rate": 0.0007740180788715606, "loss": 0.7454, "step": 6540 }, { "epoch": 0.32532035363067446, "grad_norm": 0.20703125, "learning_rate": 0.0007739783450879111, "loss": 0.8268, "step": 6550 }, { "epoch": 0.32581702592629386, "grad_norm": 0.181640625, "learning_rate": 0.0007739386113042615, "loss": 0.7481, "step": 6560 }, { "epoch": 0.3263136982219132, "grad_norm": 0.1826171875, "learning_rate": 0.0007738988775206119, "loss": 0.742, "step": 6570 }, { "epoch": 0.32681037051753253, "grad_norm": 0.2001953125, "learning_rate": 0.0007738591437369624, "loss": 0.7556, "step": 6580 }, { "epoch": 0.32730704281315187, "grad_norm": 0.19140625, "learning_rate": 0.0007738194099533129, "loss": 0.7327, "step": 6590 }, { "epoch": 0.3278037151087712, "grad_norm": 0.1748046875, "learning_rate": 0.0007737796761696633, "loss": 0.7419, "step": 6600 }, { "epoch": 0.3283003874043906, "grad_norm": 0.181640625, "learning_rate": 0.0007737399423860138, "loss": 0.7433, "step": 6610 }, { "epoch": 0.32879705970000994, "grad_norm": 0.1748046875, "learning_rate": 0.0007737002086023642, "loss": 0.7658, "step": 6620 }, { "epoch": 0.3292937319956293, "grad_norm": 0.1748046875, "learning_rate": 0.0007736604748187146, "loss": 0.7514, "step": 6630 }, { "epoch": 0.3297904042912486, "grad_norm": 0.1796875, "learning_rate": 0.000773620741035065, "loss": 0.7481, "step": 6640 }, { "epoch": 0.330287076586868, "grad_norm": 0.193359375, "learning_rate": 0.0007735810072514156, "loss": 0.741, "step": 6650 }, { "epoch": 0.33078374888248735, "grad_norm": 0.2177734375, "learning_rate": 0.000773541273467766, "loss": 0.7469, "step": 6660 }, { "epoch": 0.3312804211781067, "grad_norm": 0.1806640625, "learning_rate": 0.0007735015396841164, "loss": 0.7719, "step": 6670 }, { "epoch": 0.331777093473726, "grad_norm": 0.1767578125, "learning_rate": 0.0007734618059004669, "loss": 0.7762, "step": 6680 }, { "epoch": 0.33227376576934536, "grad_norm": 0.169921875, "learning_rate": 0.0007734220721168174, "loss": 0.7379, "step": 6690 }, { "epoch": 0.33277043806496476, "grad_norm": 0.19140625, "learning_rate": 0.0007733823383331678, "loss": 0.7259, "step": 6700 }, { "epoch": 0.3332671103605841, "grad_norm": 0.205078125, "learning_rate": 0.0007733426045495183, "loss": 0.7645, "step": 6710 }, { "epoch": 0.33376378265620343, "grad_norm": 0.19140625, "learning_rate": 0.0007733028707658687, "loss": 0.7616, "step": 6720 }, { "epoch": 0.33426045495182277, "grad_norm": 0.1669921875, "learning_rate": 0.0007732631369822191, "loss": 0.7393, "step": 6730 }, { "epoch": 0.3347571272474421, "grad_norm": 0.1904296875, "learning_rate": 0.0007732234031985697, "loss": 0.7931, "step": 6740 }, { "epoch": 0.3352537995430615, "grad_norm": 0.2158203125, "learning_rate": 0.0007731836694149201, "loss": 0.7229, "step": 6750 }, { "epoch": 0.33575047183868084, "grad_norm": 0.1787109375, "learning_rate": 0.0007731439356312705, "loss": 0.7394, "step": 6760 }, { "epoch": 0.3362471441343002, "grad_norm": 0.2119140625, "learning_rate": 0.000773104201847621, "loss": 0.7355, "step": 6770 }, { "epoch": 0.3367438164299195, "grad_norm": 0.2080078125, "learning_rate": 0.0007730644680639714, "loss": 0.7552, "step": 6780 }, { "epoch": 0.3372404887255389, "grad_norm": 0.1591796875, "learning_rate": 0.0007730247342803219, "loss": 0.7536, "step": 6790 }, { "epoch": 0.33773716102115825, "grad_norm": 0.203125, "learning_rate": 0.0007729850004966724, "loss": 0.7641, "step": 6800 }, { "epoch": 0.3382338333167776, "grad_norm": 0.224609375, "learning_rate": 0.0007729452667130228, "loss": 0.7434, "step": 6810 }, { "epoch": 0.3387305056123969, "grad_norm": 0.1962890625, "learning_rate": 0.0007729055329293732, "loss": 0.7578, "step": 6820 }, { "epoch": 0.33922717790801626, "grad_norm": 0.1787109375, "learning_rate": 0.0007728657991457236, "loss": 0.7551, "step": 6830 }, { "epoch": 0.33972385020363566, "grad_norm": 0.1650390625, "learning_rate": 0.0007728260653620742, "loss": 0.7743, "step": 6840 }, { "epoch": 0.340220522499255, "grad_norm": 0.1826171875, "learning_rate": 0.0007727863315784246, "loss": 0.7424, "step": 6850 }, { "epoch": 0.34071719479487433, "grad_norm": 0.1884765625, "learning_rate": 0.000772746597794775, "loss": 0.7685, "step": 6860 }, { "epoch": 0.34121386709049367, "grad_norm": 0.15625, "learning_rate": 0.0007727068640111255, "loss": 0.7318, "step": 6870 }, { "epoch": 0.34171053938611307, "grad_norm": 0.169921875, "learning_rate": 0.000772667130227476, "loss": 0.7452, "step": 6880 }, { "epoch": 0.3422072116817324, "grad_norm": 0.1953125, "learning_rate": 0.0007726273964438263, "loss": 0.7754, "step": 6890 }, { "epoch": 0.34270388397735174, "grad_norm": 0.1787109375, "learning_rate": 0.0007725876626601769, "loss": 0.763, "step": 6900 }, { "epoch": 0.3432005562729711, "grad_norm": 0.1962890625, "learning_rate": 0.0007725479288765273, "loss": 0.7346, "step": 6910 }, { "epoch": 0.3436972285685904, "grad_norm": 0.177734375, "learning_rate": 0.0007725081950928777, "loss": 0.7633, "step": 6920 }, { "epoch": 0.3441939008642098, "grad_norm": 0.185546875, "learning_rate": 0.0007724684613092282, "loss": 0.7258, "step": 6930 }, { "epoch": 0.34469057315982915, "grad_norm": 0.1806640625, "learning_rate": 0.0007724287275255787, "loss": 0.7323, "step": 6940 }, { "epoch": 0.3451872454554485, "grad_norm": 0.166015625, "learning_rate": 0.0007723889937419291, "loss": 0.7417, "step": 6950 }, { "epoch": 0.3456839177510678, "grad_norm": 0.18359375, "learning_rate": 0.0007723492599582796, "loss": 0.7511, "step": 6960 }, { "epoch": 0.3461805900466872, "grad_norm": 0.2060546875, "learning_rate": 0.00077230952617463, "loss": 0.703, "step": 6970 }, { "epoch": 0.34667726234230656, "grad_norm": 0.1943359375, "learning_rate": 0.0007722697923909804, "loss": 0.7316, "step": 6980 }, { "epoch": 0.3471739346379259, "grad_norm": 0.185546875, "learning_rate": 0.000772230058607331, "loss": 0.7663, "step": 6990 }, { "epoch": 0.34767060693354523, "grad_norm": 0.1806640625, "learning_rate": 0.0007721903248236814, "loss": 0.7679, "step": 7000 }, { "epoch": 0.3481672792291646, "grad_norm": 0.1865234375, "learning_rate": 0.0007721505910400319, "loss": 0.7432, "step": 7010 }, { "epoch": 0.34866395152478397, "grad_norm": 0.19921875, "learning_rate": 0.0007721108572563822, "loss": 0.7137, "step": 7020 }, { "epoch": 0.3491606238204033, "grad_norm": 0.1513671875, "learning_rate": 0.0007720711234727327, "loss": 0.7484, "step": 7030 }, { "epoch": 0.34965729611602264, "grad_norm": 0.193359375, "learning_rate": 0.0007720313896890833, "loss": 0.8066, "step": 7040 }, { "epoch": 0.350153968411642, "grad_norm": 0.1904296875, "learning_rate": 0.0007719916559054336, "loss": 0.7591, "step": 7050 }, { "epoch": 0.3506506407072614, "grad_norm": 0.17578125, "learning_rate": 0.0007719519221217841, "loss": 0.7204, "step": 7060 }, { "epoch": 0.3511473130028807, "grad_norm": 0.18359375, "learning_rate": 0.0007719121883381346, "loss": 0.7619, "step": 7070 }, { "epoch": 0.35164398529850005, "grad_norm": 0.171875, "learning_rate": 0.0007718724545544849, "loss": 0.7494, "step": 7080 }, { "epoch": 0.3521406575941194, "grad_norm": 0.16796875, "learning_rate": 0.0007718327207708355, "loss": 0.7119, "step": 7090 }, { "epoch": 0.3526373298897387, "grad_norm": 0.173828125, "learning_rate": 0.0007717929869871859, "loss": 0.74, "step": 7100 }, { "epoch": 0.3531340021853581, "grad_norm": 0.1728515625, "learning_rate": 0.0007717532532035363, "loss": 0.7364, "step": 7110 }, { "epoch": 0.35363067448097746, "grad_norm": 0.18359375, "learning_rate": 0.0007717135194198868, "loss": 0.723, "step": 7120 }, { "epoch": 0.3541273467765968, "grad_norm": 0.1669921875, "learning_rate": 0.0007716737856362372, "loss": 0.7895, "step": 7130 }, { "epoch": 0.35462401907221613, "grad_norm": 0.1787109375, "learning_rate": 0.0007716340518525876, "loss": 0.7586, "step": 7140 }, { "epoch": 0.35512069136783553, "grad_norm": 0.18359375, "learning_rate": 0.0007715943180689382, "loss": 0.7063, "step": 7150 }, { "epoch": 0.35561736366345487, "grad_norm": 0.19921875, "learning_rate": 0.0007715545842852886, "loss": 0.7826, "step": 7160 }, { "epoch": 0.3561140359590742, "grad_norm": 0.1669921875, "learning_rate": 0.0007715148505016391, "loss": 0.7246, "step": 7170 }, { "epoch": 0.35661070825469354, "grad_norm": 0.181640625, "learning_rate": 0.0007714751167179895, "loss": 0.7159, "step": 7180 }, { "epoch": 0.3571073805503129, "grad_norm": 0.1787109375, "learning_rate": 0.00077143538293434, "loss": 0.7204, "step": 7190 }, { "epoch": 0.3576040528459323, "grad_norm": 0.1904296875, "learning_rate": 0.0007713956491506905, "loss": 0.7538, "step": 7200 }, { "epoch": 0.3581007251415516, "grad_norm": 0.1767578125, "learning_rate": 0.0007713559153670409, "loss": 0.7469, "step": 7210 }, { "epoch": 0.35859739743717095, "grad_norm": 0.181640625, "learning_rate": 0.0007713161815833913, "loss": 0.7408, "step": 7220 }, { "epoch": 0.3590940697327903, "grad_norm": 0.1748046875, "learning_rate": 0.0007712764477997418, "loss": 0.7432, "step": 7230 }, { "epoch": 0.3595907420284097, "grad_norm": 0.1796875, "learning_rate": 0.0007712367140160921, "loss": 0.7366, "step": 7240 }, { "epoch": 0.360087414324029, "grad_norm": 0.1806640625, "learning_rate": 0.0007711969802324427, "loss": 0.728, "step": 7250 }, { "epoch": 0.36058408661964836, "grad_norm": 0.1806640625, "learning_rate": 0.0007711572464487932, "loss": 0.7584, "step": 7260 }, { "epoch": 0.3610807589152677, "grad_norm": 0.158203125, "learning_rate": 0.0007711175126651435, "loss": 0.6997, "step": 7270 }, { "epoch": 0.36157743121088703, "grad_norm": 0.1806640625, "learning_rate": 0.000771077778881494, "loss": 0.7144, "step": 7280 }, { "epoch": 0.36207410350650643, "grad_norm": 0.19921875, "learning_rate": 0.0007710380450978444, "loss": 0.728, "step": 7290 }, { "epoch": 0.36257077580212577, "grad_norm": 0.1650390625, "learning_rate": 0.0007709983113141949, "loss": 0.7011, "step": 7300 }, { "epoch": 0.3630674480977451, "grad_norm": 0.1962890625, "learning_rate": 0.0007709585775305454, "loss": 0.7655, "step": 7310 }, { "epoch": 0.36356412039336444, "grad_norm": 0.1875, "learning_rate": 0.0007709188437468958, "loss": 0.7247, "step": 7320 }, { "epoch": 0.3640607926889838, "grad_norm": 0.15625, "learning_rate": 0.0007708791099632463, "loss": 0.7441, "step": 7330 }, { "epoch": 0.3645574649846032, "grad_norm": 0.21484375, "learning_rate": 0.0007708393761795967, "loss": 0.7515, "step": 7340 }, { "epoch": 0.3650541372802225, "grad_norm": 0.177734375, "learning_rate": 0.0007707996423959472, "loss": 0.7171, "step": 7350 }, { "epoch": 0.36555080957584185, "grad_norm": 0.1689453125, "learning_rate": 0.0007707599086122977, "loss": 0.6862, "step": 7360 }, { "epoch": 0.3660474818714612, "grad_norm": 0.1630859375, "learning_rate": 0.0007707201748286481, "loss": 0.706, "step": 7370 }, { "epoch": 0.3665441541670806, "grad_norm": 0.216796875, "learning_rate": 0.0007706804410449985, "loss": 0.7725, "step": 7380 }, { "epoch": 0.3670408264626999, "grad_norm": 0.15625, "learning_rate": 0.000770640707261349, "loss": 0.7314, "step": 7390 }, { "epoch": 0.36753749875831926, "grad_norm": 0.1796875, "learning_rate": 0.0007706009734776995, "loss": 0.7398, "step": 7400 }, { "epoch": 0.3680341710539386, "grad_norm": 0.1806640625, "learning_rate": 0.0007705612396940499, "loss": 0.7266, "step": 7410 }, { "epoch": 0.36853084334955793, "grad_norm": 0.173828125, "learning_rate": 0.0007705215059104004, "loss": 0.7625, "step": 7420 }, { "epoch": 0.36902751564517733, "grad_norm": 0.1708984375, "learning_rate": 0.0007704817721267507, "loss": 0.716, "step": 7430 }, { "epoch": 0.36952418794079667, "grad_norm": 0.1708984375, "learning_rate": 0.0007704420383431012, "loss": 0.7418, "step": 7440 }, { "epoch": 0.370020860236416, "grad_norm": 0.17578125, "learning_rate": 0.0007704023045594518, "loss": 0.7125, "step": 7450 }, { "epoch": 0.37051753253203534, "grad_norm": 0.1875, "learning_rate": 0.0007703625707758022, "loss": 0.7312, "step": 7460 }, { "epoch": 0.37101420482765474, "grad_norm": 0.1708984375, "learning_rate": 0.0007703228369921526, "loss": 0.7424, "step": 7470 }, { "epoch": 0.3715108771232741, "grad_norm": 0.1904296875, "learning_rate": 0.0007702831032085031, "loss": 0.7156, "step": 7480 }, { "epoch": 0.3720075494188934, "grad_norm": 0.193359375, "learning_rate": 0.0007702433694248535, "loss": 0.7348, "step": 7490 }, { "epoch": 0.37250422171451275, "grad_norm": 0.2021484375, "learning_rate": 0.000770203635641204, "loss": 0.6993, "step": 7500 }, { "epoch": 0.3730008940101321, "grad_norm": 0.171875, "learning_rate": 0.0007701639018575544, "loss": 0.7259, "step": 7510 }, { "epoch": 0.3734975663057515, "grad_norm": 0.1572265625, "learning_rate": 0.0007701241680739049, "loss": 0.681, "step": 7520 }, { "epoch": 0.3739942386013708, "grad_norm": 0.189453125, "learning_rate": 0.0007700844342902553, "loss": 0.734, "step": 7530 }, { "epoch": 0.37449091089699016, "grad_norm": 0.169921875, "learning_rate": 0.0007700447005066057, "loss": 0.7473, "step": 7540 }, { "epoch": 0.3749875831926095, "grad_norm": 0.1875, "learning_rate": 0.0007700049667229563, "loss": 0.7197, "step": 7550 }, { "epoch": 0.3754842554882289, "grad_norm": 0.1845703125, "learning_rate": 0.0007699652329393067, "loss": 0.7397, "step": 7560 }, { "epoch": 0.37598092778384823, "grad_norm": 0.1650390625, "learning_rate": 0.0007699254991556571, "loss": 0.749, "step": 7570 }, { "epoch": 0.37647760007946757, "grad_norm": 0.166015625, "learning_rate": 0.0007698857653720076, "loss": 0.7399, "step": 7580 }, { "epoch": 0.3769742723750869, "grad_norm": 0.181640625, "learning_rate": 0.000769846031588358, "loss": 0.7477, "step": 7590 }, { "epoch": 0.37747094467070624, "grad_norm": 0.1748046875, "learning_rate": 0.0007698062978047085, "loss": 0.7321, "step": 7600 }, { "epoch": 0.37796761696632564, "grad_norm": 0.1533203125, "learning_rate": 0.000769766564021059, "loss": 0.7328, "step": 7610 }, { "epoch": 0.378464289261945, "grad_norm": 0.1591796875, "learning_rate": 0.0007697268302374094, "loss": 0.7521, "step": 7620 }, { "epoch": 0.3789609615575643, "grad_norm": 0.2041015625, "learning_rate": 0.0007696870964537598, "loss": 0.762, "step": 7630 }, { "epoch": 0.37945763385318365, "grad_norm": 0.169921875, "learning_rate": 0.0007696473626701103, "loss": 0.7221, "step": 7640 }, { "epoch": 0.37995430614880304, "grad_norm": 0.1572265625, "learning_rate": 0.0007696076288864608, "loss": 0.7237, "step": 7650 }, { "epoch": 0.3804509784444224, "grad_norm": 0.1728515625, "learning_rate": 0.0007695678951028112, "loss": 0.7303, "step": 7660 }, { "epoch": 0.3809476507400417, "grad_norm": 0.220703125, "learning_rate": 0.0007695281613191617, "loss": 0.7715, "step": 7670 }, { "epoch": 0.38144432303566106, "grad_norm": 0.181640625, "learning_rate": 0.0007694884275355121, "loss": 0.7481, "step": 7680 }, { "epoch": 0.3819409953312804, "grad_norm": 0.1953125, "learning_rate": 0.0007694486937518625, "loss": 0.7587, "step": 7690 }, { "epoch": 0.3824376676268998, "grad_norm": 0.166015625, "learning_rate": 0.000769408959968213, "loss": 0.7155, "step": 7700 }, { "epoch": 0.38293433992251913, "grad_norm": 0.171875, "learning_rate": 0.0007693692261845635, "loss": 0.7326, "step": 7710 }, { "epoch": 0.38343101221813847, "grad_norm": 0.1767578125, "learning_rate": 0.0007693294924009139, "loss": 0.7087, "step": 7720 }, { "epoch": 0.3839276845137578, "grad_norm": 0.2060546875, "learning_rate": 0.0007692897586172643, "loss": 0.7131, "step": 7730 }, { "epoch": 0.3844243568093772, "grad_norm": 0.1689453125, "learning_rate": 0.0007692500248336148, "loss": 0.7382, "step": 7740 }, { "epoch": 0.38492102910499654, "grad_norm": 0.1611328125, "learning_rate": 0.0007692102910499653, "loss": 0.7186, "step": 7750 }, { "epoch": 0.3854177014006159, "grad_norm": 0.1796875, "learning_rate": 0.0007691705572663157, "loss": 0.7351, "step": 7760 }, { "epoch": 0.3859143736962352, "grad_norm": 0.17578125, "learning_rate": 0.0007691308234826662, "loss": 0.7372, "step": 7770 }, { "epoch": 0.38641104599185455, "grad_norm": 0.1650390625, "learning_rate": 0.0007690910896990166, "loss": 0.751, "step": 7780 }, { "epoch": 0.38690771828747395, "grad_norm": 0.1669921875, "learning_rate": 0.000769051355915367, "loss": 0.7708, "step": 7790 }, { "epoch": 0.3874043905830933, "grad_norm": 0.1669921875, "learning_rate": 0.0007690116221317176, "loss": 0.7545, "step": 7800 }, { "epoch": 0.3879010628787126, "grad_norm": 0.1806640625, "learning_rate": 0.000768971888348068, "loss": 0.7645, "step": 7810 }, { "epoch": 0.38839773517433196, "grad_norm": 0.1650390625, "learning_rate": 0.0007689321545644184, "loss": 0.7348, "step": 7820 }, { "epoch": 0.38889440746995135, "grad_norm": 0.162109375, "learning_rate": 0.0007688924207807689, "loss": 0.7003, "step": 7830 }, { "epoch": 0.3893910797655707, "grad_norm": 0.1552734375, "learning_rate": 0.0007688526869971193, "loss": 0.7449, "step": 7840 }, { "epoch": 0.38988775206119003, "grad_norm": 0.16015625, "learning_rate": 0.0007688129532134698, "loss": 0.7048, "step": 7850 }, { "epoch": 0.39038442435680937, "grad_norm": 0.1767578125, "learning_rate": 0.0007687732194298203, "loss": 0.6932, "step": 7860 }, { "epoch": 0.3908810966524287, "grad_norm": 0.1533203125, "learning_rate": 0.0007687334856461707, "loss": 0.6957, "step": 7870 }, { "epoch": 0.3913777689480481, "grad_norm": 0.1796875, "learning_rate": 0.0007686937518625211, "loss": 0.7196, "step": 7880 }, { "epoch": 0.39187444124366744, "grad_norm": 0.162109375, "learning_rate": 0.0007686540180788715, "loss": 0.7493, "step": 7890 }, { "epoch": 0.3923711135392868, "grad_norm": 0.171875, "learning_rate": 0.0007686142842952221, "loss": 0.7667, "step": 7900 }, { "epoch": 0.3928677858349061, "grad_norm": 0.1708984375, "learning_rate": 0.0007685745505115726, "loss": 0.7573, "step": 7910 }, { "epoch": 0.39336445813052545, "grad_norm": 0.2060546875, "learning_rate": 0.0007685348167279229, "loss": 0.7438, "step": 7920 }, { "epoch": 0.39386113042614485, "grad_norm": 0.177734375, "learning_rate": 0.0007684950829442734, "loss": 0.7369, "step": 7930 }, { "epoch": 0.3943578027217642, "grad_norm": 0.15625, "learning_rate": 0.0007684553491606239, "loss": 0.7336, "step": 7940 }, { "epoch": 0.3948544750173835, "grad_norm": 0.162109375, "learning_rate": 0.0007684156153769743, "loss": 0.7205, "step": 7950 }, { "epoch": 0.39535114731300286, "grad_norm": 0.173828125, "learning_rate": 0.0007683758815933248, "loss": 0.699, "step": 7960 }, { "epoch": 0.39584781960862225, "grad_norm": 0.189453125, "learning_rate": 0.0007683361478096752, "loss": 0.7284, "step": 7970 }, { "epoch": 0.3963444919042416, "grad_norm": 0.1591796875, "learning_rate": 0.0007682964140260256, "loss": 0.727, "step": 7980 }, { "epoch": 0.39684116419986093, "grad_norm": 0.1787109375, "learning_rate": 0.0007682566802423761, "loss": 0.7521, "step": 7990 }, { "epoch": 0.39733783649548027, "grad_norm": 0.193359375, "learning_rate": 0.0007682169464587266, "loss": 0.7427, "step": 8000 }, { "epoch": 0.3978345087910996, "grad_norm": 0.16796875, "learning_rate": 0.000768177212675077, "loss": 0.6822, "step": 8010 }, { "epoch": 0.398331181086719, "grad_norm": 0.1669921875, "learning_rate": 0.0007681374788914275, "loss": 0.718, "step": 8020 }, { "epoch": 0.39882785338233834, "grad_norm": 0.16015625, "learning_rate": 0.0007680977451077779, "loss": 0.7153, "step": 8030 }, { "epoch": 0.3993245256779577, "grad_norm": 0.15625, "learning_rate": 0.0007680580113241283, "loss": 0.7443, "step": 8040 }, { "epoch": 0.399821197973577, "grad_norm": 0.1552734375, "learning_rate": 0.0007680182775404789, "loss": 0.7849, "step": 8050 }, { "epoch": 0.4003178702691964, "grad_norm": 0.1748046875, "learning_rate": 0.0007679785437568293, "loss": 0.7288, "step": 8060 }, { "epoch": 0.40081454256481575, "grad_norm": 0.1640625, "learning_rate": 0.0007679388099731798, "loss": 0.7566, "step": 8070 }, { "epoch": 0.4013112148604351, "grad_norm": 0.158203125, "learning_rate": 0.0007678990761895302, "loss": 0.7008, "step": 8080 }, { "epoch": 0.4018078871560544, "grad_norm": 0.150390625, "learning_rate": 0.0007678593424058806, "loss": 0.754, "step": 8090 }, { "epoch": 0.40230455945167376, "grad_norm": 0.15625, "learning_rate": 0.0007678196086222312, "loss": 0.7161, "step": 8100 }, { "epoch": 0.40280123174729315, "grad_norm": 0.1806640625, "learning_rate": 0.0007677798748385815, "loss": 0.7508, "step": 8110 }, { "epoch": 0.4032979040429125, "grad_norm": 0.158203125, "learning_rate": 0.000767740141054932, "loss": 0.7183, "step": 8120 }, { "epoch": 0.40379457633853183, "grad_norm": 0.1806640625, "learning_rate": 0.0007677004072712825, "loss": 0.7561, "step": 8130 }, { "epoch": 0.40429124863415117, "grad_norm": 0.16015625, "learning_rate": 0.0007676606734876328, "loss": 0.7123, "step": 8140 }, { "epoch": 0.40478792092977056, "grad_norm": 0.1806640625, "learning_rate": 0.0007676209397039834, "loss": 0.7231, "step": 8150 }, { "epoch": 0.4052845932253899, "grad_norm": 0.197265625, "learning_rate": 0.0007675812059203338, "loss": 0.7267, "step": 8160 }, { "epoch": 0.40578126552100924, "grad_norm": 0.171875, "learning_rate": 0.0007675414721366842, "loss": 0.7173, "step": 8170 }, { "epoch": 0.4062779378166286, "grad_norm": 0.16796875, "learning_rate": 0.0007675017383530347, "loss": 0.7031, "step": 8180 }, { "epoch": 0.4067746101122479, "grad_norm": 0.15234375, "learning_rate": 0.0007674620045693851, "loss": 0.7208, "step": 8190 }, { "epoch": 0.4072712824078673, "grad_norm": 0.2021484375, "learning_rate": 0.0007674222707857355, "loss": 0.7613, "step": 8200 }, { "epoch": 0.40776795470348665, "grad_norm": 0.181640625, "learning_rate": 0.0007673825370020861, "loss": 0.7161, "step": 8210 }, { "epoch": 0.408264626999106, "grad_norm": 0.1640625, "learning_rate": 0.0007673428032184365, "loss": 0.7485, "step": 8220 }, { "epoch": 0.4087612992947253, "grad_norm": 0.1767578125, "learning_rate": 0.000767303069434787, "loss": 0.7091, "step": 8230 }, { "epoch": 0.4092579715903447, "grad_norm": 0.16015625, "learning_rate": 0.0007672633356511374, "loss": 0.6908, "step": 8240 }, { "epoch": 0.40975464388596405, "grad_norm": 0.166015625, "learning_rate": 0.0007672236018674879, "loss": 0.7208, "step": 8250 }, { "epoch": 0.4102513161815834, "grad_norm": 0.171875, "learning_rate": 0.0007671838680838384, "loss": 0.735, "step": 8260 }, { "epoch": 0.41074798847720273, "grad_norm": 0.1552734375, "learning_rate": 0.0007671441343001888, "loss": 0.7397, "step": 8270 }, { "epoch": 0.41124466077282207, "grad_norm": 0.16796875, "learning_rate": 0.0007671044005165392, "loss": 0.7125, "step": 8280 }, { "epoch": 0.41174133306844146, "grad_norm": 0.1962890625, "learning_rate": 0.0007670646667328897, "loss": 0.7101, "step": 8290 }, { "epoch": 0.4122380053640608, "grad_norm": 0.173828125, "learning_rate": 0.00076702493294924, "loss": 0.74, "step": 8300 }, { "epoch": 0.41273467765968014, "grad_norm": 0.1708984375, "learning_rate": 0.0007669851991655906, "loss": 0.7184, "step": 8310 }, { "epoch": 0.4132313499552995, "grad_norm": 0.15234375, "learning_rate": 0.0007669454653819411, "loss": 0.7223, "step": 8320 }, { "epoch": 0.41372802225091887, "grad_norm": 0.1572265625, "learning_rate": 0.0007669057315982914, "loss": 0.7655, "step": 8330 }, { "epoch": 0.4142246945465382, "grad_norm": 0.1591796875, "learning_rate": 0.0007668659978146419, "loss": 0.7088, "step": 8340 }, { "epoch": 0.41472136684215755, "grad_norm": 0.1728515625, "learning_rate": 0.0007668262640309925, "loss": 0.7325, "step": 8350 }, { "epoch": 0.4152180391377769, "grad_norm": 0.169921875, "learning_rate": 0.0007667865302473429, "loss": 0.7189, "step": 8360 }, { "epoch": 0.4157147114333962, "grad_norm": 0.1650390625, "learning_rate": 0.0007667467964636933, "loss": 0.7222, "step": 8370 }, { "epoch": 0.4162113837290156, "grad_norm": 0.16796875, "learning_rate": 0.0007667070626800437, "loss": 0.7308, "step": 8380 }, { "epoch": 0.41670805602463495, "grad_norm": 0.1728515625, "learning_rate": 0.0007666673288963942, "loss": 0.6973, "step": 8390 }, { "epoch": 0.4172047283202543, "grad_norm": 0.166015625, "learning_rate": 0.0007666275951127447, "loss": 0.6914, "step": 8400 }, { "epoch": 0.41770140061587363, "grad_norm": 0.162109375, "learning_rate": 0.0007665878613290951, "loss": 0.72, "step": 8410 }, { "epoch": 0.418198072911493, "grad_norm": 0.15234375, "learning_rate": 0.0007665481275454456, "loss": 0.6967, "step": 8420 }, { "epoch": 0.41869474520711236, "grad_norm": 0.15234375, "learning_rate": 0.000766508393761796, "loss": 0.7139, "step": 8430 }, { "epoch": 0.4191914175027317, "grad_norm": 0.158203125, "learning_rate": 0.0007664686599781464, "loss": 0.7003, "step": 8440 }, { "epoch": 0.41968808979835104, "grad_norm": 0.150390625, "learning_rate": 0.000766428926194497, "loss": 0.7177, "step": 8450 }, { "epoch": 0.4201847620939704, "grad_norm": 0.16015625, "learning_rate": 0.0007663891924108474, "loss": 0.7192, "step": 8460 }, { "epoch": 0.42068143438958977, "grad_norm": 0.1640625, "learning_rate": 0.0007663494586271978, "loss": 0.7029, "step": 8470 }, { "epoch": 0.4211781066852091, "grad_norm": 0.16015625, "learning_rate": 0.0007663097248435483, "loss": 0.6977, "step": 8480 }, { "epoch": 0.42167477898082845, "grad_norm": 0.15625, "learning_rate": 0.0007662699910598986, "loss": 0.7426, "step": 8490 }, { "epoch": 0.4221714512764478, "grad_norm": 0.16015625, "learning_rate": 0.0007662302572762491, "loss": 0.7655, "step": 8500 }, { "epoch": 0.4226681235720671, "grad_norm": 0.1787109375, "learning_rate": 0.0007661905234925997, "loss": 0.7211, "step": 8510 }, { "epoch": 0.4231647958676865, "grad_norm": 0.1650390625, "learning_rate": 0.0007661507897089501, "loss": 0.7069, "step": 8520 }, { "epoch": 0.42366146816330585, "grad_norm": 0.158203125, "learning_rate": 0.0007661110559253005, "loss": 0.7189, "step": 8530 }, { "epoch": 0.4241581404589252, "grad_norm": 0.1875, "learning_rate": 0.000766071322141651, "loss": 0.7167, "step": 8540 }, { "epoch": 0.42465481275454453, "grad_norm": 0.1845703125, "learning_rate": 0.0007660315883580014, "loss": 0.7307, "step": 8550 }, { "epoch": 0.4251514850501639, "grad_norm": 0.173828125, "learning_rate": 0.0007659918545743519, "loss": 0.7289, "step": 8560 }, { "epoch": 0.42564815734578326, "grad_norm": 0.1806640625, "learning_rate": 0.0007659521207907023, "loss": 0.7157, "step": 8570 }, { "epoch": 0.4261448296414026, "grad_norm": 0.1708984375, "learning_rate": 0.0007659123870070528, "loss": 0.7354, "step": 8580 }, { "epoch": 0.42664150193702194, "grad_norm": 0.181640625, "learning_rate": 0.0007658726532234032, "loss": 0.7431, "step": 8590 }, { "epoch": 0.4271381742326413, "grad_norm": 0.1611328125, "learning_rate": 0.0007658329194397536, "loss": 0.7369, "step": 8600 }, { "epoch": 0.42763484652826067, "grad_norm": 0.1865234375, "learning_rate": 0.0007657931856561042, "loss": 0.7348, "step": 8610 }, { "epoch": 0.42813151882388, "grad_norm": 0.1826171875, "learning_rate": 0.0007657534518724546, "loss": 0.7231, "step": 8620 }, { "epoch": 0.42862819111949935, "grad_norm": 0.1640625, "learning_rate": 0.000765713718088805, "loss": 0.7217, "step": 8630 }, { "epoch": 0.4291248634151187, "grad_norm": 0.1728515625, "learning_rate": 0.0007656739843051555, "loss": 0.7138, "step": 8640 }, { "epoch": 0.4296215357107381, "grad_norm": 0.1455078125, "learning_rate": 0.000765634250521506, "loss": 0.7112, "step": 8650 }, { "epoch": 0.4301182080063574, "grad_norm": 0.1591796875, "learning_rate": 0.0007655945167378564, "loss": 0.7388, "step": 8660 }, { "epoch": 0.43061488030197675, "grad_norm": 0.189453125, "learning_rate": 0.0007655547829542069, "loss": 0.7088, "step": 8670 }, { "epoch": 0.4311115525975961, "grad_norm": 0.1650390625, "learning_rate": 0.0007655150491705573, "loss": 0.7148, "step": 8680 }, { "epoch": 0.43160822489321543, "grad_norm": 0.1572265625, "learning_rate": 0.0007654753153869077, "loss": 0.7137, "step": 8690 }, { "epoch": 0.4321048971888348, "grad_norm": 0.1484375, "learning_rate": 0.0007654355816032582, "loss": 0.6875, "step": 8700 }, { "epoch": 0.43260156948445416, "grad_norm": 0.158203125, "learning_rate": 0.0007653958478196087, "loss": 0.6891, "step": 8710 }, { "epoch": 0.4330982417800735, "grad_norm": 0.1484375, "learning_rate": 0.0007653561140359591, "loss": 0.7064, "step": 8720 }, { "epoch": 0.43359491407569284, "grad_norm": 0.146484375, "learning_rate": 0.0007653163802523096, "loss": 0.7231, "step": 8730 }, { "epoch": 0.43409158637131223, "grad_norm": 0.1494140625, "learning_rate": 0.00076527664646866, "loss": 0.7066, "step": 8740 }, { "epoch": 0.43458825866693157, "grad_norm": 0.1669921875, "learning_rate": 0.0007652369126850104, "loss": 0.7, "step": 8750 }, { "epoch": 0.4350849309625509, "grad_norm": 0.1611328125, "learning_rate": 0.0007651971789013609, "loss": 0.7378, "step": 8760 }, { "epoch": 0.43558160325817025, "grad_norm": 0.169921875, "learning_rate": 0.0007651574451177114, "loss": 0.7336, "step": 8770 }, { "epoch": 0.4360782755537896, "grad_norm": 0.158203125, "learning_rate": 0.0007651177113340618, "loss": 0.7575, "step": 8780 }, { "epoch": 0.436574947849409, "grad_norm": 0.150390625, "learning_rate": 0.0007650779775504122, "loss": 0.7354, "step": 8790 }, { "epoch": 0.4370716201450283, "grad_norm": 0.15625, "learning_rate": 0.0007650382437667627, "loss": 0.7177, "step": 8800 }, { "epoch": 0.43756829244064765, "grad_norm": 0.1630859375, "learning_rate": 0.0007649985099831133, "loss": 0.7363, "step": 8810 }, { "epoch": 0.438064964736267, "grad_norm": 0.177734375, "learning_rate": 0.0007649587761994636, "loss": 0.7019, "step": 8820 }, { "epoch": 0.4385616370318864, "grad_norm": 0.15234375, "learning_rate": 0.0007649190424158141, "loss": 0.6998, "step": 8830 }, { "epoch": 0.4390583093275057, "grad_norm": 0.1640625, "learning_rate": 0.0007648793086321645, "loss": 0.7236, "step": 8840 }, { "epoch": 0.43955498162312506, "grad_norm": 0.189453125, "learning_rate": 0.0007648395748485149, "loss": 0.7097, "step": 8850 }, { "epoch": 0.4400516539187444, "grad_norm": 0.15625, "learning_rate": 0.0007647998410648655, "loss": 0.6748, "step": 8860 }, { "epoch": 0.44054832621436374, "grad_norm": 0.2041015625, "learning_rate": 0.0007647601072812159, "loss": 0.6921, "step": 8870 }, { "epoch": 0.44104499850998313, "grad_norm": 0.15234375, "learning_rate": 0.0007647203734975663, "loss": 0.7079, "step": 8880 }, { "epoch": 0.44154167080560247, "grad_norm": 0.1455078125, "learning_rate": 0.0007646806397139168, "loss": 0.7137, "step": 8890 }, { "epoch": 0.4420383431012218, "grad_norm": 0.1494140625, "learning_rate": 0.0007646409059302672, "loss": 0.7328, "step": 8900 }, { "epoch": 0.44253501539684115, "grad_norm": 0.1748046875, "learning_rate": 0.0007646011721466177, "loss": 0.7409, "step": 8910 }, { "epoch": 0.44303168769246054, "grad_norm": 0.1767578125, "learning_rate": 0.0007645614383629682, "loss": 0.7505, "step": 8920 }, { "epoch": 0.4435283599880799, "grad_norm": 0.16796875, "learning_rate": 0.0007645217045793186, "loss": 0.7348, "step": 8930 }, { "epoch": 0.4440250322836992, "grad_norm": 0.1591796875, "learning_rate": 0.000764481970795669, "loss": 0.7477, "step": 8940 }, { "epoch": 0.44452170457931856, "grad_norm": 0.1640625, "learning_rate": 0.0007644422370120194, "loss": 0.71, "step": 8950 }, { "epoch": 0.4450183768749379, "grad_norm": 0.1630859375, "learning_rate": 0.00076440250322837, "loss": 0.7137, "step": 8960 }, { "epoch": 0.4455150491705573, "grad_norm": 0.1650390625, "learning_rate": 0.0007643627694447205, "loss": 0.7641, "step": 8970 }, { "epoch": 0.4460117214661766, "grad_norm": 0.203125, "learning_rate": 0.0007643230356610708, "loss": 0.699, "step": 8980 }, { "epoch": 0.44650839376179596, "grad_norm": 0.1806640625, "learning_rate": 0.0007642833018774213, "loss": 0.6906, "step": 8990 }, { "epoch": 0.4470050660574153, "grad_norm": 0.18359375, "learning_rate": 0.0007642435680937718, "loss": 0.7058, "step": 9000 }, { "epoch": 0.4475017383530347, "grad_norm": 0.166015625, "learning_rate": 0.0007642038343101222, "loss": 0.7377, "step": 9010 }, { "epoch": 0.44799841064865403, "grad_norm": 0.1650390625, "learning_rate": 0.0007641641005264727, "loss": 0.6808, "step": 9020 }, { "epoch": 0.44849508294427337, "grad_norm": 0.185546875, "learning_rate": 0.0007641243667428231, "loss": 0.7154, "step": 9030 }, { "epoch": 0.4489917552398927, "grad_norm": 0.15625, "learning_rate": 0.0007640846329591735, "loss": 0.7077, "step": 9040 }, { "epoch": 0.44948842753551205, "grad_norm": 0.162109375, "learning_rate": 0.000764044899175524, "loss": 0.6977, "step": 9050 }, { "epoch": 0.44998509983113144, "grad_norm": 0.16796875, "learning_rate": 0.0007640051653918745, "loss": 0.7314, "step": 9060 }, { "epoch": 0.4504817721267508, "grad_norm": 0.177734375, "learning_rate": 0.0007639654316082249, "loss": 0.7332, "step": 9070 }, { "epoch": 0.4509784444223701, "grad_norm": 0.1591796875, "learning_rate": 0.0007639256978245754, "loss": 0.7256, "step": 9080 }, { "epoch": 0.45147511671798946, "grad_norm": 0.18359375, "learning_rate": 0.0007638859640409258, "loss": 0.6945, "step": 9090 }, { "epoch": 0.4519717890136088, "grad_norm": 0.1513671875, "learning_rate": 0.0007638462302572763, "loss": 0.7337, "step": 9100 }, { "epoch": 0.4524684613092282, "grad_norm": 0.15234375, "learning_rate": 0.0007638064964736268, "loss": 0.7143, "step": 9110 }, { "epoch": 0.4529651336048475, "grad_norm": 0.162109375, "learning_rate": 0.0007637667626899772, "loss": 0.7248, "step": 9120 }, { "epoch": 0.45346180590046686, "grad_norm": 0.15625, "learning_rate": 0.0007637270289063277, "loss": 0.7323, "step": 9130 }, { "epoch": 0.4539584781960862, "grad_norm": 0.181640625, "learning_rate": 0.0007636872951226781, "loss": 0.7023, "step": 9140 }, { "epoch": 0.4544551504917056, "grad_norm": 0.1533203125, "learning_rate": 0.0007636475613390285, "loss": 0.7105, "step": 9150 }, { "epoch": 0.45495182278732493, "grad_norm": 0.158203125, "learning_rate": 0.0007636078275553791, "loss": 0.7187, "step": 9160 }, { "epoch": 0.45544849508294427, "grad_norm": 0.189453125, "learning_rate": 0.0007635680937717294, "loss": 0.7122, "step": 9170 }, { "epoch": 0.4559451673785636, "grad_norm": 0.15625, "learning_rate": 0.0007635283599880799, "loss": 0.7229, "step": 9180 }, { "epoch": 0.45644183967418295, "grad_norm": 0.1708984375, "learning_rate": 0.0007634886262044304, "loss": 0.7008, "step": 9190 }, { "epoch": 0.45693851196980234, "grad_norm": 0.1591796875, "learning_rate": 0.0007634488924207807, "loss": 0.6972, "step": 9200 }, { "epoch": 0.4574351842654217, "grad_norm": 0.140625, "learning_rate": 0.0007634091586371313, "loss": 0.6998, "step": 9210 }, { "epoch": 0.457931856561041, "grad_norm": 0.177734375, "learning_rate": 0.0007633694248534817, "loss": 0.7409, "step": 9220 }, { "epoch": 0.45842852885666036, "grad_norm": 0.1796875, "learning_rate": 0.0007633296910698321, "loss": 0.7144, "step": 9230 }, { "epoch": 0.45892520115227975, "grad_norm": 0.158203125, "learning_rate": 0.0007632899572861826, "loss": 0.7273, "step": 9240 }, { "epoch": 0.4594218734478991, "grad_norm": 0.171875, "learning_rate": 0.000763250223502533, "loss": 0.7022, "step": 9250 }, { "epoch": 0.4599185457435184, "grad_norm": 0.162109375, "learning_rate": 0.0007632104897188836, "loss": 0.7172, "step": 9260 }, { "epoch": 0.46041521803913776, "grad_norm": 0.1591796875, "learning_rate": 0.000763170755935234, "loss": 0.7561, "step": 9270 }, { "epoch": 0.4609118903347571, "grad_norm": 0.1796875, "learning_rate": 0.0007631310221515844, "loss": 0.7032, "step": 9280 }, { "epoch": 0.4614085626303765, "grad_norm": 0.197265625, "learning_rate": 0.0007630912883679349, "loss": 0.7463, "step": 9290 }, { "epoch": 0.46190523492599583, "grad_norm": 0.15234375, "learning_rate": 0.0007630515545842853, "loss": 0.7138, "step": 9300 }, { "epoch": 0.46240190722161517, "grad_norm": 0.1591796875, "learning_rate": 0.0007630118208006358, "loss": 0.7258, "step": 9310 }, { "epoch": 0.4628985795172345, "grad_norm": 0.18359375, "learning_rate": 0.0007629720870169863, "loss": 0.7383, "step": 9320 }, { "epoch": 0.4633952518128539, "grad_norm": 0.146484375, "learning_rate": 0.0007629323532333367, "loss": 0.7449, "step": 9330 }, { "epoch": 0.46389192410847324, "grad_norm": 0.16015625, "learning_rate": 0.0007628926194496871, "loss": 0.7348, "step": 9340 }, { "epoch": 0.4643885964040926, "grad_norm": 0.16015625, "learning_rate": 0.0007628528856660376, "loss": 0.7234, "step": 9350 }, { "epoch": 0.4648852686997119, "grad_norm": 0.146484375, "learning_rate": 0.000762813151882388, "loss": 0.6861, "step": 9360 }, { "epoch": 0.46538194099533126, "grad_norm": 0.1611328125, "learning_rate": 0.0007627734180987385, "loss": 0.6995, "step": 9370 }, { "epoch": 0.46587861329095065, "grad_norm": 0.169921875, "learning_rate": 0.000762733684315089, "loss": 0.6967, "step": 9380 }, { "epoch": 0.46637528558657, "grad_norm": 0.16015625, "learning_rate": 0.0007626939505314393, "loss": 0.6964, "step": 9390 }, { "epoch": 0.4668719578821893, "grad_norm": 0.162109375, "learning_rate": 0.0007626542167477898, "loss": 0.7155, "step": 9400 }, { "epoch": 0.46736863017780866, "grad_norm": 0.1416015625, "learning_rate": 0.0007626144829641404, "loss": 0.7281, "step": 9410 }, { "epoch": 0.46786530247342806, "grad_norm": 0.1640625, "learning_rate": 0.0007625747491804908, "loss": 0.7683, "step": 9420 }, { "epoch": 0.4683619747690474, "grad_norm": 0.146484375, "learning_rate": 0.0007625350153968412, "loss": 0.6822, "step": 9430 }, { "epoch": 0.46885864706466673, "grad_norm": 0.1494140625, "learning_rate": 0.0007624952816131916, "loss": 0.7145, "step": 9440 }, { "epoch": 0.46935531936028607, "grad_norm": 0.14453125, "learning_rate": 0.0007624555478295421, "loss": 0.7266, "step": 9450 }, { "epoch": 0.4698519916559054, "grad_norm": 0.1767578125, "learning_rate": 0.0007624158140458926, "loss": 0.7053, "step": 9460 }, { "epoch": 0.4703486639515248, "grad_norm": 0.154296875, "learning_rate": 0.000762376080262243, "loss": 0.6892, "step": 9470 }, { "epoch": 0.47084533624714414, "grad_norm": 0.1591796875, "learning_rate": 0.0007623363464785935, "loss": 0.7253, "step": 9480 }, { "epoch": 0.4713420085427635, "grad_norm": 0.150390625, "learning_rate": 0.0007622966126949439, "loss": 0.7233, "step": 9490 }, { "epoch": 0.4718386808383828, "grad_norm": 0.173828125, "learning_rate": 0.0007622568789112943, "loss": 0.7241, "step": 9500 }, { "epoch": 0.4723353531340022, "grad_norm": 0.1552734375, "learning_rate": 0.0007622171451276449, "loss": 0.7205, "step": 9510 }, { "epoch": 0.47283202542962155, "grad_norm": 0.1767578125, "learning_rate": 0.0007621774113439953, "loss": 0.7085, "step": 9520 }, { "epoch": 0.4733286977252409, "grad_norm": 0.150390625, "learning_rate": 0.0007621376775603457, "loss": 0.6921, "step": 9530 }, { "epoch": 0.4738253700208602, "grad_norm": 0.13671875, "learning_rate": 0.0007620979437766962, "loss": 0.7269, "step": 9540 }, { "epoch": 0.47432204231647956, "grad_norm": 0.1572265625, "learning_rate": 0.0007620582099930466, "loss": 0.7659, "step": 9550 }, { "epoch": 0.47481871461209896, "grad_norm": 0.1611328125, "learning_rate": 0.000762018476209397, "loss": 0.7237, "step": 9560 }, { "epoch": 0.4753153869077183, "grad_norm": 0.14453125, "learning_rate": 0.0007619787424257476, "loss": 0.7036, "step": 9570 }, { "epoch": 0.47581205920333763, "grad_norm": 0.189453125, "learning_rate": 0.000761939008642098, "loss": 0.7232, "step": 9580 }, { "epoch": 0.47630873149895697, "grad_norm": 0.154296875, "learning_rate": 0.0007618992748584484, "loss": 0.6916, "step": 9590 }, { "epoch": 0.47680540379457637, "grad_norm": 0.1669921875, "learning_rate": 0.0007618595410747989, "loss": 0.6853, "step": 9600 }, { "epoch": 0.4773020760901957, "grad_norm": 0.1689453125, "learning_rate": 0.0007618198072911494, "loss": 0.6997, "step": 9610 }, { "epoch": 0.47779874838581504, "grad_norm": 0.1552734375, "learning_rate": 0.0007617800735074998, "loss": 0.7772, "step": 9620 }, { "epoch": 0.4782954206814344, "grad_norm": 0.15625, "learning_rate": 0.0007617403397238502, "loss": 0.7037, "step": 9630 }, { "epoch": 0.4787920929770537, "grad_norm": 0.16015625, "learning_rate": 0.0007617006059402007, "loss": 0.7249, "step": 9640 }, { "epoch": 0.4792887652726731, "grad_norm": 0.1435546875, "learning_rate": 0.0007616608721565511, "loss": 0.7063, "step": 9650 }, { "epoch": 0.47978543756829245, "grad_norm": 0.1533203125, "learning_rate": 0.0007616211383729015, "loss": 0.6947, "step": 9660 }, { "epoch": 0.4802821098639118, "grad_norm": 0.1572265625, "learning_rate": 0.0007615814045892521, "loss": 0.6985, "step": 9670 }, { "epoch": 0.4807787821595311, "grad_norm": 0.16796875, "learning_rate": 0.0007615416708056025, "loss": 0.7313, "step": 9680 }, { "epoch": 0.48127545445515046, "grad_norm": 0.140625, "learning_rate": 0.0007615019370219529, "loss": 0.708, "step": 9690 }, { "epoch": 0.48177212675076986, "grad_norm": 0.142578125, "learning_rate": 0.0007614622032383034, "loss": 0.7357, "step": 9700 }, { "epoch": 0.4822687990463892, "grad_norm": 0.1474609375, "learning_rate": 0.0007614224694546539, "loss": 0.7357, "step": 9710 }, { "epoch": 0.48276547134200853, "grad_norm": 0.1513671875, "learning_rate": 0.0007613827356710043, "loss": 0.6737, "step": 9720 }, { "epoch": 0.4832621436376279, "grad_norm": 0.166015625, "learning_rate": 0.0007613430018873548, "loss": 0.6925, "step": 9730 }, { "epoch": 0.48375881593324727, "grad_norm": 0.1484375, "learning_rate": 0.0007613032681037052, "loss": 0.6754, "step": 9740 }, { "epoch": 0.4842554882288666, "grad_norm": 0.1484375, "learning_rate": 0.0007612635343200556, "loss": 0.7491, "step": 9750 }, { "epoch": 0.48475216052448594, "grad_norm": 0.1591796875, "learning_rate": 0.0007612238005364062, "loss": 0.7246, "step": 9760 }, { "epoch": 0.4852488328201053, "grad_norm": 0.158203125, "learning_rate": 0.0007611840667527566, "loss": 0.6921, "step": 9770 }, { "epoch": 0.4857455051157246, "grad_norm": 0.1572265625, "learning_rate": 0.000761144332969107, "loss": 0.6853, "step": 9780 }, { "epoch": 0.486242177411344, "grad_norm": 0.1474609375, "learning_rate": 0.0007611045991854575, "loss": 0.6736, "step": 9790 }, { "epoch": 0.48673884970696335, "grad_norm": 0.138671875, "learning_rate": 0.0007610648654018079, "loss": 0.7246, "step": 9800 }, { "epoch": 0.4872355220025827, "grad_norm": 0.1455078125, "learning_rate": 0.0007610251316181583, "loss": 0.7048, "step": 9810 }, { "epoch": 0.487732194298202, "grad_norm": 0.1630859375, "learning_rate": 0.0007609853978345088, "loss": 0.688, "step": 9820 }, { "epoch": 0.4882288665938214, "grad_norm": 0.1435546875, "learning_rate": 0.0007609456640508593, "loss": 0.7032, "step": 9830 }, { "epoch": 0.48872553888944076, "grad_norm": 0.150390625, "learning_rate": 0.0007609059302672097, "loss": 0.6997, "step": 9840 }, { "epoch": 0.4892222111850601, "grad_norm": 0.1591796875, "learning_rate": 0.0007608661964835601, "loss": 0.6674, "step": 9850 }, { "epoch": 0.48971888348067943, "grad_norm": 0.1416015625, "learning_rate": 0.0007608264626999106, "loss": 0.7512, "step": 9860 }, { "epoch": 0.4902155557762988, "grad_norm": 0.150390625, "learning_rate": 0.0007607867289162612, "loss": 0.7171, "step": 9870 }, { "epoch": 0.49071222807191817, "grad_norm": 0.158203125, "learning_rate": 0.0007607469951326115, "loss": 0.7149, "step": 9880 }, { "epoch": 0.4912089003675375, "grad_norm": 0.140625, "learning_rate": 0.000760707261348962, "loss": 0.6987, "step": 9890 }, { "epoch": 0.49170557266315684, "grad_norm": 0.154296875, "learning_rate": 0.0007606675275653124, "loss": 0.728, "step": 9900 }, { "epoch": 0.4922022449587762, "grad_norm": 0.13671875, "learning_rate": 0.0007606277937816628, "loss": 0.669, "step": 9910 }, { "epoch": 0.4926989172543956, "grad_norm": 0.1591796875, "learning_rate": 0.0007605880599980134, "loss": 0.6901, "step": 9920 }, { "epoch": 0.4931955895500149, "grad_norm": 0.1533203125, "learning_rate": 0.0007605483262143638, "loss": 0.7053, "step": 9930 }, { "epoch": 0.49369226184563425, "grad_norm": 0.171875, "learning_rate": 0.0007605085924307142, "loss": 0.7132, "step": 9940 }, { "epoch": 0.4941889341412536, "grad_norm": 0.11962890625, "learning_rate": 0.0007604688586470647, "loss": 0.6702, "step": 9950 }, { "epoch": 0.4946856064368729, "grad_norm": 0.126953125, "learning_rate": 0.0007604291248634151, "loss": 0.7449, "step": 9960 }, { "epoch": 0.4951822787324923, "grad_norm": 0.1826171875, "learning_rate": 0.0007603893910797656, "loss": 0.6888, "step": 9970 }, { "epoch": 0.49567895102811166, "grad_norm": 0.142578125, "learning_rate": 0.0007603496572961161, "loss": 0.7041, "step": 9980 }, { "epoch": 0.496175623323731, "grad_norm": 0.1533203125, "learning_rate": 0.0007603099235124665, "loss": 0.7158, "step": 9990 }, { "epoch": 0.49667229561935033, "grad_norm": 0.15234375, "learning_rate": 0.000760270189728817, "loss": 0.7205, "step": 10000 }, { "epoch": 0.49716896791496973, "grad_norm": 0.130859375, "learning_rate": 0.0007602304559451674, "loss": 0.7384, "step": 10010 }, { "epoch": 0.49766564021058907, "grad_norm": 0.1376953125, "learning_rate": 0.0007601907221615179, "loss": 0.7441, "step": 10020 }, { "epoch": 0.4981623125062084, "grad_norm": 0.1416015625, "learning_rate": 0.0007601509883778684, "loss": 0.6916, "step": 10030 }, { "epoch": 0.49865898480182774, "grad_norm": 0.15234375, "learning_rate": 0.0007601112545942187, "loss": 0.7187, "step": 10040 }, { "epoch": 0.4991556570974471, "grad_norm": 0.1259765625, "learning_rate": 0.0007600715208105692, "loss": 0.7011, "step": 10050 }, { "epoch": 0.4996523293930665, "grad_norm": 0.13671875, "learning_rate": 0.0007600317870269198, "loss": 0.6988, "step": 10060 }, { "epoch": 0.5001490016886858, "grad_norm": 0.1328125, "learning_rate": 0.0007599920532432701, "loss": 0.6829, "step": 10070 }, { "epoch": 0.5006456739843052, "grad_norm": 0.1318359375, "learning_rate": 0.0007599523194596206, "loss": 0.7256, "step": 10080 }, { "epoch": 0.5011423462799245, "grad_norm": 0.134765625, "learning_rate": 0.000759912585675971, "loss": 0.741, "step": 10090 }, { "epoch": 0.5016390185755438, "grad_norm": 0.126953125, "learning_rate": 0.0007598728518923214, "loss": 0.6929, "step": 10100 }, { "epoch": 0.5021356908711632, "grad_norm": 0.13671875, "learning_rate": 0.0007598331181086719, "loss": 0.7239, "step": 10110 }, { "epoch": 0.5026323631667825, "grad_norm": 0.1572265625, "learning_rate": 0.0007597933843250224, "loss": 0.7027, "step": 10120 }, { "epoch": 0.503129035462402, "grad_norm": 0.1259765625, "learning_rate": 0.0007597536505413728, "loss": 0.6563, "step": 10130 }, { "epoch": 0.5036257077580213, "grad_norm": 0.140625, "learning_rate": 0.0007597139167577233, "loss": 0.727, "step": 10140 }, { "epoch": 0.5041223800536406, "grad_norm": 0.234375, "learning_rate": 0.0007596741829740737, "loss": 0.7071, "step": 10150 }, { "epoch": 0.50461905234926, "grad_norm": 0.1376953125, "learning_rate": 0.0007596344491904242, "loss": 0.7103, "step": 10160 }, { "epoch": 0.5051157246448793, "grad_norm": 0.154296875, "learning_rate": 0.0007595947154067747, "loss": 0.6813, "step": 10170 }, { "epoch": 0.5056123969404986, "grad_norm": 0.1416015625, "learning_rate": 0.0007595549816231251, "loss": 0.7, "step": 10180 }, { "epoch": 0.506109069236118, "grad_norm": 0.146484375, "learning_rate": 0.0007595152478394756, "loss": 0.7448, "step": 10190 }, { "epoch": 0.5066057415317373, "grad_norm": 0.181640625, "learning_rate": 0.000759475514055826, "loss": 0.6848, "step": 10200 }, { "epoch": 0.5071024138273567, "grad_norm": 0.150390625, "learning_rate": 0.0007594357802721764, "loss": 0.7022, "step": 10210 }, { "epoch": 0.5075990861229761, "grad_norm": 0.1455078125, "learning_rate": 0.000759396046488527, "loss": 0.6825, "step": 10220 }, { "epoch": 0.5080957584185954, "grad_norm": 0.1435546875, "learning_rate": 0.0007593563127048773, "loss": 0.6914, "step": 10230 }, { "epoch": 0.5085924307142148, "grad_norm": 0.134765625, "learning_rate": 0.0007593165789212278, "loss": 0.737, "step": 10240 }, { "epoch": 0.5090891030098341, "grad_norm": 0.140625, "learning_rate": 0.0007592768451375783, "loss": 0.6901, "step": 10250 }, { "epoch": 0.5095857753054535, "grad_norm": 0.1474609375, "learning_rate": 0.0007592371113539286, "loss": 0.7328, "step": 10260 }, { "epoch": 0.5100824476010728, "grad_norm": 0.123046875, "learning_rate": 0.0007591973775702792, "loss": 0.7049, "step": 10270 }, { "epoch": 0.5105791198966921, "grad_norm": 0.1484375, "learning_rate": 0.0007591576437866297, "loss": 0.6783, "step": 10280 }, { "epoch": 0.5110757921923115, "grad_norm": 0.14453125, "learning_rate": 0.00075911791000298, "loss": 0.7185, "step": 10290 }, { "epoch": 0.5115724644879308, "grad_norm": 0.1435546875, "learning_rate": 0.0007590781762193305, "loss": 0.7185, "step": 10300 }, { "epoch": 0.5120691367835503, "grad_norm": 0.1357421875, "learning_rate": 0.0007590384424356809, "loss": 0.7252, "step": 10310 }, { "epoch": 0.5125658090791696, "grad_norm": 0.140625, "learning_rate": 0.0007589987086520315, "loss": 0.7113, "step": 10320 }, { "epoch": 0.5130624813747889, "grad_norm": 0.1259765625, "learning_rate": 0.0007589589748683819, "loss": 0.6794, "step": 10330 }, { "epoch": 0.5135591536704083, "grad_norm": 0.146484375, "learning_rate": 0.0007589192410847323, "loss": 0.7233, "step": 10340 }, { "epoch": 0.5140558259660276, "grad_norm": 0.1279296875, "learning_rate": 0.0007588795073010828, "loss": 0.6812, "step": 10350 }, { "epoch": 0.514552498261647, "grad_norm": 0.130859375, "learning_rate": 0.0007588397735174332, "loss": 0.7091, "step": 10360 }, { "epoch": 0.5150491705572663, "grad_norm": 0.13671875, "learning_rate": 0.0007588000397337837, "loss": 0.734, "step": 10370 }, { "epoch": 0.5155458428528856, "grad_norm": 0.1259765625, "learning_rate": 0.0007587603059501342, "loss": 0.7, "step": 10380 }, { "epoch": 0.516042515148505, "grad_norm": 0.140625, "learning_rate": 0.0007587205721664846, "loss": 0.6981, "step": 10390 }, { "epoch": 0.5165391874441244, "grad_norm": 0.1611328125, "learning_rate": 0.000758680838382835, "loss": 0.7074, "step": 10400 }, { "epoch": 0.5170358597397438, "grad_norm": 0.1357421875, "learning_rate": 0.0007586411045991855, "loss": 0.722, "step": 10410 }, { "epoch": 0.5175325320353631, "grad_norm": 0.126953125, "learning_rate": 0.0007586013708155359, "loss": 0.7269, "step": 10420 }, { "epoch": 0.5180292043309824, "grad_norm": 0.1279296875, "learning_rate": 0.0007585616370318864, "loss": 0.7256, "step": 10430 }, { "epoch": 0.5185258766266018, "grad_norm": 0.1572265625, "learning_rate": 0.0007585219032482369, "loss": 0.711, "step": 10440 }, { "epoch": 0.5190225489222211, "grad_norm": 0.1328125, "learning_rate": 0.0007584821694645873, "loss": 0.6722, "step": 10450 }, { "epoch": 0.5195192212178404, "grad_norm": 0.130859375, "learning_rate": 0.0007584424356809377, "loss": 0.6736, "step": 10460 }, { "epoch": 0.5200158935134598, "grad_norm": 0.1357421875, "learning_rate": 0.0007584027018972883, "loss": 0.6942, "step": 10470 }, { "epoch": 0.5205125658090791, "grad_norm": 0.1240234375, "learning_rate": 0.0007583629681136387, "loss": 0.6936, "step": 10480 }, { "epoch": 0.5210092381046986, "grad_norm": 0.125, "learning_rate": 0.0007583232343299891, "loss": 0.7083, "step": 10490 }, { "epoch": 0.5215059104003179, "grad_norm": 0.134765625, "learning_rate": 0.0007582835005463395, "loss": 0.7019, "step": 10500 }, { "epoch": 0.5220025826959372, "grad_norm": 0.126953125, "learning_rate": 0.00075824376676269, "loss": 0.7379, "step": 10510 }, { "epoch": 0.5224992549915566, "grad_norm": 0.142578125, "learning_rate": 0.0007582040329790405, "loss": 0.6855, "step": 10520 }, { "epoch": 0.5229959272871759, "grad_norm": 0.1376953125, "learning_rate": 0.0007581642991953909, "loss": 0.7362, "step": 10530 }, { "epoch": 0.5234925995827953, "grad_norm": 0.1474609375, "learning_rate": 0.0007581245654117414, "loss": 0.6975, "step": 10540 }, { "epoch": 0.5239892718784146, "grad_norm": 0.1591796875, "learning_rate": 0.0007580848316280918, "loss": 0.668, "step": 10550 }, { "epoch": 0.5244859441740339, "grad_norm": 0.1259765625, "learning_rate": 0.0007580450978444422, "loss": 0.7077, "step": 10560 }, { "epoch": 0.5249826164696533, "grad_norm": 0.1533203125, "learning_rate": 0.0007580053640607928, "loss": 0.7183, "step": 10570 }, { "epoch": 0.5254792887652727, "grad_norm": 0.1494140625, "learning_rate": 0.0007579656302771432, "loss": 0.6935, "step": 10580 }, { "epoch": 0.5259759610608921, "grad_norm": 0.134765625, "learning_rate": 0.0007579258964934936, "loss": 0.7131, "step": 10590 }, { "epoch": 0.5264726333565114, "grad_norm": 0.1240234375, "learning_rate": 0.0007578861627098441, "loss": 0.6672, "step": 10600 }, { "epoch": 0.5269693056521307, "grad_norm": 0.12890625, "learning_rate": 0.0007578464289261945, "loss": 0.6699, "step": 10610 }, { "epoch": 0.5274659779477501, "grad_norm": 0.1279296875, "learning_rate": 0.000757806695142545, "loss": 0.6993, "step": 10620 }, { "epoch": 0.5279626502433694, "grad_norm": 0.12353515625, "learning_rate": 0.0007577669613588955, "loss": 0.6946, "step": 10630 }, { "epoch": 0.5284593225389888, "grad_norm": 0.1298828125, "learning_rate": 0.0007577272275752459, "loss": 0.6683, "step": 10640 }, { "epoch": 0.5289559948346081, "grad_norm": 0.1279296875, "learning_rate": 0.0007576874937915963, "loss": 0.7298, "step": 10650 }, { "epoch": 0.5294526671302274, "grad_norm": 0.1552734375, "learning_rate": 0.0007576477600079468, "loss": 0.6767, "step": 10660 }, { "epoch": 0.5299493394258469, "grad_norm": 0.166015625, "learning_rate": 0.0007576080262242973, "loss": 0.7152, "step": 10670 }, { "epoch": 0.5304460117214662, "grad_norm": 0.1279296875, "learning_rate": 0.0007575682924406477, "loss": 0.7154, "step": 10680 }, { "epoch": 0.5309426840170856, "grad_norm": 0.126953125, "learning_rate": 0.0007575285586569981, "loss": 0.6851, "step": 10690 }, { "epoch": 0.5314393563127049, "grad_norm": 0.1298828125, "learning_rate": 0.0007574888248733486, "loss": 0.7228, "step": 10700 }, { "epoch": 0.5319360286083242, "grad_norm": 0.13671875, "learning_rate": 0.000757449091089699, "loss": 0.6955, "step": 10710 }, { "epoch": 0.5324327009039436, "grad_norm": 0.11767578125, "learning_rate": 0.0007574093573060495, "loss": 0.6805, "step": 10720 }, { "epoch": 0.5329293731995629, "grad_norm": 0.1220703125, "learning_rate": 0.0007573696235224, "loss": 0.6945, "step": 10730 }, { "epoch": 0.5334260454951822, "grad_norm": 0.1474609375, "learning_rate": 0.0007573298897387504, "loss": 0.6913, "step": 10740 }, { "epoch": 0.5339227177908016, "grad_norm": 0.12451171875, "learning_rate": 0.0007572901559551008, "loss": 0.6735, "step": 10750 }, { "epoch": 0.534419390086421, "grad_norm": 0.13671875, "learning_rate": 0.0007572504221714513, "loss": 0.7134, "step": 10760 }, { "epoch": 0.5349160623820404, "grad_norm": 0.140625, "learning_rate": 0.0007572106883878018, "loss": 0.6831, "step": 10770 }, { "epoch": 0.5354127346776597, "grad_norm": 0.1533203125, "learning_rate": 0.0007571709546041522, "loss": 0.7085, "step": 10780 }, { "epoch": 0.535909406973279, "grad_norm": 0.12353515625, "learning_rate": 0.0007571312208205027, "loss": 0.6748, "step": 10790 }, { "epoch": 0.5364060792688984, "grad_norm": 0.12353515625, "learning_rate": 0.0007570914870368531, "loss": 0.7158, "step": 10800 }, { "epoch": 0.5369027515645177, "grad_norm": 0.1298828125, "learning_rate": 0.0007570517532532035, "loss": 0.7052, "step": 10810 }, { "epoch": 0.5373994238601371, "grad_norm": 0.130859375, "learning_rate": 0.0007570120194695541, "loss": 0.7261, "step": 10820 }, { "epoch": 0.5378960961557564, "grad_norm": 0.1259765625, "learning_rate": 0.0007569722856859045, "loss": 0.7455, "step": 10830 }, { "epoch": 0.5383927684513757, "grad_norm": 0.1240234375, "learning_rate": 0.0007569325519022549, "loss": 0.6688, "step": 10840 }, { "epoch": 0.5388894407469952, "grad_norm": 0.1318359375, "learning_rate": 0.0007568928181186054, "loss": 0.6693, "step": 10850 }, { "epoch": 0.5393861130426145, "grad_norm": 0.1162109375, "learning_rate": 0.0007568530843349558, "loss": 0.7121, "step": 10860 }, { "epoch": 0.5398827853382339, "grad_norm": 0.1181640625, "learning_rate": 0.0007568133505513063, "loss": 0.6963, "step": 10870 }, { "epoch": 0.5403794576338532, "grad_norm": 0.1376953125, "learning_rate": 0.0007567736167676568, "loss": 0.7068, "step": 10880 }, { "epoch": 0.5408761299294725, "grad_norm": 0.12255859375, "learning_rate": 0.0007567338829840072, "loss": 0.6676, "step": 10890 }, { "epoch": 0.5413728022250919, "grad_norm": 0.125, "learning_rate": 0.0007566941492003577, "loss": 0.6815, "step": 10900 }, { "epoch": 0.5418694745207112, "grad_norm": 0.1357421875, "learning_rate": 0.000756654415416708, "loss": 0.7308, "step": 10910 }, { "epoch": 0.5423661468163306, "grad_norm": 0.1318359375, "learning_rate": 0.0007566146816330586, "loss": 0.6691, "step": 10920 }, { "epoch": 0.5428628191119499, "grad_norm": 0.12451171875, "learning_rate": 0.0007565749478494091, "loss": 0.7001, "step": 10930 }, { "epoch": 0.5433594914075693, "grad_norm": 0.1396484375, "learning_rate": 0.0007565352140657594, "loss": 0.6703, "step": 10940 }, { "epoch": 0.5438561637031887, "grad_norm": 0.1318359375, "learning_rate": 0.0007564954802821099, "loss": 0.6909, "step": 10950 }, { "epoch": 0.544352835998808, "grad_norm": 0.1318359375, "learning_rate": 0.0007564557464984603, "loss": 0.6848, "step": 10960 }, { "epoch": 0.5448495082944274, "grad_norm": 0.1357421875, "learning_rate": 0.0007564160127148107, "loss": 0.6682, "step": 10970 }, { "epoch": 0.5453461805900467, "grad_norm": 0.15625, "learning_rate": 0.0007563762789311613, "loss": 0.7165, "step": 10980 }, { "epoch": 0.545842852885666, "grad_norm": 0.12890625, "learning_rate": 0.0007563365451475117, "loss": 0.6916, "step": 10990 }, { "epoch": 0.5463395251812854, "grad_norm": 0.13671875, "learning_rate": 0.0007562968113638621, "loss": 0.7004, "step": 11000 }, { "epoch": 0.5468361974769047, "grad_norm": 0.1650390625, "learning_rate": 0.0007562570775802126, "loss": 0.7482, "step": 11010 }, { "epoch": 0.547332869772524, "grad_norm": 0.115234375, "learning_rate": 0.000756217343796563, "loss": 0.6794, "step": 11020 }, { "epoch": 0.5478295420681435, "grad_norm": 0.1357421875, "learning_rate": 0.0007561776100129135, "loss": 0.7104, "step": 11030 }, { "epoch": 0.5483262143637628, "grad_norm": 0.11083984375, "learning_rate": 0.000756137876229264, "loss": 0.681, "step": 11040 }, { "epoch": 0.5488228866593822, "grad_norm": 0.12255859375, "learning_rate": 0.0007560981424456144, "loss": 0.6743, "step": 11050 }, { "epoch": 0.5493195589550015, "grad_norm": 0.12158203125, "learning_rate": 0.0007560584086619649, "loss": 0.7129, "step": 11060 }, { "epoch": 0.5498162312506208, "grad_norm": 0.1513671875, "learning_rate": 0.0007560186748783154, "loss": 0.7309, "step": 11070 }, { "epoch": 0.5503129035462402, "grad_norm": 0.1376953125, "learning_rate": 0.0007559789410946658, "loss": 0.6973, "step": 11080 }, { "epoch": 0.5508095758418595, "grad_norm": 0.134765625, "learning_rate": 0.0007559392073110163, "loss": 0.6736, "step": 11090 }, { "epoch": 0.5513062481374789, "grad_norm": 0.12158203125, "learning_rate": 0.0007558994735273666, "loss": 0.6728, "step": 11100 }, { "epoch": 0.5518029204330982, "grad_norm": 0.11865234375, "learning_rate": 0.0007558597397437171, "loss": 0.6845, "step": 11110 }, { "epoch": 0.5522995927287176, "grad_norm": 0.12158203125, "learning_rate": 0.0007558200059600677, "loss": 0.6922, "step": 11120 }, { "epoch": 0.552796265024337, "grad_norm": 0.1142578125, "learning_rate": 0.000755780272176418, "loss": 0.6936, "step": 11130 }, { "epoch": 0.5532929373199563, "grad_norm": 0.11669921875, "learning_rate": 0.0007557405383927685, "loss": 0.6968, "step": 11140 }, { "epoch": 0.5537896096155757, "grad_norm": 0.12109375, "learning_rate": 0.000755700804609119, "loss": 0.6866, "step": 11150 }, { "epoch": 0.554286281911195, "grad_norm": 0.1279296875, "learning_rate": 0.0007556610708254693, "loss": 0.6958, "step": 11160 }, { "epoch": 0.5547829542068143, "grad_norm": 0.130859375, "learning_rate": 0.0007556213370418198, "loss": 0.6782, "step": 11170 }, { "epoch": 0.5552796265024337, "grad_norm": 0.130859375, "learning_rate": 0.0007555816032581703, "loss": 0.6872, "step": 11180 }, { "epoch": 0.555776298798053, "grad_norm": 0.1279296875, "learning_rate": 0.0007555418694745208, "loss": 0.6499, "step": 11190 }, { "epoch": 0.5562729710936724, "grad_norm": 0.1298828125, "learning_rate": 0.0007555021356908712, "loss": 0.6933, "step": 11200 }, { "epoch": 0.5567696433892917, "grad_norm": 0.12451171875, "learning_rate": 0.0007554624019072216, "loss": 0.7007, "step": 11210 }, { "epoch": 0.5572663156849111, "grad_norm": 0.1279296875, "learning_rate": 0.0007554226681235722, "loss": 0.7212, "step": 11220 }, { "epoch": 0.5577629879805305, "grad_norm": 0.1240234375, "learning_rate": 0.0007553829343399226, "loss": 0.6869, "step": 11230 }, { "epoch": 0.5582596602761498, "grad_norm": 0.12353515625, "learning_rate": 0.000755343200556273, "loss": 0.7108, "step": 11240 }, { "epoch": 0.5587563325717692, "grad_norm": 0.126953125, "learning_rate": 0.0007553034667726235, "loss": 0.7133, "step": 11250 }, { "epoch": 0.5592530048673885, "grad_norm": 0.125, "learning_rate": 0.0007552637329889739, "loss": 0.6634, "step": 11260 }, { "epoch": 0.5597496771630078, "grad_norm": 0.1201171875, "learning_rate": 0.0007552239992053243, "loss": 0.7046, "step": 11270 }, { "epoch": 0.5602463494586272, "grad_norm": 0.126953125, "learning_rate": 0.0007551842654216749, "loss": 0.6988, "step": 11280 }, { "epoch": 0.5607430217542465, "grad_norm": 0.1279296875, "learning_rate": 0.0007551445316380252, "loss": 0.681, "step": 11290 }, { "epoch": 0.5612396940498658, "grad_norm": 0.12890625, "learning_rate": 0.0007551047978543757, "loss": 0.7067, "step": 11300 }, { "epoch": 0.5617363663454853, "grad_norm": 0.11328125, "learning_rate": 0.0007550650640707262, "loss": 0.6846, "step": 11310 }, { "epoch": 0.5622330386411046, "grad_norm": 0.11083984375, "learning_rate": 0.0007550253302870765, "loss": 0.6974, "step": 11320 }, { "epoch": 0.562729710936724, "grad_norm": 0.1337890625, "learning_rate": 0.0007549855965034271, "loss": 0.6877, "step": 11330 }, { "epoch": 0.5632263832323433, "grad_norm": 0.10498046875, "learning_rate": 0.0007549458627197776, "loss": 0.6674, "step": 11340 }, { "epoch": 0.5637230555279626, "grad_norm": 0.12255859375, "learning_rate": 0.000754906128936128, "loss": 0.6544, "step": 11350 }, { "epoch": 0.564219727823582, "grad_norm": 0.123046875, "learning_rate": 0.0007548663951524784, "loss": 0.6775, "step": 11360 }, { "epoch": 0.5647164001192013, "grad_norm": 0.1201171875, "learning_rate": 0.0007548266613688288, "loss": 0.6838, "step": 11370 }, { "epoch": 0.5652130724148207, "grad_norm": 0.14453125, "learning_rate": 0.0007547869275851794, "loss": 0.6775, "step": 11380 }, { "epoch": 0.56570974471044, "grad_norm": 0.11181640625, "learning_rate": 0.0007547471938015298, "loss": 0.6907, "step": 11390 }, { "epoch": 0.5662064170060594, "grad_norm": 0.119140625, "learning_rate": 0.0007547074600178802, "loss": 0.669, "step": 11400 }, { "epoch": 0.5667030893016788, "grad_norm": 0.11865234375, "learning_rate": 0.0007546677262342307, "loss": 0.681, "step": 11410 }, { "epoch": 0.5671997615972981, "grad_norm": 0.11474609375, "learning_rate": 0.0007546279924505811, "loss": 0.685, "step": 11420 }, { "epoch": 0.5676964338929175, "grad_norm": 0.1259765625, "learning_rate": 0.0007545882586669316, "loss": 0.7101, "step": 11430 }, { "epoch": 0.5681931061885368, "grad_norm": 0.11376953125, "learning_rate": 0.0007545485248832821, "loss": 0.7297, "step": 11440 }, { "epoch": 0.5686897784841561, "grad_norm": 0.10986328125, "learning_rate": 0.0007545087910996325, "loss": 0.7116, "step": 11450 }, { "epoch": 0.5691864507797755, "grad_norm": 0.123046875, "learning_rate": 0.0007544690573159829, "loss": 0.6823, "step": 11460 }, { "epoch": 0.5696831230753948, "grad_norm": 0.1201171875, "learning_rate": 0.0007544293235323334, "loss": 0.7039, "step": 11470 }, { "epoch": 0.5701797953710142, "grad_norm": 0.111328125, "learning_rate": 0.0007543895897486839, "loss": 0.6877, "step": 11480 }, { "epoch": 0.5706764676666336, "grad_norm": 0.1279296875, "learning_rate": 0.0007543498559650343, "loss": 0.7188, "step": 11490 }, { "epoch": 0.5711731399622529, "grad_norm": 0.1162109375, "learning_rate": 0.0007543101221813848, "loss": 0.6998, "step": 11500 }, { "epoch": 0.5716698122578723, "grad_norm": 0.1689453125, "learning_rate": 0.0007542703883977352, "loss": 0.6943, "step": 11510 }, { "epoch": 0.5721664845534916, "grad_norm": 0.12353515625, "learning_rate": 0.0007542306546140856, "loss": 0.6909, "step": 11520 }, { "epoch": 0.572663156849111, "grad_norm": 0.11669921875, "learning_rate": 0.0007541909208304362, "loss": 0.6551, "step": 11530 }, { "epoch": 0.5731598291447303, "grad_norm": 0.15234375, "learning_rate": 0.0007541511870467866, "loss": 0.6907, "step": 11540 }, { "epoch": 0.5736565014403496, "grad_norm": 0.12353515625, "learning_rate": 0.000754111453263137, "loss": 0.6967, "step": 11550 }, { "epoch": 0.574153173735969, "grad_norm": 0.11865234375, "learning_rate": 0.0007540717194794874, "loss": 0.6866, "step": 11560 }, { "epoch": 0.5746498460315883, "grad_norm": 0.1162109375, "learning_rate": 0.0007540319856958379, "loss": 0.6793, "step": 11570 }, { "epoch": 0.5751465183272078, "grad_norm": 0.1240234375, "learning_rate": 0.0007539922519121884, "loss": 0.71, "step": 11580 }, { "epoch": 0.5756431906228271, "grad_norm": 0.12158203125, "learning_rate": 0.0007539525181285388, "loss": 0.6951, "step": 11590 }, { "epoch": 0.5761398629184464, "grad_norm": 0.1201171875, "learning_rate": 0.0007539127843448893, "loss": 0.6879, "step": 11600 }, { "epoch": 0.5766365352140658, "grad_norm": 0.11962890625, "learning_rate": 0.0007538730505612397, "loss": 0.6827, "step": 11610 }, { "epoch": 0.5771332075096851, "grad_norm": 0.11474609375, "learning_rate": 0.0007538333167775901, "loss": 0.666, "step": 11620 }, { "epoch": 0.5776298798053044, "grad_norm": 0.11572265625, "learning_rate": 0.0007537935829939407, "loss": 0.6948, "step": 11630 }, { "epoch": 0.5781265521009238, "grad_norm": 0.10986328125, "learning_rate": 0.0007537538492102911, "loss": 0.6813, "step": 11640 }, { "epoch": 0.5786232243965431, "grad_norm": 0.1181640625, "learning_rate": 0.0007537141154266415, "loss": 0.6757, "step": 11650 }, { "epoch": 0.5791198966921625, "grad_norm": 0.11865234375, "learning_rate": 0.000753674381642992, "loss": 0.7123, "step": 11660 }, { "epoch": 0.5796165689877819, "grad_norm": 0.1328125, "learning_rate": 0.0007536346478593424, "loss": 0.7048, "step": 11670 }, { "epoch": 0.5801132412834012, "grad_norm": 0.11181640625, "learning_rate": 0.0007535949140756929, "loss": 0.6927, "step": 11680 }, { "epoch": 0.5806099135790206, "grad_norm": 0.119140625, "learning_rate": 0.0007535551802920434, "loss": 0.6679, "step": 11690 }, { "epoch": 0.5811065858746399, "grad_norm": 0.1201171875, "learning_rate": 0.0007535154465083938, "loss": 0.729, "step": 11700 }, { "epoch": 0.5816032581702593, "grad_norm": 0.111328125, "learning_rate": 0.0007534757127247442, "loss": 0.7099, "step": 11710 }, { "epoch": 0.5820999304658786, "grad_norm": 0.1240234375, "learning_rate": 0.0007534359789410947, "loss": 0.6524, "step": 11720 }, { "epoch": 0.5825966027614979, "grad_norm": 0.1064453125, "learning_rate": 0.0007533962451574452, "loss": 0.7253, "step": 11730 }, { "epoch": 0.5830932750571173, "grad_norm": 0.11669921875, "learning_rate": 0.0007533565113737956, "loss": 0.6756, "step": 11740 }, { "epoch": 0.5835899473527366, "grad_norm": 0.1298828125, "learning_rate": 0.0007533167775901461, "loss": 0.6726, "step": 11750 }, { "epoch": 0.5840866196483561, "grad_norm": 0.1142578125, "learning_rate": 0.0007532770438064965, "loss": 0.6854, "step": 11760 }, { "epoch": 0.5845832919439754, "grad_norm": 0.1181640625, "learning_rate": 0.0007532373100228469, "loss": 0.6881, "step": 11770 }, { "epoch": 0.5850799642395947, "grad_norm": 0.1201171875, "learning_rate": 0.0007531975762391974, "loss": 0.7094, "step": 11780 }, { "epoch": 0.5855766365352141, "grad_norm": 0.1123046875, "learning_rate": 0.0007531578424555479, "loss": 0.6962, "step": 11790 }, { "epoch": 0.5860733088308334, "grad_norm": 0.10595703125, "learning_rate": 0.0007531181086718984, "loss": 0.7148, "step": 11800 }, { "epoch": 0.5865699811264528, "grad_norm": 0.1259765625, "learning_rate": 0.0007530783748882487, "loss": 0.6397, "step": 11810 }, { "epoch": 0.5870666534220721, "grad_norm": 0.154296875, "learning_rate": 0.0007530386411045992, "loss": 0.6775, "step": 11820 }, { "epoch": 0.5875633257176914, "grad_norm": 0.1220703125, "learning_rate": 0.0007529989073209497, "loss": 0.7079, "step": 11830 }, { "epoch": 0.5880599980133108, "grad_norm": 0.134765625, "learning_rate": 0.0007529591735373001, "loss": 0.7033, "step": 11840 }, { "epoch": 0.5885566703089302, "grad_norm": 0.14453125, "learning_rate": 0.0007529194397536506, "loss": 0.6891, "step": 11850 }, { "epoch": 0.5890533426045496, "grad_norm": 0.1328125, "learning_rate": 0.000752879705970001, "loss": 0.6991, "step": 11860 }, { "epoch": 0.5895500149001689, "grad_norm": 0.126953125, "learning_rate": 0.0007528399721863514, "loss": 0.7138, "step": 11870 }, { "epoch": 0.5900466871957882, "grad_norm": 0.12353515625, "learning_rate": 0.000752800238402702, "loss": 0.694, "step": 11880 }, { "epoch": 0.5905433594914076, "grad_norm": 0.115234375, "learning_rate": 0.0007527605046190524, "loss": 0.6693, "step": 11890 }, { "epoch": 0.5910400317870269, "grad_norm": 0.12451171875, "learning_rate": 0.0007527207708354028, "loss": 0.6626, "step": 11900 }, { "epoch": 0.5915367040826462, "grad_norm": 0.1123046875, "learning_rate": 0.0007526810370517533, "loss": 0.6696, "step": 11910 }, { "epoch": 0.5920333763782656, "grad_norm": 0.138671875, "learning_rate": 0.0007526413032681037, "loss": 0.7056, "step": 11920 }, { "epoch": 0.5925300486738849, "grad_norm": 0.15234375, "learning_rate": 0.0007526015694844542, "loss": 0.7229, "step": 11930 }, { "epoch": 0.5930267209695044, "grad_norm": 0.130859375, "learning_rate": 0.0007525618357008047, "loss": 0.6533, "step": 11940 }, { "epoch": 0.5935233932651237, "grad_norm": 0.119140625, "learning_rate": 0.0007525221019171551, "loss": 0.6976, "step": 11950 }, { "epoch": 0.594020065560743, "grad_norm": 0.12353515625, "learning_rate": 0.0007524823681335056, "loss": 0.6907, "step": 11960 }, { "epoch": 0.5945167378563624, "grad_norm": 0.12109375, "learning_rate": 0.0007524426343498559, "loss": 0.6898, "step": 11970 }, { "epoch": 0.5950134101519817, "grad_norm": 0.12158203125, "learning_rate": 0.0007524029005662065, "loss": 0.6755, "step": 11980 }, { "epoch": 0.5955100824476011, "grad_norm": 0.1162109375, "learning_rate": 0.000752363166782557, "loss": 0.7117, "step": 11990 }, { "epoch": 0.5960067547432204, "grad_norm": 0.12109375, "learning_rate": 0.0007523234329989073, "loss": 0.6984, "step": 12000 }, { "epoch": 0.5965034270388397, "grad_norm": 0.15234375, "learning_rate": 0.0007522836992152578, "loss": 0.693, "step": 12010 }, { "epoch": 0.5970000993344591, "grad_norm": 0.11572265625, "learning_rate": 0.0007522439654316083, "loss": 0.6968, "step": 12020 }, { "epoch": 0.5974967716300785, "grad_norm": 0.11181640625, "learning_rate": 0.0007522042316479587, "loss": 0.726, "step": 12030 }, { "epoch": 0.5979934439256979, "grad_norm": 0.109375, "learning_rate": 0.0007521644978643092, "loss": 0.6597, "step": 12040 }, { "epoch": 0.5984901162213172, "grad_norm": 0.1396484375, "learning_rate": 0.0007521247640806596, "loss": 0.6991, "step": 12050 }, { "epoch": 0.5989867885169365, "grad_norm": 0.11767578125, "learning_rate": 0.00075208503029701, "loss": 0.69, "step": 12060 }, { "epoch": 0.5994834608125559, "grad_norm": 0.1357421875, "learning_rate": 0.0007520452965133605, "loss": 0.6338, "step": 12070 }, { "epoch": 0.5999801331081752, "grad_norm": 0.123046875, "learning_rate": 0.000752005562729711, "loss": 0.6749, "step": 12080 }, { "epoch": 0.6004768054037946, "grad_norm": 0.109375, "learning_rate": 0.0007519658289460615, "loss": 0.6509, "step": 12090 }, { "epoch": 0.6009734776994139, "grad_norm": 0.1279296875, "learning_rate": 0.0007519260951624119, "loss": 0.6835, "step": 12100 }, { "epoch": 0.6014701499950332, "grad_norm": 0.11083984375, "learning_rate": 0.0007518863613787623, "loss": 0.6922, "step": 12110 }, { "epoch": 0.6019668222906527, "grad_norm": 0.1220703125, "learning_rate": 0.0007518466275951128, "loss": 0.6965, "step": 12120 }, { "epoch": 0.602463494586272, "grad_norm": 0.11865234375, "learning_rate": 0.0007518068938114633, "loss": 0.6965, "step": 12130 }, { "epoch": 0.6029601668818914, "grad_norm": 0.1435546875, "learning_rate": 0.0007517671600278137, "loss": 0.6884, "step": 12140 }, { "epoch": 0.6034568391775107, "grad_norm": 0.11572265625, "learning_rate": 0.0007517274262441642, "loss": 0.6822, "step": 12150 }, { "epoch": 0.60395351147313, "grad_norm": 0.11669921875, "learning_rate": 0.0007516876924605145, "loss": 0.6637, "step": 12160 }, { "epoch": 0.6044501837687494, "grad_norm": 0.11962890625, "learning_rate": 0.000751647958676865, "loss": 0.6784, "step": 12170 }, { "epoch": 0.6049468560643687, "grad_norm": 0.1181640625, "learning_rate": 0.0007516082248932156, "loss": 0.6741, "step": 12180 }, { "epoch": 0.605443528359988, "grad_norm": 0.11962890625, "learning_rate": 0.0007515684911095659, "loss": 0.6529, "step": 12190 }, { "epoch": 0.6059402006556074, "grad_norm": 0.11279296875, "learning_rate": 0.0007515287573259164, "loss": 0.6806, "step": 12200 }, { "epoch": 0.6064368729512268, "grad_norm": 0.111328125, "learning_rate": 0.0007514890235422669, "loss": 0.6758, "step": 12210 }, { "epoch": 0.6069335452468462, "grad_norm": 0.1357421875, "learning_rate": 0.0007514492897586172, "loss": 0.7012, "step": 12220 }, { "epoch": 0.6074302175424655, "grad_norm": 0.1259765625, "learning_rate": 0.0007514095559749678, "loss": 0.7264, "step": 12230 }, { "epoch": 0.6079268898380848, "grad_norm": 0.1240234375, "learning_rate": 0.0007513698221913182, "loss": 0.6513, "step": 12240 }, { "epoch": 0.6084235621337042, "grad_norm": 0.11279296875, "learning_rate": 0.0007513300884076687, "loss": 0.6646, "step": 12250 }, { "epoch": 0.6089202344293235, "grad_norm": 0.11865234375, "learning_rate": 0.0007512903546240191, "loss": 0.6786, "step": 12260 }, { "epoch": 0.6094169067249429, "grad_norm": 0.1259765625, "learning_rate": 0.0007512506208403695, "loss": 0.6541, "step": 12270 }, { "epoch": 0.6099135790205622, "grad_norm": 0.11767578125, "learning_rate": 0.0007512108870567201, "loss": 0.6807, "step": 12280 }, { "epoch": 0.6104102513161815, "grad_norm": 0.12890625, "learning_rate": 0.0007511711532730705, "loss": 0.6737, "step": 12290 }, { "epoch": 0.6109069236118009, "grad_norm": 0.11572265625, "learning_rate": 0.0007511314194894209, "loss": 0.6765, "step": 12300 }, { "epoch": 0.6114035959074203, "grad_norm": 0.10888671875, "learning_rate": 0.0007510916857057714, "loss": 0.7203, "step": 12310 }, { "epoch": 0.6119002682030397, "grad_norm": 0.12255859375, "learning_rate": 0.0007510519519221218, "loss": 0.665, "step": 12320 }, { "epoch": 0.612396940498659, "grad_norm": 0.1181640625, "learning_rate": 0.0007510122181384722, "loss": 0.6752, "step": 12330 }, { "epoch": 0.6128936127942783, "grad_norm": 0.140625, "learning_rate": 0.0007509724843548228, "loss": 0.6806, "step": 12340 }, { "epoch": 0.6133902850898977, "grad_norm": 0.11181640625, "learning_rate": 0.0007509327505711731, "loss": 0.6697, "step": 12350 }, { "epoch": 0.613886957385517, "grad_norm": 0.11767578125, "learning_rate": 0.0007508930167875236, "loss": 0.6807, "step": 12360 }, { "epoch": 0.6143836296811364, "grad_norm": 0.1103515625, "learning_rate": 0.0007508532830038741, "loss": 0.6599, "step": 12370 }, { "epoch": 0.6148803019767557, "grad_norm": 0.125, "learning_rate": 0.0007508135492202244, "loss": 0.6782, "step": 12380 }, { "epoch": 0.615376974272375, "grad_norm": 0.126953125, "learning_rate": 0.000750773815436575, "loss": 0.6866, "step": 12390 }, { "epoch": 0.6158736465679945, "grad_norm": 0.10986328125, "learning_rate": 0.0007507340816529255, "loss": 0.6832, "step": 12400 }, { "epoch": 0.6163703188636138, "grad_norm": 0.10791015625, "learning_rate": 0.0007506943478692759, "loss": 0.6741, "step": 12410 }, { "epoch": 0.6168669911592332, "grad_norm": 0.1025390625, "learning_rate": 0.0007506546140856263, "loss": 0.6638, "step": 12420 }, { "epoch": 0.6173636634548525, "grad_norm": 0.134765625, "learning_rate": 0.0007506148803019767, "loss": 0.6734, "step": 12430 }, { "epoch": 0.6178603357504718, "grad_norm": 0.1162109375, "learning_rate": 0.0007505751465183273, "loss": 0.6718, "step": 12440 }, { "epoch": 0.6183570080460912, "grad_norm": 0.1279296875, "learning_rate": 0.0007505354127346777, "loss": 0.7069, "step": 12450 }, { "epoch": 0.6188536803417105, "grad_norm": 0.10986328125, "learning_rate": 0.0007504956789510281, "loss": 0.7089, "step": 12460 }, { "epoch": 0.6193503526373298, "grad_norm": 0.11767578125, "learning_rate": 0.0007504559451673786, "loss": 0.6918, "step": 12470 }, { "epoch": 0.6198470249329492, "grad_norm": 0.1591796875, "learning_rate": 0.000750416211383729, "loss": 0.695, "step": 12480 }, { "epoch": 0.6203436972285686, "grad_norm": 0.134765625, "learning_rate": 0.0007503764776000795, "loss": 0.6542, "step": 12490 }, { "epoch": 0.620840369524188, "grad_norm": 0.14453125, "learning_rate": 0.00075033674381643, "loss": 0.7126, "step": 12500 }, { "epoch": 0.6213370418198073, "grad_norm": 0.1171875, "learning_rate": 0.0007502970100327804, "loss": 0.6595, "step": 12510 }, { "epoch": 0.6218337141154267, "grad_norm": 0.11279296875, "learning_rate": 0.0007502572762491308, "loss": 0.6853, "step": 12520 }, { "epoch": 0.622330386411046, "grad_norm": 0.11376953125, "learning_rate": 0.0007502175424654814, "loss": 0.6536, "step": 12530 }, { "epoch": 0.6228270587066653, "grad_norm": 0.1123046875, "learning_rate": 0.0007501778086818318, "loss": 0.6682, "step": 12540 }, { "epoch": 0.6233237310022847, "grad_norm": 0.11181640625, "learning_rate": 0.0007501380748981822, "loss": 0.6557, "step": 12550 }, { "epoch": 0.623820403297904, "grad_norm": 0.10791015625, "learning_rate": 0.0007500983411145327, "loss": 0.6871, "step": 12560 }, { "epoch": 0.6243170755935233, "grad_norm": 0.12451171875, "learning_rate": 0.0007500586073308831, "loss": 0.6744, "step": 12570 }, { "epoch": 0.6248137478891428, "grad_norm": 0.10498046875, "learning_rate": 0.0007500188735472335, "loss": 0.6408, "step": 12580 }, { "epoch": 0.6253104201847621, "grad_norm": 0.1064453125, "learning_rate": 0.0007499791397635841, "loss": 0.6815, "step": 12590 }, { "epoch": 0.6258070924803815, "grad_norm": 0.10693359375, "learning_rate": 0.0007499394059799345, "loss": 0.6647, "step": 12600 }, { "epoch": 0.6263037647760008, "grad_norm": 0.12255859375, "learning_rate": 0.0007498996721962849, "loss": 0.666, "step": 12610 }, { "epoch": 0.6268004370716201, "grad_norm": 0.10400390625, "learning_rate": 0.0007498599384126353, "loss": 0.6886, "step": 12620 }, { "epoch": 0.6272971093672395, "grad_norm": 0.1083984375, "learning_rate": 0.0007498202046289858, "loss": 0.7055, "step": 12630 }, { "epoch": 0.6277937816628588, "grad_norm": 0.12109375, "learning_rate": 0.0007497804708453363, "loss": 0.6681, "step": 12640 }, { "epoch": 0.6282904539584782, "grad_norm": 0.11767578125, "learning_rate": 0.0007497407370616867, "loss": 0.6827, "step": 12650 }, { "epoch": 0.6287871262540975, "grad_norm": 0.1220703125, "learning_rate": 0.0007497010032780372, "loss": 0.6637, "step": 12660 }, { "epoch": 0.6292837985497169, "grad_norm": 0.11083984375, "learning_rate": 0.0007496612694943876, "loss": 0.6761, "step": 12670 }, { "epoch": 0.6297804708453363, "grad_norm": 0.1162109375, "learning_rate": 0.000749621535710738, "loss": 0.6713, "step": 12680 }, { "epoch": 0.6302771431409556, "grad_norm": 0.1103515625, "learning_rate": 0.0007495818019270886, "loss": 0.6797, "step": 12690 }, { "epoch": 0.630773815436575, "grad_norm": 0.11474609375, "learning_rate": 0.000749542068143439, "loss": 0.6841, "step": 12700 }, { "epoch": 0.6312704877321943, "grad_norm": 0.1064453125, "learning_rate": 0.0007495023343597894, "loss": 0.6869, "step": 12710 }, { "epoch": 0.6317671600278136, "grad_norm": 0.140625, "learning_rate": 0.0007494626005761399, "loss": 0.6889, "step": 12720 }, { "epoch": 0.632263832323433, "grad_norm": 0.10498046875, "learning_rate": 0.0007494228667924903, "loss": 0.7024, "step": 12730 }, { "epoch": 0.6327605046190523, "grad_norm": 0.11279296875, "learning_rate": 0.0007493831330088408, "loss": 0.6904, "step": 12740 }, { "epoch": 0.6332571769146716, "grad_norm": 0.1337890625, "learning_rate": 0.0007493433992251913, "loss": 0.6811, "step": 12750 }, { "epoch": 0.6337538492102911, "grad_norm": 0.11083984375, "learning_rate": 0.0007493036654415417, "loss": 0.6713, "step": 12760 }, { "epoch": 0.6342505215059104, "grad_norm": 0.11376953125, "learning_rate": 0.0007492639316578921, "loss": 0.6732, "step": 12770 }, { "epoch": 0.6347471938015298, "grad_norm": 0.1298828125, "learning_rate": 0.0007492241978742426, "loss": 0.6961, "step": 12780 }, { "epoch": 0.6352438660971491, "grad_norm": 0.1103515625, "learning_rate": 0.0007491844640905931, "loss": 0.672, "step": 12790 }, { "epoch": 0.6357405383927685, "grad_norm": 0.1171875, "learning_rate": 0.0007491447303069435, "loss": 0.7121, "step": 12800 }, { "epoch": 0.6362372106883878, "grad_norm": 0.10400390625, "learning_rate": 0.000749104996523294, "loss": 0.6957, "step": 12810 }, { "epoch": 0.6367338829840071, "grad_norm": 0.11767578125, "learning_rate": 0.0007490652627396444, "loss": 0.6893, "step": 12820 }, { "epoch": 0.6372305552796265, "grad_norm": 0.11474609375, "learning_rate": 0.0007490255289559948, "loss": 0.6501, "step": 12830 }, { "epoch": 0.6377272275752458, "grad_norm": 0.10986328125, "learning_rate": 0.0007489857951723453, "loss": 0.6912, "step": 12840 }, { "epoch": 0.6382238998708653, "grad_norm": 0.1181640625, "learning_rate": 0.0007489460613886958, "loss": 0.6945, "step": 12850 }, { "epoch": 0.6387205721664846, "grad_norm": 0.130859375, "learning_rate": 0.0007489063276050463, "loss": 0.66, "step": 12860 }, { "epoch": 0.6392172444621039, "grad_norm": 0.1171875, "learning_rate": 0.0007488665938213966, "loss": 0.7147, "step": 12870 }, { "epoch": 0.6397139167577233, "grad_norm": 0.1123046875, "learning_rate": 0.0007488268600377471, "loss": 0.668, "step": 12880 }, { "epoch": 0.6402105890533426, "grad_norm": 0.10791015625, "learning_rate": 0.0007487871262540976, "loss": 0.6798, "step": 12890 }, { "epoch": 0.6407072613489619, "grad_norm": 0.11767578125, "learning_rate": 0.000748747392470448, "loss": 0.6702, "step": 12900 }, { "epoch": 0.6412039336445813, "grad_norm": 0.11669921875, "learning_rate": 0.0007487076586867985, "loss": 0.661, "step": 12910 }, { "epoch": 0.6417006059402006, "grad_norm": 0.1220703125, "learning_rate": 0.0007486679249031489, "loss": 0.6679, "step": 12920 }, { "epoch": 0.64219727823582, "grad_norm": 0.1044921875, "learning_rate": 0.0007486281911194993, "loss": 0.659, "step": 12930 }, { "epoch": 0.6426939505314394, "grad_norm": 0.10205078125, "learning_rate": 0.0007485884573358499, "loss": 0.6966, "step": 12940 }, { "epoch": 0.6431906228270587, "grad_norm": 0.1328125, "learning_rate": 0.0007485487235522003, "loss": 0.7063, "step": 12950 }, { "epoch": 0.6436872951226781, "grad_norm": 0.111328125, "learning_rate": 0.0007485089897685507, "loss": 0.6778, "step": 12960 }, { "epoch": 0.6441839674182974, "grad_norm": 0.1044921875, "learning_rate": 0.0007484692559849012, "loss": 0.7112, "step": 12970 }, { "epoch": 0.6446806397139168, "grad_norm": 0.1064453125, "learning_rate": 0.0007484295222012516, "loss": 0.7005, "step": 12980 }, { "epoch": 0.6451773120095361, "grad_norm": 0.10986328125, "learning_rate": 0.0007483897884176022, "loss": 0.657, "step": 12990 }, { "epoch": 0.6456739843051554, "grad_norm": 0.140625, "learning_rate": 0.0007483500546339526, "loss": 0.6676, "step": 13000 }, { "epoch": 0.6461706566007748, "grad_norm": 0.1123046875, "learning_rate": 0.000748310320850303, "loss": 0.6943, "step": 13010 }, { "epoch": 0.6466673288963941, "grad_norm": 0.1103515625, "learning_rate": 0.0007482705870666535, "loss": 0.6727, "step": 13020 }, { "epoch": 0.6471640011920136, "grad_norm": 0.1220703125, "learning_rate": 0.0007482308532830038, "loss": 0.6818, "step": 13030 }, { "epoch": 0.6476606734876329, "grad_norm": 0.11083984375, "learning_rate": 0.0007481911194993544, "loss": 0.6715, "step": 13040 }, { "epoch": 0.6481573457832522, "grad_norm": 0.107421875, "learning_rate": 0.0007481513857157049, "loss": 0.6993, "step": 13050 }, { "epoch": 0.6486540180788716, "grad_norm": 0.11474609375, "learning_rate": 0.0007481116519320552, "loss": 0.6996, "step": 13060 }, { "epoch": 0.6491506903744909, "grad_norm": 0.154296875, "learning_rate": 0.0007480719181484057, "loss": 0.6714, "step": 13070 }, { "epoch": 0.6496473626701103, "grad_norm": 0.11279296875, "learning_rate": 0.0007480321843647562, "loss": 0.702, "step": 13080 }, { "epoch": 0.6501440349657296, "grad_norm": 0.11328125, "learning_rate": 0.0007479924505811066, "loss": 0.6975, "step": 13090 }, { "epoch": 0.6506407072613489, "grad_norm": 0.12255859375, "learning_rate": 0.0007479527167974571, "loss": 0.7115, "step": 13100 }, { "epoch": 0.6511373795569683, "grad_norm": 0.107421875, "learning_rate": 0.0007479129830138075, "loss": 0.7013, "step": 13110 }, { "epoch": 0.6516340518525877, "grad_norm": 0.1162109375, "learning_rate": 0.0007478732492301579, "loss": 0.6469, "step": 13120 }, { "epoch": 0.652130724148207, "grad_norm": 0.1484375, "learning_rate": 0.0007478335154465084, "loss": 0.7094, "step": 13130 }, { "epoch": 0.6526273964438264, "grad_norm": 0.134765625, "learning_rate": 0.0007477937816628589, "loss": 0.6609, "step": 13140 }, { "epoch": 0.6531240687394457, "grad_norm": 0.11328125, "learning_rate": 0.0007477540478792094, "loss": 0.684, "step": 13150 }, { "epoch": 0.6536207410350651, "grad_norm": 0.10888671875, "learning_rate": 0.0007477143140955598, "loss": 0.6833, "step": 13160 }, { "epoch": 0.6541174133306844, "grad_norm": 0.11669921875, "learning_rate": 0.0007476745803119102, "loss": 0.6783, "step": 13170 }, { "epoch": 0.6546140856263037, "grad_norm": 0.11083984375, "learning_rate": 0.0007476348465282607, "loss": 0.6597, "step": 13180 }, { "epoch": 0.6551107579219231, "grad_norm": 0.123046875, "learning_rate": 0.0007475951127446112, "loss": 0.6951, "step": 13190 }, { "epoch": 0.6556074302175424, "grad_norm": 0.1240234375, "learning_rate": 0.0007475553789609616, "loss": 0.6596, "step": 13200 }, { "epoch": 0.6561041025131619, "grad_norm": 0.12353515625, "learning_rate": 0.0007475156451773121, "loss": 0.6579, "step": 13210 }, { "epoch": 0.6566007748087812, "grad_norm": 0.1142578125, "learning_rate": 0.0007474759113936624, "loss": 0.669, "step": 13220 }, { "epoch": 0.6570974471044005, "grad_norm": 0.1181640625, "learning_rate": 0.0007474361776100129, "loss": 0.6524, "step": 13230 }, { "epoch": 0.6575941194000199, "grad_norm": 0.10986328125, "learning_rate": 0.0007473964438263635, "loss": 0.6781, "step": 13240 }, { "epoch": 0.6580907916956392, "grad_norm": 0.111328125, "learning_rate": 0.0007473567100427138, "loss": 0.6868, "step": 13250 }, { "epoch": 0.6585874639912586, "grad_norm": 0.1123046875, "learning_rate": 0.0007473169762590643, "loss": 0.6969, "step": 13260 }, { "epoch": 0.6590841362868779, "grad_norm": 0.10986328125, "learning_rate": 0.0007472772424754148, "loss": 0.7009, "step": 13270 }, { "epoch": 0.6595808085824972, "grad_norm": 0.109375, "learning_rate": 0.0007472375086917652, "loss": 0.681, "step": 13280 }, { "epoch": 0.6600774808781166, "grad_norm": 0.1015625, "learning_rate": 0.0007471977749081157, "loss": 0.6342, "step": 13290 }, { "epoch": 0.660574153173736, "grad_norm": 0.111328125, "learning_rate": 0.0007471580411244661, "loss": 0.6394, "step": 13300 }, { "epoch": 0.6610708254693554, "grad_norm": 0.11279296875, "learning_rate": 0.0007471183073408166, "loss": 0.7037, "step": 13310 }, { "epoch": 0.6615674977649747, "grad_norm": 0.12109375, "learning_rate": 0.000747078573557167, "loss": 0.6478, "step": 13320 }, { "epoch": 0.662064170060594, "grad_norm": 0.11865234375, "learning_rate": 0.0007470388397735174, "loss": 0.6727, "step": 13330 }, { "epoch": 0.6625608423562134, "grad_norm": 0.123046875, "learning_rate": 0.000746999105989868, "loss": 0.6829, "step": 13340 }, { "epoch": 0.6630575146518327, "grad_norm": 0.125, "learning_rate": 0.0007469593722062184, "loss": 0.6971, "step": 13350 }, { "epoch": 0.663554186947452, "grad_norm": 0.10791015625, "learning_rate": 0.0007469196384225688, "loss": 0.6622, "step": 13360 }, { "epoch": 0.6640508592430714, "grad_norm": 0.10546875, "learning_rate": 0.0007468799046389193, "loss": 0.6789, "step": 13370 }, { "epoch": 0.6645475315386907, "grad_norm": 0.1162109375, "learning_rate": 0.0007468401708552697, "loss": 0.6986, "step": 13380 }, { "epoch": 0.6650442038343102, "grad_norm": 0.1142578125, "learning_rate": 0.0007468004370716202, "loss": 0.7204, "step": 13390 }, { "epoch": 0.6655408761299295, "grad_norm": 0.111328125, "learning_rate": 0.0007467607032879707, "loss": 0.6522, "step": 13400 }, { "epoch": 0.6660375484255489, "grad_norm": 0.1083984375, "learning_rate": 0.0007467209695043211, "loss": 0.6546, "step": 13410 }, { "epoch": 0.6665342207211682, "grad_norm": 0.10693359375, "learning_rate": 0.0007466812357206715, "loss": 0.6535, "step": 13420 }, { "epoch": 0.6670308930167875, "grad_norm": 0.1123046875, "learning_rate": 0.000746641501937022, "loss": 0.6729, "step": 13430 }, { "epoch": 0.6675275653124069, "grad_norm": 0.11279296875, "learning_rate": 0.0007466017681533725, "loss": 0.6886, "step": 13440 }, { "epoch": 0.6680242376080262, "grad_norm": 0.134765625, "learning_rate": 0.0007465620343697229, "loss": 0.6564, "step": 13450 }, { "epoch": 0.6685209099036455, "grad_norm": 0.1259765625, "learning_rate": 0.0007465223005860734, "loss": 0.6938, "step": 13460 }, { "epoch": 0.6690175821992649, "grad_norm": 0.1357421875, "learning_rate": 0.0007464825668024238, "loss": 0.6715, "step": 13470 }, { "epoch": 0.6695142544948842, "grad_norm": 0.11865234375, "learning_rate": 0.0007464428330187742, "loss": 0.6789, "step": 13480 }, { "epoch": 0.6700109267905037, "grad_norm": 0.1337890625, "learning_rate": 0.0007464030992351247, "loss": 0.6537, "step": 13490 }, { "epoch": 0.670507599086123, "grad_norm": 0.109375, "learning_rate": 0.0007463633654514752, "loss": 0.6739, "step": 13500 }, { "epoch": 0.6710042713817423, "grad_norm": 0.103515625, "learning_rate": 0.0007463236316678256, "loss": 0.6662, "step": 13510 }, { "epoch": 0.6715009436773617, "grad_norm": 0.10888671875, "learning_rate": 0.000746283897884176, "loss": 0.6451, "step": 13520 }, { "epoch": 0.671997615972981, "grad_norm": 0.12158203125, "learning_rate": 0.0007462441641005265, "loss": 0.6754, "step": 13530 }, { "epoch": 0.6724942882686004, "grad_norm": 0.12255859375, "learning_rate": 0.000746204430316877, "loss": 0.6632, "step": 13540 }, { "epoch": 0.6729909605642197, "grad_norm": 0.119140625, "learning_rate": 0.0007461646965332274, "loss": 0.7027, "step": 13550 }, { "epoch": 0.673487632859839, "grad_norm": 0.11669921875, "learning_rate": 0.0007461249627495779, "loss": 0.6755, "step": 13560 }, { "epoch": 0.6739843051554584, "grad_norm": 0.109375, "learning_rate": 0.0007460852289659283, "loss": 0.688, "step": 13570 }, { "epoch": 0.6744809774510778, "grad_norm": 0.11865234375, "learning_rate": 0.0007460454951822787, "loss": 0.69, "step": 13580 }, { "epoch": 0.6749776497466972, "grad_norm": 0.11279296875, "learning_rate": 0.0007460057613986293, "loss": 0.7026, "step": 13590 }, { "epoch": 0.6754743220423165, "grad_norm": 0.1123046875, "learning_rate": 0.0007459660276149797, "loss": 0.6592, "step": 13600 }, { "epoch": 0.6759709943379358, "grad_norm": 0.1259765625, "learning_rate": 0.0007459262938313301, "loss": 0.6609, "step": 13610 }, { "epoch": 0.6764676666335552, "grad_norm": 0.125, "learning_rate": 0.0007458865600476806, "loss": 0.6837, "step": 13620 }, { "epoch": 0.6769643389291745, "grad_norm": 0.11181640625, "learning_rate": 0.000745846826264031, "loss": 0.6865, "step": 13630 }, { "epoch": 0.6774610112247939, "grad_norm": 0.10107421875, "learning_rate": 0.0007458070924803814, "loss": 0.6354, "step": 13640 }, { "epoch": 0.6779576835204132, "grad_norm": 0.12060546875, "learning_rate": 0.000745767358696732, "loss": 0.7152, "step": 13650 }, { "epoch": 0.6784543558160325, "grad_norm": 0.12890625, "learning_rate": 0.0007457276249130824, "loss": 0.6739, "step": 13660 }, { "epoch": 0.678951028111652, "grad_norm": 0.1220703125, "learning_rate": 0.0007456878911294328, "loss": 0.6947, "step": 13670 }, { "epoch": 0.6794477004072713, "grad_norm": 0.11376953125, "learning_rate": 0.0007456481573457833, "loss": 0.6693, "step": 13680 }, { "epoch": 0.6799443727028907, "grad_norm": 0.13671875, "learning_rate": 0.0007456084235621338, "loss": 0.6785, "step": 13690 }, { "epoch": 0.68044104499851, "grad_norm": 0.1162109375, "learning_rate": 0.0007455686897784842, "loss": 0.6698, "step": 13700 }, { "epoch": 0.6809377172941293, "grad_norm": 0.1083984375, "learning_rate": 0.0007455289559948346, "loss": 0.6377, "step": 13710 }, { "epoch": 0.6814343895897487, "grad_norm": 0.130859375, "learning_rate": 0.0007454892222111851, "loss": 0.6667, "step": 13720 }, { "epoch": 0.681931061885368, "grad_norm": 0.1103515625, "learning_rate": 0.0007454494884275356, "loss": 0.7018, "step": 13730 }, { "epoch": 0.6824277341809873, "grad_norm": 0.10888671875, "learning_rate": 0.000745409754643886, "loss": 0.6803, "step": 13740 }, { "epoch": 0.6829244064766067, "grad_norm": 0.1044921875, "learning_rate": 0.0007453700208602365, "loss": 0.6521, "step": 13750 }, { "epoch": 0.6834210787722261, "grad_norm": 0.11279296875, "learning_rate": 0.0007453302870765869, "loss": 0.696, "step": 13760 }, { "epoch": 0.6839177510678455, "grad_norm": 0.1025390625, "learning_rate": 0.0007452905532929373, "loss": 0.6783, "step": 13770 }, { "epoch": 0.6844144233634648, "grad_norm": 0.10693359375, "learning_rate": 0.0007452508195092878, "loss": 0.6695, "step": 13780 }, { "epoch": 0.6849110956590841, "grad_norm": 0.111328125, "learning_rate": 0.0007452110857256382, "loss": 0.694, "step": 13790 }, { "epoch": 0.6854077679547035, "grad_norm": 0.12158203125, "learning_rate": 0.0007451713519419887, "loss": 0.6664, "step": 13800 }, { "epoch": 0.6859044402503228, "grad_norm": 0.1044921875, "learning_rate": 0.0007451316181583392, "loss": 0.6734, "step": 13810 }, { "epoch": 0.6864011125459422, "grad_norm": 0.111328125, "learning_rate": 0.0007450918843746896, "loss": 0.6455, "step": 13820 }, { "epoch": 0.6868977848415615, "grad_norm": 0.10546875, "learning_rate": 0.00074505215059104, "loss": 0.6509, "step": 13830 }, { "epoch": 0.6873944571371808, "grad_norm": 0.1103515625, "learning_rate": 0.0007450124168073906, "loss": 0.6705, "step": 13840 }, { "epoch": 0.6878911294328003, "grad_norm": 0.1171875, "learning_rate": 0.000744972683023741, "loss": 0.6952, "step": 13850 }, { "epoch": 0.6883878017284196, "grad_norm": 0.1142578125, "learning_rate": 0.0007449329492400914, "loss": 0.6787, "step": 13860 }, { "epoch": 0.688884474024039, "grad_norm": 0.1259765625, "learning_rate": 0.0007448932154564419, "loss": 0.6715, "step": 13870 }, { "epoch": 0.6893811463196583, "grad_norm": 0.123046875, "learning_rate": 0.0007448534816727923, "loss": 0.6722, "step": 13880 }, { "epoch": 0.6898778186152776, "grad_norm": 0.1669921875, "learning_rate": 0.0007448137478891429, "loss": 0.6656, "step": 13890 }, { "epoch": 0.690374490910897, "grad_norm": 0.11279296875, "learning_rate": 0.0007447740141054932, "loss": 0.6341, "step": 13900 }, { "epoch": 0.6908711632065163, "grad_norm": 0.123046875, "learning_rate": 0.0007447342803218437, "loss": 0.687, "step": 13910 }, { "epoch": 0.6913678355021357, "grad_norm": 0.1083984375, "learning_rate": 0.0007446945465381942, "loss": 0.7099, "step": 13920 }, { "epoch": 0.691864507797755, "grad_norm": 0.1337890625, "learning_rate": 0.0007446548127545445, "loss": 0.6738, "step": 13930 }, { "epoch": 0.6923611800933744, "grad_norm": 0.111328125, "learning_rate": 0.000744615078970895, "loss": 0.6398, "step": 13940 }, { "epoch": 0.6928578523889938, "grad_norm": 0.1142578125, "learning_rate": 0.0007445753451872456, "loss": 0.66, "step": 13950 }, { "epoch": 0.6933545246846131, "grad_norm": 0.1142578125, "learning_rate": 0.0007445356114035959, "loss": 0.6886, "step": 13960 }, { "epoch": 0.6938511969802325, "grad_norm": 0.10986328125, "learning_rate": 0.0007444958776199464, "loss": 0.6649, "step": 13970 }, { "epoch": 0.6943478692758518, "grad_norm": 0.11767578125, "learning_rate": 0.0007444561438362968, "loss": 0.6863, "step": 13980 }, { "epoch": 0.6948445415714711, "grad_norm": 0.126953125, "learning_rate": 0.0007444164100526472, "loss": 0.6929, "step": 13990 }, { "epoch": 0.6953412138670905, "grad_norm": 0.107421875, "learning_rate": 0.0007443766762689978, "loss": 0.6631, "step": 14000 }, { "epoch": 0.6958378861627098, "grad_norm": 0.134765625, "learning_rate": 0.0007443369424853482, "loss": 0.6998, "step": 14010 }, { "epoch": 0.6963345584583291, "grad_norm": 0.1474609375, "learning_rate": 0.0007442972087016986, "loss": 0.6604, "step": 14020 }, { "epoch": 0.6968312307539486, "grad_norm": 0.11279296875, "learning_rate": 0.0007442574749180491, "loss": 0.6781, "step": 14030 }, { "epoch": 0.6973279030495679, "grad_norm": 0.11279296875, "learning_rate": 0.0007442177411343995, "loss": 0.653, "step": 14040 }, { "epoch": 0.6978245753451873, "grad_norm": 0.11376953125, "learning_rate": 0.0007441780073507501, "loss": 0.6524, "step": 14050 }, { "epoch": 0.6983212476408066, "grad_norm": 0.1083984375, "learning_rate": 0.0007441382735671005, "loss": 0.6796, "step": 14060 }, { "epoch": 0.698817919936426, "grad_norm": 0.1064453125, "learning_rate": 0.0007440985397834509, "loss": 0.6614, "step": 14070 }, { "epoch": 0.6993145922320453, "grad_norm": 0.1259765625, "learning_rate": 0.0007440588059998014, "loss": 0.6559, "step": 14080 }, { "epoch": 0.6998112645276646, "grad_norm": 0.10791015625, "learning_rate": 0.0007440190722161517, "loss": 0.6349, "step": 14090 }, { "epoch": 0.700307936823284, "grad_norm": 0.1171875, "learning_rate": 0.0007439793384325023, "loss": 0.6741, "step": 14100 }, { "epoch": 0.7008046091189033, "grad_norm": 0.1474609375, "learning_rate": 0.0007439396046488528, "loss": 0.6455, "step": 14110 }, { "epoch": 0.7013012814145227, "grad_norm": 0.11669921875, "learning_rate": 0.0007438998708652031, "loss": 0.6997, "step": 14120 }, { "epoch": 0.7017979537101421, "grad_norm": 0.109375, "learning_rate": 0.0007438601370815536, "loss": 0.661, "step": 14130 }, { "epoch": 0.7022946260057614, "grad_norm": 0.11962890625, "learning_rate": 0.0007438204032979041, "loss": 0.6776, "step": 14140 }, { "epoch": 0.7027912983013808, "grad_norm": 0.123046875, "learning_rate": 0.0007437806695142545, "loss": 0.6656, "step": 14150 }, { "epoch": 0.7032879705970001, "grad_norm": 0.1064453125, "learning_rate": 0.000743740935730605, "loss": 0.6608, "step": 14160 }, { "epoch": 0.7037846428926194, "grad_norm": 0.10693359375, "learning_rate": 0.0007437012019469554, "loss": 0.6501, "step": 14170 }, { "epoch": 0.7042813151882388, "grad_norm": 0.1005859375, "learning_rate": 0.0007436614681633059, "loss": 0.6619, "step": 14180 }, { "epoch": 0.7047779874838581, "grad_norm": 0.125, "learning_rate": 0.0007436217343796563, "loss": 0.6832, "step": 14190 }, { "epoch": 0.7052746597794775, "grad_norm": 0.111328125, "learning_rate": 0.0007435820005960068, "loss": 0.66, "step": 14200 }, { "epoch": 0.7057713320750969, "grad_norm": 0.12109375, "learning_rate": 0.0007435422668123573, "loss": 0.6392, "step": 14210 }, { "epoch": 0.7062680043707162, "grad_norm": 0.1171875, "learning_rate": 0.0007435025330287077, "loss": 0.6357, "step": 14220 }, { "epoch": 0.7067646766663356, "grad_norm": 0.1171875, "learning_rate": 0.0007434627992450581, "loss": 0.6774, "step": 14230 }, { "epoch": 0.7072613489619549, "grad_norm": 0.1083984375, "learning_rate": 0.0007434230654614086, "loss": 0.63, "step": 14240 }, { "epoch": 0.7077580212575743, "grad_norm": 0.1220703125, "learning_rate": 0.0007433833316777591, "loss": 0.6643, "step": 14250 }, { "epoch": 0.7082546935531936, "grad_norm": 0.10009765625, "learning_rate": 0.0007433435978941095, "loss": 0.6845, "step": 14260 }, { "epoch": 0.7087513658488129, "grad_norm": 0.1044921875, "learning_rate": 0.00074330386411046, "loss": 0.6639, "step": 14270 }, { "epoch": 0.7092480381444323, "grad_norm": 0.1044921875, "learning_rate": 0.0007432641303268104, "loss": 0.7032, "step": 14280 }, { "epoch": 0.7097447104400516, "grad_norm": 0.10986328125, "learning_rate": 0.0007432243965431608, "loss": 0.6655, "step": 14290 }, { "epoch": 0.7102413827356711, "grad_norm": 0.158203125, "learning_rate": 0.0007431846627595114, "loss": 0.6785, "step": 14300 }, { "epoch": 0.7107380550312904, "grad_norm": 0.11767578125, "learning_rate": 0.0007431449289758617, "loss": 0.673, "step": 14310 }, { "epoch": 0.7112347273269097, "grad_norm": 0.12255859375, "learning_rate": 0.0007431051951922122, "loss": 0.6534, "step": 14320 }, { "epoch": 0.7117313996225291, "grad_norm": 0.109375, "learning_rate": 0.0007430654614085627, "loss": 0.6963, "step": 14330 }, { "epoch": 0.7122280719181484, "grad_norm": 0.111328125, "learning_rate": 0.0007430257276249131, "loss": 0.6584, "step": 14340 }, { "epoch": 0.7127247442137677, "grad_norm": 0.11279296875, "learning_rate": 0.0007429859938412636, "loss": 0.6742, "step": 14350 }, { "epoch": 0.7132214165093871, "grad_norm": 0.1025390625, "learning_rate": 0.000742946260057614, "loss": 0.6555, "step": 14360 }, { "epoch": 0.7137180888050064, "grad_norm": 0.1376953125, "learning_rate": 0.0007429065262739645, "loss": 0.6705, "step": 14370 }, { "epoch": 0.7142147611006258, "grad_norm": 0.10693359375, "learning_rate": 0.0007428667924903149, "loss": 0.661, "step": 14380 }, { "epoch": 0.7147114333962452, "grad_norm": 0.11328125, "learning_rate": 0.0007428270587066653, "loss": 0.6886, "step": 14390 }, { "epoch": 0.7152081056918645, "grad_norm": 0.10205078125, "learning_rate": 0.0007427873249230159, "loss": 0.6865, "step": 14400 }, { "epoch": 0.7157047779874839, "grad_norm": 0.12353515625, "learning_rate": 0.0007427475911393663, "loss": 0.6566, "step": 14410 }, { "epoch": 0.7162014502831032, "grad_norm": 0.1171875, "learning_rate": 0.0007427078573557167, "loss": 0.6595, "step": 14420 }, { "epoch": 0.7166981225787226, "grad_norm": 0.1201171875, "learning_rate": 0.0007426681235720672, "loss": 0.6846, "step": 14430 }, { "epoch": 0.7171947948743419, "grad_norm": 0.10888671875, "learning_rate": 0.0007426283897884176, "loss": 0.6815, "step": 14440 }, { "epoch": 0.7176914671699612, "grad_norm": 0.1064453125, "learning_rate": 0.0007425886560047681, "loss": 0.6479, "step": 14450 }, { "epoch": 0.7181881394655806, "grad_norm": 0.1181640625, "learning_rate": 0.0007425489222211186, "loss": 0.6771, "step": 14460 }, { "epoch": 0.7186848117611999, "grad_norm": 0.1064453125, "learning_rate": 0.000742509188437469, "loss": 0.6768, "step": 14470 }, { "epoch": 0.7191814840568194, "grad_norm": 0.12451171875, "learning_rate": 0.0007424694546538194, "loss": 0.6603, "step": 14480 }, { "epoch": 0.7196781563524387, "grad_norm": 0.11767578125, "learning_rate": 0.0007424297208701699, "loss": 0.6645, "step": 14490 }, { "epoch": 0.720174828648058, "grad_norm": 0.10546875, "learning_rate": 0.0007423899870865204, "loss": 0.6885, "step": 14500 }, { "epoch": 0.7206715009436774, "grad_norm": 0.1181640625, "learning_rate": 0.0007423502533028708, "loss": 0.6573, "step": 14510 }, { "epoch": 0.7211681732392967, "grad_norm": 0.11669921875, "learning_rate": 0.0007423105195192213, "loss": 0.6914, "step": 14520 }, { "epoch": 0.721664845534916, "grad_norm": 0.10595703125, "learning_rate": 0.0007422707857355717, "loss": 0.6392, "step": 14530 }, { "epoch": 0.7221615178305354, "grad_norm": 0.111328125, "learning_rate": 0.0007422310519519221, "loss": 0.6606, "step": 14540 }, { "epoch": 0.7226581901261547, "grad_norm": 0.1279296875, "learning_rate": 0.0007421913181682727, "loss": 0.7015, "step": 14550 }, { "epoch": 0.7231548624217741, "grad_norm": 0.11767578125, "learning_rate": 0.0007421515843846231, "loss": 0.6693, "step": 14560 }, { "epoch": 0.7236515347173935, "grad_norm": 0.1259765625, "learning_rate": 0.0007421118506009735, "loss": 0.6702, "step": 14570 }, { "epoch": 0.7241482070130129, "grad_norm": 0.109375, "learning_rate": 0.0007420721168173239, "loss": 0.7158, "step": 14580 }, { "epoch": 0.7246448793086322, "grad_norm": 0.1259765625, "learning_rate": 0.0007420323830336744, "loss": 0.6439, "step": 14590 }, { "epoch": 0.7251415516042515, "grad_norm": 0.1259765625, "learning_rate": 0.0007419926492500249, "loss": 0.6677, "step": 14600 }, { "epoch": 0.7256382238998709, "grad_norm": 0.11181640625, "learning_rate": 0.0007419529154663753, "loss": 0.6892, "step": 14610 }, { "epoch": 0.7261348961954902, "grad_norm": 0.10888671875, "learning_rate": 0.0007419131816827258, "loss": 0.6827, "step": 14620 }, { "epoch": 0.7266315684911095, "grad_norm": 0.099609375, "learning_rate": 0.0007418734478990762, "loss": 0.6628, "step": 14630 }, { "epoch": 0.7271282407867289, "grad_norm": 0.11328125, "learning_rate": 0.0007418337141154266, "loss": 0.6387, "step": 14640 }, { "epoch": 0.7276249130823482, "grad_norm": 0.12060546875, "learning_rate": 0.0007417939803317772, "loss": 0.6581, "step": 14650 }, { "epoch": 0.7281215853779676, "grad_norm": 0.11962890625, "learning_rate": 0.0007417542465481276, "loss": 0.6713, "step": 14660 }, { "epoch": 0.728618257673587, "grad_norm": 0.10107421875, "learning_rate": 0.000741714512764478, "loss": 0.6754, "step": 14670 }, { "epoch": 0.7291149299692063, "grad_norm": 0.1044921875, "learning_rate": 0.0007416747789808285, "loss": 0.6784, "step": 14680 }, { "epoch": 0.7296116022648257, "grad_norm": 0.10107421875, "learning_rate": 0.0007416350451971789, "loss": 0.6548, "step": 14690 }, { "epoch": 0.730108274560445, "grad_norm": 0.1064453125, "learning_rate": 0.0007415953114135294, "loss": 0.6828, "step": 14700 }, { "epoch": 0.7306049468560644, "grad_norm": 0.1025390625, "learning_rate": 0.0007415555776298799, "loss": 0.6802, "step": 14710 }, { "epoch": 0.7311016191516837, "grad_norm": 0.10888671875, "learning_rate": 0.0007415158438462303, "loss": 0.657, "step": 14720 }, { "epoch": 0.731598291447303, "grad_norm": 0.10986328125, "learning_rate": 0.0007414761100625807, "loss": 0.6715, "step": 14730 }, { "epoch": 0.7320949637429224, "grad_norm": 0.1162109375, "learning_rate": 0.0007414363762789312, "loss": 0.633, "step": 14740 }, { "epoch": 0.7325916360385417, "grad_norm": 0.12255859375, "learning_rate": 0.0007413966424952817, "loss": 0.643, "step": 14750 }, { "epoch": 0.7330883083341612, "grad_norm": 0.09228515625, "learning_rate": 0.0007413569087116321, "loss": 0.6803, "step": 14760 }, { "epoch": 0.7335849806297805, "grad_norm": 0.13671875, "learning_rate": 0.0007413171749279825, "loss": 0.6491, "step": 14770 }, { "epoch": 0.7340816529253998, "grad_norm": 0.10498046875, "learning_rate": 0.000741277441144333, "loss": 0.6822, "step": 14780 }, { "epoch": 0.7345783252210192, "grad_norm": 0.123046875, "learning_rate": 0.0007412377073606835, "loss": 0.6758, "step": 14790 }, { "epoch": 0.7350749975166385, "grad_norm": 0.11376953125, "learning_rate": 0.0007411979735770338, "loss": 0.6977, "step": 14800 }, { "epoch": 0.7355716698122579, "grad_norm": 0.1318359375, "learning_rate": 0.0007411582397933844, "loss": 0.6655, "step": 14810 }, { "epoch": 0.7360683421078772, "grad_norm": 0.10498046875, "learning_rate": 0.0007411185060097349, "loss": 0.6554, "step": 14820 }, { "epoch": 0.7365650144034965, "grad_norm": 0.1650390625, "learning_rate": 0.0007410787722260852, "loss": 0.623, "step": 14830 }, { "epoch": 0.7370616866991159, "grad_norm": 0.10546875, "learning_rate": 0.0007410390384424357, "loss": 0.6476, "step": 14840 }, { "epoch": 0.7375583589947353, "grad_norm": 0.126953125, "learning_rate": 0.0007409993046587862, "loss": 0.6634, "step": 14850 }, { "epoch": 0.7380550312903547, "grad_norm": 0.10595703125, "learning_rate": 0.0007409595708751366, "loss": 0.6542, "step": 14860 }, { "epoch": 0.738551703585974, "grad_norm": 0.1220703125, "learning_rate": 0.0007409198370914871, "loss": 0.6161, "step": 14870 }, { "epoch": 0.7390483758815933, "grad_norm": 0.1279296875, "learning_rate": 0.0007408801033078375, "loss": 0.6548, "step": 14880 }, { "epoch": 0.7395450481772127, "grad_norm": 0.1083984375, "learning_rate": 0.0007408403695241879, "loss": 0.6417, "step": 14890 }, { "epoch": 0.740041720472832, "grad_norm": 0.12109375, "learning_rate": 0.0007408006357405385, "loss": 0.6701, "step": 14900 }, { "epoch": 0.7405383927684513, "grad_norm": 0.11474609375, "learning_rate": 0.0007407609019568889, "loss": 0.6672, "step": 14910 }, { "epoch": 0.7410350650640707, "grad_norm": 0.1298828125, "learning_rate": 0.0007407211681732393, "loss": 0.6359, "step": 14920 }, { "epoch": 0.74153173735969, "grad_norm": 0.11279296875, "learning_rate": 0.0007406814343895898, "loss": 0.668, "step": 14930 }, { "epoch": 0.7420284096553095, "grad_norm": 0.10546875, "learning_rate": 0.0007406417006059402, "loss": 0.6952, "step": 14940 }, { "epoch": 0.7425250819509288, "grad_norm": 0.12890625, "learning_rate": 0.0007406019668222908, "loss": 0.6725, "step": 14950 }, { "epoch": 0.7430217542465481, "grad_norm": 0.1123046875, "learning_rate": 0.0007405622330386411, "loss": 0.6643, "step": 14960 }, { "epoch": 0.7435184265421675, "grad_norm": 0.115234375, "learning_rate": 0.0007405224992549916, "loss": 0.6796, "step": 14970 }, { "epoch": 0.7440150988377868, "grad_norm": 0.11767578125, "learning_rate": 0.0007404827654713421, "loss": 0.6462, "step": 14980 }, { "epoch": 0.7445117711334062, "grad_norm": 0.12060546875, "learning_rate": 0.0007404430316876924, "loss": 0.6689, "step": 14990 }, { "epoch": 0.7450084434290255, "grad_norm": 0.1162109375, "learning_rate": 0.000740403297904043, "loss": 0.7093, "step": 15000 }, { "epoch": 0.7455051157246448, "grad_norm": 0.1181640625, "learning_rate": 0.0007403635641203935, "loss": 0.6595, "step": 15010 }, { "epoch": 0.7460017880202642, "grad_norm": 0.1259765625, "learning_rate": 0.0007403238303367438, "loss": 0.7084, "step": 15020 }, { "epoch": 0.7464984603158836, "grad_norm": 0.1171875, "learning_rate": 0.0007402840965530943, "loss": 0.6587, "step": 15030 }, { "epoch": 0.746995132611503, "grad_norm": 0.10302734375, "learning_rate": 0.0007402443627694447, "loss": 0.6821, "step": 15040 }, { "epoch": 0.7474918049071223, "grad_norm": 0.109375, "learning_rate": 0.0007402046289857951, "loss": 0.6574, "step": 15050 }, { "epoch": 0.7479884772027416, "grad_norm": 0.11083984375, "learning_rate": 0.0007401648952021457, "loss": 0.6512, "step": 15060 }, { "epoch": 0.748485149498361, "grad_norm": 0.1298828125, "learning_rate": 0.0007401251614184961, "loss": 0.6612, "step": 15070 }, { "epoch": 0.7489818217939803, "grad_norm": 0.11376953125, "learning_rate": 0.0007400854276348466, "loss": 0.6923, "step": 15080 }, { "epoch": 0.7494784940895997, "grad_norm": 0.1162109375, "learning_rate": 0.000740045693851197, "loss": 0.6737, "step": 15090 }, { "epoch": 0.749975166385219, "grad_norm": 0.12060546875, "learning_rate": 0.0007400059600675474, "loss": 0.6633, "step": 15100 }, { "epoch": 0.7504718386808383, "grad_norm": 0.10791015625, "learning_rate": 0.000739966226283898, "loss": 0.6599, "step": 15110 }, { "epoch": 0.7509685109764578, "grad_norm": 0.1279296875, "learning_rate": 0.0007399264925002484, "loss": 0.6532, "step": 15120 }, { "epoch": 0.7514651832720771, "grad_norm": 0.10888671875, "learning_rate": 0.0007398867587165988, "loss": 0.6475, "step": 15130 }, { "epoch": 0.7519618555676965, "grad_norm": 0.10302734375, "learning_rate": 0.0007398470249329493, "loss": 0.627, "step": 15140 }, { "epoch": 0.7524585278633158, "grad_norm": 0.11083984375, "learning_rate": 0.0007398072911492998, "loss": 0.6591, "step": 15150 }, { "epoch": 0.7529552001589351, "grad_norm": 0.1318359375, "learning_rate": 0.0007397675573656502, "loss": 0.678, "step": 15160 }, { "epoch": 0.7534518724545545, "grad_norm": 0.1259765625, "learning_rate": 0.0007397278235820007, "loss": 0.6714, "step": 15170 }, { "epoch": 0.7539485447501738, "grad_norm": 0.10205078125, "learning_rate": 0.000739688089798351, "loss": 0.6759, "step": 15180 }, { "epoch": 0.7544452170457931, "grad_norm": 0.15234375, "learning_rate": 0.0007396483560147015, "loss": 0.7192, "step": 15190 }, { "epoch": 0.7549418893414125, "grad_norm": 0.138671875, "learning_rate": 0.000739608622231052, "loss": 0.6535, "step": 15200 }, { "epoch": 0.7554385616370319, "grad_norm": 0.10791015625, "learning_rate": 0.0007395688884474024, "loss": 0.6535, "step": 15210 }, { "epoch": 0.7559352339326513, "grad_norm": 0.119140625, "learning_rate": 0.0007395291546637529, "loss": 0.6493, "step": 15220 }, { "epoch": 0.7564319062282706, "grad_norm": 0.123046875, "learning_rate": 0.0007394894208801033, "loss": 0.6432, "step": 15230 }, { "epoch": 0.75692857852389, "grad_norm": 0.1318359375, "learning_rate": 0.0007394496870964538, "loss": 0.6387, "step": 15240 }, { "epoch": 0.7574252508195093, "grad_norm": 0.10009765625, "learning_rate": 0.0007394099533128042, "loss": 0.6892, "step": 15250 }, { "epoch": 0.7579219231151286, "grad_norm": 0.099609375, "learning_rate": 0.0007393702195291547, "loss": 0.6835, "step": 15260 }, { "epoch": 0.758418595410748, "grad_norm": 0.109375, "learning_rate": 0.0007393304857455052, "loss": 0.6625, "step": 15270 }, { "epoch": 0.7589152677063673, "grad_norm": 0.1123046875, "learning_rate": 0.0007392907519618556, "loss": 0.687, "step": 15280 }, { "epoch": 0.7594119400019866, "grad_norm": 0.111328125, "learning_rate": 0.000739251018178206, "loss": 0.63, "step": 15290 }, { "epoch": 0.7599086122976061, "grad_norm": 0.1142578125, "learning_rate": 0.0007392112843945566, "loss": 0.6379, "step": 15300 }, { "epoch": 0.7604052845932254, "grad_norm": 0.1015625, "learning_rate": 0.000739171550610907, "loss": 0.6866, "step": 15310 }, { "epoch": 0.7609019568888448, "grad_norm": 0.1044921875, "learning_rate": 0.0007391318168272574, "loss": 0.651, "step": 15320 }, { "epoch": 0.7613986291844641, "grad_norm": 0.10546875, "learning_rate": 0.0007390920830436079, "loss": 0.6841, "step": 15330 }, { "epoch": 0.7618953014800834, "grad_norm": 0.126953125, "learning_rate": 0.0007390523492599583, "loss": 0.6586, "step": 15340 }, { "epoch": 0.7623919737757028, "grad_norm": 0.11669921875, "learning_rate": 0.0007390126154763087, "loss": 0.6854, "step": 15350 }, { "epoch": 0.7628886460713221, "grad_norm": 0.10107421875, "learning_rate": 0.0007389728816926593, "loss": 0.668, "step": 15360 }, { "epoch": 0.7633853183669415, "grad_norm": 0.1376953125, "learning_rate": 0.0007389331479090096, "loss": 0.6417, "step": 15370 }, { "epoch": 0.7638819906625608, "grad_norm": 0.1142578125, "learning_rate": 0.0007388934141253601, "loss": 0.6731, "step": 15380 }, { "epoch": 0.7643786629581802, "grad_norm": 0.11669921875, "learning_rate": 0.0007388536803417106, "loss": 0.6381, "step": 15390 }, { "epoch": 0.7648753352537996, "grad_norm": 0.10791015625, "learning_rate": 0.000738813946558061, "loss": 0.6457, "step": 15400 }, { "epoch": 0.7653720075494189, "grad_norm": 0.11083984375, "learning_rate": 0.0007387742127744115, "loss": 0.6486, "step": 15410 }, { "epoch": 0.7658686798450383, "grad_norm": 0.1259765625, "learning_rate": 0.000738734478990762, "loss": 0.6659, "step": 15420 }, { "epoch": 0.7663653521406576, "grad_norm": 0.10595703125, "learning_rate": 0.0007386947452071124, "loss": 0.6527, "step": 15430 }, { "epoch": 0.7668620244362769, "grad_norm": 0.09765625, "learning_rate": 0.0007386550114234628, "loss": 0.6608, "step": 15440 }, { "epoch": 0.7673586967318963, "grad_norm": 0.09765625, "learning_rate": 0.0007386152776398132, "loss": 0.6618, "step": 15450 }, { "epoch": 0.7678553690275156, "grad_norm": 0.10888671875, "learning_rate": 0.0007385755438561638, "loss": 0.6524, "step": 15460 }, { "epoch": 0.768352041323135, "grad_norm": 0.11181640625, "learning_rate": 0.0007385358100725142, "loss": 0.6715, "step": 15470 }, { "epoch": 0.7688487136187544, "grad_norm": 0.11767578125, "learning_rate": 0.0007384960762888646, "loss": 0.6431, "step": 15480 }, { "epoch": 0.7693453859143737, "grad_norm": 0.11279296875, "learning_rate": 0.0007384563425052151, "loss": 0.6363, "step": 15490 }, { "epoch": 0.7698420582099931, "grad_norm": 0.142578125, "learning_rate": 0.0007384166087215655, "loss": 0.6678, "step": 15500 }, { "epoch": 0.7703387305056124, "grad_norm": 0.1123046875, "learning_rate": 0.000738376874937916, "loss": 0.6862, "step": 15510 }, { "epoch": 0.7708354028012318, "grad_norm": 0.10009765625, "learning_rate": 0.0007383371411542665, "loss": 0.6488, "step": 15520 }, { "epoch": 0.7713320750968511, "grad_norm": 0.10498046875, "learning_rate": 0.0007382974073706169, "loss": 0.6608, "step": 15530 }, { "epoch": 0.7718287473924704, "grad_norm": 0.12255859375, "learning_rate": 0.0007382576735869673, "loss": 0.6887, "step": 15540 }, { "epoch": 0.7723254196880898, "grad_norm": 0.11474609375, "learning_rate": 0.0007382179398033178, "loss": 0.65, "step": 15550 }, { "epoch": 0.7728220919837091, "grad_norm": 0.1083984375, "learning_rate": 0.0007381782060196683, "loss": 0.6542, "step": 15560 }, { "epoch": 0.7733187642793286, "grad_norm": 0.09765625, "learning_rate": 0.0007381384722360187, "loss": 0.6324, "step": 15570 }, { "epoch": 0.7738154365749479, "grad_norm": 0.10498046875, "learning_rate": 0.0007380987384523692, "loss": 0.6682, "step": 15580 }, { "epoch": 0.7743121088705672, "grad_norm": 0.1259765625, "learning_rate": 0.0007380590046687196, "loss": 0.6871, "step": 15590 }, { "epoch": 0.7748087811661866, "grad_norm": 0.125, "learning_rate": 0.00073801927088507, "loss": 0.6528, "step": 15600 }, { "epoch": 0.7753054534618059, "grad_norm": 0.1337890625, "learning_rate": 0.0007379795371014206, "loss": 0.6642, "step": 15610 }, { "epoch": 0.7758021257574252, "grad_norm": 0.1259765625, "learning_rate": 0.000737939803317771, "loss": 0.657, "step": 15620 }, { "epoch": 0.7762987980530446, "grad_norm": 0.11083984375, "learning_rate": 0.0007379000695341214, "loss": 0.672, "step": 15630 }, { "epoch": 0.7767954703486639, "grad_norm": 0.10595703125, "learning_rate": 0.0007378603357504718, "loss": 0.6719, "step": 15640 }, { "epoch": 0.7772921426442833, "grad_norm": 0.10302734375, "learning_rate": 0.0007378206019668223, "loss": 0.6735, "step": 15650 }, { "epoch": 0.7777888149399027, "grad_norm": 0.1103515625, "learning_rate": 0.0007377808681831728, "loss": 0.6648, "step": 15660 }, { "epoch": 0.778285487235522, "grad_norm": 0.11865234375, "learning_rate": 0.0007377411343995232, "loss": 0.6676, "step": 15670 }, { "epoch": 0.7787821595311414, "grad_norm": 0.11279296875, "learning_rate": 0.0007377014006158737, "loss": 0.685, "step": 15680 }, { "epoch": 0.7792788318267607, "grad_norm": 0.1142578125, "learning_rate": 0.0007376616668322241, "loss": 0.6626, "step": 15690 }, { "epoch": 0.7797755041223801, "grad_norm": 0.107421875, "learning_rate": 0.0007376219330485745, "loss": 0.6375, "step": 15700 }, { "epoch": 0.7802721764179994, "grad_norm": 0.140625, "learning_rate": 0.0007375821992649251, "loss": 0.6498, "step": 15710 }, { "epoch": 0.7807688487136187, "grad_norm": 0.13671875, "learning_rate": 0.0007375424654812755, "loss": 0.6627, "step": 15720 }, { "epoch": 0.7812655210092381, "grad_norm": 0.1298828125, "learning_rate": 0.0007375027316976259, "loss": 0.6476, "step": 15730 }, { "epoch": 0.7817621933048574, "grad_norm": 0.12255859375, "learning_rate": 0.0007374629979139764, "loss": 0.6621, "step": 15740 }, { "epoch": 0.7822588656004769, "grad_norm": 0.1083984375, "learning_rate": 0.0007374232641303268, "loss": 0.6777, "step": 15750 }, { "epoch": 0.7827555378960962, "grad_norm": 0.10546875, "learning_rate": 0.0007373835303466773, "loss": 0.6634, "step": 15760 }, { "epoch": 0.7832522101917155, "grad_norm": 0.099609375, "learning_rate": 0.0007373437965630278, "loss": 0.6365, "step": 15770 }, { "epoch": 0.7837488824873349, "grad_norm": 0.1259765625, "learning_rate": 0.0007373040627793782, "loss": 0.6644, "step": 15780 }, { "epoch": 0.7842455547829542, "grad_norm": 0.12890625, "learning_rate": 0.0007372643289957286, "loss": 0.6768, "step": 15790 }, { "epoch": 0.7847422270785736, "grad_norm": 0.10888671875, "learning_rate": 0.0007372245952120791, "loss": 0.6666, "step": 15800 }, { "epoch": 0.7852388993741929, "grad_norm": 0.1279296875, "learning_rate": 0.0007371848614284296, "loss": 0.6616, "step": 15810 }, { "epoch": 0.7857355716698122, "grad_norm": 0.11572265625, "learning_rate": 0.0007371451276447801, "loss": 0.6727, "step": 15820 }, { "epoch": 0.7862322439654316, "grad_norm": 0.10546875, "learning_rate": 0.0007371053938611304, "loss": 0.6297, "step": 15830 }, { "epoch": 0.7867289162610509, "grad_norm": 0.11376953125, "learning_rate": 0.0007370656600774809, "loss": 0.661, "step": 15840 }, { "epoch": 0.7872255885566704, "grad_norm": 0.0927734375, "learning_rate": 0.0007370259262938314, "loss": 0.6471, "step": 15850 }, { "epoch": 0.7877222608522897, "grad_norm": 0.10693359375, "learning_rate": 0.0007369861925101818, "loss": 0.6412, "step": 15860 }, { "epoch": 0.788218933147909, "grad_norm": 0.1083984375, "learning_rate": 0.0007369464587265323, "loss": 0.647, "step": 15870 }, { "epoch": 0.7887156054435284, "grad_norm": 0.10107421875, "learning_rate": 0.0007369067249428828, "loss": 0.6365, "step": 15880 }, { "epoch": 0.7892122777391477, "grad_norm": 0.130859375, "learning_rate": 0.0007368669911592331, "loss": 0.6588, "step": 15890 }, { "epoch": 0.789708950034767, "grad_norm": 0.130859375, "learning_rate": 0.0007368272573755836, "loss": 0.6499, "step": 15900 }, { "epoch": 0.7902056223303864, "grad_norm": 0.10498046875, "learning_rate": 0.0007367875235919341, "loss": 0.6606, "step": 15910 }, { "epoch": 0.7907022946260057, "grad_norm": 0.109375, "learning_rate": 0.0007367477898082845, "loss": 0.6713, "step": 15920 }, { "epoch": 0.7911989669216251, "grad_norm": 0.11474609375, "learning_rate": 0.000736708056024635, "loss": 0.633, "step": 15930 }, { "epoch": 0.7916956392172445, "grad_norm": 0.10888671875, "learning_rate": 0.0007366683222409854, "loss": 0.6714, "step": 15940 }, { "epoch": 0.7921923115128638, "grad_norm": 0.1162109375, "learning_rate": 0.0007366285884573358, "loss": 0.6649, "step": 15950 }, { "epoch": 0.7926889838084832, "grad_norm": 0.111328125, "learning_rate": 0.0007365888546736864, "loss": 0.68, "step": 15960 }, { "epoch": 0.7931856561041025, "grad_norm": 0.10888671875, "learning_rate": 0.0007365491208900368, "loss": 0.6256, "step": 15970 }, { "epoch": 0.7936823283997219, "grad_norm": 0.11474609375, "learning_rate": 0.0007365093871063873, "loss": 0.6611, "step": 15980 }, { "epoch": 0.7941790006953412, "grad_norm": 0.109375, "learning_rate": 0.0007364696533227377, "loss": 0.6605, "step": 15990 }, { "epoch": 0.7946756729909605, "grad_norm": 0.1044921875, "learning_rate": 0.0007364299195390881, "loss": 0.6494, "step": 16000 }, { "epoch": 0.7951723452865799, "grad_norm": 0.09912109375, "learning_rate": 0.0007363901857554387, "loss": 0.6674, "step": 16010 }, { "epoch": 0.7956690175821992, "grad_norm": 0.12353515625, "learning_rate": 0.000736350451971789, "loss": 0.6483, "step": 16020 }, { "epoch": 0.7961656898778187, "grad_norm": 0.11962890625, "learning_rate": 0.0007363107181881395, "loss": 0.6441, "step": 16030 }, { "epoch": 0.796662362173438, "grad_norm": 0.1533203125, "learning_rate": 0.00073627098440449, "loss": 0.6642, "step": 16040 }, { "epoch": 0.7971590344690573, "grad_norm": 0.10205078125, "learning_rate": 0.0007362312506208403, "loss": 0.6259, "step": 16050 }, { "epoch": 0.7976557067646767, "grad_norm": 0.09912109375, "learning_rate": 0.0007361915168371909, "loss": 0.6641, "step": 16060 }, { "epoch": 0.798152379060296, "grad_norm": 0.1064453125, "learning_rate": 0.0007361517830535414, "loss": 0.6431, "step": 16070 }, { "epoch": 0.7986490513559154, "grad_norm": 0.1083984375, "learning_rate": 0.0007361120492698917, "loss": 0.6418, "step": 16080 }, { "epoch": 0.7991457236515347, "grad_norm": 0.109375, "learning_rate": 0.0007360723154862422, "loss": 0.6362, "step": 16090 }, { "epoch": 0.799642395947154, "grad_norm": 0.10400390625, "learning_rate": 0.0007360325817025926, "loss": 0.6457, "step": 16100 }, { "epoch": 0.8001390682427734, "grad_norm": 0.10888671875, "learning_rate": 0.000735992847918943, "loss": 0.6353, "step": 16110 }, { "epoch": 0.8006357405383928, "grad_norm": 0.10791015625, "learning_rate": 0.0007359531141352936, "loss": 0.6309, "step": 16120 }, { "epoch": 0.8011324128340122, "grad_norm": 0.11572265625, "learning_rate": 0.000735913380351644, "loss": 0.6537, "step": 16130 }, { "epoch": 0.8016290851296315, "grad_norm": 0.099609375, "learning_rate": 0.0007358736465679945, "loss": 0.6506, "step": 16140 }, { "epoch": 0.8021257574252508, "grad_norm": 0.1279296875, "learning_rate": 0.0007358339127843449, "loss": 0.6716, "step": 16150 }, { "epoch": 0.8026224297208702, "grad_norm": 0.10009765625, "learning_rate": 0.0007357941790006954, "loss": 0.6404, "step": 16160 }, { "epoch": 0.8031191020164895, "grad_norm": 0.103515625, "learning_rate": 0.0007357544452170459, "loss": 0.6493, "step": 16170 }, { "epoch": 0.8036157743121088, "grad_norm": 0.11083984375, "learning_rate": 0.0007357147114333963, "loss": 0.6635, "step": 16180 }, { "epoch": 0.8041124466077282, "grad_norm": 0.1064453125, "learning_rate": 0.0007356749776497467, "loss": 0.619, "step": 16190 }, { "epoch": 0.8046091189033475, "grad_norm": 0.11474609375, "learning_rate": 0.0007356352438660972, "loss": 0.6638, "step": 16200 }, { "epoch": 0.805105791198967, "grad_norm": 0.10546875, "learning_rate": 0.0007355955100824477, "loss": 0.665, "step": 16210 }, { "epoch": 0.8056024634945863, "grad_norm": 0.1474609375, "learning_rate": 0.0007355557762987981, "loss": 0.6213, "step": 16220 }, { "epoch": 0.8060991357902056, "grad_norm": 0.12353515625, "learning_rate": 0.0007355160425151486, "loss": 0.6451, "step": 16230 }, { "epoch": 0.806595808085825, "grad_norm": 0.115234375, "learning_rate": 0.0007354763087314989, "loss": 0.6398, "step": 16240 }, { "epoch": 0.8070924803814443, "grad_norm": 0.10302734375, "learning_rate": 0.0007354365749478494, "loss": 0.6553, "step": 16250 }, { "epoch": 0.8075891526770637, "grad_norm": 0.1103515625, "learning_rate": 0.0007353968411642, "loss": 0.691, "step": 16260 }, { "epoch": 0.808085824972683, "grad_norm": 0.10888671875, "learning_rate": 0.0007353571073805504, "loss": 0.6825, "step": 16270 }, { "epoch": 0.8085824972683023, "grad_norm": 0.11474609375, "learning_rate": 0.0007353173735969008, "loss": 0.6685, "step": 16280 }, { "epoch": 0.8090791695639217, "grad_norm": 0.1181640625, "learning_rate": 0.0007352776398132512, "loss": 0.6356, "step": 16290 }, { "epoch": 0.8095758418595411, "grad_norm": 0.12158203125, "learning_rate": 0.0007352379060296017, "loss": 0.6552, "step": 16300 }, { "epoch": 0.8100725141551605, "grad_norm": 0.10791015625, "learning_rate": 0.0007351981722459522, "loss": 0.6721, "step": 16310 }, { "epoch": 0.8105691864507798, "grad_norm": 0.11669921875, "learning_rate": 0.0007351584384623026, "loss": 0.6464, "step": 16320 }, { "epoch": 0.8110658587463991, "grad_norm": 0.1123046875, "learning_rate": 0.0007351187046786531, "loss": 0.6584, "step": 16330 }, { "epoch": 0.8115625310420185, "grad_norm": 0.1103515625, "learning_rate": 0.0007350789708950035, "loss": 0.6583, "step": 16340 }, { "epoch": 0.8120592033376378, "grad_norm": 0.1171875, "learning_rate": 0.0007350392371113539, "loss": 0.6785, "step": 16350 }, { "epoch": 0.8125558756332572, "grad_norm": 0.10888671875, "learning_rate": 0.0007349995033277045, "loss": 0.6511, "step": 16360 }, { "epoch": 0.8130525479288765, "grad_norm": 0.1474609375, "learning_rate": 0.0007349597695440549, "loss": 0.6634, "step": 16370 }, { "epoch": 0.8135492202244958, "grad_norm": 0.111328125, "learning_rate": 0.0007349200357604053, "loss": 0.7017, "step": 16380 }, { "epoch": 0.8140458925201153, "grad_norm": 0.115234375, "learning_rate": 0.0007348803019767558, "loss": 0.6716, "step": 16390 }, { "epoch": 0.8145425648157346, "grad_norm": 0.11083984375, "learning_rate": 0.0007348405681931062, "loss": 0.6769, "step": 16400 }, { "epoch": 0.815039237111354, "grad_norm": 0.11376953125, "learning_rate": 0.0007348008344094566, "loss": 0.6457, "step": 16410 }, { "epoch": 0.8155359094069733, "grad_norm": 0.09912109375, "learning_rate": 0.0007347611006258072, "loss": 0.6589, "step": 16420 }, { "epoch": 0.8160325817025926, "grad_norm": 0.119140625, "learning_rate": 0.0007347213668421576, "loss": 0.6282, "step": 16430 }, { "epoch": 0.816529253998212, "grad_norm": 0.1328125, "learning_rate": 0.000734681633058508, "loss": 0.6914, "step": 16440 }, { "epoch": 0.8170259262938313, "grad_norm": 0.10302734375, "learning_rate": 0.0007346418992748585, "loss": 0.6693, "step": 16450 }, { "epoch": 0.8175225985894506, "grad_norm": 0.11376953125, "learning_rate": 0.000734602165491209, "loss": 0.6435, "step": 16460 }, { "epoch": 0.81801927088507, "grad_norm": 0.1142578125, "learning_rate": 0.0007345624317075594, "loss": 0.6676, "step": 16470 }, { "epoch": 0.8185159431806894, "grad_norm": 0.109375, "learning_rate": 0.0007345226979239099, "loss": 0.6642, "step": 16480 }, { "epoch": 0.8190126154763088, "grad_norm": 0.11083984375, "learning_rate": 0.0007344829641402603, "loss": 0.677, "step": 16490 }, { "epoch": 0.8195092877719281, "grad_norm": 0.123046875, "learning_rate": 0.0007344432303566107, "loss": 0.6594, "step": 16500 }, { "epoch": 0.8200059600675474, "grad_norm": 0.1162109375, "learning_rate": 0.0007344034965729611, "loss": 0.6555, "step": 16510 }, { "epoch": 0.8205026323631668, "grad_norm": 0.115234375, "learning_rate": 0.0007343637627893117, "loss": 0.6422, "step": 16520 }, { "epoch": 0.8209993046587861, "grad_norm": 0.11474609375, "learning_rate": 0.0007343240290056621, "loss": 0.6481, "step": 16530 }, { "epoch": 0.8214959769544055, "grad_norm": 0.10791015625, "learning_rate": 0.0007342842952220125, "loss": 0.678, "step": 16540 }, { "epoch": 0.8219926492500248, "grad_norm": 0.1044921875, "learning_rate": 0.000734244561438363, "loss": 0.638, "step": 16550 }, { "epoch": 0.8224893215456441, "grad_norm": 0.12060546875, "learning_rate": 0.0007342048276547134, "loss": 0.6303, "step": 16560 }, { "epoch": 0.8229859938412636, "grad_norm": 0.119140625, "learning_rate": 0.0007341650938710639, "loss": 0.6615, "step": 16570 }, { "epoch": 0.8234826661368829, "grad_norm": 0.1015625, "learning_rate": 0.0007341253600874144, "loss": 0.6814, "step": 16580 }, { "epoch": 0.8239793384325023, "grad_norm": 0.1064453125, "learning_rate": 0.0007340856263037648, "loss": 0.6632, "step": 16590 }, { "epoch": 0.8244760107281216, "grad_norm": 0.11474609375, "learning_rate": 0.0007340458925201152, "loss": 0.6387, "step": 16600 }, { "epoch": 0.8249726830237409, "grad_norm": 0.115234375, "learning_rate": 0.0007340061587364658, "loss": 0.6394, "step": 16610 }, { "epoch": 0.8254693553193603, "grad_norm": 0.11328125, "learning_rate": 0.0007339664249528162, "loss": 0.6571, "step": 16620 }, { "epoch": 0.8259660276149796, "grad_norm": 0.11328125, "learning_rate": 0.0007339266911691666, "loss": 0.662, "step": 16630 }, { "epoch": 0.826462699910599, "grad_norm": 0.109375, "learning_rate": 0.0007338869573855171, "loss": 0.6662, "step": 16640 }, { "epoch": 0.8269593722062183, "grad_norm": 0.11279296875, "learning_rate": 0.0007338472236018675, "loss": 0.6464, "step": 16650 }, { "epoch": 0.8274560445018377, "grad_norm": 0.1005859375, "learning_rate": 0.0007338074898182179, "loss": 0.6808, "step": 16660 }, { "epoch": 0.8279527167974571, "grad_norm": 0.10400390625, "learning_rate": 0.0007337677560345685, "loss": 0.6563, "step": 16670 }, { "epoch": 0.8284493890930764, "grad_norm": 0.1259765625, "learning_rate": 0.0007337280222509189, "loss": 0.6661, "step": 16680 }, { "epoch": 0.8289460613886958, "grad_norm": 0.11669921875, "learning_rate": 0.0007336882884672693, "loss": 0.6458, "step": 16690 }, { "epoch": 0.8294427336843151, "grad_norm": 0.10986328125, "learning_rate": 0.0007336485546836197, "loss": 0.6569, "step": 16700 }, { "epoch": 0.8299394059799344, "grad_norm": 0.109375, "learning_rate": 0.0007336088208999702, "loss": 0.6661, "step": 16710 }, { "epoch": 0.8304360782755538, "grad_norm": 0.1123046875, "learning_rate": 0.0007335690871163208, "loss": 0.6804, "step": 16720 }, { "epoch": 0.8309327505711731, "grad_norm": 0.11279296875, "learning_rate": 0.0007335293533326711, "loss": 0.6887, "step": 16730 }, { "epoch": 0.8314294228667924, "grad_norm": 0.1259765625, "learning_rate": 0.0007334896195490216, "loss": 0.6213, "step": 16740 }, { "epoch": 0.8319260951624119, "grad_norm": 0.107421875, "learning_rate": 0.0007334498857653721, "loss": 0.649, "step": 16750 }, { "epoch": 0.8324227674580312, "grad_norm": 0.1103515625, "learning_rate": 0.0007334101519817224, "loss": 0.6543, "step": 16760 }, { "epoch": 0.8329194397536506, "grad_norm": 0.1103515625, "learning_rate": 0.000733370418198073, "loss": 0.6634, "step": 16770 }, { "epoch": 0.8334161120492699, "grad_norm": 0.1025390625, "learning_rate": 0.0007333306844144234, "loss": 0.6675, "step": 16780 }, { "epoch": 0.8339127843448892, "grad_norm": 0.1064453125, "learning_rate": 0.0007332909506307738, "loss": 0.6521, "step": 16790 }, { "epoch": 0.8344094566405086, "grad_norm": 0.1298828125, "learning_rate": 0.0007332512168471243, "loss": 0.6378, "step": 16800 }, { "epoch": 0.8349061289361279, "grad_norm": 0.11181640625, "learning_rate": 0.0007332114830634747, "loss": 0.6496, "step": 16810 }, { "epoch": 0.8354028012317473, "grad_norm": 0.10009765625, "learning_rate": 0.0007331717492798252, "loss": 0.6638, "step": 16820 }, { "epoch": 0.8358994735273666, "grad_norm": 0.10546875, "learning_rate": 0.0007331320154961757, "loss": 0.6344, "step": 16830 }, { "epoch": 0.836396145822986, "grad_norm": 0.185546875, "learning_rate": 0.0007330922817125261, "loss": 0.6616, "step": 16840 }, { "epoch": 0.8368928181186054, "grad_norm": 0.11376953125, "learning_rate": 0.0007330525479288765, "loss": 0.6469, "step": 16850 }, { "epoch": 0.8373894904142247, "grad_norm": 0.107421875, "learning_rate": 0.000733012814145227, "loss": 0.6803, "step": 16860 }, { "epoch": 0.8378861627098441, "grad_norm": 0.12255859375, "learning_rate": 0.0007329730803615775, "loss": 0.6452, "step": 16870 }, { "epoch": 0.8383828350054634, "grad_norm": 0.09423828125, "learning_rate": 0.000732933346577928, "loss": 0.6699, "step": 16880 }, { "epoch": 0.8388795073010827, "grad_norm": 0.1015625, "learning_rate": 0.0007328936127942783, "loss": 0.6362, "step": 16890 }, { "epoch": 0.8393761795967021, "grad_norm": 0.107421875, "learning_rate": 0.0007328538790106288, "loss": 0.6622, "step": 16900 }, { "epoch": 0.8398728518923214, "grad_norm": 0.099609375, "learning_rate": 0.0007328141452269793, "loss": 0.6484, "step": 16910 }, { "epoch": 0.8403695241879408, "grad_norm": 0.15234375, "learning_rate": 0.0007327744114433297, "loss": 0.6497, "step": 16920 }, { "epoch": 0.8408661964835602, "grad_norm": 0.12890625, "learning_rate": 0.0007327346776596802, "loss": 0.6783, "step": 16930 }, { "epoch": 0.8413628687791795, "grad_norm": 0.1552734375, "learning_rate": 0.0007326949438760307, "loss": 0.6665, "step": 16940 }, { "epoch": 0.8418595410747989, "grad_norm": 0.1142578125, "learning_rate": 0.000732655210092381, "loss": 0.6902, "step": 16950 }, { "epoch": 0.8423562133704182, "grad_norm": 0.1220703125, "learning_rate": 0.0007326154763087315, "loss": 0.6522, "step": 16960 }, { "epoch": 0.8428528856660376, "grad_norm": 0.10986328125, "learning_rate": 0.000732575742525082, "loss": 0.6571, "step": 16970 }, { "epoch": 0.8433495579616569, "grad_norm": 0.1396484375, "learning_rate": 0.0007325360087414324, "loss": 0.657, "step": 16980 }, { "epoch": 0.8438462302572762, "grad_norm": 0.10595703125, "learning_rate": 0.0007324962749577829, "loss": 0.648, "step": 16990 }, { "epoch": 0.8443429025528956, "grad_norm": 0.11279296875, "learning_rate": 0.0007324565411741333, "loss": 0.6394, "step": 17000 }, { "epoch": 0.8448395748485149, "grad_norm": 0.119140625, "learning_rate": 0.0007324168073904837, "loss": 0.6504, "step": 17010 }, { "epoch": 0.8453362471441342, "grad_norm": 0.1181640625, "learning_rate": 0.0007323770736068343, "loss": 0.6483, "step": 17020 }, { "epoch": 0.8458329194397537, "grad_norm": 0.115234375, "learning_rate": 0.0007323373398231847, "loss": 0.6492, "step": 17030 }, { "epoch": 0.846329591735373, "grad_norm": 0.11669921875, "learning_rate": 0.0007322976060395352, "loss": 0.6662, "step": 17040 }, { "epoch": 0.8468262640309924, "grad_norm": 0.14453125, "learning_rate": 0.0007322578722558856, "loss": 0.6364, "step": 17050 }, { "epoch": 0.8473229363266117, "grad_norm": 0.10302734375, "learning_rate": 0.000732218138472236, "loss": 0.6674, "step": 17060 }, { "epoch": 0.847819608622231, "grad_norm": 0.109375, "learning_rate": 0.0007321784046885866, "loss": 0.6588, "step": 17070 }, { "epoch": 0.8483162809178504, "grad_norm": 0.11181640625, "learning_rate": 0.000732138670904937, "loss": 0.6432, "step": 17080 }, { "epoch": 0.8488129532134697, "grad_norm": 0.162109375, "learning_rate": 0.0007320989371212874, "loss": 0.6654, "step": 17090 }, { "epoch": 0.8493096255090891, "grad_norm": 0.177734375, "learning_rate": 0.0007320592033376379, "loss": 0.6632, "step": 17100 }, { "epoch": 0.8498062978047084, "grad_norm": 0.125, "learning_rate": 0.0007320194695539882, "loss": 0.6595, "step": 17110 }, { "epoch": 0.8503029701003278, "grad_norm": 0.1142578125, "learning_rate": 0.0007319797357703388, "loss": 0.6614, "step": 17120 }, { "epoch": 0.8507996423959472, "grad_norm": 0.1201171875, "learning_rate": 0.0007319400019866893, "loss": 0.6409, "step": 17130 }, { "epoch": 0.8512963146915665, "grad_norm": 0.103515625, "learning_rate": 0.0007319002682030396, "loss": 0.6245, "step": 17140 }, { "epoch": 0.8517929869871859, "grad_norm": 0.126953125, "learning_rate": 0.0007318605344193901, "loss": 0.6708, "step": 17150 }, { "epoch": 0.8522896592828052, "grad_norm": 0.1103515625, "learning_rate": 0.0007318208006357405, "loss": 0.6608, "step": 17160 }, { "epoch": 0.8527863315784245, "grad_norm": 0.1220703125, "learning_rate": 0.0007317810668520911, "loss": 0.6411, "step": 17170 }, { "epoch": 0.8532830038740439, "grad_norm": 0.140625, "learning_rate": 0.0007317413330684415, "loss": 0.6536, "step": 17180 }, { "epoch": 0.8537796761696632, "grad_norm": 0.11328125, "learning_rate": 0.0007317015992847919, "loss": 0.6427, "step": 17190 }, { "epoch": 0.8542763484652826, "grad_norm": 0.12451171875, "learning_rate": 0.0007316618655011424, "loss": 0.6913, "step": 17200 }, { "epoch": 0.854773020760902, "grad_norm": 0.10595703125, "learning_rate": 0.0007316221317174928, "loss": 0.6436, "step": 17210 }, { "epoch": 0.8552696930565213, "grad_norm": 0.1083984375, "learning_rate": 0.0007315823979338433, "loss": 0.6381, "step": 17220 }, { "epoch": 0.8557663653521407, "grad_norm": 0.1357421875, "learning_rate": 0.0007315426641501938, "loss": 0.66, "step": 17230 }, { "epoch": 0.85626303764776, "grad_norm": 0.14453125, "learning_rate": 0.0007315029303665442, "loss": 0.6569, "step": 17240 }, { "epoch": 0.8567597099433794, "grad_norm": 0.115234375, "learning_rate": 0.0007314631965828946, "loss": 0.6972, "step": 17250 }, { "epoch": 0.8572563822389987, "grad_norm": 0.126953125, "learning_rate": 0.0007314234627992451, "loss": 0.6668, "step": 17260 }, { "epoch": 0.857753054534618, "grad_norm": 0.1328125, "learning_rate": 0.0007313837290155956, "loss": 0.6675, "step": 17270 }, { "epoch": 0.8582497268302374, "grad_norm": 0.12255859375, "learning_rate": 0.000731343995231946, "loss": 0.6405, "step": 17280 }, { "epoch": 0.8587463991258567, "grad_norm": 0.09765625, "learning_rate": 0.0007313042614482965, "loss": 0.657, "step": 17290 }, { "epoch": 0.8592430714214762, "grad_norm": 0.11767578125, "learning_rate": 0.0007312645276646468, "loss": 0.6507, "step": 17300 }, { "epoch": 0.8597397437170955, "grad_norm": 0.1376953125, "learning_rate": 0.0007312247938809973, "loss": 0.6625, "step": 17310 }, { "epoch": 0.8602364160127148, "grad_norm": 0.109375, "learning_rate": 0.0007311850600973479, "loss": 0.6755, "step": 17320 }, { "epoch": 0.8607330883083342, "grad_norm": 0.103515625, "learning_rate": 0.0007311453263136983, "loss": 0.6671, "step": 17330 }, { "epoch": 0.8612297606039535, "grad_norm": 0.10009765625, "learning_rate": 0.0007311055925300487, "loss": 0.6828, "step": 17340 }, { "epoch": 0.8617264328995728, "grad_norm": 0.09130859375, "learning_rate": 0.0007310658587463992, "loss": 0.6169, "step": 17350 }, { "epoch": 0.8622231051951922, "grad_norm": 0.109375, "learning_rate": 0.0007310261249627496, "loss": 0.6387, "step": 17360 }, { "epoch": 0.8627197774908115, "grad_norm": 0.10791015625, "learning_rate": 0.0007309863911791001, "loss": 0.7025, "step": 17370 }, { "epoch": 0.8632164497864309, "grad_norm": 0.095703125, "learning_rate": 0.0007309466573954505, "loss": 0.6599, "step": 17380 }, { "epoch": 0.8637131220820503, "grad_norm": 0.1005859375, "learning_rate": 0.000730906923611801, "loss": 0.6288, "step": 17390 }, { "epoch": 0.8642097943776696, "grad_norm": 0.130859375, "learning_rate": 0.0007308671898281514, "loss": 0.6718, "step": 17400 }, { "epoch": 0.864706466673289, "grad_norm": 0.10498046875, "learning_rate": 0.0007308274560445018, "loss": 0.6607, "step": 17410 }, { "epoch": 0.8652031389689083, "grad_norm": 0.09912109375, "learning_rate": 0.0007307877222608524, "loss": 0.6593, "step": 17420 }, { "epoch": 0.8656998112645277, "grad_norm": 0.109375, "learning_rate": 0.0007307479884772028, "loss": 0.6401, "step": 17430 }, { "epoch": 0.866196483560147, "grad_norm": 0.10498046875, "learning_rate": 0.0007307082546935532, "loss": 0.6316, "step": 17440 }, { "epoch": 0.8666931558557663, "grad_norm": 0.0986328125, "learning_rate": 0.0007306685209099037, "loss": 0.6319, "step": 17450 }, { "epoch": 0.8671898281513857, "grad_norm": 0.11669921875, "learning_rate": 0.0007306287871262541, "loss": 0.6675, "step": 17460 }, { "epoch": 0.867686500447005, "grad_norm": 0.12109375, "learning_rate": 0.0007305890533426046, "loss": 0.6245, "step": 17470 }, { "epoch": 0.8681831727426245, "grad_norm": 0.1259765625, "learning_rate": 0.0007305493195589551, "loss": 0.6518, "step": 17480 }, { "epoch": 0.8686798450382438, "grad_norm": 0.1376953125, "learning_rate": 0.0007305095857753055, "loss": 0.6574, "step": 17490 }, { "epoch": 0.8691765173338631, "grad_norm": 0.10888671875, "learning_rate": 0.0007304698519916559, "loss": 0.6386, "step": 17500 }, { "epoch": 0.8696731896294825, "grad_norm": 0.1357421875, "learning_rate": 0.0007304301182080064, "loss": 0.6485, "step": 17510 }, { "epoch": 0.8701698619251018, "grad_norm": 0.11767578125, "learning_rate": 0.0007303903844243569, "loss": 0.653, "step": 17520 }, { "epoch": 0.8706665342207212, "grad_norm": 0.1162109375, "learning_rate": 0.0007303506506407073, "loss": 0.6777, "step": 17530 }, { "epoch": 0.8711632065163405, "grad_norm": 0.150390625, "learning_rate": 0.0007303109168570578, "loss": 0.6541, "step": 17540 }, { "epoch": 0.8716598788119598, "grad_norm": 0.12890625, "learning_rate": 0.0007302711830734082, "loss": 0.6746, "step": 17550 }, { "epoch": 0.8721565511075792, "grad_norm": 0.11474609375, "learning_rate": 0.0007302314492897586, "loss": 0.6732, "step": 17560 }, { "epoch": 0.8726532234031986, "grad_norm": 0.1171875, "learning_rate": 0.000730191715506109, "loss": 0.6948, "step": 17570 }, { "epoch": 0.873149895698818, "grad_norm": 0.11279296875, "learning_rate": 0.0007301519817224596, "loss": 0.6498, "step": 17580 }, { "epoch": 0.8736465679944373, "grad_norm": 0.115234375, "learning_rate": 0.00073011224793881, "loss": 0.6505, "step": 17590 }, { "epoch": 0.8741432402900566, "grad_norm": 0.099609375, "learning_rate": 0.0007300725141551604, "loss": 0.6503, "step": 17600 }, { "epoch": 0.874639912585676, "grad_norm": 0.1005859375, "learning_rate": 0.0007300327803715109, "loss": 0.6586, "step": 17610 }, { "epoch": 0.8751365848812953, "grad_norm": 0.138671875, "learning_rate": 0.0007299930465878615, "loss": 0.6732, "step": 17620 }, { "epoch": 0.8756332571769146, "grad_norm": 0.10693359375, "learning_rate": 0.0007299533128042118, "loss": 0.6564, "step": 17630 }, { "epoch": 0.876129929472534, "grad_norm": 0.1103515625, "learning_rate": 0.0007299135790205623, "loss": 0.6678, "step": 17640 }, { "epoch": 0.8766266017681533, "grad_norm": 0.10986328125, "learning_rate": 0.0007298738452369127, "loss": 0.6443, "step": 17650 }, { "epoch": 0.8771232740637728, "grad_norm": 0.1123046875, "learning_rate": 0.0007298341114532631, "loss": 0.643, "step": 17660 }, { "epoch": 0.8776199463593921, "grad_norm": 0.10400390625, "learning_rate": 0.0007297943776696137, "loss": 0.6385, "step": 17670 }, { "epoch": 0.8781166186550114, "grad_norm": 0.09423828125, "learning_rate": 0.0007297546438859641, "loss": 0.6425, "step": 17680 }, { "epoch": 0.8786132909506308, "grad_norm": 0.09716796875, "learning_rate": 0.0007297149101023145, "loss": 0.6334, "step": 17690 }, { "epoch": 0.8791099632462501, "grad_norm": 0.134765625, "learning_rate": 0.000729675176318665, "loss": 0.6628, "step": 17700 }, { "epoch": 0.8796066355418695, "grad_norm": 0.134765625, "learning_rate": 0.0007296354425350154, "loss": 0.674, "step": 17710 }, { "epoch": 0.8801033078374888, "grad_norm": 0.107421875, "learning_rate": 0.0007295957087513658, "loss": 0.6799, "step": 17720 }, { "epoch": 0.8805999801331081, "grad_norm": 0.11328125, "learning_rate": 0.0007295559749677164, "loss": 0.6566, "step": 17730 }, { "epoch": 0.8810966524287275, "grad_norm": 0.10107421875, "learning_rate": 0.0007295162411840668, "loss": 0.6363, "step": 17740 }, { "epoch": 0.8815933247243469, "grad_norm": 0.10595703125, "learning_rate": 0.0007294765074004172, "loss": 0.681, "step": 17750 }, { "epoch": 0.8820899970199663, "grad_norm": 0.1259765625, "learning_rate": 0.0007294367736167676, "loss": 0.6795, "step": 17760 }, { "epoch": 0.8825866693155856, "grad_norm": 0.10400390625, "learning_rate": 0.0007293970398331182, "loss": 0.668, "step": 17770 }, { "epoch": 0.8830833416112049, "grad_norm": 0.12060546875, "learning_rate": 0.0007293573060494687, "loss": 0.6346, "step": 17780 }, { "epoch": 0.8835800139068243, "grad_norm": 0.11328125, "learning_rate": 0.000729317572265819, "loss": 0.6488, "step": 17790 }, { "epoch": 0.8840766862024436, "grad_norm": 0.11279296875, "learning_rate": 0.0007292778384821695, "loss": 0.6297, "step": 17800 }, { "epoch": 0.884573358498063, "grad_norm": 0.099609375, "learning_rate": 0.00072923810469852, "loss": 0.6498, "step": 17810 }, { "epoch": 0.8850700307936823, "grad_norm": 0.1064453125, "learning_rate": 0.0007291983709148703, "loss": 0.6379, "step": 17820 }, { "epoch": 0.8855667030893016, "grad_norm": 0.12890625, "learning_rate": 0.0007291586371312209, "loss": 0.6485, "step": 17830 }, { "epoch": 0.8860633753849211, "grad_norm": 0.119140625, "learning_rate": 0.0007291189033475713, "loss": 0.6255, "step": 17840 }, { "epoch": 0.8865600476805404, "grad_norm": 0.11376953125, "learning_rate": 0.0007290791695639217, "loss": 0.6269, "step": 17850 }, { "epoch": 0.8870567199761598, "grad_norm": 0.1279296875, "learning_rate": 0.0007290394357802722, "loss": 0.658, "step": 17860 }, { "epoch": 0.8875533922717791, "grad_norm": 0.12060546875, "learning_rate": 0.0007289997019966226, "loss": 0.6499, "step": 17870 }, { "epoch": 0.8880500645673984, "grad_norm": 0.12060546875, "learning_rate": 0.0007289599682129731, "loss": 0.6215, "step": 17880 }, { "epoch": 0.8885467368630178, "grad_norm": 0.103515625, "learning_rate": 0.0007289202344293236, "loss": 0.638, "step": 17890 }, { "epoch": 0.8890434091586371, "grad_norm": 0.12109375, "learning_rate": 0.000728880500645674, "loss": 0.6508, "step": 17900 }, { "epoch": 0.8895400814542564, "grad_norm": 0.197265625, "learning_rate": 0.0007288407668620245, "loss": 0.6515, "step": 17910 }, { "epoch": 0.8900367537498758, "grad_norm": 0.12158203125, "learning_rate": 0.000728801033078375, "loss": 0.6798, "step": 17920 }, { "epoch": 0.8905334260454952, "grad_norm": 0.1220703125, "learning_rate": 0.0007287612992947254, "loss": 0.6558, "step": 17930 }, { "epoch": 0.8910300983411146, "grad_norm": 0.1005859375, "learning_rate": 0.0007287215655110759, "loss": 0.6627, "step": 17940 }, { "epoch": 0.8915267706367339, "grad_norm": 0.1357421875, "learning_rate": 0.0007286818317274263, "loss": 0.6434, "step": 17950 }, { "epoch": 0.8920234429323532, "grad_norm": 0.111328125, "learning_rate": 0.0007286420979437767, "loss": 0.636, "step": 17960 }, { "epoch": 0.8925201152279726, "grad_norm": 0.10888671875, "learning_rate": 0.0007286023641601273, "loss": 0.6488, "step": 17970 }, { "epoch": 0.8930167875235919, "grad_norm": 0.119140625, "learning_rate": 0.0007285626303764776, "loss": 0.6529, "step": 17980 }, { "epoch": 0.8935134598192113, "grad_norm": 0.12158203125, "learning_rate": 0.0007285228965928281, "loss": 0.6544, "step": 17990 }, { "epoch": 0.8940101321148306, "grad_norm": 0.111328125, "learning_rate": 0.0007284831628091786, "loss": 0.6757, "step": 18000 }, { "epoch": 0.8945068044104499, "grad_norm": 0.10009765625, "learning_rate": 0.0007284434290255289, "loss": 0.6446, "step": 18010 }, { "epoch": 0.8950034767060694, "grad_norm": 0.10546875, "learning_rate": 0.0007284036952418794, "loss": 0.6686, "step": 18020 }, { "epoch": 0.8955001490016887, "grad_norm": 0.146484375, "learning_rate": 0.0007283639614582299, "loss": 0.6257, "step": 18030 }, { "epoch": 0.8959968212973081, "grad_norm": 0.10888671875, "learning_rate": 0.0007283242276745803, "loss": 0.684, "step": 18040 }, { "epoch": 0.8964934935929274, "grad_norm": 0.1162109375, "learning_rate": 0.0007282844938909308, "loss": 0.6261, "step": 18050 }, { "epoch": 0.8969901658885467, "grad_norm": 0.123046875, "learning_rate": 0.0007282447601072812, "loss": 0.6511, "step": 18060 }, { "epoch": 0.8974868381841661, "grad_norm": 0.1416015625, "learning_rate": 0.0007282050263236317, "loss": 0.669, "step": 18070 }, { "epoch": 0.8979835104797854, "grad_norm": 0.11669921875, "learning_rate": 0.0007281652925399822, "loss": 0.6212, "step": 18080 }, { "epoch": 0.8984801827754048, "grad_norm": 0.1025390625, "learning_rate": 0.0007281255587563326, "loss": 0.6908, "step": 18090 }, { "epoch": 0.8989768550710241, "grad_norm": 0.10009765625, "learning_rate": 0.0007280858249726831, "loss": 0.6437, "step": 18100 }, { "epoch": 0.8994735273666435, "grad_norm": 0.203125, "learning_rate": 0.0007280460911890335, "loss": 0.6976, "step": 18110 }, { "epoch": 0.8999701996622629, "grad_norm": 0.119140625, "learning_rate": 0.0007280063574053839, "loss": 0.6431, "step": 18120 }, { "epoch": 0.9004668719578822, "grad_norm": 0.1474609375, "learning_rate": 0.0007279666236217345, "loss": 0.6197, "step": 18130 }, { "epoch": 0.9009635442535016, "grad_norm": 0.1259765625, "learning_rate": 0.0007279268898380849, "loss": 0.6198, "step": 18140 }, { "epoch": 0.9014602165491209, "grad_norm": 0.10595703125, "learning_rate": 0.0007278871560544353, "loss": 0.6532, "step": 18150 }, { "epoch": 0.9019568888447402, "grad_norm": 0.10400390625, "learning_rate": 0.0007278474222707858, "loss": 0.6735, "step": 18160 }, { "epoch": 0.9024535611403596, "grad_norm": 0.1044921875, "learning_rate": 0.0007278076884871361, "loss": 0.6289, "step": 18170 }, { "epoch": 0.9029502334359789, "grad_norm": 0.1064453125, "learning_rate": 0.0007277679547034867, "loss": 0.6466, "step": 18180 }, { "epoch": 0.9034469057315982, "grad_norm": 0.109375, "learning_rate": 0.0007277282209198372, "loss": 0.6393, "step": 18190 }, { "epoch": 0.9039435780272176, "grad_norm": 0.0986328125, "learning_rate": 0.0007276884871361875, "loss": 0.6556, "step": 18200 }, { "epoch": 0.904440250322837, "grad_norm": 0.1005859375, "learning_rate": 0.000727648753352538, "loss": 0.6643, "step": 18210 }, { "epoch": 0.9049369226184564, "grad_norm": 0.1513671875, "learning_rate": 0.0007276090195688885, "loss": 0.6401, "step": 18220 }, { "epoch": 0.9054335949140757, "grad_norm": 0.1455078125, "learning_rate": 0.000727569285785239, "loss": 0.6655, "step": 18230 }, { "epoch": 0.905930267209695, "grad_norm": 0.11328125, "learning_rate": 0.0007275295520015894, "loss": 0.6161, "step": 18240 }, { "epoch": 0.9064269395053144, "grad_norm": 0.10791015625, "learning_rate": 0.0007274898182179398, "loss": 0.6177, "step": 18250 }, { "epoch": 0.9069236118009337, "grad_norm": 0.1484375, "learning_rate": 0.0007274500844342903, "loss": 0.6307, "step": 18260 }, { "epoch": 0.9074202840965531, "grad_norm": 0.11181640625, "learning_rate": 0.0007274103506506407, "loss": 0.6363, "step": 18270 }, { "epoch": 0.9079169563921724, "grad_norm": 0.1533203125, "learning_rate": 0.0007273706168669912, "loss": 0.6561, "step": 18280 }, { "epoch": 0.9084136286877917, "grad_norm": 0.10498046875, "learning_rate": 0.0007273308830833417, "loss": 0.6347, "step": 18290 }, { "epoch": 0.9089103009834112, "grad_norm": 0.10400390625, "learning_rate": 0.0007272911492996921, "loss": 0.6362, "step": 18300 }, { "epoch": 0.9094069732790305, "grad_norm": 0.10546875, "learning_rate": 0.0007272514155160425, "loss": 0.6456, "step": 18310 }, { "epoch": 0.9099036455746499, "grad_norm": 0.1025390625, "learning_rate": 0.000727211681732393, "loss": 0.6526, "step": 18320 }, { "epoch": 0.9104003178702692, "grad_norm": 0.11962890625, "learning_rate": 0.0007271719479487435, "loss": 0.6516, "step": 18330 }, { "epoch": 0.9108969901658885, "grad_norm": 0.1005859375, "learning_rate": 0.0007271322141650939, "loss": 0.6862, "step": 18340 }, { "epoch": 0.9113936624615079, "grad_norm": 0.1044921875, "learning_rate": 0.0007270924803814444, "loss": 0.6343, "step": 18350 }, { "epoch": 0.9118903347571272, "grad_norm": 0.1015625, "learning_rate": 0.0007270527465977948, "loss": 0.6607, "step": 18360 }, { "epoch": 0.9123870070527466, "grad_norm": 0.10888671875, "learning_rate": 0.0007270130128141452, "loss": 0.6355, "step": 18370 }, { "epoch": 0.9128836793483659, "grad_norm": 0.10888671875, "learning_rate": 0.0007269732790304958, "loss": 0.6391, "step": 18380 }, { "epoch": 0.9133803516439853, "grad_norm": 0.10205078125, "learning_rate": 0.0007269335452468462, "loss": 0.638, "step": 18390 }, { "epoch": 0.9138770239396047, "grad_norm": 0.1123046875, "learning_rate": 0.0007268938114631966, "loss": 0.6353, "step": 18400 }, { "epoch": 0.914373696235224, "grad_norm": 0.10791015625, "learning_rate": 0.0007268540776795471, "loss": 0.6596, "step": 18410 }, { "epoch": 0.9148703685308434, "grad_norm": 0.115234375, "learning_rate": 0.0007268143438958975, "loss": 0.6434, "step": 18420 }, { "epoch": 0.9153670408264627, "grad_norm": 0.126953125, "learning_rate": 0.000726774610112248, "loss": 0.6595, "step": 18430 }, { "epoch": 0.915863713122082, "grad_norm": 0.11474609375, "learning_rate": 0.0007267348763285984, "loss": 0.6257, "step": 18440 }, { "epoch": 0.9163603854177014, "grad_norm": 0.11474609375, "learning_rate": 0.0007266951425449489, "loss": 0.6293, "step": 18450 }, { "epoch": 0.9168570577133207, "grad_norm": 0.10888671875, "learning_rate": 0.0007266554087612993, "loss": 0.6326, "step": 18460 }, { "epoch": 0.91735373000894, "grad_norm": 0.1259765625, "learning_rate": 0.0007266156749776497, "loss": 0.6738, "step": 18470 }, { "epoch": 0.9178504023045595, "grad_norm": 0.1015625, "learning_rate": 0.0007265759411940003, "loss": 0.64, "step": 18480 }, { "epoch": 0.9183470746001788, "grad_norm": 0.10791015625, "learning_rate": 0.0007265362074103507, "loss": 0.6349, "step": 18490 }, { "epoch": 0.9188437468957982, "grad_norm": 0.1142578125, "learning_rate": 0.0007264964736267011, "loss": 0.6403, "step": 18500 }, { "epoch": 0.9193404191914175, "grad_norm": 0.10107421875, "learning_rate": 0.0007264567398430516, "loss": 0.666, "step": 18510 }, { "epoch": 0.9198370914870369, "grad_norm": 0.11767578125, "learning_rate": 0.000726417006059402, "loss": 0.642, "step": 18520 }, { "epoch": 0.9203337637826562, "grad_norm": 0.10302734375, "learning_rate": 0.0007263772722757525, "loss": 0.6183, "step": 18530 }, { "epoch": 0.9208304360782755, "grad_norm": 0.0986328125, "learning_rate": 0.000726337538492103, "loss": 0.6488, "step": 18540 }, { "epoch": 0.9213271083738949, "grad_norm": 0.11572265625, "learning_rate": 0.0007262978047084534, "loss": 0.6425, "step": 18550 }, { "epoch": 0.9218237806695142, "grad_norm": 0.10888671875, "learning_rate": 0.0007262580709248038, "loss": 0.6517, "step": 18560 }, { "epoch": 0.9223204529651337, "grad_norm": 0.146484375, "learning_rate": 0.0007262183371411543, "loss": 0.6648, "step": 18570 }, { "epoch": 0.922817125260753, "grad_norm": 0.10595703125, "learning_rate": 0.0007261786033575048, "loss": 0.6293, "step": 18580 }, { "epoch": 0.9233137975563723, "grad_norm": 0.138671875, "learning_rate": 0.0007261388695738552, "loss": 0.6352, "step": 18590 }, { "epoch": 0.9238104698519917, "grad_norm": 0.103515625, "learning_rate": 0.0007260991357902057, "loss": 0.6556, "step": 18600 }, { "epoch": 0.924307142147611, "grad_norm": 0.09423828125, "learning_rate": 0.0007260594020065561, "loss": 0.6334, "step": 18610 }, { "epoch": 0.9248038144432303, "grad_norm": 0.1025390625, "learning_rate": 0.0007260196682229065, "loss": 0.6108, "step": 18620 }, { "epoch": 0.9253004867388497, "grad_norm": 0.10205078125, "learning_rate": 0.000725979934439257, "loss": 0.6333, "step": 18630 }, { "epoch": 0.925797159034469, "grad_norm": 0.11328125, "learning_rate": 0.0007259402006556075, "loss": 0.6463, "step": 18640 }, { "epoch": 0.9262938313300884, "grad_norm": 0.10400390625, "learning_rate": 0.0007259004668719579, "loss": 0.6333, "step": 18650 }, { "epoch": 0.9267905036257078, "grad_norm": 0.08837890625, "learning_rate": 0.0007258607330883083, "loss": 0.676, "step": 18660 }, { "epoch": 0.9272871759213271, "grad_norm": 0.10888671875, "learning_rate": 0.0007258209993046588, "loss": 0.6571, "step": 18670 }, { "epoch": 0.9277838482169465, "grad_norm": 0.11279296875, "learning_rate": 0.0007257812655210094, "loss": 0.6544, "step": 18680 }, { "epoch": 0.9282805205125658, "grad_norm": 0.10693359375, "learning_rate": 0.0007257415317373597, "loss": 0.6293, "step": 18690 }, { "epoch": 0.9287771928081852, "grad_norm": 0.1015625, "learning_rate": 0.0007257017979537102, "loss": 0.6482, "step": 18700 }, { "epoch": 0.9292738651038045, "grad_norm": 0.1220703125, "learning_rate": 0.0007256620641700606, "loss": 0.6875, "step": 18710 }, { "epoch": 0.9297705373994238, "grad_norm": 0.11181640625, "learning_rate": 0.000725622330386411, "loss": 0.6797, "step": 18720 }, { "epoch": 0.9302672096950432, "grad_norm": 0.09130859375, "learning_rate": 0.0007255825966027616, "loss": 0.6506, "step": 18730 }, { "epoch": 0.9307638819906625, "grad_norm": 0.11279296875, "learning_rate": 0.000725542862819112, "loss": 0.6089, "step": 18740 }, { "epoch": 0.931260554286282, "grad_norm": 0.1708984375, "learning_rate": 0.0007255031290354624, "loss": 0.6626, "step": 18750 }, { "epoch": 0.9317572265819013, "grad_norm": 0.10498046875, "learning_rate": 0.0007254633952518129, "loss": 0.6508, "step": 18760 }, { "epoch": 0.9322538988775206, "grad_norm": 0.11962890625, "learning_rate": 0.0007254236614681633, "loss": 0.6255, "step": 18770 }, { "epoch": 0.93275057117314, "grad_norm": 0.10205078125, "learning_rate": 0.0007253839276845138, "loss": 0.6643, "step": 18780 }, { "epoch": 0.9332472434687593, "grad_norm": 0.10498046875, "learning_rate": 0.0007253441939008643, "loss": 0.6427, "step": 18790 }, { "epoch": 0.9337439157643787, "grad_norm": 0.1416015625, "learning_rate": 0.0007253044601172147, "loss": 0.641, "step": 18800 }, { "epoch": 0.934240588059998, "grad_norm": 0.09912109375, "learning_rate": 0.0007252647263335652, "loss": 0.6462, "step": 18810 }, { "epoch": 0.9347372603556173, "grad_norm": 0.09765625, "learning_rate": 0.0007252249925499155, "loss": 0.6797, "step": 18820 }, { "epoch": 0.9352339326512367, "grad_norm": 0.12451171875, "learning_rate": 0.000725185258766266, "loss": 0.6372, "step": 18830 }, { "epoch": 0.9357306049468561, "grad_norm": 0.1005859375, "learning_rate": 0.0007251455249826166, "loss": 0.6683, "step": 18840 }, { "epoch": 0.9362272772424755, "grad_norm": 0.1162109375, "learning_rate": 0.0007251057911989669, "loss": 0.6337, "step": 18850 }, { "epoch": 0.9367239495380948, "grad_norm": 0.103515625, "learning_rate": 0.0007250660574153174, "loss": 0.6449, "step": 18860 }, { "epoch": 0.9372206218337141, "grad_norm": 0.10791015625, "learning_rate": 0.0007250263236316679, "loss": 0.6515, "step": 18870 }, { "epoch": 0.9377172941293335, "grad_norm": 0.1044921875, "learning_rate": 0.0007249865898480182, "loss": 0.6232, "step": 18880 }, { "epoch": 0.9382139664249528, "grad_norm": 0.11328125, "learning_rate": 0.0007249468560643688, "loss": 0.6598, "step": 18890 }, { "epoch": 0.9387106387205721, "grad_norm": 0.10888671875, "learning_rate": 0.0007249071222807192, "loss": 0.6842, "step": 18900 }, { "epoch": 0.9392073110161915, "grad_norm": 0.111328125, "learning_rate": 0.0007248673884970696, "loss": 0.6436, "step": 18910 }, { "epoch": 0.9397039833118108, "grad_norm": 0.12353515625, "learning_rate": 0.0007248276547134201, "loss": 0.6466, "step": 18920 }, { "epoch": 0.9402006556074303, "grad_norm": 0.10693359375, "learning_rate": 0.0007247879209297706, "loss": 0.6312, "step": 18930 }, { "epoch": 0.9406973279030496, "grad_norm": 0.10302734375, "learning_rate": 0.000724748187146121, "loss": 0.676, "step": 18940 }, { "epoch": 0.941194000198669, "grad_norm": 0.1044921875, "learning_rate": 0.0007247084533624715, "loss": 0.6399, "step": 18950 }, { "epoch": 0.9416906724942883, "grad_norm": 0.10546875, "learning_rate": 0.0007246687195788219, "loss": 0.6462, "step": 18960 }, { "epoch": 0.9421873447899076, "grad_norm": 0.1328125, "learning_rate": 0.0007246289857951724, "loss": 0.6322, "step": 18970 }, { "epoch": 0.942684017085527, "grad_norm": 0.095703125, "learning_rate": 0.0007245892520115229, "loss": 0.6487, "step": 18980 }, { "epoch": 0.9431806893811463, "grad_norm": 0.10400390625, "learning_rate": 0.0007245495182278733, "loss": 0.6321, "step": 18990 }, { "epoch": 0.9436773616767656, "grad_norm": 0.1220703125, "learning_rate": 0.0007245097844442238, "loss": 0.6716, "step": 19000 }, { "epoch": 0.944174033972385, "grad_norm": 0.1005859375, "learning_rate": 0.0007244700506605742, "loss": 0.6537, "step": 19010 }, { "epoch": 0.9446707062680044, "grad_norm": 0.1171875, "learning_rate": 0.0007244303168769246, "loss": 0.6177, "step": 19020 }, { "epoch": 0.9451673785636238, "grad_norm": 0.146484375, "learning_rate": 0.0007243905830932752, "loss": 0.6269, "step": 19030 }, { "epoch": 0.9456640508592431, "grad_norm": 0.10009765625, "learning_rate": 0.0007243508493096255, "loss": 0.6405, "step": 19040 }, { "epoch": 0.9461607231548624, "grad_norm": 0.10791015625, "learning_rate": 0.000724311115525976, "loss": 0.6169, "step": 19050 }, { "epoch": 0.9466573954504818, "grad_norm": 0.11474609375, "learning_rate": 0.0007242713817423265, "loss": 0.6676, "step": 19060 }, { "epoch": 0.9471540677461011, "grad_norm": 0.09619140625, "learning_rate": 0.0007242316479586768, "loss": 0.6352, "step": 19070 }, { "epoch": 0.9476507400417205, "grad_norm": 0.11474609375, "learning_rate": 0.0007241919141750274, "loss": 0.6393, "step": 19080 }, { "epoch": 0.9481474123373398, "grad_norm": 0.09423828125, "learning_rate": 0.0007241521803913778, "loss": 0.6591, "step": 19090 }, { "epoch": 0.9486440846329591, "grad_norm": 0.10498046875, "learning_rate": 0.0007241124466077282, "loss": 0.6649, "step": 19100 }, { "epoch": 0.9491407569285786, "grad_norm": 0.123046875, "learning_rate": 0.0007240727128240787, "loss": 0.6774, "step": 19110 }, { "epoch": 0.9496374292241979, "grad_norm": 0.09619140625, "learning_rate": 0.0007240329790404291, "loss": 0.6421, "step": 19120 }, { "epoch": 0.9501341015198173, "grad_norm": 0.1630859375, "learning_rate": 0.0007239932452567797, "loss": 0.6432, "step": 19130 }, { "epoch": 0.9506307738154366, "grad_norm": 0.150390625, "learning_rate": 0.0007239535114731301, "loss": 0.6313, "step": 19140 }, { "epoch": 0.9511274461110559, "grad_norm": 0.11181640625, "learning_rate": 0.0007239137776894805, "loss": 0.6938, "step": 19150 }, { "epoch": 0.9516241184066753, "grad_norm": 0.1572265625, "learning_rate": 0.000723874043905831, "loss": 0.6474, "step": 19160 }, { "epoch": 0.9521207907022946, "grad_norm": 0.125, "learning_rate": 0.0007238343101221814, "loss": 0.66, "step": 19170 }, { "epoch": 0.9526174629979139, "grad_norm": 0.1357421875, "learning_rate": 0.0007237945763385318, "loss": 0.6707, "step": 19180 }, { "epoch": 0.9531141352935333, "grad_norm": 0.12353515625, "learning_rate": 0.0007237548425548824, "loss": 0.6322, "step": 19190 }, { "epoch": 0.9536108075891527, "grad_norm": 0.09130859375, "learning_rate": 0.0007237151087712328, "loss": 0.6423, "step": 19200 }, { "epoch": 0.9541074798847721, "grad_norm": 0.11962890625, "learning_rate": 0.0007236753749875832, "loss": 0.6438, "step": 19210 }, { "epoch": 0.9546041521803914, "grad_norm": 0.154296875, "learning_rate": 0.0007236356412039337, "loss": 0.65, "step": 19220 }, { "epoch": 0.9551008244760107, "grad_norm": 0.099609375, "learning_rate": 0.000723595907420284, "loss": 0.6024, "step": 19230 }, { "epoch": 0.9555974967716301, "grad_norm": 0.11279296875, "learning_rate": 0.0007235561736366346, "loss": 0.6478, "step": 19240 }, { "epoch": 0.9560941690672494, "grad_norm": 0.107421875, "learning_rate": 0.0007235164398529851, "loss": 0.6551, "step": 19250 }, { "epoch": 0.9565908413628688, "grad_norm": 0.10498046875, "learning_rate": 0.0007234767060693355, "loss": 0.6244, "step": 19260 }, { "epoch": 0.9570875136584881, "grad_norm": 0.10498046875, "learning_rate": 0.0007234369722856859, "loss": 0.6126, "step": 19270 }, { "epoch": 0.9575841859541074, "grad_norm": 0.10791015625, "learning_rate": 0.0007233972385020365, "loss": 0.6673, "step": 19280 }, { "epoch": 0.9580808582497269, "grad_norm": 0.10302734375, "learning_rate": 0.0007233575047183869, "loss": 0.6338, "step": 19290 }, { "epoch": 0.9585775305453462, "grad_norm": 0.140625, "learning_rate": 0.0007233177709347373, "loss": 0.6413, "step": 19300 }, { "epoch": 0.9590742028409656, "grad_norm": 0.1015625, "learning_rate": 0.0007232780371510877, "loss": 0.6325, "step": 19310 }, { "epoch": 0.9595708751365849, "grad_norm": 0.1025390625, "learning_rate": 0.0007232383033674382, "loss": 0.6368, "step": 19320 }, { "epoch": 0.9600675474322042, "grad_norm": 0.1015625, "learning_rate": 0.0007231985695837886, "loss": 0.6868, "step": 19330 }, { "epoch": 0.9605642197278236, "grad_norm": 0.11376953125, "learning_rate": 0.0007231588358001391, "loss": 0.6554, "step": 19340 }, { "epoch": 0.9610608920234429, "grad_norm": 0.109375, "learning_rate": 0.0007231191020164896, "loss": 0.6511, "step": 19350 }, { "epoch": 0.9615575643190623, "grad_norm": 0.1015625, "learning_rate": 0.00072307936823284, "loss": 0.601, "step": 19360 }, { "epoch": 0.9620542366146816, "grad_norm": 0.0986328125, "learning_rate": 0.0007230396344491904, "loss": 0.6424, "step": 19370 }, { "epoch": 0.9625509089103009, "grad_norm": 0.126953125, "learning_rate": 0.000722999900665541, "loss": 0.6213, "step": 19380 }, { "epoch": 0.9630475812059204, "grad_norm": 0.10205078125, "learning_rate": 0.0007229601668818914, "loss": 0.6322, "step": 19390 }, { "epoch": 0.9635442535015397, "grad_norm": 0.10888671875, "learning_rate": 0.0007229204330982418, "loss": 0.639, "step": 19400 }, { "epoch": 0.964040925797159, "grad_norm": 0.09814453125, "learning_rate": 0.0007228806993145923, "loss": 0.6287, "step": 19410 }, { "epoch": 0.9645375980927784, "grad_norm": 0.12060546875, "learning_rate": 0.0007228409655309427, "loss": 0.6506, "step": 19420 }, { "epoch": 0.9650342703883977, "grad_norm": 0.11083984375, "learning_rate": 0.0007228012317472931, "loss": 0.6512, "step": 19430 }, { "epoch": 0.9655309426840171, "grad_norm": 0.1103515625, "learning_rate": 0.0007227614979636437, "loss": 0.6371, "step": 19440 }, { "epoch": 0.9660276149796364, "grad_norm": 0.1240234375, "learning_rate": 0.0007227217641799941, "loss": 0.6314, "step": 19450 }, { "epoch": 0.9665242872752557, "grad_norm": 0.125, "learning_rate": 0.0007226820303963445, "loss": 0.633, "step": 19460 }, { "epoch": 0.9670209595708751, "grad_norm": 0.09716796875, "learning_rate": 0.000722642296612695, "loss": 0.6049, "step": 19470 }, { "epoch": 0.9675176318664945, "grad_norm": 0.123046875, "learning_rate": 0.0007226025628290454, "loss": 0.6461, "step": 19480 }, { "epoch": 0.9680143041621139, "grad_norm": 0.11572265625, "learning_rate": 0.0007225628290453959, "loss": 0.6392, "step": 19490 }, { "epoch": 0.9685109764577332, "grad_norm": 0.109375, "learning_rate": 0.0007225230952617463, "loss": 0.6055, "step": 19500 }, { "epoch": 0.9690076487533525, "grad_norm": 0.11669921875, "learning_rate": 0.0007224833614780968, "loss": 0.6556, "step": 19510 }, { "epoch": 0.9695043210489719, "grad_norm": 0.1005859375, "learning_rate": 0.0007224436276944472, "loss": 0.6339, "step": 19520 }, { "epoch": 0.9700009933445912, "grad_norm": 0.10400390625, "learning_rate": 0.0007224038939107976, "loss": 0.655, "step": 19530 }, { "epoch": 0.9704976656402106, "grad_norm": 0.1708984375, "learning_rate": 0.0007223641601271482, "loss": 0.6511, "step": 19540 }, { "epoch": 0.9709943379358299, "grad_norm": 0.1064453125, "learning_rate": 0.0007223244263434986, "loss": 0.6243, "step": 19550 }, { "epoch": 0.9714910102314492, "grad_norm": 0.10302734375, "learning_rate": 0.000722284692559849, "loss": 0.6578, "step": 19560 }, { "epoch": 0.9719876825270687, "grad_norm": 0.103515625, "learning_rate": 0.0007222449587761995, "loss": 0.6603, "step": 19570 }, { "epoch": 0.972484354822688, "grad_norm": 0.1171875, "learning_rate": 0.0007222052249925499, "loss": 0.6494, "step": 19580 }, { "epoch": 0.9729810271183074, "grad_norm": 0.11669921875, "learning_rate": 0.0007221654912089004, "loss": 0.6439, "step": 19590 }, { "epoch": 0.9734776994139267, "grad_norm": 0.10302734375, "learning_rate": 0.0007221257574252509, "loss": 0.6378, "step": 19600 }, { "epoch": 0.973974371709546, "grad_norm": 0.1025390625, "learning_rate": 0.0007220860236416013, "loss": 0.6525, "step": 19610 }, { "epoch": 0.9744710440051654, "grad_norm": 0.1318359375, "learning_rate": 0.0007220462898579517, "loss": 0.6577, "step": 19620 }, { "epoch": 0.9749677163007847, "grad_norm": 0.11083984375, "learning_rate": 0.0007220065560743022, "loss": 0.6399, "step": 19630 }, { "epoch": 0.975464388596404, "grad_norm": 0.12890625, "learning_rate": 0.0007219668222906527, "loss": 0.6335, "step": 19640 }, { "epoch": 0.9759610608920234, "grad_norm": 0.1044921875, "learning_rate": 0.0007219270885070031, "loss": 0.6269, "step": 19650 }, { "epoch": 0.9764577331876428, "grad_norm": 0.11669921875, "learning_rate": 0.0007218873547233536, "loss": 0.653, "step": 19660 }, { "epoch": 0.9769544054832622, "grad_norm": 0.107421875, "learning_rate": 0.000721847620939704, "loss": 0.6416, "step": 19670 }, { "epoch": 0.9774510777788815, "grad_norm": 0.1083984375, "learning_rate": 0.0007218078871560544, "loss": 0.6566, "step": 19680 }, { "epoch": 0.9779477500745009, "grad_norm": 0.1416015625, "learning_rate": 0.0007217681533724049, "loss": 0.6215, "step": 19690 }, { "epoch": 0.9784444223701202, "grad_norm": 0.11865234375, "learning_rate": 0.0007217284195887554, "loss": 0.6143, "step": 19700 }, { "epoch": 0.9789410946657395, "grad_norm": 0.11181640625, "learning_rate": 0.0007216886858051059, "loss": 0.6546, "step": 19710 }, { "epoch": 0.9794377669613589, "grad_norm": 0.1064453125, "learning_rate": 0.0007216489520214562, "loss": 0.6309, "step": 19720 }, { "epoch": 0.9799344392569782, "grad_norm": 0.1181640625, "learning_rate": 0.0007216092182378067, "loss": 0.6613, "step": 19730 }, { "epoch": 0.9804311115525975, "grad_norm": 0.11767578125, "learning_rate": 0.0007215694844541573, "loss": 0.634, "step": 19740 }, { "epoch": 0.980927783848217, "grad_norm": 0.1240234375, "learning_rate": 0.0007215297506705076, "loss": 0.6508, "step": 19750 }, { "epoch": 0.9814244561438363, "grad_norm": 0.12060546875, "learning_rate": 0.0007214900168868581, "loss": 0.6455, "step": 19760 }, { "epoch": 0.9819211284394557, "grad_norm": 0.1162109375, "learning_rate": 0.0007214502831032085, "loss": 0.6486, "step": 19770 }, { "epoch": 0.982417800735075, "grad_norm": 0.12060546875, "learning_rate": 0.0007214105493195589, "loss": 0.6511, "step": 19780 }, { "epoch": 0.9829144730306943, "grad_norm": 0.10498046875, "learning_rate": 0.0007213708155359095, "loss": 0.6744, "step": 19790 }, { "epoch": 0.9834111453263137, "grad_norm": 0.10888671875, "learning_rate": 0.0007213310817522599, "loss": 0.6283, "step": 19800 }, { "epoch": 0.983907817621933, "grad_norm": 0.15625, "learning_rate": 0.0007212913479686103, "loss": 0.666, "step": 19810 }, { "epoch": 0.9844044899175524, "grad_norm": 0.12890625, "learning_rate": 0.0007212516141849608, "loss": 0.6695, "step": 19820 }, { "epoch": 0.9849011622131717, "grad_norm": 0.109375, "learning_rate": 0.0007212118804013112, "loss": 0.6422, "step": 19830 }, { "epoch": 0.9853978345087911, "grad_norm": 0.1083984375, "learning_rate": 0.0007211721466176617, "loss": 0.6549, "step": 19840 }, { "epoch": 0.9858945068044105, "grad_norm": 0.1025390625, "learning_rate": 0.0007211324128340122, "loss": 0.6556, "step": 19850 }, { "epoch": 0.9863911791000298, "grad_norm": 0.1171875, "learning_rate": 0.0007210926790503626, "loss": 0.645, "step": 19860 }, { "epoch": 0.9868878513956492, "grad_norm": 0.130859375, "learning_rate": 0.0007210529452667131, "loss": 0.621, "step": 19870 }, { "epoch": 0.9873845236912685, "grad_norm": 0.09716796875, "learning_rate": 0.0007210132114830635, "loss": 0.6638, "step": 19880 }, { "epoch": 0.9878811959868878, "grad_norm": 0.130859375, "learning_rate": 0.000720973477699414, "loss": 0.6561, "step": 19890 }, { "epoch": 0.9883778682825072, "grad_norm": 0.10400390625, "learning_rate": 0.0007209337439157645, "loss": 0.6335, "step": 19900 }, { "epoch": 0.9888745405781265, "grad_norm": 0.0986328125, "learning_rate": 0.0007208940101321148, "loss": 0.6598, "step": 19910 }, { "epoch": 0.9893712128737459, "grad_norm": 0.11962890625, "learning_rate": 0.0007208542763484653, "loss": 0.6381, "step": 19920 }, { "epoch": 0.9898678851693653, "grad_norm": 0.1396484375, "learning_rate": 0.0007208145425648158, "loss": 0.6276, "step": 19930 }, { "epoch": 0.9903645574649846, "grad_norm": 0.1171875, "learning_rate": 0.0007207748087811662, "loss": 0.6665, "step": 19940 }, { "epoch": 0.990861229760604, "grad_norm": 0.1064453125, "learning_rate": 0.0007207350749975167, "loss": 0.6354, "step": 19950 }, { "epoch": 0.9913579020562233, "grad_norm": 0.10888671875, "learning_rate": 0.0007206953412138671, "loss": 0.6357, "step": 19960 }, { "epoch": 0.9918545743518427, "grad_norm": 0.1064453125, "learning_rate": 0.0007206556074302175, "loss": 0.6177, "step": 19970 }, { "epoch": 0.992351246647462, "grad_norm": 0.107421875, "learning_rate": 0.000720615873646568, "loss": 0.6346, "step": 19980 }, { "epoch": 0.9928479189430813, "grad_norm": 0.12255859375, "learning_rate": 0.0007205761398629185, "loss": 0.6313, "step": 19990 }, { "epoch": 0.9933445912387007, "grad_norm": 0.10986328125, "learning_rate": 0.0007205364060792689, "loss": 0.6217, "step": 20000 }, { "epoch": 0.99384126353432, "grad_norm": 0.134765625, "learning_rate": 0.0007204966722956194, "loss": 0.6423, "step": 20010 }, { "epoch": 0.9943379358299395, "grad_norm": 0.10205078125, "learning_rate": 0.0007204569385119698, "loss": 0.6366, "step": 20020 }, { "epoch": 0.9948346081255588, "grad_norm": 0.1376953125, "learning_rate": 0.0007204172047283203, "loss": 0.6465, "step": 20030 }, { "epoch": 0.9953312804211781, "grad_norm": 0.09912109375, "learning_rate": 0.0007203774709446708, "loss": 0.6485, "step": 20040 }, { "epoch": 0.9958279527167975, "grad_norm": 0.09619140625, "learning_rate": 0.0007203377371610212, "loss": 0.6753, "step": 20050 }, { "epoch": 0.9963246250124168, "grad_norm": 0.09326171875, "learning_rate": 0.0007202980033773717, "loss": 0.6464, "step": 20060 }, { "epoch": 0.9968212973080361, "grad_norm": 0.1103515625, "learning_rate": 0.0007202582695937221, "loss": 0.6352, "step": 20070 }, { "epoch": 0.9973179696036555, "grad_norm": 0.125, "learning_rate": 0.0007202185358100725, "loss": 0.6392, "step": 20080 }, { "epoch": 0.9978146418992748, "grad_norm": 0.095703125, "learning_rate": 0.0007201788020264231, "loss": 0.6418, "step": 20090 }, { "epoch": 0.9983113141948942, "grad_norm": 0.095703125, "learning_rate": 0.0007201390682427734, "loss": 0.6289, "step": 20100 }, { "epoch": 0.9988079864905136, "grad_norm": 0.09814453125, "learning_rate": 0.0007200993344591239, "loss": 0.6458, "step": 20110 }, { "epoch": 0.999304658786133, "grad_norm": 0.109375, "learning_rate": 0.0007200596006754744, "loss": 0.6511, "step": 20120 }, { "epoch": 0.9998013310817523, "grad_norm": 0.11328125, "learning_rate": 0.0007200198668918247, "loss": 0.6132, "step": 20130 }, { "epoch": 1.0002980033773716, "grad_norm": 0.11962890625, "learning_rate": 0.0007199801331081753, "loss": 0.6431, "step": 20140 }, { "epoch": 1.0007946756729909, "grad_norm": 0.11328125, "learning_rate": 0.0007199403993245258, "loss": 0.6414, "step": 20150 }, { "epoch": 1.0012913479686103, "grad_norm": 0.1142578125, "learning_rate": 0.0007199006655408762, "loss": 0.6431, "step": 20160 }, { "epoch": 1.0017880202642298, "grad_norm": 0.09228515625, "learning_rate": 0.0007198609317572266, "loss": 0.6413, "step": 20170 }, { "epoch": 1.002284692559849, "grad_norm": 0.0966796875, "learning_rate": 0.000719821197973577, "loss": 0.6061, "step": 20180 }, { "epoch": 1.0027813648554684, "grad_norm": 0.1591796875, "learning_rate": 0.0007197814641899276, "loss": 0.6099, "step": 20190 }, { "epoch": 1.0032780371510877, "grad_norm": 0.11474609375, "learning_rate": 0.000719741730406278, "loss": 0.6247, "step": 20200 }, { "epoch": 1.003774709446707, "grad_norm": 0.171875, "learning_rate": 0.0007197019966226284, "loss": 0.6223, "step": 20210 }, { "epoch": 1.0042713817423263, "grad_norm": 0.10498046875, "learning_rate": 0.0007196622628389789, "loss": 0.6178, "step": 20220 }, { "epoch": 1.0047680540379458, "grad_norm": 0.1025390625, "learning_rate": 0.0007196225290553293, "loss": 0.6223, "step": 20230 }, { "epoch": 1.005264726333565, "grad_norm": 0.1259765625, "learning_rate": 0.0007195827952716798, "loss": 0.6333, "step": 20240 }, { "epoch": 1.0057613986291845, "grad_norm": 0.103515625, "learning_rate": 0.0007195430614880303, "loss": 0.6397, "step": 20250 }, { "epoch": 1.006258070924804, "grad_norm": 0.109375, "learning_rate": 0.0007195033277043807, "loss": 0.6309, "step": 20260 }, { "epoch": 1.0067547432204231, "grad_norm": 0.1259765625, "learning_rate": 0.0007194635939207311, "loss": 0.6561, "step": 20270 }, { "epoch": 1.0072514155160426, "grad_norm": 0.1357421875, "learning_rate": 0.0007194238601370816, "loss": 0.6394, "step": 20280 }, { "epoch": 1.0077480878116618, "grad_norm": 0.1435546875, "learning_rate": 0.0007193841263534319, "loss": 0.6141, "step": 20290 }, { "epoch": 1.0082447601072813, "grad_norm": 0.1279296875, "learning_rate": 0.0007193443925697825, "loss": 0.6236, "step": 20300 }, { "epoch": 1.0087414324029005, "grad_norm": 0.09716796875, "learning_rate": 0.000719304658786133, "loss": 0.6215, "step": 20310 }, { "epoch": 1.00923810469852, "grad_norm": 0.1064453125, "learning_rate": 0.0007192649250024834, "loss": 0.5914, "step": 20320 }, { "epoch": 1.0097347769941392, "grad_norm": 0.10400390625, "learning_rate": 0.0007192251912188338, "loss": 0.6805, "step": 20330 }, { "epoch": 1.0102314492897586, "grad_norm": 0.09228515625, "learning_rate": 0.0007191854574351844, "loss": 0.6498, "step": 20340 }, { "epoch": 1.010728121585378, "grad_norm": 0.1171875, "learning_rate": 0.0007191457236515348, "loss": 0.6419, "step": 20350 }, { "epoch": 1.0112247938809973, "grad_norm": 0.09912109375, "learning_rate": 0.0007191059898678852, "loss": 0.614, "step": 20360 }, { "epoch": 1.0117214661766167, "grad_norm": 0.10009765625, "learning_rate": 0.0007190662560842356, "loss": 0.6385, "step": 20370 }, { "epoch": 1.012218138472236, "grad_norm": 0.142578125, "learning_rate": 0.0007190265223005861, "loss": 0.6472, "step": 20380 }, { "epoch": 1.0127148107678554, "grad_norm": 0.1142578125, "learning_rate": 0.0007189867885169365, "loss": 0.6339, "step": 20390 }, { "epoch": 1.0132114830634746, "grad_norm": 0.1572265625, "learning_rate": 0.000718947054733287, "loss": 0.629, "step": 20400 }, { "epoch": 1.013708155359094, "grad_norm": 0.1142578125, "learning_rate": 0.0007189073209496375, "loss": 0.6393, "step": 20410 }, { "epoch": 1.0142048276547133, "grad_norm": 0.09521484375, "learning_rate": 0.0007188675871659879, "loss": 0.615, "step": 20420 }, { "epoch": 1.0147014999503328, "grad_norm": 0.10595703125, "learning_rate": 0.0007188278533823383, "loss": 0.6203, "step": 20430 }, { "epoch": 1.0151981722459522, "grad_norm": 0.142578125, "learning_rate": 0.0007187881195986889, "loss": 0.6366, "step": 20440 }, { "epoch": 1.0156948445415714, "grad_norm": 0.12060546875, "learning_rate": 0.0007187483858150393, "loss": 0.6522, "step": 20450 }, { "epoch": 1.016191516837191, "grad_norm": 0.12353515625, "learning_rate": 0.0007187086520313897, "loss": 0.6664, "step": 20460 }, { "epoch": 1.0166881891328101, "grad_norm": 0.11767578125, "learning_rate": 0.0007186689182477402, "loss": 0.6364, "step": 20470 }, { "epoch": 1.0171848614284296, "grad_norm": 0.1103515625, "learning_rate": 0.0007186291844640906, "loss": 0.6269, "step": 20480 }, { "epoch": 1.0176815337240488, "grad_norm": 0.1103515625, "learning_rate": 0.000718589450680441, "loss": 0.6337, "step": 20490 }, { "epoch": 1.0181782060196682, "grad_norm": 0.146484375, "learning_rate": 0.0007185497168967916, "loss": 0.6397, "step": 20500 }, { "epoch": 1.0186748783152875, "grad_norm": 0.09716796875, "learning_rate": 0.000718509983113142, "loss": 0.6124, "step": 20510 }, { "epoch": 1.019171550610907, "grad_norm": 0.1103515625, "learning_rate": 0.0007184702493294924, "loss": 0.6223, "step": 20520 }, { "epoch": 1.0196682229065264, "grad_norm": 0.0986328125, "learning_rate": 0.0007184305155458429, "loss": 0.6654, "step": 20530 }, { "epoch": 1.0201648952021456, "grad_norm": 0.10302734375, "learning_rate": 0.0007183907817621933, "loss": 0.6354, "step": 20540 }, { "epoch": 1.020661567497765, "grad_norm": 0.11376953125, "learning_rate": 0.0007183510479785438, "loss": 0.6546, "step": 20550 }, { "epoch": 1.0211582397933843, "grad_norm": 0.1240234375, "learning_rate": 0.0007183113141948942, "loss": 0.6305, "step": 20560 }, { "epoch": 1.0216549120890037, "grad_norm": 0.10009765625, "learning_rate": 0.0007182715804112447, "loss": 0.6463, "step": 20570 }, { "epoch": 1.022151584384623, "grad_norm": 0.1123046875, "learning_rate": 0.0007182318466275951, "loss": 0.6449, "step": 20580 }, { "epoch": 1.0226482566802424, "grad_norm": 0.09521484375, "learning_rate": 0.0007181921128439455, "loss": 0.6536, "step": 20590 }, { "epoch": 1.0231449289758616, "grad_norm": 0.10986328125, "learning_rate": 0.0007181523790602961, "loss": 0.6502, "step": 20600 }, { "epoch": 1.023641601271481, "grad_norm": 0.11962890625, "learning_rate": 0.0007181126452766466, "loss": 0.6262, "step": 20610 }, { "epoch": 1.0241382735671005, "grad_norm": 0.1259765625, "learning_rate": 0.0007180729114929969, "loss": 0.6113, "step": 20620 }, { "epoch": 1.0246349458627197, "grad_norm": 0.1708984375, "learning_rate": 0.0007180331777093474, "loss": 0.6546, "step": 20630 }, { "epoch": 1.0251316181583392, "grad_norm": 0.09228515625, "learning_rate": 0.0007179934439256978, "loss": 0.6563, "step": 20640 }, { "epoch": 1.0256282904539584, "grad_norm": 0.09912109375, "learning_rate": 0.0007179537101420483, "loss": 0.6274, "step": 20650 }, { "epoch": 1.0261249627495779, "grad_norm": 0.1015625, "learning_rate": 0.0007179139763583988, "loss": 0.636, "step": 20660 }, { "epoch": 1.026621635045197, "grad_norm": 0.09912109375, "learning_rate": 0.0007178742425747492, "loss": 0.6469, "step": 20670 }, { "epoch": 1.0271183073408165, "grad_norm": 0.109375, "learning_rate": 0.0007178345087910996, "loss": 0.6293, "step": 20680 }, { "epoch": 1.0276149796364358, "grad_norm": 0.11474609375, "learning_rate": 0.0007177947750074501, "loss": 0.6118, "step": 20690 }, { "epoch": 1.0281116519320552, "grad_norm": 0.10791015625, "learning_rate": 0.0007177550412238006, "loss": 0.6164, "step": 20700 }, { "epoch": 1.0286083242276747, "grad_norm": 0.134765625, "learning_rate": 0.000717715307440151, "loss": 0.6503, "step": 20710 }, { "epoch": 1.029104996523294, "grad_norm": 0.10546875, "learning_rate": 0.0007176755736565015, "loss": 0.6428, "step": 20720 }, { "epoch": 1.0296016688189134, "grad_norm": 0.099609375, "learning_rate": 0.0007176358398728519, "loss": 0.6194, "step": 20730 }, { "epoch": 1.0300983411145326, "grad_norm": 0.12109375, "learning_rate": 0.0007175961060892023, "loss": 0.6299, "step": 20740 }, { "epoch": 1.030595013410152, "grad_norm": 0.1298828125, "learning_rate": 0.0007175563723055529, "loss": 0.6277, "step": 20750 }, { "epoch": 1.0310916857057713, "grad_norm": 0.10009765625, "learning_rate": 0.0007175166385219033, "loss": 0.6155, "step": 20760 }, { "epoch": 1.0315883580013907, "grad_norm": 0.1376953125, "learning_rate": 0.0007174769047382538, "loss": 0.6441, "step": 20770 }, { "epoch": 1.03208503029701, "grad_norm": 0.1005859375, "learning_rate": 0.0007174371709546041, "loss": 0.6151, "step": 20780 }, { "epoch": 1.0325817025926294, "grad_norm": 0.099609375, "learning_rate": 0.0007173974371709546, "loss": 0.6207, "step": 20790 }, { "epoch": 1.0330783748882488, "grad_norm": 0.10888671875, "learning_rate": 0.0007173577033873052, "loss": 0.6286, "step": 20800 }, { "epoch": 1.033575047183868, "grad_norm": 0.1162109375, "learning_rate": 0.0007173179696036555, "loss": 0.661, "step": 20810 }, { "epoch": 1.0340717194794875, "grad_norm": 0.1416015625, "learning_rate": 0.000717278235820006, "loss": 0.6232, "step": 20820 }, { "epoch": 1.0345683917751067, "grad_norm": 0.10888671875, "learning_rate": 0.0007172385020363564, "loss": 0.6403, "step": 20830 }, { "epoch": 1.0350650640707262, "grad_norm": 0.126953125, "learning_rate": 0.0007171987682527068, "loss": 0.6231, "step": 20840 }, { "epoch": 1.0355617363663454, "grad_norm": 0.11474609375, "learning_rate": 0.0007171590344690574, "loss": 0.6646, "step": 20850 }, { "epoch": 1.0360584086619649, "grad_norm": 0.10888671875, "learning_rate": 0.0007171193006854078, "loss": 0.6139, "step": 20860 }, { "epoch": 1.036555080957584, "grad_norm": 0.1328125, "learning_rate": 0.0007170795669017582, "loss": 0.6497, "step": 20870 }, { "epoch": 1.0370517532532035, "grad_norm": 0.1015625, "learning_rate": 0.0007170398331181087, "loss": 0.644, "step": 20880 }, { "epoch": 1.037548425548823, "grad_norm": 0.10498046875, "learning_rate": 0.0007170000993344591, "loss": 0.6199, "step": 20890 }, { "epoch": 1.0380450978444422, "grad_norm": 0.11767578125, "learning_rate": 0.0007169603655508097, "loss": 0.6642, "step": 20900 }, { "epoch": 1.0385417701400617, "grad_norm": 0.1103515625, "learning_rate": 0.0007169206317671601, "loss": 0.6234, "step": 20910 }, { "epoch": 1.0390384424356809, "grad_norm": 0.09716796875, "learning_rate": 0.0007168808979835105, "loss": 0.6432, "step": 20920 }, { "epoch": 1.0395351147313003, "grad_norm": 0.0927734375, "learning_rate": 0.000716841164199861, "loss": 0.6174, "step": 20930 }, { "epoch": 1.0400317870269196, "grad_norm": 0.1064453125, "learning_rate": 0.0007168014304162114, "loss": 0.6398, "step": 20940 }, { "epoch": 1.040528459322539, "grad_norm": 0.109375, "learning_rate": 0.0007167616966325619, "loss": 0.6372, "step": 20950 }, { "epoch": 1.0410251316181582, "grad_norm": 0.10791015625, "learning_rate": 0.0007167219628489124, "loss": 0.6382, "step": 20960 }, { "epoch": 1.0415218039137777, "grad_norm": 0.126953125, "learning_rate": 0.0007166822290652627, "loss": 0.6185, "step": 20970 }, { "epoch": 1.0420184762093971, "grad_norm": 0.11767578125, "learning_rate": 0.0007166424952816132, "loss": 0.6185, "step": 20980 }, { "epoch": 1.0425151485050164, "grad_norm": 0.146484375, "learning_rate": 0.0007166027614979637, "loss": 0.6616, "step": 20990 }, { "epoch": 1.0430118208006358, "grad_norm": 0.10888671875, "learning_rate": 0.0007165630277143141, "loss": 0.6378, "step": 21000 }, { "epoch": 1.043508493096255, "grad_norm": 0.11328125, "learning_rate": 0.0007165232939306646, "loss": 0.6186, "step": 21010 }, { "epoch": 1.0440051653918745, "grad_norm": 0.1240234375, "learning_rate": 0.0007164835601470151, "loss": 0.6414, "step": 21020 }, { "epoch": 1.0445018376874937, "grad_norm": 0.1298828125, "learning_rate": 0.0007164438263633654, "loss": 0.6409, "step": 21030 }, { "epoch": 1.0449985099831132, "grad_norm": 0.11962890625, "learning_rate": 0.0007164040925797159, "loss": 0.6121, "step": 21040 }, { "epoch": 1.0454951822787324, "grad_norm": 0.10302734375, "learning_rate": 0.0007163643587960664, "loss": 0.614, "step": 21050 }, { "epoch": 1.0459918545743518, "grad_norm": 0.12451171875, "learning_rate": 0.0007163246250124169, "loss": 0.6208, "step": 21060 }, { "epoch": 1.0464885268699713, "grad_norm": 0.10888671875, "learning_rate": 0.0007162848912287673, "loss": 0.6057, "step": 21070 }, { "epoch": 1.0469851991655905, "grad_norm": 0.11376953125, "learning_rate": 0.0007162451574451177, "loss": 0.6317, "step": 21080 }, { "epoch": 1.04748187146121, "grad_norm": 0.1630859375, "learning_rate": 0.0007162054236614682, "loss": 0.6259, "step": 21090 }, { "epoch": 1.0479785437568292, "grad_norm": 0.1298828125, "learning_rate": 0.0007161656898778187, "loss": 0.6358, "step": 21100 }, { "epoch": 1.0484752160524486, "grad_norm": 0.126953125, "learning_rate": 0.0007161259560941691, "loss": 0.6219, "step": 21110 }, { "epoch": 1.0489718883480679, "grad_norm": 0.11328125, "learning_rate": 0.0007160862223105196, "loss": 0.6467, "step": 21120 }, { "epoch": 1.0494685606436873, "grad_norm": 0.10693359375, "learning_rate": 0.00071604648852687, "loss": 0.5926, "step": 21130 }, { "epoch": 1.0499652329393065, "grad_norm": 0.15234375, "learning_rate": 0.0007160067547432204, "loss": 0.6026, "step": 21140 }, { "epoch": 1.050461905234926, "grad_norm": 0.09814453125, "learning_rate": 0.000715967020959571, "loss": 0.6293, "step": 21150 }, { "epoch": 1.0509585775305454, "grad_norm": 0.10498046875, "learning_rate": 0.0007159272871759213, "loss": 0.6204, "step": 21160 }, { "epoch": 1.0514552498261647, "grad_norm": 0.1513671875, "learning_rate": 0.0007158875533922718, "loss": 0.6413, "step": 21170 }, { "epoch": 1.0519519221217841, "grad_norm": 0.095703125, "learning_rate": 0.0007158478196086223, "loss": 0.6339, "step": 21180 }, { "epoch": 1.0524485944174033, "grad_norm": 0.1064453125, "learning_rate": 0.0007158080858249726, "loss": 0.6545, "step": 21190 }, { "epoch": 1.0529452667130228, "grad_norm": 0.10400390625, "learning_rate": 0.0007157683520413232, "loss": 0.6232, "step": 21200 }, { "epoch": 1.053441939008642, "grad_norm": 0.10302734375, "learning_rate": 0.0007157286182576737, "loss": 0.6383, "step": 21210 }, { "epoch": 1.0539386113042615, "grad_norm": 0.09814453125, "learning_rate": 0.0007156888844740241, "loss": 0.647, "step": 21220 }, { "epoch": 1.0544352835998807, "grad_norm": 0.11279296875, "learning_rate": 0.0007156491506903745, "loss": 0.6368, "step": 21230 }, { "epoch": 1.0549319558955002, "grad_norm": 0.134765625, "learning_rate": 0.0007156094169067249, "loss": 0.637, "step": 21240 }, { "epoch": 1.0554286281911196, "grad_norm": 0.111328125, "learning_rate": 0.0007155696831230755, "loss": 0.6766, "step": 21250 }, { "epoch": 1.0559253004867388, "grad_norm": 0.1298828125, "learning_rate": 0.0007155299493394259, "loss": 0.6468, "step": 21260 }, { "epoch": 1.0564219727823583, "grad_norm": 0.107421875, "learning_rate": 0.0007154902155557763, "loss": 0.6206, "step": 21270 }, { "epoch": 1.0569186450779775, "grad_norm": 0.0966796875, "learning_rate": 0.0007154504817721268, "loss": 0.6368, "step": 21280 }, { "epoch": 1.057415317373597, "grad_norm": 0.1005859375, "learning_rate": 0.0007154107479884772, "loss": 0.6367, "step": 21290 }, { "epoch": 1.0579119896692162, "grad_norm": 0.11083984375, "learning_rate": 0.0007153710142048277, "loss": 0.593, "step": 21300 }, { "epoch": 1.0584086619648356, "grad_norm": 0.0986328125, "learning_rate": 0.0007153312804211782, "loss": 0.6003, "step": 21310 }, { "epoch": 1.0589053342604549, "grad_norm": 0.125, "learning_rate": 0.0007152915466375286, "loss": 0.6045, "step": 21320 }, { "epoch": 1.0594020065560743, "grad_norm": 0.1044921875, "learning_rate": 0.000715251812853879, "loss": 0.6367, "step": 21330 }, { "epoch": 1.0598986788516938, "grad_norm": 0.1005859375, "learning_rate": 0.0007152120790702295, "loss": 0.616, "step": 21340 }, { "epoch": 1.060395351147313, "grad_norm": 0.10693359375, "learning_rate": 0.00071517234528658, "loss": 0.5995, "step": 21350 }, { "epoch": 1.0608920234429324, "grad_norm": 0.12890625, "learning_rate": 0.0007151326115029304, "loss": 0.6266, "step": 21360 }, { "epoch": 1.0613886957385517, "grad_norm": 0.11962890625, "learning_rate": 0.0007150928777192809, "loss": 0.6305, "step": 21370 }, { "epoch": 1.061885368034171, "grad_norm": 0.11083984375, "learning_rate": 0.0007150531439356313, "loss": 0.6097, "step": 21380 }, { "epoch": 1.0623820403297903, "grad_norm": 0.1357421875, "learning_rate": 0.0007150134101519817, "loss": 0.6639, "step": 21390 }, { "epoch": 1.0628787126254098, "grad_norm": 0.1201171875, "learning_rate": 0.0007149736763683323, "loss": 0.5871, "step": 21400 }, { "epoch": 1.063375384921029, "grad_norm": 0.1318359375, "learning_rate": 0.0007149339425846827, "loss": 0.6388, "step": 21410 }, { "epoch": 1.0638720572166485, "grad_norm": 0.09814453125, "learning_rate": 0.0007148942088010331, "loss": 0.6103, "step": 21420 }, { "epoch": 1.064368729512268, "grad_norm": 0.1591796875, "learning_rate": 0.0007148544750173835, "loss": 0.6276, "step": 21430 }, { "epoch": 1.0648654018078871, "grad_norm": 0.109375, "learning_rate": 0.000714814741233734, "loss": 0.6362, "step": 21440 }, { "epoch": 1.0653620741035066, "grad_norm": 0.1005859375, "learning_rate": 0.0007147750074500845, "loss": 0.6413, "step": 21450 }, { "epoch": 1.0658587463991258, "grad_norm": 0.1005859375, "learning_rate": 0.0007147352736664349, "loss": 0.5809, "step": 21460 }, { "epoch": 1.0663554186947453, "grad_norm": 0.1015625, "learning_rate": 0.0007146955398827854, "loss": 0.6338, "step": 21470 }, { "epoch": 1.0668520909903645, "grad_norm": 0.10595703125, "learning_rate": 0.0007146558060991358, "loss": 0.6367, "step": 21480 }, { "epoch": 1.067348763285984, "grad_norm": 0.109375, "learning_rate": 0.0007146160723154862, "loss": 0.6383, "step": 21490 }, { "epoch": 1.0678454355816032, "grad_norm": 0.10693359375, "learning_rate": 0.0007145763385318368, "loss": 0.6074, "step": 21500 }, { "epoch": 1.0683421078772226, "grad_norm": 0.10888671875, "learning_rate": 0.0007145366047481872, "loss": 0.606, "step": 21510 }, { "epoch": 1.068838780172842, "grad_norm": 0.15234375, "learning_rate": 0.0007144968709645376, "loss": 0.641, "step": 21520 }, { "epoch": 1.0693354524684613, "grad_norm": 0.10400390625, "learning_rate": 0.0007144571371808881, "loss": 0.6228, "step": 21530 }, { "epoch": 1.0698321247640807, "grad_norm": 0.158203125, "learning_rate": 0.0007144174033972385, "loss": 0.6205, "step": 21540 }, { "epoch": 1.0703287970597, "grad_norm": 0.10498046875, "learning_rate": 0.000714377669613589, "loss": 0.6452, "step": 21550 }, { "epoch": 1.0708254693553194, "grad_norm": 0.1005859375, "learning_rate": 0.0007143379358299395, "loss": 0.638, "step": 21560 }, { "epoch": 1.0713221416509386, "grad_norm": 0.09423828125, "learning_rate": 0.0007142982020462899, "loss": 0.6696, "step": 21570 }, { "epoch": 1.071818813946558, "grad_norm": 0.09814453125, "learning_rate": 0.0007142584682626403, "loss": 0.6129, "step": 21580 }, { "epoch": 1.0723154862421773, "grad_norm": 0.11572265625, "learning_rate": 0.0007142187344789908, "loss": 0.6459, "step": 21590 }, { "epoch": 1.0728121585377968, "grad_norm": 0.123046875, "learning_rate": 0.0007141790006953413, "loss": 0.6551, "step": 21600 }, { "epoch": 1.0733088308334162, "grad_norm": 0.1025390625, "learning_rate": 0.0007141392669116917, "loss": 0.6482, "step": 21610 }, { "epoch": 1.0738055031290354, "grad_norm": 0.103515625, "learning_rate": 0.0007140995331280422, "loss": 0.5979, "step": 21620 }, { "epoch": 1.074302175424655, "grad_norm": 0.10791015625, "learning_rate": 0.0007140597993443926, "loss": 0.6278, "step": 21630 }, { "epoch": 1.0747988477202741, "grad_norm": 0.10595703125, "learning_rate": 0.000714020065560743, "loss": 0.627, "step": 21640 }, { "epoch": 1.0752955200158936, "grad_norm": 0.09375, "learning_rate": 0.0007139803317770934, "loss": 0.6193, "step": 21650 }, { "epoch": 1.0757921923115128, "grad_norm": 0.1279296875, "learning_rate": 0.000713940597993444, "loss": 0.6542, "step": 21660 }, { "epoch": 1.0762888646071322, "grad_norm": 0.107421875, "learning_rate": 0.0007139008642097945, "loss": 0.6136, "step": 21670 }, { "epoch": 1.0767855369027515, "grad_norm": 0.10693359375, "learning_rate": 0.0007138611304261448, "loss": 0.6423, "step": 21680 }, { "epoch": 1.077282209198371, "grad_norm": 0.115234375, "learning_rate": 0.0007138213966424953, "loss": 0.6457, "step": 21690 }, { "epoch": 1.0777788814939901, "grad_norm": 0.1259765625, "learning_rate": 0.0007137816628588457, "loss": 0.6406, "step": 21700 }, { "epoch": 1.0782755537896096, "grad_norm": 0.1044921875, "learning_rate": 0.0007137419290751962, "loss": 0.6145, "step": 21710 }, { "epoch": 1.078772226085229, "grad_norm": 0.1552734375, "learning_rate": 0.0007137021952915467, "loss": 0.6083, "step": 21720 }, { "epoch": 1.0792688983808483, "grad_norm": 0.12255859375, "learning_rate": 0.0007136624615078971, "loss": 0.6084, "step": 21730 }, { "epoch": 1.0797655706764677, "grad_norm": 0.10498046875, "learning_rate": 0.0007136227277242475, "loss": 0.6413, "step": 21740 }, { "epoch": 1.080262242972087, "grad_norm": 0.09814453125, "learning_rate": 0.000713582993940598, "loss": 0.6146, "step": 21750 }, { "epoch": 1.0807589152677064, "grad_norm": 0.11474609375, "learning_rate": 0.0007135432601569485, "loss": 0.6419, "step": 21760 }, { "epoch": 1.0812555875633256, "grad_norm": 0.09912109375, "learning_rate": 0.0007135035263732989, "loss": 0.6509, "step": 21770 }, { "epoch": 1.081752259858945, "grad_norm": 0.11474609375, "learning_rate": 0.0007134637925896494, "loss": 0.6484, "step": 21780 }, { "epoch": 1.0822489321545645, "grad_norm": 0.107421875, "learning_rate": 0.0007134240588059998, "loss": 0.6472, "step": 21790 }, { "epoch": 1.0827456044501838, "grad_norm": 0.134765625, "learning_rate": 0.0007133843250223504, "loss": 0.6434, "step": 21800 }, { "epoch": 1.0832422767458032, "grad_norm": 0.14453125, "learning_rate": 0.0007133445912387008, "loss": 0.6067, "step": 21810 }, { "epoch": 1.0837389490414224, "grad_norm": 0.1328125, "learning_rate": 0.0007133048574550512, "loss": 0.6255, "step": 21820 }, { "epoch": 1.0842356213370419, "grad_norm": 0.09521484375, "learning_rate": 0.0007132651236714017, "loss": 0.6374, "step": 21830 }, { "epoch": 1.084732293632661, "grad_norm": 0.087890625, "learning_rate": 0.000713225389887752, "loss": 0.6203, "step": 21840 }, { "epoch": 1.0852289659282806, "grad_norm": 0.1103515625, "learning_rate": 0.0007131856561041025, "loss": 0.6495, "step": 21850 }, { "epoch": 1.0857256382238998, "grad_norm": 0.1162109375, "learning_rate": 0.0007131459223204531, "loss": 0.6453, "step": 21860 }, { "epoch": 1.0862223105195192, "grad_norm": 0.09765625, "learning_rate": 0.0007131061885368034, "loss": 0.5869, "step": 21870 }, { "epoch": 1.0867189828151385, "grad_norm": 0.09326171875, "learning_rate": 0.0007130664547531539, "loss": 0.6145, "step": 21880 }, { "epoch": 1.087215655110758, "grad_norm": 0.1142578125, "learning_rate": 0.0007130267209695044, "loss": 0.6281, "step": 21890 }, { "epoch": 1.0877123274063774, "grad_norm": 0.123046875, "learning_rate": 0.0007129869871858547, "loss": 0.6336, "step": 21900 }, { "epoch": 1.0882089997019966, "grad_norm": 0.087890625, "learning_rate": 0.0007129472534022053, "loss": 0.6367, "step": 21910 }, { "epoch": 1.088705671997616, "grad_norm": 0.103515625, "learning_rate": 0.0007129075196185557, "loss": 0.6467, "step": 21920 }, { "epoch": 1.0892023442932353, "grad_norm": 0.10400390625, "learning_rate": 0.0007128677858349061, "loss": 0.633, "step": 21930 }, { "epoch": 1.0896990165888547, "grad_norm": 0.10107421875, "learning_rate": 0.0007128280520512566, "loss": 0.6505, "step": 21940 }, { "epoch": 1.090195688884474, "grad_norm": 0.115234375, "learning_rate": 0.000712788318267607, "loss": 0.6666, "step": 21950 }, { "epoch": 1.0906923611800934, "grad_norm": 0.11376953125, "learning_rate": 0.0007127485844839576, "loss": 0.6298, "step": 21960 }, { "epoch": 1.0911890334757128, "grad_norm": 0.11669921875, "learning_rate": 0.000712708850700308, "loss": 0.6172, "step": 21970 }, { "epoch": 1.091685705771332, "grad_norm": 0.1083984375, "learning_rate": 0.0007126691169166584, "loss": 0.6471, "step": 21980 }, { "epoch": 1.0921823780669515, "grad_norm": 0.10400390625, "learning_rate": 0.0007126293831330089, "loss": 0.649, "step": 21990 }, { "epoch": 1.0926790503625707, "grad_norm": 0.11328125, "learning_rate": 0.0007125896493493593, "loss": 0.5827, "step": 22000 }, { "epoch": 1.0931757226581902, "grad_norm": 0.1416015625, "learning_rate": 0.0007125499155657098, "loss": 0.6342, "step": 22010 }, { "epoch": 1.0936723949538094, "grad_norm": 0.107421875, "learning_rate": 0.0007125101817820603, "loss": 0.6023, "step": 22020 }, { "epoch": 1.0941690672494289, "grad_norm": 0.158203125, "learning_rate": 0.0007124704479984106, "loss": 0.6409, "step": 22030 }, { "epoch": 1.094665739545048, "grad_norm": 0.1279296875, "learning_rate": 0.0007124307142147611, "loss": 0.6335, "step": 22040 }, { "epoch": 1.0951624118406675, "grad_norm": 0.1328125, "learning_rate": 0.0007123909804311117, "loss": 0.6375, "step": 22050 }, { "epoch": 1.0956590841362868, "grad_norm": 0.1240234375, "learning_rate": 0.000712351246647462, "loss": 0.6516, "step": 22060 }, { "epoch": 1.0961557564319062, "grad_norm": 0.10595703125, "learning_rate": 0.0007123115128638125, "loss": 0.6722, "step": 22070 }, { "epoch": 1.0966524287275257, "grad_norm": 0.11669921875, "learning_rate": 0.000712271779080163, "loss": 0.6483, "step": 22080 }, { "epoch": 1.097149101023145, "grad_norm": 0.1025390625, "learning_rate": 0.0007122320452965133, "loss": 0.6566, "step": 22090 }, { "epoch": 1.0976457733187643, "grad_norm": 0.1123046875, "learning_rate": 0.0007121923115128638, "loss": 0.6415, "step": 22100 }, { "epoch": 1.0981424456143836, "grad_norm": 0.10302734375, "learning_rate": 0.0007121525777292143, "loss": 0.6417, "step": 22110 }, { "epoch": 1.098639117910003, "grad_norm": 0.0986328125, "learning_rate": 0.0007121128439455648, "loss": 0.6333, "step": 22120 }, { "epoch": 1.0991357902056222, "grad_norm": 0.10107421875, "learning_rate": 0.0007120731101619152, "loss": 0.6129, "step": 22130 }, { "epoch": 1.0996324625012417, "grad_norm": 0.09326171875, "learning_rate": 0.0007120333763782656, "loss": 0.5927, "step": 22140 }, { "epoch": 1.1001291347968611, "grad_norm": 0.1123046875, "learning_rate": 0.0007119936425946161, "loss": 0.627, "step": 22150 }, { "epoch": 1.1006258070924804, "grad_norm": 0.171875, "learning_rate": 0.0007119539088109666, "loss": 0.5961, "step": 22160 }, { "epoch": 1.1011224793880998, "grad_norm": 0.1396484375, "learning_rate": 0.000711914175027317, "loss": 0.6226, "step": 22170 }, { "epoch": 1.101619151683719, "grad_norm": 0.1044921875, "learning_rate": 0.0007118744412436675, "loss": 0.6391, "step": 22180 }, { "epoch": 1.1021158239793385, "grad_norm": 0.10595703125, "learning_rate": 0.0007118347074600179, "loss": 0.6212, "step": 22190 }, { "epoch": 1.1026124962749577, "grad_norm": 0.1396484375, "learning_rate": 0.0007117949736763683, "loss": 0.6268, "step": 22200 }, { "epoch": 1.1031091685705772, "grad_norm": 0.12060546875, "learning_rate": 0.0007117552398927189, "loss": 0.6589, "step": 22210 }, { "epoch": 1.1036058408661964, "grad_norm": 0.09326171875, "learning_rate": 0.0007117155061090692, "loss": 0.6267, "step": 22220 }, { "epoch": 1.1041025131618158, "grad_norm": 0.103515625, "learning_rate": 0.0007116757723254197, "loss": 0.613, "step": 22230 }, { "epoch": 1.104599185457435, "grad_norm": 0.10986328125, "learning_rate": 0.0007116360385417702, "loss": 0.6083, "step": 22240 }, { "epoch": 1.1050958577530545, "grad_norm": 0.1044921875, "learning_rate": 0.0007115963047581206, "loss": 0.613, "step": 22250 }, { "epoch": 1.105592530048674, "grad_norm": 0.1337890625, "learning_rate": 0.0007115565709744711, "loss": 0.5983, "step": 22260 }, { "epoch": 1.1060892023442932, "grad_norm": 0.1494140625, "learning_rate": 0.0007115168371908216, "loss": 0.6171, "step": 22270 }, { "epoch": 1.1065858746399126, "grad_norm": 0.11865234375, "learning_rate": 0.000711477103407172, "loss": 0.6404, "step": 22280 }, { "epoch": 1.1070825469355319, "grad_norm": 0.10595703125, "learning_rate": 0.0007114373696235224, "loss": 0.6187, "step": 22290 }, { "epoch": 1.1075792192311513, "grad_norm": 0.09716796875, "learning_rate": 0.0007113976358398728, "loss": 0.6012, "step": 22300 }, { "epoch": 1.1080758915267706, "grad_norm": 0.09423828125, "learning_rate": 0.0007113579020562234, "loss": 0.6123, "step": 22310 }, { "epoch": 1.10857256382239, "grad_norm": 0.10302734375, "learning_rate": 0.0007113181682725738, "loss": 0.6281, "step": 22320 }, { "epoch": 1.1090692361180094, "grad_norm": 0.0947265625, "learning_rate": 0.0007112784344889242, "loss": 0.6013, "step": 22330 }, { "epoch": 1.1095659084136287, "grad_norm": 0.1298828125, "learning_rate": 0.0007112387007052747, "loss": 0.6682, "step": 22340 }, { "epoch": 1.1100625807092481, "grad_norm": 0.09521484375, "learning_rate": 0.0007111989669216251, "loss": 0.6218, "step": 22350 }, { "epoch": 1.1105592530048674, "grad_norm": 0.1328125, "learning_rate": 0.0007111592331379756, "loss": 0.6293, "step": 22360 }, { "epoch": 1.1110559253004868, "grad_norm": 0.12158203125, "learning_rate": 0.0007111194993543261, "loss": 0.6096, "step": 22370 }, { "epoch": 1.111552597596106, "grad_norm": 0.107421875, "learning_rate": 0.0007110797655706765, "loss": 0.628, "step": 22380 }, { "epoch": 1.1120492698917255, "grad_norm": 0.103515625, "learning_rate": 0.0007110400317870269, "loss": 0.6113, "step": 22390 }, { "epoch": 1.1125459421873447, "grad_norm": 0.1396484375, "learning_rate": 0.0007110002980033774, "loss": 0.5945, "step": 22400 }, { "epoch": 1.1130426144829642, "grad_norm": 0.1533203125, "learning_rate": 0.0007109605642197279, "loss": 0.6667, "step": 22410 }, { "epoch": 1.1135392867785834, "grad_norm": 0.13671875, "learning_rate": 0.0007109208304360783, "loss": 0.6661, "step": 22420 }, { "epoch": 1.1140359590742028, "grad_norm": 0.1298828125, "learning_rate": 0.0007108810966524288, "loss": 0.6505, "step": 22430 }, { "epoch": 1.1145326313698223, "grad_norm": 0.1240234375, "learning_rate": 0.0007108413628687792, "loss": 0.6354, "step": 22440 }, { "epoch": 1.1150293036654415, "grad_norm": 0.10009765625, "learning_rate": 0.0007108016290851296, "loss": 0.6554, "step": 22450 }, { "epoch": 1.115525975961061, "grad_norm": 0.1416015625, "learning_rate": 0.0007107618953014802, "loss": 0.6206, "step": 22460 }, { "epoch": 1.1160226482566802, "grad_norm": 0.10400390625, "learning_rate": 0.0007107221615178306, "loss": 0.6328, "step": 22470 }, { "epoch": 1.1165193205522996, "grad_norm": 0.1005859375, "learning_rate": 0.000710682427734181, "loss": 0.5999, "step": 22480 }, { "epoch": 1.1170159928479189, "grad_norm": 0.1064453125, "learning_rate": 0.0007106426939505314, "loss": 0.6318, "step": 22490 }, { "epoch": 1.1175126651435383, "grad_norm": 0.10791015625, "learning_rate": 0.0007106029601668819, "loss": 0.6513, "step": 22500 }, { "epoch": 1.1180093374391578, "grad_norm": 0.111328125, "learning_rate": 0.0007105632263832324, "loss": 0.6258, "step": 22510 }, { "epoch": 1.118506009734777, "grad_norm": 0.103515625, "learning_rate": 0.0007105234925995828, "loss": 0.6303, "step": 22520 }, { "epoch": 1.1190026820303964, "grad_norm": 0.1298828125, "learning_rate": 0.0007104837588159333, "loss": 0.709, "step": 22530 }, { "epoch": 1.1194993543260157, "grad_norm": 0.11328125, "learning_rate": 0.0007104440250322838, "loss": 0.6219, "step": 22540 }, { "epoch": 1.119996026621635, "grad_norm": 0.10400390625, "learning_rate": 0.0007104042912486341, "loss": 0.6181, "step": 22550 }, { "epoch": 1.1204926989172543, "grad_norm": 0.1025390625, "learning_rate": 0.0007103645574649847, "loss": 0.621, "step": 22560 }, { "epoch": 1.1209893712128738, "grad_norm": 0.1015625, "learning_rate": 0.0007103248236813351, "loss": 0.619, "step": 22570 }, { "epoch": 1.121486043508493, "grad_norm": 0.10107421875, "learning_rate": 0.0007102850898976855, "loss": 0.619, "step": 22580 }, { "epoch": 1.1219827158041125, "grad_norm": 0.11181640625, "learning_rate": 0.000710245356114036, "loss": 0.6185, "step": 22590 }, { "epoch": 1.1224793880997317, "grad_norm": 0.1015625, "learning_rate": 0.0007102056223303864, "loss": 0.6314, "step": 22600 }, { "epoch": 1.1229760603953511, "grad_norm": 0.1396484375, "learning_rate": 0.0007101658885467369, "loss": 0.6068, "step": 22610 }, { "epoch": 1.1234727326909706, "grad_norm": 0.10009765625, "learning_rate": 0.0007101261547630874, "loss": 0.6185, "step": 22620 }, { "epoch": 1.1239694049865898, "grad_norm": 0.10302734375, "learning_rate": 0.0007100864209794378, "loss": 0.6095, "step": 22630 }, { "epoch": 1.1244660772822093, "grad_norm": 0.10595703125, "learning_rate": 0.0007100466871957882, "loss": 0.6314, "step": 22640 }, { "epoch": 1.1249627495778285, "grad_norm": 0.10986328125, "learning_rate": 0.0007100069534121387, "loss": 0.6239, "step": 22650 }, { "epoch": 1.125459421873448, "grad_norm": 0.1015625, "learning_rate": 0.0007099672196284892, "loss": 0.6411, "step": 22660 }, { "epoch": 1.1259560941690672, "grad_norm": 0.1162109375, "learning_rate": 0.0007099274858448396, "loss": 0.6537, "step": 22670 }, { "epoch": 1.1264527664646866, "grad_norm": 0.09765625, "learning_rate": 0.0007098877520611901, "loss": 0.6395, "step": 22680 }, { "epoch": 1.126949438760306, "grad_norm": 0.1083984375, "learning_rate": 0.0007098480182775405, "loss": 0.6465, "step": 22690 }, { "epoch": 1.1274461110559253, "grad_norm": 0.123046875, "learning_rate": 0.000709808284493891, "loss": 0.5975, "step": 22700 }, { "epoch": 1.1279427833515447, "grad_norm": 0.10107421875, "learning_rate": 0.0007097685507102414, "loss": 0.6149, "step": 22710 }, { "epoch": 1.128439455647164, "grad_norm": 0.11279296875, "learning_rate": 0.0007097288169265919, "loss": 0.625, "step": 22720 }, { "epoch": 1.1289361279427834, "grad_norm": 0.1044921875, "learning_rate": 0.0007096890831429424, "loss": 0.6185, "step": 22730 }, { "epoch": 1.1294328002384026, "grad_norm": 0.1220703125, "learning_rate": 0.0007096493493592927, "loss": 0.6329, "step": 22740 }, { "epoch": 1.129929472534022, "grad_norm": 0.1689453125, "learning_rate": 0.0007096096155756432, "loss": 0.6179, "step": 22750 }, { "epoch": 1.1304261448296413, "grad_norm": 0.1484375, "learning_rate": 0.0007095698817919937, "loss": 0.6177, "step": 22760 }, { "epoch": 1.1309228171252608, "grad_norm": 0.10400390625, "learning_rate": 0.0007095301480083441, "loss": 0.6089, "step": 22770 }, { "epoch": 1.13141948942088, "grad_norm": 0.103515625, "learning_rate": 0.0007094904142246946, "loss": 0.6127, "step": 22780 }, { "epoch": 1.1319161617164994, "grad_norm": 0.1171875, "learning_rate": 0.000709450680441045, "loss": 0.6463, "step": 22790 }, { "epoch": 1.132412834012119, "grad_norm": 0.10791015625, "learning_rate": 0.0007094109466573954, "loss": 0.6197, "step": 22800 }, { "epoch": 1.1329095063077381, "grad_norm": 0.09375, "learning_rate": 0.000709371212873746, "loss": 0.6144, "step": 22810 }, { "epoch": 1.1334061786033576, "grad_norm": 0.134765625, "learning_rate": 0.0007093314790900964, "loss": 0.6395, "step": 22820 }, { "epoch": 1.1339028508989768, "grad_norm": 0.09716796875, "learning_rate": 0.0007092917453064468, "loss": 0.6231, "step": 22830 }, { "epoch": 1.1343995231945962, "grad_norm": 0.1328125, "learning_rate": 0.0007092520115227973, "loss": 0.6377, "step": 22840 }, { "epoch": 1.1348961954902155, "grad_norm": 0.1513671875, "learning_rate": 0.0007092122777391477, "loss": 0.6589, "step": 22850 }, { "epoch": 1.135392867785835, "grad_norm": 0.10546875, "learning_rate": 0.0007091725439554983, "loss": 0.6392, "step": 22860 }, { "epoch": 1.1358895400814544, "grad_norm": 0.10595703125, "learning_rate": 0.0007091328101718487, "loss": 0.6901, "step": 22870 }, { "epoch": 1.1363862123770736, "grad_norm": 0.09912109375, "learning_rate": 0.0007090930763881991, "loss": 0.6257, "step": 22880 }, { "epoch": 1.136882884672693, "grad_norm": 0.130859375, "learning_rate": 0.0007090533426045496, "loss": 0.609, "step": 22890 }, { "epoch": 1.1373795569683123, "grad_norm": 0.1484375, "learning_rate": 0.0007090136088208999, "loss": 0.6255, "step": 22900 }, { "epoch": 1.1378762292639317, "grad_norm": 0.123046875, "learning_rate": 0.0007089738750372505, "loss": 0.6209, "step": 22910 }, { "epoch": 1.138372901559551, "grad_norm": 0.1416015625, "learning_rate": 0.000708934141253601, "loss": 0.6024, "step": 22920 }, { "epoch": 1.1388695738551704, "grad_norm": 0.10693359375, "learning_rate": 0.0007088944074699513, "loss": 0.6234, "step": 22930 }, { "epoch": 1.1393662461507896, "grad_norm": 0.09765625, "learning_rate": 0.0007088546736863018, "loss": 0.635, "step": 22940 }, { "epoch": 1.139862918446409, "grad_norm": 0.11767578125, "learning_rate": 0.0007088149399026523, "loss": 0.6387, "step": 22950 }, { "epoch": 1.1403595907420283, "grad_norm": 0.1015625, "learning_rate": 0.0007087752061190026, "loss": 0.6197, "step": 22960 }, { "epoch": 1.1408562630376478, "grad_norm": 0.16015625, "learning_rate": 0.0007087354723353532, "loss": 0.6047, "step": 22970 }, { "epoch": 1.1413529353332672, "grad_norm": 0.11376953125, "learning_rate": 0.0007086957385517036, "loss": 0.6384, "step": 22980 }, { "epoch": 1.1418496076288864, "grad_norm": 0.169921875, "learning_rate": 0.0007086560047680541, "loss": 0.6246, "step": 22990 }, { "epoch": 1.1423462799245059, "grad_norm": 0.10791015625, "learning_rate": 0.0007086162709844045, "loss": 0.6167, "step": 23000 }, { "epoch": 1.142842952220125, "grad_norm": 0.1572265625, "learning_rate": 0.000708576537200755, "loss": 0.6267, "step": 23010 }, { "epoch": 1.1433396245157446, "grad_norm": 0.1435546875, "learning_rate": 0.0007085368034171055, "loss": 0.628, "step": 23020 }, { "epoch": 1.1438362968113638, "grad_norm": 0.130859375, "learning_rate": 0.0007084970696334559, "loss": 0.6131, "step": 23030 }, { "epoch": 1.1443329691069832, "grad_norm": 0.1298828125, "learning_rate": 0.0007084573358498063, "loss": 0.6425, "step": 23040 }, { "epoch": 1.1448296414026027, "grad_norm": 0.10693359375, "learning_rate": 0.0007084176020661568, "loss": 0.6234, "step": 23050 }, { "epoch": 1.145326313698222, "grad_norm": 0.1044921875, "learning_rate": 0.0007083778682825073, "loss": 0.6404, "step": 23060 }, { "epoch": 1.1458229859938414, "grad_norm": 0.11865234375, "learning_rate": 0.0007083381344988577, "loss": 0.617, "step": 23070 }, { "epoch": 1.1463196582894606, "grad_norm": 0.11669921875, "learning_rate": 0.0007082984007152082, "loss": 0.5922, "step": 23080 }, { "epoch": 1.14681633058508, "grad_norm": 0.138671875, "learning_rate": 0.0007082586669315585, "loss": 0.618, "step": 23090 }, { "epoch": 1.1473130028806993, "grad_norm": 0.12060546875, "learning_rate": 0.000708218933147909, "loss": 0.613, "step": 23100 }, { "epoch": 1.1478096751763187, "grad_norm": 0.10693359375, "learning_rate": 0.0007081791993642596, "loss": 0.6369, "step": 23110 }, { "epoch": 1.148306347471938, "grad_norm": 0.0927734375, "learning_rate": 0.0007081394655806099, "loss": 0.6305, "step": 23120 }, { "epoch": 1.1488030197675574, "grad_norm": 0.12890625, "learning_rate": 0.0007080997317969604, "loss": 0.6211, "step": 23130 }, { "epoch": 1.1492996920631766, "grad_norm": 0.1044921875, "learning_rate": 0.0007080599980133109, "loss": 0.6008, "step": 23140 }, { "epoch": 1.149796364358796, "grad_norm": 0.126953125, "learning_rate": 0.0007080202642296613, "loss": 0.6009, "step": 23150 }, { "epoch": 1.1502930366544155, "grad_norm": 0.1025390625, "learning_rate": 0.0007079805304460117, "loss": 0.6652, "step": 23160 }, { "epoch": 1.1507897089500347, "grad_norm": 0.1064453125, "learning_rate": 0.0007079407966623622, "loss": 0.6498, "step": 23170 }, { "epoch": 1.1512863812456542, "grad_norm": 0.09716796875, "learning_rate": 0.0007079010628787127, "loss": 0.6265, "step": 23180 }, { "epoch": 1.1517830535412734, "grad_norm": 0.10546875, "learning_rate": 0.0007078613290950631, "loss": 0.6222, "step": 23190 }, { "epoch": 1.1522797258368929, "grad_norm": 0.095703125, "learning_rate": 0.0007078215953114135, "loss": 0.6105, "step": 23200 }, { "epoch": 1.152776398132512, "grad_norm": 0.09619140625, "learning_rate": 0.000707781861527764, "loss": 0.6407, "step": 23210 }, { "epoch": 1.1532730704281315, "grad_norm": 0.11767578125, "learning_rate": 0.0007077421277441145, "loss": 0.6244, "step": 23220 }, { "epoch": 1.153769742723751, "grad_norm": 0.11083984375, "learning_rate": 0.0007077023939604649, "loss": 0.6351, "step": 23230 }, { "epoch": 1.1542664150193702, "grad_norm": 0.107421875, "learning_rate": 0.0007076626601768154, "loss": 0.6386, "step": 23240 }, { "epoch": 1.1547630873149897, "grad_norm": 0.1064453125, "learning_rate": 0.0007076229263931658, "loss": 0.639, "step": 23250 }, { "epoch": 1.155259759610609, "grad_norm": 0.091796875, "learning_rate": 0.0007075831926095162, "loss": 0.6513, "step": 23260 }, { "epoch": 1.1557564319062283, "grad_norm": 0.119140625, "learning_rate": 0.0007075434588258668, "loss": 0.6354, "step": 23270 }, { "epoch": 1.1562531042018476, "grad_norm": 0.10986328125, "learning_rate": 0.0007075037250422172, "loss": 0.6214, "step": 23280 }, { "epoch": 1.156749776497467, "grad_norm": 0.1044921875, "learning_rate": 0.0007074639912585676, "loss": 0.6353, "step": 23290 }, { "epoch": 1.1572464487930862, "grad_norm": 0.0966796875, "learning_rate": 0.0007074242574749181, "loss": 0.6349, "step": 23300 }, { "epoch": 1.1577431210887057, "grad_norm": 0.103515625, "learning_rate": 0.0007073845236912685, "loss": 0.6332, "step": 23310 }, { "epoch": 1.158239793384325, "grad_norm": 0.1005859375, "learning_rate": 0.000707344789907619, "loss": 0.6123, "step": 23320 }, { "epoch": 1.1587364656799444, "grad_norm": 0.0966796875, "learning_rate": 0.0007073050561239695, "loss": 0.6024, "step": 23330 }, { "epoch": 1.1592331379755638, "grad_norm": 0.1259765625, "learning_rate": 0.0007072653223403199, "loss": 0.6049, "step": 23340 }, { "epoch": 1.159729810271183, "grad_norm": 0.1220703125, "learning_rate": 0.0007072255885566703, "loss": 0.6323, "step": 23350 }, { "epoch": 1.1602264825668025, "grad_norm": 0.10400390625, "learning_rate": 0.0007071858547730207, "loss": 0.643, "step": 23360 }, { "epoch": 1.1607231548624217, "grad_norm": 0.09326171875, "learning_rate": 0.0007071461209893713, "loss": 0.6096, "step": 23370 }, { "epoch": 1.1612198271580412, "grad_norm": 0.111328125, "learning_rate": 0.0007071063872057217, "loss": 0.6407, "step": 23380 }, { "epoch": 1.1617164994536604, "grad_norm": 0.10986328125, "learning_rate": 0.0007070666534220721, "loss": 0.5905, "step": 23390 }, { "epoch": 1.1622131717492798, "grad_norm": 0.1064453125, "learning_rate": 0.0007070269196384226, "loss": 0.5927, "step": 23400 }, { "epoch": 1.1627098440448993, "grad_norm": 0.1142578125, "learning_rate": 0.000706987185854773, "loss": 0.6414, "step": 23410 }, { "epoch": 1.1632065163405185, "grad_norm": 0.1259765625, "learning_rate": 0.0007069474520711235, "loss": 0.6149, "step": 23420 }, { "epoch": 1.1637031886361378, "grad_norm": 0.10546875, "learning_rate": 0.000706907718287474, "loss": 0.6216, "step": 23430 }, { "epoch": 1.1641998609317572, "grad_norm": 0.09912109375, "learning_rate": 0.0007068679845038244, "loss": 0.6228, "step": 23440 }, { "epoch": 1.1646965332273767, "grad_norm": 0.1123046875, "learning_rate": 0.0007068282507201748, "loss": 0.6317, "step": 23450 }, { "epoch": 1.1651932055229959, "grad_norm": 0.1142578125, "learning_rate": 0.0007067885169365253, "loss": 0.6172, "step": 23460 }, { "epoch": 1.1656898778186153, "grad_norm": 0.10009765625, "learning_rate": 0.0007067487831528758, "loss": 0.6362, "step": 23470 }, { "epoch": 1.1661865501142346, "grad_norm": 0.1005859375, "learning_rate": 0.0007067090493692262, "loss": 0.6011, "step": 23480 }, { "epoch": 1.166683222409854, "grad_norm": 0.09326171875, "learning_rate": 0.0007066693155855767, "loss": 0.6261, "step": 23490 }, { "epoch": 1.1671798947054732, "grad_norm": 0.10205078125, "learning_rate": 0.0007066295818019271, "loss": 0.6458, "step": 23500 }, { "epoch": 1.1676765670010927, "grad_norm": 0.09765625, "learning_rate": 0.0007065898480182775, "loss": 0.6441, "step": 23510 }, { "epoch": 1.1681732392967121, "grad_norm": 0.09130859375, "learning_rate": 0.0007065501142346281, "loss": 0.602, "step": 23520 }, { "epoch": 1.1686699115923314, "grad_norm": 0.107421875, "learning_rate": 0.0007065103804509785, "loss": 0.6454, "step": 23530 }, { "epoch": 1.1691665838879508, "grad_norm": 0.1044921875, "learning_rate": 0.0007064706466673289, "loss": 0.6184, "step": 23540 }, { "epoch": 1.16966325618357, "grad_norm": 0.0966796875, "learning_rate": 0.0007064309128836794, "loss": 0.6447, "step": 23550 }, { "epoch": 1.1701599284791895, "grad_norm": 0.09619140625, "learning_rate": 0.0007063911791000298, "loss": 0.6074, "step": 23560 }, { "epoch": 1.1706566007748087, "grad_norm": 0.08984375, "learning_rate": 0.0007063514453163803, "loss": 0.6108, "step": 23570 }, { "epoch": 1.1711532730704282, "grad_norm": 0.1640625, "learning_rate": 0.0007063117115327307, "loss": 0.6396, "step": 23580 }, { "epoch": 1.1716499453660474, "grad_norm": 0.1005859375, "learning_rate": 0.0007062719777490812, "loss": 0.6014, "step": 23590 }, { "epoch": 1.1721466176616668, "grad_norm": 0.12109375, "learning_rate": 0.0007062322439654317, "loss": 0.6143, "step": 23600 }, { "epoch": 1.172643289957286, "grad_norm": 0.10205078125, "learning_rate": 0.000706192510181782, "loss": 0.5961, "step": 23610 }, { "epoch": 1.1731399622529055, "grad_norm": 0.0966796875, "learning_rate": 0.0007061527763981326, "loss": 0.6356, "step": 23620 }, { "epoch": 1.173636634548525, "grad_norm": 0.130859375, "learning_rate": 0.000706113042614483, "loss": 0.6159, "step": 23630 }, { "epoch": 1.1741333068441442, "grad_norm": 0.1103515625, "learning_rate": 0.0007060733088308334, "loss": 0.6085, "step": 23640 }, { "epoch": 1.1746299791397636, "grad_norm": 0.10595703125, "learning_rate": 0.0007060335750471839, "loss": 0.5906, "step": 23650 }, { "epoch": 1.1751266514353829, "grad_norm": 0.099609375, "learning_rate": 0.0007059938412635343, "loss": 0.6355, "step": 23660 }, { "epoch": 1.1756233237310023, "grad_norm": 0.09814453125, "learning_rate": 0.0007059541074798848, "loss": 0.6162, "step": 23670 }, { "epoch": 1.1761199960266215, "grad_norm": 0.1376953125, "learning_rate": 0.0007059143736962353, "loss": 0.616, "step": 23680 }, { "epoch": 1.176616668322241, "grad_norm": 0.1416015625, "learning_rate": 0.0007058746399125857, "loss": 0.5947, "step": 23690 }, { "epoch": 1.1771133406178604, "grad_norm": 0.0947265625, "learning_rate": 0.0007058349061289361, "loss": 0.6463, "step": 23700 }, { "epoch": 1.1776100129134797, "grad_norm": 0.12451171875, "learning_rate": 0.0007057951723452866, "loss": 0.6078, "step": 23710 }, { "epoch": 1.1781066852090991, "grad_norm": 0.1083984375, "learning_rate": 0.0007057554385616371, "loss": 0.6306, "step": 23720 }, { "epoch": 1.1786033575047183, "grad_norm": 0.13671875, "learning_rate": 0.0007057157047779875, "loss": 0.6252, "step": 23730 }, { "epoch": 1.1791000298003378, "grad_norm": 0.11328125, "learning_rate": 0.000705675970994338, "loss": 0.6268, "step": 23740 }, { "epoch": 1.179596702095957, "grad_norm": 0.11328125, "learning_rate": 0.0007056362372106884, "loss": 0.6547, "step": 23750 }, { "epoch": 1.1800933743915765, "grad_norm": 0.10888671875, "learning_rate": 0.0007055965034270389, "loss": 0.5791, "step": 23760 }, { "epoch": 1.1805900466871957, "grad_norm": 0.1474609375, "learning_rate": 0.0007055567696433893, "loss": 0.6184, "step": 23770 }, { "epoch": 1.1810867189828151, "grad_norm": 0.09912109375, "learning_rate": 0.0007055170358597398, "loss": 0.6027, "step": 23780 }, { "epoch": 1.1815833912784344, "grad_norm": 0.1162109375, "learning_rate": 0.0007054773020760903, "loss": 0.6148, "step": 23790 }, { "epoch": 1.1820800635740538, "grad_norm": 0.1337890625, "learning_rate": 0.0007054375682924406, "loss": 0.6185, "step": 23800 }, { "epoch": 1.1825767358696733, "grad_norm": 0.10546875, "learning_rate": 0.0007053978345087911, "loss": 0.6371, "step": 23810 }, { "epoch": 1.1830734081652925, "grad_norm": 0.138671875, "learning_rate": 0.0007053581007251417, "loss": 0.6398, "step": 23820 }, { "epoch": 1.183570080460912, "grad_norm": 0.1171875, "learning_rate": 0.000705318366941492, "loss": 0.6341, "step": 23830 }, { "epoch": 1.1840667527565312, "grad_norm": 0.11962890625, "learning_rate": 0.0007052786331578425, "loss": 0.6218, "step": 23840 }, { "epoch": 1.1845634250521506, "grad_norm": 0.1064453125, "learning_rate": 0.0007052388993741929, "loss": 0.6058, "step": 23850 }, { "epoch": 1.1850600973477698, "grad_norm": 0.10009765625, "learning_rate": 0.0007051991655905433, "loss": 0.6475, "step": 23860 }, { "epoch": 1.1855567696433893, "grad_norm": 0.11474609375, "learning_rate": 0.0007051594318068939, "loss": 0.6322, "step": 23870 }, { "epoch": 1.1860534419390087, "grad_norm": 0.099609375, "learning_rate": 0.0007051196980232443, "loss": 0.5993, "step": 23880 }, { "epoch": 1.186550114234628, "grad_norm": 0.10546875, "learning_rate": 0.0007050799642395948, "loss": 0.6268, "step": 23890 }, { "epoch": 1.1870467865302474, "grad_norm": 0.11767578125, "learning_rate": 0.0007050402304559452, "loss": 0.5902, "step": 23900 }, { "epoch": 1.1875434588258666, "grad_norm": 0.0947265625, "learning_rate": 0.0007050004966722956, "loss": 0.6241, "step": 23910 }, { "epoch": 1.188040131121486, "grad_norm": 0.11376953125, "learning_rate": 0.0007049607628886462, "loss": 0.6016, "step": 23920 }, { "epoch": 1.1885368034171053, "grad_norm": 0.1142578125, "learning_rate": 0.0007049210291049966, "loss": 0.6004, "step": 23930 }, { "epoch": 1.1890334757127248, "grad_norm": 0.1025390625, "learning_rate": 0.000704881295321347, "loss": 0.6316, "step": 23940 }, { "epoch": 1.189530148008344, "grad_norm": 0.103515625, "learning_rate": 0.0007048415615376975, "loss": 0.6467, "step": 23950 }, { "epoch": 1.1900268203039635, "grad_norm": 0.1162109375, "learning_rate": 0.0007048018277540478, "loss": 0.6248, "step": 23960 }, { "epoch": 1.1905234925995827, "grad_norm": 0.1044921875, "learning_rate": 0.0007047620939703984, "loss": 0.6385, "step": 23970 }, { "epoch": 1.1910201648952021, "grad_norm": 0.2041015625, "learning_rate": 0.0007047223601867489, "loss": 0.6219, "step": 23980 }, { "epoch": 1.1915168371908216, "grad_norm": 0.1484375, "learning_rate": 0.0007046826264030992, "loss": 0.6193, "step": 23990 }, { "epoch": 1.1920135094864408, "grad_norm": 0.1220703125, "learning_rate": 0.0007046428926194497, "loss": 0.6297, "step": 24000 }, { "epoch": 1.1925101817820603, "grad_norm": 0.09521484375, "learning_rate": 0.0007046031588358002, "loss": 0.6158, "step": 24010 }, { "epoch": 1.1930068540776795, "grad_norm": 0.12890625, "learning_rate": 0.0007045634250521506, "loss": 0.6023, "step": 24020 }, { "epoch": 1.193503526373299, "grad_norm": 0.115234375, "learning_rate": 0.0007045236912685011, "loss": 0.6306, "step": 24030 }, { "epoch": 1.1940001986689182, "grad_norm": 0.1162109375, "learning_rate": 0.0007044839574848515, "loss": 0.6125, "step": 24040 }, { "epoch": 1.1944968709645376, "grad_norm": 0.09716796875, "learning_rate": 0.000704444223701202, "loss": 0.5975, "step": 24050 }, { "epoch": 1.194993543260157, "grad_norm": 0.1005859375, "learning_rate": 0.0007044044899175524, "loss": 0.6436, "step": 24060 }, { "epoch": 1.1954902155557763, "grad_norm": 0.11328125, "learning_rate": 0.0007043647561339029, "loss": 0.6371, "step": 24070 }, { "epoch": 1.1959868878513957, "grad_norm": 0.1005859375, "learning_rate": 0.0007043250223502534, "loss": 0.6063, "step": 24080 }, { "epoch": 1.196483560147015, "grad_norm": 0.107421875, "learning_rate": 0.0007042852885666038, "loss": 0.6436, "step": 24090 }, { "epoch": 1.1969802324426344, "grad_norm": 0.11181640625, "learning_rate": 0.0007042455547829542, "loss": 0.6478, "step": 24100 }, { "epoch": 1.1974769047382536, "grad_norm": 0.099609375, "learning_rate": 0.0007042058209993047, "loss": 0.6237, "step": 24110 }, { "epoch": 1.197973577033873, "grad_norm": 0.11328125, "learning_rate": 0.0007041660872156552, "loss": 0.5966, "step": 24120 }, { "epoch": 1.1984702493294923, "grad_norm": 0.1845703125, "learning_rate": 0.0007041263534320056, "loss": 0.6514, "step": 24130 }, { "epoch": 1.1989669216251118, "grad_norm": 0.1240234375, "learning_rate": 0.0007040866196483561, "loss": 0.6267, "step": 24140 }, { "epoch": 1.199463593920731, "grad_norm": 0.12060546875, "learning_rate": 0.0007040468858647065, "loss": 0.6382, "step": 24150 }, { "epoch": 1.1999602662163504, "grad_norm": 0.10546875, "learning_rate": 0.0007040071520810569, "loss": 0.6262, "step": 24160 }, { "epoch": 1.2004569385119699, "grad_norm": 0.10546875, "learning_rate": 0.0007039674182974075, "loss": 0.6151, "step": 24170 }, { "epoch": 1.2009536108075891, "grad_norm": 0.109375, "learning_rate": 0.0007039276845137578, "loss": 0.6319, "step": 24180 }, { "epoch": 1.2014502831032086, "grad_norm": 0.12060546875, "learning_rate": 0.0007038879507301083, "loss": 0.6521, "step": 24190 }, { "epoch": 1.2019469553988278, "grad_norm": 0.10595703125, "learning_rate": 0.0007038482169464588, "loss": 0.5974, "step": 24200 }, { "epoch": 1.2024436276944472, "grad_norm": 0.11572265625, "learning_rate": 0.0007038084831628092, "loss": 0.655, "step": 24210 }, { "epoch": 1.2029402999900665, "grad_norm": 0.130859375, "learning_rate": 0.0007037687493791597, "loss": 0.6258, "step": 24220 }, { "epoch": 1.203436972285686, "grad_norm": 0.10400390625, "learning_rate": 0.0007037290155955101, "loss": 0.6048, "step": 24230 }, { "epoch": 1.2039336445813054, "grad_norm": 0.13671875, "learning_rate": 0.0007036892818118606, "loss": 0.6375, "step": 24240 }, { "epoch": 1.2044303168769246, "grad_norm": 0.10498046875, "learning_rate": 0.000703649548028211, "loss": 0.6155, "step": 24250 }, { "epoch": 1.204926989172544, "grad_norm": 0.1064453125, "learning_rate": 0.0007036098142445614, "loss": 0.595, "step": 24260 }, { "epoch": 1.2054236614681633, "grad_norm": 0.099609375, "learning_rate": 0.000703570080460912, "loss": 0.605, "step": 24270 }, { "epoch": 1.2059203337637827, "grad_norm": 0.126953125, "learning_rate": 0.0007035303466772624, "loss": 0.6043, "step": 24280 }, { "epoch": 1.206417006059402, "grad_norm": 0.11474609375, "learning_rate": 0.0007034906128936128, "loss": 0.6582, "step": 24290 }, { "epoch": 1.2069136783550214, "grad_norm": 0.0986328125, "learning_rate": 0.0007034508791099633, "loss": 0.6373, "step": 24300 }, { "epoch": 1.2074103506506406, "grad_norm": 0.12451171875, "learning_rate": 0.0007034111453263137, "loss": 0.6126, "step": 24310 }, { "epoch": 1.20790702294626, "grad_norm": 0.10498046875, "learning_rate": 0.0007033714115426641, "loss": 0.6255, "step": 24320 }, { "epoch": 1.2084036952418793, "grad_norm": 0.134765625, "learning_rate": 0.0007033316777590147, "loss": 0.6219, "step": 24330 }, { "epoch": 1.2089003675374987, "grad_norm": 0.1220703125, "learning_rate": 0.0007032919439753651, "loss": 0.6258, "step": 24340 }, { "epoch": 1.2093970398331182, "grad_norm": 0.087890625, "learning_rate": 0.0007032522101917155, "loss": 0.6137, "step": 24350 }, { "epoch": 1.2098937121287374, "grad_norm": 0.1328125, "learning_rate": 0.000703212476408066, "loss": 0.5958, "step": 24360 }, { "epoch": 1.2103903844243569, "grad_norm": 0.111328125, "learning_rate": 0.0007031727426244165, "loss": 0.6078, "step": 24370 }, { "epoch": 1.210887056719976, "grad_norm": 0.1103515625, "learning_rate": 0.0007031330088407669, "loss": 0.6212, "step": 24380 }, { "epoch": 1.2113837290155955, "grad_norm": 0.09521484375, "learning_rate": 0.0007030932750571174, "loss": 0.5888, "step": 24390 }, { "epoch": 1.2118804013112148, "grad_norm": 0.10302734375, "learning_rate": 0.0007030535412734678, "loss": 0.611, "step": 24400 }, { "epoch": 1.2123770736068342, "grad_norm": 0.109375, "learning_rate": 0.0007030138074898182, "loss": 0.5952, "step": 24410 }, { "epoch": 1.2128737459024537, "grad_norm": 0.111328125, "learning_rate": 0.0007029740737061688, "loss": 0.6439, "step": 24420 }, { "epoch": 1.213370418198073, "grad_norm": 0.10986328125, "learning_rate": 0.0007029343399225192, "loss": 0.6053, "step": 24430 }, { "epoch": 1.2138670904936923, "grad_norm": 0.1181640625, "learning_rate": 0.0007028946061388696, "loss": 0.6181, "step": 24440 }, { "epoch": 1.2143637627893116, "grad_norm": 0.11865234375, "learning_rate": 0.00070285487235522, "loss": 0.6271, "step": 24450 }, { "epoch": 1.214860435084931, "grad_norm": 0.1484375, "learning_rate": 0.0007028151385715705, "loss": 0.6332, "step": 24460 }, { "epoch": 1.2153571073805503, "grad_norm": 0.14453125, "learning_rate": 0.000702775404787921, "loss": 0.6302, "step": 24470 }, { "epoch": 1.2158537796761697, "grad_norm": 0.103515625, "learning_rate": 0.0007027356710042714, "loss": 0.5947, "step": 24480 }, { "epoch": 1.216350451971789, "grad_norm": 0.1416015625, "learning_rate": 0.0007026959372206219, "loss": 0.645, "step": 24490 }, { "epoch": 1.2168471242674084, "grad_norm": 0.11669921875, "learning_rate": 0.0007026562034369723, "loss": 0.6371, "step": 24500 }, { "epoch": 1.2173437965630276, "grad_norm": 0.09814453125, "learning_rate": 0.0007026164696533227, "loss": 0.6097, "step": 24510 }, { "epoch": 1.217840468858647, "grad_norm": 0.1455078125, "learning_rate": 0.0007025767358696733, "loss": 0.6174, "step": 24520 }, { "epoch": 1.2183371411542665, "grad_norm": 0.1572265625, "learning_rate": 0.0007025370020860237, "loss": 0.6005, "step": 24530 }, { "epoch": 1.2188338134498857, "grad_norm": 0.1025390625, "learning_rate": 0.0007024972683023741, "loss": 0.59, "step": 24540 }, { "epoch": 1.2193304857455052, "grad_norm": 0.11669921875, "learning_rate": 0.0007024575345187246, "loss": 0.6203, "step": 24550 }, { "epoch": 1.2198271580411244, "grad_norm": 0.10302734375, "learning_rate": 0.000702417800735075, "loss": 0.6041, "step": 24560 }, { "epoch": 1.2203238303367439, "grad_norm": 0.1416015625, "learning_rate": 0.0007023780669514254, "loss": 0.6207, "step": 24570 }, { "epoch": 1.220820502632363, "grad_norm": 0.09765625, "learning_rate": 0.000702338333167776, "loss": 0.6145, "step": 24580 }, { "epoch": 1.2213171749279825, "grad_norm": 0.103515625, "learning_rate": 0.0007022985993841264, "loss": 0.6016, "step": 24590 }, { "epoch": 1.221813847223602, "grad_norm": 0.11572265625, "learning_rate": 0.0007022588656004768, "loss": 0.615, "step": 24600 }, { "epoch": 1.2223105195192212, "grad_norm": 0.1044921875, "learning_rate": 0.0007022191318168273, "loss": 0.6351, "step": 24610 }, { "epoch": 1.2228071918148407, "grad_norm": 0.09521484375, "learning_rate": 0.0007021793980331777, "loss": 0.6276, "step": 24620 }, { "epoch": 1.2233038641104599, "grad_norm": 0.0986328125, "learning_rate": 0.0007021396642495282, "loss": 0.6095, "step": 24630 }, { "epoch": 1.2238005364060793, "grad_norm": 0.1025390625, "learning_rate": 0.0007020999304658786, "loss": 0.6215, "step": 24640 }, { "epoch": 1.2242972087016986, "grad_norm": 0.12890625, "learning_rate": 0.0007020601966822291, "loss": 0.5976, "step": 24650 }, { "epoch": 1.224793880997318, "grad_norm": 0.1298828125, "learning_rate": 0.0007020204628985796, "loss": 0.5812, "step": 24660 }, { "epoch": 1.2252905532929372, "grad_norm": 0.1337890625, "learning_rate": 0.0007019807291149299, "loss": 0.6122, "step": 24670 }, { "epoch": 1.2257872255885567, "grad_norm": 0.134765625, "learning_rate": 0.0007019409953312805, "loss": 0.6131, "step": 24680 }, { "epoch": 1.226283897884176, "grad_norm": 0.1005859375, "learning_rate": 0.000701901261547631, "loss": 0.6229, "step": 24690 }, { "epoch": 1.2267805701797954, "grad_norm": 0.0927734375, "learning_rate": 0.0007018615277639813, "loss": 0.594, "step": 24700 }, { "epoch": 1.2272772424754148, "grad_norm": 0.123046875, "learning_rate": 0.0007018217939803318, "loss": 0.5905, "step": 24710 }, { "epoch": 1.227773914771034, "grad_norm": 0.1474609375, "learning_rate": 0.0007017820601966822, "loss": 0.6067, "step": 24720 }, { "epoch": 1.2282705870666535, "grad_norm": 0.1279296875, "learning_rate": 0.0007017423264130327, "loss": 0.6426, "step": 24730 }, { "epoch": 1.2287672593622727, "grad_norm": 0.1005859375, "learning_rate": 0.0007017025926293832, "loss": 0.6224, "step": 24740 }, { "epoch": 1.2292639316578922, "grad_norm": 0.0986328125, "learning_rate": 0.0007016628588457336, "loss": 0.6083, "step": 24750 }, { "epoch": 1.2297606039535114, "grad_norm": 0.0927734375, "learning_rate": 0.000701623125062084, "loss": 0.6234, "step": 24760 }, { "epoch": 1.2302572762491308, "grad_norm": 0.12451171875, "learning_rate": 0.0007015833912784345, "loss": 0.6315, "step": 24770 }, { "epoch": 1.2307539485447503, "grad_norm": 0.1142578125, "learning_rate": 0.000701543657494785, "loss": 0.6274, "step": 24780 }, { "epoch": 1.2312506208403695, "grad_norm": 0.0986328125, "learning_rate": 0.0007015039237111355, "loss": 0.6201, "step": 24790 }, { "epoch": 1.231747293135989, "grad_norm": 0.09912109375, "learning_rate": 0.0007014641899274859, "loss": 0.6321, "step": 24800 }, { "epoch": 1.2322439654316082, "grad_norm": 0.1005859375, "learning_rate": 0.0007014244561438363, "loss": 0.6348, "step": 24810 }, { "epoch": 1.2327406377272276, "grad_norm": 0.099609375, "learning_rate": 0.0007013847223601868, "loss": 0.5963, "step": 24820 }, { "epoch": 1.2332373100228469, "grad_norm": 0.10302734375, "learning_rate": 0.0007013449885765372, "loss": 0.6164, "step": 24830 }, { "epoch": 1.2337339823184663, "grad_norm": 0.111328125, "learning_rate": 0.0007013052547928877, "loss": 0.6486, "step": 24840 }, { "epoch": 1.2342306546140855, "grad_norm": 0.154296875, "learning_rate": 0.0007012655210092382, "loss": 0.6338, "step": 24850 }, { "epoch": 1.234727326909705, "grad_norm": 0.09326171875, "learning_rate": 0.0007012257872255885, "loss": 0.6248, "step": 24860 }, { "epoch": 1.2352239992053242, "grad_norm": 0.1044921875, "learning_rate": 0.000701186053441939, "loss": 0.622, "step": 24870 }, { "epoch": 1.2357206715009437, "grad_norm": 0.0966796875, "learning_rate": 0.0007011463196582896, "loss": 0.5822, "step": 24880 }, { "epoch": 1.2362173437965631, "grad_norm": 0.09765625, "learning_rate": 0.0007011065858746399, "loss": 0.61, "step": 24890 }, { "epoch": 1.2367140160921823, "grad_norm": 0.1103515625, "learning_rate": 0.0007010668520909904, "loss": 0.6277, "step": 24900 }, { "epoch": 1.2372106883878018, "grad_norm": 0.134765625, "learning_rate": 0.0007010271183073408, "loss": 0.6312, "step": 24910 }, { "epoch": 1.237707360683421, "grad_norm": 0.111328125, "learning_rate": 0.0007009873845236912, "loss": 0.5847, "step": 24920 }, { "epoch": 1.2382040329790405, "grad_norm": 0.1201171875, "learning_rate": 0.0007009476507400418, "loss": 0.6328, "step": 24930 }, { "epoch": 1.2387007052746597, "grad_norm": 0.09423828125, "learning_rate": 0.0007009079169563922, "loss": 0.6421, "step": 24940 }, { "epoch": 1.2391973775702791, "grad_norm": 0.10498046875, "learning_rate": 0.0007008681831727427, "loss": 0.6063, "step": 24950 }, { "epoch": 1.2396940498658986, "grad_norm": 0.10009765625, "learning_rate": 0.0007008284493890931, "loss": 0.6355, "step": 24960 }, { "epoch": 1.2401907221615178, "grad_norm": 0.1064453125, "learning_rate": 0.0007007887156054435, "loss": 0.6116, "step": 24970 }, { "epoch": 1.2406873944571373, "grad_norm": 0.1484375, "learning_rate": 0.0007007489818217941, "loss": 0.6389, "step": 24980 }, { "epoch": 1.2411840667527565, "grad_norm": 0.09765625, "learning_rate": 0.0007007092480381445, "loss": 0.641, "step": 24990 }, { "epoch": 1.241680739048376, "grad_norm": 0.1044921875, "learning_rate": 0.0007006695142544949, "loss": 0.6491, "step": 25000 }, { "epoch": 1.2421774113439952, "grad_norm": 0.10888671875, "learning_rate": 0.0007006297804708454, "loss": 0.6464, "step": 25010 }, { "epoch": 1.2426740836396146, "grad_norm": 0.1103515625, "learning_rate": 0.0007005900466871958, "loss": 0.6064, "step": 25020 }, { "epoch": 1.2431707559352339, "grad_norm": 0.1533203125, "learning_rate": 0.0007005503129035463, "loss": 0.6449, "step": 25030 }, { "epoch": 1.2436674282308533, "grad_norm": 0.1279296875, "learning_rate": 0.0007005105791198968, "loss": 0.6289, "step": 25040 }, { "epoch": 1.2441641005264725, "grad_norm": 0.10888671875, "learning_rate": 0.0007004708453362471, "loss": 0.6168, "step": 25050 }, { "epoch": 1.244660772822092, "grad_norm": 0.09765625, "learning_rate": 0.0007004311115525976, "loss": 0.6012, "step": 25060 }, { "epoch": 1.2451574451177114, "grad_norm": 0.11572265625, "learning_rate": 0.0007003913777689481, "loss": 0.6305, "step": 25070 }, { "epoch": 1.2456541174133307, "grad_norm": 0.09033203125, "learning_rate": 0.0007003516439852986, "loss": 0.6238, "step": 25080 }, { "epoch": 1.24615078970895, "grad_norm": 0.08740234375, "learning_rate": 0.000700311910201649, "loss": 0.6106, "step": 25090 }, { "epoch": 1.2466474620045693, "grad_norm": 0.10302734375, "learning_rate": 0.0007002721764179994, "loss": 0.6166, "step": 25100 }, { "epoch": 1.2471441343001888, "grad_norm": 0.103515625, "learning_rate": 0.0007002324426343499, "loss": 0.6587, "step": 25110 }, { "epoch": 1.247640806595808, "grad_norm": 0.10791015625, "learning_rate": 0.0007001927088507003, "loss": 0.632, "step": 25120 }, { "epoch": 1.2481374788914275, "grad_norm": 0.1337890625, "learning_rate": 0.0007001529750670508, "loss": 0.6218, "step": 25130 }, { "epoch": 1.248634151187047, "grad_norm": 0.08837890625, "learning_rate": 0.0007001132412834013, "loss": 0.605, "step": 25140 }, { "epoch": 1.2491308234826661, "grad_norm": 0.10205078125, "learning_rate": 0.0007000735074997517, "loss": 0.6058, "step": 25150 }, { "epoch": 1.2496274957782856, "grad_norm": 0.1611328125, "learning_rate": 0.0007000337737161021, "loss": 0.6327, "step": 25160 }, { "epoch": 1.2501241680739048, "grad_norm": 0.091796875, "learning_rate": 0.0006999940399324526, "loss": 0.6189, "step": 25170 }, { "epoch": 1.2506208403695243, "grad_norm": 0.18359375, "learning_rate": 0.0006999543061488031, "loss": 0.6172, "step": 25180 }, { "epoch": 1.2511175126651435, "grad_norm": 0.10986328125, "learning_rate": 0.0006999145723651535, "loss": 0.6432, "step": 25190 }, { "epoch": 1.251614184960763, "grad_norm": 0.109375, "learning_rate": 0.000699874838581504, "loss": 0.6251, "step": 25200 }, { "epoch": 1.2521108572563822, "grad_norm": 0.134765625, "learning_rate": 0.0006998351047978544, "loss": 0.6038, "step": 25210 }, { "epoch": 1.2526075295520016, "grad_norm": 0.10693359375, "learning_rate": 0.0006997953710142048, "loss": 0.6242, "step": 25220 }, { "epoch": 1.2531042018476208, "grad_norm": 0.09326171875, "learning_rate": 0.0006997556372305554, "loss": 0.612, "step": 25230 }, { "epoch": 1.2536008741432403, "grad_norm": 0.12890625, "learning_rate": 0.0006997159034469058, "loss": 0.6356, "step": 25240 }, { "epoch": 1.2540975464388597, "grad_norm": 0.09716796875, "learning_rate": 0.0006996761696632562, "loss": 0.5935, "step": 25250 }, { "epoch": 1.254594218734479, "grad_norm": 0.11767578125, "learning_rate": 0.0006996364358796067, "loss": 0.5989, "step": 25260 }, { "epoch": 1.2550908910300984, "grad_norm": 0.125, "learning_rate": 0.0006995967020959571, "loss": 0.615, "step": 25270 }, { "epoch": 1.2555875633257176, "grad_norm": 0.09619140625, "learning_rate": 0.0006995569683123076, "loss": 0.634, "step": 25280 }, { "epoch": 1.256084235621337, "grad_norm": 0.109375, "learning_rate": 0.0006995172345286581, "loss": 0.6228, "step": 25290 }, { "epoch": 1.2565809079169563, "grad_norm": 0.09619140625, "learning_rate": 0.0006994775007450085, "loss": 0.6097, "step": 25300 }, { "epoch": 1.2570775802125758, "grad_norm": 0.111328125, "learning_rate": 0.0006994377669613589, "loss": 0.6059, "step": 25310 }, { "epoch": 1.2575742525081952, "grad_norm": 0.126953125, "learning_rate": 0.0006993980331777093, "loss": 0.6045, "step": 25320 }, { "epoch": 1.2580709248038144, "grad_norm": 0.1064453125, "learning_rate": 0.0006993582993940599, "loss": 0.6311, "step": 25330 }, { "epoch": 1.2585675970994337, "grad_norm": 0.1083984375, "learning_rate": 0.0006993185656104103, "loss": 0.5906, "step": 25340 }, { "epoch": 1.2590642693950531, "grad_norm": 0.1640625, "learning_rate": 0.0006992788318267607, "loss": 0.6082, "step": 25350 }, { "epoch": 1.2595609416906726, "grad_norm": 0.10302734375, "learning_rate": 0.0006992390980431112, "loss": 0.5942, "step": 25360 }, { "epoch": 1.2600576139862918, "grad_norm": 0.11279296875, "learning_rate": 0.0006991993642594616, "loss": 0.606, "step": 25370 }, { "epoch": 1.2605542862819112, "grad_norm": 0.11328125, "learning_rate": 0.000699159630475812, "loss": 0.6422, "step": 25380 }, { "epoch": 1.2610509585775305, "grad_norm": 0.1171875, "learning_rate": 0.0006991198966921626, "loss": 0.6, "step": 25390 }, { "epoch": 1.26154763087315, "grad_norm": 0.0908203125, "learning_rate": 0.000699080162908513, "loss": 0.6161, "step": 25400 }, { "epoch": 1.2620443031687691, "grad_norm": 0.10107421875, "learning_rate": 0.0006990404291248634, "loss": 0.5868, "step": 25410 }, { "epoch": 1.2625409754643886, "grad_norm": 0.13671875, "learning_rate": 0.0006990006953412139, "loss": 0.5998, "step": 25420 }, { "epoch": 1.263037647760008, "grad_norm": 0.140625, "learning_rate": 0.0006989609615575644, "loss": 0.5932, "step": 25430 }, { "epoch": 1.2635343200556273, "grad_norm": 0.09814453125, "learning_rate": 0.0006989212277739148, "loss": 0.6253, "step": 25440 }, { "epoch": 1.2640309923512467, "grad_norm": 0.10546875, "learning_rate": 0.0006988814939902653, "loss": 0.6573, "step": 25450 }, { "epoch": 1.264527664646866, "grad_norm": 0.1259765625, "learning_rate": 0.0006988417602066157, "loss": 0.6146, "step": 25460 }, { "epoch": 1.2650243369424854, "grad_norm": 0.146484375, "learning_rate": 0.0006988020264229661, "loss": 0.6223, "step": 25470 }, { "epoch": 1.2655210092381046, "grad_norm": 0.10302734375, "learning_rate": 0.0006987622926393167, "loss": 0.6207, "step": 25480 }, { "epoch": 1.266017681533724, "grad_norm": 0.1044921875, "learning_rate": 0.0006987225588556671, "loss": 0.6174, "step": 25490 }, { "epoch": 1.2665143538293435, "grad_norm": 0.10693359375, "learning_rate": 0.0006986828250720175, "loss": 0.6209, "step": 25500 }, { "epoch": 1.2670110261249627, "grad_norm": 0.115234375, "learning_rate": 0.0006986430912883679, "loss": 0.6172, "step": 25510 }, { "epoch": 1.267507698420582, "grad_norm": 0.1015625, "learning_rate": 0.0006986033575047184, "loss": 0.61, "step": 25520 }, { "epoch": 1.2680043707162014, "grad_norm": 0.0966796875, "learning_rate": 0.000698563623721069, "loss": 0.6227, "step": 25530 }, { "epoch": 1.2685010430118209, "grad_norm": 0.10693359375, "learning_rate": 0.0006985238899374193, "loss": 0.6064, "step": 25540 }, { "epoch": 1.26899771530744, "grad_norm": 0.099609375, "learning_rate": 0.0006984841561537698, "loss": 0.6189, "step": 25550 }, { "epoch": 1.2694943876030595, "grad_norm": 0.130859375, "learning_rate": 0.0006984444223701202, "loss": 0.6138, "step": 25560 }, { "epoch": 1.2699910598986788, "grad_norm": 0.1396484375, "learning_rate": 0.0006984046885864706, "loss": 0.6371, "step": 25570 }, { "epoch": 1.2704877321942982, "grad_norm": 0.099609375, "learning_rate": 0.0006983649548028212, "loss": 0.6274, "step": 25580 }, { "epoch": 1.2709844044899175, "grad_norm": 0.12109375, "learning_rate": 0.0006983252210191716, "loss": 0.613, "step": 25590 }, { "epoch": 1.271481076785537, "grad_norm": 0.11181640625, "learning_rate": 0.000698285487235522, "loss": 0.5777, "step": 25600 }, { "epoch": 1.2719777490811564, "grad_norm": 0.09814453125, "learning_rate": 0.0006982457534518725, "loss": 0.6199, "step": 25610 }, { "epoch": 1.2724744213767756, "grad_norm": 0.123046875, "learning_rate": 0.0006982060196682229, "loss": 0.6128, "step": 25620 }, { "epoch": 1.272971093672395, "grad_norm": 0.1064453125, "learning_rate": 0.0006981662858845733, "loss": 0.6039, "step": 25630 }, { "epoch": 1.2734677659680143, "grad_norm": 0.11767578125, "learning_rate": 0.0006981265521009239, "loss": 0.5967, "step": 25640 }, { "epoch": 1.2739644382636337, "grad_norm": 0.109375, "learning_rate": 0.0006980868183172743, "loss": 0.6332, "step": 25650 }, { "epoch": 1.274461110559253, "grad_norm": 0.11474609375, "learning_rate": 0.0006980470845336247, "loss": 0.6355, "step": 25660 }, { "epoch": 1.2749577828548724, "grad_norm": 0.12451171875, "learning_rate": 0.0006980073507499752, "loss": 0.6018, "step": 25670 }, { "epoch": 1.2754544551504918, "grad_norm": 0.12109375, "learning_rate": 0.0006979676169663257, "loss": 0.618, "step": 25680 }, { "epoch": 1.275951127446111, "grad_norm": 0.2265625, "learning_rate": 0.0006979278831826762, "loss": 0.609, "step": 25690 }, { "epoch": 1.2764477997417303, "grad_norm": 0.09912109375, "learning_rate": 0.0006978881493990265, "loss": 0.631, "step": 25700 }, { "epoch": 1.2769444720373497, "grad_norm": 0.099609375, "learning_rate": 0.000697848415615377, "loss": 0.6446, "step": 25710 }, { "epoch": 1.2774411443329692, "grad_norm": 0.09130859375, "learning_rate": 0.0006978086818317275, "loss": 0.6415, "step": 25720 }, { "epoch": 1.2779378166285884, "grad_norm": 0.162109375, "learning_rate": 0.0006977689480480778, "loss": 0.6365, "step": 25730 }, { "epoch": 1.2784344889242079, "grad_norm": 0.1083984375, "learning_rate": 0.0006977292142644284, "loss": 0.669, "step": 25740 }, { "epoch": 1.278931161219827, "grad_norm": 0.1201171875, "learning_rate": 0.0006976894804807789, "loss": 0.6098, "step": 25750 }, { "epoch": 1.2794278335154465, "grad_norm": 0.09375, "learning_rate": 0.0006976497466971292, "loss": 0.6255, "step": 25760 }, { "epoch": 1.2799245058110658, "grad_norm": 0.1171875, "learning_rate": 0.0006976100129134797, "loss": 0.6222, "step": 25770 }, { "epoch": 1.2804211781066852, "grad_norm": 0.095703125, "learning_rate": 0.0006975702791298301, "loss": 0.6459, "step": 25780 }, { "epoch": 1.2809178504023047, "grad_norm": 0.138671875, "learning_rate": 0.0006975305453461806, "loss": 0.6226, "step": 25790 }, { "epoch": 1.2814145226979239, "grad_norm": 0.146484375, "learning_rate": 0.0006974908115625311, "loss": 0.6342, "step": 25800 }, { "epoch": 1.2819111949935433, "grad_norm": 0.1474609375, "learning_rate": 0.0006974510777788815, "loss": 0.6535, "step": 25810 }, { "epoch": 1.2824078672891626, "grad_norm": 0.11767578125, "learning_rate": 0.0006974113439952319, "loss": 0.6288, "step": 25820 }, { "epoch": 1.282904539584782, "grad_norm": 0.12109375, "learning_rate": 0.0006973716102115825, "loss": 0.6131, "step": 25830 }, { "epoch": 1.2834012118804012, "grad_norm": 0.10498046875, "learning_rate": 0.0006973318764279329, "loss": 0.6023, "step": 25840 }, { "epoch": 1.2838978841760207, "grad_norm": 0.09423828125, "learning_rate": 0.0006972921426442834, "loss": 0.6053, "step": 25850 }, { "epoch": 1.2843945564716401, "grad_norm": 0.1650390625, "learning_rate": 0.0006972524088606338, "loss": 0.6479, "step": 25860 }, { "epoch": 1.2848912287672594, "grad_norm": 0.109375, "learning_rate": 0.0006972126750769842, "loss": 0.6086, "step": 25870 }, { "epoch": 1.2853879010628786, "grad_norm": 0.12255859375, "learning_rate": 0.0006971729412933348, "loss": 0.6075, "step": 25880 }, { "epoch": 1.285884573358498, "grad_norm": 0.11083984375, "learning_rate": 0.0006971332075096851, "loss": 0.616, "step": 25890 }, { "epoch": 1.2863812456541175, "grad_norm": 0.09814453125, "learning_rate": 0.0006970934737260356, "loss": 0.6025, "step": 25900 }, { "epoch": 1.2868779179497367, "grad_norm": 0.09228515625, "learning_rate": 0.0006970537399423861, "loss": 0.5992, "step": 25910 }, { "epoch": 1.2873745902453562, "grad_norm": 0.11181640625, "learning_rate": 0.0006970140061587364, "loss": 0.6205, "step": 25920 }, { "epoch": 1.2878712625409754, "grad_norm": 0.1064453125, "learning_rate": 0.000696974272375087, "loss": 0.6286, "step": 25930 }, { "epoch": 1.2883679348365948, "grad_norm": 0.095703125, "learning_rate": 0.0006969345385914375, "loss": 0.648, "step": 25940 }, { "epoch": 1.288864607132214, "grad_norm": 0.138671875, "learning_rate": 0.0006968948048077878, "loss": 0.6122, "step": 25950 }, { "epoch": 1.2893612794278335, "grad_norm": 0.1044921875, "learning_rate": 0.0006968550710241383, "loss": 0.6291, "step": 25960 }, { "epoch": 1.289857951723453, "grad_norm": 0.1064453125, "learning_rate": 0.0006968153372404887, "loss": 0.6159, "step": 25970 }, { "epoch": 1.2903546240190722, "grad_norm": 0.1083984375, "learning_rate": 0.0006967756034568392, "loss": 0.6452, "step": 25980 }, { "epoch": 1.2908512963146916, "grad_norm": 0.115234375, "learning_rate": 0.0006967358696731897, "loss": 0.6365, "step": 25990 }, { "epoch": 1.2913479686103109, "grad_norm": 0.09814453125, "learning_rate": 0.0006966961358895401, "loss": 0.6077, "step": 26000 }, { "epoch": 1.2918446409059303, "grad_norm": 0.130859375, "learning_rate": 0.0006966564021058906, "loss": 0.6097, "step": 26010 }, { "epoch": 1.2923413132015495, "grad_norm": 0.09765625, "learning_rate": 0.000696616668322241, "loss": 0.622, "step": 26020 }, { "epoch": 1.292837985497169, "grad_norm": 0.09375, "learning_rate": 0.0006965769345385914, "loss": 0.5867, "step": 26030 }, { "epoch": 1.2933346577927884, "grad_norm": 0.142578125, "learning_rate": 0.000696537200754942, "loss": 0.6268, "step": 26040 }, { "epoch": 1.2938313300884077, "grad_norm": 0.09912109375, "learning_rate": 0.0006964974669712924, "loss": 0.6172, "step": 26050 }, { "epoch": 1.294328002384027, "grad_norm": 0.126953125, "learning_rate": 0.0006964577331876428, "loss": 0.61, "step": 26060 }, { "epoch": 1.2948246746796463, "grad_norm": 0.1064453125, "learning_rate": 0.0006964179994039933, "loss": 0.6454, "step": 26070 }, { "epoch": 1.2953213469752658, "grad_norm": 0.11083984375, "learning_rate": 0.0006963782656203437, "loss": 0.6162, "step": 26080 }, { "epoch": 1.295818019270885, "grad_norm": 0.111328125, "learning_rate": 0.0006963385318366942, "loss": 0.6403, "step": 26090 }, { "epoch": 1.2963146915665045, "grad_norm": 0.10400390625, "learning_rate": 0.0006962987980530447, "loss": 0.6284, "step": 26100 }, { "epoch": 1.2968113638621237, "grad_norm": 0.08935546875, "learning_rate": 0.000696259064269395, "loss": 0.5865, "step": 26110 }, { "epoch": 1.2973080361577431, "grad_norm": 0.1298828125, "learning_rate": 0.0006962193304857455, "loss": 0.5768, "step": 26120 }, { "epoch": 1.2978047084533624, "grad_norm": 0.142578125, "learning_rate": 0.000696179596702096, "loss": 0.6388, "step": 26130 }, { "epoch": 1.2983013807489818, "grad_norm": 0.10400390625, "learning_rate": 0.0006961398629184465, "loss": 0.632, "step": 26140 }, { "epoch": 1.2987980530446013, "grad_norm": 0.119140625, "learning_rate": 0.0006961001291347969, "loss": 0.6346, "step": 26150 }, { "epoch": 1.2992947253402205, "grad_norm": 0.12353515625, "learning_rate": 0.0006960603953511473, "loss": 0.6329, "step": 26160 }, { "epoch": 1.29979139763584, "grad_norm": 0.1796875, "learning_rate": 0.0006960206615674978, "loss": 0.6127, "step": 26170 }, { "epoch": 1.3002880699314592, "grad_norm": 0.09716796875, "learning_rate": 0.0006959809277838482, "loss": 0.6357, "step": 26180 }, { "epoch": 1.3007847422270786, "grad_norm": 0.09130859375, "learning_rate": 0.0006959411940001987, "loss": 0.6332, "step": 26190 }, { "epoch": 1.3012814145226979, "grad_norm": 0.119140625, "learning_rate": 0.0006959014602165492, "loss": 0.612, "step": 26200 }, { "epoch": 1.3017780868183173, "grad_norm": 0.11181640625, "learning_rate": 0.0006958617264328996, "loss": 0.6465, "step": 26210 }, { "epoch": 1.3022747591139368, "grad_norm": 0.099609375, "learning_rate": 0.00069582199264925, "loss": 0.629, "step": 26220 }, { "epoch": 1.302771431409556, "grad_norm": 0.140625, "learning_rate": 0.0006957822588656005, "loss": 0.5934, "step": 26230 }, { "epoch": 1.3032681037051752, "grad_norm": 0.099609375, "learning_rate": 0.000695742525081951, "loss": 0.6104, "step": 26240 }, { "epoch": 1.3037647760007947, "grad_norm": 0.0927734375, "learning_rate": 0.0006957027912983014, "loss": 0.6124, "step": 26250 }, { "epoch": 1.304261448296414, "grad_norm": 0.1142578125, "learning_rate": 0.0006956630575146519, "loss": 0.6126, "step": 26260 }, { "epoch": 1.3047581205920333, "grad_norm": 0.13671875, "learning_rate": 0.0006956233237310023, "loss": 0.5908, "step": 26270 }, { "epoch": 1.3052547928876528, "grad_norm": 0.10986328125, "learning_rate": 0.0006955835899473527, "loss": 0.5986, "step": 26280 }, { "epoch": 1.305751465183272, "grad_norm": 0.09716796875, "learning_rate": 0.0006955438561637033, "loss": 0.6358, "step": 26290 }, { "epoch": 1.3062481374788915, "grad_norm": 0.1298828125, "learning_rate": 0.0006955041223800537, "loss": 0.6091, "step": 26300 }, { "epoch": 1.3067448097745107, "grad_norm": 0.1484375, "learning_rate": 0.0006954643885964041, "loss": 0.6536, "step": 26310 }, { "epoch": 1.3072414820701301, "grad_norm": 0.1025390625, "learning_rate": 0.0006954246548127546, "loss": 0.6112, "step": 26320 }, { "epoch": 1.3077381543657496, "grad_norm": 0.09423828125, "learning_rate": 0.000695384921029105, "loss": 0.6137, "step": 26330 }, { "epoch": 1.3082348266613688, "grad_norm": 0.1044921875, "learning_rate": 0.0006953451872454555, "loss": 0.6373, "step": 26340 }, { "epoch": 1.3087314989569883, "grad_norm": 0.1103515625, "learning_rate": 0.000695305453461806, "loss": 0.6115, "step": 26350 }, { "epoch": 1.3092281712526075, "grad_norm": 0.099609375, "learning_rate": 0.0006952657196781564, "loss": 0.6095, "step": 26360 }, { "epoch": 1.309724843548227, "grad_norm": 0.11767578125, "learning_rate": 0.0006952259858945068, "loss": 0.594, "step": 26370 }, { "epoch": 1.3102215158438462, "grad_norm": 0.11767578125, "learning_rate": 0.0006951862521108572, "loss": 0.6177, "step": 26380 }, { "epoch": 1.3107181881394656, "grad_norm": 0.11669921875, "learning_rate": 0.0006951465183272078, "loss": 0.5909, "step": 26390 }, { "epoch": 1.311214860435085, "grad_norm": 0.0986328125, "learning_rate": 0.0006951067845435582, "loss": 0.5882, "step": 26400 }, { "epoch": 1.3117115327307043, "grad_norm": 0.12353515625, "learning_rate": 0.0006950670507599086, "loss": 0.6251, "step": 26410 }, { "epoch": 1.3122082050263235, "grad_norm": 0.10693359375, "learning_rate": 0.0006950273169762591, "loss": 0.6158, "step": 26420 }, { "epoch": 1.312704877321943, "grad_norm": 0.1318359375, "learning_rate": 0.0006949875831926095, "loss": 0.636, "step": 26430 }, { "epoch": 1.3132015496175624, "grad_norm": 0.1015625, "learning_rate": 0.00069494784940896, "loss": 0.6297, "step": 26440 }, { "epoch": 1.3136982219131816, "grad_norm": 0.11669921875, "learning_rate": 0.0006949081156253105, "loss": 0.5981, "step": 26450 }, { "epoch": 1.314194894208801, "grad_norm": 0.1005859375, "learning_rate": 0.0006948683818416609, "loss": 0.6133, "step": 26460 }, { "epoch": 1.3146915665044203, "grad_norm": 0.1123046875, "learning_rate": 0.0006948286480580113, "loss": 0.6271, "step": 26470 }, { "epoch": 1.3151882388000398, "grad_norm": 0.11376953125, "learning_rate": 0.0006947889142743618, "loss": 0.6233, "step": 26480 }, { "epoch": 1.315684911095659, "grad_norm": 0.11474609375, "learning_rate": 0.0006947491804907123, "loss": 0.6188, "step": 26490 }, { "epoch": 1.3161815833912784, "grad_norm": 0.099609375, "learning_rate": 0.0006947094467070627, "loss": 0.588, "step": 26500 }, { "epoch": 1.316678255686898, "grad_norm": 0.0966796875, "learning_rate": 0.0006946697129234132, "loss": 0.6201, "step": 26510 }, { "epoch": 1.3171749279825171, "grad_norm": 0.107421875, "learning_rate": 0.0006946299791397636, "loss": 0.6294, "step": 26520 }, { "epoch": 1.3176716002781366, "grad_norm": 0.1171875, "learning_rate": 0.000694590245356114, "loss": 0.6203, "step": 26530 }, { "epoch": 1.3181682725737558, "grad_norm": 0.09521484375, "learning_rate": 0.0006945505115724646, "loss": 0.6211, "step": 26540 }, { "epoch": 1.3186649448693752, "grad_norm": 0.125, "learning_rate": 0.000694510777788815, "loss": 0.6181, "step": 26550 }, { "epoch": 1.3191616171649945, "grad_norm": 0.220703125, "learning_rate": 0.0006944710440051654, "loss": 0.6553, "step": 26560 }, { "epoch": 1.319658289460614, "grad_norm": 0.09814453125, "learning_rate": 0.0006944313102215158, "loss": 0.5941, "step": 26570 }, { "epoch": 1.3201549617562334, "grad_norm": 0.12255859375, "learning_rate": 0.0006943915764378663, "loss": 0.6245, "step": 26580 }, { "epoch": 1.3206516340518526, "grad_norm": 0.09130859375, "learning_rate": 0.0006943518426542169, "loss": 0.6374, "step": 26590 }, { "epoch": 1.3211483063474718, "grad_norm": 0.1630859375, "learning_rate": 0.0006943121088705672, "loss": 0.6443, "step": 26600 }, { "epoch": 1.3216449786430913, "grad_norm": 0.1962890625, "learning_rate": 0.0006942723750869177, "loss": 0.6219, "step": 26610 }, { "epoch": 1.3221416509387107, "grad_norm": 0.1025390625, "learning_rate": 0.0006942326413032682, "loss": 0.6492, "step": 26620 }, { "epoch": 1.32263832323433, "grad_norm": 0.087890625, "learning_rate": 0.0006941929075196185, "loss": 0.6174, "step": 26630 }, { "epoch": 1.3231349955299494, "grad_norm": 0.11474609375, "learning_rate": 0.0006941531737359691, "loss": 0.5641, "step": 26640 }, { "epoch": 1.3236316678255686, "grad_norm": 0.1044921875, "learning_rate": 0.0006941134399523195, "loss": 0.6142, "step": 26650 }, { "epoch": 1.324128340121188, "grad_norm": 0.11376953125, "learning_rate": 0.0006940737061686699, "loss": 0.6077, "step": 26660 }, { "epoch": 1.3246250124168073, "grad_norm": 0.099609375, "learning_rate": 0.0006940339723850204, "loss": 0.6305, "step": 26670 }, { "epoch": 1.3251216847124268, "grad_norm": 0.10498046875, "learning_rate": 0.0006939942386013708, "loss": 0.6343, "step": 26680 }, { "epoch": 1.3256183570080462, "grad_norm": 0.10205078125, "learning_rate": 0.0006939545048177213, "loss": 0.5751, "step": 26690 }, { "epoch": 1.3261150293036654, "grad_norm": 0.09814453125, "learning_rate": 0.0006939147710340718, "loss": 0.6058, "step": 26700 }, { "epoch": 1.3266117015992847, "grad_norm": 0.1171875, "learning_rate": 0.0006938750372504222, "loss": 0.6087, "step": 26710 }, { "epoch": 1.327108373894904, "grad_norm": 0.1357421875, "learning_rate": 0.0006938353034667726, "loss": 0.6055, "step": 26720 }, { "epoch": 1.3276050461905236, "grad_norm": 0.09912109375, "learning_rate": 0.0006937955696831231, "loss": 0.6121, "step": 26730 }, { "epoch": 1.3281017184861428, "grad_norm": 0.10400390625, "learning_rate": 0.0006937558358994736, "loss": 0.5855, "step": 26740 }, { "epoch": 1.3285983907817622, "grad_norm": 0.1630859375, "learning_rate": 0.0006937161021158241, "loss": 0.5932, "step": 26750 }, { "epoch": 1.3290950630773817, "grad_norm": 0.1064453125, "learning_rate": 0.0006936763683321744, "loss": 0.6314, "step": 26760 }, { "epoch": 1.329591735373001, "grad_norm": 0.0986328125, "learning_rate": 0.0006936366345485249, "loss": 0.6129, "step": 26770 }, { "epoch": 1.3300884076686201, "grad_norm": 0.10009765625, "learning_rate": 0.0006935969007648754, "loss": 0.5807, "step": 26780 }, { "epoch": 1.3305850799642396, "grad_norm": 0.12158203125, "learning_rate": 0.0006935571669812257, "loss": 0.5593, "step": 26790 }, { "epoch": 1.331081752259859, "grad_norm": 0.09912109375, "learning_rate": 0.0006935174331975763, "loss": 0.6105, "step": 26800 }, { "epoch": 1.3315784245554783, "grad_norm": 0.1474609375, "learning_rate": 0.0006934776994139268, "loss": 0.589, "step": 26810 }, { "epoch": 1.3320750968510977, "grad_norm": 0.123046875, "learning_rate": 0.0006934379656302771, "loss": 0.6002, "step": 26820 }, { "epoch": 1.332571769146717, "grad_norm": 0.10693359375, "learning_rate": 0.0006933982318466276, "loss": 0.5998, "step": 26830 }, { "epoch": 1.3330684414423364, "grad_norm": 0.10888671875, "learning_rate": 0.000693358498062978, "loss": 0.6258, "step": 26840 }, { "epoch": 1.3335651137379556, "grad_norm": 0.0947265625, "learning_rate": 0.0006933187642793285, "loss": 0.628, "step": 26850 }, { "epoch": 1.334061786033575, "grad_norm": 0.15625, "learning_rate": 0.000693279030495679, "loss": 0.6142, "step": 26860 }, { "epoch": 1.3345584583291945, "grad_norm": 0.11376953125, "learning_rate": 0.0006932392967120294, "loss": 0.6212, "step": 26870 }, { "epoch": 1.3350551306248137, "grad_norm": 0.10693359375, "learning_rate": 0.0006931995629283799, "loss": 0.6142, "step": 26880 }, { "epoch": 1.335551802920433, "grad_norm": 0.1572265625, "learning_rate": 0.0006931598291447304, "loss": 0.6429, "step": 26890 }, { "epoch": 1.3360484752160524, "grad_norm": 0.09228515625, "learning_rate": 0.0006931200953610808, "loss": 0.6278, "step": 26900 }, { "epoch": 1.3365451475116719, "grad_norm": 0.1611328125, "learning_rate": 0.0006930803615774313, "loss": 0.6065, "step": 26910 }, { "epoch": 1.337041819807291, "grad_norm": 0.16796875, "learning_rate": 0.0006930406277937817, "loss": 0.62, "step": 26920 }, { "epoch": 1.3375384921029105, "grad_norm": 0.10009765625, "learning_rate": 0.0006930008940101321, "loss": 0.6124, "step": 26930 }, { "epoch": 1.33803516439853, "grad_norm": 0.150390625, "learning_rate": 0.0006929611602264827, "loss": 0.618, "step": 26940 }, { "epoch": 1.3385318366941492, "grad_norm": 0.1103515625, "learning_rate": 0.0006929214264428331, "loss": 0.6175, "step": 26950 }, { "epoch": 1.3390285089897684, "grad_norm": 0.1005859375, "learning_rate": 0.0006928816926591835, "loss": 0.6139, "step": 26960 }, { "epoch": 1.339525181285388, "grad_norm": 0.09765625, "learning_rate": 0.000692841958875534, "loss": 0.6035, "step": 26970 }, { "epoch": 1.3400218535810073, "grad_norm": 0.1201171875, "learning_rate": 0.0006928022250918843, "loss": 0.6273, "step": 26980 }, { "epoch": 1.3405185258766266, "grad_norm": 0.0986328125, "learning_rate": 0.0006927624913082349, "loss": 0.5981, "step": 26990 }, { "epoch": 1.341015198172246, "grad_norm": 0.15625, "learning_rate": 0.0006927227575245854, "loss": 0.6299, "step": 27000 }, { "epoch": 1.3415118704678652, "grad_norm": 0.107421875, "learning_rate": 0.0006926830237409357, "loss": 0.6188, "step": 27010 }, { "epoch": 1.3420085427634847, "grad_norm": 0.10888671875, "learning_rate": 0.0006926432899572862, "loss": 0.6372, "step": 27020 }, { "epoch": 1.342505215059104, "grad_norm": 0.11572265625, "learning_rate": 0.0006926035561736366, "loss": 0.6362, "step": 27030 }, { "epoch": 1.3430018873547234, "grad_norm": 0.1162109375, "learning_rate": 0.0006925638223899872, "loss": 0.6245, "step": 27040 }, { "epoch": 1.3434985596503428, "grad_norm": 0.10693359375, "learning_rate": 0.0006925240886063376, "loss": 0.6176, "step": 27050 }, { "epoch": 1.343995231945962, "grad_norm": 0.0986328125, "learning_rate": 0.000692484354822688, "loss": 0.5695, "step": 27060 }, { "epoch": 1.3444919042415813, "grad_norm": 0.095703125, "learning_rate": 0.0006924446210390385, "loss": 0.6038, "step": 27070 }, { "epoch": 1.3449885765372007, "grad_norm": 0.09423828125, "learning_rate": 0.0006924048872553889, "loss": 0.6044, "step": 27080 }, { "epoch": 1.3454852488328202, "grad_norm": 0.177734375, "learning_rate": 0.0006923651534717393, "loss": 0.606, "step": 27090 }, { "epoch": 1.3459819211284394, "grad_norm": 0.1767578125, "learning_rate": 0.0006923254196880899, "loss": 0.5946, "step": 27100 }, { "epoch": 1.3464785934240588, "grad_norm": 0.1044921875, "learning_rate": 0.0006922856859044403, "loss": 0.6438, "step": 27110 }, { "epoch": 1.3469752657196783, "grad_norm": 0.1171875, "learning_rate": 0.0006922459521207907, "loss": 0.5927, "step": 27120 }, { "epoch": 1.3474719380152975, "grad_norm": 0.1611328125, "learning_rate": 0.0006922062183371412, "loss": 0.5905, "step": 27130 }, { "epoch": 1.3479686103109167, "grad_norm": 0.10107421875, "learning_rate": 0.0006921664845534917, "loss": 0.6242, "step": 27140 }, { "epoch": 1.3484652826065362, "grad_norm": 0.10498046875, "learning_rate": 0.0006921267507698421, "loss": 0.6159, "step": 27150 }, { "epoch": 1.3489619549021556, "grad_norm": 0.09326171875, "learning_rate": 0.0006920870169861926, "loss": 0.6114, "step": 27160 }, { "epoch": 1.3494586271977749, "grad_norm": 0.1513671875, "learning_rate": 0.000692047283202543, "loss": 0.5995, "step": 27170 }, { "epoch": 1.3499552994933943, "grad_norm": 0.10693359375, "learning_rate": 0.0006920075494188934, "loss": 0.6224, "step": 27180 }, { "epoch": 1.3504519717890135, "grad_norm": 0.10791015625, "learning_rate": 0.000691967815635244, "loss": 0.6009, "step": 27190 }, { "epoch": 1.350948644084633, "grad_norm": 0.142578125, "learning_rate": 0.0006919280818515944, "loss": 0.5902, "step": 27200 }, { "epoch": 1.3514453163802522, "grad_norm": 0.1083984375, "learning_rate": 0.0006918883480679448, "loss": 0.606, "step": 27210 }, { "epoch": 1.3519419886758717, "grad_norm": 0.0966796875, "learning_rate": 0.0006918486142842953, "loss": 0.6061, "step": 27220 }, { "epoch": 1.3524386609714911, "grad_norm": 0.08984375, "learning_rate": 0.0006918088805006457, "loss": 0.6187, "step": 27230 }, { "epoch": 1.3529353332671104, "grad_norm": 0.10302734375, "learning_rate": 0.0006917691467169961, "loss": 0.6309, "step": 27240 }, { "epoch": 1.3534320055627296, "grad_norm": 0.1005859375, "learning_rate": 0.0006917294129333466, "loss": 0.625, "step": 27250 }, { "epoch": 1.353928677858349, "grad_norm": 0.10205078125, "learning_rate": 0.0006916896791496971, "loss": 0.6102, "step": 27260 }, { "epoch": 1.3544253501539685, "grad_norm": 0.11669921875, "learning_rate": 0.0006916499453660475, "loss": 0.6332, "step": 27270 }, { "epoch": 1.3549220224495877, "grad_norm": 0.10546875, "learning_rate": 0.0006916102115823979, "loss": 0.5904, "step": 27280 }, { "epoch": 1.3554186947452072, "grad_norm": 0.1005859375, "learning_rate": 0.0006915704777987484, "loss": 0.6378, "step": 27290 }, { "epoch": 1.3559153670408264, "grad_norm": 0.109375, "learning_rate": 0.0006915307440150989, "loss": 0.61, "step": 27300 }, { "epoch": 1.3564120393364458, "grad_norm": 0.0947265625, "learning_rate": 0.0006914910102314493, "loss": 0.6269, "step": 27310 }, { "epoch": 1.356908711632065, "grad_norm": 0.10791015625, "learning_rate": 0.0006914512764477998, "loss": 0.603, "step": 27320 }, { "epoch": 1.3574053839276845, "grad_norm": 0.10205078125, "learning_rate": 0.0006914115426641502, "loss": 0.619, "step": 27330 }, { "epoch": 1.357902056223304, "grad_norm": 0.103515625, "learning_rate": 0.0006913718088805006, "loss": 0.6086, "step": 27340 }, { "epoch": 1.3583987285189232, "grad_norm": 0.09228515625, "learning_rate": 0.0006913320750968512, "loss": 0.608, "step": 27350 }, { "epoch": 1.3588954008145426, "grad_norm": 0.2197265625, "learning_rate": 0.0006912923413132016, "loss": 0.595, "step": 27360 }, { "epoch": 1.3593920731101619, "grad_norm": 0.1064453125, "learning_rate": 0.000691252607529552, "loss": 0.6164, "step": 27370 }, { "epoch": 1.3598887454057813, "grad_norm": 0.1103515625, "learning_rate": 0.0006912128737459025, "loss": 0.6428, "step": 27380 }, { "epoch": 1.3603854177014005, "grad_norm": 0.0986328125, "learning_rate": 0.000691173139962253, "loss": 0.6131, "step": 27390 }, { "epoch": 1.36088208999702, "grad_norm": 0.146484375, "learning_rate": 0.0006911334061786034, "loss": 0.6363, "step": 27400 }, { "epoch": 1.3613787622926394, "grad_norm": 0.0888671875, "learning_rate": 0.0006910936723949539, "loss": 0.5877, "step": 27410 }, { "epoch": 1.3618754345882587, "grad_norm": 0.11376953125, "learning_rate": 0.0006910539386113043, "loss": 0.6, "step": 27420 }, { "epoch": 1.3623721068838779, "grad_norm": 0.11181640625, "learning_rate": 0.0006910142048276547, "loss": 0.6287, "step": 27430 }, { "epoch": 1.3628687791794973, "grad_norm": 0.10693359375, "learning_rate": 0.0006909744710440051, "loss": 0.6438, "step": 27440 }, { "epoch": 1.3633654514751168, "grad_norm": 0.11474609375, "learning_rate": 0.0006909347372603557, "loss": 0.6215, "step": 27450 }, { "epoch": 1.363862123770736, "grad_norm": 0.10595703125, "learning_rate": 0.0006908950034767061, "loss": 0.6013, "step": 27460 }, { "epoch": 1.3643587960663555, "grad_norm": 0.1123046875, "learning_rate": 0.0006908552696930565, "loss": 0.6294, "step": 27470 }, { "epoch": 1.3648554683619747, "grad_norm": 0.1015625, "learning_rate": 0.000690815535909407, "loss": 0.6018, "step": 27480 }, { "epoch": 1.3653521406575941, "grad_norm": 0.10400390625, "learning_rate": 0.0006907758021257576, "loss": 0.5911, "step": 27490 }, { "epoch": 1.3658488129532134, "grad_norm": 0.10888671875, "learning_rate": 0.0006907360683421079, "loss": 0.6479, "step": 27500 }, { "epoch": 1.3663454852488328, "grad_norm": 0.1240234375, "learning_rate": 0.0006906963345584584, "loss": 0.6117, "step": 27510 }, { "epoch": 1.3668421575444523, "grad_norm": 0.10009765625, "learning_rate": 0.0006906566007748088, "loss": 0.6127, "step": 27520 }, { "epoch": 1.3673388298400715, "grad_norm": 0.099609375, "learning_rate": 0.0006906168669911592, "loss": 0.6233, "step": 27530 }, { "epoch": 1.367835502135691, "grad_norm": 0.09814453125, "learning_rate": 0.0006905771332075097, "loss": 0.6019, "step": 27540 }, { "epoch": 1.3683321744313102, "grad_norm": 0.15234375, "learning_rate": 0.0006905373994238602, "loss": 0.6169, "step": 27550 }, { "epoch": 1.3688288467269296, "grad_norm": 0.115234375, "learning_rate": 0.0006904976656402106, "loss": 0.5841, "step": 27560 }, { "epoch": 1.3693255190225488, "grad_norm": 0.08740234375, "learning_rate": 0.0006904579318565611, "loss": 0.6046, "step": 27570 }, { "epoch": 1.3698221913181683, "grad_norm": 0.0966796875, "learning_rate": 0.0006904181980729115, "loss": 0.609, "step": 27580 }, { "epoch": 1.3703188636137877, "grad_norm": 0.09814453125, "learning_rate": 0.0006903784642892619, "loss": 0.6145, "step": 27590 }, { "epoch": 1.370815535909407, "grad_norm": 0.11767578125, "learning_rate": 0.0006903387305056125, "loss": 0.6483, "step": 27600 }, { "epoch": 1.3713122082050262, "grad_norm": 0.1298828125, "learning_rate": 0.0006902989967219629, "loss": 0.6199, "step": 27610 }, { "epoch": 1.3718088805006456, "grad_norm": 0.11376953125, "learning_rate": 0.0006902592629383134, "loss": 0.6271, "step": 27620 }, { "epoch": 1.372305552796265, "grad_norm": 0.09521484375, "learning_rate": 0.0006902195291546637, "loss": 0.6004, "step": 27630 }, { "epoch": 1.3728022250918843, "grad_norm": 0.1240234375, "learning_rate": 0.0006901797953710142, "loss": 0.6027, "step": 27640 }, { "epoch": 1.3732988973875038, "grad_norm": 0.1884765625, "learning_rate": 0.0006901400615873648, "loss": 0.6139, "step": 27650 }, { "epoch": 1.373795569683123, "grad_norm": 0.10546875, "learning_rate": 0.0006901003278037151, "loss": 0.6204, "step": 27660 }, { "epoch": 1.3742922419787424, "grad_norm": 0.1572265625, "learning_rate": 0.0006900605940200656, "loss": 0.5861, "step": 27670 }, { "epoch": 1.3747889142743617, "grad_norm": 0.10693359375, "learning_rate": 0.0006900208602364161, "loss": 0.6278, "step": 27680 }, { "epoch": 1.3752855865699811, "grad_norm": 0.1328125, "learning_rate": 0.0006899811264527664, "loss": 0.6139, "step": 27690 }, { "epoch": 1.3757822588656006, "grad_norm": 0.099609375, "learning_rate": 0.000689941392669117, "loss": 0.6202, "step": 27700 }, { "epoch": 1.3762789311612198, "grad_norm": 0.11572265625, "learning_rate": 0.0006899016588854674, "loss": 0.5959, "step": 27710 }, { "epoch": 1.3767756034568392, "grad_norm": 0.09912109375, "learning_rate": 0.0006898619251018178, "loss": 0.6025, "step": 27720 }, { "epoch": 1.3772722757524585, "grad_norm": 0.09912109375, "learning_rate": 0.0006898221913181683, "loss": 0.5889, "step": 27730 }, { "epoch": 1.377768948048078, "grad_norm": 0.16796875, "learning_rate": 0.0006897824575345187, "loss": 0.6065, "step": 27740 }, { "epoch": 1.3782656203436972, "grad_norm": 0.1318359375, "learning_rate": 0.0006897427237508692, "loss": 0.5845, "step": 27750 }, { "epoch": 1.3787622926393166, "grad_norm": 0.1064453125, "learning_rate": 0.0006897029899672197, "loss": 0.5899, "step": 27760 }, { "epoch": 1.379258964934936, "grad_norm": 0.0947265625, "learning_rate": 0.0006896632561835701, "loss": 0.6149, "step": 27770 }, { "epoch": 1.3797556372305553, "grad_norm": 0.103515625, "learning_rate": 0.0006896235223999206, "loss": 0.6169, "step": 27780 }, { "epoch": 1.3802523095261745, "grad_norm": 0.095703125, "learning_rate": 0.000689583788616271, "loss": 0.6069, "step": 27790 }, { "epoch": 1.380748981821794, "grad_norm": 0.10888671875, "learning_rate": 0.0006895440548326215, "loss": 0.6278, "step": 27800 }, { "epoch": 1.3812456541174134, "grad_norm": 0.12060546875, "learning_rate": 0.000689504321048972, "loss": 0.5892, "step": 27810 }, { "epoch": 1.3817423264130326, "grad_norm": 0.12158203125, "learning_rate": 0.0006894645872653224, "loss": 0.6149, "step": 27820 }, { "epoch": 1.382238998708652, "grad_norm": 0.1416015625, "learning_rate": 0.0006894248534816728, "loss": 0.6042, "step": 27830 }, { "epoch": 1.3827356710042713, "grad_norm": 0.107421875, "learning_rate": 0.0006893851196980233, "loss": 0.6224, "step": 27840 }, { "epoch": 1.3832323432998908, "grad_norm": 0.09814453125, "learning_rate": 0.0006893453859143737, "loss": 0.6311, "step": 27850 }, { "epoch": 1.38372901559551, "grad_norm": 0.11083984375, "learning_rate": 0.0006893056521307242, "loss": 0.6114, "step": 27860 }, { "epoch": 1.3842256878911294, "grad_norm": 0.134765625, "learning_rate": 0.0006892659183470747, "loss": 0.6021, "step": 27870 }, { "epoch": 1.3847223601867489, "grad_norm": 0.12255859375, "learning_rate": 0.000689226184563425, "loss": 0.6095, "step": 27880 }, { "epoch": 1.385219032482368, "grad_norm": 0.09912109375, "learning_rate": 0.0006891864507797755, "loss": 0.5971, "step": 27890 }, { "epoch": 1.3857157047779876, "grad_norm": 0.109375, "learning_rate": 0.000689146716996126, "loss": 0.5634, "step": 27900 }, { "epoch": 1.3862123770736068, "grad_norm": 0.12158203125, "learning_rate": 0.0006891069832124764, "loss": 0.6128, "step": 27910 }, { "epoch": 1.3867090493692262, "grad_norm": 0.138671875, "learning_rate": 0.0006890672494288269, "loss": 0.6113, "step": 27920 }, { "epoch": 1.3872057216648455, "grad_norm": 0.130859375, "learning_rate": 0.0006890275156451773, "loss": 0.5969, "step": 27930 }, { "epoch": 1.387702393960465, "grad_norm": 0.12158203125, "learning_rate": 0.0006889877818615278, "loss": 0.6106, "step": 27940 }, { "epoch": 1.3881990662560844, "grad_norm": 0.10791015625, "learning_rate": 0.0006889480480778783, "loss": 0.6181, "step": 27950 }, { "epoch": 1.3886957385517036, "grad_norm": 0.1015625, "learning_rate": 0.0006889083142942287, "loss": 0.6144, "step": 27960 }, { "epoch": 1.3891924108473228, "grad_norm": 0.1015625, "learning_rate": 0.0006888685805105792, "loss": 0.5952, "step": 27970 }, { "epoch": 1.3896890831429423, "grad_norm": 0.0927734375, "learning_rate": 0.0006888288467269296, "loss": 0.6273, "step": 27980 }, { "epoch": 1.3901857554385617, "grad_norm": 0.09814453125, "learning_rate": 0.00068878911294328, "loss": 0.611, "step": 27990 }, { "epoch": 1.390682427734181, "grad_norm": 0.10888671875, "learning_rate": 0.0006887493791596306, "loss": 0.6432, "step": 28000 }, { "epoch": 1.3911791000298004, "grad_norm": 0.1318359375, "learning_rate": 0.000688709645375981, "loss": 0.5919, "step": 28010 }, { "epoch": 1.3916757723254196, "grad_norm": 0.150390625, "learning_rate": 0.0006886699115923314, "loss": 0.5956, "step": 28020 }, { "epoch": 1.392172444621039, "grad_norm": 0.0927734375, "learning_rate": 0.0006886301778086819, "loss": 0.6022, "step": 28030 }, { "epoch": 1.3926691169166583, "grad_norm": 0.1064453125, "learning_rate": 0.0006885904440250322, "loss": 0.6028, "step": 28040 }, { "epoch": 1.3931657892122777, "grad_norm": 0.10498046875, "learning_rate": 0.0006885507102413828, "loss": 0.6129, "step": 28050 }, { "epoch": 1.3936624615078972, "grad_norm": 0.1650390625, "learning_rate": 0.0006885109764577333, "loss": 0.5869, "step": 28060 }, { "epoch": 1.3941591338035164, "grad_norm": 0.09814453125, "learning_rate": 0.0006884712426740837, "loss": 0.6021, "step": 28070 }, { "epoch": 1.3946558060991359, "grad_norm": 0.11572265625, "learning_rate": 0.0006884315088904341, "loss": 0.6046, "step": 28080 }, { "epoch": 1.395152478394755, "grad_norm": 0.12890625, "learning_rate": 0.0006883917751067846, "loss": 0.603, "step": 28090 }, { "epoch": 1.3956491506903745, "grad_norm": 0.12255859375, "learning_rate": 0.0006883520413231351, "loss": 0.614, "step": 28100 }, { "epoch": 1.3961458229859938, "grad_norm": 0.11572265625, "learning_rate": 0.0006883123075394855, "loss": 0.5915, "step": 28110 }, { "epoch": 1.3966424952816132, "grad_norm": 0.140625, "learning_rate": 0.0006882725737558359, "loss": 0.6009, "step": 28120 }, { "epoch": 1.3971391675772327, "grad_norm": 0.1552734375, "learning_rate": 0.0006882328399721864, "loss": 0.6098, "step": 28130 }, { "epoch": 1.397635839872852, "grad_norm": 0.10986328125, "learning_rate": 0.0006881931061885368, "loss": 0.6106, "step": 28140 }, { "epoch": 1.3981325121684711, "grad_norm": 0.09814453125, "learning_rate": 0.0006881533724048873, "loss": 0.6075, "step": 28150 }, { "epoch": 1.3986291844640906, "grad_norm": 0.10693359375, "learning_rate": 0.0006881136386212378, "loss": 0.6005, "step": 28160 }, { "epoch": 1.39912585675971, "grad_norm": 0.11279296875, "learning_rate": 0.0006880739048375882, "loss": 0.5895, "step": 28170 }, { "epoch": 1.3996225290553292, "grad_norm": 0.10009765625, "learning_rate": 0.0006880341710539386, "loss": 0.6271, "step": 28180 }, { "epoch": 1.4001192013509487, "grad_norm": 0.095703125, "learning_rate": 0.0006879944372702891, "loss": 0.6261, "step": 28190 }, { "epoch": 1.400615873646568, "grad_norm": 0.1298828125, "learning_rate": 0.0006879547034866396, "loss": 0.6023, "step": 28200 }, { "epoch": 1.4011125459421874, "grad_norm": 0.11767578125, "learning_rate": 0.00068791496970299, "loss": 0.64, "step": 28210 }, { "epoch": 1.4016092182378066, "grad_norm": 0.16015625, "learning_rate": 0.0006878752359193405, "loss": 0.5955, "step": 28220 }, { "epoch": 1.402105890533426, "grad_norm": 0.10986328125, "learning_rate": 0.0006878355021356909, "loss": 0.5777, "step": 28230 }, { "epoch": 1.4026025628290455, "grad_norm": 0.11083984375, "learning_rate": 0.0006877957683520413, "loss": 0.6048, "step": 28240 }, { "epoch": 1.4030992351246647, "grad_norm": 0.1357421875, "learning_rate": 0.0006877560345683919, "loss": 0.6006, "step": 28250 }, { "epoch": 1.4035959074202842, "grad_norm": 0.134765625, "learning_rate": 0.0006877163007847423, "loss": 0.5771, "step": 28260 }, { "epoch": 1.4040925797159034, "grad_norm": 0.09326171875, "learning_rate": 0.0006876765670010927, "loss": 0.6036, "step": 28270 }, { "epoch": 1.4045892520115228, "grad_norm": 0.1044921875, "learning_rate": 0.0006876368332174432, "loss": 0.5789, "step": 28280 }, { "epoch": 1.405085924307142, "grad_norm": 0.109375, "learning_rate": 0.0006875970994337936, "loss": 0.6106, "step": 28290 }, { "epoch": 1.4055825966027615, "grad_norm": 0.1640625, "learning_rate": 0.000687557365650144, "loss": 0.6219, "step": 28300 }, { "epoch": 1.406079268898381, "grad_norm": 0.11767578125, "learning_rate": 0.0006875176318664945, "loss": 0.6348, "step": 28310 }, { "epoch": 1.4065759411940002, "grad_norm": 0.12255859375, "learning_rate": 0.000687477898082845, "loss": 0.6038, "step": 28320 }, { "epoch": 1.4070726134896194, "grad_norm": 0.1103515625, "learning_rate": 0.0006874381642991954, "loss": 0.5846, "step": 28330 }, { "epoch": 1.4075692857852389, "grad_norm": 0.1171875, "learning_rate": 0.0006873984305155458, "loss": 0.6066, "step": 28340 }, { "epoch": 1.4080659580808583, "grad_norm": 0.11962890625, "learning_rate": 0.0006873586967318964, "loss": 0.6239, "step": 28350 }, { "epoch": 1.4085626303764776, "grad_norm": 0.095703125, "learning_rate": 0.0006873189629482468, "loss": 0.5947, "step": 28360 }, { "epoch": 1.409059302672097, "grad_norm": 0.1611328125, "learning_rate": 0.0006872792291645972, "loss": 0.615, "step": 28370 }, { "epoch": 1.4095559749677162, "grad_norm": 0.103515625, "learning_rate": 0.0006872394953809477, "loss": 0.5876, "step": 28380 }, { "epoch": 1.4100526472633357, "grad_norm": 0.10498046875, "learning_rate": 0.0006871997615972981, "loss": 0.6149, "step": 28390 }, { "epoch": 1.410549319558955, "grad_norm": 0.15625, "learning_rate": 0.0006871600278136485, "loss": 0.5882, "step": 28400 }, { "epoch": 1.4110459918545744, "grad_norm": 0.09912109375, "learning_rate": 0.0006871202940299991, "loss": 0.6271, "step": 28410 }, { "epoch": 1.4115426641501938, "grad_norm": 0.123046875, "learning_rate": 0.0006870805602463495, "loss": 0.5835, "step": 28420 }, { "epoch": 1.412039336445813, "grad_norm": 0.1650390625, "learning_rate": 0.0006870408264626999, "loss": 0.6312, "step": 28430 }, { "epoch": 1.4125360087414325, "grad_norm": 0.0947265625, "learning_rate": 0.0006870010926790504, "loss": 0.5719, "step": 28440 }, { "epoch": 1.4130326810370517, "grad_norm": 0.146484375, "learning_rate": 0.0006869613588954009, "loss": 0.5973, "step": 28450 }, { "epoch": 1.4135293533326712, "grad_norm": 0.09521484375, "learning_rate": 0.0006869216251117513, "loss": 0.6259, "step": 28460 }, { "epoch": 1.4140260256282904, "grad_norm": 0.09814453125, "learning_rate": 0.0006868818913281018, "loss": 0.6359, "step": 28470 }, { "epoch": 1.4145226979239098, "grad_norm": 0.09619140625, "learning_rate": 0.0006868421575444522, "loss": 0.5925, "step": 28480 }, { "epoch": 1.4150193702195293, "grad_norm": 0.1328125, "learning_rate": 0.0006868024237608026, "loss": 0.5965, "step": 28490 }, { "epoch": 1.4155160425151485, "grad_norm": 0.125, "learning_rate": 0.000686762689977153, "loss": 0.6216, "step": 28500 }, { "epoch": 1.4160127148107677, "grad_norm": 0.12451171875, "learning_rate": 0.0006867229561935036, "loss": 0.5953, "step": 28510 }, { "epoch": 1.4165093871063872, "grad_norm": 0.10107421875, "learning_rate": 0.0006866832224098541, "loss": 0.6156, "step": 28520 }, { "epoch": 1.4170060594020066, "grad_norm": 0.09716796875, "learning_rate": 0.0006866434886262044, "loss": 0.6186, "step": 28530 }, { "epoch": 1.4175027316976259, "grad_norm": 0.1416015625, "learning_rate": 0.0006866037548425549, "loss": 0.6166, "step": 28540 }, { "epoch": 1.4179994039932453, "grad_norm": 0.09326171875, "learning_rate": 0.0006865640210589055, "loss": 0.5936, "step": 28550 }, { "epoch": 1.4184960762888645, "grad_norm": 0.10546875, "learning_rate": 0.0006865242872752558, "loss": 0.6375, "step": 28560 }, { "epoch": 1.418992748584484, "grad_norm": 0.115234375, "learning_rate": 0.0006864845534916063, "loss": 0.6165, "step": 28570 }, { "epoch": 1.4194894208801032, "grad_norm": 0.11083984375, "learning_rate": 0.0006864448197079567, "loss": 0.5936, "step": 28580 }, { "epoch": 1.4199860931757227, "grad_norm": 0.10009765625, "learning_rate": 0.0006864050859243071, "loss": 0.5865, "step": 28590 }, { "epoch": 1.4204827654713421, "grad_norm": 0.09423828125, "learning_rate": 0.0006863653521406576, "loss": 0.5765, "step": 28600 }, { "epoch": 1.4209794377669613, "grad_norm": 0.103515625, "learning_rate": 0.0006863256183570081, "loss": 0.5794, "step": 28610 }, { "epoch": 1.4214761100625808, "grad_norm": 0.10205078125, "learning_rate": 0.0006862858845733585, "loss": 0.6259, "step": 28620 }, { "epoch": 1.4219727823582, "grad_norm": 0.11181640625, "learning_rate": 0.000686246150789709, "loss": 0.6048, "step": 28630 }, { "epoch": 1.4224694546538195, "grad_norm": 0.1728515625, "learning_rate": 0.0006862064170060594, "loss": 0.606, "step": 28640 }, { "epoch": 1.4229661269494387, "grad_norm": 0.09375, "learning_rate": 0.0006861666832224098, "loss": 0.6099, "step": 28650 }, { "epoch": 1.4234627992450581, "grad_norm": 0.1337890625, "learning_rate": 0.0006861269494387604, "loss": 0.6104, "step": 28660 }, { "epoch": 1.4239594715406776, "grad_norm": 0.10302734375, "learning_rate": 0.0006860872156551108, "loss": 0.6408, "step": 28670 }, { "epoch": 1.4244561438362968, "grad_norm": 0.1025390625, "learning_rate": 0.0006860474818714613, "loss": 0.6107, "step": 28680 }, { "epoch": 1.424952816131916, "grad_norm": 0.099609375, "learning_rate": 0.0006860077480878117, "loss": 0.6043, "step": 28690 }, { "epoch": 1.4254494884275355, "grad_norm": 0.095703125, "learning_rate": 0.0006859680143041621, "loss": 0.6196, "step": 28700 }, { "epoch": 1.425946160723155, "grad_norm": 0.11962890625, "learning_rate": 0.0006859282805205127, "loss": 0.5761, "step": 28710 }, { "epoch": 1.4264428330187742, "grad_norm": 0.146484375, "learning_rate": 0.000685888546736863, "loss": 0.61, "step": 28720 }, { "epoch": 1.4269395053143936, "grad_norm": 0.11572265625, "learning_rate": 0.0006858488129532135, "loss": 0.5906, "step": 28730 }, { "epoch": 1.4274361776100128, "grad_norm": 0.1337890625, "learning_rate": 0.000685809079169564, "loss": 0.578, "step": 28740 }, { "epoch": 1.4279328499056323, "grad_norm": 0.0908203125, "learning_rate": 0.0006857693453859143, "loss": 0.6389, "step": 28750 }, { "epoch": 1.4284295222012515, "grad_norm": 0.109375, "learning_rate": 0.0006857296116022649, "loss": 0.6311, "step": 28760 }, { "epoch": 1.428926194496871, "grad_norm": 0.125, "learning_rate": 0.0006856898778186153, "loss": 0.6115, "step": 28770 }, { "epoch": 1.4294228667924904, "grad_norm": 0.10107421875, "learning_rate": 0.0006856501440349657, "loss": 0.5909, "step": 28780 }, { "epoch": 1.4299195390881096, "grad_norm": 0.11181640625, "learning_rate": 0.0006856104102513162, "loss": 0.5808, "step": 28790 }, { "epoch": 1.430416211383729, "grad_norm": 0.1328125, "learning_rate": 0.0006855706764676666, "loss": 0.6197, "step": 28800 }, { "epoch": 1.4309128836793483, "grad_norm": 0.10986328125, "learning_rate": 0.0006855309426840171, "loss": 0.5979, "step": 28810 }, { "epoch": 1.4314095559749678, "grad_norm": 0.1015625, "learning_rate": 0.0006854912089003676, "loss": 0.6095, "step": 28820 }, { "epoch": 1.431906228270587, "grad_norm": 0.1201171875, "learning_rate": 0.000685451475116718, "loss": 0.6002, "step": 28830 }, { "epoch": 1.4324029005662064, "grad_norm": 0.1181640625, "learning_rate": 0.0006854117413330685, "loss": 0.6342, "step": 28840 }, { "epoch": 1.432899572861826, "grad_norm": 0.0966796875, "learning_rate": 0.0006853720075494189, "loss": 0.6459, "step": 28850 }, { "epoch": 1.4333962451574451, "grad_norm": 0.13671875, "learning_rate": 0.0006853322737657694, "loss": 0.5952, "step": 28860 }, { "epoch": 1.4338929174530644, "grad_norm": 0.111328125, "learning_rate": 0.0006852925399821199, "loss": 0.6145, "step": 28870 }, { "epoch": 1.4343895897486838, "grad_norm": 0.11328125, "learning_rate": 0.0006852528061984703, "loss": 0.6077, "step": 28880 }, { "epoch": 1.4348862620443033, "grad_norm": 0.1005859375, "learning_rate": 0.0006852130724148207, "loss": 0.6086, "step": 28890 }, { "epoch": 1.4353829343399225, "grad_norm": 0.10791015625, "learning_rate": 0.0006851733386311712, "loss": 0.6101, "step": 28900 }, { "epoch": 1.435879606635542, "grad_norm": 0.12255859375, "learning_rate": 0.0006851336048475216, "loss": 0.6171, "step": 28910 }, { "epoch": 1.4363762789311612, "grad_norm": 0.1435546875, "learning_rate": 0.0006850938710638721, "loss": 0.5873, "step": 28920 }, { "epoch": 1.4368729512267806, "grad_norm": 0.091796875, "learning_rate": 0.0006850541372802226, "loss": 0.638, "step": 28930 }, { "epoch": 1.4373696235223998, "grad_norm": 0.1083984375, "learning_rate": 0.0006850144034965729, "loss": 0.6042, "step": 28940 }, { "epoch": 1.4378662958180193, "grad_norm": 0.10888671875, "learning_rate": 0.0006849746697129234, "loss": 0.5868, "step": 28950 }, { "epoch": 1.4383629681136387, "grad_norm": 0.1826171875, "learning_rate": 0.0006849349359292739, "loss": 0.6008, "step": 28960 }, { "epoch": 1.438859640409258, "grad_norm": 0.0986328125, "learning_rate": 0.0006848952021456244, "loss": 0.6159, "step": 28970 }, { "epoch": 1.4393563127048772, "grad_norm": 0.1298828125, "learning_rate": 0.0006848554683619748, "loss": 0.5899, "step": 28980 }, { "epoch": 1.4398529850004966, "grad_norm": 0.1669921875, "learning_rate": 0.0006848157345783252, "loss": 0.5951, "step": 28990 }, { "epoch": 1.440349657296116, "grad_norm": 0.10498046875, "learning_rate": 0.0006847760007946757, "loss": 0.6214, "step": 29000 }, { "epoch": 1.4408463295917353, "grad_norm": 0.134765625, "learning_rate": 0.0006847362670110262, "loss": 0.6028, "step": 29010 }, { "epoch": 1.4413430018873548, "grad_norm": 0.10986328125, "learning_rate": 0.0006846965332273766, "loss": 0.6098, "step": 29020 }, { "epoch": 1.4418396741829742, "grad_norm": 0.0908203125, "learning_rate": 0.0006846567994437271, "loss": 0.6203, "step": 29030 }, { "epoch": 1.4423363464785934, "grad_norm": 0.10888671875, "learning_rate": 0.0006846170656600775, "loss": 0.6068, "step": 29040 }, { "epoch": 1.4428330187742127, "grad_norm": 0.09619140625, "learning_rate": 0.0006845773318764279, "loss": 0.6099, "step": 29050 }, { "epoch": 1.443329691069832, "grad_norm": 0.142578125, "learning_rate": 0.0006845375980927785, "loss": 0.5605, "step": 29060 }, { "epoch": 1.4438263633654516, "grad_norm": 0.09521484375, "learning_rate": 0.0006844978643091289, "loss": 0.5866, "step": 29070 }, { "epoch": 1.4443230356610708, "grad_norm": 0.119140625, "learning_rate": 0.0006844581305254793, "loss": 0.6057, "step": 29080 }, { "epoch": 1.4448197079566902, "grad_norm": 0.0966796875, "learning_rate": 0.0006844183967418298, "loss": 0.6059, "step": 29090 }, { "epoch": 1.4453163802523095, "grad_norm": 0.099609375, "learning_rate": 0.0006843786629581801, "loss": 0.5785, "step": 29100 }, { "epoch": 1.445813052547929, "grad_norm": 0.10595703125, "learning_rate": 0.0006843389291745307, "loss": 0.5752, "step": 29110 }, { "epoch": 1.4463097248435481, "grad_norm": 0.1279296875, "learning_rate": 0.0006842991953908812, "loss": 0.5905, "step": 29120 }, { "epoch": 1.4468063971391676, "grad_norm": 0.11083984375, "learning_rate": 0.0006842594616072316, "loss": 0.5859, "step": 29130 }, { "epoch": 1.447303069434787, "grad_norm": 0.130859375, "learning_rate": 0.000684219727823582, "loss": 0.6083, "step": 29140 }, { "epoch": 1.4477997417304063, "grad_norm": 0.11376953125, "learning_rate": 0.0006841799940399325, "loss": 0.5989, "step": 29150 }, { "epoch": 1.4482964140260255, "grad_norm": 0.0986328125, "learning_rate": 0.000684140260256283, "loss": 0.5948, "step": 29160 }, { "epoch": 1.448793086321645, "grad_norm": 0.103515625, "learning_rate": 0.0006841005264726334, "loss": 0.6228, "step": 29170 }, { "epoch": 1.4492897586172644, "grad_norm": 0.09814453125, "learning_rate": 0.0006840607926889838, "loss": 0.599, "step": 29180 }, { "epoch": 1.4497864309128836, "grad_norm": 0.09765625, "learning_rate": 0.0006840210589053343, "loss": 0.6125, "step": 29190 }, { "epoch": 1.450283103208503, "grad_norm": 0.11865234375, "learning_rate": 0.0006839813251216847, "loss": 0.6139, "step": 29200 }, { "epoch": 1.4507797755041225, "grad_norm": 0.0927734375, "learning_rate": 0.0006839415913380352, "loss": 0.6245, "step": 29210 }, { "epoch": 1.4512764477997417, "grad_norm": 0.10693359375, "learning_rate": 0.0006839018575543857, "loss": 0.619, "step": 29220 }, { "epoch": 1.451773120095361, "grad_norm": 0.185546875, "learning_rate": 0.0006838621237707361, "loss": 0.6243, "step": 29230 }, { "epoch": 1.4522697923909804, "grad_norm": 0.130859375, "learning_rate": 0.0006838223899870865, "loss": 0.5803, "step": 29240 }, { "epoch": 1.4527664646865999, "grad_norm": 0.216796875, "learning_rate": 0.000683782656203437, "loss": 0.6058, "step": 29250 }, { "epoch": 1.453263136982219, "grad_norm": 0.10595703125, "learning_rate": 0.0006837429224197875, "loss": 0.6069, "step": 29260 }, { "epoch": 1.4537598092778385, "grad_norm": 0.1083984375, "learning_rate": 0.0006837031886361379, "loss": 0.5811, "step": 29270 }, { "epoch": 1.4542564815734578, "grad_norm": 0.1513671875, "learning_rate": 0.0006836634548524884, "loss": 0.5839, "step": 29280 }, { "epoch": 1.4547531538690772, "grad_norm": 0.11279296875, "learning_rate": 0.0006836237210688388, "loss": 0.6351, "step": 29290 }, { "epoch": 1.4552498261646964, "grad_norm": 0.09765625, "learning_rate": 0.0006835839872851892, "loss": 0.6138, "step": 29300 }, { "epoch": 1.455746498460316, "grad_norm": 0.1611328125, "learning_rate": 0.0006835442535015398, "loss": 0.6059, "step": 29310 }, { "epoch": 1.4562431707559353, "grad_norm": 0.09228515625, "learning_rate": 0.0006835045197178902, "loss": 0.5771, "step": 29320 }, { "epoch": 1.4567398430515546, "grad_norm": 0.125, "learning_rate": 0.0006834647859342406, "loss": 0.5944, "step": 29330 }, { "epoch": 1.4572365153471738, "grad_norm": 0.091796875, "learning_rate": 0.0006834250521505911, "loss": 0.5916, "step": 29340 }, { "epoch": 1.4577331876427932, "grad_norm": 0.1591796875, "learning_rate": 0.0006833853183669415, "loss": 0.6127, "step": 29350 }, { "epoch": 1.4582298599384127, "grad_norm": 0.109375, "learning_rate": 0.000683345584583292, "loss": 0.6273, "step": 29360 }, { "epoch": 1.458726532234032, "grad_norm": 0.10107421875, "learning_rate": 0.0006833058507996424, "loss": 0.6252, "step": 29370 }, { "epoch": 1.4592232045296514, "grad_norm": 0.119140625, "learning_rate": 0.0006832661170159929, "loss": 0.5981, "step": 29380 }, { "epoch": 1.4597198768252708, "grad_norm": 0.09814453125, "learning_rate": 0.0006832263832323433, "loss": 0.6148, "step": 29390 }, { "epoch": 1.46021654912089, "grad_norm": 0.0986328125, "learning_rate": 0.0006831866494486937, "loss": 0.5942, "step": 29400 }, { "epoch": 1.4607132214165093, "grad_norm": 0.11328125, "learning_rate": 0.0006831469156650443, "loss": 0.6089, "step": 29410 }, { "epoch": 1.4612098937121287, "grad_norm": 0.0966796875, "learning_rate": 0.0006831071818813948, "loss": 0.5826, "step": 29420 }, { "epoch": 1.4617065660077482, "grad_norm": 0.11328125, "learning_rate": 0.0006830674480977451, "loss": 0.6103, "step": 29430 }, { "epoch": 1.4622032383033674, "grad_norm": 0.09912109375, "learning_rate": 0.0006830277143140956, "loss": 0.6019, "step": 29440 }, { "epoch": 1.4626999105989869, "grad_norm": 0.10107421875, "learning_rate": 0.000682987980530446, "loss": 0.6102, "step": 29450 }, { "epoch": 1.463196582894606, "grad_norm": 0.11572265625, "learning_rate": 0.0006829482467467965, "loss": 0.6198, "step": 29460 }, { "epoch": 1.4636932551902255, "grad_norm": 0.1376953125, "learning_rate": 0.000682908512963147, "loss": 0.5934, "step": 29470 }, { "epoch": 1.4641899274858448, "grad_norm": 0.1328125, "learning_rate": 0.0006828687791794974, "loss": 0.5846, "step": 29480 }, { "epoch": 1.4646865997814642, "grad_norm": 0.0927734375, "learning_rate": 0.0006828290453958478, "loss": 0.6057, "step": 29490 }, { "epoch": 1.4651832720770837, "grad_norm": 0.1044921875, "learning_rate": 0.0006827893116121983, "loss": 0.633, "step": 29500 }, { "epoch": 1.4656799443727029, "grad_norm": 0.1162109375, "learning_rate": 0.0006827495778285488, "loss": 0.6107, "step": 29510 }, { "epoch": 1.466176616668322, "grad_norm": 0.2119140625, "learning_rate": 0.0006827098440448992, "loss": 0.6107, "step": 29520 }, { "epoch": 1.4666732889639416, "grad_norm": 0.11767578125, "learning_rate": 0.0006826701102612497, "loss": 0.5991, "step": 29530 }, { "epoch": 1.467169961259561, "grad_norm": 0.1123046875, "learning_rate": 0.0006826303764776001, "loss": 0.6115, "step": 29540 }, { "epoch": 1.4676666335551802, "grad_norm": 0.11083984375, "learning_rate": 0.0006825906426939505, "loss": 0.6148, "step": 29550 }, { "epoch": 1.4681633058507997, "grad_norm": 0.1181640625, "learning_rate": 0.000682550908910301, "loss": 0.6007, "step": 29560 }, { "epoch": 1.468659978146419, "grad_norm": 0.10888671875, "learning_rate": 0.0006825111751266515, "loss": 0.6115, "step": 29570 }, { "epoch": 1.4691566504420384, "grad_norm": 0.1337890625, "learning_rate": 0.000682471441343002, "loss": 0.6059, "step": 29580 }, { "epoch": 1.4696533227376576, "grad_norm": 0.09912109375, "learning_rate": 0.0006824317075593523, "loss": 0.6125, "step": 29590 }, { "epoch": 1.470149995033277, "grad_norm": 0.10888671875, "learning_rate": 0.0006823919737757028, "loss": 0.5979, "step": 29600 }, { "epoch": 1.4706466673288965, "grad_norm": 0.119140625, "learning_rate": 0.0006823522399920534, "loss": 0.598, "step": 29610 }, { "epoch": 1.4711433396245157, "grad_norm": 0.181640625, "learning_rate": 0.0006823125062084037, "loss": 0.6289, "step": 29620 }, { "epoch": 1.4716400119201352, "grad_norm": 0.1044921875, "learning_rate": 0.0006822727724247542, "loss": 0.6102, "step": 29630 }, { "epoch": 1.4721366842157544, "grad_norm": 0.1240234375, "learning_rate": 0.0006822330386411046, "loss": 0.5902, "step": 29640 }, { "epoch": 1.4726333565113738, "grad_norm": 0.0986328125, "learning_rate": 0.000682193304857455, "loss": 0.5947, "step": 29650 }, { "epoch": 1.473130028806993, "grad_norm": 0.1044921875, "learning_rate": 0.0006821535710738056, "loss": 0.5978, "step": 29660 }, { "epoch": 1.4736267011026125, "grad_norm": 0.10888671875, "learning_rate": 0.000682113837290156, "loss": 0.6403, "step": 29670 }, { "epoch": 1.474123373398232, "grad_norm": 0.1953125, "learning_rate": 0.0006820741035065064, "loss": 0.607, "step": 29680 }, { "epoch": 1.4746200456938512, "grad_norm": 0.09765625, "learning_rate": 0.0006820343697228569, "loss": 0.6096, "step": 29690 }, { "epoch": 1.4751167179894704, "grad_norm": 0.09814453125, "learning_rate": 0.0006819946359392073, "loss": 0.6089, "step": 29700 }, { "epoch": 1.4756133902850899, "grad_norm": 0.1201171875, "learning_rate": 0.0006819549021555579, "loss": 0.6185, "step": 29710 }, { "epoch": 1.4761100625807093, "grad_norm": 0.11865234375, "learning_rate": 0.0006819151683719083, "loss": 0.5886, "step": 29720 }, { "epoch": 1.4766067348763285, "grad_norm": 0.10009765625, "learning_rate": 0.0006818754345882587, "loss": 0.6284, "step": 29730 }, { "epoch": 1.477103407171948, "grad_norm": 0.111328125, "learning_rate": 0.0006818357008046092, "loss": 0.587, "step": 29740 }, { "epoch": 1.4776000794675672, "grad_norm": 0.09326171875, "learning_rate": 0.0006817959670209596, "loss": 0.5764, "step": 29750 }, { "epoch": 1.4780967517631867, "grad_norm": 0.12353515625, "learning_rate": 0.00068175623323731, "loss": 0.6388, "step": 29760 }, { "epoch": 1.478593424058806, "grad_norm": 0.103515625, "learning_rate": 0.0006817164994536606, "loss": 0.5818, "step": 29770 }, { "epoch": 1.4790900963544253, "grad_norm": 0.109375, "learning_rate": 0.0006816767656700109, "loss": 0.6276, "step": 29780 }, { "epoch": 1.4795867686500448, "grad_norm": 0.1474609375, "learning_rate": 0.0006816370318863614, "loss": 0.5938, "step": 29790 }, { "epoch": 1.480083440945664, "grad_norm": 0.1318359375, "learning_rate": 0.0006815972981027119, "loss": 0.5975, "step": 29800 }, { "epoch": 1.4805801132412835, "grad_norm": 0.0966796875, "learning_rate": 0.0006815575643190622, "loss": 0.6227, "step": 29810 }, { "epoch": 1.4810767855369027, "grad_norm": 0.1474609375, "learning_rate": 0.0006815178305354128, "loss": 0.612, "step": 29820 }, { "epoch": 1.4815734578325221, "grad_norm": 0.10791015625, "learning_rate": 0.0006814780967517632, "loss": 0.5949, "step": 29830 }, { "epoch": 1.4820701301281414, "grad_norm": 0.0966796875, "learning_rate": 0.0006814383629681136, "loss": 0.5996, "step": 29840 }, { "epoch": 1.4825668024237608, "grad_norm": 0.10107421875, "learning_rate": 0.0006813986291844641, "loss": 0.5888, "step": 29850 }, { "epoch": 1.4830634747193803, "grad_norm": 0.09228515625, "learning_rate": 0.0006813588954008145, "loss": 0.6183, "step": 29860 }, { "epoch": 1.4835601470149995, "grad_norm": 0.11181640625, "learning_rate": 0.0006813191616171651, "loss": 0.626, "step": 29870 }, { "epoch": 1.4840568193106187, "grad_norm": 0.11474609375, "learning_rate": 0.0006812794278335155, "loss": 0.6085, "step": 29880 }, { "epoch": 1.4845534916062382, "grad_norm": 0.1474609375, "learning_rate": 0.0006812396940498659, "loss": 0.5884, "step": 29890 }, { "epoch": 1.4850501639018576, "grad_norm": 0.09423828125, "learning_rate": 0.0006811999602662164, "loss": 0.6011, "step": 29900 }, { "epoch": 1.4855468361974768, "grad_norm": 0.0986328125, "learning_rate": 0.0006811602264825668, "loss": 0.6363, "step": 29910 }, { "epoch": 1.4860435084930963, "grad_norm": 0.1083984375, "learning_rate": 0.0006811204926989173, "loss": 0.6158, "step": 29920 }, { "epoch": 1.4865401807887155, "grad_norm": 0.150390625, "learning_rate": 0.0006810807589152678, "loss": 0.5923, "step": 29930 }, { "epoch": 1.487036853084335, "grad_norm": 0.1591796875, "learning_rate": 0.0006810410251316182, "loss": 0.6281, "step": 29940 }, { "epoch": 1.4875335253799542, "grad_norm": 0.1591796875, "learning_rate": 0.0006810012913479686, "loss": 0.5919, "step": 29950 }, { "epoch": 1.4880301976755737, "grad_norm": 0.09619140625, "learning_rate": 0.0006809615575643192, "loss": 0.6249, "step": 29960 }, { "epoch": 1.488526869971193, "grad_norm": 0.0966796875, "learning_rate": 0.0006809218237806695, "loss": 0.5997, "step": 29970 }, { "epoch": 1.4890235422668123, "grad_norm": 0.1298828125, "learning_rate": 0.00068088208999702, "loss": 0.6269, "step": 29980 }, { "epoch": 1.4895202145624318, "grad_norm": 0.0966796875, "learning_rate": 0.0006808423562133705, "loss": 0.6076, "step": 29990 }, { "epoch": 1.490016886858051, "grad_norm": 0.1357421875, "learning_rate": 0.0006808026224297208, "loss": 0.5876, "step": 30000 }, { "epoch": 1.4905135591536705, "grad_norm": 0.0966796875, "learning_rate": 0.0006807628886460713, "loss": 0.6008, "step": 30010 }, { "epoch": 1.4910102314492897, "grad_norm": 0.1806640625, "learning_rate": 0.0006807231548624219, "loss": 0.5796, "step": 30020 }, { "epoch": 1.4915069037449091, "grad_norm": 0.1103515625, "learning_rate": 0.0006806834210787723, "loss": 0.5966, "step": 30030 }, { "epoch": 1.4920035760405286, "grad_norm": 0.0966796875, "learning_rate": 0.0006806436872951227, "loss": 0.6028, "step": 30040 }, { "epoch": 1.4925002483361478, "grad_norm": 0.11279296875, "learning_rate": 0.0006806039535114731, "loss": 0.6235, "step": 30050 }, { "epoch": 1.492996920631767, "grad_norm": 0.10302734375, "learning_rate": 0.0006805642197278236, "loss": 0.6222, "step": 30060 }, { "epoch": 1.4934935929273865, "grad_norm": 0.11328125, "learning_rate": 0.0006805244859441741, "loss": 0.5923, "step": 30070 }, { "epoch": 1.493990265223006, "grad_norm": 0.09033203125, "learning_rate": 0.0006804847521605245, "loss": 0.5916, "step": 30080 }, { "epoch": 1.4944869375186252, "grad_norm": 0.099609375, "learning_rate": 0.000680445018376875, "loss": 0.6256, "step": 30090 }, { "epoch": 1.4949836098142446, "grad_norm": 0.09423828125, "learning_rate": 0.0006804052845932254, "loss": 0.608, "step": 30100 }, { "epoch": 1.4954802821098638, "grad_norm": 0.1220703125, "learning_rate": 0.0006803655508095758, "loss": 0.5927, "step": 30110 }, { "epoch": 1.4959769544054833, "grad_norm": 0.130859375, "learning_rate": 0.0006803258170259264, "loss": 0.6394, "step": 30120 }, { "epoch": 1.4964736267011025, "grad_norm": 0.1376953125, "learning_rate": 0.0006802860832422768, "loss": 0.6129, "step": 30130 }, { "epoch": 1.496970298996722, "grad_norm": 0.0908203125, "learning_rate": 0.0006802463494586272, "loss": 0.6006, "step": 30140 }, { "epoch": 1.4974669712923414, "grad_norm": 0.134765625, "learning_rate": 0.0006802066156749777, "loss": 0.6076, "step": 30150 }, { "epoch": 1.4979636435879606, "grad_norm": 0.14453125, "learning_rate": 0.0006801668818913281, "loss": 0.6152, "step": 30160 }, { "epoch": 1.49846031588358, "grad_norm": 0.09814453125, "learning_rate": 0.0006801271481076786, "loss": 0.6272, "step": 30170 }, { "epoch": 1.4989569881791993, "grad_norm": 0.138671875, "learning_rate": 0.0006800874143240291, "loss": 0.6088, "step": 30180 }, { "epoch": 1.4994536604748188, "grad_norm": 0.1123046875, "learning_rate": 0.0006800476805403795, "loss": 0.6184, "step": 30190 }, { "epoch": 1.499950332770438, "grad_norm": 0.111328125, "learning_rate": 0.0006800079467567299, "loss": 0.594, "step": 30200 }, { "epoch": 1.5004470050660574, "grad_norm": 0.12255859375, "learning_rate": 0.0006799682129730804, "loss": 0.617, "step": 30210 }, { "epoch": 1.5009436773616769, "grad_norm": 0.09375, "learning_rate": 0.0006799284791894309, "loss": 0.6101, "step": 30220 }, { "epoch": 1.5014403496572961, "grad_norm": 0.1552734375, "learning_rate": 0.0006798887454057813, "loss": 0.6013, "step": 30230 }, { "epoch": 1.5019370219529153, "grad_norm": 0.140625, "learning_rate": 0.0006798490116221317, "loss": 0.6145, "step": 30240 }, { "epoch": 1.5024336942485348, "grad_norm": 0.158203125, "learning_rate": 0.0006798092778384822, "loss": 0.6136, "step": 30250 }, { "epoch": 1.5029303665441542, "grad_norm": 0.10400390625, "learning_rate": 0.0006797695440548326, "loss": 0.6326, "step": 30260 }, { "epoch": 1.5034270388397735, "grad_norm": 0.1220703125, "learning_rate": 0.0006797298102711831, "loss": 0.6127, "step": 30270 }, { "epoch": 1.503923711135393, "grad_norm": 0.12060546875, "learning_rate": 0.0006796900764875336, "loss": 0.5979, "step": 30280 }, { "epoch": 1.5044203834310124, "grad_norm": 0.08837890625, "learning_rate": 0.000679650342703884, "loss": 0.6082, "step": 30290 }, { "epoch": 1.5049170557266316, "grad_norm": 0.140625, "learning_rate": 0.0006796106089202344, "loss": 0.6313, "step": 30300 }, { "epoch": 1.5054137280222508, "grad_norm": 0.09375, "learning_rate": 0.0006795708751365849, "loss": 0.5912, "step": 30310 }, { "epoch": 1.5059104003178703, "grad_norm": 0.1240234375, "learning_rate": 0.0006795311413529354, "loss": 0.6078, "step": 30320 }, { "epoch": 1.5064070726134897, "grad_norm": 0.103515625, "learning_rate": 0.0006794914075692858, "loss": 0.5964, "step": 30330 }, { "epoch": 1.506903744909109, "grad_norm": 0.10546875, "learning_rate": 0.0006794516737856363, "loss": 0.5968, "step": 30340 }, { "epoch": 1.5074004172047282, "grad_norm": 0.1044921875, "learning_rate": 0.0006794119400019867, "loss": 0.6018, "step": 30350 }, { "epoch": 1.5078970895003476, "grad_norm": 0.0966796875, "learning_rate": 0.0006793722062183371, "loss": 0.5776, "step": 30360 }, { "epoch": 1.508393761795967, "grad_norm": 0.10498046875, "learning_rate": 0.0006793324724346877, "loss": 0.616, "step": 30370 }, { "epoch": 1.5088904340915863, "grad_norm": 0.1005859375, "learning_rate": 0.0006792927386510381, "loss": 0.6041, "step": 30380 }, { "epoch": 1.5093871063872057, "grad_norm": 0.142578125, "learning_rate": 0.0006792530048673885, "loss": 0.6189, "step": 30390 }, { "epoch": 1.5098837786828252, "grad_norm": 0.1220703125, "learning_rate": 0.000679213271083739, "loss": 0.6155, "step": 30400 }, { "epoch": 1.5103804509784444, "grad_norm": 0.111328125, "learning_rate": 0.0006791735373000894, "loss": 0.596, "step": 30410 }, { "epoch": 1.5108771232740636, "grad_norm": 0.10791015625, "learning_rate": 0.0006791338035164399, "loss": 0.614, "step": 30420 }, { "epoch": 1.511373795569683, "grad_norm": 0.10693359375, "learning_rate": 0.0006790940697327903, "loss": 0.5913, "step": 30430 }, { "epoch": 1.5118704678653025, "grad_norm": 0.09716796875, "learning_rate": 0.0006790543359491408, "loss": 0.6084, "step": 30440 }, { "epoch": 1.5123671401609218, "grad_norm": 0.09619140625, "learning_rate": 0.0006790146021654912, "loss": 0.5904, "step": 30450 }, { "epoch": 1.5128638124565412, "grad_norm": 0.1513671875, "learning_rate": 0.0006789748683818416, "loss": 0.5973, "step": 30460 }, { "epoch": 1.5133604847521607, "grad_norm": 0.09619140625, "learning_rate": 0.0006789351345981922, "loss": 0.6363, "step": 30470 }, { "epoch": 1.51385715704778, "grad_norm": 0.1220703125, "learning_rate": 0.0006788954008145427, "loss": 0.617, "step": 30480 }, { "epoch": 1.5143538293433991, "grad_norm": 0.11865234375, "learning_rate": 0.000678855667030893, "loss": 0.6019, "step": 30490 }, { "epoch": 1.5148505016390186, "grad_norm": 0.0966796875, "learning_rate": 0.0006788159332472435, "loss": 0.5968, "step": 30500 }, { "epoch": 1.515347173934638, "grad_norm": 0.1513671875, "learning_rate": 0.0006787761994635939, "loss": 0.6072, "step": 30510 }, { "epoch": 1.5158438462302573, "grad_norm": 0.10107421875, "learning_rate": 0.0006787364656799444, "loss": 0.6382, "step": 30520 }, { "epoch": 1.5163405185258765, "grad_norm": 0.158203125, "learning_rate": 0.0006786967318962949, "loss": 0.5835, "step": 30530 }, { "epoch": 1.516837190821496, "grad_norm": 0.091796875, "learning_rate": 0.0006786569981126453, "loss": 0.6065, "step": 30540 }, { "epoch": 1.5173338631171154, "grad_norm": 0.126953125, "learning_rate": 0.0006786172643289957, "loss": 0.6027, "step": 30550 }, { "epoch": 1.5178305354127346, "grad_norm": 0.1328125, "learning_rate": 0.0006785775305453462, "loss": 0.5881, "step": 30560 }, { "epoch": 1.518327207708354, "grad_norm": 0.0888671875, "learning_rate": 0.0006785377967616967, "loss": 0.5985, "step": 30570 }, { "epoch": 1.5188238800039735, "grad_norm": 0.09912109375, "learning_rate": 0.0006784980629780471, "loss": 0.5662, "step": 30580 }, { "epoch": 1.5193205522995927, "grad_norm": 0.154296875, "learning_rate": 0.0006784583291943976, "loss": 0.5797, "step": 30590 }, { "epoch": 1.519817224595212, "grad_norm": 0.09814453125, "learning_rate": 0.000678418595410748, "loss": 0.565, "step": 30600 }, { "epoch": 1.5203138968908314, "grad_norm": 0.14453125, "learning_rate": 0.0006783788616270985, "loss": 0.5841, "step": 30610 }, { "epoch": 1.5208105691864509, "grad_norm": 0.09912109375, "learning_rate": 0.000678339127843449, "loss": 0.6007, "step": 30620 }, { "epoch": 1.52130724148207, "grad_norm": 0.1328125, "learning_rate": 0.0006782993940597994, "loss": 0.5948, "step": 30630 }, { "epoch": 1.5218039137776895, "grad_norm": 0.09814453125, "learning_rate": 0.0006782596602761499, "loss": 0.5944, "step": 30640 }, { "epoch": 1.522300586073309, "grad_norm": 0.162109375, "learning_rate": 0.0006782199264925002, "loss": 0.6135, "step": 30650 }, { "epoch": 1.5227972583689282, "grad_norm": 0.11669921875, "learning_rate": 0.0006781801927088507, "loss": 0.6057, "step": 30660 }, { "epoch": 1.5232939306645474, "grad_norm": 0.09814453125, "learning_rate": 0.0006781404589252013, "loss": 0.6222, "step": 30670 }, { "epoch": 1.5237906029601669, "grad_norm": 0.09814453125, "learning_rate": 0.0006781007251415516, "loss": 0.5781, "step": 30680 }, { "epoch": 1.5242872752557863, "grad_norm": 0.1240234375, "learning_rate": 0.0006780609913579021, "loss": 0.6204, "step": 30690 }, { "epoch": 1.5247839475514056, "grad_norm": 0.09228515625, "learning_rate": 0.0006780212575742525, "loss": 0.6038, "step": 30700 }, { "epoch": 1.5252806198470248, "grad_norm": 0.10888671875, "learning_rate": 0.0006779815237906029, "loss": 0.6073, "step": 30710 }, { "epoch": 1.5257772921426442, "grad_norm": 0.1025390625, "learning_rate": 0.0006779417900069535, "loss": 0.6317, "step": 30720 }, { "epoch": 1.5262739644382637, "grad_norm": 0.0986328125, "learning_rate": 0.0006779020562233039, "loss": 0.6054, "step": 30730 }, { "epoch": 1.526770636733883, "grad_norm": 0.1103515625, "learning_rate": 0.0006778623224396543, "loss": 0.5999, "step": 30740 }, { "epoch": 1.5272673090295024, "grad_norm": 0.2158203125, "learning_rate": 0.0006778225886560048, "loss": 0.6027, "step": 30750 }, { "epoch": 1.5277639813251218, "grad_norm": 0.09814453125, "learning_rate": 0.0006777828548723552, "loss": 0.6004, "step": 30760 }, { "epoch": 1.528260653620741, "grad_norm": 0.111328125, "learning_rate": 0.0006777431210887058, "loss": 0.5881, "step": 30770 }, { "epoch": 1.5287573259163603, "grad_norm": 0.10888671875, "learning_rate": 0.0006777033873050562, "loss": 0.6004, "step": 30780 }, { "epoch": 1.5292539982119797, "grad_norm": 0.134765625, "learning_rate": 0.0006776636535214066, "loss": 0.6021, "step": 30790 }, { "epoch": 1.5297506705075992, "grad_norm": 0.1201171875, "learning_rate": 0.0006776239197377571, "loss": 0.6088, "step": 30800 }, { "epoch": 1.5302473428032184, "grad_norm": 0.1162109375, "learning_rate": 0.0006775841859541075, "loss": 0.5897, "step": 30810 }, { "epoch": 1.5307440150988378, "grad_norm": 0.1015625, "learning_rate": 0.000677544452170458, "loss": 0.5908, "step": 30820 }, { "epoch": 1.5312406873944573, "grad_norm": 0.126953125, "learning_rate": 0.0006775047183868085, "loss": 0.5859, "step": 30830 }, { "epoch": 1.5317373596900765, "grad_norm": 0.09423828125, "learning_rate": 0.0006774649846031588, "loss": 0.5808, "step": 30840 }, { "epoch": 1.5322340319856957, "grad_norm": 0.1591796875, "learning_rate": 0.0006774252508195093, "loss": 0.6055, "step": 30850 }, { "epoch": 1.5327307042813152, "grad_norm": 0.1611328125, "learning_rate": 0.0006773855170358598, "loss": 0.5918, "step": 30860 }, { "epoch": 1.5332273765769346, "grad_norm": 0.10888671875, "learning_rate": 0.0006773457832522101, "loss": 0.6138, "step": 30870 }, { "epoch": 1.5337240488725539, "grad_norm": 0.11962890625, "learning_rate": 0.0006773060494685607, "loss": 0.6295, "step": 30880 }, { "epoch": 1.534220721168173, "grad_norm": 0.10693359375, "learning_rate": 0.0006772663156849112, "loss": 0.6049, "step": 30890 }, { "epoch": 1.5347173934637925, "grad_norm": 0.1171875, "learning_rate": 0.0006772265819012615, "loss": 0.5895, "step": 30900 }, { "epoch": 1.535214065759412, "grad_norm": 0.1337890625, "learning_rate": 0.000677186848117612, "loss": 0.6292, "step": 30910 }, { "epoch": 1.5357107380550312, "grad_norm": 0.0947265625, "learning_rate": 0.0006771471143339625, "loss": 0.6287, "step": 30920 }, { "epoch": 1.5362074103506507, "grad_norm": 0.0908203125, "learning_rate": 0.000677107380550313, "loss": 0.5945, "step": 30930 }, { "epoch": 1.5367040826462701, "grad_norm": 0.111328125, "learning_rate": 0.0006770676467666634, "loss": 0.6321, "step": 30940 }, { "epoch": 1.5372007549418893, "grad_norm": 0.11328125, "learning_rate": 0.0006770279129830138, "loss": 0.6027, "step": 30950 }, { "epoch": 1.5376974272375086, "grad_norm": 0.09716796875, "learning_rate": 0.0006769881791993643, "loss": 0.6035, "step": 30960 }, { "epoch": 1.538194099533128, "grad_norm": 0.09912109375, "learning_rate": 0.0006769484454157148, "loss": 0.6127, "step": 30970 }, { "epoch": 1.5386907718287475, "grad_norm": 0.10791015625, "learning_rate": 0.0006769087116320652, "loss": 0.5827, "step": 30980 }, { "epoch": 1.5391874441243667, "grad_norm": 0.1025390625, "learning_rate": 0.0006768689778484157, "loss": 0.617, "step": 30990 }, { "epoch": 1.5396841164199861, "grad_norm": 0.10791015625, "learning_rate": 0.0006768292440647661, "loss": 0.6028, "step": 31000 }, { "epoch": 1.5401807887156056, "grad_norm": 0.1123046875, "learning_rate": 0.0006767895102811165, "loss": 0.5821, "step": 31010 }, { "epoch": 1.5406774610112248, "grad_norm": 0.146484375, "learning_rate": 0.0006767497764974671, "loss": 0.602, "step": 31020 }, { "epoch": 1.541174133306844, "grad_norm": 0.1044921875, "learning_rate": 0.0006767100427138174, "loss": 0.6195, "step": 31030 }, { "epoch": 1.5416708056024635, "grad_norm": 0.1083984375, "learning_rate": 0.0006766703089301679, "loss": 0.6345, "step": 31040 }, { "epoch": 1.542167477898083, "grad_norm": 0.1337890625, "learning_rate": 0.0006766305751465184, "loss": 0.6286, "step": 31050 }, { "epoch": 1.5426641501937022, "grad_norm": 0.1455078125, "learning_rate": 0.0006765908413628688, "loss": 0.607, "step": 31060 }, { "epoch": 1.5431608224893214, "grad_norm": 0.1103515625, "learning_rate": 0.0006765511075792192, "loss": 0.6033, "step": 31070 }, { "epoch": 1.5436574947849409, "grad_norm": 0.10546875, "learning_rate": 0.0006765113737955698, "loss": 0.5816, "step": 31080 }, { "epoch": 1.5441541670805603, "grad_norm": 0.09814453125, "learning_rate": 0.0006764716400119202, "loss": 0.6319, "step": 31090 }, { "epoch": 1.5446508393761795, "grad_norm": 0.1357421875, "learning_rate": 0.0006764319062282706, "loss": 0.5986, "step": 31100 }, { "epoch": 1.545147511671799, "grad_norm": 0.1494140625, "learning_rate": 0.000676392172444621, "loss": 0.6509, "step": 31110 }, { "epoch": 1.5456441839674184, "grad_norm": 0.1171875, "learning_rate": 0.0006763524386609716, "loss": 0.5765, "step": 31120 }, { "epoch": 1.5461408562630377, "grad_norm": 0.1787109375, "learning_rate": 0.000676312704877322, "loss": 0.5904, "step": 31130 }, { "epoch": 1.5466375285586569, "grad_norm": 0.17578125, "learning_rate": 0.0006762729710936724, "loss": 0.5956, "step": 31140 }, { "epoch": 1.5471342008542763, "grad_norm": 0.08984375, "learning_rate": 0.0006762332373100229, "loss": 0.5829, "step": 31150 }, { "epoch": 1.5476308731498958, "grad_norm": 0.125, "learning_rate": 0.0006761935035263733, "loss": 0.5784, "step": 31160 }, { "epoch": 1.548127545445515, "grad_norm": 0.15234375, "learning_rate": 0.0006761537697427237, "loss": 0.6014, "step": 31170 }, { "epoch": 1.5486242177411342, "grad_norm": 0.09033203125, "learning_rate": 0.0006761140359590743, "loss": 0.6188, "step": 31180 }, { "epoch": 1.549120890036754, "grad_norm": 0.09912109375, "learning_rate": 0.0006760743021754247, "loss": 0.6035, "step": 31190 }, { "epoch": 1.5496175623323731, "grad_norm": 0.103515625, "learning_rate": 0.0006760345683917751, "loss": 0.5998, "step": 31200 }, { "epoch": 1.5501142346279924, "grad_norm": 0.09326171875, "learning_rate": 0.0006759948346081256, "loss": 0.5868, "step": 31210 }, { "epoch": 1.5506109069236118, "grad_norm": 0.146484375, "learning_rate": 0.000675955100824476, "loss": 0.6067, "step": 31220 }, { "epoch": 1.5511075792192313, "grad_norm": 0.10205078125, "learning_rate": 0.0006759153670408265, "loss": 0.6151, "step": 31230 }, { "epoch": 1.5516042515148505, "grad_norm": 0.109375, "learning_rate": 0.000675875633257177, "loss": 0.6484, "step": 31240 }, { "epoch": 1.5521009238104697, "grad_norm": 0.1572265625, "learning_rate": 0.0006758358994735274, "loss": 0.6143, "step": 31250 }, { "epoch": 1.5525975961060892, "grad_norm": 0.11181640625, "learning_rate": 0.0006757961656898778, "loss": 0.5907, "step": 31260 }, { "epoch": 1.5530942684017086, "grad_norm": 0.22265625, "learning_rate": 0.0006757564319062284, "loss": 0.6016, "step": 31270 }, { "epoch": 1.5535909406973278, "grad_norm": 0.20703125, "learning_rate": 0.0006757166981225788, "loss": 0.6084, "step": 31280 }, { "epoch": 1.5540876129929473, "grad_norm": 0.09521484375, "learning_rate": 0.0006756769643389292, "loss": 0.6154, "step": 31290 }, { "epoch": 1.5545842852885667, "grad_norm": 0.0986328125, "learning_rate": 0.0006756372305552796, "loss": 0.6008, "step": 31300 }, { "epoch": 1.555080957584186, "grad_norm": 0.09326171875, "learning_rate": 0.0006755974967716301, "loss": 0.5986, "step": 31310 }, { "epoch": 1.5555776298798052, "grad_norm": 0.1171875, "learning_rate": 0.0006755577629879805, "loss": 0.5987, "step": 31320 }, { "epoch": 1.5560743021754246, "grad_norm": 0.09326171875, "learning_rate": 0.000675518029204331, "loss": 0.5984, "step": 31330 }, { "epoch": 1.556570974471044, "grad_norm": 0.16015625, "learning_rate": 0.0006754782954206815, "loss": 0.5834, "step": 31340 }, { "epoch": 1.5570676467666633, "grad_norm": 0.1181640625, "learning_rate": 0.0006754385616370319, "loss": 0.6261, "step": 31350 }, { "epoch": 1.5575643190622825, "grad_norm": 0.10302734375, "learning_rate": 0.0006753988278533823, "loss": 0.5748, "step": 31360 }, { "epoch": 1.5580609913579022, "grad_norm": 0.10986328125, "learning_rate": 0.0006753590940697328, "loss": 0.5874, "step": 31370 }, { "epoch": 1.5585576636535214, "grad_norm": 0.126953125, "learning_rate": 0.0006753193602860833, "loss": 0.5873, "step": 31380 }, { "epoch": 1.5590543359491407, "grad_norm": 0.08837890625, "learning_rate": 0.0006752796265024337, "loss": 0.6083, "step": 31390 }, { "epoch": 1.5595510082447601, "grad_norm": 0.09716796875, "learning_rate": 0.0006752398927187842, "loss": 0.5917, "step": 31400 }, { "epoch": 1.5600476805403796, "grad_norm": 0.14453125, "learning_rate": 0.0006752001589351346, "loss": 0.6035, "step": 31410 }, { "epoch": 1.5605443528359988, "grad_norm": 0.10400390625, "learning_rate": 0.000675160425151485, "loss": 0.5833, "step": 31420 }, { "epoch": 1.561041025131618, "grad_norm": 0.10009765625, "learning_rate": 0.0006751206913678356, "loss": 0.5765, "step": 31430 }, { "epoch": 1.5615376974272375, "grad_norm": 0.1103515625, "learning_rate": 0.000675080957584186, "loss": 0.5868, "step": 31440 }, { "epoch": 1.562034369722857, "grad_norm": 0.115234375, "learning_rate": 0.0006750412238005364, "loss": 0.6094, "step": 31450 }, { "epoch": 1.5625310420184761, "grad_norm": 0.154296875, "learning_rate": 0.0006750014900168869, "loss": 0.5841, "step": 31460 }, { "epoch": 1.5630277143140956, "grad_norm": 0.1103515625, "learning_rate": 0.0006749617562332373, "loss": 0.5807, "step": 31470 }, { "epoch": 1.563524386609715, "grad_norm": 0.1005859375, "learning_rate": 0.0006749220224495878, "loss": 0.5957, "step": 31480 }, { "epoch": 1.5640210589053343, "grad_norm": 0.11181640625, "learning_rate": 0.0006748822886659383, "loss": 0.6276, "step": 31490 }, { "epoch": 1.5645177312009535, "grad_norm": 0.11181640625, "learning_rate": 0.0006748425548822887, "loss": 0.5916, "step": 31500 }, { "epoch": 1.565014403496573, "grad_norm": 0.1513671875, "learning_rate": 0.0006748028210986392, "loss": 0.5754, "step": 31510 }, { "epoch": 1.5655110757921924, "grad_norm": 0.10009765625, "learning_rate": 0.0006747630873149895, "loss": 0.6124, "step": 31520 }, { "epoch": 1.5660077480878116, "grad_norm": 0.10791015625, "learning_rate": 0.0006747233535313401, "loss": 0.579, "step": 31530 }, { "epoch": 1.5665044203834309, "grad_norm": 0.0908203125, "learning_rate": 0.0006746836197476906, "loss": 0.6369, "step": 31540 }, { "epoch": 1.5670010926790505, "grad_norm": 0.0947265625, "learning_rate": 0.0006746438859640409, "loss": 0.6231, "step": 31550 }, { "epoch": 1.5674977649746697, "grad_norm": 0.12451171875, "learning_rate": 0.0006746041521803914, "loss": 0.6074, "step": 31560 }, { "epoch": 1.567994437270289, "grad_norm": 0.09619140625, "learning_rate": 0.0006745644183967418, "loss": 0.6241, "step": 31570 }, { "epoch": 1.5684911095659084, "grad_norm": 0.1220703125, "learning_rate": 0.0006745246846130923, "loss": 0.5816, "step": 31580 }, { "epoch": 1.5689877818615279, "grad_norm": 0.13671875, "learning_rate": 0.0006744849508294428, "loss": 0.6155, "step": 31590 }, { "epoch": 1.569484454157147, "grad_norm": 0.11181640625, "learning_rate": 0.0006744452170457932, "loss": 0.6011, "step": 31600 }, { "epoch": 1.5699811264527663, "grad_norm": 0.10986328125, "learning_rate": 0.0006744054832621436, "loss": 0.6116, "step": 31610 }, { "epoch": 1.5704777987483858, "grad_norm": 0.1484375, "learning_rate": 0.0006743657494784941, "loss": 0.5968, "step": 31620 }, { "epoch": 1.5709744710440052, "grad_norm": 0.119140625, "learning_rate": 0.0006743260156948446, "loss": 0.5831, "step": 31630 }, { "epoch": 1.5714711433396245, "grad_norm": 0.1259765625, "learning_rate": 0.000674286281911195, "loss": 0.614, "step": 31640 }, { "epoch": 1.571967815635244, "grad_norm": 0.1279296875, "learning_rate": 0.0006742465481275455, "loss": 0.5724, "step": 31650 }, { "epoch": 1.5724644879308634, "grad_norm": 0.1396484375, "learning_rate": 0.0006742068143438959, "loss": 0.5828, "step": 31660 }, { "epoch": 1.5729611602264826, "grad_norm": 0.10400390625, "learning_rate": 0.0006741670805602464, "loss": 0.5952, "step": 31670 }, { "epoch": 1.5734578325221018, "grad_norm": 0.1396484375, "learning_rate": 0.0006741273467765969, "loss": 0.6089, "step": 31680 }, { "epoch": 1.5739545048177213, "grad_norm": 0.11572265625, "learning_rate": 0.0006740876129929473, "loss": 0.6162, "step": 31690 }, { "epoch": 1.5744511771133407, "grad_norm": 0.1015625, "learning_rate": 0.0006740478792092978, "loss": 0.5958, "step": 31700 }, { "epoch": 1.57494784940896, "grad_norm": 0.1259765625, "learning_rate": 0.0006740081454256481, "loss": 0.5772, "step": 31710 }, { "epoch": 1.5754445217045792, "grad_norm": 0.09521484375, "learning_rate": 0.0006739684116419986, "loss": 0.594, "step": 31720 }, { "epoch": 1.5759411940001988, "grad_norm": 0.1376953125, "learning_rate": 0.0006739286778583492, "loss": 0.6195, "step": 31730 }, { "epoch": 1.576437866295818, "grad_norm": 0.0986328125, "learning_rate": 0.0006738889440746995, "loss": 0.6125, "step": 31740 }, { "epoch": 1.5769345385914373, "grad_norm": 0.0986328125, "learning_rate": 0.00067384921029105, "loss": 0.6229, "step": 31750 }, { "epoch": 1.5774312108870567, "grad_norm": 0.1376953125, "learning_rate": 0.0006738094765074005, "loss": 0.5876, "step": 31760 }, { "epoch": 1.5779278831826762, "grad_norm": 0.09716796875, "learning_rate": 0.0006737697427237508, "loss": 0.5819, "step": 31770 }, { "epoch": 1.5784245554782954, "grad_norm": 0.11572265625, "learning_rate": 0.0006737300089401014, "loss": 0.59, "step": 31780 }, { "epoch": 1.5789212277739146, "grad_norm": 0.0966796875, "learning_rate": 0.0006736902751564518, "loss": 0.6214, "step": 31790 }, { "epoch": 1.579417900069534, "grad_norm": 0.123046875, "learning_rate": 0.0006736505413728022, "loss": 0.6092, "step": 31800 }, { "epoch": 1.5799145723651535, "grad_norm": 0.08935546875, "learning_rate": 0.0006736108075891527, "loss": 0.601, "step": 31810 }, { "epoch": 1.5804112446607728, "grad_norm": 0.09521484375, "learning_rate": 0.0006735710738055031, "loss": 0.6004, "step": 31820 }, { "epoch": 1.5809079169563922, "grad_norm": 0.1806640625, "learning_rate": 0.0006735313400218537, "loss": 0.5944, "step": 31830 }, { "epoch": 1.5814045892520117, "grad_norm": 0.0927734375, "learning_rate": 0.0006734916062382041, "loss": 0.6069, "step": 31840 }, { "epoch": 1.5819012615476309, "grad_norm": 0.12890625, "learning_rate": 0.0006734518724545545, "loss": 0.5988, "step": 31850 }, { "epoch": 1.5823979338432501, "grad_norm": 0.138671875, "learning_rate": 0.000673412138670905, "loss": 0.5958, "step": 31860 }, { "epoch": 1.5828946061388696, "grad_norm": 0.091796875, "learning_rate": 0.0006733724048872554, "loss": 0.5858, "step": 31870 }, { "epoch": 1.583391278434489, "grad_norm": 0.10205078125, "learning_rate": 0.0006733326711036059, "loss": 0.6077, "step": 31880 }, { "epoch": 1.5838879507301082, "grad_norm": 0.08984375, "learning_rate": 0.0006732929373199564, "loss": 0.5997, "step": 31890 }, { "epoch": 1.5843846230257275, "grad_norm": 0.1171875, "learning_rate": 0.0006732532035363067, "loss": 0.6122, "step": 31900 }, { "epoch": 1.5848812953213471, "grad_norm": 0.08984375, "learning_rate": 0.0006732134697526572, "loss": 0.5781, "step": 31910 }, { "epoch": 1.5853779676169664, "grad_norm": 0.09814453125, "learning_rate": 0.0006731737359690077, "loss": 0.5809, "step": 31920 }, { "epoch": 1.5858746399125856, "grad_norm": 0.10400390625, "learning_rate": 0.000673134002185358, "loss": 0.6108, "step": 31930 }, { "epoch": 1.586371312208205, "grad_norm": 0.1005859375, "learning_rate": 0.0006730942684017086, "loss": 0.604, "step": 31940 }, { "epoch": 1.5868679845038245, "grad_norm": 0.1025390625, "learning_rate": 0.0006730545346180591, "loss": 0.6119, "step": 31950 }, { "epoch": 1.5873646567994437, "grad_norm": 0.162109375, "learning_rate": 0.0006730148008344095, "loss": 0.5925, "step": 31960 }, { "epoch": 1.587861329095063, "grad_norm": 0.103515625, "learning_rate": 0.0006729750670507599, "loss": 0.588, "step": 31970 }, { "epoch": 1.5883580013906824, "grad_norm": 0.1376953125, "learning_rate": 0.0006729353332671104, "loss": 0.6016, "step": 31980 }, { "epoch": 1.5888546736863018, "grad_norm": 0.140625, "learning_rate": 0.0006728955994834609, "loss": 0.6076, "step": 31990 }, { "epoch": 1.589351345981921, "grad_norm": 0.09521484375, "learning_rate": 0.0006728558656998113, "loss": 0.6245, "step": 32000 }, { "epoch": 1.5898480182775405, "grad_norm": 0.11865234375, "learning_rate": 0.0006728161319161617, "loss": 0.5935, "step": 32010 }, { "epoch": 1.59034469057316, "grad_norm": 0.134765625, "learning_rate": 0.0006727763981325122, "loss": 0.6305, "step": 32020 }, { "epoch": 1.5908413628687792, "grad_norm": 0.1162109375, "learning_rate": 0.0006727366643488627, "loss": 0.5974, "step": 32030 }, { "epoch": 1.5913380351643984, "grad_norm": 0.08984375, "learning_rate": 0.0006726969305652131, "loss": 0.5709, "step": 32040 }, { "epoch": 1.5918347074600179, "grad_norm": 0.126953125, "learning_rate": 0.0006726571967815636, "loss": 0.5996, "step": 32050 }, { "epoch": 1.5923313797556373, "grad_norm": 0.09619140625, "learning_rate": 0.000672617462997914, "loss": 0.6167, "step": 32060 }, { "epoch": 1.5928280520512565, "grad_norm": 0.0869140625, "learning_rate": 0.0006725777292142644, "loss": 0.6354, "step": 32070 }, { "epoch": 1.5933247243468758, "grad_norm": 0.0986328125, "learning_rate": 0.000672537995430615, "loss": 0.5934, "step": 32080 }, { "epoch": 1.5938213966424954, "grad_norm": 0.09423828125, "learning_rate": 0.0006724982616469653, "loss": 0.5804, "step": 32090 }, { "epoch": 1.5943180689381147, "grad_norm": 0.10693359375, "learning_rate": 0.0006724585278633158, "loss": 0.5837, "step": 32100 }, { "epoch": 1.594814741233734, "grad_norm": 0.0927734375, "learning_rate": 0.0006724187940796663, "loss": 0.5765, "step": 32110 }, { "epoch": 1.5953114135293534, "grad_norm": 0.10107421875, "learning_rate": 0.0006723790602960167, "loss": 0.6098, "step": 32120 }, { "epoch": 1.5958080858249728, "grad_norm": 0.126953125, "learning_rate": 0.0006723393265123672, "loss": 0.5863, "step": 32130 }, { "epoch": 1.596304758120592, "grad_norm": 0.09716796875, "learning_rate": 0.0006722995927287177, "loss": 0.5996, "step": 32140 }, { "epoch": 1.5968014304162113, "grad_norm": 0.171875, "learning_rate": 0.0006722598589450681, "loss": 0.6006, "step": 32150 }, { "epoch": 1.5972981027118307, "grad_norm": 0.1005859375, "learning_rate": 0.0006722201251614185, "loss": 0.5707, "step": 32160 }, { "epoch": 1.5977947750074502, "grad_norm": 0.10205078125, "learning_rate": 0.0006721803913777689, "loss": 0.5769, "step": 32170 }, { "epoch": 1.5982914473030694, "grad_norm": 0.10205078125, "learning_rate": 0.0006721406575941195, "loss": 0.607, "step": 32180 }, { "epoch": 1.5987881195986888, "grad_norm": 0.1005859375, "learning_rate": 0.0006721009238104699, "loss": 0.5976, "step": 32190 }, { "epoch": 1.5992847918943083, "grad_norm": 0.1123046875, "learning_rate": 0.0006720611900268203, "loss": 0.6053, "step": 32200 }, { "epoch": 1.5997814641899275, "grad_norm": 0.09814453125, "learning_rate": 0.0006720214562431708, "loss": 0.6023, "step": 32210 }, { "epoch": 1.6002781364855467, "grad_norm": 0.111328125, "learning_rate": 0.0006719817224595212, "loss": 0.6075, "step": 32220 }, { "epoch": 1.6007748087811662, "grad_norm": 0.10888671875, "learning_rate": 0.0006719419886758716, "loss": 0.5845, "step": 32230 }, { "epoch": 1.6012714810767856, "grad_norm": 0.1044921875, "learning_rate": 0.0006719022548922222, "loss": 0.6106, "step": 32240 }, { "epoch": 1.6017681533724049, "grad_norm": 0.1015625, "learning_rate": 0.0006718625211085726, "loss": 0.6036, "step": 32250 }, { "epoch": 1.602264825668024, "grad_norm": 0.171875, "learning_rate": 0.000671822787324923, "loss": 0.6072, "step": 32260 }, { "epoch": 1.6027614979636435, "grad_norm": 0.0927734375, "learning_rate": 0.0006717830535412735, "loss": 0.5904, "step": 32270 }, { "epoch": 1.603258170259263, "grad_norm": 0.11865234375, "learning_rate": 0.000671743319757624, "loss": 0.5886, "step": 32280 }, { "epoch": 1.6037548425548822, "grad_norm": 0.09765625, "learning_rate": 0.0006717035859739744, "loss": 0.6086, "step": 32290 }, { "epoch": 1.6042515148505017, "grad_norm": 0.12060546875, "learning_rate": 0.0006716638521903249, "loss": 0.6034, "step": 32300 }, { "epoch": 1.604748187146121, "grad_norm": 0.185546875, "learning_rate": 0.0006716241184066753, "loss": 0.6138, "step": 32310 }, { "epoch": 1.6052448594417403, "grad_norm": 0.1591796875, "learning_rate": 0.0006715843846230257, "loss": 0.614, "step": 32320 }, { "epoch": 1.6057415317373596, "grad_norm": 0.11962890625, "learning_rate": 0.0006715446508393763, "loss": 0.5995, "step": 32330 }, { "epoch": 1.606238204032979, "grad_norm": 0.099609375, "learning_rate": 0.0006715049170557267, "loss": 0.5998, "step": 32340 }, { "epoch": 1.6067348763285985, "grad_norm": 0.09326171875, "learning_rate": 0.0006714651832720771, "loss": 0.5739, "step": 32350 }, { "epoch": 1.6072315486242177, "grad_norm": 0.1240234375, "learning_rate": 0.0006714254494884275, "loss": 0.5946, "step": 32360 }, { "epoch": 1.6077282209198371, "grad_norm": 0.11865234375, "learning_rate": 0.000671385715704778, "loss": 0.5991, "step": 32370 }, { "epoch": 1.6082248932154566, "grad_norm": 0.10888671875, "learning_rate": 0.0006713459819211284, "loss": 0.612, "step": 32380 }, { "epoch": 1.6087215655110758, "grad_norm": 0.107421875, "learning_rate": 0.0006713062481374789, "loss": 0.5953, "step": 32390 }, { "epoch": 1.609218237806695, "grad_norm": 0.0927734375, "learning_rate": 0.0006712665143538294, "loss": 0.5863, "step": 32400 }, { "epoch": 1.6097149101023145, "grad_norm": 0.1298828125, "learning_rate": 0.0006712267805701799, "loss": 0.5835, "step": 32410 }, { "epoch": 1.610211582397934, "grad_norm": 0.138671875, "learning_rate": 0.0006711870467865302, "loss": 0.6002, "step": 32420 }, { "epoch": 1.6107082546935532, "grad_norm": 0.09716796875, "learning_rate": 0.0006711473130028808, "loss": 0.5809, "step": 32430 }, { "epoch": 1.6112049269891724, "grad_norm": 0.10205078125, "learning_rate": 0.0006711075792192312, "loss": 0.6159, "step": 32440 }, { "epoch": 1.6117015992847918, "grad_norm": 0.1513671875, "learning_rate": 0.0006710678454355816, "loss": 0.6001, "step": 32450 }, { "epoch": 1.6121982715804113, "grad_norm": 0.10205078125, "learning_rate": 0.0006710281116519321, "loss": 0.625, "step": 32460 }, { "epoch": 1.6126949438760305, "grad_norm": 0.10009765625, "learning_rate": 0.0006709883778682825, "loss": 0.6024, "step": 32470 }, { "epoch": 1.61319161617165, "grad_norm": 0.10546875, "learning_rate": 0.000670948644084633, "loss": 0.5837, "step": 32480 }, { "epoch": 1.6136882884672694, "grad_norm": 0.1357421875, "learning_rate": 0.0006709089103009835, "loss": 0.5987, "step": 32490 }, { "epoch": 1.6141849607628886, "grad_norm": 0.107421875, "learning_rate": 0.0006708691765173339, "loss": 0.5843, "step": 32500 }, { "epoch": 1.6146816330585079, "grad_norm": 0.08642578125, "learning_rate": 0.0006708294427336843, "loss": 0.5891, "step": 32510 }, { "epoch": 1.6151783053541273, "grad_norm": 0.11328125, "learning_rate": 0.0006707897089500348, "loss": 0.593, "step": 32520 }, { "epoch": 1.6156749776497468, "grad_norm": 0.0986328125, "learning_rate": 0.0006707499751663852, "loss": 0.6336, "step": 32530 }, { "epoch": 1.616171649945366, "grad_norm": 0.1240234375, "learning_rate": 0.0006707102413827357, "loss": 0.5974, "step": 32540 }, { "epoch": 1.6166683222409854, "grad_norm": 0.130859375, "learning_rate": 0.0006706705075990862, "loss": 0.5764, "step": 32550 }, { "epoch": 1.617164994536605, "grad_norm": 0.1337890625, "learning_rate": 0.0006706307738154366, "loss": 0.6318, "step": 32560 }, { "epoch": 1.6176616668322241, "grad_norm": 0.11474609375, "learning_rate": 0.0006705910400317871, "loss": 0.5853, "step": 32570 }, { "epoch": 1.6181583391278433, "grad_norm": 0.09765625, "learning_rate": 0.0006705513062481374, "loss": 0.5914, "step": 32580 }, { "epoch": 1.6186550114234628, "grad_norm": 0.1005859375, "learning_rate": 0.000670511572464488, "loss": 0.5874, "step": 32590 }, { "epoch": 1.6191516837190822, "grad_norm": 0.09375, "learning_rate": 0.0006704718386808385, "loss": 0.5953, "step": 32600 }, { "epoch": 1.6196483560147015, "grad_norm": 0.138671875, "learning_rate": 0.0006704321048971888, "loss": 0.5655, "step": 32610 }, { "epoch": 1.6201450283103207, "grad_norm": 0.09130859375, "learning_rate": 0.0006703923711135393, "loss": 0.6293, "step": 32620 }, { "epoch": 1.6206417006059401, "grad_norm": 0.09619140625, "learning_rate": 0.0006703526373298897, "loss": 0.5849, "step": 32630 }, { "epoch": 1.6211383729015596, "grad_norm": 0.09423828125, "learning_rate": 0.0006703129035462402, "loss": 0.5957, "step": 32640 }, { "epoch": 1.6216350451971788, "grad_norm": 0.154296875, "learning_rate": 0.0006702731697625907, "loss": 0.5986, "step": 32650 }, { "epoch": 1.6221317174927983, "grad_norm": 0.12890625, "learning_rate": 0.0006702334359789411, "loss": 0.6059, "step": 32660 }, { "epoch": 1.6226283897884177, "grad_norm": 0.1005859375, "learning_rate": 0.0006701937021952915, "loss": 0.6147, "step": 32670 }, { "epoch": 1.623125062084037, "grad_norm": 0.140625, "learning_rate": 0.000670153968411642, "loss": 0.602, "step": 32680 }, { "epoch": 1.6236217343796562, "grad_norm": 0.10498046875, "learning_rate": 0.0006701142346279925, "loss": 0.5796, "step": 32690 }, { "epoch": 1.6241184066752756, "grad_norm": 0.1357421875, "learning_rate": 0.000670074500844343, "loss": 0.6129, "step": 32700 }, { "epoch": 1.624615078970895, "grad_norm": 0.1064453125, "learning_rate": 0.0006700347670606934, "loss": 0.5866, "step": 32710 }, { "epoch": 1.6251117512665143, "grad_norm": 0.1201171875, "learning_rate": 0.0006699950332770438, "loss": 0.609, "step": 32720 }, { "epoch": 1.6256084235621338, "grad_norm": 0.10400390625, "learning_rate": 0.0006699552994933944, "loss": 0.639, "step": 32730 }, { "epoch": 1.6261050958577532, "grad_norm": 0.125, "learning_rate": 0.0006699155657097448, "loss": 0.6427, "step": 32740 }, { "epoch": 1.6266017681533724, "grad_norm": 0.0908203125, "learning_rate": 0.0006698758319260952, "loss": 0.5993, "step": 32750 }, { "epoch": 1.6270984404489917, "grad_norm": 0.10400390625, "learning_rate": 0.0006698360981424457, "loss": 0.5942, "step": 32760 }, { "epoch": 1.627595112744611, "grad_norm": 0.10009765625, "learning_rate": 0.000669796364358796, "loss": 0.5955, "step": 32770 }, { "epoch": 1.6280917850402306, "grad_norm": 0.13671875, "learning_rate": 0.0006697566305751465, "loss": 0.6021, "step": 32780 }, { "epoch": 1.6285884573358498, "grad_norm": 0.09228515625, "learning_rate": 0.0006697168967914971, "loss": 0.608, "step": 32790 }, { "epoch": 1.629085129631469, "grad_norm": 0.1162109375, "learning_rate": 0.0006696771630078474, "loss": 0.572, "step": 32800 }, { "epoch": 1.6295818019270885, "grad_norm": 0.0986328125, "learning_rate": 0.0006696374292241979, "loss": 0.6285, "step": 32810 }, { "epoch": 1.630078474222708, "grad_norm": 0.10205078125, "learning_rate": 0.0006695976954405484, "loss": 0.5893, "step": 32820 }, { "epoch": 1.6305751465183271, "grad_norm": 0.1279296875, "learning_rate": 0.0006695579616568987, "loss": 0.5894, "step": 32830 }, { "epoch": 1.6310718188139466, "grad_norm": 0.1103515625, "learning_rate": 0.0006695182278732493, "loss": 0.6047, "step": 32840 }, { "epoch": 1.631568491109566, "grad_norm": 0.2060546875, "learning_rate": 0.0006694784940895997, "loss": 0.6013, "step": 32850 }, { "epoch": 1.6320651634051853, "grad_norm": 0.12060546875, "learning_rate": 0.0006694387603059502, "loss": 0.6161, "step": 32860 }, { "epoch": 1.6325618357008045, "grad_norm": 0.1416015625, "learning_rate": 0.0006693990265223006, "loss": 0.583, "step": 32870 }, { "epoch": 1.633058507996424, "grad_norm": 0.11181640625, "learning_rate": 0.000669359292738651, "loss": 0.6175, "step": 32880 }, { "epoch": 1.6335551802920434, "grad_norm": 0.154296875, "learning_rate": 0.0006693195589550016, "loss": 0.5876, "step": 32890 }, { "epoch": 1.6340518525876626, "grad_norm": 0.08935546875, "learning_rate": 0.000669279825171352, "loss": 0.5955, "step": 32900 }, { "epoch": 1.634548524883282, "grad_norm": 0.09228515625, "learning_rate": 0.0006692400913877024, "loss": 0.5851, "step": 32910 }, { "epoch": 1.6350451971789015, "grad_norm": 0.19921875, "learning_rate": 0.0006692003576040529, "loss": 0.6354, "step": 32920 }, { "epoch": 1.6355418694745207, "grad_norm": 0.10107421875, "learning_rate": 0.0006691606238204033, "loss": 0.5891, "step": 32930 }, { "epoch": 1.63603854177014, "grad_norm": 0.12890625, "learning_rate": 0.0006691208900367538, "loss": 0.6053, "step": 32940 }, { "epoch": 1.6365352140657594, "grad_norm": 0.1015625, "learning_rate": 0.0006690811562531043, "loss": 0.5908, "step": 32950 }, { "epoch": 1.6370318863613789, "grad_norm": 0.171875, "learning_rate": 0.0006690414224694546, "loss": 0.5854, "step": 32960 }, { "epoch": 1.637528558656998, "grad_norm": 0.1318359375, "learning_rate": 0.0006690016886858051, "loss": 0.5946, "step": 32970 }, { "epoch": 1.6380252309526173, "grad_norm": 0.09130859375, "learning_rate": 0.0006689619549021556, "loss": 0.6006, "step": 32980 }, { "epoch": 1.6385219032482368, "grad_norm": 0.142578125, "learning_rate": 0.000668922221118506, "loss": 0.5873, "step": 32990 }, { "epoch": 1.6390185755438562, "grad_norm": 0.138671875, "learning_rate": 0.0006688824873348565, "loss": 0.6015, "step": 33000 }, { "epoch": 1.6395152478394754, "grad_norm": 0.099609375, "learning_rate": 0.000668842753551207, "loss": 0.5932, "step": 33010 }, { "epoch": 1.640011920135095, "grad_norm": 0.1474609375, "learning_rate": 0.0006688030197675574, "loss": 0.601, "step": 33020 }, { "epoch": 1.6405085924307143, "grad_norm": 0.1005859375, "learning_rate": 0.0006687632859839078, "loss": 0.5883, "step": 33030 }, { "epoch": 1.6410052647263336, "grad_norm": 0.0947265625, "learning_rate": 0.0006687235522002583, "loss": 0.619, "step": 33040 }, { "epoch": 1.6415019370219528, "grad_norm": 0.1640625, "learning_rate": 0.0006686838184166088, "loss": 0.5785, "step": 33050 }, { "epoch": 1.6419986093175722, "grad_norm": 0.17578125, "learning_rate": 0.0006686440846329592, "loss": 0.6193, "step": 33060 }, { "epoch": 1.6424952816131917, "grad_norm": 0.1064453125, "learning_rate": 0.0006686043508493096, "loss": 0.6014, "step": 33070 }, { "epoch": 1.642991953908811, "grad_norm": 0.0986328125, "learning_rate": 0.0006685646170656601, "loss": 0.6019, "step": 33080 }, { "epoch": 1.6434886262044304, "grad_norm": 0.09716796875, "learning_rate": 0.0006685248832820106, "loss": 0.5937, "step": 33090 }, { "epoch": 1.6439852985000498, "grad_norm": 0.11474609375, "learning_rate": 0.000668485149498361, "loss": 0.6061, "step": 33100 }, { "epoch": 1.644481970795669, "grad_norm": 0.11328125, "learning_rate": 0.0006684454157147115, "loss": 0.571, "step": 33110 }, { "epoch": 1.6449786430912883, "grad_norm": 0.12451171875, "learning_rate": 0.0006684056819310619, "loss": 0.6038, "step": 33120 }, { "epoch": 1.6454753153869077, "grad_norm": 0.107421875, "learning_rate": 0.0006683659481474123, "loss": 0.5675, "step": 33130 }, { "epoch": 1.6459719876825272, "grad_norm": 0.119140625, "learning_rate": 0.0006683262143637629, "loss": 0.6265, "step": 33140 }, { "epoch": 1.6464686599781464, "grad_norm": 0.11376953125, "learning_rate": 0.0006682864805801133, "loss": 0.6078, "step": 33150 }, { "epoch": 1.6469653322737656, "grad_norm": 0.09619140625, "learning_rate": 0.0006682467467964637, "loss": 0.5742, "step": 33160 }, { "epoch": 1.647462004569385, "grad_norm": 0.111328125, "learning_rate": 0.0006682070130128142, "loss": 0.5663, "step": 33170 }, { "epoch": 1.6479586768650045, "grad_norm": 0.169921875, "learning_rate": 0.0006681672792291646, "loss": 0.5814, "step": 33180 }, { "epoch": 1.6484553491606238, "grad_norm": 0.099609375, "learning_rate": 0.0006681275454455151, "loss": 0.6041, "step": 33190 }, { "epoch": 1.6489520214562432, "grad_norm": 0.1572265625, "learning_rate": 0.0006680878116618656, "loss": 0.5971, "step": 33200 }, { "epoch": 1.6494486937518626, "grad_norm": 0.12890625, "learning_rate": 0.000668048077878216, "loss": 0.5873, "step": 33210 }, { "epoch": 1.6499453660474819, "grad_norm": 0.09521484375, "learning_rate": 0.0006680083440945664, "loss": 0.5809, "step": 33220 }, { "epoch": 1.650442038343101, "grad_norm": 0.1015625, "learning_rate": 0.0006679686103109168, "loss": 0.5429, "step": 33230 }, { "epoch": 1.6509387106387206, "grad_norm": 0.09130859375, "learning_rate": 0.0006679288765272674, "loss": 0.5753, "step": 33240 }, { "epoch": 1.65143538293434, "grad_norm": 0.12890625, "learning_rate": 0.0006678891427436178, "loss": 0.5938, "step": 33250 }, { "epoch": 1.6519320552299592, "grad_norm": 0.10302734375, "learning_rate": 0.0006678494089599682, "loss": 0.6029, "step": 33260 }, { "epoch": 1.6524287275255787, "grad_norm": 0.11767578125, "learning_rate": 0.0006678096751763187, "loss": 0.5796, "step": 33270 }, { "epoch": 1.6529253998211981, "grad_norm": 0.10302734375, "learning_rate": 0.0006677699413926691, "loss": 0.5814, "step": 33280 }, { "epoch": 1.6534220721168174, "grad_norm": 0.0869140625, "learning_rate": 0.0006677302076090196, "loss": 0.5828, "step": 33290 }, { "epoch": 1.6539187444124366, "grad_norm": 0.10546875, "learning_rate": 0.0006676904738253701, "loss": 0.5606, "step": 33300 }, { "epoch": 1.654415416708056, "grad_norm": 0.12060546875, "learning_rate": 0.0006676507400417205, "loss": 0.6069, "step": 33310 }, { "epoch": 1.6549120890036755, "grad_norm": 0.11181640625, "learning_rate": 0.0006676110062580709, "loss": 0.5975, "step": 33320 }, { "epoch": 1.6554087612992947, "grad_norm": 0.126953125, "learning_rate": 0.0006675712724744214, "loss": 0.587, "step": 33330 }, { "epoch": 1.655905433594914, "grad_norm": 0.10009765625, "learning_rate": 0.0006675315386907719, "loss": 0.576, "step": 33340 }, { "epoch": 1.6564021058905334, "grad_norm": 0.228515625, "learning_rate": 0.0006674918049071223, "loss": 0.6055, "step": 33350 }, { "epoch": 1.6568987781861528, "grad_norm": 0.134765625, "learning_rate": 0.0006674520711234728, "loss": 0.5697, "step": 33360 }, { "epoch": 1.657395450481772, "grad_norm": 0.107421875, "learning_rate": 0.0006674123373398232, "loss": 0.5907, "step": 33370 }, { "epoch": 1.6578921227773915, "grad_norm": 0.10986328125, "learning_rate": 0.0006673726035561736, "loss": 0.5925, "step": 33380 }, { "epoch": 1.658388795073011, "grad_norm": 0.1064453125, "learning_rate": 0.0006673328697725242, "loss": 0.6118, "step": 33390 }, { "epoch": 1.6588854673686302, "grad_norm": 0.1181640625, "learning_rate": 0.0006672931359888746, "loss": 0.6064, "step": 33400 }, { "epoch": 1.6593821396642494, "grad_norm": 0.11181640625, "learning_rate": 0.000667253402205225, "loss": 0.5998, "step": 33410 }, { "epoch": 1.6598788119598689, "grad_norm": 0.14453125, "learning_rate": 0.0006672136684215755, "loss": 0.5894, "step": 33420 }, { "epoch": 1.6603754842554883, "grad_norm": 0.12109375, "learning_rate": 0.0006671739346379259, "loss": 0.5991, "step": 33430 }, { "epoch": 1.6608721565511075, "grad_norm": 0.2041015625, "learning_rate": 0.0006671342008542764, "loss": 0.5902, "step": 33440 }, { "epoch": 1.6613688288467268, "grad_norm": 0.10400390625, "learning_rate": 0.0006670944670706268, "loss": 0.6083, "step": 33450 }, { "epoch": 1.6618655011423464, "grad_norm": 0.10107421875, "learning_rate": 0.0006670547332869773, "loss": 0.5757, "step": 33460 }, { "epoch": 1.6623621734379657, "grad_norm": 0.1572265625, "learning_rate": 0.0006670149995033278, "loss": 0.5965, "step": 33470 }, { "epoch": 1.662858845733585, "grad_norm": 0.09912109375, "learning_rate": 0.0006669752657196781, "loss": 0.6245, "step": 33480 }, { "epoch": 1.6633555180292043, "grad_norm": 0.29296875, "learning_rate": 0.0006669355319360287, "loss": 0.6389, "step": 33490 }, { "epoch": 1.6638521903248238, "grad_norm": 0.10546875, "learning_rate": 0.0006668957981523791, "loss": 0.5866, "step": 33500 }, { "epoch": 1.664348862620443, "grad_norm": 0.10693359375, "learning_rate": 0.0006668560643687295, "loss": 0.5713, "step": 33510 }, { "epoch": 1.6648455349160622, "grad_norm": 0.1298828125, "learning_rate": 0.00066681633058508, "loss": 0.6158, "step": 33520 }, { "epoch": 1.6653422072116817, "grad_norm": 0.20703125, "learning_rate": 0.0006667765968014304, "loss": 0.5874, "step": 33530 }, { "epoch": 1.6658388795073011, "grad_norm": 0.11279296875, "learning_rate": 0.0006667368630177808, "loss": 0.5814, "step": 33540 }, { "epoch": 1.6663355518029204, "grad_norm": 0.158203125, "learning_rate": 0.0006666971292341314, "loss": 0.586, "step": 33550 }, { "epoch": 1.6668322240985398, "grad_norm": 0.09912109375, "learning_rate": 0.0006666573954504818, "loss": 0.5789, "step": 33560 }, { "epoch": 1.6673288963941593, "grad_norm": 0.10009765625, "learning_rate": 0.0006666176616668322, "loss": 0.5843, "step": 33570 }, { "epoch": 1.6678255686897785, "grad_norm": 0.1455078125, "learning_rate": 0.0006665779278831827, "loss": 0.5828, "step": 33580 }, { "epoch": 1.6683222409853977, "grad_norm": 0.0927734375, "learning_rate": 0.0006665381940995332, "loss": 0.5808, "step": 33590 }, { "epoch": 1.6688189132810172, "grad_norm": 0.11865234375, "learning_rate": 0.0006664984603158837, "loss": 0.5988, "step": 33600 }, { "epoch": 1.6693155855766366, "grad_norm": 0.11328125, "learning_rate": 0.0006664587265322341, "loss": 0.5865, "step": 33610 }, { "epoch": 1.6698122578722558, "grad_norm": 0.15625, "learning_rate": 0.0006664189927485845, "loss": 0.5933, "step": 33620 }, { "epoch": 1.670308930167875, "grad_norm": 0.11181640625, "learning_rate": 0.000666379258964935, "loss": 0.5899, "step": 33630 }, { "epoch": 1.6708056024634947, "grad_norm": 0.1484375, "learning_rate": 0.0006663395251812853, "loss": 0.5751, "step": 33640 }, { "epoch": 1.671302274759114, "grad_norm": 0.09326171875, "learning_rate": 0.0006662997913976359, "loss": 0.5719, "step": 33650 }, { "epoch": 1.6717989470547332, "grad_norm": 0.10302734375, "learning_rate": 0.0006662600576139864, "loss": 0.5757, "step": 33660 }, { "epoch": 1.6722956193503526, "grad_norm": 0.0927734375, "learning_rate": 0.0006662203238303367, "loss": 0.6287, "step": 33670 }, { "epoch": 1.672792291645972, "grad_norm": 0.09521484375, "learning_rate": 0.0006661805900466872, "loss": 0.6213, "step": 33680 }, { "epoch": 1.6732889639415913, "grad_norm": 0.12890625, "learning_rate": 0.0006661408562630378, "loss": 0.6159, "step": 33690 }, { "epoch": 1.6737856362372106, "grad_norm": 0.10693359375, "learning_rate": 0.0006661011224793881, "loss": 0.5905, "step": 33700 }, { "epoch": 1.67428230853283, "grad_norm": 0.103515625, "learning_rate": 0.0006660613886957386, "loss": 0.6113, "step": 33710 }, { "epoch": 1.6747789808284494, "grad_norm": 0.095703125, "learning_rate": 0.000666021654912089, "loss": 0.5731, "step": 33720 }, { "epoch": 1.6752756531240687, "grad_norm": 0.11669921875, "learning_rate": 0.0006659819211284394, "loss": 0.5678, "step": 33730 }, { "epoch": 1.6757723254196881, "grad_norm": 0.1240234375, "learning_rate": 0.00066594218734479, "loss": 0.5949, "step": 33740 }, { "epoch": 1.6762689977153076, "grad_norm": 0.09619140625, "learning_rate": 0.0006659024535611404, "loss": 0.5886, "step": 33750 }, { "epoch": 1.6767656700109268, "grad_norm": 0.10791015625, "learning_rate": 0.0006658627197774909, "loss": 0.5974, "step": 33760 }, { "epoch": 1.677262342306546, "grad_norm": 0.09619140625, "learning_rate": 0.0006658229859938413, "loss": 0.58, "step": 33770 }, { "epoch": 1.6777590146021655, "grad_norm": 0.0966796875, "learning_rate": 0.0006657832522101917, "loss": 0.5542, "step": 33780 }, { "epoch": 1.678255686897785, "grad_norm": 0.0859375, "learning_rate": 0.0006657435184265423, "loss": 0.5807, "step": 33790 }, { "epoch": 1.6787523591934042, "grad_norm": 0.09814453125, "learning_rate": 0.0006657037846428927, "loss": 0.5959, "step": 33800 }, { "epoch": 1.6792490314890234, "grad_norm": 0.0947265625, "learning_rate": 0.0006656640508592431, "loss": 0.5967, "step": 33810 }, { "epoch": 1.679745703784643, "grad_norm": 0.09228515625, "learning_rate": 0.0006656243170755936, "loss": 0.6256, "step": 33820 }, { "epoch": 1.6802423760802623, "grad_norm": 0.1025390625, "learning_rate": 0.0006655845832919439, "loss": 0.5835, "step": 33830 }, { "epoch": 1.6807390483758815, "grad_norm": 0.10546875, "learning_rate": 0.0006655448495082944, "loss": 0.6092, "step": 33840 }, { "epoch": 1.681235720671501, "grad_norm": 0.1220703125, "learning_rate": 0.000665505115724645, "loss": 0.6094, "step": 33850 }, { "epoch": 1.6817323929671204, "grad_norm": 0.1455078125, "learning_rate": 0.0006654653819409953, "loss": 0.6105, "step": 33860 }, { "epoch": 1.6822290652627396, "grad_norm": 0.12109375, "learning_rate": 0.0006654256481573458, "loss": 0.632, "step": 33870 }, { "epoch": 1.6827257375583589, "grad_norm": 0.09765625, "learning_rate": 0.0006653859143736963, "loss": 0.6004, "step": 33880 }, { "epoch": 1.6832224098539783, "grad_norm": 0.1142578125, "learning_rate": 0.0006653461805900466, "loss": 0.5981, "step": 33890 }, { "epoch": 1.6837190821495978, "grad_norm": 0.134765625, "learning_rate": 0.0006653064468063972, "loss": 0.5944, "step": 33900 }, { "epoch": 1.684215754445217, "grad_norm": 0.10205078125, "learning_rate": 0.0006652667130227476, "loss": 0.599, "step": 33910 }, { "epoch": 1.6847124267408364, "grad_norm": 0.2294921875, "learning_rate": 0.0006652269792390981, "loss": 0.5958, "step": 33920 }, { "epoch": 1.6852090990364559, "grad_norm": 0.09326171875, "learning_rate": 0.0006651872454554485, "loss": 0.5523, "step": 33930 }, { "epoch": 1.685705771332075, "grad_norm": 0.140625, "learning_rate": 0.0006651475116717989, "loss": 0.5817, "step": 33940 }, { "epoch": 1.6862024436276943, "grad_norm": 0.10400390625, "learning_rate": 0.0006651077778881495, "loss": 0.579, "step": 33950 }, { "epoch": 1.6866991159233138, "grad_norm": 0.1357421875, "learning_rate": 0.0006650680441044999, "loss": 0.6026, "step": 33960 }, { "epoch": 1.6871957882189332, "grad_norm": 0.08935546875, "learning_rate": 0.0006650283103208503, "loss": 0.5954, "step": 33970 }, { "epoch": 1.6876924605145525, "grad_norm": 0.12109375, "learning_rate": 0.0006649885765372008, "loss": 0.5736, "step": 33980 }, { "epoch": 1.6881891328101717, "grad_norm": 0.1728515625, "learning_rate": 0.0006649488427535512, "loss": 0.5884, "step": 33990 }, { "epoch": 1.6886858051057914, "grad_norm": 0.12060546875, "learning_rate": 0.0006649091089699017, "loss": 0.6015, "step": 34000 }, { "epoch": 1.6891824774014106, "grad_norm": 0.1123046875, "learning_rate": 0.0006648693751862522, "loss": 0.5854, "step": 34010 }, { "epoch": 1.6896791496970298, "grad_norm": 0.13671875, "learning_rate": 0.0006648296414026026, "loss": 0.6298, "step": 34020 }, { "epoch": 1.6901758219926493, "grad_norm": 0.0947265625, "learning_rate": 0.000664789907618953, "loss": 0.576, "step": 34030 }, { "epoch": 1.6906724942882687, "grad_norm": 0.0986328125, "learning_rate": 0.0006647501738353036, "loss": 0.5603, "step": 34040 }, { "epoch": 1.691169166583888, "grad_norm": 0.099609375, "learning_rate": 0.000664710440051654, "loss": 0.601, "step": 34050 }, { "epoch": 1.6916658388795072, "grad_norm": 0.11669921875, "learning_rate": 0.0006646707062680044, "loss": 0.6415, "step": 34060 }, { "epoch": 1.6921625111751266, "grad_norm": 0.1044921875, "learning_rate": 0.0006646309724843549, "loss": 0.6118, "step": 34070 }, { "epoch": 1.692659183470746, "grad_norm": 0.10205078125, "learning_rate": 0.0006645912387007053, "loss": 0.5767, "step": 34080 }, { "epoch": 1.6931558557663653, "grad_norm": 0.0966796875, "learning_rate": 0.0006645515049170557, "loss": 0.5706, "step": 34090 }, { "epoch": 1.6936525280619847, "grad_norm": 0.11376953125, "learning_rate": 0.0006645117711334062, "loss": 0.6036, "step": 34100 }, { "epoch": 1.6941492003576042, "grad_norm": 0.1005859375, "learning_rate": 0.0006644720373497567, "loss": 0.6352, "step": 34110 }, { "epoch": 1.6946458726532234, "grad_norm": 0.10791015625, "learning_rate": 0.0006644323035661071, "loss": 0.5675, "step": 34120 }, { "epoch": 1.6951425449488426, "grad_norm": 0.125, "learning_rate": 0.0006643925697824575, "loss": 0.6123, "step": 34130 }, { "epoch": 1.695639217244462, "grad_norm": 0.11328125, "learning_rate": 0.000664352835998808, "loss": 0.5847, "step": 34140 }, { "epoch": 1.6961358895400815, "grad_norm": 0.09814453125, "learning_rate": 0.0006643131022151585, "loss": 0.589, "step": 34150 }, { "epoch": 1.6966325618357008, "grad_norm": 0.12060546875, "learning_rate": 0.0006642733684315089, "loss": 0.5961, "step": 34160 }, { "epoch": 1.69712923413132, "grad_norm": 0.11279296875, "learning_rate": 0.0006642336346478594, "loss": 0.6074, "step": 34170 }, { "epoch": 1.6976259064269397, "grad_norm": 0.1201171875, "learning_rate": 0.0006641939008642098, "loss": 0.5817, "step": 34180 }, { "epoch": 1.698122578722559, "grad_norm": 0.09228515625, "learning_rate": 0.0006641541670805602, "loss": 0.5915, "step": 34190 }, { "epoch": 1.6986192510181781, "grad_norm": 0.1201171875, "learning_rate": 0.0006641144332969108, "loss": 0.5905, "step": 34200 }, { "epoch": 1.6991159233137976, "grad_norm": 0.134765625, "learning_rate": 0.0006640746995132612, "loss": 0.5991, "step": 34210 }, { "epoch": 1.699612595609417, "grad_norm": 0.125, "learning_rate": 0.0006640349657296116, "loss": 0.5691, "step": 34220 }, { "epoch": 1.7001092679050362, "grad_norm": 0.09765625, "learning_rate": 0.0006639952319459621, "loss": 0.5869, "step": 34230 }, { "epoch": 1.7006059402006555, "grad_norm": 0.10791015625, "learning_rate": 0.0006639554981623125, "loss": 0.6391, "step": 34240 }, { "epoch": 1.701102612496275, "grad_norm": 0.11767578125, "learning_rate": 0.000663915764378663, "loss": 0.583, "step": 34250 }, { "epoch": 1.7015992847918944, "grad_norm": 0.10595703125, "learning_rate": 0.0006638760305950135, "loss": 0.6054, "step": 34260 }, { "epoch": 1.7020959570875136, "grad_norm": 0.138671875, "learning_rate": 0.0006638362968113639, "loss": 0.5867, "step": 34270 }, { "epoch": 1.702592629383133, "grad_norm": 0.09375, "learning_rate": 0.0006637965630277143, "loss": 0.6036, "step": 34280 }, { "epoch": 1.7030893016787525, "grad_norm": 0.11865234375, "learning_rate": 0.0006637568292440648, "loss": 0.6233, "step": 34290 }, { "epoch": 1.7035859739743717, "grad_norm": 0.08642578125, "learning_rate": 0.0006637170954604153, "loss": 0.5898, "step": 34300 }, { "epoch": 1.704082646269991, "grad_norm": 0.1044921875, "learning_rate": 0.0006636773616767657, "loss": 0.6078, "step": 34310 }, { "epoch": 1.7045793185656104, "grad_norm": 0.1123046875, "learning_rate": 0.0006636376278931161, "loss": 0.6156, "step": 34320 }, { "epoch": 1.7050759908612299, "grad_norm": 0.09716796875, "learning_rate": 0.0006635978941094666, "loss": 0.5982, "step": 34330 }, { "epoch": 1.705572663156849, "grad_norm": 0.10205078125, "learning_rate": 0.0006635581603258171, "loss": 0.5923, "step": 34340 }, { "epoch": 1.7060693354524683, "grad_norm": 0.0908203125, "learning_rate": 0.0006635184265421675, "loss": 0.5583, "step": 34350 }, { "epoch": 1.706566007748088, "grad_norm": 0.15625, "learning_rate": 0.000663478692758518, "loss": 0.5799, "step": 34360 }, { "epoch": 1.7070626800437072, "grad_norm": 0.125, "learning_rate": 0.0006634389589748684, "loss": 0.5927, "step": 34370 }, { "epoch": 1.7075593523393264, "grad_norm": 0.140625, "learning_rate": 0.0006633992251912188, "loss": 0.5996, "step": 34380 }, { "epoch": 1.7080560246349459, "grad_norm": 0.103515625, "learning_rate": 0.0006633594914075693, "loss": 0.6004, "step": 34390 }, { "epoch": 1.7085526969305653, "grad_norm": 0.10595703125, "learning_rate": 0.0006633197576239198, "loss": 0.611, "step": 34400 }, { "epoch": 1.7090493692261846, "grad_norm": 0.138671875, "learning_rate": 0.0006632800238402702, "loss": 0.5754, "step": 34410 }, { "epoch": 1.7095460415218038, "grad_norm": 0.10888671875, "learning_rate": 0.0006632402900566207, "loss": 0.599, "step": 34420 }, { "epoch": 1.7100427138174232, "grad_norm": 0.1064453125, "learning_rate": 0.0006632005562729711, "loss": 0.5795, "step": 34430 }, { "epoch": 1.7105393861130427, "grad_norm": 0.146484375, "learning_rate": 0.0006631608224893215, "loss": 0.5899, "step": 34440 }, { "epoch": 1.711036058408662, "grad_norm": 0.16015625, "learning_rate": 0.0006631210887056721, "loss": 0.5833, "step": 34450 }, { "epoch": 1.7115327307042814, "grad_norm": 0.10888671875, "learning_rate": 0.0006630813549220225, "loss": 0.5888, "step": 34460 }, { "epoch": 1.7120294029999008, "grad_norm": 0.19140625, "learning_rate": 0.0006630416211383729, "loss": 0.5978, "step": 34470 }, { "epoch": 1.71252607529552, "grad_norm": 0.09326171875, "learning_rate": 0.0006630018873547234, "loss": 0.5993, "step": 34480 }, { "epoch": 1.7130227475911393, "grad_norm": 0.1611328125, "learning_rate": 0.0006629621535710738, "loss": 0.5815, "step": 34490 }, { "epoch": 1.7135194198867587, "grad_norm": 0.1357421875, "learning_rate": 0.0006629224197874244, "loss": 0.5956, "step": 34500 }, { "epoch": 1.7140160921823782, "grad_norm": 0.09765625, "learning_rate": 0.0006628826860037747, "loss": 0.5933, "step": 34510 }, { "epoch": 1.7145127644779974, "grad_norm": 0.0947265625, "learning_rate": 0.0006628429522201252, "loss": 0.5888, "step": 34520 }, { "epoch": 1.7150094367736166, "grad_norm": 0.1640625, "learning_rate": 0.0006628032184364757, "loss": 0.5999, "step": 34530 }, { "epoch": 1.715506109069236, "grad_norm": 0.162109375, "learning_rate": 0.000662763484652826, "loss": 0.6141, "step": 34540 }, { "epoch": 1.7160027813648555, "grad_norm": 0.10107421875, "learning_rate": 0.0006627237508691766, "loss": 0.5664, "step": 34550 }, { "epoch": 1.7164994536604747, "grad_norm": 0.09228515625, "learning_rate": 0.0006626840170855271, "loss": 0.5732, "step": 34560 }, { "epoch": 1.7169961259560942, "grad_norm": 0.0849609375, "learning_rate": 0.0006626442833018774, "loss": 0.5897, "step": 34570 }, { "epoch": 1.7174927982517136, "grad_norm": 0.09228515625, "learning_rate": 0.0006626045495182279, "loss": 0.6314, "step": 34580 }, { "epoch": 1.7179894705473329, "grad_norm": 0.10546875, "learning_rate": 0.0006625648157345783, "loss": 0.5981, "step": 34590 }, { "epoch": 1.718486142842952, "grad_norm": 0.10791015625, "learning_rate": 0.0006625250819509288, "loss": 0.6238, "step": 34600 }, { "epoch": 1.7189828151385715, "grad_norm": 0.09619140625, "learning_rate": 0.0006624853481672793, "loss": 0.5775, "step": 34610 }, { "epoch": 1.719479487434191, "grad_norm": 0.1142578125, "learning_rate": 0.0006624456143836297, "loss": 0.5957, "step": 34620 }, { "epoch": 1.7199761597298102, "grad_norm": 0.1328125, "learning_rate": 0.0006624058805999801, "loss": 0.6131, "step": 34630 }, { "epoch": 1.7204728320254297, "grad_norm": 0.1318359375, "learning_rate": 0.0006623661468163306, "loss": 0.6022, "step": 34640 }, { "epoch": 1.7209695043210491, "grad_norm": 0.109375, "learning_rate": 0.0006623264130326811, "loss": 0.601, "step": 34650 }, { "epoch": 1.7214661766166683, "grad_norm": 0.1015625, "learning_rate": 0.0006622866792490316, "loss": 0.594, "step": 34660 }, { "epoch": 1.7219628489122876, "grad_norm": 0.11669921875, "learning_rate": 0.000662246945465382, "loss": 0.5892, "step": 34670 }, { "epoch": 1.722459521207907, "grad_norm": 0.10888671875, "learning_rate": 0.0006622072116817324, "loss": 0.5817, "step": 34680 }, { "epoch": 1.7229561935035265, "grad_norm": 0.12060546875, "learning_rate": 0.0006621674778980829, "loss": 0.6195, "step": 34690 }, { "epoch": 1.7234528657991457, "grad_norm": 0.10107421875, "learning_rate": 0.0006621277441144333, "loss": 0.5867, "step": 34700 }, { "epoch": 1.723949538094765, "grad_norm": 0.09716796875, "learning_rate": 0.0006620880103307838, "loss": 0.6086, "step": 34710 }, { "epoch": 1.7244462103903844, "grad_norm": 0.10693359375, "learning_rate": 0.0006620482765471343, "loss": 0.5925, "step": 34720 }, { "epoch": 1.7249428826860038, "grad_norm": 0.248046875, "learning_rate": 0.0006620085427634846, "loss": 0.6208, "step": 34730 }, { "epoch": 1.725439554981623, "grad_norm": 0.1416015625, "learning_rate": 0.0006619688089798351, "loss": 0.6107, "step": 34740 }, { "epoch": 1.7259362272772425, "grad_norm": 0.09912109375, "learning_rate": 0.0006619290751961857, "loss": 0.5579, "step": 34750 }, { "epoch": 1.726432899572862, "grad_norm": 0.115234375, "learning_rate": 0.000661889341412536, "loss": 0.5842, "step": 34760 }, { "epoch": 1.7269295718684812, "grad_norm": 0.09716796875, "learning_rate": 0.0006618496076288865, "loss": 0.5636, "step": 34770 }, { "epoch": 1.7274262441641004, "grad_norm": 0.10400390625, "learning_rate": 0.0006618098738452369, "loss": 0.6168, "step": 34780 }, { "epoch": 1.7279229164597198, "grad_norm": 0.1064453125, "learning_rate": 0.0006617701400615874, "loss": 0.5637, "step": 34790 }, { "epoch": 1.7284195887553393, "grad_norm": 0.083984375, "learning_rate": 0.0006617304062779379, "loss": 0.5769, "step": 34800 }, { "epoch": 1.7289162610509585, "grad_norm": 0.1201171875, "learning_rate": 0.0006616906724942883, "loss": 0.6049, "step": 34810 }, { "epoch": 1.729412933346578, "grad_norm": 0.130859375, "learning_rate": 0.0006616509387106388, "loss": 0.5878, "step": 34820 }, { "epoch": 1.7299096056421974, "grad_norm": 0.10888671875, "learning_rate": 0.0006616112049269892, "loss": 0.5919, "step": 34830 }, { "epoch": 1.7304062779378167, "grad_norm": 0.1640625, "learning_rate": 0.0006615714711433396, "loss": 0.5782, "step": 34840 }, { "epoch": 1.7309029502334359, "grad_norm": 0.1748046875, "learning_rate": 0.0006615317373596902, "loss": 0.634, "step": 34850 }, { "epoch": 1.7313996225290553, "grad_norm": 0.1142578125, "learning_rate": 0.0006614920035760406, "loss": 0.5835, "step": 34860 }, { "epoch": 1.7318962948246748, "grad_norm": 0.10205078125, "learning_rate": 0.000661452269792391, "loss": 0.6219, "step": 34870 }, { "epoch": 1.732392967120294, "grad_norm": 0.10693359375, "learning_rate": 0.0006614125360087415, "loss": 0.5947, "step": 34880 }, { "epoch": 1.7328896394159132, "grad_norm": 0.10205078125, "learning_rate": 0.0006613728022250919, "loss": 0.59, "step": 34890 }, { "epoch": 1.7333863117115327, "grad_norm": 0.283203125, "learning_rate": 0.0006613330684414424, "loss": 0.6176, "step": 34900 }, { "epoch": 1.7338829840071521, "grad_norm": 0.1201171875, "learning_rate": 0.0006612933346577929, "loss": 0.619, "step": 34910 }, { "epoch": 1.7343796563027714, "grad_norm": 0.0869140625, "learning_rate": 0.0006612536008741432, "loss": 0.5682, "step": 34920 }, { "epoch": 1.7348763285983908, "grad_norm": 0.099609375, "learning_rate": 0.0006612138670904937, "loss": 0.5862, "step": 34930 }, { "epoch": 1.7353730008940103, "grad_norm": 0.10888671875, "learning_rate": 0.0006611741333068442, "loss": 0.5935, "step": 34940 }, { "epoch": 1.7358696731896295, "grad_norm": 0.1650390625, "learning_rate": 0.0006611343995231947, "loss": 0.5896, "step": 34950 }, { "epoch": 1.7363663454852487, "grad_norm": 0.1201171875, "learning_rate": 0.0006610946657395451, "loss": 0.5667, "step": 34960 }, { "epoch": 1.7368630177808682, "grad_norm": 0.16015625, "learning_rate": 0.0006610549319558955, "loss": 0.6185, "step": 34970 }, { "epoch": 1.7373596900764876, "grad_norm": 0.0947265625, "learning_rate": 0.000661015198172246, "loss": 0.5739, "step": 34980 }, { "epoch": 1.7378563623721068, "grad_norm": 0.10107421875, "learning_rate": 0.0006609754643885964, "loss": 0.5968, "step": 34990 }, { "epoch": 1.7383530346677263, "grad_norm": 0.09716796875, "learning_rate": 0.0006609357306049468, "loss": 0.6268, "step": 35000 }, { "epoch": 1.7388497069633457, "grad_norm": 0.11572265625, "learning_rate": 0.0006608959968212974, "loss": 0.6147, "step": 35010 }, { "epoch": 1.739346379258965, "grad_norm": 0.10595703125, "learning_rate": 0.0006608562630376478, "loss": 0.577, "step": 35020 }, { "epoch": 1.7398430515545842, "grad_norm": 0.12890625, "learning_rate": 0.0006608165292539982, "loss": 0.601, "step": 35030 }, { "epoch": 1.7403397238502036, "grad_norm": 0.08935546875, "learning_rate": 0.0006607767954703487, "loss": 0.6041, "step": 35040 }, { "epoch": 1.740836396145823, "grad_norm": 0.08642578125, "learning_rate": 0.0006607370616866992, "loss": 0.581, "step": 35050 }, { "epoch": 1.7413330684414423, "grad_norm": 0.09326171875, "learning_rate": 0.0006606973279030496, "loss": 0.5901, "step": 35060 }, { "epoch": 1.7418297407370615, "grad_norm": 0.091796875, "learning_rate": 0.0006606575941194001, "loss": 0.6036, "step": 35070 }, { "epoch": 1.742326413032681, "grad_norm": 0.10498046875, "learning_rate": 0.0006606178603357505, "loss": 0.6168, "step": 35080 }, { "epoch": 1.7428230853283004, "grad_norm": 0.09033203125, "learning_rate": 0.0006605781265521009, "loss": 0.6085, "step": 35090 }, { "epoch": 1.7433197576239197, "grad_norm": 0.1015625, "learning_rate": 0.0006605383927684515, "loss": 0.5746, "step": 35100 }, { "epoch": 1.7438164299195391, "grad_norm": 0.17578125, "learning_rate": 0.0006604986589848019, "loss": 0.581, "step": 35110 }, { "epoch": 1.7443131022151586, "grad_norm": 0.154296875, "learning_rate": 0.0006604589252011523, "loss": 0.5806, "step": 35120 }, { "epoch": 1.7448097745107778, "grad_norm": 0.11083984375, "learning_rate": 0.0006604191914175028, "loss": 0.5574, "step": 35130 }, { "epoch": 1.745306446806397, "grad_norm": 0.142578125, "learning_rate": 0.0006603794576338532, "loss": 0.6143, "step": 35140 }, { "epoch": 1.7458031191020165, "grad_norm": 0.140625, "learning_rate": 0.0006603397238502036, "loss": 0.5985, "step": 35150 }, { "epoch": 1.746299791397636, "grad_norm": 0.2177734375, "learning_rate": 0.0006602999900665542, "loss": 0.6048, "step": 35160 }, { "epoch": 1.7467964636932551, "grad_norm": 0.0966796875, "learning_rate": 0.0006602602562829046, "loss": 0.5965, "step": 35170 }, { "epoch": 1.7472931359888746, "grad_norm": 0.1044921875, "learning_rate": 0.000660220522499255, "loss": 0.5958, "step": 35180 }, { "epoch": 1.747789808284494, "grad_norm": 0.1025390625, "learning_rate": 0.0006601807887156054, "loss": 0.607, "step": 35190 }, { "epoch": 1.7482864805801133, "grad_norm": 0.1123046875, "learning_rate": 0.000660141054931956, "loss": 0.5923, "step": 35200 }, { "epoch": 1.7487831528757325, "grad_norm": 0.12451171875, "learning_rate": 0.0006601013211483064, "loss": 0.6049, "step": 35210 }, { "epoch": 1.749279825171352, "grad_norm": 0.1279296875, "learning_rate": 0.0006600615873646568, "loss": 0.6206, "step": 35220 }, { "epoch": 1.7497764974669714, "grad_norm": 0.1435546875, "learning_rate": 0.0006600218535810073, "loss": 0.6048, "step": 35230 }, { "epoch": 1.7502731697625906, "grad_norm": 0.10302734375, "learning_rate": 0.0006599821197973577, "loss": 0.6114, "step": 35240 }, { "epoch": 1.7507698420582098, "grad_norm": 0.16015625, "learning_rate": 0.0006599423860137081, "loss": 0.5983, "step": 35250 }, { "epoch": 1.7512665143538293, "grad_norm": 0.1083984375, "learning_rate": 0.0006599026522300587, "loss": 0.6022, "step": 35260 }, { "epoch": 1.7517631866494487, "grad_norm": 0.1123046875, "learning_rate": 0.0006598629184464091, "loss": 0.616, "step": 35270 }, { "epoch": 1.752259858945068, "grad_norm": 0.099609375, "learning_rate": 0.0006598231846627595, "loss": 0.5992, "step": 35280 }, { "epoch": 1.7527565312406874, "grad_norm": 0.11865234375, "learning_rate": 0.00065978345087911, "loss": 0.5907, "step": 35290 }, { "epoch": 1.7532532035363069, "grad_norm": 0.150390625, "learning_rate": 0.0006597437170954604, "loss": 0.5911, "step": 35300 }, { "epoch": 1.753749875831926, "grad_norm": 0.09375, "learning_rate": 0.0006597039833118109, "loss": 0.5878, "step": 35310 }, { "epoch": 1.7542465481275453, "grad_norm": 0.1220703125, "learning_rate": 0.0006596642495281614, "loss": 0.58, "step": 35320 }, { "epoch": 1.7547432204231648, "grad_norm": 0.19921875, "learning_rate": 0.0006596245157445118, "loss": 0.6123, "step": 35330 }, { "epoch": 1.7552398927187842, "grad_norm": 0.0888671875, "learning_rate": 0.0006595847819608622, "loss": 0.5974, "step": 35340 }, { "epoch": 1.7557365650144034, "grad_norm": 0.12451171875, "learning_rate": 0.0006595450481772127, "loss": 0.5911, "step": 35350 }, { "epoch": 1.756233237310023, "grad_norm": 0.09228515625, "learning_rate": 0.0006595053143935632, "loss": 0.5947, "step": 35360 }, { "epoch": 1.7567299096056423, "grad_norm": 0.10595703125, "learning_rate": 0.0006594655806099136, "loss": 0.5844, "step": 35370 }, { "epoch": 1.7572265819012616, "grad_norm": 0.09033203125, "learning_rate": 0.000659425846826264, "loss": 0.5914, "step": 35380 }, { "epoch": 1.7577232541968808, "grad_norm": 0.1015625, "learning_rate": 0.0006593861130426145, "loss": 0.5856, "step": 35390 }, { "epoch": 1.7582199264925003, "grad_norm": 0.0859375, "learning_rate": 0.000659346379258965, "loss": 0.6051, "step": 35400 }, { "epoch": 1.7587165987881197, "grad_norm": 0.09423828125, "learning_rate": 0.0006593066454753154, "loss": 0.6096, "step": 35410 }, { "epoch": 1.759213271083739, "grad_norm": 0.16015625, "learning_rate": 0.0006592669116916659, "loss": 0.5855, "step": 35420 }, { "epoch": 1.7597099433793582, "grad_norm": 0.1318359375, "learning_rate": 0.0006592271779080163, "loss": 0.6032, "step": 35430 }, { "epoch": 1.7602066156749776, "grad_norm": 0.10400390625, "learning_rate": 0.0006591874441243667, "loss": 0.5703, "step": 35440 }, { "epoch": 1.760703287970597, "grad_norm": 0.1103515625, "learning_rate": 0.0006591477103407172, "loss": 0.596, "step": 35450 }, { "epoch": 1.7611999602662163, "grad_norm": 0.1005859375, "learning_rate": 0.0006591079765570677, "loss": 0.5854, "step": 35460 }, { "epoch": 1.7616966325618357, "grad_norm": 0.1611328125, "learning_rate": 0.0006590682427734181, "loss": 0.5599, "step": 35470 }, { "epoch": 1.7621933048574552, "grad_norm": 0.130859375, "learning_rate": 0.0006590285089897686, "loss": 0.6016, "step": 35480 }, { "epoch": 1.7626899771530744, "grad_norm": 0.0966796875, "learning_rate": 0.000658988775206119, "loss": 0.597, "step": 35490 }, { "epoch": 1.7631866494486936, "grad_norm": 0.10791015625, "learning_rate": 0.0006589490414224694, "loss": 0.6352, "step": 35500 }, { "epoch": 1.763683321744313, "grad_norm": 0.09375, "learning_rate": 0.00065890930763882, "loss": 0.5891, "step": 35510 }, { "epoch": 1.7641799940399325, "grad_norm": 0.08984375, "learning_rate": 0.0006588695738551704, "loss": 0.5808, "step": 35520 }, { "epoch": 1.7646766663355518, "grad_norm": 0.09619140625, "learning_rate": 0.0006588298400715208, "loss": 0.585, "step": 35530 }, { "epoch": 1.7651733386311712, "grad_norm": 0.12890625, "learning_rate": 0.0006587901062878713, "loss": 0.5931, "step": 35540 }, { "epoch": 1.7656700109267907, "grad_norm": 0.1240234375, "learning_rate": 0.0006587503725042217, "loss": 0.5712, "step": 35550 }, { "epoch": 1.7661666832224099, "grad_norm": 0.11279296875, "learning_rate": 0.0006587106387205723, "loss": 0.5875, "step": 35560 }, { "epoch": 1.766663355518029, "grad_norm": 0.0888671875, "learning_rate": 0.0006586709049369226, "loss": 0.5762, "step": 35570 }, { "epoch": 1.7671600278136486, "grad_norm": 0.11865234375, "learning_rate": 0.0006586311711532731, "loss": 0.586, "step": 35580 }, { "epoch": 1.767656700109268, "grad_norm": 0.1533203125, "learning_rate": 0.0006585914373696236, "loss": 0.6036, "step": 35590 }, { "epoch": 1.7681533724048872, "grad_norm": 0.10986328125, "learning_rate": 0.0006585517035859739, "loss": 0.592, "step": 35600 }, { "epoch": 1.7686500447005065, "grad_norm": 0.1123046875, "learning_rate": 0.0006585119698023245, "loss": 0.5644, "step": 35610 }, { "epoch": 1.769146716996126, "grad_norm": 0.12890625, "learning_rate": 0.000658472236018675, "loss": 0.579, "step": 35620 }, { "epoch": 1.7696433892917454, "grad_norm": 0.09375, "learning_rate": 0.0006584325022350253, "loss": 0.5832, "step": 35630 }, { "epoch": 1.7701400615873646, "grad_norm": 0.095703125, "learning_rate": 0.0006583927684513758, "loss": 0.5951, "step": 35640 }, { "epoch": 1.770636733882984, "grad_norm": 0.1298828125, "learning_rate": 0.0006583530346677262, "loss": 0.5847, "step": 35650 }, { "epoch": 1.7711334061786035, "grad_norm": 0.1552734375, "learning_rate": 0.0006583133008840767, "loss": 0.5947, "step": 35660 }, { "epoch": 1.7716300784742227, "grad_norm": 0.1533203125, "learning_rate": 0.0006582735671004272, "loss": 0.5862, "step": 35670 }, { "epoch": 1.772126750769842, "grad_norm": 0.1376953125, "learning_rate": 0.0006582338333167776, "loss": 0.6173, "step": 35680 }, { "epoch": 1.7726234230654614, "grad_norm": 0.123046875, "learning_rate": 0.0006581940995331281, "loss": 0.5851, "step": 35690 }, { "epoch": 1.7731200953610808, "grad_norm": 0.0947265625, "learning_rate": 0.0006581543657494785, "loss": 0.5829, "step": 35700 }, { "epoch": 1.7736167676567, "grad_norm": 0.1181640625, "learning_rate": 0.000658114631965829, "loss": 0.5852, "step": 35710 }, { "epoch": 1.7741134399523193, "grad_norm": 0.11279296875, "learning_rate": 0.0006580748981821795, "loss": 0.5783, "step": 35720 }, { "epoch": 1.774610112247939, "grad_norm": 0.10546875, "learning_rate": 0.0006580351643985299, "loss": 0.5801, "step": 35730 }, { "epoch": 1.7751067845435582, "grad_norm": 0.1279296875, "learning_rate": 0.0006579954306148803, "loss": 0.5973, "step": 35740 }, { "epoch": 1.7756034568391774, "grad_norm": 0.1552734375, "learning_rate": 0.0006579556968312308, "loss": 0.611, "step": 35750 }, { "epoch": 1.7761001291347969, "grad_norm": 0.08740234375, "learning_rate": 0.0006579159630475812, "loss": 0.5763, "step": 35760 }, { "epoch": 1.7765968014304163, "grad_norm": 0.0966796875, "learning_rate": 0.0006578762292639317, "loss": 0.5711, "step": 35770 }, { "epoch": 1.7770934737260355, "grad_norm": 0.111328125, "learning_rate": 0.0006578364954802822, "loss": 0.6111, "step": 35780 }, { "epoch": 1.7775901460216548, "grad_norm": 0.1259765625, "learning_rate": 0.0006577967616966325, "loss": 0.5693, "step": 35790 }, { "epoch": 1.7780868183172742, "grad_norm": 0.1494140625, "learning_rate": 0.000657757027912983, "loss": 0.5779, "step": 35800 }, { "epoch": 1.7785834906128937, "grad_norm": 0.12451171875, "learning_rate": 0.0006577172941293336, "loss": 0.5906, "step": 35810 }, { "epoch": 1.779080162908513, "grad_norm": 0.11181640625, "learning_rate": 0.0006576775603456839, "loss": 0.5878, "step": 35820 }, { "epoch": 1.7795768352041323, "grad_norm": 0.1025390625, "learning_rate": 0.0006576378265620344, "loss": 0.5832, "step": 35830 }, { "epoch": 1.7800735074997518, "grad_norm": 0.09912109375, "learning_rate": 0.0006575980927783848, "loss": 0.5882, "step": 35840 }, { "epoch": 1.780570179795371, "grad_norm": 0.109375, "learning_rate": 0.0006575583589947353, "loss": 0.6045, "step": 35850 }, { "epoch": 1.7810668520909902, "grad_norm": 0.09765625, "learning_rate": 0.0006575186252110858, "loss": 0.5789, "step": 35860 }, { "epoch": 1.7815635243866097, "grad_norm": 0.1640625, "learning_rate": 0.0006574788914274362, "loss": 0.5812, "step": 35870 }, { "epoch": 1.7820601966822291, "grad_norm": 0.1279296875, "learning_rate": 0.0006574391576437867, "loss": 0.5801, "step": 35880 }, { "epoch": 1.7825568689778484, "grad_norm": 0.103515625, "learning_rate": 0.0006573994238601371, "loss": 0.5915, "step": 35890 }, { "epoch": 1.7830535412734676, "grad_norm": 0.1611328125, "learning_rate": 0.0006573596900764875, "loss": 0.5687, "step": 35900 }, { "epoch": 1.7835502135690873, "grad_norm": 0.1298828125, "learning_rate": 0.0006573199562928381, "loss": 0.5588, "step": 35910 }, { "epoch": 1.7840468858647065, "grad_norm": 0.111328125, "learning_rate": 0.0006572802225091885, "loss": 0.579, "step": 35920 }, { "epoch": 1.7845435581603257, "grad_norm": 0.119140625, "learning_rate": 0.0006572404887255389, "loss": 0.6217, "step": 35930 }, { "epoch": 1.7850402304559452, "grad_norm": 0.09130859375, "learning_rate": 0.0006572007549418894, "loss": 0.5919, "step": 35940 }, { "epoch": 1.7855369027515646, "grad_norm": 0.1142578125, "learning_rate": 0.0006571610211582398, "loss": 0.5992, "step": 35950 }, { "epoch": 1.7860335750471839, "grad_norm": 0.1064453125, "learning_rate": 0.0006571212873745903, "loss": 0.5961, "step": 35960 }, { "epoch": 1.786530247342803, "grad_norm": 0.1337890625, "learning_rate": 0.0006570815535909408, "loss": 0.6127, "step": 35970 }, { "epoch": 1.7870269196384225, "grad_norm": 0.11767578125, "learning_rate": 0.0006570418198072911, "loss": 0.5762, "step": 35980 }, { "epoch": 1.787523591934042, "grad_norm": 0.10693359375, "learning_rate": 0.0006570020860236416, "loss": 0.5988, "step": 35990 }, { "epoch": 1.7880202642296612, "grad_norm": 0.0869140625, "learning_rate": 0.0006569623522399921, "loss": 0.581, "step": 36000 }, { "epoch": 1.7885169365252807, "grad_norm": 0.09765625, "learning_rate": 0.0006569226184563426, "loss": 0.5811, "step": 36010 }, { "epoch": 1.7890136088209, "grad_norm": 0.11181640625, "learning_rate": 0.000656882884672693, "loss": 0.5708, "step": 36020 }, { "epoch": 1.7895102811165193, "grad_norm": 0.1083984375, "learning_rate": 0.0006568431508890434, "loss": 0.5819, "step": 36030 }, { "epoch": 1.7900069534121386, "grad_norm": 0.10107421875, "learning_rate": 0.0006568034171053939, "loss": 0.5852, "step": 36040 }, { "epoch": 1.790503625707758, "grad_norm": 0.119140625, "learning_rate": 0.0006567636833217443, "loss": 0.6267, "step": 36050 }, { "epoch": 1.7910002980033775, "grad_norm": 0.10595703125, "learning_rate": 0.0006567239495380948, "loss": 0.5895, "step": 36060 }, { "epoch": 1.7914969702989967, "grad_norm": 0.0986328125, "learning_rate": 0.0006566842157544453, "loss": 0.599, "step": 36070 }, { "epoch": 1.791993642594616, "grad_norm": 0.10400390625, "learning_rate": 0.0006566444819707957, "loss": 0.5701, "step": 36080 }, { "epoch": 1.7924903148902356, "grad_norm": 0.125, "learning_rate": 0.0006566047481871461, "loss": 0.5955, "step": 36090 }, { "epoch": 1.7929869871858548, "grad_norm": 0.09423828125, "learning_rate": 0.0006565650144034966, "loss": 0.5917, "step": 36100 }, { "epoch": 1.793483659481474, "grad_norm": 0.130859375, "learning_rate": 0.0006565252806198471, "loss": 0.5633, "step": 36110 }, { "epoch": 1.7939803317770935, "grad_norm": 0.09814453125, "learning_rate": 0.0006564855468361975, "loss": 0.5555, "step": 36120 }, { "epoch": 1.794477004072713, "grad_norm": 0.09521484375, "learning_rate": 0.000656445813052548, "loss": 0.6074, "step": 36130 }, { "epoch": 1.7949736763683322, "grad_norm": 0.111328125, "learning_rate": 0.0006564060792688984, "loss": 0.6201, "step": 36140 }, { "epoch": 1.7954703486639514, "grad_norm": 0.0888671875, "learning_rate": 0.0006563663454852488, "loss": 0.586, "step": 36150 }, { "epoch": 1.7959670209595708, "grad_norm": 0.1142578125, "learning_rate": 0.0006563266117015994, "loss": 0.5746, "step": 36160 }, { "epoch": 1.7964636932551903, "grad_norm": 0.09716796875, "learning_rate": 0.0006562868779179498, "loss": 0.5907, "step": 36170 }, { "epoch": 1.7969603655508095, "grad_norm": 0.10693359375, "learning_rate": 0.0006562471441343002, "loss": 0.5704, "step": 36180 }, { "epoch": 1.797457037846429, "grad_norm": 0.10791015625, "learning_rate": 0.0006562074103506507, "loss": 0.6184, "step": 36190 }, { "epoch": 1.7979537101420484, "grad_norm": 0.0986328125, "learning_rate": 0.0006561676765670011, "loss": 0.6149, "step": 36200 }, { "epoch": 1.7984503824376676, "grad_norm": 0.12158203125, "learning_rate": 0.0006561279427833516, "loss": 0.5642, "step": 36210 }, { "epoch": 1.7989470547332869, "grad_norm": 0.123046875, "learning_rate": 0.0006560882089997021, "loss": 0.5813, "step": 36220 }, { "epoch": 1.7994437270289063, "grad_norm": 0.09228515625, "learning_rate": 0.0006560484752160525, "loss": 0.612, "step": 36230 }, { "epoch": 1.7999403993245258, "grad_norm": 0.099609375, "learning_rate": 0.0006560087414324029, "loss": 0.5886, "step": 36240 }, { "epoch": 1.800437071620145, "grad_norm": 0.10986328125, "learning_rate": 0.0006559690076487533, "loss": 0.6051, "step": 36250 }, { "epoch": 1.8009337439157642, "grad_norm": 0.11474609375, "learning_rate": 0.0006559292738651039, "loss": 0.5852, "step": 36260 }, { "epoch": 1.801430416211384, "grad_norm": 0.12158203125, "learning_rate": 0.0006558895400814543, "loss": 0.5926, "step": 36270 }, { "epoch": 1.8019270885070031, "grad_norm": 0.12451171875, "learning_rate": 0.0006558498062978047, "loss": 0.604, "step": 36280 }, { "epoch": 1.8024237608026223, "grad_norm": 0.091796875, "learning_rate": 0.0006558100725141552, "loss": 0.609, "step": 36290 }, { "epoch": 1.8029204330982418, "grad_norm": 0.08544921875, "learning_rate": 0.0006557703387305056, "loss": 0.5675, "step": 36300 }, { "epoch": 1.8034171053938612, "grad_norm": 0.1005859375, "learning_rate": 0.000655730604946856, "loss": 0.5771, "step": 36310 }, { "epoch": 1.8039137776894805, "grad_norm": 0.123046875, "learning_rate": 0.0006556908711632066, "loss": 0.5779, "step": 36320 }, { "epoch": 1.8044104499850997, "grad_norm": 0.095703125, "learning_rate": 0.000655651137379557, "loss": 0.6223, "step": 36330 }, { "epoch": 1.8049071222807191, "grad_norm": 0.109375, "learning_rate": 0.0006556114035959074, "loss": 0.5769, "step": 36340 }, { "epoch": 1.8054037945763386, "grad_norm": 0.158203125, "learning_rate": 0.0006555716698122579, "loss": 0.5798, "step": 36350 }, { "epoch": 1.8059004668719578, "grad_norm": 0.08154296875, "learning_rate": 0.0006555319360286084, "loss": 0.5818, "step": 36360 }, { "epoch": 1.8063971391675773, "grad_norm": 0.1611328125, "learning_rate": 0.0006554922022449588, "loss": 0.6004, "step": 36370 }, { "epoch": 1.8068938114631967, "grad_norm": 0.09619140625, "learning_rate": 0.0006554524684613093, "loss": 0.597, "step": 36380 }, { "epoch": 1.807390483758816, "grad_norm": 0.1142578125, "learning_rate": 0.0006554127346776597, "loss": 0.5771, "step": 36390 }, { "epoch": 1.8078871560544352, "grad_norm": 0.14453125, "learning_rate": 0.0006553730008940101, "loss": 0.5779, "step": 36400 }, { "epoch": 1.8083838283500546, "grad_norm": 0.10693359375, "learning_rate": 0.0006553332671103607, "loss": 0.596, "step": 36410 }, { "epoch": 1.808880500645674, "grad_norm": 0.10791015625, "learning_rate": 0.0006552935333267111, "loss": 0.6053, "step": 36420 }, { "epoch": 1.8093771729412933, "grad_norm": 0.11083984375, "learning_rate": 0.0006552537995430615, "loss": 0.5661, "step": 36430 }, { "epoch": 1.8098738452369125, "grad_norm": 0.08203125, "learning_rate": 0.0006552140657594119, "loss": 0.5906, "step": 36440 }, { "epoch": 1.8103705175325322, "grad_norm": 0.1240234375, "learning_rate": 0.0006551743319757624, "loss": 0.5939, "step": 36450 }, { "epoch": 1.8108671898281514, "grad_norm": 0.146484375, "learning_rate": 0.000655134598192113, "loss": 0.5859, "step": 36460 }, { "epoch": 1.8113638621237707, "grad_norm": 0.10400390625, "learning_rate": 0.0006550948644084633, "loss": 0.5821, "step": 36470 }, { "epoch": 1.81186053441939, "grad_norm": 0.09130859375, "learning_rate": 0.0006550551306248138, "loss": 0.5904, "step": 36480 }, { "epoch": 1.8123572067150095, "grad_norm": 0.087890625, "learning_rate": 0.0006550153968411643, "loss": 0.5741, "step": 36490 }, { "epoch": 1.8128538790106288, "grad_norm": 0.10546875, "learning_rate": 0.0006549756630575146, "loss": 0.6147, "step": 36500 }, { "epoch": 1.813350551306248, "grad_norm": 0.16015625, "learning_rate": 0.0006549359292738652, "loss": 0.5682, "step": 36510 }, { "epoch": 1.8138472236018675, "grad_norm": 0.09423828125, "learning_rate": 0.0006548961954902156, "loss": 0.6279, "step": 36520 }, { "epoch": 1.814343895897487, "grad_norm": 0.126953125, "learning_rate": 0.000654856461706566, "loss": 0.5933, "step": 36530 }, { "epoch": 1.8148405681931061, "grad_norm": 0.10498046875, "learning_rate": 0.0006548167279229165, "loss": 0.5712, "step": 36540 }, { "epoch": 1.8153372404887256, "grad_norm": 0.10107421875, "learning_rate": 0.0006547769941392669, "loss": 0.5993, "step": 36550 }, { "epoch": 1.815833912784345, "grad_norm": 0.095703125, "learning_rate": 0.0006547372603556173, "loss": 0.6267, "step": 36560 }, { "epoch": 1.8163305850799643, "grad_norm": 0.10009765625, "learning_rate": 0.0006546975265719679, "loss": 0.5799, "step": 36570 }, { "epoch": 1.8168272573755835, "grad_norm": 0.130859375, "learning_rate": 0.0006546577927883183, "loss": 0.6024, "step": 36580 }, { "epoch": 1.817323929671203, "grad_norm": 0.138671875, "learning_rate": 0.0006546180590046688, "loss": 0.6024, "step": 36590 }, { "epoch": 1.8178206019668224, "grad_norm": 0.10498046875, "learning_rate": 0.0006545783252210192, "loss": 0.5906, "step": 36600 }, { "epoch": 1.8183172742624416, "grad_norm": 0.10888671875, "learning_rate": 0.0006545385914373696, "loss": 0.5762, "step": 36610 }, { "epoch": 1.8188139465580608, "grad_norm": 0.1611328125, "learning_rate": 0.0006544988576537202, "loss": 0.5811, "step": 36620 }, { "epoch": 1.8193106188536805, "grad_norm": 0.1044921875, "learning_rate": 0.0006544591238700705, "loss": 0.5927, "step": 36630 }, { "epoch": 1.8198072911492997, "grad_norm": 0.08447265625, "learning_rate": 0.000654419390086421, "loss": 0.5813, "step": 36640 }, { "epoch": 1.820303963444919, "grad_norm": 0.0966796875, "learning_rate": 0.0006543796563027715, "loss": 0.5863, "step": 36650 }, { "epoch": 1.8208006357405384, "grad_norm": 0.10302734375, "learning_rate": 0.0006543399225191218, "loss": 0.5647, "step": 36660 }, { "epoch": 1.8212973080361579, "grad_norm": 0.1396484375, "learning_rate": 0.0006543001887354724, "loss": 0.5953, "step": 36670 }, { "epoch": 1.821793980331777, "grad_norm": 0.12060546875, "learning_rate": 0.0006542604549518229, "loss": 0.5822, "step": 36680 }, { "epoch": 1.8222906526273963, "grad_norm": 0.09716796875, "learning_rate": 0.0006542207211681732, "loss": 0.593, "step": 36690 }, { "epoch": 1.8227873249230158, "grad_norm": 0.1572265625, "learning_rate": 0.0006541809873845237, "loss": 0.5887, "step": 36700 }, { "epoch": 1.8232839972186352, "grad_norm": 0.10498046875, "learning_rate": 0.0006541412536008741, "loss": 0.6251, "step": 36710 }, { "epoch": 1.8237806695142544, "grad_norm": 0.140625, "learning_rate": 0.0006541015198172246, "loss": 0.5714, "step": 36720 }, { "epoch": 1.8242773418098739, "grad_norm": 0.1162109375, "learning_rate": 0.0006540617860335751, "loss": 0.5708, "step": 36730 }, { "epoch": 1.8247740141054933, "grad_norm": 0.09130859375, "learning_rate": 0.0006540220522499255, "loss": 0.5896, "step": 36740 }, { "epoch": 1.8252706864011126, "grad_norm": 0.09765625, "learning_rate": 0.000653982318466276, "loss": 0.578, "step": 36750 }, { "epoch": 1.8257673586967318, "grad_norm": 0.1796875, "learning_rate": 0.0006539425846826264, "loss": 0.6018, "step": 36760 }, { "epoch": 1.8262640309923512, "grad_norm": 0.1435546875, "learning_rate": 0.0006539028508989769, "loss": 0.6012, "step": 36770 }, { "epoch": 1.8267607032879707, "grad_norm": 0.18359375, "learning_rate": 0.0006538631171153274, "loss": 0.6244, "step": 36780 }, { "epoch": 1.82725737558359, "grad_norm": 0.0927734375, "learning_rate": 0.0006538233833316778, "loss": 0.5579, "step": 36790 }, { "epoch": 1.8277540478792091, "grad_norm": 0.111328125, "learning_rate": 0.0006537836495480282, "loss": 0.6142, "step": 36800 }, { "epoch": 1.8282507201748288, "grad_norm": 0.1396484375, "learning_rate": 0.0006537439157643787, "loss": 0.5613, "step": 36810 }, { "epoch": 1.828747392470448, "grad_norm": 0.08642578125, "learning_rate": 0.0006537041819807292, "loss": 0.6049, "step": 36820 }, { "epoch": 1.8292440647660673, "grad_norm": 0.10400390625, "learning_rate": 0.0006536644481970796, "loss": 0.5672, "step": 36830 }, { "epoch": 1.8297407370616867, "grad_norm": 0.0908203125, "learning_rate": 0.0006536247144134301, "loss": 0.5831, "step": 36840 }, { "epoch": 1.8302374093573062, "grad_norm": 0.1640625, "learning_rate": 0.0006535849806297804, "loss": 0.5675, "step": 36850 }, { "epoch": 1.8307340816529254, "grad_norm": 0.1005859375, "learning_rate": 0.0006535452468461309, "loss": 0.6041, "step": 36860 }, { "epoch": 1.8312307539485446, "grad_norm": 0.10888671875, "learning_rate": 0.0006535055130624815, "loss": 0.5579, "step": 36870 }, { "epoch": 1.831727426244164, "grad_norm": 0.1591796875, "learning_rate": 0.0006534657792788319, "loss": 0.5693, "step": 36880 }, { "epoch": 1.8322240985397835, "grad_norm": 0.1298828125, "learning_rate": 0.0006534260454951823, "loss": 0.5783, "step": 36890 }, { "epoch": 1.8327207708354027, "grad_norm": 0.10693359375, "learning_rate": 0.0006533863117115327, "loss": 0.5867, "step": 36900 }, { "epoch": 1.8332174431310222, "grad_norm": 0.1328125, "learning_rate": 0.0006533465779278832, "loss": 0.5905, "step": 36910 }, { "epoch": 1.8337141154266416, "grad_norm": 0.10205078125, "learning_rate": 0.0006533068441442337, "loss": 0.6061, "step": 36920 }, { "epoch": 1.8342107877222609, "grad_norm": 0.1259765625, "learning_rate": 0.0006532671103605841, "loss": 0.6096, "step": 36930 }, { "epoch": 1.83470746001788, "grad_norm": 0.10400390625, "learning_rate": 0.0006532273765769346, "loss": 0.576, "step": 36940 }, { "epoch": 1.8352041323134995, "grad_norm": 0.1005859375, "learning_rate": 0.000653187642793285, "loss": 0.6109, "step": 36950 }, { "epoch": 1.835700804609119, "grad_norm": 0.134765625, "learning_rate": 0.0006531479090096354, "loss": 0.568, "step": 36960 }, { "epoch": 1.8361974769047382, "grad_norm": 0.1416015625, "learning_rate": 0.000653108175225986, "loss": 0.5775, "step": 36970 }, { "epoch": 1.8366941492003575, "grad_norm": 0.08447265625, "learning_rate": 0.0006530684414423364, "loss": 0.5969, "step": 36980 }, { "epoch": 1.837190821495977, "grad_norm": 0.1005859375, "learning_rate": 0.0006530287076586868, "loss": 0.582, "step": 36990 }, { "epoch": 1.8376874937915963, "grad_norm": 0.185546875, "learning_rate": 0.0006529889738750373, "loss": 0.5714, "step": 37000 }, { "epoch": 1.8381841660872156, "grad_norm": 0.1142578125, "learning_rate": 0.0006529492400913877, "loss": 0.609, "step": 37010 }, { "epoch": 1.838680838382835, "grad_norm": 0.1044921875, "learning_rate": 0.0006529095063077382, "loss": 0.6116, "step": 37020 }, { "epoch": 1.8391775106784545, "grad_norm": 0.1884765625, "learning_rate": 0.0006528697725240887, "loss": 0.6088, "step": 37030 }, { "epoch": 1.8396741829740737, "grad_norm": 0.0947265625, "learning_rate": 0.0006528300387404391, "loss": 0.6042, "step": 37040 }, { "epoch": 1.840170855269693, "grad_norm": 0.1474609375, "learning_rate": 0.0006527903049567895, "loss": 0.6014, "step": 37050 }, { "epoch": 1.8406675275653124, "grad_norm": 0.1318359375, "learning_rate": 0.00065275057117314, "loss": 0.5956, "step": 37060 }, { "epoch": 1.8411641998609318, "grad_norm": 0.119140625, "learning_rate": 0.0006527108373894905, "loss": 0.5895, "step": 37070 }, { "epoch": 1.841660872156551, "grad_norm": 0.154296875, "learning_rate": 0.0006526711036058409, "loss": 0.6098, "step": 37080 }, { "epoch": 1.8421575444521705, "grad_norm": 0.0986328125, "learning_rate": 0.0006526313698221914, "loss": 0.585, "step": 37090 }, { "epoch": 1.84265421674779, "grad_norm": 0.1650390625, "learning_rate": 0.0006525916360385418, "loss": 0.6035, "step": 37100 }, { "epoch": 1.8431508890434092, "grad_norm": 0.15625, "learning_rate": 0.0006525519022548922, "loss": 0.5677, "step": 37110 }, { "epoch": 1.8436475613390284, "grad_norm": 0.09765625, "learning_rate": 0.0006525121684712427, "loss": 0.5505, "step": 37120 }, { "epoch": 1.8441442336346479, "grad_norm": 0.11865234375, "learning_rate": 0.0006524724346875932, "loss": 0.5833, "step": 37130 }, { "epoch": 1.8446409059302673, "grad_norm": 0.09130859375, "learning_rate": 0.0006524327009039436, "loss": 0.5779, "step": 37140 }, { "epoch": 1.8451375782258865, "grad_norm": 0.1357421875, "learning_rate": 0.000652392967120294, "loss": 0.5994, "step": 37150 }, { "epoch": 1.8456342505215058, "grad_norm": 0.09912109375, "learning_rate": 0.0006523532333366445, "loss": 0.5936, "step": 37160 }, { "epoch": 1.8461309228171252, "grad_norm": 0.08935546875, "learning_rate": 0.000652313499552995, "loss": 0.5973, "step": 37170 }, { "epoch": 1.8466275951127447, "grad_norm": 0.1787109375, "learning_rate": 0.0006522737657693454, "loss": 0.5972, "step": 37180 }, { "epoch": 1.8471242674083639, "grad_norm": 0.11376953125, "learning_rate": 0.0006522340319856959, "loss": 0.6032, "step": 37190 }, { "epoch": 1.8476209397039833, "grad_norm": 0.1005859375, "learning_rate": 0.0006521942982020463, "loss": 0.607, "step": 37200 }, { "epoch": 1.8481176119996028, "grad_norm": 0.0966796875, "learning_rate": 0.0006521545644183967, "loss": 0.6153, "step": 37210 }, { "epoch": 1.848614284295222, "grad_norm": 0.13671875, "learning_rate": 0.0006521148306347473, "loss": 0.5763, "step": 37220 }, { "epoch": 1.8491109565908412, "grad_norm": 0.10791015625, "learning_rate": 0.0006520750968510977, "loss": 0.5736, "step": 37230 }, { "epoch": 1.8496076288864607, "grad_norm": 0.1298828125, "learning_rate": 0.0006520353630674481, "loss": 0.6079, "step": 37240 }, { "epoch": 1.8501043011820801, "grad_norm": 0.11572265625, "learning_rate": 0.0006519956292837986, "loss": 0.5812, "step": 37250 }, { "epoch": 1.8506009734776994, "grad_norm": 0.146484375, "learning_rate": 0.000651955895500149, "loss": 0.6019, "step": 37260 }, { "epoch": 1.8510976457733188, "grad_norm": 0.130859375, "learning_rate": 0.0006519161617164995, "loss": 0.6007, "step": 37270 }, { "epoch": 1.8515943180689383, "grad_norm": 0.1669921875, "learning_rate": 0.00065187642793285, "loss": 0.5725, "step": 37280 }, { "epoch": 1.8520909903645575, "grad_norm": 0.10302734375, "learning_rate": 0.0006518366941492004, "loss": 0.5952, "step": 37290 }, { "epoch": 1.8525876626601767, "grad_norm": 0.1025390625, "learning_rate": 0.0006517969603655508, "loss": 0.5955, "step": 37300 }, { "epoch": 1.8530843349557962, "grad_norm": 0.09130859375, "learning_rate": 0.0006517572265819012, "loss": 0.5889, "step": 37310 }, { "epoch": 1.8535810072514156, "grad_norm": 0.1474609375, "learning_rate": 0.0006517174927982518, "loss": 0.5997, "step": 37320 }, { "epoch": 1.8540776795470348, "grad_norm": 0.0908203125, "learning_rate": 0.0006516777590146023, "loss": 0.5642, "step": 37330 }, { "epoch": 1.854574351842654, "grad_norm": 0.181640625, "learning_rate": 0.0006516380252309526, "loss": 0.595, "step": 37340 }, { "epoch": 1.8550710241382735, "grad_norm": 0.1416015625, "learning_rate": 0.0006515982914473031, "loss": 0.5922, "step": 37350 }, { "epoch": 1.855567696433893, "grad_norm": 0.10009765625, "learning_rate": 0.0006515585576636536, "loss": 0.5832, "step": 37360 }, { "epoch": 1.8560643687295122, "grad_norm": 0.1494140625, "learning_rate": 0.000651518823880004, "loss": 0.5978, "step": 37370 }, { "epoch": 1.8565610410251316, "grad_norm": 0.0849609375, "learning_rate": 0.0006514790900963545, "loss": 0.573, "step": 37380 }, { "epoch": 1.857057713320751, "grad_norm": 0.1494140625, "learning_rate": 0.0006514393563127049, "loss": 0.5722, "step": 37390 }, { "epoch": 1.8575543856163703, "grad_norm": 0.14453125, "learning_rate": 0.0006513996225290553, "loss": 0.6139, "step": 37400 }, { "epoch": 1.8580510579119895, "grad_norm": 0.109375, "learning_rate": 0.0006513598887454058, "loss": 0.5884, "step": 37410 }, { "epoch": 1.858547730207609, "grad_norm": 0.185546875, "learning_rate": 0.0006513201549617563, "loss": 0.6201, "step": 37420 }, { "epoch": 1.8590444025032284, "grad_norm": 0.10986328125, "learning_rate": 0.0006512804211781067, "loss": 0.5874, "step": 37430 }, { "epoch": 1.8595410747988477, "grad_norm": 0.09521484375, "learning_rate": 0.0006512406873944572, "loss": 0.6012, "step": 37440 }, { "epoch": 1.8600377470944671, "grad_norm": 0.091796875, "learning_rate": 0.0006512009536108076, "loss": 0.5957, "step": 37450 }, { "epoch": 1.8605344193900866, "grad_norm": 0.1171875, "learning_rate": 0.000651161219827158, "loss": 0.5916, "step": 37460 }, { "epoch": 1.8610310916857058, "grad_norm": 0.13671875, "learning_rate": 0.0006511214860435086, "loss": 0.586, "step": 37470 }, { "epoch": 1.861527763981325, "grad_norm": 0.10107421875, "learning_rate": 0.000651081752259859, "loss": 0.569, "step": 37480 }, { "epoch": 1.8620244362769445, "grad_norm": 0.11474609375, "learning_rate": 0.0006510420184762095, "loss": 0.5574, "step": 37490 }, { "epoch": 1.862521108572564, "grad_norm": 0.10595703125, "learning_rate": 0.0006510022846925598, "loss": 0.5899, "step": 37500 }, { "epoch": 1.8630177808681831, "grad_norm": 0.11376953125, "learning_rate": 0.0006509625509089103, "loss": 0.5996, "step": 37510 }, { "epoch": 1.8635144531638024, "grad_norm": 0.091796875, "learning_rate": 0.0006509228171252609, "loss": 0.6009, "step": 37520 }, { "epoch": 1.8640111254594218, "grad_norm": 0.140625, "learning_rate": 0.0006508830833416112, "loss": 0.5804, "step": 37530 }, { "epoch": 1.8645077977550413, "grad_norm": 0.11474609375, "learning_rate": 0.0006508433495579617, "loss": 0.6056, "step": 37540 }, { "epoch": 1.8650044700506605, "grad_norm": 0.12451171875, "learning_rate": 0.0006508036157743122, "loss": 0.5867, "step": 37550 }, { "epoch": 1.86550114234628, "grad_norm": 0.1240234375, "learning_rate": 0.0006507638819906625, "loss": 0.6001, "step": 37560 }, { "epoch": 1.8659978146418994, "grad_norm": 0.1357421875, "learning_rate": 0.000650724148207013, "loss": 0.5841, "step": 37570 }, { "epoch": 1.8664944869375186, "grad_norm": 0.1044921875, "learning_rate": 0.0006506844144233635, "loss": 0.5766, "step": 37580 }, { "epoch": 1.8669911592331379, "grad_norm": 0.1591796875, "learning_rate": 0.0006506446806397139, "loss": 0.5876, "step": 37590 }, { "epoch": 1.8674878315287573, "grad_norm": 0.154296875, "learning_rate": 0.0006506049468560644, "loss": 0.5907, "step": 37600 }, { "epoch": 1.8679845038243768, "grad_norm": 0.09326171875, "learning_rate": 0.0006505652130724148, "loss": 0.5754, "step": 37610 }, { "epoch": 1.868481176119996, "grad_norm": 0.1357421875, "learning_rate": 0.0006505254792887652, "loss": 0.5736, "step": 37620 }, { "epoch": 1.8689778484156154, "grad_norm": 0.09423828125, "learning_rate": 0.0006504857455051158, "loss": 0.6151, "step": 37630 }, { "epoch": 1.8694745207112349, "grad_norm": 0.140625, "learning_rate": 0.0006504460117214662, "loss": 0.5984, "step": 37640 }, { "epoch": 1.869971193006854, "grad_norm": 0.10498046875, "learning_rate": 0.0006504062779378167, "loss": 0.6013, "step": 37650 }, { "epoch": 1.8704678653024733, "grad_norm": 0.11865234375, "learning_rate": 0.0006503665441541671, "loss": 0.6051, "step": 37660 }, { "epoch": 1.8709645375980928, "grad_norm": 0.1396484375, "learning_rate": 0.0006503268103705176, "loss": 0.5964, "step": 37670 }, { "epoch": 1.8714612098937122, "grad_norm": 0.1171875, "learning_rate": 0.0006502870765868681, "loss": 0.5812, "step": 37680 }, { "epoch": 1.8719578821893315, "grad_norm": 0.0986328125, "learning_rate": 0.0006502473428032185, "loss": 0.5946, "step": 37690 }, { "epoch": 1.8724545544849507, "grad_norm": 0.0966796875, "learning_rate": 0.0006502076090195689, "loss": 0.5612, "step": 37700 }, { "epoch": 1.8729512267805701, "grad_norm": 0.126953125, "learning_rate": 0.0006501678752359194, "loss": 0.585, "step": 37710 }, { "epoch": 1.8734478990761896, "grad_norm": 0.09033203125, "learning_rate": 0.0006501281414522697, "loss": 0.5948, "step": 37720 }, { "epoch": 1.8739445713718088, "grad_norm": 0.1455078125, "learning_rate": 0.0006500884076686203, "loss": 0.5773, "step": 37730 }, { "epoch": 1.8744412436674283, "grad_norm": 0.0986328125, "learning_rate": 0.0006500486738849708, "loss": 0.5941, "step": 37740 }, { "epoch": 1.8749379159630477, "grad_norm": 0.10498046875, "learning_rate": 0.0006500089401013211, "loss": 0.5943, "step": 37750 }, { "epoch": 1.875434588258667, "grad_norm": 0.10302734375, "learning_rate": 0.0006499692063176716, "loss": 0.5959, "step": 37760 }, { "epoch": 1.8759312605542862, "grad_norm": 0.095703125, "learning_rate": 0.000649929472534022, "loss": 0.5773, "step": 37770 }, { "epoch": 1.8764279328499056, "grad_norm": 0.10693359375, "learning_rate": 0.0006498897387503726, "loss": 0.5837, "step": 37780 }, { "epoch": 1.876924605145525, "grad_norm": 0.115234375, "learning_rate": 0.000649850004966723, "loss": 0.5888, "step": 37790 }, { "epoch": 1.8774212774411443, "grad_norm": 0.10400390625, "learning_rate": 0.0006498102711830734, "loss": 0.6306, "step": 37800 }, { "epoch": 1.8779179497367637, "grad_norm": 0.12158203125, "learning_rate": 0.0006497705373994239, "loss": 0.5537, "step": 37810 }, { "epoch": 1.8784146220323832, "grad_norm": 0.10107421875, "learning_rate": 0.0006497308036157743, "loss": 0.5905, "step": 37820 }, { "epoch": 1.8789112943280024, "grad_norm": 0.09765625, "learning_rate": 0.0006496910698321248, "loss": 0.5726, "step": 37830 }, { "epoch": 1.8794079666236216, "grad_norm": 0.10986328125, "learning_rate": 0.0006496513360484753, "loss": 0.6031, "step": 37840 }, { "epoch": 1.879904638919241, "grad_norm": 0.1796875, "learning_rate": 0.0006496116022648257, "loss": 0.5986, "step": 37850 }, { "epoch": 1.8804013112148605, "grad_norm": 0.1240234375, "learning_rate": 0.0006495718684811761, "loss": 0.5791, "step": 37860 }, { "epoch": 1.8808979835104798, "grad_norm": 0.1259765625, "learning_rate": 0.0006495321346975267, "loss": 0.5831, "step": 37870 }, { "epoch": 1.881394655806099, "grad_norm": 0.0947265625, "learning_rate": 0.0006494924009138771, "loss": 0.5692, "step": 37880 }, { "epoch": 1.8818913281017184, "grad_norm": 0.09375, "learning_rate": 0.0006494526671302275, "loss": 0.58, "step": 37890 }, { "epoch": 1.882388000397338, "grad_norm": 0.1181640625, "learning_rate": 0.000649412933346578, "loss": 0.5919, "step": 37900 }, { "epoch": 1.8828846726929571, "grad_norm": 0.1396484375, "learning_rate": 0.0006493731995629283, "loss": 0.6251, "step": 37910 }, { "epoch": 1.8833813449885766, "grad_norm": 0.1669921875, "learning_rate": 0.0006493334657792788, "loss": 0.579, "step": 37920 }, { "epoch": 1.883878017284196, "grad_norm": 0.091796875, "learning_rate": 0.0006492937319956294, "loss": 0.6264, "step": 37930 }, { "epoch": 1.8843746895798152, "grad_norm": 0.1142578125, "learning_rate": 0.0006492539982119798, "loss": 0.5701, "step": 37940 }, { "epoch": 1.8848713618754345, "grad_norm": 0.1494140625, "learning_rate": 0.0006492142644283302, "loss": 0.604, "step": 37950 }, { "epoch": 1.885368034171054, "grad_norm": 0.09765625, "learning_rate": 0.0006491745306446807, "loss": 0.5774, "step": 37960 }, { "epoch": 1.8858647064666734, "grad_norm": 0.10595703125, "learning_rate": 0.0006491347968610311, "loss": 0.5748, "step": 37970 }, { "epoch": 1.8863613787622926, "grad_norm": 0.11767578125, "learning_rate": 0.0006490950630773816, "loss": 0.6001, "step": 37980 }, { "epoch": 1.886858051057912, "grad_norm": 0.1630859375, "learning_rate": 0.000649055329293732, "loss": 0.6058, "step": 37990 }, { "epoch": 1.8873547233535315, "grad_norm": 0.1552734375, "learning_rate": 0.0006490155955100825, "loss": 0.5758, "step": 38000 }, { "epoch": 1.8878513956491507, "grad_norm": 0.1181640625, "learning_rate": 0.0006489758617264329, "loss": 0.5878, "step": 38010 }, { "epoch": 1.88834806794477, "grad_norm": 0.11328125, "learning_rate": 0.0006489361279427833, "loss": 0.5932, "step": 38020 }, { "epoch": 1.8888447402403894, "grad_norm": 0.130859375, "learning_rate": 0.0006488963941591339, "loss": 0.5892, "step": 38030 }, { "epoch": 1.8893414125360088, "grad_norm": 0.09521484375, "learning_rate": 0.0006488566603754843, "loss": 0.5921, "step": 38040 }, { "epoch": 1.889838084831628, "grad_norm": 0.10498046875, "learning_rate": 0.0006488169265918347, "loss": 0.5968, "step": 38050 }, { "epoch": 1.8903347571272473, "grad_norm": 0.1474609375, "learning_rate": 0.0006487771928081852, "loss": 0.5817, "step": 38060 }, { "epoch": 1.8908314294228667, "grad_norm": 0.109375, "learning_rate": 0.0006487374590245356, "loss": 0.6205, "step": 38070 }, { "epoch": 1.8913281017184862, "grad_norm": 0.1171875, "learning_rate": 0.0006486977252408861, "loss": 0.5994, "step": 38080 }, { "epoch": 1.8918247740141054, "grad_norm": 0.10595703125, "learning_rate": 0.0006486579914572366, "loss": 0.5979, "step": 38090 }, { "epoch": 1.8923214463097249, "grad_norm": 0.162109375, "learning_rate": 0.000648618257673587, "loss": 0.6016, "step": 38100 }, { "epoch": 1.8928181186053443, "grad_norm": 0.091796875, "learning_rate": 0.0006485785238899374, "loss": 0.615, "step": 38110 }, { "epoch": 1.8933147909009636, "grad_norm": 0.11572265625, "learning_rate": 0.000648538790106288, "loss": 0.5877, "step": 38120 }, { "epoch": 1.8938114631965828, "grad_norm": 0.09423828125, "learning_rate": 0.0006484990563226384, "loss": 0.594, "step": 38130 }, { "epoch": 1.8943081354922022, "grad_norm": 0.1455078125, "learning_rate": 0.0006484593225389888, "loss": 0.5847, "step": 38140 }, { "epoch": 1.8948048077878217, "grad_norm": 0.12158203125, "learning_rate": 0.0006484195887553393, "loss": 0.5837, "step": 38150 }, { "epoch": 1.895301480083441, "grad_norm": 0.1376953125, "learning_rate": 0.0006483798549716897, "loss": 0.569, "step": 38160 }, { "epoch": 1.8957981523790601, "grad_norm": 0.1279296875, "learning_rate": 0.0006483401211880401, "loss": 0.5629, "step": 38170 }, { "epoch": 1.8962948246746798, "grad_norm": 0.16015625, "learning_rate": 0.0006483003874043906, "loss": 0.592, "step": 38180 }, { "epoch": 1.896791496970299, "grad_norm": 0.09326171875, "learning_rate": 0.0006482606536207411, "loss": 0.5981, "step": 38190 }, { "epoch": 1.8972881692659183, "grad_norm": 0.12353515625, "learning_rate": 0.0006482209198370915, "loss": 0.5692, "step": 38200 }, { "epoch": 1.8977848415615377, "grad_norm": 0.154296875, "learning_rate": 0.0006481811860534419, "loss": 0.582, "step": 38210 }, { "epoch": 1.8982815138571572, "grad_norm": 0.13671875, "learning_rate": 0.0006481414522697924, "loss": 0.5871, "step": 38220 }, { "epoch": 1.8987781861527764, "grad_norm": 0.1171875, "learning_rate": 0.000648101718486143, "loss": 0.5967, "step": 38230 }, { "epoch": 1.8992748584483956, "grad_norm": 0.1328125, "learning_rate": 0.0006480619847024933, "loss": 0.5846, "step": 38240 }, { "epoch": 1.899771530744015, "grad_norm": 0.1005859375, "learning_rate": 0.0006480222509188438, "loss": 0.5886, "step": 38250 }, { "epoch": 1.9002682030396345, "grad_norm": 0.1083984375, "learning_rate": 0.0006479825171351942, "loss": 0.593, "step": 38260 }, { "epoch": 1.9007648753352537, "grad_norm": 0.1630859375, "learning_rate": 0.0006479427833515446, "loss": 0.5926, "step": 38270 }, { "epoch": 1.9012615476308732, "grad_norm": 0.17578125, "learning_rate": 0.0006479030495678952, "loss": 0.5937, "step": 38280 }, { "epoch": 1.9017582199264926, "grad_norm": 0.1025390625, "learning_rate": 0.0006478633157842456, "loss": 0.5817, "step": 38290 }, { "epoch": 1.9022548922221119, "grad_norm": 0.09521484375, "learning_rate": 0.000647823582000596, "loss": 0.5878, "step": 38300 }, { "epoch": 1.902751564517731, "grad_norm": 0.1083984375, "learning_rate": 0.0006477838482169465, "loss": 0.6043, "step": 38310 }, { "epoch": 1.9032482368133505, "grad_norm": 0.10888671875, "learning_rate": 0.0006477441144332969, "loss": 0.5627, "step": 38320 }, { "epoch": 1.90374490910897, "grad_norm": 0.09765625, "learning_rate": 0.0006477043806496474, "loss": 0.578, "step": 38330 }, { "epoch": 1.9042415814045892, "grad_norm": 0.1708984375, "learning_rate": 0.0006476646468659979, "loss": 0.5814, "step": 38340 }, { "epoch": 1.9047382537002084, "grad_norm": 0.10595703125, "learning_rate": 0.0006476249130823483, "loss": 0.6276, "step": 38350 }, { "epoch": 1.905234925995828, "grad_norm": 0.1064453125, "learning_rate": 0.0006475851792986987, "loss": 0.5758, "step": 38360 }, { "epoch": 1.9057315982914473, "grad_norm": 0.1162109375, "learning_rate": 0.0006475454455150491, "loss": 0.5622, "step": 38370 }, { "epoch": 1.9062282705870666, "grad_norm": 0.103515625, "learning_rate": 0.0006475057117313997, "loss": 0.5823, "step": 38380 }, { "epoch": 1.906724942882686, "grad_norm": 0.11962890625, "learning_rate": 0.0006474659779477502, "loss": 0.5854, "step": 38390 }, { "epoch": 1.9072216151783055, "grad_norm": 0.1318359375, "learning_rate": 0.0006474262441641005, "loss": 0.6088, "step": 38400 }, { "epoch": 1.9077182874739247, "grad_norm": 0.09716796875, "learning_rate": 0.000647386510380451, "loss": 0.5652, "step": 38410 }, { "epoch": 1.908214959769544, "grad_norm": 0.10107421875, "learning_rate": 0.0006473467765968015, "loss": 0.6183, "step": 38420 }, { "epoch": 1.9087116320651634, "grad_norm": 0.111328125, "learning_rate": 0.0006473070428131519, "loss": 0.6121, "step": 38430 }, { "epoch": 1.9092083043607828, "grad_norm": 0.103515625, "learning_rate": 0.0006472673090295024, "loss": 0.5846, "step": 38440 }, { "epoch": 1.909704976656402, "grad_norm": 0.10693359375, "learning_rate": 0.0006472275752458528, "loss": 0.5984, "step": 38450 }, { "epoch": 1.9102016489520215, "grad_norm": 0.1083984375, "learning_rate": 0.0006471878414622032, "loss": 0.5955, "step": 38460 }, { "epoch": 1.910698321247641, "grad_norm": 0.130859375, "learning_rate": 0.0006471481076785537, "loss": 0.5847, "step": 38470 }, { "epoch": 1.9111949935432602, "grad_norm": 0.10546875, "learning_rate": 0.0006471083738949042, "loss": 0.561, "step": 38480 }, { "epoch": 1.9116916658388794, "grad_norm": 0.11572265625, "learning_rate": 0.0006470686401112546, "loss": 0.5744, "step": 38490 }, { "epoch": 1.9121883381344988, "grad_norm": 0.10888671875, "learning_rate": 0.0006470289063276051, "loss": 0.581, "step": 38500 }, { "epoch": 1.9126850104301183, "grad_norm": 0.1142578125, "learning_rate": 0.0006469891725439555, "loss": 0.5776, "step": 38510 }, { "epoch": 1.9131816827257375, "grad_norm": 0.2451171875, "learning_rate": 0.0006469494387603059, "loss": 0.5793, "step": 38520 }, { "epoch": 1.9136783550213567, "grad_norm": 0.1337890625, "learning_rate": 0.0006469097049766565, "loss": 0.5699, "step": 38530 }, { "epoch": 1.9141750273169764, "grad_norm": 0.09228515625, "learning_rate": 0.0006468699711930069, "loss": 0.584, "step": 38540 }, { "epoch": 1.9146716996125956, "grad_norm": 0.1376953125, "learning_rate": 0.0006468302374093574, "loss": 0.5715, "step": 38550 }, { "epoch": 1.9151683719082149, "grad_norm": 0.111328125, "learning_rate": 0.0006467905036257078, "loss": 0.5985, "step": 38560 }, { "epoch": 1.9156650442038343, "grad_norm": 0.099609375, "learning_rate": 0.0006467507698420582, "loss": 0.5708, "step": 38570 }, { "epoch": 1.9161617164994538, "grad_norm": 0.142578125, "learning_rate": 0.0006467110360584088, "loss": 0.5726, "step": 38580 }, { "epoch": 1.916658388795073, "grad_norm": 0.1689453125, "learning_rate": 0.0006466713022747591, "loss": 0.5902, "step": 38590 }, { "epoch": 1.9171550610906922, "grad_norm": 0.177734375, "learning_rate": 0.0006466315684911096, "loss": 0.5852, "step": 38600 }, { "epoch": 1.9176517333863117, "grad_norm": 0.1630859375, "learning_rate": 0.0006465918347074601, "loss": 0.5814, "step": 38610 }, { "epoch": 1.9181484056819311, "grad_norm": 0.11865234375, "learning_rate": 0.0006465521009238104, "loss": 0.5683, "step": 38620 }, { "epoch": 1.9186450779775504, "grad_norm": 0.09619140625, "learning_rate": 0.000646512367140161, "loss": 0.5598, "step": 38630 }, { "epoch": 1.9191417502731698, "grad_norm": 0.12890625, "learning_rate": 0.0006464726333565114, "loss": 0.5908, "step": 38640 }, { "epoch": 1.9196384225687892, "grad_norm": 0.11376953125, "learning_rate": 0.0006464328995728618, "loss": 0.5623, "step": 38650 }, { "epoch": 1.9201350948644085, "grad_norm": 0.10498046875, "learning_rate": 0.0006463931657892123, "loss": 0.5959, "step": 38660 }, { "epoch": 1.9206317671600277, "grad_norm": 0.171875, "learning_rate": 0.0006463534320055627, "loss": 0.5941, "step": 38670 }, { "epoch": 1.9211284394556472, "grad_norm": 0.10009765625, "learning_rate": 0.0006463136982219133, "loss": 0.6107, "step": 38680 }, { "epoch": 1.9216251117512666, "grad_norm": 0.103515625, "learning_rate": 0.0006462739644382637, "loss": 0.5719, "step": 38690 }, { "epoch": 1.9221217840468858, "grad_norm": 0.1025390625, "learning_rate": 0.0006462342306546141, "loss": 0.5812, "step": 38700 }, { "epoch": 1.922618456342505, "grad_norm": 0.12060546875, "learning_rate": 0.0006461944968709646, "loss": 0.6058, "step": 38710 }, { "epoch": 1.9231151286381247, "grad_norm": 0.09765625, "learning_rate": 0.000646154763087315, "loss": 0.5757, "step": 38720 }, { "epoch": 1.923611800933744, "grad_norm": 0.11328125, "learning_rate": 0.0006461150293036655, "loss": 0.5796, "step": 38730 }, { "epoch": 1.9241084732293632, "grad_norm": 0.0908203125, "learning_rate": 0.000646075295520016, "loss": 0.5969, "step": 38740 }, { "epoch": 1.9246051455249826, "grad_norm": 0.12353515625, "learning_rate": 0.0006460355617363664, "loss": 0.6022, "step": 38750 }, { "epoch": 1.925101817820602, "grad_norm": 0.109375, "learning_rate": 0.0006459958279527168, "loss": 0.5901, "step": 38760 }, { "epoch": 1.9255984901162213, "grad_norm": 0.10107421875, "learning_rate": 0.0006459560941690673, "loss": 0.5949, "step": 38770 }, { "epoch": 1.9260951624118405, "grad_norm": 0.1005859375, "learning_rate": 0.0006459163603854176, "loss": 0.565, "step": 38780 }, { "epoch": 1.92659183470746, "grad_norm": 0.16015625, "learning_rate": 0.0006458766266017682, "loss": 0.5685, "step": 38790 }, { "epoch": 1.9270885070030794, "grad_norm": 0.1083984375, "learning_rate": 0.0006458368928181187, "loss": 0.5727, "step": 38800 }, { "epoch": 1.9275851792986987, "grad_norm": 0.12060546875, "learning_rate": 0.000645797159034469, "loss": 0.5729, "step": 38810 }, { "epoch": 1.928081851594318, "grad_norm": 0.14453125, "learning_rate": 0.0006457574252508195, "loss": 0.5759, "step": 38820 }, { "epoch": 1.9285785238899376, "grad_norm": 0.13671875, "learning_rate": 0.00064571769146717, "loss": 0.5774, "step": 38830 }, { "epoch": 1.9290751961855568, "grad_norm": 0.09521484375, "learning_rate": 0.0006456779576835205, "loss": 0.596, "step": 38840 }, { "epoch": 1.929571868481176, "grad_norm": 0.087890625, "learning_rate": 0.0006456382238998709, "loss": 0.5776, "step": 38850 }, { "epoch": 1.9300685407767955, "grad_norm": 0.1416015625, "learning_rate": 0.0006455984901162213, "loss": 0.5806, "step": 38860 }, { "epoch": 1.930565213072415, "grad_norm": 0.1806640625, "learning_rate": 0.0006455587563325718, "loss": 0.5667, "step": 38870 }, { "epoch": 1.9310618853680341, "grad_norm": 0.123046875, "learning_rate": 0.0006455190225489223, "loss": 0.5859, "step": 38880 }, { "epoch": 1.9315585576636534, "grad_norm": 0.10400390625, "learning_rate": 0.0006454792887652727, "loss": 0.5778, "step": 38890 }, { "epoch": 1.932055229959273, "grad_norm": 0.109375, "learning_rate": 0.0006454395549816232, "loss": 0.5954, "step": 38900 }, { "epoch": 1.9325519022548923, "grad_norm": 0.1083984375, "learning_rate": 0.0006453998211979736, "loss": 0.6011, "step": 38910 }, { "epoch": 1.9330485745505115, "grad_norm": 0.0908203125, "learning_rate": 0.000645360087414324, "loss": 0.5982, "step": 38920 }, { "epoch": 1.933545246846131, "grad_norm": 0.12890625, "learning_rate": 0.0006453203536306746, "loss": 0.6026, "step": 38930 }, { "epoch": 1.9340419191417504, "grad_norm": 0.09716796875, "learning_rate": 0.000645280619847025, "loss": 0.5653, "step": 38940 }, { "epoch": 1.9345385914373696, "grad_norm": 0.09716796875, "learning_rate": 0.0006452408860633754, "loss": 0.5959, "step": 38950 }, { "epoch": 1.9350352637329888, "grad_norm": 0.0986328125, "learning_rate": 0.0006452011522797259, "loss": 0.6307, "step": 38960 }, { "epoch": 1.9355319360286083, "grad_norm": 0.1298828125, "learning_rate": 0.0006451614184960763, "loss": 0.6209, "step": 38970 }, { "epoch": 1.9360286083242277, "grad_norm": 0.109375, "learning_rate": 0.0006451216847124268, "loss": 0.5748, "step": 38980 }, { "epoch": 1.936525280619847, "grad_norm": 0.10302734375, "learning_rate": 0.0006450819509287773, "loss": 0.5798, "step": 38990 }, { "epoch": 1.9370219529154664, "grad_norm": 0.1279296875, "learning_rate": 0.0006450422171451277, "loss": 0.6054, "step": 39000 }, { "epoch": 1.9375186252110859, "grad_norm": 0.10009765625, "learning_rate": 0.0006450024833614781, "loss": 0.5824, "step": 39010 }, { "epoch": 1.938015297506705, "grad_norm": 0.095703125, "learning_rate": 0.0006449627495778286, "loss": 0.6086, "step": 39020 }, { "epoch": 1.9385119698023243, "grad_norm": 0.11328125, "learning_rate": 0.000644923015794179, "loss": 0.6038, "step": 39030 }, { "epoch": 1.9390086420979438, "grad_norm": 0.11181640625, "learning_rate": 0.0006448832820105295, "loss": 0.5818, "step": 39040 }, { "epoch": 1.9395053143935632, "grad_norm": 0.09375, "learning_rate": 0.0006448435482268799, "loss": 0.5801, "step": 39050 }, { "epoch": 1.9400019866891824, "grad_norm": 0.1669921875, "learning_rate": 0.0006448038144432304, "loss": 0.6027, "step": 39060 }, { "epoch": 1.9404986589848017, "grad_norm": 0.1572265625, "learning_rate": 0.0006447640806595808, "loss": 0.5803, "step": 39070 }, { "epoch": 1.9409953312804213, "grad_norm": 0.10595703125, "learning_rate": 0.0006447243468759312, "loss": 0.584, "step": 39080 }, { "epoch": 1.9414920035760406, "grad_norm": 0.1376953125, "learning_rate": 0.0006446846130922818, "loss": 0.5877, "step": 39090 }, { "epoch": 1.9419886758716598, "grad_norm": 0.16796875, "learning_rate": 0.0006446448793086322, "loss": 0.6003, "step": 39100 }, { "epoch": 1.9424853481672792, "grad_norm": 0.134765625, "learning_rate": 0.0006446051455249826, "loss": 0.5956, "step": 39110 }, { "epoch": 1.9429820204628987, "grad_norm": 0.11572265625, "learning_rate": 0.0006445654117413331, "loss": 0.5991, "step": 39120 }, { "epoch": 1.943478692758518, "grad_norm": 0.09765625, "learning_rate": 0.0006445256779576835, "loss": 0.5997, "step": 39130 }, { "epoch": 1.9439753650541371, "grad_norm": 0.14453125, "learning_rate": 0.000644485944174034, "loss": 0.5702, "step": 39140 }, { "epoch": 1.9444720373497566, "grad_norm": 0.10546875, "learning_rate": 0.0006444462103903845, "loss": 0.594, "step": 39150 }, { "epoch": 1.944968709645376, "grad_norm": 0.10107421875, "learning_rate": 0.0006444064766067349, "loss": 0.5686, "step": 39160 }, { "epoch": 1.9454653819409953, "grad_norm": 0.123046875, "learning_rate": 0.0006443667428230853, "loss": 0.6089, "step": 39170 }, { "epoch": 1.9459620542366147, "grad_norm": 0.1689453125, "learning_rate": 0.0006443270090394359, "loss": 0.5801, "step": 39180 }, { "epoch": 1.9464587265322342, "grad_norm": 0.09521484375, "learning_rate": 0.0006442872752557863, "loss": 0.5689, "step": 39190 }, { "epoch": 1.9469553988278534, "grad_norm": 0.09423828125, "learning_rate": 0.0006442475414721367, "loss": 0.5747, "step": 39200 }, { "epoch": 1.9474520711234726, "grad_norm": 0.09423828125, "learning_rate": 0.0006442078076884872, "loss": 0.5758, "step": 39210 }, { "epoch": 1.947948743419092, "grad_norm": 0.09326171875, "learning_rate": 0.0006441680739048376, "loss": 0.5688, "step": 39220 }, { "epoch": 1.9484454157147115, "grad_norm": 0.09912109375, "learning_rate": 0.000644128340121188, "loss": 0.5876, "step": 39230 }, { "epoch": 1.9489420880103308, "grad_norm": 0.0986328125, "learning_rate": 0.0006440886063375385, "loss": 0.565, "step": 39240 }, { "epoch": 1.94943876030595, "grad_norm": 0.1708984375, "learning_rate": 0.000644048872553889, "loss": 0.6057, "step": 39250 }, { "epoch": 1.9499354326015694, "grad_norm": 0.1083984375, "learning_rate": 0.0006440091387702394, "loss": 0.5712, "step": 39260 }, { "epoch": 1.9504321048971889, "grad_norm": 0.1611328125, "learning_rate": 0.0006439694049865898, "loss": 0.6069, "step": 39270 }, { "epoch": 1.950928777192808, "grad_norm": 0.09130859375, "learning_rate": 0.0006439296712029403, "loss": 0.5796, "step": 39280 }, { "epoch": 1.9514254494884276, "grad_norm": 0.11865234375, "learning_rate": 0.0006438899374192909, "loss": 0.5689, "step": 39290 }, { "epoch": 1.951922121784047, "grad_norm": 0.10498046875, "learning_rate": 0.0006438502036356412, "loss": 0.5608, "step": 39300 }, { "epoch": 1.9524187940796662, "grad_norm": 0.095703125, "learning_rate": 0.0006438104698519917, "loss": 0.5871, "step": 39310 }, { "epoch": 1.9529154663752855, "grad_norm": 0.1376953125, "learning_rate": 0.0006437707360683421, "loss": 0.6047, "step": 39320 }, { "epoch": 1.953412138670905, "grad_norm": 0.09326171875, "learning_rate": 0.0006437310022846925, "loss": 0.5833, "step": 39330 }, { "epoch": 1.9539088109665244, "grad_norm": 0.0927734375, "learning_rate": 0.0006436912685010431, "loss": 0.6137, "step": 39340 }, { "epoch": 1.9544054832621436, "grad_norm": 0.1669921875, "learning_rate": 0.0006436515347173935, "loss": 0.5939, "step": 39350 }, { "epoch": 1.954902155557763, "grad_norm": 0.1103515625, "learning_rate": 0.0006436118009337439, "loss": 0.5941, "step": 39360 }, { "epoch": 1.9553988278533825, "grad_norm": 0.08544921875, "learning_rate": 0.0006435720671500944, "loss": 0.5745, "step": 39370 }, { "epoch": 1.9558955001490017, "grad_norm": 0.1591796875, "learning_rate": 0.0006435323333664448, "loss": 0.5842, "step": 39380 }, { "epoch": 1.956392172444621, "grad_norm": 0.13671875, "learning_rate": 0.0006434925995827953, "loss": 0.5685, "step": 39390 }, { "epoch": 1.9568888447402404, "grad_norm": 0.12255859375, "learning_rate": 0.0006434528657991458, "loss": 0.5829, "step": 39400 }, { "epoch": 1.9573855170358598, "grad_norm": 0.0966796875, "learning_rate": 0.0006434131320154962, "loss": 0.6016, "step": 39410 }, { "epoch": 1.957882189331479, "grad_norm": 0.1162109375, "learning_rate": 0.0006433733982318467, "loss": 0.559, "step": 39420 }, { "epoch": 1.9583788616270983, "grad_norm": 0.09228515625, "learning_rate": 0.000643333664448197, "loss": 0.5896, "step": 39430 }, { "epoch": 1.9588755339227177, "grad_norm": 0.11083984375, "learning_rate": 0.0006432939306645476, "loss": 0.5995, "step": 39440 }, { "epoch": 1.9593722062183372, "grad_norm": 0.11572265625, "learning_rate": 0.0006432541968808981, "loss": 0.5651, "step": 39450 }, { "epoch": 1.9598688785139564, "grad_norm": 0.1015625, "learning_rate": 0.0006432144630972484, "loss": 0.5804, "step": 39460 }, { "epoch": 1.9603655508095759, "grad_norm": 0.1796875, "learning_rate": 0.0006431747293135989, "loss": 0.5789, "step": 39470 }, { "epoch": 1.9608622231051953, "grad_norm": 0.1796875, "learning_rate": 0.0006431349955299495, "loss": 0.6015, "step": 39480 }, { "epoch": 1.9613588954008145, "grad_norm": 0.11279296875, "learning_rate": 0.0006430952617462998, "loss": 0.5799, "step": 39490 }, { "epoch": 1.9618555676964338, "grad_norm": 0.0927734375, "learning_rate": 0.0006430555279626503, "loss": 0.6069, "step": 39500 }, { "epoch": 1.9623522399920532, "grad_norm": 0.1279296875, "learning_rate": 0.0006430157941790007, "loss": 0.589, "step": 39510 }, { "epoch": 1.9628489122876727, "grad_norm": 0.0908203125, "learning_rate": 0.0006429760603953511, "loss": 0.6054, "step": 39520 }, { "epoch": 1.963345584583292, "grad_norm": 0.1123046875, "learning_rate": 0.0006429363266117016, "loss": 0.5633, "step": 39530 }, { "epoch": 1.9638422568789113, "grad_norm": 0.08837890625, "learning_rate": 0.0006428965928280521, "loss": 0.5687, "step": 39540 }, { "epoch": 1.9643389291745308, "grad_norm": 0.1220703125, "learning_rate": 0.0006428568590444025, "loss": 0.6061, "step": 39550 }, { "epoch": 1.96483560147015, "grad_norm": 0.109375, "learning_rate": 0.000642817125260753, "loss": 0.5849, "step": 39560 }, { "epoch": 1.9653322737657692, "grad_norm": 0.10791015625, "learning_rate": 0.0006427773914771034, "loss": 0.5754, "step": 39570 }, { "epoch": 1.9658289460613887, "grad_norm": 0.1494140625, "learning_rate": 0.000642737657693454, "loss": 0.5902, "step": 39580 }, { "epoch": 1.9663256183570081, "grad_norm": 0.09521484375, "learning_rate": 0.0006426979239098044, "loss": 0.5815, "step": 39590 }, { "epoch": 1.9668222906526274, "grad_norm": 0.10693359375, "learning_rate": 0.0006426581901261548, "loss": 0.584, "step": 39600 }, { "epoch": 1.9673189629482466, "grad_norm": 0.09228515625, "learning_rate": 0.0006426184563425053, "loss": 0.5894, "step": 39610 }, { "epoch": 1.967815635243866, "grad_norm": 0.1015625, "learning_rate": 0.0006425787225588557, "loss": 0.5796, "step": 39620 }, { "epoch": 1.9683123075394855, "grad_norm": 0.1298828125, "learning_rate": 0.0006425389887752061, "loss": 0.6132, "step": 39630 }, { "epoch": 1.9688089798351047, "grad_norm": 0.10205078125, "learning_rate": 0.0006424992549915567, "loss": 0.5728, "step": 39640 }, { "epoch": 1.9693056521307242, "grad_norm": 0.119140625, "learning_rate": 0.000642459521207907, "loss": 0.5985, "step": 39650 }, { "epoch": 1.9698023244263436, "grad_norm": 0.09619140625, "learning_rate": 0.0006424197874242575, "loss": 0.5922, "step": 39660 }, { "epoch": 1.9702989967219628, "grad_norm": 0.10009765625, "learning_rate": 0.000642380053640608, "loss": 0.6181, "step": 39670 }, { "epoch": 1.970795669017582, "grad_norm": 0.1171875, "learning_rate": 0.0006423403198569583, "loss": 0.5975, "step": 39680 }, { "epoch": 1.9712923413132015, "grad_norm": 0.1318359375, "learning_rate": 0.0006423005860733089, "loss": 0.6012, "step": 39690 }, { "epoch": 1.971789013608821, "grad_norm": 0.09228515625, "learning_rate": 0.0006422608522896593, "loss": 0.567, "step": 39700 }, { "epoch": 1.9722856859044402, "grad_norm": 0.0986328125, "learning_rate": 0.0006422211185060097, "loss": 0.5704, "step": 39710 }, { "epoch": 1.9727823582000596, "grad_norm": 0.107421875, "learning_rate": 0.0006421813847223602, "loss": 0.6102, "step": 39720 }, { "epoch": 1.973279030495679, "grad_norm": 0.1298828125, "learning_rate": 0.0006421416509387106, "loss": 0.6061, "step": 39730 }, { "epoch": 1.9737757027912983, "grad_norm": 0.10498046875, "learning_rate": 0.0006421019171550612, "loss": 0.5885, "step": 39740 }, { "epoch": 1.9742723750869176, "grad_norm": 0.08642578125, "learning_rate": 0.0006420621833714116, "loss": 0.5902, "step": 39750 }, { "epoch": 1.974769047382537, "grad_norm": 0.1259765625, "learning_rate": 0.000642022449587762, "loss": 0.5694, "step": 39760 }, { "epoch": 1.9752657196781565, "grad_norm": 0.1083984375, "learning_rate": 0.0006419827158041125, "loss": 0.6038, "step": 39770 }, { "epoch": 1.9757623919737757, "grad_norm": 0.16015625, "learning_rate": 0.0006419429820204629, "loss": 0.5931, "step": 39780 }, { "epoch": 1.976259064269395, "grad_norm": 0.091796875, "learning_rate": 0.0006419032482368134, "loss": 0.5722, "step": 39790 }, { "epoch": 1.9767557365650144, "grad_norm": 0.09765625, "learning_rate": 0.0006418635144531639, "loss": 0.5876, "step": 39800 }, { "epoch": 1.9772524088606338, "grad_norm": 0.09423828125, "learning_rate": 0.0006418237806695143, "loss": 0.5602, "step": 39810 }, { "epoch": 1.977749081156253, "grad_norm": 0.1416015625, "learning_rate": 0.0006417840468858647, "loss": 0.5556, "step": 39820 }, { "epoch": 1.9782457534518725, "grad_norm": 0.10888671875, "learning_rate": 0.0006417443131022152, "loss": 0.5809, "step": 39830 }, { "epoch": 1.978742425747492, "grad_norm": 0.10986328125, "learning_rate": 0.0006417045793185656, "loss": 0.5805, "step": 39840 }, { "epoch": 1.9792390980431112, "grad_norm": 0.1005859375, "learning_rate": 0.0006416648455349161, "loss": 0.5848, "step": 39850 }, { "epoch": 1.9797357703387304, "grad_norm": 0.1025390625, "learning_rate": 0.0006416251117512666, "loss": 0.5994, "step": 39860 }, { "epoch": 1.9802324426343498, "grad_norm": 0.1064453125, "learning_rate": 0.000641585377967617, "loss": 0.5487, "step": 39870 }, { "epoch": 1.9807291149299693, "grad_norm": 0.10595703125, "learning_rate": 0.0006415456441839674, "loss": 0.5817, "step": 39880 }, { "epoch": 1.9812257872255885, "grad_norm": 0.15625, "learning_rate": 0.000641505910400318, "loss": 0.5628, "step": 39890 }, { "epoch": 1.981722459521208, "grad_norm": 0.0966796875, "learning_rate": 0.0006414661766166684, "loss": 0.5559, "step": 39900 }, { "epoch": 1.9822191318168274, "grad_norm": 0.1513671875, "learning_rate": 0.0006414264428330188, "loss": 0.579, "step": 39910 }, { "epoch": 1.9827158041124466, "grad_norm": 0.150390625, "learning_rate": 0.0006413867090493692, "loss": 0.5864, "step": 39920 }, { "epoch": 1.9832124764080659, "grad_norm": 0.119140625, "learning_rate": 0.0006413469752657197, "loss": 0.5798, "step": 39930 }, { "epoch": 1.9837091487036853, "grad_norm": 0.09716796875, "learning_rate": 0.0006413072414820702, "loss": 0.589, "step": 39940 }, { "epoch": 1.9842058209993048, "grad_norm": 0.10986328125, "learning_rate": 0.0006412675076984206, "loss": 0.6079, "step": 39950 }, { "epoch": 1.984702493294924, "grad_norm": 0.1630859375, "learning_rate": 0.0006412277739147711, "loss": 0.5823, "step": 39960 }, { "epoch": 1.9851991655905432, "grad_norm": 0.1181640625, "learning_rate": 0.0006411880401311215, "loss": 0.5776, "step": 39970 }, { "epoch": 1.9856958378861627, "grad_norm": 0.11083984375, "learning_rate": 0.0006411483063474719, "loss": 0.5796, "step": 39980 }, { "epoch": 1.9861925101817821, "grad_norm": 0.126953125, "learning_rate": 0.0006411085725638225, "loss": 0.5989, "step": 39990 }, { "epoch": 1.9866891824774013, "grad_norm": 0.1064453125, "learning_rate": 0.0006410688387801729, "loss": 0.5959, "step": 40000 }, { "epoch": 1.9871858547730208, "grad_norm": 0.1845703125, "learning_rate": 0.0006410291049965233, "loss": 0.5618, "step": 40010 }, { "epoch": 1.9876825270686402, "grad_norm": 0.0908203125, "learning_rate": 0.0006409893712128738, "loss": 0.5903, "step": 40020 }, { "epoch": 1.9881791993642595, "grad_norm": 0.08740234375, "learning_rate": 0.0006409496374292242, "loss": 0.5609, "step": 40030 }, { "epoch": 1.9886758716598787, "grad_norm": 0.12060546875, "learning_rate": 0.0006409099036455747, "loss": 0.5721, "step": 40040 }, { "epoch": 1.9891725439554981, "grad_norm": 0.111328125, "learning_rate": 0.0006408701698619252, "loss": 0.578, "step": 40050 }, { "epoch": 1.9896692162511176, "grad_norm": 0.10595703125, "learning_rate": 0.0006408304360782756, "loss": 0.5972, "step": 40060 }, { "epoch": 1.9901658885467368, "grad_norm": 0.11181640625, "learning_rate": 0.000640790702294626, "loss": 0.5915, "step": 40070 }, { "epoch": 1.9906625608423563, "grad_norm": 0.1142578125, "learning_rate": 0.0006407509685109765, "loss": 0.597, "step": 40080 }, { "epoch": 1.9911592331379757, "grad_norm": 0.173828125, "learning_rate": 0.000640711234727327, "loss": 0.5964, "step": 40090 }, { "epoch": 1.991655905433595, "grad_norm": 0.12109375, "learning_rate": 0.0006406715009436774, "loss": 0.5867, "step": 40100 }, { "epoch": 1.9921525777292142, "grad_norm": 0.12451171875, "learning_rate": 0.0006406317671600278, "loss": 0.5844, "step": 40110 }, { "epoch": 1.9926492500248336, "grad_norm": 0.08935546875, "learning_rate": 0.0006405920333763783, "loss": 0.6189, "step": 40120 }, { "epoch": 1.993145922320453, "grad_norm": 0.10009765625, "learning_rate": 0.0006405522995927287, "loss": 0.5775, "step": 40130 }, { "epoch": 1.9936425946160723, "grad_norm": 0.10595703125, "learning_rate": 0.0006405125658090792, "loss": 0.5647, "step": 40140 }, { "epoch": 1.9941392669116915, "grad_norm": 0.1044921875, "learning_rate": 0.0006404728320254297, "loss": 0.5857, "step": 40150 }, { "epoch": 1.994635939207311, "grad_norm": 0.1025390625, "learning_rate": 0.0006404330982417801, "loss": 0.5828, "step": 40160 }, { "epoch": 1.9951326115029304, "grad_norm": 0.0986328125, "learning_rate": 0.0006403933644581305, "loss": 0.5727, "step": 40170 }, { "epoch": 1.9956292837985496, "grad_norm": 0.1513671875, "learning_rate": 0.000640353630674481, "loss": 0.5982, "step": 40180 }, { "epoch": 1.996125956094169, "grad_norm": 0.12451171875, "learning_rate": 0.0006403138968908315, "loss": 0.5826, "step": 40190 }, { "epoch": 1.9966226283897885, "grad_norm": 0.0888671875, "learning_rate": 0.0006402741631071819, "loss": 0.5778, "step": 40200 }, { "epoch": 1.9971193006854078, "grad_norm": 0.12158203125, "learning_rate": 0.0006402344293235324, "loss": 0.5487, "step": 40210 }, { "epoch": 1.997615972981027, "grad_norm": 0.203125, "learning_rate": 0.0006401946955398828, "loss": 0.5894, "step": 40220 }, { "epoch": 1.9981126452766464, "grad_norm": 0.111328125, "learning_rate": 0.0006401549617562332, "loss": 0.6065, "step": 40230 }, { "epoch": 1.998609317572266, "grad_norm": 0.1005859375, "learning_rate": 0.0006401152279725838, "loss": 0.5924, "step": 40240 }, { "epoch": 1.9991059898678851, "grad_norm": 0.1298828125, "learning_rate": 0.0006400754941889342, "loss": 0.5868, "step": 40250 }, { "epoch": 1.9996026621635046, "grad_norm": 0.10400390625, "learning_rate": 0.0006400357604052846, "loss": 0.5794, "step": 40260 }, { "epoch": 2.000099334459124, "grad_norm": 0.0966796875, "learning_rate": 0.0006399960266216351, "loss": 0.5783, "step": 40270 }, { "epoch": 2.0005960067547433, "grad_norm": 0.09716796875, "learning_rate": 0.0006399562928379855, "loss": 0.5969, "step": 40280 }, { "epoch": 2.0010926790503625, "grad_norm": 0.1416015625, "learning_rate": 0.000639916559054336, "loss": 0.5477, "step": 40290 }, { "epoch": 2.0015893513459817, "grad_norm": 0.08984375, "learning_rate": 0.0006398768252706864, "loss": 0.5871, "step": 40300 }, { "epoch": 2.0020860236416014, "grad_norm": 0.1240234375, "learning_rate": 0.0006398370914870369, "loss": 0.5876, "step": 40310 }, { "epoch": 2.0025826959372206, "grad_norm": 0.09033203125, "learning_rate": 0.0006397973577033874, "loss": 0.5657, "step": 40320 }, { "epoch": 2.00307936823284, "grad_norm": 0.103515625, "learning_rate": 0.0006397576239197377, "loss": 0.5332, "step": 40330 }, { "epoch": 2.0035760405284595, "grad_norm": 0.11962890625, "learning_rate": 0.0006397178901360883, "loss": 0.5807, "step": 40340 }, { "epoch": 2.0040727128240787, "grad_norm": 0.10791015625, "learning_rate": 0.0006396781563524388, "loss": 0.5744, "step": 40350 }, { "epoch": 2.004569385119698, "grad_norm": 0.1083984375, "learning_rate": 0.0006396384225687891, "loss": 0.5867, "step": 40360 }, { "epoch": 2.005066057415317, "grad_norm": 0.09912109375, "learning_rate": 0.0006395986887851396, "loss": 0.583, "step": 40370 }, { "epoch": 2.005562729710937, "grad_norm": 0.1591796875, "learning_rate": 0.00063955895500149, "loss": 0.5734, "step": 40380 }, { "epoch": 2.006059402006556, "grad_norm": 0.09716796875, "learning_rate": 0.0006395192212178404, "loss": 0.567, "step": 40390 }, { "epoch": 2.0065560743021753, "grad_norm": 0.1865234375, "learning_rate": 0.000639479487434191, "loss": 0.5576, "step": 40400 }, { "epoch": 2.007052746597795, "grad_norm": 0.1123046875, "learning_rate": 0.0006394397536505414, "loss": 0.606, "step": 40410 }, { "epoch": 2.007549418893414, "grad_norm": 0.10205078125, "learning_rate": 0.0006394000198668918, "loss": 0.5705, "step": 40420 }, { "epoch": 2.0080460911890334, "grad_norm": 0.1611328125, "learning_rate": 0.0006393602860832423, "loss": 0.5697, "step": 40430 }, { "epoch": 2.0085427634846527, "grad_norm": 0.1376953125, "learning_rate": 0.0006393205522995927, "loss": 0.5761, "step": 40440 }, { "epoch": 2.0090394357802723, "grad_norm": 0.11376953125, "learning_rate": 0.0006392808185159432, "loss": 0.5834, "step": 40450 }, { "epoch": 2.0095361080758916, "grad_norm": 0.126953125, "learning_rate": 0.0006392410847322937, "loss": 0.5585, "step": 40460 }, { "epoch": 2.010032780371511, "grad_norm": 0.095703125, "learning_rate": 0.0006392013509486441, "loss": 0.5593, "step": 40470 }, { "epoch": 2.01052945266713, "grad_norm": 0.099609375, "learning_rate": 0.0006391616171649946, "loss": 0.553, "step": 40480 }, { "epoch": 2.0110261249627497, "grad_norm": 0.16796875, "learning_rate": 0.000639121883381345, "loss": 0.571, "step": 40490 }, { "epoch": 2.011522797258369, "grad_norm": 0.1875, "learning_rate": 0.0006390821495976955, "loss": 0.5681, "step": 40500 }, { "epoch": 2.012019469553988, "grad_norm": 0.1220703125, "learning_rate": 0.000639042415814046, "loss": 0.5786, "step": 40510 }, { "epoch": 2.012516141849608, "grad_norm": 0.09228515625, "learning_rate": 0.0006390026820303963, "loss": 0.5697, "step": 40520 }, { "epoch": 2.013012814145227, "grad_norm": 0.1064453125, "learning_rate": 0.0006389629482467468, "loss": 0.5741, "step": 40530 }, { "epoch": 2.0135094864408463, "grad_norm": 0.095703125, "learning_rate": 0.0006389232144630974, "loss": 0.5523, "step": 40540 }, { "epoch": 2.0140061587364655, "grad_norm": 0.10546875, "learning_rate": 0.0006388834806794477, "loss": 0.5956, "step": 40550 }, { "epoch": 2.014502831032085, "grad_norm": 0.125, "learning_rate": 0.0006388437468957982, "loss": 0.5698, "step": 40560 }, { "epoch": 2.0149995033277044, "grad_norm": 0.126953125, "learning_rate": 0.0006388040131121486, "loss": 0.5914, "step": 40570 }, { "epoch": 2.0154961756233236, "grad_norm": 0.10205078125, "learning_rate": 0.000638764279328499, "loss": 0.5779, "step": 40580 }, { "epoch": 2.0159928479189433, "grad_norm": 0.08837890625, "learning_rate": 0.0006387245455448495, "loss": 0.5761, "step": 40590 }, { "epoch": 2.0164895202145625, "grad_norm": 0.0908203125, "learning_rate": 0.0006386848117612, "loss": 0.5418, "step": 40600 }, { "epoch": 2.0169861925101817, "grad_norm": 0.12255859375, "learning_rate": 0.0006386450779775504, "loss": 0.5969, "step": 40610 }, { "epoch": 2.017482864805801, "grad_norm": 0.10888671875, "learning_rate": 0.0006386053441939009, "loss": 0.5575, "step": 40620 }, { "epoch": 2.0179795371014206, "grad_norm": 0.142578125, "learning_rate": 0.0006385656104102513, "loss": 0.5786, "step": 40630 }, { "epoch": 2.01847620939704, "grad_norm": 0.11474609375, "learning_rate": 0.0006385258766266019, "loss": 0.5354, "step": 40640 }, { "epoch": 2.018972881692659, "grad_norm": 0.12451171875, "learning_rate": 0.0006384861428429523, "loss": 0.608, "step": 40650 }, { "epoch": 2.0194695539882783, "grad_norm": 0.228515625, "learning_rate": 0.0006384464090593027, "loss": 0.5686, "step": 40660 }, { "epoch": 2.019966226283898, "grad_norm": 0.10986328125, "learning_rate": 0.0006384066752756532, "loss": 0.5938, "step": 40670 }, { "epoch": 2.020462898579517, "grad_norm": 0.138671875, "learning_rate": 0.0006383669414920036, "loss": 0.5727, "step": 40680 }, { "epoch": 2.0209595708751364, "grad_norm": 0.1474609375, "learning_rate": 0.000638327207708354, "loss": 0.5695, "step": 40690 }, { "epoch": 2.021456243170756, "grad_norm": 0.19140625, "learning_rate": 0.0006382874739247046, "loss": 0.5923, "step": 40700 }, { "epoch": 2.0219529154663753, "grad_norm": 0.099609375, "learning_rate": 0.0006382477401410549, "loss": 0.5913, "step": 40710 }, { "epoch": 2.0224495877619946, "grad_norm": 0.1142578125, "learning_rate": 0.0006382080063574054, "loss": 0.5862, "step": 40720 }, { "epoch": 2.022946260057614, "grad_norm": 0.1259765625, "learning_rate": 0.0006381682725737559, "loss": 0.6107, "step": 40730 }, { "epoch": 2.0234429323532335, "grad_norm": 0.10693359375, "learning_rate": 0.0006381285387901062, "loss": 0.6058, "step": 40740 }, { "epoch": 2.0239396046488527, "grad_norm": 0.11669921875, "learning_rate": 0.0006380888050064568, "loss": 0.5606, "step": 40750 }, { "epoch": 2.024436276944472, "grad_norm": 0.08984375, "learning_rate": 0.0006380490712228073, "loss": 0.572, "step": 40760 }, { "epoch": 2.0249329492400916, "grad_norm": 0.10302734375, "learning_rate": 0.0006380093374391577, "loss": 0.5759, "step": 40770 }, { "epoch": 2.025429621535711, "grad_norm": 0.10205078125, "learning_rate": 0.0006379696036555081, "loss": 0.6079, "step": 40780 }, { "epoch": 2.02592629383133, "grad_norm": 0.1142578125, "learning_rate": 0.0006379298698718585, "loss": 0.5937, "step": 40790 }, { "epoch": 2.0264229661269493, "grad_norm": 0.12890625, "learning_rate": 0.0006378901360882091, "loss": 0.5953, "step": 40800 }, { "epoch": 2.026919638422569, "grad_norm": 0.12890625, "learning_rate": 0.0006378504023045595, "loss": 0.5788, "step": 40810 }, { "epoch": 2.027416310718188, "grad_norm": 0.126953125, "learning_rate": 0.0006378106685209099, "loss": 0.5634, "step": 40820 }, { "epoch": 2.0279129830138074, "grad_norm": 0.259765625, "learning_rate": 0.0006377709347372604, "loss": 0.5917, "step": 40830 }, { "epoch": 2.0284096553094266, "grad_norm": 0.18359375, "learning_rate": 0.0006377312009536108, "loss": 0.5721, "step": 40840 }, { "epoch": 2.0289063276050463, "grad_norm": 0.09033203125, "learning_rate": 0.0006376914671699613, "loss": 0.5689, "step": 40850 }, { "epoch": 2.0294029999006655, "grad_norm": 0.10888671875, "learning_rate": 0.0006376517333863118, "loss": 0.5732, "step": 40860 }, { "epoch": 2.0298996721962848, "grad_norm": 0.11181640625, "learning_rate": 0.0006376119996026622, "loss": 0.5649, "step": 40870 }, { "epoch": 2.0303963444919044, "grad_norm": 0.10302734375, "learning_rate": 0.0006375722658190126, "loss": 0.5461, "step": 40880 }, { "epoch": 2.0308930167875237, "grad_norm": 0.10400390625, "learning_rate": 0.0006375325320353631, "loss": 0.5322, "step": 40890 }, { "epoch": 2.031389689083143, "grad_norm": 0.1962890625, "learning_rate": 0.0006374927982517135, "loss": 0.6145, "step": 40900 }, { "epoch": 2.031886361378762, "grad_norm": 0.109375, "learning_rate": 0.000637453064468064, "loss": 0.5766, "step": 40910 }, { "epoch": 2.032383033674382, "grad_norm": 0.1044921875, "learning_rate": 0.0006374133306844145, "loss": 0.563, "step": 40920 }, { "epoch": 2.032879705970001, "grad_norm": 0.10107421875, "learning_rate": 0.0006373735969007649, "loss": 0.5614, "step": 40930 }, { "epoch": 2.0333763782656202, "grad_norm": 0.09765625, "learning_rate": 0.0006373338631171153, "loss": 0.5522, "step": 40940 }, { "epoch": 2.03387305056124, "grad_norm": 0.1044921875, "learning_rate": 0.0006372941293334659, "loss": 0.5635, "step": 40950 }, { "epoch": 2.034369722856859, "grad_norm": 0.1142578125, "learning_rate": 0.0006372543955498163, "loss": 0.5879, "step": 40960 }, { "epoch": 2.0348663951524784, "grad_norm": 0.138671875, "learning_rate": 0.0006372146617661667, "loss": 0.5443, "step": 40970 }, { "epoch": 2.0353630674480976, "grad_norm": 0.10595703125, "learning_rate": 0.0006371749279825171, "loss": 0.5672, "step": 40980 }, { "epoch": 2.0358597397437173, "grad_norm": 0.1064453125, "learning_rate": 0.0006371351941988676, "loss": 0.5633, "step": 40990 }, { "epoch": 2.0363564120393365, "grad_norm": 0.1435546875, "learning_rate": 0.0006370954604152181, "loss": 0.588, "step": 41000 }, { "epoch": 2.0368530843349557, "grad_norm": 0.115234375, "learning_rate": 0.0006370557266315685, "loss": 0.5558, "step": 41010 }, { "epoch": 2.037349756630575, "grad_norm": 0.10107421875, "learning_rate": 0.000637015992847919, "loss": 0.5913, "step": 41020 }, { "epoch": 2.0378464289261946, "grad_norm": 0.10107421875, "learning_rate": 0.0006369762590642694, "loss": 0.5494, "step": 41030 }, { "epoch": 2.038343101221814, "grad_norm": 0.1279296875, "learning_rate": 0.0006369365252806198, "loss": 0.5775, "step": 41040 }, { "epoch": 2.038839773517433, "grad_norm": 0.1103515625, "learning_rate": 0.0006368967914969704, "loss": 0.5988, "step": 41050 }, { "epoch": 2.0393364458130527, "grad_norm": 0.12060546875, "learning_rate": 0.0006368570577133208, "loss": 0.5296, "step": 41060 }, { "epoch": 2.039833118108672, "grad_norm": 0.09130859375, "learning_rate": 0.0006368173239296712, "loss": 0.5901, "step": 41070 }, { "epoch": 2.040329790404291, "grad_norm": 0.0966796875, "learning_rate": 0.0006367775901460217, "loss": 0.559, "step": 41080 }, { "epoch": 2.0408264626999104, "grad_norm": 0.095703125, "learning_rate": 0.0006367378563623721, "loss": 0.5933, "step": 41090 }, { "epoch": 2.04132313499553, "grad_norm": 0.09814453125, "learning_rate": 0.0006366981225787226, "loss": 0.5288, "step": 41100 }, { "epoch": 2.0418198072911493, "grad_norm": 0.1044921875, "learning_rate": 0.0006366583887950731, "loss": 0.5744, "step": 41110 }, { "epoch": 2.0423164795867685, "grad_norm": 0.154296875, "learning_rate": 0.0006366186550114235, "loss": 0.5789, "step": 41120 }, { "epoch": 2.042813151882388, "grad_norm": 0.1806640625, "learning_rate": 0.0006365789212277739, "loss": 0.5652, "step": 41130 }, { "epoch": 2.0433098241780074, "grad_norm": 0.146484375, "learning_rate": 0.0006365391874441244, "loss": 0.5516, "step": 41140 }, { "epoch": 2.0438064964736267, "grad_norm": 0.140625, "learning_rate": 0.0006364994536604749, "loss": 0.5759, "step": 41150 }, { "epoch": 2.044303168769246, "grad_norm": 0.1064453125, "learning_rate": 0.0006364597198768253, "loss": 0.5565, "step": 41160 }, { "epoch": 2.0447998410648656, "grad_norm": 0.12451171875, "learning_rate": 0.0006364199860931757, "loss": 0.5778, "step": 41170 }, { "epoch": 2.045296513360485, "grad_norm": 0.09130859375, "learning_rate": 0.0006363802523095262, "loss": 0.5653, "step": 41180 }, { "epoch": 2.045793185656104, "grad_norm": 0.119140625, "learning_rate": 0.0006363405185258766, "loss": 0.5975, "step": 41190 }, { "epoch": 2.0462898579517232, "grad_norm": 0.1083984375, "learning_rate": 0.0006363007847422271, "loss": 0.5602, "step": 41200 }, { "epoch": 2.046786530247343, "grad_norm": 0.09326171875, "learning_rate": 0.0006362610509585776, "loss": 0.5594, "step": 41210 }, { "epoch": 2.047283202542962, "grad_norm": 0.1494140625, "learning_rate": 0.0006362213171749281, "loss": 0.5369, "step": 41220 }, { "epoch": 2.0477798748385814, "grad_norm": 0.162109375, "learning_rate": 0.0006361815833912784, "loss": 0.5881, "step": 41230 }, { "epoch": 2.048276547134201, "grad_norm": 0.1240234375, "learning_rate": 0.0006361418496076289, "loss": 0.5492, "step": 41240 }, { "epoch": 2.0487732194298203, "grad_norm": 0.09326171875, "learning_rate": 0.0006361021158239794, "loss": 0.5637, "step": 41250 }, { "epoch": 2.0492698917254395, "grad_norm": 0.29296875, "learning_rate": 0.0006360623820403298, "loss": 0.5624, "step": 41260 }, { "epoch": 2.0497665640210587, "grad_norm": 0.1357421875, "learning_rate": 0.0006360226482566803, "loss": 0.5966, "step": 41270 }, { "epoch": 2.0502632363166784, "grad_norm": 0.10009765625, "learning_rate": 0.0006359829144730307, "loss": 0.6032, "step": 41280 }, { "epoch": 2.0507599086122976, "grad_norm": 0.169921875, "learning_rate": 0.0006359431806893811, "loss": 0.594, "step": 41290 }, { "epoch": 2.051256580907917, "grad_norm": 0.10888671875, "learning_rate": 0.0006359034469057317, "loss": 0.5837, "step": 41300 }, { "epoch": 2.0517532532035365, "grad_norm": 0.1318359375, "learning_rate": 0.0006358637131220821, "loss": 0.5417, "step": 41310 }, { "epoch": 2.0522499254991557, "grad_norm": 0.11572265625, "learning_rate": 0.0006358239793384325, "loss": 0.6023, "step": 41320 }, { "epoch": 2.052746597794775, "grad_norm": 0.11767578125, "learning_rate": 0.000635784245554783, "loss": 0.5314, "step": 41330 }, { "epoch": 2.053243270090394, "grad_norm": 0.140625, "learning_rate": 0.0006357445117711334, "loss": 0.5968, "step": 41340 }, { "epoch": 2.053739942386014, "grad_norm": 0.1875, "learning_rate": 0.0006357047779874839, "loss": 0.558, "step": 41350 }, { "epoch": 2.054236614681633, "grad_norm": 0.08837890625, "learning_rate": 0.0006356650442038344, "loss": 0.5716, "step": 41360 }, { "epoch": 2.0547332869772523, "grad_norm": 0.09619140625, "learning_rate": 0.0006356253104201848, "loss": 0.5497, "step": 41370 }, { "epoch": 2.0552299592728716, "grad_norm": 0.10205078125, "learning_rate": 0.0006355855766365353, "loss": 0.5591, "step": 41380 }, { "epoch": 2.0557266315684912, "grad_norm": 0.134765625, "learning_rate": 0.0006355458428528856, "loss": 0.5912, "step": 41390 }, { "epoch": 2.0562233038641105, "grad_norm": 0.09228515625, "learning_rate": 0.0006355061090692362, "loss": 0.5788, "step": 41400 }, { "epoch": 2.0567199761597297, "grad_norm": 0.11279296875, "learning_rate": 0.0006354663752855867, "loss": 0.5456, "step": 41410 }, { "epoch": 2.0572166484553494, "grad_norm": 0.1865234375, "learning_rate": 0.000635426641501937, "loss": 0.5691, "step": 41420 }, { "epoch": 2.0577133207509686, "grad_norm": 0.1328125, "learning_rate": 0.0006353869077182875, "loss": 0.5643, "step": 41430 }, { "epoch": 2.058209993046588, "grad_norm": 0.08349609375, "learning_rate": 0.0006353471739346379, "loss": 0.5555, "step": 41440 }, { "epoch": 2.058706665342207, "grad_norm": 0.09521484375, "learning_rate": 0.0006353074401509884, "loss": 0.5833, "step": 41450 }, { "epoch": 2.0592033376378267, "grad_norm": 0.09423828125, "learning_rate": 0.0006352677063673389, "loss": 0.5867, "step": 41460 }, { "epoch": 2.059700009933446, "grad_norm": 0.09716796875, "learning_rate": 0.0006352279725836893, "loss": 0.5708, "step": 41470 }, { "epoch": 2.060196682229065, "grad_norm": 0.171875, "learning_rate": 0.0006351882388000397, "loss": 0.592, "step": 41480 }, { "epoch": 2.060693354524685, "grad_norm": 0.10400390625, "learning_rate": 0.0006351485050163902, "loss": 0.5738, "step": 41490 }, { "epoch": 2.061190026820304, "grad_norm": 0.09375, "learning_rate": 0.0006351087712327407, "loss": 0.5879, "step": 41500 }, { "epoch": 2.0616866991159233, "grad_norm": 0.126953125, "learning_rate": 0.0006350690374490912, "loss": 0.5452, "step": 41510 }, { "epoch": 2.0621833714115425, "grad_norm": 0.09765625, "learning_rate": 0.0006350293036654416, "loss": 0.5565, "step": 41520 }, { "epoch": 2.062680043707162, "grad_norm": 0.11865234375, "learning_rate": 0.000634989569881792, "loss": 0.5858, "step": 41530 }, { "epoch": 2.0631767160027814, "grad_norm": 0.146484375, "learning_rate": 0.0006349498360981425, "loss": 0.5948, "step": 41540 }, { "epoch": 2.0636733882984006, "grad_norm": 0.1015625, "learning_rate": 0.000634910102314493, "loss": 0.5653, "step": 41550 }, { "epoch": 2.06417006059402, "grad_norm": 0.0986328125, "learning_rate": 0.0006348703685308434, "loss": 0.5673, "step": 41560 }, { "epoch": 2.0646667328896395, "grad_norm": 0.1103515625, "learning_rate": 0.0006348306347471939, "loss": 0.5766, "step": 41570 }, { "epoch": 2.0651634051852588, "grad_norm": 0.130859375, "learning_rate": 0.0006347909009635442, "loss": 0.591, "step": 41580 }, { "epoch": 2.065660077480878, "grad_norm": 0.1240234375, "learning_rate": 0.0006347511671798947, "loss": 0.5991, "step": 41590 }, { "epoch": 2.0661567497764977, "grad_norm": 0.205078125, "learning_rate": 0.0006347114333962453, "loss": 0.5761, "step": 41600 }, { "epoch": 2.066653422072117, "grad_norm": 0.126953125, "learning_rate": 0.0006346716996125956, "loss": 0.5686, "step": 41610 }, { "epoch": 2.067150094367736, "grad_norm": 0.091796875, "learning_rate": 0.0006346319658289461, "loss": 0.5431, "step": 41620 }, { "epoch": 2.0676467666633553, "grad_norm": 0.1005859375, "learning_rate": 0.0006345922320452966, "loss": 0.5931, "step": 41630 }, { "epoch": 2.068143438958975, "grad_norm": 0.11767578125, "learning_rate": 0.0006345524982616469, "loss": 0.5624, "step": 41640 }, { "epoch": 2.0686401112545942, "grad_norm": 0.138671875, "learning_rate": 0.0006345127644779975, "loss": 0.6122, "step": 41650 }, { "epoch": 2.0691367835502135, "grad_norm": 0.10498046875, "learning_rate": 0.0006344730306943479, "loss": 0.5739, "step": 41660 }, { "epoch": 2.0696334558458327, "grad_norm": 0.189453125, "learning_rate": 0.0006344332969106984, "loss": 0.5955, "step": 41670 }, { "epoch": 2.0701301281414524, "grad_norm": 0.12255859375, "learning_rate": 0.0006343935631270488, "loss": 0.5844, "step": 41680 }, { "epoch": 2.0706268004370716, "grad_norm": 0.1943359375, "learning_rate": 0.0006343538293433992, "loss": 0.5685, "step": 41690 }, { "epoch": 2.071123472732691, "grad_norm": 0.0966796875, "learning_rate": 0.0006343140955597498, "loss": 0.5632, "step": 41700 }, { "epoch": 2.0716201450283105, "grad_norm": 0.10205078125, "learning_rate": 0.0006342743617761002, "loss": 0.5264, "step": 41710 }, { "epoch": 2.0721168173239297, "grad_norm": 0.1318359375, "learning_rate": 0.0006342346279924506, "loss": 0.5794, "step": 41720 }, { "epoch": 2.072613489619549, "grad_norm": 0.1083984375, "learning_rate": 0.0006341948942088011, "loss": 0.579, "step": 41730 }, { "epoch": 2.073110161915168, "grad_norm": 0.10693359375, "learning_rate": 0.0006341551604251515, "loss": 0.6261, "step": 41740 }, { "epoch": 2.073606834210788, "grad_norm": 0.1162109375, "learning_rate": 0.000634115426641502, "loss": 0.5718, "step": 41750 }, { "epoch": 2.074103506506407, "grad_norm": 0.166015625, "learning_rate": 0.0006340756928578525, "loss": 0.5665, "step": 41760 }, { "epoch": 2.0746001788020263, "grad_norm": 0.09326171875, "learning_rate": 0.0006340359590742028, "loss": 0.5789, "step": 41770 }, { "epoch": 2.075096851097646, "grad_norm": 0.1025390625, "learning_rate": 0.0006339962252905533, "loss": 0.5854, "step": 41780 }, { "epoch": 2.075593523393265, "grad_norm": 0.150390625, "learning_rate": 0.0006339564915069038, "loss": 0.5833, "step": 41790 }, { "epoch": 2.0760901956888844, "grad_norm": 0.09228515625, "learning_rate": 0.0006339167577232541, "loss": 0.5486, "step": 41800 }, { "epoch": 2.0765868679845036, "grad_norm": 0.134765625, "learning_rate": 0.0006338770239396047, "loss": 0.5727, "step": 41810 }, { "epoch": 2.0770835402801233, "grad_norm": 0.12060546875, "learning_rate": 0.0006338372901559552, "loss": 0.5696, "step": 41820 }, { "epoch": 2.0775802125757425, "grad_norm": 0.1064453125, "learning_rate": 0.0006337975563723056, "loss": 0.5609, "step": 41830 }, { "epoch": 2.0780768848713618, "grad_norm": 0.10888671875, "learning_rate": 0.000633757822588656, "loss": 0.5891, "step": 41840 }, { "epoch": 2.0785735571669814, "grad_norm": 0.10107421875, "learning_rate": 0.0006337180888050064, "loss": 0.5951, "step": 41850 }, { "epoch": 2.0790702294626007, "grad_norm": 0.0947265625, "learning_rate": 0.000633678355021357, "loss": 0.5704, "step": 41860 }, { "epoch": 2.07956690175822, "grad_norm": 0.10986328125, "learning_rate": 0.0006336386212377074, "loss": 0.5804, "step": 41870 }, { "epoch": 2.080063574053839, "grad_norm": 0.09765625, "learning_rate": 0.0006335988874540578, "loss": 0.5651, "step": 41880 }, { "epoch": 2.080560246349459, "grad_norm": 0.0869140625, "learning_rate": 0.0006335591536704083, "loss": 0.5728, "step": 41890 }, { "epoch": 2.081056918645078, "grad_norm": 0.09375, "learning_rate": 0.0006335194198867587, "loss": 0.5829, "step": 41900 }, { "epoch": 2.0815535909406973, "grad_norm": 0.1181640625, "learning_rate": 0.0006334796861031092, "loss": 0.5777, "step": 41910 }, { "epoch": 2.0820502632363165, "grad_norm": 0.11962890625, "learning_rate": 0.0006334399523194597, "loss": 0.5489, "step": 41920 }, { "epoch": 2.082546935531936, "grad_norm": 0.11669921875, "learning_rate": 0.0006334002185358101, "loss": 0.5876, "step": 41930 }, { "epoch": 2.0830436078275554, "grad_norm": 0.10009765625, "learning_rate": 0.0006333604847521605, "loss": 0.5451, "step": 41940 }, { "epoch": 2.0835402801231746, "grad_norm": 0.0888671875, "learning_rate": 0.000633320750968511, "loss": 0.6034, "step": 41950 }, { "epoch": 2.0840369524187943, "grad_norm": 0.1484375, "learning_rate": 0.0006332810171848615, "loss": 0.5812, "step": 41960 }, { "epoch": 2.0845336247144135, "grad_norm": 0.09716796875, "learning_rate": 0.0006332412834012119, "loss": 0.5701, "step": 41970 }, { "epoch": 2.0850302970100327, "grad_norm": 0.1474609375, "learning_rate": 0.0006332015496175624, "loss": 0.5569, "step": 41980 }, { "epoch": 2.085526969305652, "grad_norm": 0.1025390625, "learning_rate": 0.0006331618158339128, "loss": 0.6005, "step": 41990 }, { "epoch": 2.0860236416012716, "grad_norm": 0.091796875, "learning_rate": 0.0006331220820502632, "loss": 0.5837, "step": 42000 }, { "epoch": 2.086520313896891, "grad_norm": 0.1279296875, "learning_rate": 0.0006330823482666138, "loss": 0.576, "step": 42010 }, { "epoch": 2.08701698619251, "grad_norm": 0.146484375, "learning_rate": 0.0006330426144829642, "loss": 0.5952, "step": 42020 }, { "epoch": 2.0875136584881293, "grad_norm": 0.115234375, "learning_rate": 0.0006330028806993146, "loss": 0.6144, "step": 42030 }, { "epoch": 2.088010330783749, "grad_norm": 0.1328125, "learning_rate": 0.000632963146915665, "loss": 0.5758, "step": 42040 }, { "epoch": 2.088507003079368, "grad_norm": 0.1064453125, "learning_rate": 0.0006329234131320155, "loss": 0.5794, "step": 42050 }, { "epoch": 2.0890036753749874, "grad_norm": 0.09423828125, "learning_rate": 0.000632883679348366, "loss": 0.5563, "step": 42060 }, { "epoch": 2.089500347670607, "grad_norm": 0.09375, "learning_rate": 0.0006328439455647164, "loss": 0.5668, "step": 42070 }, { "epoch": 2.0899970199662263, "grad_norm": 0.10400390625, "learning_rate": 0.0006328042117810669, "loss": 0.5914, "step": 42080 }, { "epoch": 2.0904936922618456, "grad_norm": 0.1259765625, "learning_rate": 0.0006327644779974173, "loss": 0.5852, "step": 42090 }, { "epoch": 2.090990364557465, "grad_norm": 0.099609375, "learning_rate": 0.0006327247442137677, "loss": 0.559, "step": 42100 }, { "epoch": 2.0914870368530845, "grad_norm": 0.11328125, "learning_rate": 0.0006326850104301183, "loss": 0.5842, "step": 42110 }, { "epoch": 2.0919837091487037, "grad_norm": 0.10107421875, "learning_rate": 0.0006326452766464687, "loss": 0.5544, "step": 42120 }, { "epoch": 2.092480381444323, "grad_norm": 0.1201171875, "learning_rate": 0.0006326055428628191, "loss": 0.5769, "step": 42130 }, { "epoch": 2.0929770537399426, "grad_norm": 0.099609375, "learning_rate": 0.0006325658090791696, "loss": 0.5735, "step": 42140 }, { "epoch": 2.093473726035562, "grad_norm": 0.130859375, "learning_rate": 0.00063252607529552, "loss": 0.5723, "step": 42150 }, { "epoch": 2.093970398331181, "grad_norm": 0.0927734375, "learning_rate": 0.0006324863415118705, "loss": 0.5741, "step": 42160 }, { "epoch": 2.0944670706268003, "grad_norm": 0.11962890625, "learning_rate": 0.000632446607728221, "loss": 0.5562, "step": 42170 }, { "epoch": 2.09496374292242, "grad_norm": 0.09716796875, "learning_rate": 0.0006324068739445714, "loss": 0.5773, "step": 42180 }, { "epoch": 2.095460415218039, "grad_norm": 0.09765625, "learning_rate": 0.0006323671401609218, "loss": 0.587, "step": 42190 }, { "epoch": 2.0959570875136584, "grad_norm": 0.11474609375, "learning_rate": 0.0006323274063772723, "loss": 0.5982, "step": 42200 }, { "epoch": 2.096453759809278, "grad_norm": 0.13671875, "learning_rate": 0.0006322876725936228, "loss": 0.5631, "step": 42210 }, { "epoch": 2.0969504321048973, "grad_norm": 0.103515625, "learning_rate": 0.0006322479388099732, "loss": 0.5632, "step": 42220 }, { "epoch": 2.0974471044005165, "grad_norm": 0.09716796875, "learning_rate": 0.0006322082050263236, "loss": 0.5785, "step": 42230 }, { "epoch": 2.0979437766961357, "grad_norm": 0.0986328125, "learning_rate": 0.0006321684712426741, "loss": 0.575, "step": 42240 }, { "epoch": 2.0984404489917554, "grad_norm": 0.1376953125, "learning_rate": 0.0006321287374590245, "loss": 0.5626, "step": 42250 }, { "epoch": 2.0989371212873746, "grad_norm": 0.1513671875, "learning_rate": 0.000632089003675375, "loss": 0.6085, "step": 42260 }, { "epoch": 2.099433793582994, "grad_norm": 0.146484375, "learning_rate": 0.0006320492698917255, "loss": 0.5885, "step": 42270 }, { "epoch": 2.099930465878613, "grad_norm": 0.1064453125, "learning_rate": 0.000632009536108076, "loss": 0.5616, "step": 42280 }, { "epoch": 2.1004271381742328, "grad_norm": 0.162109375, "learning_rate": 0.0006319698023244263, "loss": 0.5764, "step": 42290 }, { "epoch": 2.100923810469852, "grad_norm": 0.1044921875, "learning_rate": 0.0006319300685407768, "loss": 0.5754, "step": 42300 }, { "epoch": 2.101420482765471, "grad_norm": 0.109375, "learning_rate": 0.0006318903347571273, "loss": 0.5673, "step": 42310 }, { "epoch": 2.101917155061091, "grad_norm": 0.12353515625, "learning_rate": 0.0006318506009734777, "loss": 0.5716, "step": 42320 }, { "epoch": 2.10241382735671, "grad_norm": 0.146484375, "learning_rate": 0.0006318108671898282, "loss": 0.5877, "step": 42330 }, { "epoch": 2.1029104996523293, "grad_norm": 0.1015625, "learning_rate": 0.0006317711334061786, "loss": 0.5538, "step": 42340 }, { "epoch": 2.1034071719479486, "grad_norm": 0.1181640625, "learning_rate": 0.000631731399622529, "loss": 0.5831, "step": 42350 }, { "epoch": 2.1039038442435682, "grad_norm": 0.09521484375, "learning_rate": 0.0006316916658388796, "loss": 0.5802, "step": 42360 }, { "epoch": 2.1044005165391875, "grad_norm": 0.1572265625, "learning_rate": 0.00063165193205523, "loss": 0.5466, "step": 42370 }, { "epoch": 2.1048971888348067, "grad_norm": 0.11669921875, "learning_rate": 0.0006316121982715804, "loss": 0.5852, "step": 42380 }, { "epoch": 2.105393861130426, "grad_norm": 0.091796875, "learning_rate": 0.0006315724644879309, "loss": 0.5649, "step": 42390 }, { "epoch": 2.1058905334260456, "grad_norm": 0.10107421875, "learning_rate": 0.0006315327307042813, "loss": 0.608, "step": 42400 }, { "epoch": 2.106387205721665, "grad_norm": 0.1259765625, "learning_rate": 0.0006314929969206319, "loss": 0.6046, "step": 42410 }, { "epoch": 2.106883878017284, "grad_norm": 0.09765625, "learning_rate": 0.0006314532631369823, "loss": 0.5915, "step": 42420 }, { "epoch": 2.1073805503129037, "grad_norm": 0.12890625, "learning_rate": 0.0006314135293533327, "loss": 0.579, "step": 42430 }, { "epoch": 2.107877222608523, "grad_norm": 0.08935546875, "learning_rate": 0.0006313737955696832, "loss": 0.5746, "step": 42440 }, { "epoch": 2.108373894904142, "grad_norm": 0.1123046875, "learning_rate": 0.0006313340617860335, "loss": 0.5734, "step": 42450 }, { "epoch": 2.1088705671997614, "grad_norm": 0.138671875, "learning_rate": 0.0006312943280023841, "loss": 0.5612, "step": 42460 }, { "epoch": 2.109367239495381, "grad_norm": 0.11962890625, "learning_rate": 0.0006312545942187346, "loss": 0.5787, "step": 42470 }, { "epoch": 2.1098639117910003, "grad_norm": 0.1396484375, "learning_rate": 0.0006312148604350849, "loss": 0.5601, "step": 42480 }, { "epoch": 2.1103605840866195, "grad_norm": 0.10400390625, "learning_rate": 0.0006311751266514354, "loss": 0.5422, "step": 42490 }, { "epoch": 2.110857256382239, "grad_norm": 0.1630859375, "learning_rate": 0.0006311353928677858, "loss": 0.592, "step": 42500 }, { "epoch": 2.1113539286778584, "grad_norm": 0.095703125, "learning_rate": 0.0006310956590841363, "loss": 0.5701, "step": 42510 }, { "epoch": 2.1118506009734777, "grad_norm": 0.09619140625, "learning_rate": 0.0006310559253004868, "loss": 0.5951, "step": 42520 }, { "epoch": 2.112347273269097, "grad_norm": 0.10986328125, "learning_rate": 0.0006310161915168372, "loss": 0.5869, "step": 42530 }, { "epoch": 2.1128439455647166, "grad_norm": 0.09814453125, "learning_rate": 0.0006309764577331876, "loss": 0.579, "step": 42540 }, { "epoch": 2.113340617860336, "grad_norm": 0.091796875, "learning_rate": 0.0006309367239495381, "loss": 0.5473, "step": 42550 }, { "epoch": 2.113837290155955, "grad_norm": 0.10595703125, "learning_rate": 0.0006308969901658886, "loss": 0.6045, "step": 42560 }, { "epoch": 2.1143339624515747, "grad_norm": 0.130859375, "learning_rate": 0.0006308572563822391, "loss": 0.5556, "step": 42570 }, { "epoch": 2.114830634747194, "grad_norm": 0.11669921875, "learning_rate": 0.0006308175225985895, "loss": 0.6007, "step": 42580 }, { "epoch": 2.115327307042813, "grad_norm": 0.12890625, "learning_rate": 0.0006307777888149399, "loss": 0.533, "step": 42590 }, { "epoch": 2.1158239793384324, "grad_norm": 0.09375, "learning_rate": 0.0006307380550312904, "loss": 0.548, "step": 42600 }, { "epoch": 2.116320651634052, "grad_norm": 0.138671875, "learning_rate": 0.0006306983212476409, "loss": 0.562, "step": 42610 }, { "epoch": 2.1168173239296713, "grad_norm": 0.1396484375, "learning_rate": 0.0006306585874639913, "loss": 0.5785, "step": 42620 }, { "epoch": 2.1173139962252905, "grad_norm": 0.205078125, "learning_rate": 0.0006306188536803418, "loss": 0.5775, "step": 42630 }, { "epoch": 2.1178106685209097, "grad_norm": 0.1328125, "learning_rate": 0.0006305791198966921, "loss": 0.5739, "step": 42640 }, { "epoch": 2.1183073408165294, "grad_norm": 0.11328125, "learning_rate": 0.0006305393861130426, "loss": 0.5826, "step": 42650 }, { "epoch": 2.1188040131121486, "grad_norm": 0.1328125, "learning_rate": 0.0006304996523293932, "loss": 0.5619, "step": 42660 }, { "epoch": 2.119300685407768, "grad_norm": 0.1142578125, "learning_rate": 0.0006304599185457435, "loss": 0.567, "step": 42670 }, { "epoch": 2.1197973577033875, "grad_norm": 0.09765625, "learning_rate": 0.000630420184762094, "loss": 0.5476, "step": 42680 }, { "epoch": 2.1202940299990067, "grad_norm": 0.10107421875, "learning_rate": 0.0006303804509784445, "loss": 0.5572, "step": 42690 }, { "epoch": 2.120790702294626, "grad_norm": 0.111328125, "learning_rate": 0.0006303407171947948, "loss": 0.5884, "step": 42700 }, { "epoch": 2.121287374590245, "grad_norm": 0.216796875, "learning_rate": 0.0006303009834111454, "loss": 0.591, "step": 42710 }, { "epoch": 2.121784046885865, "grad_norm": 0.158203125, "learning_rate": 0.0006302612496274958, "loss": 0.543, "step": 42720 }, { "epoch": 2.122280719181484, "grad_norm": 0.111328125, "learning_rate": 0.0006302215158438463, "loss": 0.5653, "step": 42730 }, { "epoch": 2.1227773914771033, "grad_norm": 0.12158203125, "learning_rate": 0.0006301817820601967, "loss": 0.6003, "step": 42740 }, { "epoch": 2.1232740637727225, "grad_norm": 0.134765625, "learning_rate": 0.0006301420482765471, "loss": 0.58, "step": 42750 }, { "epoch": 2.123770736068342, "grad_norm": 0.11328125, "learning_rate": 0.0006301023144928977, "loss": 0.5922, "step": 42760 }, { "epoch": 2.1242674083639614, "grad_norm": 0.09765625, "learning_rate": 0.0006300625807092481, "loss": 0.6117, "step": 42770 }, { "epoch": 2.1247640806595807, "grad_norm": 0.099609375, "learning_rate": 0.0006300228469255985, "loss": 0.576, "step": 42780 }, { "epoch": 2.1252607529552003, "grad_norm": 0.1044921875, "learning_rate": 0.000629983113141949, "loss": 0.5785, "step": 42790 }, { "epoch": 2.1257574252508196, "grad_norm": 0.1328125, "learning_rate": 0.0006299433793582994, "loss": 0.5766, "step": 42800 }, { "epoch": 2.126254097546439, "grad_norm": 0.0986328125, "learning_rate": 0.0006299036455746499, "loss": 0.5651, "step": 42810 }, { "epoch": 2.126750769842058, "grad_norm": 0.09716796875, "learning_rate": 0.0006298639117910004, "loss": 0.5559, "step": 42820 }, { "epoch": 2.1272474421376777, "grad_norm": 0.10205078125, "learning_rate": 0.0006298241780073507, "loss": 0.5722, "step": 42830 }, { "epoch": 2.127744114433297, "grad_norm": 0.0869140625, "learning_rate": 0.0006297844442237012, "loss": 0.5456, "step": 42840 }, { "epoch": 2.128240786728916, "grad_norm": 0.09716796875, "learning_rate": 0.0006297447104400517, "loss": 0.5748, "step": 42850 }, { "epoch": 2.128737459024536, "grad_norm": 0.1416015625, "learning_rate": 0.0006297049766564022, "loss": 0.6068, "step": 42860 }, { "epoch": 2.129234131320155, "grad_norm": 0.10791015625, "learning_rate": 0.0006296652428727526, "loss": 0.5902, "step": 42870 }, { "epoch": 2.1297308036157743, "grad_norm": 0.10205078125, "learning_rate": 0.0006296255090891031, "loss": 0.5661, "step": 42880 }, { "epoch": 2.1302274759113935, "grad_norm": 0.251953125, "learning_rate": 0.0006295857753054535, "loss": 0.5611, "step": 42890 }, { "epoch": 2.130724148207013, "grad_norm": 0.1337890625, "learning_rate": 0.0006295460415218039, "loss": 0.5864, "step": 42900 }, { "epoch": 2.1312208205026324, "grad_norm": 0.181640625, "learning_rate": 0.0006295063077381543, "loss": 0.5897, "step": 42910 }, { "epoch": 2.1317174927982516, "grad_norm": 0.138671875, "learning_rate": 0.0006294665739545049, "loss": 0.5606, "step": 42920 }, { "epoch": 2.1322141650938713, "grad_norm": 0.08544921875, "learning_rate": 0.0006294268401708553, "loss": 0.5695, "step": 42930 }, { "epoch": 2.1327108373894905, "grad_norm": 0.103515625, "learning_rate": 0.0006293871063872057, "loss": 0.5523, "step": 42940 }, { "epoch": 2.1332075096851097, "grad_norm": 0.11328125, "learning_rate": 0.0006293473726035562, "loss": 0.5766, "step": 42950 }, { "epoch": 2.133704181980729, "grad_norm": 0.09326171875, "learning_rate": 0.0006293076388199067, "loss": 0.5649, "step": 42960 }, { "epoch": 2.1342008542763486, "grad_norm": 0.1015625, "learning_rate": 0.0006292679050362571, "loss": 0.5848, "step": 42970 }, { "epoch": 2.134697526571968, "grad_norm": 0.138671875, "learning_rate": 0.0006292281712526076, "loss": 0.5745, "step": 42980 }, { "epoch": 2.135194198867587, "grad_norm": 0.1142578125, "learning_rate": 0.000629188437468958, "loss": 0.5442, "step": 42990 }, { "epoch": 2.1356908711632063, "grad_norm": 0.09765625, "learning_rate": 0.0006291487036853084, "loss": 0.5622, "step": 43000 }, { "epoch": 2.136187543458826, "grad_norm": 0.1279296875, "learning_rate": 0.000629108969901659, "loss": 0.5485, "step": 43010 }, { "epoch": 2.1366842157544452, "grad_norm": 0.09912109375, "learning_rate": 0.0006290692361180094, "loss": 0.5648, "step": 43020 }, { "epoch": 2.1371808880500645, "grad_norm": 0.1142578125, "learning_rate": 0.0006290295023343598, "loss": 0.5578, "step": 43030 }, { "epoch": 2.137677560345684, "grad_norm": 0.10546875, "learning_rate": 0.0006289897685507103, "loss": 0.5757, "step": 43040 }, { "epoch": 2.1381742326413034, "grad_norm": 0.10546875, "learning_rate": 0.0006289500347670607, "loss": 0.5711, "step": 43050 }, { "epoch": 2.1386709049369226, "grad_norm": 0.11962890625, "learning_rate": 0.0006289103009834111, "loss": 0.5715, "step": 43060 }, { "epoch": 2.139167577232542, "grad_norm": 0.1630859375, "learning_rate": 0.0006288705671997617, "loss": 0.5418, "step": 43070 }, { "epoch": 2.1396642495281615, "grad_norm": 0.09130859375, "learning_rate": 0.0006288308334161121, "loss": 0.5691, "step": 43080 }, { "epoch": 2.1401609218237807, "grad_norm": 0.1572265625, "learning_rate": 0.0006287910996324625, "loss": 0.5663, "step": 43090 }, { "epoch": 2.1406575941194, "grad_norm": 0.099609375, "learning_rate": 0.0006287513658488129, "loss": 0.5602, "step": 43100 }, { "epoch": 2.141154266415019, "grad_norm": 0.1435546875, "learning_rate": 0.0006287116320651635, "loss": 0.5615, "step": 43110 }, { "epoch": 2.141650938710639, "grad_norm": 0.09619140625, "learning_rate": 0.0006286718982815139, "loss": 0.5414, "step": 43120 }, { "epoch": 2.142147611006258, "grad_norm": 0.1220703125, "learning_rate": 0.0006286321644978643, "loss": 0.5916, "step": 43130 }, { "epoch": 2.1426442833018773, "grad_norm": 0.08740234375, "learning_rate": 0.0006285924307142148, "loss": 0.5707, "step": 43140 }, { "epoch": 2.143140955597497, "grad_norm": 0.0947265625, "learning_rate": 0.0006285526969305652, "loss": 0.5878, "step": 43150 }, { "epoch": 2.143637627893116, "grad_norm": 0.10302734375, "learning_rate": 0.0006285129631469156, "loss": 0.5597, "step": 43160 }, { "epoch": 2.1441343001887354, "grad_norm": 0.109375, "learning_rate": 0.0006284732293632662, "loss": 0.5935, "step": 43170 }, { "epoch": 2.1446309724843546, "grad_norm": 0.09814453125, "learning_rate": 0.0006284334955796166, "loss": 0.5761, "step": 43180 }, { "epoch": 2.1451276447799743, "grad_norm": 0.1376953125, "learning_rate": 0.000628393761795967, "loss": 0.573, "step": 43190 }, { "epoch": 2.1456243170755935, "grad_norm": 0.1787109375, "learning_rate": 0.0006283540280123175, "loss": 0.6216, "step": 43200 }, { "epoch": 2.1461209893712128, "grad_norm": 0.10400390625, "learning_rate": 0.000628314294228668, "loss": 0.5643, "step": 43210 }, { "epoch": 2.1466176616668324, "grad_norm": 0.1005859375, "learning_rate": 0.0006282745604450184, "loss": 0.5654, "step": 43220 }, { "epoch": 2.1471143339624517, "grad_norm": 0.1044921875, "learning_rate": 0.0006282348266613689, "loss": 0.586, "step": 43230 }, { "epoch": 2.147611006258071, "grad_norm": 0.1103515625, "learning_rate": 0.0006281950928777193, "loss": 0.5753, "step": 43240 }, { "epoch": 2.14810767855369, "grad_norm": 0.111328125, "learning_rate": 0.0006281553590940697, "loss": 0.5611, "step": 43250 }, { "epoch": 2.14860435084931, "grad_norm": 0.10107421875, "learning_rate": 0.0006281156253104203, "loss": 0.5534, "step": 43260 }, { "epoch": 2.149101023144929, "grad_norm": 0.1611328125, "learning_rate": 0.0006280758915267707, "loss": 0.5512, "step": 43270 }, { "epoch": 2.1495976954405482, "grad_norm": 0.115234375, "learning_rate": 0.0006280361577431211, "loss": 0.6057, "step": 43280 }, { "epoch": 2.150094367736168, "grad_norm": 0.11669921875, "learning_rate": 0.0006279964239594716, "loss": 0.6012, "step": 43290 }, { "epoch": 2.150591040031787, "grad_norm": 0.107421875, "learning_rate": 0.000627956690175822, "loss": 0.5826, "step": 43300 }, { "epoch": 2.1510877123274064, "grad_norm": 0.12890625, "learning_rate": 0.0006279169563921726, "loss": 0.5674, "step": 43310 }, { "epoch": 2.1515843846230256, "grad_norm": 0.095703125, "learning_rate": 0.0006278772226085229, "loss": 0.5706, "step": 43320 }, { "epoch": 2.1520810569186453, "grad_norm": 0.1318359375, "learning_rate": 0.0006278374888248734, "loss": 0.5826, "step": 43330 }, { "epoch": 2.1525777292142645, "grad_norm": 0.1572265625, "learning_rate": 0.0006277977550412239, "loss": 0.56, "step": 43340 }, { "epoch": 2.1530744015098837, "grad_norm": 0.09912109375, "learning_rate": 0.0006277580212575742, "loss": 0.6019, "step": 43350 }, { "epoch": 2.153571073805503, "grad_norm": 0.1103515625, "learning_rate": 0.0006277182874739247, "loss": 0.5726, "step": 43360 }, { "epoch": 2.1540677461011226, "grad_norm": 0.09619140625, "learning_rate": 0.0006276785536902752, "loss": 0.5966, "step": 43370 }, { "epoch": 2.154564418396742, "grad_norm": 0.09130859375, "learning_rate": 0.0006276388199066256, "loss": 0.5368, "step": 43380 }, { "epoch": 2.155061090692361, "grad_norm": 0.10107421875, "learning_rate": 0.0006275990861229761, "loss": 0.5404, "step": 43390 }, { "epoch": 2.1555577629879803, "grad_norm": 0.130859375, "learning_rate": 0.0006275593523393265, "loss": 0.5567, "step": 43400 }, { "epoch": 2.1560544352836, "grad_norm": 0.10888671875, "learning_rate": 0.0006275196185556769, "loss": 0.5695, "step": 43410 }, { "epoch": 2.156551107579219, "grad_norm": 0.09765625, "learning_rate": 0.0006274798847720275, "loss": 0.5826, "step": 43420 }, { "epoch": 2.1570477798748384, "grad_norm": 0.107421875, "learning_rate": 0.0006274401509883779, "loss": 0.5905, "step": 43430 }, { "epoch": 2.157544452170458, "grad_norm": 0.1015625, "learning_rate": 0.0006274004172047283, "loss": 0.572, "step": 43440 }, { "epoch": 2.1580411244660773, "grad_norm": 0.142578125, "learning_rate": 0.0006273606834210788, "loss": 0.583, "step": 43450 }, { "epoch": 2.1585377967616965, "grad_norm": 0.181640625, "learning_rate": 0.0006273209496374292, "loss": 0.5851, "step": 43460 }, { "epoch": 2.1590344690573158, "grad_norm": 0.1318359375, "learning_rate": 0.0006272812158537798, "loss": 0.5956, "step": 43470 }, { "epoch": 2.1595311413529354, "grad_norm": 0.10400390625, "learning_rate": 0.0006272414820701302, "loss": 0.5583, "step": 43480 }, { "epoch": 2.1600278136485547, "grad_norm": 0.111328125, "learning_rate": 0.0006272017482864806, "loss": 0.6028, "step": 43490 }, { "epoch": 2.160524485944174, "grad_norm": 0.2138671875, "learning_rate": 0.0006271620145028311, "loss": 0.5814, "step": 43500 }, { "epoch": 2.1610211582397936, "grad_norm": 0.1298828125, "learning_rate": 0.0006271222807191814, "loss": 0.553, "step": 43510 }, { "epoch": 2.161517830535413, "grad_norm": 0.1337890625, "learning_rate": 0.000627082546935532, "loss": 0.5377, "step": 43520 }, { "epoch": 2.162014502831032, "grad_norm": 0.1826171875, "learning_rate": 0.0006270428131518825, "loss": 0.5696, "step": 43530 }, { "epoch": 2.1625111751266513, "grad_norm": 0.10205078125, "learning_rate": 0.0006270030793682328, "loss": 0.534, "step": 43540 }, { "epoch": 2.163007847422271, "grad_norm": 0.10888671875, "learning_rate": 0.0006269633455845833, "loss": 0.5761, "step": 43550 }, { "epoch": 2.16350451971789, "grad_norm": 0.12890625, "learning_rate": 0.0006269236118009338, "loss": 0.5962, "step": 43560 }, { "epoch": 2.1640011920135094, "grad_norm": 0.10791015625, "learning_rate": 0.0006268838780172842, "loss": 0.5698, "step": 43570 }, { "epoch": 2.164497864309129, "grad_norm": 0.10302734375, "learning_rate": 0.0006268441442336347, "loss": 0.5536, "step": 43580 }, { "epoch": 2.1649945366047483, "grad_norm": 0.09814453125, "learning_rate": 0.0006268044104499851, "loss": 0.5618, "step": 43590 }, { "epoch": 2.1654912089003675, "grad_norm": 0.11962890625, "learning_rate": 0.0006267646766663356, "loss": 0.5572, "step": 43600 }, { "epoch": 2.1659878811959867, "grad_norm": 0.115234375, "learning_rate": 0.000626724942882686, "loss": 0.5328, "step": 43610 }, { "epoch": 2.1664845534916064, "grad_norm": 0.11376953125, "learning_rate": 0.0006266852090990365, "loss": 0.5961, "step": 43620 }, { "epoch": 2.1669812257872256, "grad_norm": 0.1025390625, "learning_rate": 0.000626645475315387, "loss": 0.5593, "step": 43630 }, { "epoch": 2.167477898082845, "grad_norm": 0.10107421875, "learning_rate": 0.0006266057415317374, "loss": 0.5712, "step": 43640 }, { "epoch": 2.167974570378464, "grad_norm": 0.1416015625, "learning_rate": 0.0006265660077480878, "loss": 0.5823, "step": 43650 }, { "epoch": 2.1684712426740838, "grad_norm": 0.1005859375, "learning_rate": 0.0006265262739644383, "loss": 0.5414, "step": 43660 }, { "epoch": 2.168967914969703, "grad_norm": 0.10107421875, "learning_rate": 0.0006264865401807888, "loss": 0.5812, "step": 43670 }, { "epoch": 2.169464587265322, "grad_norm": 0.19140625, "learning_rate": 0.0006264468063971392, "loss": 0.5957, "step": 43680 }, { "epoch": 2.169961259560942, "grad_norm": 0.1259765625, "learning_rate": 0.0006264070726134897, "loss": 0.5713, "step": 43690 }, { "epoch": 2.170457931856561, "grad_norm": 0.11328125, "learning_rate": 0.00062636733882984, "loss": 0.5753, "step": 43700 }, { "epoch": 2.1709546041521803, "grad_norm": 0.10107421875, "learning_rate": 0.0006263276050461905, "loss": 0.5447, "step": 43710 }, { "epoch": 2.1714512764477996, "grad_norm": 0.10107421875, "learning_rate": 0.0006262878712625411, "loss": 0.5614, "step": 43720 }, { "epoch": 2.1719479487434192, "grad_norm": 0.1142578125, "learning_rate": 0.0006262481374788914, "loss": 0.5699, "step": 43730 }, { "epoch": 2.1724446210390385, "grad_norm": 0.09912109375, "learning_rate": 0.0006262084036952419, "loss": 0.5488, "step": 43740 }, { "epoch": 2.1729412933346577, "grad_norm": 0.140625, "learning_rate": 0.0006261686699115924, "loss": 0.5825, "step": 43750 }, { "epoch": 2.173437965630277, "grad_norm": 0.0986328125, "learning_rate": 0.0006261289361279428, "loss": 0.5778, "step": 43760 }, { "epoch": 2.1739346379258966, "grad_norm": 0.1494140625, "learning_rate": 0.0006260892023442933, "loss": 0.5551, "step": 43770 }, { "epoch": 2.174431310221516, "grad_norm": 0.1044921875, "learning_rate": 0.0006260494685606437, "loss": 0.5708, "step": 43780 }, { "epoch": 2.174927982517135, "grad_norm": 0.08837890625, "learning_rate": 0.0006260097347769942, "loss": 0.5614, "step": 43790 }, { "epoch": 2.1754246548127547, "grad_norm": 0.0927734375, "learning_rate": 0.0006259700009933446, "loss": 0.5647, "step": 43800 }, { "epoch": 2.175921327108374, "grad_norm": 0.11279296875, "learning_rate": 0.000625930267209695, "loss": 0.5616, "step": 43810 }, { "epoch": 2.176417999403993, "grad_norm": 0.09765625, "learning_rate": 0.0006258905334260456, "loss": 0.5607, "step": 43820 }, { "epoch": 2.1769146716996124, "grad_norm": 0.09765625, "learning_rate": 0.000625850799642396, "loss": 0.568, "step": 43830 }, { "epoch": 2.177411343995232, "grad_norm": 0.10595703125, "learning_rate": 0.0006258110658587464, "loss": 0.5598, "step": 43840 }, { "epoch": 2.1779080162908513, "grad_norm": 0.09423828125, "learning_rate": 0.0006257713320750969, "loss": 0.5484, "step": 43850 }, { "epoch": 2.1784046885864705, "grad_norm": 0.1083984375, "learning_rate": 0.0006257315982914473, "loss": 0.572, "step": 43860 }, { "epoch": 2.17890136088209, "grad_norm": 0.1259765625, "learning_rate": 0.0006256918645077978, "loss": 0.5755, "step": 43870 }, { "epoch": 2.1793980331777094, "grad_norm": 0.099609375, "learning_rate": 0.0006256521307241483, "loss": 0.5544, "step": 43880 }, { "epoch": 2.1798947054733286, "grad_norm": 0.1005859375, "learning_rate": 0.0006256123969404987, "loss": 0.5519, "step": 43890 }, { "epoch": 2.180391377768948, "grad_norm": 0.09716796875, "learning_rate": 0.0006255726631568491, "loss": 0.5935, "step": 43900 }, { "epoch": 2.1808880500645675, "grad_norm": 0.12451171875, "learning_rate": 0.0006255329293731996, "loss": 0.562, "step": 43910 }, { "epoch": 2.1813847223601868, "grad_norm": 0.1455078125, "learning_rate": 0.0006254931955895501, "loss": 0.562, "step": 43920 }, { "epoch": 2.181881394655806, "grad_norm": 0.1259765625, "learning_rate": 0.0006254534618059005, "loss": 0.5664, "step": 43930 }, { "epoch": 2.1823780669514257, "grad_norm": 0.10498046875, "learning_rate": 0.000625413728022251, "loss": 0.5742, "step": 43940 }, { "epoch": 2.182874739247045, "grad_norm": 0.09521484375, "learning_rate": 0.0006253739942386014, "loss": 0.5476, "step": 43950 }, { "epoch": 2.183371411542664, "grad_norm": 0.09619140625, "learning_rate": 0.0006253342604549518, "loss": 0.6065, "step": 43960 }, { "epoch": 2.1838680838382833, "grad_norm": 0.12451171875, "learning_rate": 0.0006252945266713023, "loss": 0.5835, "step": 43970 }, { "epoch": 2.184364756133903, "grad_norm": 0.123046875, "learning_rate": 0.0006252547928876528, "loss": 0.5412, "step": 43980 }, { "epoch": 2.1848614284295222, "grad_norm": 0.185546875, "learning_rate": 0.0006252150591040032, "loss": 0.5947, "step": 43990 }, { "epoch": 2.1853581007251415, "grad_norm": 0.130859375, "learning_rate": 0.0006251753253203536, "loss": 0.57, "step": 44000 }, { "epoch": 2.1858547730207607, "grad_norm": 0.13671875, "learning_rate": 0.0006251355915367041, "loss": 0.5734, "step": 44010 }, { "epoch": 2.1863514453163804, "grad_norm": 0.0947265625, "learning_rate": 0.0006250958577530546, "loss": 0.5586, "step": 44020 }, { "epoch": 2.1868481176119996, "grad_norm": 0.09521484375, "learning_rate": 0.000625056123969405, "loss": 0.5857, "step": 44030 }, { "epoch": 2.187344789907619, "grad_norm": 0.09033203125, "learning_rate": 0.0006250163901857555, "loss": 0.5902, "step": 44040 }, { "epoch": 2.1878414622032385, "grad_norm": 0.09912109375, "learning_rate": 0.0006249766564021059, "loss": 0.5731, "step": 44050 }, { "epoch": 2.1883381344988577, "grad_norm": 0.111328125, "learning_rate": 0.0006249369226184563, "loss": 0.5886, "step": 44060 }, { "epoch": 2.188834806794477, "grad_norm": 0.1337890625, "learning_rate": 0.0006248971888348069, "loss": 0.5806, "step": 44070 }, { "epoch": 2.189331479090096, "grad_norm": 0.12890625, "learning_rate": 0.0006248574550511573, "loss": 0.5795, "step": 44080 }, { "epoch": 2.189828151385716, "grad_norm": 0.1123046875, "learning_rate": 0.0006248177212675077, "loss": 0.5496, "step": 44090 }, { "epoch": 2.190324823681335, "grad_norm": 0.1103515625, "learning_rate": 0.0006247779874838582, "loss": 0.551, "step": 44100 }, { "epoch": 2.1908214959769543, "grad_norm": 0.09716796875, "learning_rate": 0.0006247382537002086, "loss": 0.5759, "step": 44110 }, { "epoch": 2.1913181682725735, "grad_norm": 0.142578125, "learning_rate": 0.000624698519916559, "loss": 0.5475, "step": 44120 }, { "epoch": 2.191814840568193, "grad_norm": 0.12109375, "learning_rate": 0.0006246587861329096, "loss": 0.542, "step": 44130 }, { "epoch": 2.1923115128638124, "grad_norm": 0.10888671875, "learning_rate": 0.00062461905234926, "loss": 0.5988, "step": 44140 }, { "epoch": 2.1928081851594317, "grad_norm": 0.11572265625, "learning_rate": 0.0006245793185656104, "loss": 0.583, "step": 44150 }, { "epoch": 2.1933048574550513, "grad_norm": 0.087890625, "learning_rate": 0.0006245395847819609, "loss": 0.5564, "step": 44160 }, { "epoch": 2.1938015297506706, "grad_norm": 0.11328125, "learning_rate": 0.0006244998509983114, "loss": 0.5518, "step": 44170 }, { "epoch": 2.19429820204629, "grad_norm": 0.111328125, "learning_rate": 0.0006244601172146618, "loss": 0.523, "step": 44180 }, { "epoch": 2.194794874341909, "grad_norm": 0.1064453125, "learning_rate": 0.0006244203834310122, "loss": 0.5687, "step": 44190 }, { "epoch": 2.1952915466375287, "grad_norm": 0.09912109375, "learning_rate": 0.0006243806496473627, "loss": 0.5617, "step": 44200 }, { "epoch": 2.195788218933148, "grad_norm": 0.1044921875, "learning_rate": 0.0006243409158637132, "loss": 0.5695, "step": 44210 }, { "epoch": 2.196284891228767, "grad_norm": 0.09326171875, "learning_rate": 0.0006243011820800635, "loss": 0.5669, "step": 44220 }, { "epoch": 2.196781563524387, "grad_norm": 0.10888671875, "learning_rate": 0.0006242614482964141, "loss": 0.5852, "step": 44230 }, { "epoch": 2.197278235820006, "grad_norm": 0.11279296875, "learning_rate": 0.0006242217145127645, "loss": 0.5893, "step": 44240 }, { "epoch": 2.1977749081156253, "grad_norm": 0.1455078125, "learning_rate": 0.0006241819807291149, "loss": 0.5775, "step": 44250 }, { "epoch": 2.1982715804112445, "grad_norm": 0.1015625, "learning_rate": 0.0006241422469454654, "loss": 0.5627, "step": 44260 }, { "epoch": 2.198768252706864, "grad_norm": 0.1513671875, "learning_rate": 0.0006241025131618159, "loss": 0.5642, "step": 44270 }, { "epoch": 2.1992649250024834, "grad_norm": 0.130859375, "learning_rate": 0.0006240627793781663, "loss": 0.54, "step": 44280 }, { "epoch": 2.1997615972981026, "grad_norm": 0.109375, "learning_rate": 0.0006240230455945168, "loss": 0.5426, "step": 44290 }, { "epoch": 2.2002582695937223, "grad_norm": 0.09326171875, "learning_rate": 0.0006239833118108672, "loss": 0.5992, "step": 44300 }, { "epoch": 2.2007549418893415, "grad_norm": 0.1123046875, "learning_rate": 0.0006239435780272176, "loss": 0.5477, "step": 44310 }, { "epoch": 2.2012516141849607, "grad_norm": 0.119140625, "learning_rate": 0.0006239038442435682, "loss": 0.5361, "step": 44320 }, { "epoch": 2.20174828648058, "grad_norm": 0.11962890625, "learning_rate": 0.0006238641104599186, "loss": 0.5513, "step": 44330 }, { "epoch": 2.2022449587761996, "grad_norm": 0.1015625, "learning_rate": 0.000623824376676269, "loss": 0.5877, "step": 44340 }, { "epoch": 2.202741631071819, "grad_norm": 0.0927734375, "learning_rate": 0.0006237846428926195, "loss": 0.5765, "step": 44350 }, { "epoch": 2.203238303367438, "grad_norm": 0.09375, "learning_rate": 0.0006237449091089699, "loss": 0.5669, "step": 44360 }, { "epoch": 2.2037349756630573, "grad_norm": 0.0927734375, "learning_rate": 0.0006237051753253205, "loss": 0.5862, "step": 44370 }, { "epoch": 2.204231647958677, "grad_norm": 0.150390625, "learning_rate": 0.0006236654415416708, "loss": 0.5514, "step": 44380 }, { "epoch": 2.204728320254296, "grad_norm": 0.09912109375, "learning_rate": 0.0006236257077580213, "loss": 0.572, "step": 44390 }, { "epoch": 2.2052249925499154, "grad_norm": 0.1279296875, "learning_rate": 0.0006235859739743718, "loss": 0.5707, "step": 44400 }, { "epoch": 2.205721664845535, "grad_norm": 0.1728515625, "learning_rate": 0.0006235462401907221, "loss": 0.5504, "step": 44410 }, { "epoch": 2.2062183371411543, "grad_norm": 0.10107421875, "learning_rate": 0.0006235065064070727, "loss": 0.5581, "step": 44420 }, { "epoch": 2.2067150094367736, "grad_norm": 0.130859375, "learning_rate": 0.0006234667726234232, "loss": 0.5712, "step": 44430 }, { "epoch": 2.207211681732393, "grad_norm": 0.125, "learning_rate": 0.0006234270388397735, "loss": 0.5782, "step": 44440 }, { "epoch": 2.2077083540280125, "grad_norm": 0.1259765625, "learning_rate": 0.000623387305056124, "loss": 0.593, "step": 44450 }, { "epoch": 2.2082050263236317, "grad_norm": 0.1181640625, "learning_rate": 0.0006233475712724744, "loss": 0.5679, "step": 44460 }, { "epoch": 2.208701698619251, "grad_norm": 0.17578125, "learning_rate": 0.0006233078374888248, "loss": 0.5535, "step": 44470 }, { "epoch": 2.20919837091487, "grad_norm": 0.09912109375, "learning_rate": 0.0006232681037051754, "loss": 0.5747, "step": 44480 }, { "epoch": 2.20969504321049, "grad_norm": 0.125, "learning_rate": 0.0006232283699215258, "loss": 0.5539, "step": 44490 }, { "epoch": 2.210191715506109, "grad_norm": 0.1640625, "learning_rate": 0.0006231886361378763, "loss": 0.5745, "step": 44500 }, { "epoch": 2.2106883878017283, "grad_norm": 0.09228515625, "learning_rate": 0.0006231489023542267, "loss": 0.5689, "step": 44510 }, { "epoch": 2.211185060097348, "grad_norm": 0.1025390625, "learning_rate": 0.0006231091685705771, "loss": 0.5573, "step": 44520 }, { "epoch": 2.211681732392967, "grad_norm": 0.1552734375, "learning_rate": 0.0006230694347869277, "loss": 0.5812, "step": 44530 }, { "epoch": 2.2121784046885864, "grad_norm": 0.09716796875, "learning_rate": 0.0006230297010032781, "loss": 0.5662, "step": 44540 }, { "epoch": 2.2126750769842056, "grad_norm": 0.1259765625, "learning_rate": 0.0006229899672196285, "loss": 0.5748, "step": 44550 }, { "epoch": 2.2131717492798253, "grad_norm": 0.1259765625, "learning_rate": 0.000622950233435979, "loss": 0.5514, "step": 44560 }, { "epoch": 2.2136684215754445, "grad_norm": 0.2041015625, "learning_rate": 0.0006229104996523293, "loss": 0.5866, "step": 44570 }, { "epoch": 2.2141650938710637, "grad_norm": 0.126953125, "learning_rate": 0.0006228707658686799, "loss": 0.598, "step": 44580 }, { "epoch": 2.2146617661666834, "grad_norm": 0.09130859375, "learning_rate": 0.0006228310320850304, "loss": 0.5792, "step": 44590 }, { "epoch": 2.2151584384623026, "grad_norm": 0.10009765625, "learning_rate": 0.0006227912983013807, "loss": 0.572, "step": 44600 }, { "epoch": 2.215655110757922, "grad_norm": 0.142578125, "learning_rate": 0.0006227515645177312, "loss": 0.5688, "step": 44610 }, { "epoch": 2.216151783053541, "grad_norm": 0.0966796875, "learning_rate": 0.0006227118307340818, "loss": 0.5636, "step": 44620 }, { "epoch": 2.2166484553491608, "grad_norm": 0.1025390625, "learning_rate": 0.0006226720969504321, "loss": 0.5957, "step": 44630 }, { "epoch": 2.21714512764478, "grad_norm": 0.10595703125, "learning_rate": 0.0006226323631667826, "loss": 0.5581, "step": 44640 }, { "epoch": 2.2176417999403992, "grad_norm": 0.125, "learning_rate": 0.000622592629383133, "loss": 0.5876, "step": 44650 }, { "epoch": 2.218138472236019, "grad_norm": 0.123046875, "learning_rate": 0.0006225528955994835, "loss": 0.5619, "step": 44660 }, { "epoch": 2.218635144531638, "grad_norm": 0.1005859375, "learning_rate": 0.000622513161815834, "loss": 0.567, "step": 44670 }, { "epoch": 2.2191318168272574, "grad_norm": 0.1171875, "learning_rate": 0.0006224734280321844, "loss": 0.5615, "step": 44680 }, { "epoch": 2.2196284891228766, "grad_norm": 0.138671875, "learning_rate": 0.0006224336942485349, "loss": 0.5507, "step": 44690 }, { "epoch": 2.2201251614184963, "grad_norm": 0.10888671875, "learning_rate": 0.0006223939604648853, "loss": 0.5586, "step": 44700 }, { "epoch": 2.2206218337141155, "grad_norm": 0.10302734375, "learning_rate": 0.0006223542266812357, "loss": 0.5518, "step": 44710 }, { "epoch": 2.2211185060097347, "grad_norm": 0.1025390625, "learning_rate": 0.0006223144928975862, "loss": 0.5889, "step": 44720 }, { "epoch": 2.221615178305354, "grad_norm": 0.09716796875, "learning_rate": 0.0006222747591139367, "loss": 0.5527, "step": 44730 }, { "epoch": 2.2221118506009736, "grad_norm": 0.0986328125, "learning_rate": 0.0006222350253302871, "loss": 0.5946, "step": 44740 }, { "epoch": 2.222608522896593, "grad_norm": 0.09375, "learning_rate": 0.0006221952915466376, "loss": 0.5737, "step": 44750 }, { "epoch": 2.223105195192212, "grad_norm": 0.091796875, "learning_rate": 0.000622155557762988, "loss": 0.5618, "step": 44760 }, { "epoch": 2.2236018674878317, "grad_norm": 0.1630859375, "learning_rate": 0.0006221158239793384, "loss": 0.5826, "step": 44770 }, { "epoch": 2.224098539783451, "grad_norm": 0.115234375, "learning_rate": 0.000622076090195689, "loss": 0.5718, "step": 44780 }, { "epoch": 2.22459521207907, "grad_norm": 0.150390625, "learning_rate": 0.0006220363564120393, "loss": 0.5569, "step": 44790 }, { "epoch": 2.2250918843746894, "grad_norm": 0.10302734375, "learning_rate": 0.0006219966226283898, "loss": 0.5731, "step": 44800 }, { "epoch": 2.225588556670309, "grad_norm": 0.1083984375, "learning_rate": 0.0006219568888447403, "loss": 0.5907, "step": 44810 }, { "epoch": 2.2260852289659283, "grad_norm": 0.10791015625, "learning_rate": 0.0006219171550610907, "loss": 0.5298, "step": 44820 }, { "epoch": 2.2265819012615475, "grad_norm": 0.10546875, "learning_rate": 0.0006218774212774412, "loss": 0.5683, "step": 44830 }, { "epoch": 2.2270785735571668, "grad_norm": 0.138671875, "learning_rate": 0.0006218376874937916, "loss": 0.5793, "step": 44840 }, { "epoch": 2.2275752458527864, "grad_norm": 0.08935546875, "learning_rate": 0.0006217979537101421, "loss": 0.5538, "step": 44850 }, { "epoch": 2.2280719181484057, "grad_norm": 0.10498046875, "learning_rate": 0.0006217582199264925, "loss": 0.5721, "step": 44860 }, { "epoch": 2.228568590444025, "grad_norm": 0.119140625, "learning_rate": 0.0006217184861428429, "loss": 0.5709, "step": 44870 }, { "epoch": 2.2290652627396446, "grad_norm": 0.11962890625, "learning_rate": 0.0006216787523591935, "loss": 0.5901, "step": 44880 }, { "epoch": 2.229561935035264, "grad_norm": 0.1328125, "learning_rate": 0.0006216390185755439, "loss": 0.5396, "step": 44890 }, { "epoch": 2.230058607330883, "grad_norm": 0.1015625, "learning_rate": 0.0006215992847918943, "loss": 0.5628, "step": 44900 }, { "epoch": 2.2305552796265022, "grad_norm": 0.1025390625, "learning_rate": 0.0006215595510082448, "loss": 0.5887, "step": 44910 }, { "epoch": 2.231051951922122, "grad_norm": 0.099609375, "learning_rate": 0.0006215198172245952, "loss": 0.5642, "step": 44920 }, { "epoch": 2.231548624217741, "grad_norm": 0.11328125, "learning_rate": 0.0006214800834409457, "loss": 0.5751, "step": 44930 }, { "epoch": 2.2320452965133604, "grad_norm": 0.1181640625, "learning_rate": 0.0006214403496572962, "loss": 0.572, "step": 44940 }, { "epoch": 2.23254196880898, "grad_norm": 0.12890625, "learning_rate": 0.0006214006158736466, "loss": 0.5656, "step": 44950 }, { "epoch": 2.2330386411045993, "grad_norm": 0.1552734375, "learning_rate": 0.000621360882089997, "loss": 0.5807, "step": 44960 }, { "epoch": 2.2335353134002185, "grad_norm": 0.13671875, "learning_rate": 0.0006213211483063475, "loss": 0.5496, "step": 44970 }, { "epoch": 2.2340319856958377, "grad_norm": 0.1396484375, "learning_rate": 0.000621281414522698, "loss": 0.5722, "step": 44980 }, { "epoch": 2.2345286579914574, "grad_norm": 0.11328125, "learning_rate": 0.0006212416807390484, "loss": 0.5721, "step": 44990 }, { "epoch": 2.2350253302870766, "grad_norm": 0.11474609375, "learning_rate": 0.0006212019469553989, "loss": 0.5698, "step": 45000 }, { "epoch": 2.235522002582696, "grad_norm": 0.09326171875, "learning_rate": 0.0006211622131717493, "loss": 0.5797, "step": 45010 }, { "epoch": 2.2360186748783155, "grad_norm": 0.09716796875, "learning_rate": 0.0006211224793880997, "loss": 0.5671, "step": 45020 }, { "epoch": 2.2365153471739347, "grad_norm": 0.09619140625, "learning_rate": 0.0006210827456044503, "loss": 0.5678, "step": 45030 }, { "epoch": 2.237012019469554, "grad_norm": 0.13671875, "learning_rate": 0.0006210430118208007, "loss": 0.5494, "step": 45040 }, { "epoch": 2.237508691765173, "grad_norm": 0.12890625, "learning_rate": 0.0006210032780371511, "loss": 0.5662, "step": 45050 }, { "epoch": 2.238005364060793, "grad_norm": 0.1279296875, "learning_rate": 0.0006209635442535015, "loss": 0.5825, "step": 45060 }, { "epoch": 2.238502036356412, "grad_norm": 0.1396484375, "learning_rate": 0.000620923810469852, "loss": 0.5997, "step": 45070 }, { "epoch": 2.2389987086520313, "grad_norm": 0.09375, "learning_rate": 0.0006208840766862025, "loss": 0.5766, "step": 45080 }, { "epoch": 2.2394953809476505, "grad_norm": 0.162109375, "learning_rate": 0.0006208443429025529, "loss": 0.5571, "step": 45090 }, { "epoch": 2.23999205324327, "grad_norm": 0.11572265625, "learning_rate": 0.0006208046091189034, "loss": 0.5639, "step": 45100 }, { "epoch": 2.2404887255388894, "grad_norm": 0.09423828125, "learning_rate": 0.0006207648753352538, "loss": 0.5667, "step": 45110 }, { "epoch": 2.2409853978345087, "grad_norm": 0.09619140625, "learning_rate": 0.0006207251415516042, "loss": 0.5826, "step": 45120 }, { "epoch": 2.2414820701301283, "grad_norm": 0.09912109375, "learning_rate": 0.0006206854077679548, "loss": 0.5695, "step": 45130 }, { "epoch": 2.2419787424257476, "grad_norm": 0.11865234375, "learning_rate": 0.0006206456739843052, "loss": 0.5393, "step": 45140 }, { "epoch": 2.242475414721367, "grad_norm": 0.099609375, "learning_rate": 0.0006206059402006556, "loss": 0.5668, "step": 45150 }, { "epoch": 2.242972087016986, "grad_norm": 0.10595703125, "learning_rate": 0.0006205662064170061, "loss": 0.5789, "step": 45160 }, { "epoch": 2.2434687593126057, "grad_norm": 0.1162109375, "learning_rate": 0.0006205264726333565, "loss": 0.5663, "step": 45170 }, { "epoch": 2.243965431608225, "grad_norm": 0.09814453125, "learning_rate": 0.000620486738849707, "loss": 0.5535, "step": 45180 }, { "epoch": 2.244462103903844, "grad_norm": 0.1533203125, "learning_rate": 0.0006204470050660575, "loss": 0.6223, "step": 45190 }, { "epoch": 2.2449587761994634, "grad_norm": 0.09326171875, "learning_rate": 0.0006204072712824079, "loss": 0.5743, "step": 45200 }, { "epoch": 2.245455448495083, "grad_norm": 0.095703125, "learning_rate": 0.0006203675374987583, "loss": 0.57, "step": 45210 }, { "epoch": 2.2459521207907023, "grad_norm": 0.1591796875, "learning_rate": 0.0006203278037151088, "loss": 0.5516, "step": 45220 }, { "epoch": 2.2464487930863215, "grad_norm": 0.15625, "learning_rate": 0.0006202880699314593, "loss": 0.5767, "step": 45230 }, { "epoch": 2.246945465381941, "grad_norm": 0.130859375, "learning_rate": 0.0006202483361478097, "loss": 0.558, "step": 45240 }, { "epoch": 2.2474421376775604, "grad_norm": 0.08935546875, "learning_rate": 0.0006202086023641601, "loss": 0.5542, "step": 45250 }, { "epoch": 2.2479388099731796, "grad_norm": 0.109375, "learning_rate": 0.0006201688685805106, "loss": 0.5535, "step": 45260 }, { "epoch": 2.248435482268799, "grad_norm": 0.140625, "learning_rate": 0.0006201291347968611, "loss": 0.5627, "step": 45270 }, { "epoch": 2.2489321545644185, "grad_norm": 0.19921875, "learning_rate": 0.0006200894010132115, "loss": 0.5727, "step": 45280 }, { "epoch": 2.2494288268600378, "grad_norm": 0.11962890625, "learning_rate": 0.000620049667229562, "loss": 0.5784, "step": 45290 }, { "epoch": 2.249925499155657, "grad_norm": 0.11181640625, "learning_rate": 0.0006200099334459124, "loss": 0.5787, "step": 45300 }, { "epoch": 2.2504221714512767, "grad_norm": 0.11083984375, "learning_rate": 0.0006199701996622628, "loss": 0.5643, "step": 45310 }, { "epoch": 2.250918843746896, "grad_norm": 0.154296875, "learning_rate": 0.0006199304658786133, "loss": 0.5724, "step": 45320 }, { "epoch": 2.251415516042515, "grad_norm": 0.1259765625, "learning_rate": 0.0006198907320949638, "loss": 0.5998, "step": 45330 }, { "epoch": 2.2519121883381343, "grad_norm": 0.09619140625, "learning_rate": 0.0006198509983113142, "loss": 0.5486, "step": 45340 }, { "epoch": 2.252408860633754, "grad_norm": 0.130859375, "learning_rate": 0.0006198112645276647, "loss": 0.559, "step": 45350 }, { "epoch": 2.2529055329293732, "grad_norm": 0.095703125, "learning_rate": 0.0006197715307440151, "loss": 0.5696, "step": 45360 }, { "epoch": 2.2534022052249925, "grad_norm": 0.095703125, "learning_rate": 0.0006197317969603655, "loss": 0.5941, "step": 45370 }, { "epoch": 2.253898877520612, "grad_norm": 0.10888671875, "learning_rate": 0.0006196920631767161, "loss": 0.5569, "step": 45380 }, { "epoch": 2.2543955498162314, "grad_norm": 0.123046875, "learning_rate": 0.0006196523293930665, "loss": 0.5771, "step": 45390 }, { "epoch": 2.2548922221118506, "grad_norm": 0.0986328125, "learning_rate": 0.000619612595609417, "loss": 0.5813, "step": 45400 }, { "epoch": 2.25538889440747, "grad_norm": 0.1201171875, "learning_rate": 0.0006195728618257674, "loss": 0.5656, "step": 45410 }, { "epoch": 2.2558855667030895, "grad_norm": 0.11376953125, "learning_rate": 0.0006195331280421178, "loss": 0.5561, "step": 45420 }, { "epoch": 2.2563822389987087, "grad_norm": 0.10693359375, "learning_rate": 0.0006194933942584684, "loss": 0.5836, "step": 45430 }, { "epoch": 2.256878911294328, "grad_norm": 0.0966796875, "learning_rate": 0.0006194536604748187, "loss": 0.5693, "step": 45440 }, { "epoch": 2.257375583589947, "grad_norm": 0.22265625, "learning_rate": 0.0006194139266911692, "loss": 0.5708, "step": 45450 }, { "epoch": 2.257872255885567, "grad_norm": 0.109375, "learning_rate": 0.0006193741929075197, "loss": 0.5635, "step": 45460 }, { "epoch": 2.258368928181186, "grad_norm": 0.09130859375, "learning_rate": 0.00061933445912387, "loss": 0.5647, "step": 45470 }, { "epoch": 2.2588656004768053, "grad_norm": 0.138671875, "learning_rate": 0.0006192947253402206, "loss": 0.5577, "step": 45480 }, { "epoch": 2.2593622727724245, "grad_norm": 0.1181640625, "learning_rate": 0.0006192549915565711, "loss": 0.5508, "step": 45490 }, { "epoch": 2.259858945068044, "grad_norm": 0.095703125, "learning_rate": 0.0006192152577729214, "loss": 0.5782, "step": 45500 }, { "epoch": 2.2603556173636634, "grad_norm": 0.10400390625, "learning_rate": 0.0006191755239892719, "loss": 0.5538, "step": 45510 }, { "epoch": 2.2608522896592826, "grad_norm": 0.09326171875, "learning_rate": 0.0006191357902056223, "loss": 0.5579, "step": 45520 }, { "epoch": 2.2613489619549023, "grad_norm": 0.140625, "learning_rate": 0.0006190960564219727, "loss": 0.5628, "step": 45530 }, { "epoch": 2.2618456342505215, "grad_norm": 0.09814453125, "learning_rate": 0.0006190563226383233, "loss": 0.5516, "step": 45540 }, { "epoch": 2.2623423065461408, "grad_norm": 0.1123046875, "learning_rate": 0.0006190165888546737, "loss": 0.5491, "step": 45550 }, { "epoch": 2.26283897884176, "grad_norm": 0.126953125, "learning_rate": 0.0006189768550710242, "loss": 0.5538, "step": 45560 }, { "epoch": 2.2633356511373797, "grad_norm": 0.12255859375, "learning_rate": 0.0006189371212873746, "loss": 0.5748, "step": 45570 }, { "epoch": 2.263832323432999, "grad_norm": 0.130859375, "learning_rate": 0.000618897387503725, "loss": 0.5923, "step": 45580 }, { "epoch": 2.264328995728618, "grad_norm": 0.1044921875, "learning_rate": 0.0006188576537200756, "loss": 0.5539, "step": 45590 }, { "epoch": 2.264825668024238, "grad_norm": 0.1298828125, "learning_rate": 0.000618817919936426, "loss": 0.5704, "step": 45600 }, { "epoch": 2.265322340319857, "grad_norm": 0.11328125, "learning_rate": 0.0006187781861527764, "loss": 0.561, "step": 45610 }, { "epoch": 2.2658190126154762, "grad_norm": 0.12890625, "learning_rate": 0.0006187384523691269, "loss": 0.5745, "step": 45620 }, { "epoch": 2.2663156849110955, "grad_norm": 0.10205078125, "learning_rate": 0.0006186987185854772, "loss": 0.5669, "step": 45630 }, { "epoch": 2.266812357206715, "grad_norm": 0.09912109375, "learning_rate": 0.0006186589848018278, "loss": 0.5674, "step": 45640 }, { "epoch": 2.2673090295023344, "grad_norm": 0.10302734375, "learning_rate": 0.0006186192510181783, "loss": 0.5598, "step": 45650 }, { "epoch": 2.2678057017979536, "grad_norm": 0.10791015625, "learning_rate": 0.0006185795172345286, "loss": 0.5784, "step": 45660 }, { "epoch": 2.2683023740935733, "grad_norm": 0.134765625, "learning_rate": 0.0006185397834508791, "loss": 0.6022, "step": 45670 }, { "epoch": 2.2687990463891925, "grad_norm": 0.1025390625, "learning_rate": 0.0006185000496672297, "loss": 0.567, "step": 45680 }, { "epoch": 2.2692957186848117, "grad_norm": 0.10546875, "learning_rate": 0.00061846031588358, "loss": 0.5606, "step": 45690 }, { "epoch": 2.269792390980431, "grad_norm": 0.11376953125, "learning_rate": 0.0006184205820999305, "loss": 0.5446, "step": 45700 }, { "epoch": 2.2702890632760506, "grad_norm": 0.119140625, "learning_rate": 0.0006183808483162809, "loss": 0.5699, "step": 45710 }, { "epoch": 2.27078573557167, "grad_norm": 0.0966796875, "learning_rate": 0.0006183411145326314, "loss": 0.5412, "step": 45720 }, { "epoch": 2.271282407867289, "grad_norm": 0.1328125, "learning_rate": 0.0006183013807489819, "loss": 0.5655, "step": 45730 }, { "epoch": 2.2717790801629087, "grad_norm": 0.0966796875, "learning_rate": 0.0006182616469653323, "loss": 0.5383, "step": 45740 }, { "epoch": 2.272275752458528, "grad_norm": 0.1513671875, "learning_rate": 0.0006182219131816828, "loss": 0.565, "step": 45750 }, { "epoch": 2.272772424754147, "grad_norm": 0.10693359375, "learning_rate": 0.0006181821793980332, "loss": 0.5611, "step": 45760 }, { "epoch": 2.2732690970497664, "grad_norm": 0.08837890625, "learning_rate": 0.0006181424456143836, "loss": 0.5672, "step": 45770 }, { "epoch": 2.273765769345386, "grad_norm": 0.09521484375, "learning_rate": 0.0006181027118307342, "loss": 0.6097, "step": 45780 }, { "epoch": 2.2742624416410053, "grad_norm": 0.09814453125, "learning_rate": 0.0006180629780470846, "loss": 0.5667, "step": 45790 }, { "epoch": 2.2747591139366246, "grad_norm": 0.11181640625, "learning_rate": 0.000618023244263435, "loss": 0.5503, "step": 45800 }, { "epoch": 2.275255786232244, "grad_norm": 0.10791015625, "learning_rate": 0.0006179835104797855, "loss": 0.5652, "step": 45810 }, { "epoch": 2.2757524585278635, "grad_norm": 0.10693359375, "learning_rate": 0.0006179437766961359, "loss": 0.566, "step": 45820 }, { "epoch": 2.2762491308234827, "grad_norm": 0.10205078125, "learning_rate": 0.0006179040429124863, "loss": 0.5466, "step": 45830 }, { "epoch": 2.276745803119102, "grad_norm": 0.1142578125, "learning_rate": 0.0006178643091288369, "loss": 0.5661, "step": 45840 }, { "epoch": 2.277242475414721, "grad_norm": 0.1220703125, "learning_rate": 0.0006178245753451873, "loss": 0.548, "step": 45850 }, { "epoch": 2.277739147710341, "grad_norm": 0.1123046875, "learning_rate": 0.0006177848415615377, "loss": 0.5579, "step": 45860 }, { "epoch": 2.27823582000596, "grad_norm": 0.11572265625, "learning_rate": 0.0006177451077778882, "loss": 0.566, "step": 45870 }, { "epoch": 2.2787324923015793, "grad_norm": 0.1259765625, "learning_rate": 0.0006177053739942387, "loss": 0.5508, "step": 45880 }, { "epoch": 2.279229164597199, "grad_norm": 0.126953125, "learning_rate": 0.0006176656402105891, "loss": 0.5312, "step": 45890 }, { "epoch": 2.279725836892818, "grad_norm": 0.091796875, "learning_rate": 0.0006176259064269395, "loss": 0.5519, "step": 45900 }, { "epoch": 2.2802225091884374, "grad_norm": 0.154296875, "learning_rate": 0.00061758617264329, "loss": 0.5566, "step": 45910 }, { "epoch": 2.2807191814840566, "grad_norm": 0.1806640625, "learning_rate": 0.0006175464388596404, "loss": 0.5298, "step": 45920 }, { "epoch": 2.2812158537796763, "grad_norm": 0.08984375, "learning_rate": 0.0006175067050759908, "loss": 0.5844, "step": 45930 }, { "epoch": 2.2817125260752955, "grad_norm": 0.1123046875, "learning_rate": 0.0006174669712923414, "loss": 0.5758, "step": 45940 }, { "epoch": 2.2822091983709147, "grad_norm": 0.10009765625, "learning_rate": 0.0006174272375086918, "loss": 0.5557, "step": 45950 }, { "epoch": 2.2827058706665344, "grad_norm": 0.2021484375, "learning_rate": 0.0006173875037250422, "loss": 0.5717, "step": 45960 }, { "epoch": 2.2832025429621536, "grad_norm": 0.10498046875, "learning_rate": 0.0006173477699413927, "loss": 0.5456, "step": 45970 }, { "epoch": 2.283699215257773, "grad_norm": 0.134765625, "learning_rate": 0.0006173080361577431, "loss": 0.5535, "step": 45980 }, { "epoch": 2.284195887553392, "grad_norm": 0.10888671875, "learning_rate": 0.0006172683023740936, "loss": 0.575, "step": 45990 }, { "epoch": 2.2846925598490118, "grad_norm": 0.095703125, "learning_rate": 0.0006172285685904441, "loss": 0.53, "step": 46000 }, { "epoch": 2.285189232144631, "grad_norm": 0.0908203125, "learning_rate": 0.0006171888348067945, "loss": 0.5599, "step": 46010 }, { "epoch": 2.28568590444025, "grad_norm": 0.1044921875, "learning_rate": 0.0006171491010231449, "loss": 0.5837, "step": 46020 }, { "epoch": 2.28618257673587, "grad_norm": 0.0986328125, "learning_rate": 0.0006171093672394954, "loss": 0.5603, "step": 46030 }, { "epoch": 2.286679249031489, "grad_norm": 0.1591796875, "learning_rate": 0.0006170696334558459, "loss": 0.5613, "step": 46040 }, { "epoch": 2.2871759213271083, "grad_norm": 0.1220703125, "learning_rate": 0.0006170298996721963, "loss": 0.5899, "step": 46050 }, { "epoch": 2.2876725936227276, "grad_norm": 0.14453125, "learning_rate": 0.0006169901658885468, "loss": 0.6059, "step": 46060 }, { "epoch": 2.2881692659183472, "grad_norm": 0.103515625, "learning_rate": 0.0006169504321048972, "loss": 0.5521, "step": 46070 }, { "epoch": 2.2886659382139665, "grad_norm": 0.08642578125, "learning_rate": 0.0006169106983212476, "loss": 0.5426, "step": 46080 }, { "epoch": 2.2891626105095857, "grad_norm": 0.154296875, "learning_rate": 0.0006168709645375982, "loss": 0.5622, "step": 46090 }, { "epoch": 2.2896592828052054, "grad_norm": 0.1484375, "learning_rate": 0.0006168312307539486, "loss": 0.5883, "step": 46100 }, { "epoch": 2.2901559551008246, "grad_norm": 0.1328125, "learning_rate": 0.000616791496970299, "loss": 0.5705, "step": 46110 }, { "epoch": 2.290652627396444, "grad_norm": 0.1083984375, "learning_rate": 0.0006167517631866494, "loss": 0.5991, "step": 46120 }, { "epoch": 2.291149299692063, "grad_norm": 0.10595703125, "learning_rate": 0.000616712029403, "loss": 0.5995, "step": 46130 }, { "epoch": 2.2916459719876827, "grad_norm": 0.123046875, "learning_rate": 0.0006166722956193505, "loss": 0.611, "step": 46140 }, { "epoch": 2.292142644283302, "grad_norm": 0.08349609375, "learning_rate": 0.0006166325618357008, "loss": 0.5577, "step": 46150 }, { "epoch": 2.292639316578921, "grad_norm": 0.138671875, "learning_rate": 0.0006165928280520513, "loss": 0.5759, "step": 46160 }, { "epoch": 2.2931359888745404, "grad_norm": 0.1259765625, "learning_rate": 0.0006165530942684017, "loss": 0.5706, "step": 46170 }, { "epoch": 2.29363266117016, "grad_norm": 0.09326171875, "learning_rate": 0.0006165133604847521, "loss": 0.5883, "step": 46180 }, { "epoch": 2.2941293334657793, "grad_norm": 0.08935546875, "learning_rate": 0.0006164736267011027, "loss": 0.5422, "step": 46190 }, { "epoch": 2.2946260057613985, "grad_norm": 0.2109375, "learning_rate": 0.0006164338929174531, "loss": 0.5701, "step": 46200 }, { "epoch": 2.2951226780570178, "grad_norm": 0.154296875, "learning_rate": 0.0006163941591338035, "loss": 0.5675, "step": 46210 }, { "epoch": 2.2956193503526374, "grad_norm": 0.1279296875, "learning_rate": 0.000616354425350154, "loss": 0.5479, "step": 46220 }, { "epoch": 2.2961160226482566, "grad_norm": 0.09423828125, "learning_rate": 0.0006163146915665044, "loss": 0.607, "step": 46230 }, { "epoch": 2.296612694943876, "grad_norm": 0.154296875, "learning_rate": 0.0006162749577828549, "loss": 0.5768, "step": 46240 }, { "epoch": 2.2971093672394955, "grad_norm": 0.1279296875, "learning_rate": 0.0006162352239992054, "loss": 0.5206, "step": 46250 }, { "epoch": 2.2976060395351148, "grad_norm": 0.0927734375, "learning_rate": 0.0006161954902155558, "loss": 0.5652, "step": 46260 }, { "epoch": 2.298102711830734, "grad_norm": 0.0947265625, "learning_rate": 0.0006161557564319062, "loss": 0.6084, "step": 46270 }, { "epoch": 2.2985993841263532, "grad_norm": 0.1103515625, "learning_rate": 0.0006161160226482567, "loss": 0.5947, "step": 46280 }, { "epoch": 2.299096056421973, "grad_norm": 0.09326171875, "learning_rate": 0.0006160762888646072, "loss": 0.5682, "step": 46290 }, { "epoch": 2.299592728717592, "grad_norm": 0.111328125, "learning_rate": 0.0006160365550809577, "loss": 0.5654, "step": 46300 }, { "epoch": 2.3000894010132114, "grad_norm": 0.1171875, "learning_rate": 0.000615996821297308, "loss": 0.6128, "step": 46310 }, { "epoch": 2.300586073308831, "grad_norm": 0.1552734375, "learning_rate": 0.0006159570875136585, "loss": 0.5572, "step": 46320 }, { "epoch": 2.3010827456044503, "grad_norm": 0.1435546875, "learning_rate": 0.000615917353730009, "loss": 0.5714, "step": 46330 }, { "epoch": 2.3015794179000695, "grad_norm": 0.12890625, "learning_rate": 0.0006158776199463594, "loss": 0.5847, "step": 46340 }, { "epoch": 2.3020760901956887, "grad_norm": 0.0927734375, "learning_rate": 0.0006158378861627099, "loss": 0.5539, "step": 46350 }, { "epoch": 2.3025727624913084, "grad_norm": 0.125, "learning_rate": 0.0006157981523790604, "loss": 0.5749, "step": 46360 }, { "epoch": 2.3030694347869276, "grad_norm": 0.1005859375, "learning_rate": 0.0006157584185954107, "loss": 0.5744, "step": 46370 }, { "epoch": 2.303566107082547, "grad_norm": 0.10009765625, "learning_rate": 0.0006157186848117612, "loss": 0.5616, "step": 46380 }, { "epoch": 2.3040627793781665, "grad_norm": 0.10546875, "learning_rate": 0.0006156789510281117, "loss": 0.5513, "step": 46390 }, { "epoch": 2.3045594516737857, "grad_norm": 0.1025390625, "learning_rate": 0.0006156392172444621, "loss": 0.5751, "step": 46400 }, { "epoch": 2.305056123969405, "grad_norm": 0.1708984375, "learning_rate": 0.0006155994834608126, "loss": 0.5758, "step": 46410 }, { "epoch": 2.305552796265024, "grad_norm": 0.158203125, "learning_rate": 0.000615559749677163, "loss": 0.5471, "step": 46420 }, { "epoch": 2.306049468560644, "grad_norm": 0.0986328125, "learning_rate": 0.0006155200158935134, "loss": 0.5547, "step": 46430 }, { "epoch": 2.306546140856263, "grad_norm": 0.1025390625, "learning_rate": 0.000615480282109864, "loss": 0.5718, "step": 46440 }, { "epoch": 2.3070428131518823, "grad_norm": 0.1328125, "learning_rate": 0.0006154405483262144, "loss": 0.578, "step": 46450 }, { "epoch": 2.307539485447502, "grad_norm": 0.11376953125, "learning_rate": 0.0006154008145425649, "loss": 0.5647, "step": 46460 }, { "epoch": 2.308036157743121, "grad_norm": 0.111328125, "learning_rate": 0.0006153610807589153, "loss": 0.5614, "step": 46470 }, { "epoch": 2.3085328300387404, "grad_norm": 0.2041015625, "learning_rate": 0.0006153213469752657, "loss": 0.542, "step": 46480 }, { "epoch": 2.3090295023343597, "grad_norm": 0.0908203125, "learning_rate": 0.0006152816131916163, "loss": 0.5636, "step": 46490 }, { "epoch": 2.3095261746299793, "grad_norm": 0.1025390625, "learning_rate": 0.0006152418794079666, "loss": 0.5703, "step": 46500 }, { "epoch": 2.3100228469255986, "grad_norm": 0.12158203125, "learning_rate": 0.0006152021456243171, "loss": 0.5895, "step": 46510 }, { "epoch": 2.310519519221218, "grad_norm": 0.103515625, "learning_rate": 0.0006151624118406676, "loss": 0.5679, "step": 46520 }, { "epoch": 2.311016191516837, "grad_norm": 0.099609375, "learning_rate": 0.0006151226780570179, "loss": 0.5894, "step": 46530 }, { "epoch": 2.3115128638124567, "grad_norm": 0.10986328125, "learning_rate": 0.0006150829442733685, "loss": 0.5703, "step": 46540 }, { "epoch": 2.312009536108076, "grad_norm": 0.11181640625, "learning_rate": 0.000615043210489719, "loss": 0.563, "step": 46550 }, { "epoch": 2.312506208403695, "grad_norm": 0.1201171875, "learning_rate": 0.0006150034767060693, "loss": 0.5498, "step": 46560 }, { "epoch": 2.3130028806993144, "grad_norm": 0.1416015625, "learning_rate": 0.0006149637429224198, "loss": 0.5776, "step": 46570 }, { "epoch": 2.313499552994934, "grad_norm": 0.08984375, "learning_rate": 0.0006149240091387702, "loss": 0.5543, "step": 46580 }, { "epoch": 2.3139962252905533, "grad_norm": 0.08154296875, "learning_rate": 0.0006148842753551208, "loss": 0.5573, "step": 46590 }, { "epoch": 2.3144928975861725, "grad_norm": 0.09033203125, "learning_rate": 0.0006148445415714712, "loss": 0.5547, "step": 46600 }, { "epoch": 2.314989569881792, "grad_norm": 0.095703125, "learning_rate": 0.0006148048077878216, "loss": 0.5712, "step": 46610 }, { "epoch": 2.3154862421774114, "grad_norm": 0.115234375, "learning_rate": 0.0006147650740041721, "loss": 0.5789, "step": 46620 }, { "epoch": 2.3159829144730306, "grad_norm": 0.1220703125, "learning_rate": 0.0006147253402205225, "loss": 0.5589, "step": 46630 }, { "epoch": 2.31647958676865, "grad_norm": 0.146484375, "learning_rate": 0.000614685606436873, "loss": 0.5565, "step": 46640 }, { "epoch": 2.3169762590642695, "grad_norm": 0.10595703125, "learning_rate": 0.0006146458726532235, "loss": 0.5665, "step": 46650 }, { "epoch": 2.3174729313598887, "grad_norm": 0.099609375, "learning_rate": 0.0006146061388695739, "loss": 0.5588, "step": 46660 }, { "epoch": 2.317969603655508, "grad_norm": 0.1396484375, "learning_rate": 0.0006145664050859243, "loss": 0.556, "step": 46670 }, { "epoch": 2.3184662759511276, "grad_norm": 0.2392578125, "learning_rate": 0.0006145266713022748, "loss": 0.5475, "step": 46680 }, { "epoch": 2.318962948246747, "grad_norm": 0.11865234375, "learning_rate": 0.0006144869375186253, "loss": 0.5643, "step": 46690 }, { "epoch": 2.319459620542366, "grad_norm": 0.10888671875, "learning_rate": 0.0006144472037349757, "loss": 0.5823, "step": 46700 }, { "epoch": 2.3199562928379853, "grad_norm": 0.130859375, "learning_rate": 0.0006144074699513262, "loss": 0.5479, "step": 46710 }, { "epoch": 2.320452965133605, "grad_norm": 0.103515625, "learning_rate": 0.0006143677361676765, "loss": 0.5915, "step": 46720 }, { "epoch": 2.320949637429224, "grad_norm": 0.1318359375, "learning_rate": 0.000614328002384027, "loss": 0.5537, "step": 46730 }, { "epoch": 2.3214463097248434, "grad_norm": 0.11572265625, "learning_rate": 0.0006142882686003776, "loss": 0.5458, "step": 46740 }, { "epoch": 2.321942982020463, "grad_norm": 0.095703125, "learning_rate": 0.000614248534816728, "loss": 0.5543, "step": 46750 }, { "epoch": 2.3224396543160823, "grad_norm": 0.1171875, "learning_rate": 0.0006142088010330784, "loss": 0.5659, "step": 46760 }, { "epoch": 2.3229363266117016, "grad_norm": 0.09716796875, "learning_rate": 0.0006141690672494288, "loss": 0.5702, "step": 46770 }, { "epoch": 2.323432998907321, "grad_norm": 0.1083984375, "learning_rate": 0.0006141293334657793, "loss": 0.554, "step": 46780 }, { "epoch": 2.3239296712029405, "grad_norm": 0.12158203125, "learning_rate": 0.0006140895996821298, "loss": 0.5765, "step": 46790 }, { "epoch": 2.3244263434985597, "grad_norm": 0.1181640625, "learning_rate": 0.0006140498658984802, "loss": 0.5599, "step": 46800 }, { "epoch": 2.324923015794179, "grad_norm": 0.083984375, "learning_rate": 0.0006140101321148307, "loss": 0.5532, "step": 46810 }, { "epoch": 2.3254196880897986, "grad_norm": 0.1591796875, "learning_rate": 0.0006139703983311811, "loss": 0.5837, "step": 46820 }, { "epoch": 2.325916360385418, "grad_norm": 0.1259765625, "learning_rate": 0.0006139306645475315, "loss": 0.5828, "step": 46830 }, { "epoch": 2.326413032681037, "grad_norm": 0.22265625, "learning_rate": 0.0006138909307638821, "loss": 0.5436, "step": 46840 }, { "epoch": 2.3269097049766563, "grad_norm": 0.0927734375, "learning_rate": 0.0006138511969802325, "loss": 0.5453, "step": 46850 }, { "epoch": 2.3274063772722755, "grad_norm": 0.1376953125, "learning_rate": 0.0006138114631965829, "loss": 0.5653, "step": 46860 }, { "epoch": 2.327903049567895, "grad_norm": 0.10107421875, "learning_rate": 0.0006137717294129334, "loss": 0.5757, "step": 46870 }, { "epoch": 2.3283997218635144, "grad_norm": 0.1396484375, "learning_rate": 0.0006137319956292838, "loss": 0.5588, "step": 46880 }, { "epoch": 2.3288963941591336, "grad_norm": 0.10205078125, "learning_rate": 0.0006136922618456343, "loss": 0.569, "step": 46890 }, { "epoch": 2.3293930664547533, "grad_norm": 0.1240234375, "learning_rate": 0.0006136525280619848, "loss": 0.5593, "step": 46900 }, { "epoch": 2.3298897387503725, "grad_norm": 0.0966796875, "learning_rate": 0.0006136127942783352, "loss": 0.5966, "step": 46910 }, { "epoch": 2.3303864110459918, "grad_norm": 0.11962890625, "learning_rate": 0.0006135730604946856, "loss": 0.5688, "step": 46920 }, { "epoch": 2.330883083341611, "grad_norm": 0.1064453125, "learning_rate": 0.0006135333267110361, "loss": 0.5858, "step": 46930 }, { "epoch": 2.3313797556372307, "grad_norm": 0.11474609375, "learning_rate": 0.0006134935929273866, "loss": 0.5659, "step": 46940 }, { "epoch": 2.33187642793285, "grad_norm": 0.1611328125, "learning_rate": 0.000613453859143737, "loss": 0.5702, "step": 46950 }, { "epoch": 2.332373100228469, "grad_norm": 0.1767578125, "learning_rate": 0.0006134141253600875, "loss": 0.548, "step": 46960 }, { "epoch": 2.332869772524089, "grad_norm": 0.1220703125, "learning_rate": 0.0006133743915764379, "loss": 0.5671, "step": 46970 }, { "epoch": 2.333366444819708, "grad_norm": 0.115234375, "learning_rate": 0.0006133346577927883, "loss": 0.5613, "step": 46980 }, { "epoch": 2.3338631171153272, "grad_norm": 0.11865234375, "learning_rate": 0.0006132949240091387, "loss": 0.6136, "step": 46990 }, { "epoch": 2.3343597894109465, "grad_norm": 0.11083984375, "learning_rate": 0.0006132551902254893, "loss": 0.5489, "step": 47000 }, { "epoch": 2.334856461706566, "grad_norm": 0.119140625, "learning_rate": 0.0006132154564418397, "loss": 0.5452, "step": 47010 }, { "epoch": 2.3353531340021854, "grad_norm": 0.1181640625, "learning_rate": 0.0006131757226581901, "loss": 0.5752, "step": 47020 }, { "epoch": 2.3358498062978046, "grad_norm": 0.11767578125, "learning_rate": 0.0006131359888745406, "loss": 0.5623, "step": 47030 }, { "epoch": 2.3363464785934243, "grad_norm": 0.107421875, "learning_rate": 0.000613096255090891, "loss": 0.5492, "step": 47040 }, { "epoch": 2.3368431508890435, "grad_norm": 0.1279296875, "learning_rate": 0.0006130565213072415, "loss": 0.5686, "step": 47050 }, { "epoch": 2.3373398231846627, "grad_norm": 0.09814453125, "learning_rate": 0.000613016787523592, "loss": 0.5749, "step": 47060 }, { "epoch": 2.337836495480282, "grad_norm": 0.1962890625, "learning_rate": 0.0006129770537399424, "loss": 0.6005, "step": 47070 }, { "epoch": 2.3383331677759016, "grad_norm": 0.10302734375, "learning_rate": 0.0006129373199562928, "loss": 0.581, "step": 47080 }, { "epoch": 2.338829840071521, "grad_norm": 0.09765625, "learning_rate": 0.0006128975861726434, "loss": 0.5312, "step": 47090 }, { "epoch": 2.33932651236714, "grad_norm": 0.09375, "learning_rate": 0.0006128578523889938, "loss": 0.5676, "step": 47100 }, { "epoch": 2.3398231846627597, "grad_norm": 0.1181640625, "learning_rate": 0.0006128181186053442, "loss": 0.5615, "step": 47110 }, { "epoch": 2.340319856958379, "grad_norm": 0.1123046875, "learning_rate": 0.0006127783848216947, "loss": 0.5737, "step": 47120 }, { "epoch": 2.340816529253998, "grad_norm": 0.12890625, "learning_rate": 0.0006127386510380451, "loss": 0.5802, "step": 47130 }, { "epoch": 2.3413132015496174, "grad_norm": 0.09521484375, "learning_rate": 0.0006126989172543955, "loss": 0.5564, "step": 47140 }, { "epoch": 2.341809873845237, "grad_norm": 0.125, "learning_rate": 0.0006126591834707461, "loss": 0.563, "step": 47150 }, { "epoch": 2.3423065461408563, "grad_norm": 0.09716796875, "learning_rate": 0.0006126194496870965, "loss": 0.5529, "step": 47160 }, { "epoch": 2.3428032184364755, "grad_norm": 0.12255859375, "learning_rate": 0.0006125797159034469, "loss": 0.5879, "step": 47170 }, { "epoch": 2.3432998907320948, "grad_norm": 0.1171875, "learning_rate": 0.0006125399821197973, "loss": 0.5619, "step": 47180 }, { "epoch": 2.3437965630277144, "grad_norm": 0.20703125, "learning_rate": 0.0006125002483361478, "loss": 0.5826, "step": 47190 }, { "epoch": 2.3442932353233337, "grad_norm": 0.1689453125, "learning_rate": 0.0006124605145524984, "loss": 0.5169, "step": 47200 }, { "epoch": 2.344789907618953, "grad_norm": 0.083984375, "learning_rate": 0.0006124207807688487, "loss": 0.5398, "step": 47210 }, { "epoch": 2.345286579914572, "grad_norm": 0.09228515625, "learning_rate": 0.0006123810469851992, "loss": 0.5669, "step": 47220 }, { "epoch": 2.345783252210192, "grad_norm": 0.162109375, "learning_rate": 0.0006123413132015497, "loss": 0.5466, "step": 47230 }, { "epoch": 2.346279924505811, "grad_norm": 0.10205078125, "learning_rate": 0.0006123015794179, "loss": 0.5729, "step": 47240 }, { "epoch": 2.3467765968014302, "grad_norm": 0.0947265625, "learning_rate": 0.0006122618456342506, "loss": 0.5874, "step": 47250 }, { "epoch": 2.34727326909705, "grad_norm": 0.09130859375, "learning_rate": 0.000612222111850601, "loss": 0.5505, "step": 47260 }, { "epoch": 2.347769941392669, "grad_norm": 0.11865234375, "learning_rate": 0.0006121823780669514, "loss": 0.5806, "step": 47270 }, { "epoch": 2.3482666136882884, "grad_norm": 0.21875, "learning_rate": 0.0006121426442833019, "loss": 0.558, "step": 47280 }, { "epoch": 2.3487632859839076, "grad_norm": 0.09521484375, "learning_rate": 0.0006121029104996523, "loss": 0.5507, "step": 47290 }, { "epoch": 2.3492599582795273, "grad_norm": 0.1875, "learning_rate": 0.0006120631767160028, "loss": 0.5562, "step": 47300 }, { "epoch": 2.3497566305751465, "grad_norm": 0.1259765625, "learning_rate": 0.0006120234429323533, "loss": 0.5989, "step": 47310 }, { "epoch": 2.3502533028707657, "grad_norm": 0.11572265625, "learning_rate": 0.0006119837091487037, "loss": 0.577, "step": 47320 }, { "epoch": 2.3507499751663854, "grad_norm": 0.10791015625, "learning_rate": 0.0006119439753650541, "loss": 0.5982, "step": 47330 }, { "epoch": 2.3512466474620046, "grad_norm": 0.1337890625, "learning_rate": 0.0006119042415814046, "loss": 0.5664, "step": 47340 }, { "epoch": 2.351743319757624, "grad_norm": 0.1572265625, "learning_rate": 0.0006118645077977551, "loss": 0.557, "step": 47350 }, { "epoch": 2.352239992053243, "grad_norm": 0.10205078125, "learning_rate": 0.0006118247740141056, "loss": 0.5568, "step": 47360 }, { "epoch": 2.3527366643488627, "grad_norm": 0.1728515625, "learning_rate": 0.0006117850402304559, "loss": 0.5472, "step": 47370 }, { "epoch": 2.353233336644482, "grad_norm": 0.09375, "learning_rate": 0.0006117453064468064, "loss": 0.5609, "step": 47380 }, { "epoch": 2.353730008940101, "grad_norm": 0.09814453125, "learning_rate": 0.000611705572663157, "loss": 0.5356, "step": 47390 }, { "epoch": 2.354226681235721, "grad_norm": 0.10986328125, "learning_rate": 0.0006116658388795073, "loss": 0.5859, "step": 47400 }, { "epoch": 2.35472335353134, "grad_norm": 0.1279296875, "learning_rate": 0.0006116261050958578, "loss": 0.5539, "step": 47410 }, { "epoch": 2.3552200258269593, "grad_norm": 0.162109375, "learning_rate": 0.0006115863713122083, "loss": 0.5498, "step": 47420 }, { "epoch": 2.3557166981225786, "grad_norm": 0.10107421875, "learning_rate": 0.0006115466375285586, "loss": 0.5688, "step": 47430 }, { "epoch": 2.3562133704181982, "grad_norm": 0.111328125, "learning_rate": 0.0006115069037449091, "loss": 0.543, "step": 47440 }, { "epoch": 2.3567100427138175, "grad_norm": 0.1572265625, "learning_rate": 0.0006114671699612596, "loss": 0.5701, "step": 47450 }, { "epoch": 2.3572067150094367, "grad_norm": 0.09375, "learning_rate": 0.00061142743617761, "loss": 0.5587, "step": 47460 }, { "epoch": 2.3577033873050564, "grad_norm": 0.125, "learning_rate": 0.0006113877023939605, "loss": 0.6082, "step": 47470 }, { "epoch": 2.3582000596006756, "grad_norm": 0.0947265625, "learning_rate": 0.0006113479686103109, "loss": 0.5627, "step": 47480 }, { "epoch": 2.358696731896295, "grad_norm": 0.1416015625, "learning_rate": 0.0006113082348266614, "loss": 0.5644, "step": 47490 }, { "epoch": 2.359193404191914, "grad_norm": 0.08935546875, "learning_rate": 0.0006112685010430119, "loss": 0.5654, "step": 47500 }, { "epoch": 2.3596900764875337, "grad_norm": 0.1005859375, "learning_rate": 0.0006112287672593623, "loss": 0.5713, "step": 47510 }, { "epoch": 2.360186748783153, "grad_norm": 0.091796875, "learning_rate": 0.0006111890334757128, "loss": 0.5781, "step": 47520 }, { "epoch": 2.360683421078772, "grad_norm": 0.1357421875, "learning_rate": 0.0006111492996920632, "loss": 0.5249, "step": 47530 }, { "epoch": 2.3611800933743914, "grad_norm": 0.1513671875, "learning_rate": 0.0006111095659084136, "loss": 0.5465, "step": 47540 }, { "epoch": 2.361676765670011, "grad_norm": 0.138671875, "learning_rate": 0.0006110698321247642, "loss": 0.5634, "step": 47550 }, { "epoch": 2.3621734379656303, "grad_norm": 0.10302734375, "learning_rate": 0.0006110300983411146, "loss": 0.5259, "step": 47560 }, { "epoch": 2.3626701102612495, "grad_norm": 0.10009765625, "learning_rate": 0.000610990364557465, "loss": 0.5567, "step": 47570 }, { "epoch": 2.3631667825568687, "grad_norm": 0.09814453125, "learning_rate": 0.0006109506307738155, "loss": 0.5519, "step": 47580 }, { "epoch": 2.3636634548524884, "grad_norm": 0.11279296875, "learning_rate": 0.0006109108969901658, "loss": 0.5779, "step": 47590 }, { "epoch": 2.3641601271481076, "grad_norm": 0.103515625, "learning_rate": 0.0006108711632065164, "loss": 0.5892, "step": 47600 }, { "epoch": 2.364656799443727, "grad_norm": 0.10400390625, "learning_rate": 0.0006108314294228669, "loss": 0.5698, "step": 47610 }, { "epoch": 2.3651534717393465, "grad_norm": 0.12158203125, "learning_rate": 0.0006107916956392172, "loss": 0.5859, "step": 47620 }, { "epoch": 2.3656501440349658, "grad_norm": 0.1103515625, "learning_rate": 0.0006107519618555677, "loss": 0.5683, "step": 47630 }, { "epoch": 2.366146816330585, "grad_norm": 0.126953125, "learning_rate": 0.0006107122280719181, "loss": 0.5654, "step": 47640 }, { "epoch": 2.366643488626204, "grad_norm": 0.0986328125, "learning_rate": 0.0006106724942882687, "loss": 0.5919, "step": 47650 }, { "epoch": 2.367140160921824, "grad_norm": 0.09326171875, "learning_rate": 0.0006106327605046191, "loss": 0.5796, "step": 47660 }, { "epoch": 2.367636833217443, "grad_norm": 0.11181640625, "learning_rate": 0.0006105930267209695, "loss": 0.5524, "step": 47670 }, { "epoch": 2.3681335055130623, "grad_norm": 0.181640625, "learning_rate": 0.00061055329293732, "loss": 0.5327, "step": 47680 }, { "epoch": 2.368630177808682, "grad_norm": 0.11328125, "learning_rate": 0.0006105135591536704, "loss": 0.5474, "step": 47690 }, { "epoch": 2.3691268501043012, "grad_norm": 0.09423828125, "learning_rate": 0.0006104738253700209, "loss": 0.5729, "step": 47700 }, { "epoch": 2.3696235223999205, "grad_norm": 0.1015625, "learning_rate": 0.0006104340915863714, "loss": 0.5572, "step": 47710 }, { "epoch": 2.3701201946955397, "grad_norm": 0.09765625, "learning_rate": 0.0006103943578027218, "loss": 0.5617, "step": 47720 }, { "epoch": 2.3706168669911594, "grad_norm": 0.10107421875, "learning_rate": 0.0006103546240190722, "loss": 0.5546, "step": 47730 }, { "epoch": 2.3711135392867786, "grad_norm": 0.09130859375, "learning_rate": 0.0006103148902354227, "loss": 0.5584, "step": 47740 }, { "epoch": 2.371610211582398, "grad_norm": 0.1513671875, "learning_rate": 0.0006102751564517732, "loss": 0.5744, "step": 47750 }, { "epoch": 2.3721068838780175, "grad_norm": 0.11279296875, "learning_rate": 0.0006102354226681236, "loss": 0.5729, "step": 47760 }, { "epoch": 2.3726035561736367, "grad_norm": 0.09326171875, "learning_rate": 0.0006101956888844741, "loss": 0.5538, "step": 47770 }, { "epoch": 2.373100228469256, "grad_norm": 0.09326171875, "learning_rate": 0.0006101559551008244, "loss": 0.5581, "step": 47780 }, { "epoch": 2.373596900764875, "grad_norm": 0.1318359375, "learning_rate": 0.0006101162213171749, "loss": 0.5644, "step": 47790 }, { "epoch": 2.374093573060495, "grad_norm": 0.0947265625, "learning_rate": 0.0006100764875335255, "loss": 0.5738, "step": 47800 }, { "epoch": 2.374590245356114, "grad_norm": 0.1259765625, "learning_rate": 0.0006100367537498759, "loss": 0.5686, "step": 47810 }, { "epoch": 2.3750869176517333, "grad_norm": 0.1044921875, "learning_rate": 0.0006099970199662263, "loss": 0.5489, "step": 47820 }, { "epoch": 2.375583589947353, "grad_norm": 0.0986328125, "learning_rate": 0.0006099572861825768, "loss": 0.5604, "step": 47830 }, { "epoch": 2.376080262242972, "grad_norm": 0.1318359375, "learning_rate": 0.0006099175523989272, "loss": 0.5671, "step": 47840 }, { "epoch": 2.3765769345385914, "grad_norm": 0.11767578125, "learning_rate": 0.0006098778186152777, "loss": 0.5772, "step": 47850 }, { "epoch": 2.3770736068342107, "grad_norm": 0.1279296875, "learning_rate": 0.0006098380848316281, "loss": 0.592, "step": 47860 }, { "epoch": 2.3775702791298303, "grad_norm": 0.10107421875, "learning_rate": 0.0006097983510479786, "loss": 0.555, "step": 47870 }, { "epoch": 2.3780669514254495, "grad_norm": 0.12890625, "learning_rate": 0.000609758617264329, "loss": 0.5402, "step": 47880 }, { "epoch": 2.3785636237210688, "grad_norm": 0.115234375, "learning_rate": 0.0006097188834806794, "loss": 0.5615, "step": 47890 }, { "epoch": 2.379060296016688, "grad_norm": 0.2177734375, "learning_rate": 0.00060967914969703, "loss": 0.5759, "step": 47900 }, { "epoch": 2.3795569683123077, "grad_norm": 0.1298828125, "learning_rate": 0.0006096394159133804, "loss": 0.5377, "step": 47910 }, { "epoch": 2.380053640607927, "grad_norm": 0.162109375, "learning_rate": 0.0006095996821297308, "loss": 0.5576, "step": 47920 }, { "epoch": 2.380550312903546, "grad_norm": 0.09716796875, "learning_rate": 0.0006095599483460813, "loss": 0.5403, "step": 47930 }, { "epoch": 2.3810469851991654, "grad_norm": 0.09814453125, "learning_rate": 0.0006095202145624317, "loss": 0.5741, "step": 47940 }, { "epoch": 2.381543657494785, "grad_norm": 0.10205078125, "learning_rate": 0.0006094804807787822, "loss": 0.5711, "step": 47950 }, { "epoch": 2.3820403297904043, "grad_norm": 0.12353515625, "learning_rate": 0.0006094407469951327, "loss": 0.5971, "step": 47960 }, { "epoch": 2.3825370020860235, "grad_norm": 0.099609375, "learning_rate": 0.0006094010132114831, "loss": 0.5602, "step": 47970 }, { "epoch": 2.383033674381643, "grad_norm": 0.10400390625, "learning_rate": 0.0006093612794278335, "loss": 0.5781, "step": 47980 }, { "epoch": 2.3835303466772624, "grad_norm": 0.10205078125, "learning_rate": 0.000609321545644184, "loss": 0.5557, "step": 47990 }, { "epoch": 2.3840270189728816, "grad_norm": 0.099609375, "learning_rate": 0.0006092818118605345, "loss": 0.5475, "step": 48000 }, { "epoch": 2.384523691268501, "grad_norm": 0.130859375, "learning_rate": 0.0006092420780768849, "loss": 0.5776, "step": 48010 }, { "epoch": 2.3850203635641205, "grad_norm": 0.142578125, "learning_rate": 0.0006092023442932354, "loss": 0.5688, "step": 48020 }, { "epoch": 2.3855170358597397, "grad_norm": 0.123046875, "learning_rate": 0.0006091626105095858, "loss": 0.5748, "step": 48030 }, { "epoch": 2.386013708155359, "grad_norm": 0.08984375, "learning_rate": 0.0006091228767259362, "loss": 0.5595, "step": 48040 }, { "epoch": 2.3865103804509786, "grad_norm": 0.10205078125, "learning_rate": 0.0006090831429422867, "loss": 0.55, "step": 48050 }, { "epoch": 2.387007052746598, "grad_norm": 0.11474609375, "learning_rate": 0.0006090434091586372, "loss": 0.5507, "step": 48060 }, { "epoch": 2.387503725042217, "grad_norm": 0.1455078125, "learning_rate": 0.0006090036753749876, "loss": 0.5414, "step": 48070 }, { "epoch": 2.3880003973378363, "grad_norm": 0.099609375, "learning_rate": 0.000608963941591338, "loss": 0.5573, "step": 48080 }, { "epoch": 2.388497069633456, "grad_norm": 0.09326171875, "learning_rate": 0.0006089242078076885, "loss": 0.5717, "step": 48090 }, { "epoch": 2.388993741929075, "grad_norm": 0.1103515625, "learning_rate": 0.0006088844740240391, "loss": 0.5798, "step": 48100 }, { "epoch": 2.3894904142246944, "grad_norm": 0.126953125, "learning_rate": 0.0006088447402403894, "loss": 0.5437, "step": 48110 }, { "epoch": 2.389987086520314, "grad_norm": 0.1220703125, "learning_rate": 0.0006088050064567399, "loss": 0.5658, "step": 48120 }, { "epoch": 2.3904837588159333, "grad_norm": 0.2431640625, "learning_rate": 0.0006087652726730903, "loss": 0.5929, "step": 48130 }, { "epoch": 2.3909804311115526, "grad_norm": 0.10498046875, "learning_rate": 0.0006087255388894407, "loss": 0.5686, "step": 48140 }, { "epoch": 2.391477103407172, "grad_norm": 0.1240234375, "learning_rate": 0.0006086858051057913, "loss": 0.5695, "step": 48150 }, { "epoch": 2.3919737757027915, "grad_norm": 0.10009765625, "learning_rate": 0.0006086460713221417, "loss": 0.5974, "step": 48160 }, { "epoch": 2.3924704479984107, "grad_norm": 0.09765625, "learning_rate": 0.0006086063375384921, "loss": 0.5581, "step": 48170 }, { "epoch": 2.39296712029403, "grad_norm": 0.0986328125, "learning_rate": 0.0006085666037548426, "loss": 0.5586, "step": 48180 }, { "epoch": 2.3934637925896496, "grad_norm": 0.11474609375, "learning_rate": 0.000608526869971193, "loss": 0.5454, "step": 48190 }, { "epoch": 2.393960464885269, "grad_norm": 0.099609375, "learning_rate": 0.0006084871361875435, "loss": 0.5464, "step": 48200 }, { "epoch": 2.394457137180888, "grad_norm": 0.12451171875, "learning_rate": 0.000608447402403894, "loss": 0.5685, "step": 48210 }, { "epoch": 2.3949538094765073, "grad_norm": 0.130859375, "learning_rate": 0.0006084076686202444, "loss": 0.545, "step": 48220 }, { "epoch": 2.395450481772127, "grad_norm": 0.1513671875, "learning_rate": 0.0006083679348365949, "loss": 0.5746, "step": 48230 }, { "epoch": 2.395947154067746, "grad_norm": 0.103515625, "learning_rate": 0.0006083282010529452, "loss": 0.5444, "step": 48240 }, { "epoch": 2.3964438263633654, "grad_norm": 0.10595703125, "learning_rate": 0.0006082884672692958, "loss": 0.5705, "step": 48250 }, { "epoch": 2.3969404986589846, "grad_norm": 0.15234375, "learning_rate": 0.0006082487334856463, "loss": 0.5899, "step": 48260 }, { "epoch": 2.3974371709546043, "grad_norm": 0.10595703125, "learning_rate": 0.0006082089997019966, "loss": 0.546, "step": 48270 }, { "epoch": 2.3979338432502235, "grad_norm": 0.162109375, "learning_rate": 0.0006081692659183471, "loss": 0.5731, "step": 48280 }, { "epoch": 2.3984305155458427, "grad_norm": 0.0927734375, "learning_rate": 0.0006081295321346976, "loss": 0.5569, "step": 48290 }, { "epoch": 2.398927187841462, "grad_norm": 0.10888671875, "learning_rate": 0.000608089798351048, "loss": 0.5787, "step": 48300 }, { "epoch": 2.3994238601370816, "grad_norm": 0.08837890625, "learning_rate": 0.0006080500645673985, "loss": 0.565, "step": 48310 }, { "epoch": 2.399920532432701, "grad_norm": 0.10205078125, "learning_rate": 0.0006080103307837489, "loss": 0.5364, "step": 48320 }, { "epoch": 2.40041720472832, "grad_norm": 0.10791015625, "learning_rate": 0.0006079705970000993, "loss": 0.5913, "step": 48330 }, { "epoch": 2.4009138770239398, "grad_norm": 0.11376953125, "learning_rate": 0.0006079308632164498, "loss": 0.5611, "step": 48340 }, { "epoch": 2.401410549319559, "grad_norm": 0.10888671875, "learning_rate": 0.0006078911294328003, "loss": 0.5501, "step": 48350 }, { "epoch": 2.4019072216151782, "grad_norm": 0.0927734375, "learning_rate": 0.0006078513956491507, "loss": 0.5486, "step": 48360 }, { "epoch": 2.4024038939107974, "grad_norm": 0.10400390625, "learning_rate": 0.0006078116618655012, "loss": 0.5591, "step": 48370 }, { "epoch": 2.402900566206417, "grad_norm": 0.1015625, "learning_rate": 0.0006077719280818516, "loss": 0.5502, "step": 48380 }, { "epoch": 2.4033972385020363, "grad_norm": 0.09521484375, "learning_rate": 0.0006077321942982021, "loss": 0.554, "step": 48390 }, { "epoch": 2.4038939107976556, "grad_norm": 0.16796875, "learning_rate": 0.0006076924605145526, "loss": 0.608, "step": 48400 }, { "epoch": 2.4043905830932752, "grad_norm": 0.19140625, "learning_rate": 0.000607652726730903, "loss": 0.5643, "step": 48410 }, { "epoch": 2.4048872553888945, "grad_norm": 0.10107421875, "learning_rate": 0.0006076129929472535, "loss": 0.5602, "step": 48420 }, { "epoch": 2.4053839276845137, "grad_norm": 0.09912109375, "learning_rate": 0.0006075732591636039, "loss": 0.5781, "step": 48430 }, { "epoch": 2.405880599980133, "grad_norm": 0.1220703125, "learning_rate": 0.0006075335253799543, "loss": 0.5666, "step": 48440 }, { "epoch": 2.4063772722757526, "grad_norm": 0.09228515625, "learning_rate": 0.0006074937915963049, "loss": 0.5493, "step": 48450 }, { "epoch": 2.406873944571372, "grad_norm": 0.1572265625, "learning_rate": 0.0006074540578126552, "loss": 0.5756, "step": 48460 }, { "epoch": 2.407370616866991, "grad_norm": 0.1826171875, "learning_rate": 0.0006074143240290057, "loss": 0.548, "step": 48470 }, { "epoch": 2.4078672891626107, "grad_norm": 0.1494140625, "learning_rate": 0.0006073745902453562, "loss": 0.5627, "step": 48480 }, { "epoch": 2.40836396145823, "grad_norm": 0.09716796875, "learning_rate": 0.0006073348564617065, "loss": 0.5822, "step": 48490 }, { "epoch": 2.408860633753849, "grad_norm": 0.109375, "learning_rate": 0.000607295122678057, "loss": 0.5511, "step": 48500 }, { "epoch": 2.4093573060494684, "grad_norm": 0.09521484375, "learning_rate": 0.0006072553888944075, "loss": 0.5496, "step": 48510 }, { "epoch": 2.409853978345088, "grad_norm": 0.12109375, "learning_rate": 0.0006072156551107579, "loss": 0.5663, "step": 48520 }, { "epoch": 2.4103506506407073, "grad_norm": 0.10302734375, "learning_rate": 0.0006071759213271084, "loss": 0.5617, "step": 48530 }, { "epoch": 2.4108473229363265, "grad_norm": 0.1435546875, "learning_rate": 0.0006071361875434588, "loss": 0.5533, "step": 48540 }, { "epoch": 2.411343995231946, "grad_norm": 0.1337890625, "learning_rate": 0.0006070964537598094, "loss": 0.5751, "step": 48550 }, { "epoch": 2.4118406675275654, "grad_norm": 0.11083984375, "learning_rate": 0.0006070567199761598, "loss": 0.5347, "step": 48560 }, { "epoch": 2.4123373398231847, "grad_norm": 0.099609375, "learning_rate": 0.0006070169861925102, "loss": 0.5762, "step": 48570 }, { "epoch": 2.412834012118804, "grad_norm": 0.10693359375, "learning_rate": 0.0006069772524088607, "loss": 0.5724, "step": 48580 }, { "epoch": 2.4133306844144236, "grad_norm": 0.0888671875, "learning_rate": 0.0006069375186252111, "loss": 0.5843, "step": 48590 }, { "epoch": 2.413827356710043, "grad_norm": 0.140625, "learning_rate": 0.0006068977848415615, "loss": 0.5724, "step": 48600 }, { "epoch": 2.414324029005662, "grad_norm": 0.1474609375, "learning_rate": 0.0006068580510579121, "loss": 0.5487, "step": 48610 }, { "epoch": 2.4148207013012812, "grad_norm": 0.08935546875, "learning_rate": 0.0006068183172742625, "loss": 0.547, "step": 48620 }, { "epoch": 2.415317373596901, "grad_norm": 0.115234375, "learning_rate": 0.0006067785834906129, "loss": 0.5484, "step": 48630 }, { "epoch": 2.41581404589252, "grad_norm": 0.1142578125, "learning_rate": 0.0006067388497069634, "loss": 0.565, "step": 48640 }, { "epoch": 2.4163107181881394, "grad_norm": 0.1083984375, "learning_rate": 0.0006066991159233137, "loss": 0.5757, "step": 48650 }, { "epoch": 2.4168073904837586, "grad_norm": 0.0986328125, "learning_rate": 0.0006066593821396643, "loss": 0.5451, "step": 48660 }, { "epoch": 2.4173040627793783, "grad_norm": 0.10498046875, "learning_rate": 0.0006066196483560148, "loss": 0.5837, "step": 48670 }, { "epoch": 2.4178007350749975, "grad_norm": 0.1083984375, "learning_rate": 0.0006065799145723652, "loss": 0.5708, "step": 48680 }, { "epoch": 2.4182974073706167, "grad_norm": 0.126953125, "learning_rate": 0.0006065401807887156, "loss": 0.565, "step": 48690 }, { "epoch": 2.4187940796662364, "grad_norm": 0.09716796875, "learning_rate": 0.000606500447005066, "loss": 0.5489, "step": 48700 }, { "epoch": 2.4192907519618556, "grad_norm": 0.10498046875, "learning_rate": 0.0006064607132214166, "loss": 0.5792, "step": 48710 }, { "epoch": 2.419787424257475, "grad_norm": 0.1162109375, "learning_rate": 0.000606420979437767, "loss": 0.5766, "step": 48720 }, { "epoch": 2.420284096553094, "grad_norm": 0.1669921875, "learning_rate": 0.0006063812456541174, "loss": 0.5633, "step": 48730 }, { "epoch": 2.4207807688487137, "grad_norm": 0.1181640625, "learning_rate": 0.0006063415118704679, "loss": 0.5718, "step": 48740 }, { "epoch": 2.421277441144333, "grad_norm": 0.1123046875, "learning_rate": 0.0006063017780868183, "loss": 0.573, "step": 48750 }, { "epoch": 2.421774113439952, "grad_norm": 0.1005859375, "learning_rate": 0.0006062620443031688, "loss": 0.5696, "step": 48760 }, { "epoch": 2.422270785735572, "grad_norm": 0.08984375, "learning_rate": 0.0006062223105195193, "loss": 0.5642, "step": 48770 }, { "epoch": 2.422767458031191, "grad_norm": 0.09912109375, "learning_rate": 0.0006061825767358697, "loss": 0.5697, "step": 48780 }, { "epoch": 2.4232641303268103, "grad_norm": 0.1162109375, "learning_rate": 0.0006061428429522201, "loss": 0.5572, "step": 48790 }, { "epoch": 2.4237608026224295, "grad_norm": 0.1044921875, "learning_rate": 0.0006061031091685706, "loss": 0.5505, "step": 48800 }, { "epoch": 2.424257474918049, "grad_norm": 0.0888671875, "learning_rate": 0.0006060633753849211, "loss": 0.5675, "step": 48810 }, { "epoch": 2.4247541472136684, "grad_norm": 0.10302734375, "learning_rate": 0.0006060236416012715, "loss": 0.5403, "step": 48820 }, { "epoch": 2.4252508195092877, "grad_norm": 0.10595703125, "learning_rate": 0.000605983907817622, "loss": 0.5562, "step": 48830 }, { "epoch": 2.4257474918049073, "grad_norm": 0.099609375, "learning_rate": 0.0006059441740339724, "loss": 0.5669, "step": 48840 }, { "epoch": 2.4262441641005266, "grad_norm": 0.1220703125, "learning_rate": 0.0006059044402503228, "loss": 0.5864, "step": 48850 }, { "epoch": 2.426740836396146, "grad_norm": 0.09423828125, "learning_rate": 0.0006058647064666734, "loss": 0.553, "step": 48860 }, { "epoch": 2.427237508691765, "grad_norm": 0.1259765625, "learning_rate": 0.0006058249726830238, "loss": 0.5676, "step": 48870 }, { "epoch": 2.4277341809873847, "grad_norm": 0.0947265625, "learning_rate": 0.0006057852388993742, "loss": 0.543, "step": 48880 }, { "epoch": 2.428230853283004, "grad_norm": 0.0927734375, "learning_rate": 0.0006057455051157247, "loss": 0.5484, "step": 48890 }, { "epoch": 2.428727525578623, "grad_norm": 0.12890625, "learning_rate": 0.0006057057713320751, "loss": 0.5772, "step": 48900 }, { "epoch": 2.429224197874243, "grad_norm": 0.1162109375, "learning_rate": 0.0006056660375484256, "loss": 0.5984, "step": 48910 }, { "epoch": 2.429720870169862, "grad_norm": 0.103515625, "learning_rate": 0.000605626303764776, "loss": 0.574, "step": 48920 }, { "epoch": 2.4302175424654813, "grad_norm": 0.10693359375, "learning_rate": 0.0006055865699811265, "loss": 0.5479, "step": 48930 }, { "epoch": 2.4307142147611005, "grad_norm": 0.103515625, "learning_rate": 0.0006055468361974769, "loss": 0.5385, "step": 48940 }, { "epoch": 2.4312108870567197, "grad_norm": 0.103515625, "learning_rate": 0.0006055071024138273, "loss": 0.5857, "step": 48950 }, { "epoch": 2.4317075593523394, "grad_norm": 0.09912109375, "learning_rate": 0.0006054673686301779, "loss": 0.5407, "step": 48960 }, { "epoch": 2.4322042316479586, "grad_norm": 0.1533203125, "learning_rate": 0.0006054276348465283, "loss": 0.577, "step": 48970 }, { "epoch": 2.432700903943578, "grad_norm": 0.10205078125, "learning_rate": 0.0006053879010628787, "loss": 0.5924, "step": 48980 }, { "epoch": 2.4331975762391975, "grad_norm": 0.134765625, "learning_rate": 0.0006053481672792292, "loss": 0.5802, "step": 48990 }, { "epoch": 2.4336942485348168, "grad_norm": 0.1494140625, "learning_rate": 0.0006053084334955796, "loss": 0.5521, "step": 49000 }, { "epoch": 2.434190920830436, "grad_norm": 0.0966796875, "learning_rate": 0.0006052686997119301, "loss": 0.5648, "step": 49010 }, { "epoch": 2.434687593126055, "grad_norm": 0.0947265625, "learning_rate": 0.0006052289659282806, "loss": 0.58, "step": 49020 }, { "epoch": 2.435184265421675, "grad_norm": 0.09619140625, "learning_rate": 0.000605189232144631, "loss": 0.5418, "step": 49030 }, { "epoch": 2.435680937717294, "grad_norm": 0.09912109375, "learning_rate": 0.0006051494983609814, "loss": 0.5535, "step": 49040 }, { "epoch": 2.4361776100129133, "grad_norm": 0.10986328125, "learning_rate": 0.0006051097645773319, "loss": 0.5718, "step": 49050 }, { "epoch": 2.436674282308533, "grad_norm": 0.1083984375, "learning_rate": 0.0006050700307936824, "loss": 0.5783, "step": 49060 }, { "epoch": 2.4371709546041522, "grad_norm": 0.119140625, "learning_rate": 0.0006050302970100328, "loss": 0.5711, "step": 49070 }, { "epoch": 2.4376676268997715, "grad_norm": 0.107421875, "learning_rate": 0.0006049905632263833, "loss": 0.5727, "step": 49080 }, { "epoch": 2.4381642991953907, "grad_norm": 0.1474609375, "learning_rate": 0.0006049508294427337, "loss": 0.567, "step": 49090 }, { "epoch": 2.4386609714910104, "grad_norm": 0.140625, "learning_rate": 0.0006049110956590841, "loss": 0.573, "step": 49100 }, { "epoch": 2.4391576437866296, "grad_norm": 0.11474609375, "learning_rate": 0.0006048713618754346, "loss": 0.5678, "step": 49110 }, { "epoch": 2.439654316082249, "grad_norm": 0.126953125, "learning_rate": 0.0006048316280917851, "loss": 0.5699, "step": 49120 }, { "epoch": 2.4401509883778685, "grad_norm": 0.10107421875, "learning_rate": 0.0006047918943081356, "loss": 0.5472, "step": 49130 }, { "epoch": 2.4406476606734877, "grad_norm": 0.1337890625, "learning_rate": 0.0006047521605244859, "loss": 0.5707, "step": 49140 }, { "epoch": 2.441144332969107, "grad_norm": 0.1162109375, "learning_rate": 0.0006047124267408364, "loss": 0.5915, "step": 49150 }, { "epoch": 2.441641005264726, "grad_norm": 0.09912109375, "learning_rate": 0.000604672692957187, "loss": 0.5795, "step": 49160 }, { "epoch": 2.442137677560346, "grad_norm": 0.1728515625, "learning_rate": 0.0006046329591735373, "loss": 0.5591, "step": 49170 }, { "epoch": 2.442634349855965, "grad_norm": 0.0947265625, "learning_rate": 0.0006045932253898878, "loss": 0.5705, "step": 49180 }, { "epoch": 2.4431310221515843, "grad_norm": 0.12109375, "learning_rate": 0.0006045534916062382, "loss": 0.5731, "step": 49190 }, { "epoch": 2.443627694447204, "grad_norm": 0.1396484375, "learning_rate": 0.0006045137578225886, "loss": 0.5761, "step": 49200 }, { "epoch": 2.444124366742823, "grad_norm": 0.10302734375, "learning_rate": 0.0006044740240389392, "loss": 0.5612, "step": 49210 }, { "epoch": 2.4446210390384424, "grad_norm": 0.1591796875, "learning_rate": 0.0006044342902552896, "loss": 0.5573, "step": 49220 }, { "epoch": 2.4451177113340616, "grad_norm": 0.126953125, "learning_rate": 0.00060439455647164, "loss": 0.5884, "step": 49230 }, { "epoch": 2.4456143836296813, "grad_norm": 0.1220703125, "learning_rate": 0.0006043548226879905, "loss": 0.5599, "step": 49240 }, { "epoch": 2.4461110559253005, "grad_norm": 0.09375, "learning_rate": 0.0006043150889043409, "loss": 0.5611, "step": 49250 }, { "epoch": 2.4466077282209198, "grad_norm": 0.10888671875, "learning_rate": 0.0006042753551206914, "loss": 0.5909, "step": 49260 }, { "epoch": 2.4471044005165394, "grad_norm": 0.1005859375, "learning_rate": 0.0006042356213370419, "loss": 0.5518, "step": 49270 }, { "epoch": 2.4476010728121587, "grad_norm": 0.1005859375, "learning_rate": 0.0006041958875533923, "loss": 0.5707, "step": 49280 }, { "epoch": 2.448097745107778, "grad_norm": 0.1123046875, "learning_rate": 0.0006041561537697428, "loss": 0.5541, "step": 49290 }, { "epoch": 2.448594417403397, "grad_norm": 0.10693359375, "learning_rate": 0.0006041164199860931, "loss": 0.5475, "step": 49300 }, { "epoch": 2.4490910896990163, "grad_norm": 0.10302734375, "learning_rate": 0.0006040766862024437, "loss": 0.5545, "step": 49310 }, { "epoch": 2.449587761994636, "grad_norm": 0.1630859375, "learning_rate": 0.0006040369524187942, "loss": 0.554, "step": 49320 }, { "epoch": 2.4500844342902552, "grad_norm": 0.10546875, "learning_rate": 0.0006039972186351445, "loss": 0.553, "step": 49330 }, { "epoch": 2.4505811065858745, "grad_norm": 0.09814453125, "learning_rate": 0.000603957484851495, "loss": 0.578, "step": 49340 }, { "epoch": 2.451077778881494, "grad_norm": 0.099609375, "learning_rate": 0.0006039177510678455, "loss": 0.5772, "step": 49350 }, { "epoch": 2.4515744511771134, "grad_norm": 0.099609375, "learning_rate": 0.0006038780172841959, "loss": 0.5496, "step": 49360 }, { "epoch": 2.4520711234727326, "grad_norm": 0.0888671875, "learning_rate": 0.0006038382835005464, "loss": 0.5625, "step": 49370 }, { "epoch": 2.452567795768352, "grad_norm": 0.109375, "learning_rate": 0.0006037985497168968, "loss": 0.5499, "step": 49380 }, { "epoch": 2.4530644680639715, "grad_norm": 0.1103515625, "learning_rate": 0.0006037588159332472, "loss": 0.589, "step": 49390 }, { "epoch": 2.4535611403595907, "grad_norm": 0.0966796875, "learning_rate": 0.0006037190821495977, "loss": 0.5583, "step": 49400 }, { "epoch": 2.45405781265521, "grad_norm": 0.0947265625, "learning_rate": 0.0006036793483659482, "loss": 0.5581, "step": 49410 }, { "epoch": 2.4545544849508296, "grad_norm": 0.103515625, "learning_rate": 0.0006036396145822986, "loss": 0.5714, "step": 49420 }, { "epoch": 2.455051157246449, "grad_norm": 0.1171875, "learning_rate": 0.0006035998807986491, "loss": 0.5595, "step": 49430 }, { "epoch": 2.455547829542068, "grad_norm": 0.1474609375, "learning_rate": 0.0006035601470149995, "loss": 0.5451, "step": 49440 }, { "epoch": 2.4560445018376873, "grad_norm": 0.125, "learning_rate": 0.00060352041323135, "loss": 0.5666, "step": 49450 }, { "epoch": 2.456541174133307, "grad_norm": 0.1142578125, "learning_rate": 0.0006034806794477005, "loss": 0.6094, "step": 49460 }, { "epoch": 2.457037846428926, "grad_norm": 0.125, "learning_rate": 0.0006034409456640509, "loss": 0.5374, "step": 49470 }, { "epoch": 2.4575345187245454, "grad_norm": 0.11669921875, "learning_rate": 0.0006034012118804014, "loss": 0.5574, "step": 49480 }, { "epoch": 2.458031191020165, "grad_norm": 0.146484375, "learning_rate": 0.0006033614780967518, "loss": 0.5301, "step": 49490 }, { "epoch": 2.4585278633157843, "grad_norm": 0.12109375, "learning_rate": 0.0006033217443131022, "loss": 0.5728, "step": 49500 }, { "epoch": 2.4590245356114036, "grad_norm": 0.1455078125, "learning_rate": 0.0006032820105294528, "loss": 0.5635, "step": 49510 }, { "epoch": 2.4595212079070228, "grad_norm": 0.10693359375, "learning_rate": 0.0006032422767458031, "loss": 0.5471, "step": 49520 }, { "epoch": 2.4600178802026424, "grad_norm": 0.146484375, "learning_rate": 0.0006032025429621536, "loss": 0.5895, "step": 49530 }, { "epoch": 2.4605145524982617, "grad_norm": 0.1103515625, "learning_rate": 0.0006031628091785041, "loss": 0.5626, "step": 49540 }, { "epoch": 2.461011224793881, "grad_norm": 0.09228515625, "learning_rate": 0.0006031230753948544, "loss": 0.5692, "step": 49550 }, { "epoch": 2.4615078970895006, "grad_norm": 0.1279296875, "learning_rate": 0.000603083341611205, "loss": 0.5512, "step": 49560 }, { "epoch": 2.46200456938512, "grad_norm": 0.1328125, "learning_rate": 0.0006030436078275554, "loss": 0.5326, "step": 49570 }, { "epoch": 2.462501241680739, "grad_norm": 0.11767578125, "learning_rate": 0.0006030038740439059, "loss": 0.5489, "step": 49580 }, { "epoch": 2.4629979139763583, "grad_norm": 0.09521484375, "learning_rate": 0.0006029641402602563, "loss": 0.5411, "step": 49590 }, { "epoch": 2.463494586271978, "grad_norm": 0.0947265625, "learning_rate": 0.0006029244064766067, "loss": 0.571, "step": 49600 }, { "epoch": 2.463991258567597, "grad_norm": 0.10205078125, "learning_rate": 0.0006028846726929573, "loss": 0.5845, "step": 49610 }, { "epoch": 2.4644879308632164, "grad_norm": 0.1318359375, "learning_rate": 0.0006028449389093077, "loss": 0.5634, "step": 49620 }, { "epoch": 2.4649846031588356, "grad_norm": 0.11865234375, "learning_rate": 0.0006028052051256581, "loss": 0.5268, "step": 49630 }, { "epoch": 2.4654812754544553, "grad_norm": 0.09521484375, "learning_rate": 0.0006027654713420086, "loss": 0.5525, "step": 49640 }, { "epoch": 2.4659779477500745, "grad_norm": 0.12451171875, "learning_rate": 0.000602725737558359, "loss": 0.5734, "step": 49650 }, { "epoch": 2.4664746200456937, "grad_norm": 0.115234375, "learning_rate": 0.0006026860037747094, "loss": 0.581, "step": 49660 }, { "epoch": 2.466971292341313, "grad_norm": 0.10302734375, "learning_rate": 0.00060264626999106, "loss": 0.5424, "step": 49670 }, { "epoch": 2.4674679646369326, "grad_norm": 0.109375, "learning_rate": 0.0006026065362074104, "loss": 0.5855, "step": 49680 }, { "epoch": 2.467964636932552, "grad_norm": 0.109375, "learning_rate": 0.0006025668024237608, "loss": 0.5456, "step": 49690 }, { "epoch": 2.468461309228171, "grad_norm": 0.1708984375, "learning_rate": 0.0006025270686401113, "loss": 0.5672, "step": 49700 }, { "epoch": 2.4689579815237908, "grad_norm": 0.19921875, "learning_rate": 0.0006024873348564616, "loss": 0.5702, "step": 49710 }, { "epoch": 2.46945465381941, "grad_norm": 0.09716796875, "learning_rate": 0.0006024476010728122, "loss": 0.5975, "step": 49720 }, { "epoch": 2.469951326115029, "grad_norm": 0.109375, "learning_rate": 0.0006024078672891627, "loss": 0.5456, "step": 49730 }, { "epoch": 2.4704479984106484, "grad_norm": 0.126953125, "learning_rate": 0.0006023681335055131, "loss": 0.5342, "step": 49740 }, { "epoch": 2.470944670706268, "grad_norm": 0.115234375, "learning_rate": 0.0006023283997218635, "loss": 0.5671, "step": 49750 }, { "epoch": 2.4714413430018873, "grad_norm": 0.1064453125, "learning_rate": 0.0006022886659382141, "loss": 0.5535, "step": 49760 }, { "epoch": 2.4719380152975066, "grad_norm": 0.134765625, "learning_rate": 0.0006022489321545645, "loss": 0.5873, "step": 49770 }, { "epoch": 2.4724346875931262, "grad_norm": 0.09375, "learning_rate": 0.0006022091983709149, "loss": 0.5657, "step": 49780 }, { "epoch": 2.4729313598887455, "grad_norm": 0.1103515625, "learning_rate": 0.0006021694645872653, "loss": 0.5449, "step": 49790 }, { "epoch": 2.4734280321843647, "grad_norm": 0.1162109375, "learning_rate": 0.0006021297308036158, "loss": 0.5494, "step": 49800 }, { "epoch": 2.473924704479984, "grad_norm": 0.10498046875, "learning_rate": 0.0006020899970199662, "loss": 0.5704, "step": 49810 }, { "epoch": 2.4744213767756036, "grad_norm": 0.1044921875, "learning_rate": 0.0006020502632363167, "loss": 0.5526, "step": 49820 }, { "epoch": 2.474918049071223, "grad_norm": 0.09130859375, "learning_rate": 0.0006020105294526672, "loss": 0.5606, "step": 49830 }, { "epoch": 2.475414721366842, "grad_norm": 0.1142578125, "learning_rate": 0.0006019707956690176, "loss": 0.588, "step": 49840 }, { "epoch": 2.4759113936624617, "grad_norm": 0.1181640625, "learning_rate": 0.000601931061885368, "loss": 0.5424, "step": 49850 }, { "epoch": 2.476408065958081, "grad_norm": 0.10400390625, "learning_rate": 0.0006018913281017186, "loss": 0.5562, "step": 49860 }, { "epoch": 2.4769047382537, "grad_norm": 0.1376953125, "learning_rate": 0.000601851594318069, "loss": 0.545, "step": 49870 }, { "epoch": 2.4774014105493194, "grad_norm": 0.220703125, "learning_rate": 0.0006018118605344194, "loss": 0.5647, "step": 49880 }, { "epoch": 2.477898082844939, "grad_norm": 0.1220703125, "learning_rate": 0.0006017721267507699, "loss": 0.5427, "step": 49890 }, { "epoch": 2.4783947551405583, "grad_norm": 0.08837890625, "learning_rate": 0.0006017323929671203, "loss": 0.5381, "step": 49900 }, { "epoch": 2.4788914274361775, "grad_norm": 0.138671875, "learning_rate": 0.0006016926591834707, "loss": 0.555, "step": 49910 }, { "epoch": 2.479388099731797, "grad_norm": 0.09912109375, "learning_rate": 0.0006016529253998213, "loss": 0.5809, "step": 49920 }, { "epoch": 2.4798847720274164, "grad_norm": 0.0927734375, "learning_rate": 0.0006016131916161717, "loss": 0.5339, "step": 49930 }, { "epoch": 2.4803814443230356, "grad_norm": 0.10595703125, "learning_rate": 0.0006015734578325221, "loss": 0.5717, "step": 49940 }, { "epoch": 2.480878116618655, "grad_norm": 0.10693359375, "learning_rate": 0.0006015337240488726, "loss": 0.5455, "step": 49950 }, { "epoch": 2.4813747889142745, "grad_norm": 0.11962890625, "learning_rate": 0.000601493990265223, "loss": 0.5582, "step": 49960 }, { "epoch": 2.4818714612098938, "grad_norm": 0.162109375, "learning_rate": 0.0006014542564815735, "loss": 0.588, "step": 49970 }, { "epoch": 2.482368133505513, "grad_norm": 0.10595703125, "learning_rate": 0.0006014145226979239, "loss": 0.5391, "step": 49980 }, { "epoch": 2.4828648058011322, "grad_norm": 0.1279296875, "learning_rate": 0.0006013747889142744, "loss": 0.5665, "step": 49990 }, { "epoch": 2.483361478096752, "grad_norm": 0.12158203125, "learning_rate": 0.0006013350551306248, "loss": 0.5499, "step": 50000 }, { "epoch": 2.483858150392371, "grad_norm": 0.1376953125, "learning_rate": 0.0006012953213469752, "loss": 0.5816, "step": 50010 }, { "epoch": 2.4843548226879903, "grad_norm": 0.095703125, "learning_rate": 0.0006012555875633258, "loss": 0.5779, "step": 50020 }, { "epoch": 2.4848514949836096, "grad_norm": 0.10986328125, "learning_rate": 0.0006012158537796763, "loss": 0.526, "step": 50030 }, { "epoch": 2.4853481672792292, "grad_norm": 0.1416015625, "learning_rate": 0.0006011761199960266, "loss": 0.5614, "step": 50040 }, { "epoch": 2.4858448395748485, "grad_norm": 0.0986328125, "learning_rate": 0.0006011363862123771, "loss": 0.5772, "step": 50050 }, { "epoch": 2.4863415118704677, "grad_norm": 0.10302734375, "learning_rate": 0.0006010966524287275, "loss": 0.5888, "step": 50060 }, { "epoch": 2.4868381841660874, "grad_norm": 0.09521484375, "learning_rate": 0.000601056918645078, "loss": 0.5385, "step": 50070 }, { "epoch": 2.4873348564617066, "grad_norm": 0.10498046875, "learning_rate": 0.0006010171848614285, "loss": 0.6059, "step": 50080 }, { "epoch": 2.487831528757326, "grad_norm": 0.11962890625, "learning_rate": 0.0006009774510777789, "loss": 0.5604, "step": 50090 }, { "epoch": 2.488328201052945, "grad_norm": 0.10302734375, "learning_rate": 0.0006009377172941293, "loss": 0.5387, "step": 50100 }, { "epoch": 2.4888248733485647, "grad_norm": 0.12890625, "learning_rate": 0.0006008979835104798, "loss": 0.5603, "step": 50110 }, { "epoch": 2.489321545644184, "grad_norm": 0.1494140625, "learning_rate": 0.0006008582497268303, "loss": 0.5659, "step": 50120 }, { "epoch": 2.489818217939803, "grad_norm": 0.1279296875, "learning_rate": 0.0006008185159431807, "loss": 0.5607, "step": 50130 }, { "epoch": 2.490314890235423, "grad_norm": 0.09912109375, "learning_rate": 0.0006007787821595312, "loss": 0.5308, "step": 50140 }, { "epoch": 2.490811562531042, "grad_norm": 0.150390625, "learning_rate": 0.0006007390483758816, "loss": 0.5595, "step": 50150 }, { "epoch": 2.4913082348266613, "grad_norm": 0.12060546875, "learning_rate": 0.000600699314592232, "loss": 0.5742, "step": 50160 }, { "epoch": 2.4918049071222805, "grad_norm": 0.1259765625, "learning_rate": 0.0006006595808085825, "loss": 0.5578, "step": 50170 }, { "epoch": 2.4923015794179, "grad_norm": 0.09130859375, "learning_rate": 0.000600619847024933, "loss": 0.5511, "step": 50180 }, { "epoch": 2.4927982517135194, "grad_norm": 0.1748046875, "learning_rate": 0.0006005801132412835, "loss": 0.5548, "step": 50190 }, { "epoch": 2.4932949240091387, "grad_norm": 0.10546875, "learning_rate": 0.0006005403794576338, "loss": 0.5776, "step": 50200 }, { "epoch": 2.4937915963047583, "grad_norm": 0.0947265625, "learning_rate": 0.0006005006456739843, "loss": 0.5829, "step": 50210 }, { "epoch": 2.4942882686003776, "grad_norm": 0.11279296875, "learning_rate": 0.0006004609118903349, "loss": 0.5768, "step": 50220 }, { "epoch": 2.494784940895997, "grad_norm": 0.15234375, "learning_rate": 0.0006004211781066852, "loss": 0.5617, "step": 50230 }, { "epoch": 2.495281613191616, "grad_norm": 0.16796875, "learning_rate": 0.0006003814443230357, "loss": 0.5578, "step": 50240 }, { "epoch": 2.4957782854872357, "grad_norm": 0.1259765625, "learning_rate": 0.0006003417105393861, "loss": 0.5624, "step": 50250 }, { "epoch": 2.496274957782855, "grad_norm": 0.1044921875, "learning_rate": 0.0006003019767557365, "loss": 0.5755, "step": 50260 }, { "epoch": 2.496771630078474, "grad_norm": 0.10986328125, "learning_rate": 0.0006002622429720871, "loss": 0.5617, "step": 50270 }, { "epoch": 2.497268302374094, "grad_norm": 0.22265625, "learning_rate": 0.0006002225091884375, "loss": 0.538, "step": 50280 }, { "epoch": 2.497764974669713, "grad_norm": 0.1103515625, "learning_rate": 0.0006001827754047879, "loss": 0.5541, "step": 50290 }, { "epoch": 2.4982616469653323, "grad_norm": 0.1162109375, "learning_rate": 0.0006001430416211384, "loss": 0.5606, "step": 50300 }, { "epoch": 2.4987583192609515, "grad_norm": 0.1123046875, "learning_rate": 0.0006001033078374888, "loss": 0.5533, "step": 50310 }, { "epoch": 2.499254991556571, "grad_norm": 0.1015625, "learning_rate": 0.0006000635740538393, "loss": 0.5404, "step": 50320 }, { "epoch": 2.4997516638521904, "grad_norm": 0.095703125, "learning_rate": 0.0006000238402701898, "loss": 0.5438, "step": 50330 }, { "epoch": 2.5002483361478096, "grad_norm": 0.126953125, "learning_rate": 0.0005999841064865402, "loss": 0.556, "step": 50340 }, { "epoch": 2.5007450084434293, "grad_norm": 0.125, "learning_rate": 0.0005999443727028907, "loss": 0.589, "step": 50350 }, { "epoch": 2.5012416807390485, "grad_norm": 0.11279296875, "learning_rate": 0.0005999046389192411, "loss": 0.5908, "step": 50360 }, { "epoch": 2.5017383530346677, "grad_norm": 0.09716796875, "learning_rate": 0.0005998649051355916, "loss": 0.5258, "step": 50370 }, { "epoch": 2.502235025330287, "grad_norm": 0.11474609375, "learning_rate": 0.0005998251713519421, "loss": 0.5685, "step": 50380 }, { "epoch": 2.502731697625906, "grad_norm": 0.0908203125, "learning_rate": 0.0005997854375682924, "loss": 0.5652, "step": 50390 }, { "epoch": 2.503228369921526, "grad_norm": 0.10009765625, "learning_rate": 0.0005997457037846429, "loss": 0.5472, "step": 50400 }, { "epoch": 2.503725042217145, "grad_norm": 0.1884765625, "learning_rate": 0.0005997059700009934, "loss": 0.5512, "step": 50410 }, { "epoch": 2.5042217145127643, "grad_norm": 0.095703125, "learning_rate": 0.0005996662362173438, "loss": 0.5654, "step": 50420 }, { "epoch": 2.504718386808384, "grad_norm": 0.11181640625, "learning_rate": 0.0005996265024336943, "loss": 0.5999, "step": 50430 }, { "epoch": 2.505215059104003, "grad_norm": 0.10107421875, "learning_rate": 0.0005995867686500447, "loss": 0.555, "step": 50440 }, { "epoch": 2.5057117313996224, "grad_norm": 0.10498046875, "learning_rate": 0.0005995470348663951, "loss": 0.5657, "step": 50450 }, { "epoch": 2.5062084036952417, "grad_norm": 0.1005859375, "learning_rate": 0.0005995073010827456, "loss": 0.5458, "step": 50460 }, { "epoch": 2.5067050759908613, "grad_norm": 0.1025390625, "learning_rate": 0.0005994675672990961, "loss": 0.565, "step": 50470 }, { "epoch": 2.5072017482864806, "grad_norm": 0.10400390625, "learning_rate": 0.0005994278335154466, "loss": 0.566, "step": 50480 }, { "epoch": 2.5076984205821, "grad_norm": 0.13671875, "learning_rate": 0.000599388099731797, "loss": 0.5603, "step": 50490 }, { "epoch": 2.5081950928777195, "grad_norm": 0.11376953125, "learning_rate": 0.0005993483659481474, "loss": 0.544, "step": 50500 }, { "epoch": 2.5086917651733387, "grad_norm": 0.150390625, "learning_rate": 0.0005993086321644979, "loss": 0.5736, "step": 50510 }, { "epoch": 2.509188437468958, "grad_norm": 0.126953125, "learning_rate": 0.0005992688983808484, "loss": 0.5321, "step": 50520 }, { "epoch": 2.509685109764577, "grad_norm": 0.09521484375, "learning_rate": 0.0005992291645971988, "loss": 0.5619, "step": 50530 }, { "epoch": 2.510181782060197, "grad_norm": 0.10205078125, "learning_rate": 0.0005991894308135493, "loss": 0.5561, "step": 50540 }, { "epoch": 2.510678454355816, "grad_norm": 0.15234375, "learning_rate": 0.0005991496970298997, "loss": 0.5537, "step": 50550 }, { "epoch": 2.5111751266514353, "grad_norm": 0.1025390625, "learning_rate": 0.0005991099632462501, "loss": 0.5722, "step": 50560 }, { "epoch": 2.511671798947055, "grad_norm": 0.09521484375, "learning_rate": 0.0005990702294626007, "loss": 0.5708, "step": 50570 }, { "epoch": 2.512168471242674, "grad_norm": 0.11376953125, "learning_rate": 0.000599030495678951, "loss": 0.5803, "step": 50580 }, { "epoch": 2.5126651435382934, "grad_norm": 0.12255859375, "learning_rate": 0.0005989907618953015, "loss": 0.5337, "step": 50590 }, { "epoch": 2.5131618158339126, "grad_norm": 0.09521484375, "learning_rate": 0.000598951028111652, "loss": 0.5547, "step": 50600 }, { "epoch": 2.5136584881295323, "grad_norm": 0.0966796875, "learning_rate": 0.0005989112943280023, "loss": 0.5613, "step": 50610 }, { "epoch": 2.5141551604251515, "grad_norm": 0.115234375, "learning_rate": 0.0005988715605443529, "loss": 0.5465, "step": 50620 }, { "epoch": 2.5146518327207708, "grad_norm": 0.12353515625, "learning_rate": 0.0005988318267607034, "loss": 0.5655, "step": 50630 }, { "epoch": 2.5151485050163904, "grad_norm": 0.09814453125, "learning_rate": 0.0005987920929770538, "loss": 0.5754, "step": 50640 }, { "epoch": 2.5156451773120097, "grad_norm": 0.1044921875, "learning_rate": 0.0005987523591934042, "loss": 0.5651, "step": 50650 }, { "epoch": 2.516141849607629, "grad_norm": 0.10107421875, "learning_rate": 0.0005987126254097546, "loss": 0.5524, "step": 50660 }, { "epoch": 2.516638521903248, "grad_norm": 0.0947265625, "learning_rate": 0.0005986728916261052, "loss": 0.5469, "step": 50670 }, { "epoch": 2.5171351941988673, "grad_norm": 0.119140625, "learning_rate": 0.0005986331578424556, "loss": 0.5544, "step": 50680 }, { "epoch": 2.517631866494487, "grad_norm": 0.09765625, "learning_rate": 0.000598593424058806, "loss": 0.5957, "step": 50690 }, { "epoch": 2.5181285387901062, "grad_norm": 0.09423828125, "learning_rate": 0.0005985536902751565, "loss": 0.5565, "step": 50700 }, { "epoch": 2.518625211085726, "grad_norm": 0.1025390625, "learning_rate": 0.0005985139564915069, "loss": 0.574, "step": 50710 }, { "epoch": 2.519121883381345, "grad_norm": 0.154296875, "learning_rate": 0.0005984742227078574, "loss": 0.5723, "step": 50720 }, { "epoch": 2.5196185556769644, "grad_norm": 0.11767578125, "learning_rate": 0.0005984344889242079, "loss": 0.5467, "step": 50730 }, { "epoch": 2.5201152279725836, "grad_norm": 0.10009765625, "learning_rate": 0.0005983947551405583, "loss": 0.5421, "step": 50740 }, { "epoch": 2.520611900268203, "grad_norm": 0.10498046875, "learning_rate": 0.0005983550213569087, "loss": 0.5228, "step": 50750 }, { "epoch": 2.5211085725638225, "grad_norm": 0.103515625, "learning_rate": 0.0005983152875732592, "loss": 0.5707, "step": 50760 }, { "epoch": 2.5216052448594417, "grad_norm": 0.1142578125, "learning_rate": 0.0005982755537896097, "loss": 0.5645, "step": 50770 }, { "epoch": 2.522101917155061, "grad_norm": 0.10205078125, "learning_rate": 0.0005982358200059601, "loss": 0.563, "step": 50780 }, { "epoch": 2.5225985894506806, "grad_norm": 0.140625, "learning_rate": 0.0005981960862223106, "loss": 0.5781, "step": 50790 }, { "epoch": 2.5230952617463, "grad_norm": 0.1396484375, "learning_rate": 0.000598156352438661, "loss": 0.5269, "step": 50800 }, { "epoch": 2.523591934041919, "grad_norm": 0.126953125, "learning_rate": 0.0005981166186550114, "loss": 0.55, "step": 50810 }, { "epoch": 2.5240886063375383, "grad_norm": 0.1083984375, "learning_rate": 0.000598076884871362, "loss": 0.5676, "step": 50820 }, { "epoch": 2.524585278633158, "grad_norm": 0.1103515625, "learning_rate": 0.0005980371510877124, "loss": 0.5254, "step": 50830 }, { "epoch": 2.525081950928777, "grad_norm": 0.10205078125, "learning_rate": 0.0005979974173040628, "loss": 0.5673, "step": 50840 }, { "epoch": 2.5255786232243964, "grad_norm": 0.0986328125, "learning_rate": 0.0005979576835204132, "loss": 0.5848, "step": 50850 }, { "epoch": 2.526075295520016, "grad_norm": 0.177734375, "learning_rate": 0.0005979179497367637, "loss": 0.5343, "step": 50860 }, { "epoch": 2.5265719678156353, "grad_norm": 0.09814453125, "learning_rate": 0.0005978782159531142, "loss": 0.5898, "step": 50870 }, { "epoch": 2.5270686401112545, "grad_norm": 0.1103515625, "learning_rate": 0.0005978384821694646, "loss": 0.5488, "step": 50880 }, { "epoch": 2.5275653124068738, "grad_norm": 0.11279296875, "learning_rate": 0.0005977987483858151, "loss": 0.5552, "step": 50890 }, { "epoch": 2.5280619847024934, "grad_norm": 0.11669921875, "learning_rate": 0.0005977590146021655, "loss": 0.5616, "step": 50900 }, { "epoch": 2.5285586569981127, "grad_norm": 0.09619140625, "learning_rate": 0.0005977192808185159, "loss": 0.5752, "step": 50910 }, { "epoch": 2.529055329293732, "grad_norm": 0.11376953125, "learning_rate": 0.0005976795470348665, "loss": 0.5545, "step": 50920 }, { "epoch": 2.5295520015893516, "grad_norm": 0.10107421875, "learning_rate": 0.0005976398132512169, "loss": 0.5562, "step": 50930 }, { "epoch": 2.530048673884971, "grad_norm": 0.1396484375, "learning_rate": 0.0005976000794675673, "loss": 0.56, "step": 50940 }, { "epoch": 2.53054534618059, "grad_norm": 0.10107421875, "learning_rate": 0.0005975603456839178, "loss": 0.5614, "step": 50950 }, { "epoch": 2.5310420184762092, "grad_norm": 0.1064453125, "learning_rate": 0.0005975206119002682, "loss": 0.5647, "step": 50960 }, { "epoch": 2.5315386907718285, "grad_norm": 0.0986328125, "learning_rate": 0.0005974808781166186, "loss": 0.5495, "step": 50970 }, { "epoch": 2.532035363067448, "grad_norm": 0.09912109375, "learning_rate": 0.0005974411443329692, "loss": 0.5878, "step": 50980 }, { "epoch": 2.5325320353630674, "grad_norm": 0.1240234375, "learning_rate": 0.0005974014105493196, "loss": 0.5465, "step": 50990 }, { "epoch": 2.533028707658687, "grad_norm": 0.11083984375, "learning_rate": 0.00059736167676567, "loss": 0.5815, "step": 51000 }, { "epoch": 2.5335253799543063, "grad_norm": 0.1201171875, "learning_rate": 0.0005973219429820205, "loss": 0.5638, "step": 51010 }, { "epoch": 2.5340220522499255, "grad_norm": 0.11865234375, "learning_rate": 0.000597282209198371, "loss": 0.5659, "step": 51020 }, { "epoch": 2.5345187245455447, "grad_norm": 0.09228515625, "learning_rate": 0.0005972424754147214, "loss": 0.5737, "step": 51030 }, { "epoch": 2.535015396841164, "grad_norm": 0.1259765625, "learning_rate": 0.0005972027416310718, "loss": 0.5632, "step": 51040 }, { "epoch": 2.5355120691367836, "grad_norm": 0.1455078125, "learning_rate": 0.0005971630078474223, "loss": 0.568, "step": 51050 }, { "epoch": 2.536008741432403, "grad_norm": 0.1025390625, "learning_rate": 0.0005971232740637727, "loss": 0.5503, "step": 51060 }, { "epoch": 2.5365054137280225, "grad_norm": 0.1123046875, "learning_rate": 0.0005970835402801231, "loss": 0.5756, "step": 51070 }, { "epoch": 2.5370020860236417, "grad_norm": 0.10595703125, "learning_rate": 0.0005970438064964737, "loss": 0.5414, "step": 51080 }, { "epoch": 2.537498758319261, "grad_norm": 0.173828125, "learning_rate": 0.0005970040727128242, "loss": 0.5714, "step": 51090 }, { "epoch": 2.53799543061488, "grad_norm": 0.103515625, "learning_rate": 0.0005969643389291745, "loss": 0.5618, "step": 51100 }, { "epoch": 2.5384921029104994, "grad_norm": 0.12158203125, "learning_rate": 0.000596924605145525, "loss": 0.551, "step": 51110 }, { "epoch": 2.538988775206119, "grad_norm": 0.10498046875, "learning_rate": 0.0005968848713618754, "loss": 0.5455, "step": 51120 }, { "epoch": 2.5394854475017383, "grad_norm": 0.11572265625, "learning_rate": 0.0005968451375782259, "loss": 0.5642, "step": 51130 }, { "epoch": 2.5399821197973576, "grad_norm": 0.150390625, "learning_rate": 0.0005968054037945764, "loss": 0.5395, "step": 51140 }, { "epoch": 2.5404787920929772, "grad_norm": 0.11279296875, "learning_rate": 0.0005967656700109268, "loss": 0.5503, "step": 51150 }, { "epoch": 2.5409754643885964, "grad_norm": 0.1435546875, "learning_rate": 0.0005967259362272772, "loss": 0.5227, "step": 51160 }, { "epoch": 2.5414721366842157, "grad_norm": 0.09912109375, "learning_rate": 0.0005966862024436278, "loss": 0.5776, "step": 51170 }, { "epoch": 2.541968808979835, "grad_norm": 0.09716796875, "learning_rate": 0.0005966464686599782, "loss": 0.5362, "step": 51180 }, { "epoch": 2.5424654812754546, "grad_norm": 0.12109375, "learning_rate": 0.0005966067348763286, "loss": 0.5623, "step": 51190 }, { "epoch": 2.542962153571074, "grad_norm": 0.091796875, "learning_rate": 0.0005965670010926791, "loss": 0.5747, "step": 51200 }, { "epoch": 2.543458825866693, "grad_norm": 0.1025390625, "learning_rate": 0.0005965272673090295, "loss": 0.5386, "step": 51210 }, { "epoch": 2.5439554981623127, "grad_norm": 0.09814453125, "learning_rate": 0.00059648753352538, "loss": 0.5459, "step": 51220 }, { "epoch": 2.544452170457932, "grad_norm": 0.1083984375, "learning_rate": 0.0005964477997417305, "loss": 0.5909, "step": 51230 }, { "epoch": 2.544948842753551, "grad_norm": 0.11865234375, "learning_rate": 0.0005964080659580809, "loss": 0.5465, "step": 51240 }, { "epoch": 2.5454455150491704, "grad_norm": 0.1328125, "learning_rate": 0.0005963683321744314, "loss": 0.5811, "step": 51250 }, { "epoch": 2.54594218734479, "grad_norm": 0.109375, "learning_rate": 0.0005963285983907817, "loss": 0.5755, "step": 51260 }, { "epoch": 2.5464388596404093, "grad_norm": 0.142578125, "learning_rate": 0.0005962888646071322, "loss": 0.5701, "step": 51270 }, { "epoch": 2.5469355319360285, "grad_norm": 0.09521484375, "learning_rate": 0.0005962491308234828, "loss": 0.5748, "step": 51280 }, { "epoch": 2.547432204231648, "grad_norm": 0.10546875, "learning_rate": 0.0005962093970398331, "loss": 0.5806, "step": 51290 }, { "epoch": 2.5479288765272674, "grad_norm": 0.10302734375, "learning_rate": 0.0005961696632561836, "loss": 0.5596, "step": 51300 }, { "epoch": 2.5484255488228866, "grad_norm": 0.103515625, "learning_rate": 0.000596129929472534, "loss": 0.5824, "step": 51310 }, { "epoch": 2.548922221118506, "grad_norm": 0.14453125, "learning_rate": 0.0005960901956888844, "loss": 0.5651, "step": 51320 }, { "epoch": 2.549418893414125, "grad_norm": 0.193359375, "learning_rate": 0.000596050461905235, "loss": 0.5594, "step": 51330 }, { "epoch": 2.5499155657097448, "grad_norm": 0.09423828125, "learning_rate": 0.0005960107281215854, "loss": 0.5409, "step": 51340 }, { "epoch": 2.550412238005364, "grad_norm": 0.1298828125, "learning_rate": 0.0005959709943379358, "loss": 0.5479, "step": 51350 }, { "epoch": 2.5509089103009837, "grad_norm": 0.1396484375, "learning_rate": 0.0005959312605542863, "loss": 0.5584, "step": 51360 }, { "epoch": 2.551405582596603, "grad_norm": 0.11669921875, "learning_rate": 0.0005958915267706367, "loss": 0.568, "step": 51370 }, { "epoch": 2.551902254892222, "grad_norm": 0.123046875, "learning_rate": 0.0005958517929869873, "loss": 0.5619, "step": 51380 }, { "epoch": 2.5523989271878413, "grad_norm": 0.10693359375, "learning_rate": 0.0005958120592033377, "loss": 0.5573, "step": 51390 }, { "epoch": 2.5528955994834606, "grad_norm": 0.09619140625, "learning_rate": 0.0005957723254196881, "loss": 0.561, "step": 51400 }, { "epoch": 2.5533922717790802, "grad_norm": 0.1298828125, "learning_rate": 0.0005957325916360386, "loss": 0.5522, "step": 51410 }, { "epoch": 2.5538889440746995, "grad_norm": 0.10400390625, "learning_rate": 0.000595692857852389, "loss": 0.5504, "step": 51420 }, { "epoch": 2.554385616370319, "grad_norm": 0.1611328125, "learning_rate": 0.0005956531240687395, "loss": 0.5398, "step": 51430 }, { "epoch": 2.5548822886659384, "grad_norm": 0.10302734375, "learning_rate": 0.00059561339028509, "loss": 0.5599, "step": 51440 }, { "epoch": 2.5553789609615576, "grad_norm": 0.0986328125, "learning_rate": 0.0005955736565014403, "loss": 0.5635, "step": 51450 }, { "epoch": 2.555875633257177, "grad_norm": 0.09814453125, "learning_rate": 0.0005955339227177908, "loss": 0.5605, "step": 51460 }, { "epoch": 2.556372305552796, "grad_norm": 0.109375, "learning_rate": 0.0005954941889341414, "loss": 0.5695, "step": 51470 }, { "epoch": 2.5568689778484157, "grad_norm": 0.1064453125, "learning_rate": 0.0005954544551504917, "loss": 0.576, "step": 51480 }, { "epoch": 2.557365650144035, "grad_norm": 0.0908203125, "learning_rate": 0.0005954147213668422, "loss": 0.5342, "step": 51490 }, { "epoch": 2.557862322439654, "grad_norm": 0.0888671875, "learning_rate": 0.0005953749875831927, "loss": 0.5262, "step": 51500 }, { "epoch": 2.558358994735274, "grad_norm": 0.09765625, "learning_rate": 0.000595335253799543, "loss": 0.5752, "step": 51510 }, { "epoch": 2.558855667030893, "grad_norm": 0.0859375, "learning_rate": 0.0005952955200158935, "loss": 0.5404, "step": 51520 }, { "epoch": 2.5593523393265123, "grad_norm": 0.10546875, "learning_rate": 0.000595255786232244, "loss": 0.583, "step": 51530 }, { "epoch": 2.5598490116221315, "grad_norm": 0.09912109375, "learning_rate": 0.0005952160524485945, "loss": 0.5767, "step": 51540 }, { "epoch": 2.560345683917751, "grad_norm": 0.10693359375, "learning_rate": 0.0005951763186649449, "loss": 0.5468, "step": 51550 }, { "epoch": 2.5608423562133704, "grad_norm": 0.109375, "learning_rate": 0.0005951365848812953, "loss": 0.5662, "step": 51560 }, { "epoch": 2.5613390285089896, "grad_norm": 0.1005859375, "learning_rate": 0.0005950968510976458, "loss": 0.5559, "step": 51570 }, { "epoch": 2.5618357008046093, "grad_norm": 0.10888671875, "learning_rate": 0.0005950571173139963, "loss": 0.5359, "step": 51580 }, { "epoch": 2.5623323731002285, "grad_norm": 0.14453125, "learning_rate": 0.0005950173835303467, "loss": 0.5524, "step": 51590 }, { "epoch": 2.5628290453958478, "grad_norm": 0.10205078125, "learning_rate": 0.0005949776497466972, "loss": 0.5748, "step": 51600 }, { "epoch": 2.563325717691467, "grad_norm": 0.134765625, "learning_rate": 0.0005949379159630476, "loss": 0.589, "step": 51610 }, { "epoch": 2.5638223899870867, "grad_norm": 0.12060546875, "learning_rate": 0.000594898182179398, "loss": 0.5767, "step": 51620 }, { "epoch": 2.564319062282706, "grad_norm": 0.1572265625, "learning_rate": 0.0005948584483957486, "loss": 0.5685, "step": 51630 }, { "epoch": 2.564815734578325, "grad_norm": 0.09375, "learning_rate": 0.0005948187146120989, "loss": 0.5551, "step": 51640 }, { "epoch": 2.565312406873945, "grad_norm": 0.1083984375, "learning_rate": 0.0005947789808284494, "loss": 0.5697, "step": 51650 }, { "epoch": 2.565809079169564, "grad_norm": 0.09423828125, "learning_rate": 0.0005947392470447999, "loss": 0.5754, "step": 51660 }, { "epoch": 2.5663057514651832, "grad_norm": 0.1875, "learning_rate": 0.0005946995132611503, "loss": 0.5471, "step": 51670 }, { "epoch": 2.5668024237608025, "grad_norm": 0.0927734375, "learning_rate": 0.0005946597794775008, "loss": 0.5898, "step": 51680 }, { "epoch": 2.5672990960564217, "grad_norm": 0.2001953125, "learning_rate": 0.0005946200456938513, "loss": 0.5595, "step": 51690 }, { "epoch": 2.5677957683520414, "grad_norm": 0.11865234375, "learning_rate": 0.0005945803119102017, "loss": 0.5411, "step": 51700 }, { "epoch": 2.5682924406476606, "grad_norm": 0.10546875, "learning_rate": 0.0005945405781265521, "loss": 0.5632, "step": 51710 }, { "epoch": 2.5687891129432803, "grad_norm": 0.091796875, "learning_rate": 0.0005945008443429025, "loss": 0.5642, "step": 51720 }, { "epoch": 2.5692857852388995, "grad_norm": 0.11865234375, "learning_rate": 0.0005944611105592531, "loss": 0.5499, "step": 51730 }, { "epoch": 2.5697824575345187, "grad_norm": 0.1064453125, "learning_rate": 0.0005944213767756035, "loss": 0.5764, "step": 51740 }, { "epoch": 2.570279129830138, "grad_norm": 0.09423828125, "learning_rate": 0.0005943816429919539, "loss": 0.53, "step": 51750 }, { "epoch": 2.570775802125757, "grad_norm": 0.11328125, "learning_rate": 0.0005943419092083044, "loss": 0.6109, "step": 51760 }, { "epoch": 2.571272474421377, "grad_norm": 0.1201171875, "learning_rate": 0.0005943021754246548, "loss": 0.5751, "step": 51770 }, { "epoch": 2.571769146716996, "grad_norm": 0.1455078125, "learning_rate": 0.0005942624416410053, "loss": 0.5906, "step": 51780 }, { "epoch": 2.5722658190126158, "grad_norm": 0.15625, "learning_rate": 0.0005942227078573558, "loss": 0.5624, "step": 51790 }, { "epoch": 2.572762491308235, "grad_norm": 0.09912109375, "learning_rate": 0.0005941829740737062, "loss": 0.5324, "step": 51800 }, { "epoch": 2.573259163603854, "grad_norm": 0.09375, "learning_rate": 0.0005941432402900566, "loss": 0.5542, "step": 51810 }, { "epoch": 2.5737558358994734, "grad_norm": 0.08984375, "learning_rate": 0.0005941035065064071, "loss": 0.5472, "step": 51820 }, { "epoch": 2.5742525081950927, "grad_norm": 0.12109375, "learning_rate": 0.0005940637727227576, "loss": 0.5527, "step": 51830 }, { "epoch": 2.5747491804907123, "grad_norm": 0.1025390625, "learning_rate": 0.000594024038939108, "loss": 0.5812, "step": 51840 }, { "epoch": 2.5752458527863316, "grad_norm": 0.1162109375, "learning_rate": 0.0005939843051554585, "loss": 0.5717, "step": 51850 }, { "epoch": 2.575742525081951, "grad_norm": 0.10986328125, "learning_rate": 0.0005939445713718089, "loss": 0.5504, "step": 51860 }, { "epoch": 2.5762391973775705, "grad_norm": 0.1259765625, "learning_rate": 0.0005939048375881593, "loss": 0.5747, "step": 51870 }, { "epoch": 2.5767358696731897, "grad_norm": 0.0830078125, "learning_rate": 0.0005938651038045099, "loss": 0.5129, "step": 51880 }, { "epoch": 2.577232541968809, "grad_norm": 0.10009765625, "learning_rate": 0.0005938253700208603, "loss": 0.5677, "step": 51890 }, { "epoch": 2.577729214264428, "grad_norm": 0.169921875, "learning_rate": 0.0005937856362372107, "loss": 0.5708, "step": 51900 }, { "epoch": 2.578225886560048, "grad_norm": 0.09619140625, "learning_rate": 0.0005937459024535611, "loss": 0.5443, "step": 51910 }, { "epoch": 2.578722558855667, "grad_norm": 0.09912109375, "learning_rate": 0.0005937061686699116, "loss": 0.5586, "step": 51920 }, { "epoch": 2.5792192311512863, "grad_norm": 0.10498046875, "learning_rate": 0.0005936664348862621, "loss": 0.5766, "step": 51930 }, { "epoch": 2.579715903446906, "grad_norm": 0.12890625, "learning_rate": 0.0005936267011026125, "loss": 0.579, "step": 51940 }, { "epoch": 2.580212575742525, "grad_norm": 0.095703125, "learning_rate": 0.000593586967318963, "loss": 0.5805, "step": 51950 }, { "epoch": 2.5807092480381444, "grad_norm": 0.10888671875, "learning_rate": 0.0005935472335353134, "loss": 0.5551, "step": 51960 }, { "epoch": 2.5812059203337636, "grad_norm": 0.11962890625, "learning_rate": 0.0005935074997516638, "loss": 0.5564, "step": 51970 }, { "epoch": 2.5817025926293833, "grad_norm": 0.12890625, "learning_rate": 0.0005934677659680144, "loss": 0.5701, "step": 51980 }, { "epoch": 2.5821992649250025, "grad_norm": 0.10400390625, "learning_rate": 0.0005934280321843648, "loss": 0.5799, "step": 51990 }, { "epoch": 2.5826959372206217, "grad_norm": 0.11767578125, "learning_rate": 0.0005933882984007152, "loss": 0.5812, "step": 52000 }, { "epoch": 2.5831926095162414, "grad_norm": 0.154296875, "learning_rate": 0.0005933485646170657, "loss": 0.5707, "step": 52010 }, { "epoch": 2.5836892818118606, "grad_norm": 0.09423828125, "learning_rate": 0.0005933088308334161, "loss": 0.5802, "step": 52020 }, { "epoch": 2.58418595410748, "grad_norm": 0.115234375, "learning_rate": 0.0005932690970497666, "loss": 0.5655, "step": 52030 }, { "epoch": 2.584682626403099, "grad_norm": 0.130859375, "learning_rate": 0.0005932293632661171, "loss": 0.5621, "step": 52040 }, { "epoch": 2.5851792986987183, "grad_norm": 0.140625, "learning_rate": 0.0005931896294824675, "loss": 0.5851, "step": 52050 }, { "epoch": 2.585675970994338, "grad_norm": 0.1572265625, "learning_rate": 0.0005931498956988179, "loss": 0.5652, "step": 52060 }, { "epoch": 2.586172643289957, "grad_norm": 0.11181640625, "learning_rate": 0.0005931101619151684, "loss": 0.5645, "step": 52070 }, { "epoch": 2.586669315585577, "grad_norm": 0.107421875, "learning_rate": 0.0005930704281315189, "loss": 0.5482, "step": 52080 }, { "epoch": 2.587165987881196, "grad_norm": 0.09423828125, "learning_rate": 0.0005930306943478693, "loss": 0.5524, "step": 52090 }, { "epoch": 2.5876626601768153, "grad_norm": 0.10009765625, "learning_rate": 0.0005929909605642197, "loss": 0.5543, "step": 52100 }, { "epoch": 2.5881593324724346, "grad_norm": 0.0966796875, "learning_rate": 0.0005929512267805702, "loss": 0.5657, "step": 52110 }, { "epoch": 2.588656004768054, "grad_norm": 0.1201171875, "learning_rate": 0.0005929114929969207, "loss": 0.5542, "step": 52120 }, { "epoch": 2.5891526770636735, "grad_norm": 0.12890625, "learning_rate": 0.000592871759213271, "loss": 0.571, "step": 52130 }, { "epoch": 2.5896493493592927, "grad_norm": 0.09912109375, "learning_rate": 0.0005928320254296216, "loss": 0.5935, "step": 52140 }, { "epoch": 2.590146021654912, "grad_norm": 0.1005859375, "learning_rate": 0.0005927922916459721, "loss": 0.5556, "step": 52150 }, { "epoch": 2.5906426939505316, "grad_norm": 0.107421875, "learning_rate": 0.0005927525578623224, "loss": 0.5633, "step": 52160 }, { "epoch": 2.591139366246151, "grad_norm": 0.1279296875, "learning_rate": 0.0005927128240786729, "loss": 0.5721, "step": 52170 }, { "epoch": 2.59163603854177, "grad_norm": 0.150390625, "learning_rate": 0.0005926730902950234, "loss": 0.5669, "step": 52180 }, { "epoch": 2.5921327108373893, "grad_norm": 0.09814453125, "learning_rate": 0.0005926333565113738, "loss": 0.5699, "step": 52190 }, { "epoch": 2.592629383133009, "grad_norm": 0.09326171875, "learning_rate": 0.0005925936227277243, "loss": 0.5165, "step": 52200 }, { "epoch": 2.593126055428628, "grad_norm": 0.119140625, "learning_rate": 0.0005925538889440747, "loss": 0.5432, "step": 52210 }, { "epoch": 2.5936227277242474, "grad_norm": 0.12451171875, "learning_rate": 0.0005925141551604251, "loss": 0.5869, "step": 52220 }, { "epoch": 2.594119400019867, "grad_norm": 0.1884765625, "learning_rate": 0.0005924744213767757, "loss": 0.5908, "step": 52230 }, { "epoch": 2.5946160723154863, "grad_norm": 0.1494140625, "learning_rate": 0.0005924346875931261, "loss": 0.5426, "step": 52240 }, { "epoch": 2.5951127446111055, "grad_norm": 0.10205078125, "learning_rate": 0.0005923949538094765, "loss": 0.5939, "step": 52250 }, { "epoch": 2.5956094169067248, "grad_norm": 0.09716796875, "learning_rate": 0.000592355220025827, "loss": 0.5533, "step": 52260 }, { "epoch": 2.5961060892023444, "grad_norm": 0.1416015625, "learning_rate": 0.0005923154862421774, "loss": 0.5361, "step": 52270 }, { "epoch": 2.5966027614979637, "grad_norm": 0.1484375, "learning_rate": 0.000592275752458528, "loss": 0.5498, "step": 52280 }, { "epoch": 2.597099433793583, "grad_norm": 0.1279296875, "learning_rate": 0.0005922360186748784, "loss": 0.5545, "step": 52290 }, { "epoch": 2.5975961060892026, "grad_norm": 0.154296875, "learning_rate": 0.0005921962848912288, "loss": 0.5817, "step": 52300 }, { "epoch": 2.5980927783848218, "grad_norm": 0.1005859375, "learning_rate": 0.0005921565511075793, "loss": 0.5931, "step": 52310 }, { "epoch": 2.598589450680441, "grad_norm": 0.1005859375, "learning_rate": 0.0005921168173239296, "loss": 0.5344, "step": 52320 }, { "epoch": 2.5990861229760602, "grad_norm": 0.09912109375, "learning_rate": 0.0005920770835402802, "loss": 0.5631, "step": 52330 }, { "epoch": 2.59958279527168, "grad_norm": 0.1298828125, "learning_rate": 0.0005920373497566307, "loss": 0.5554, "step": 52340 }, { "epoch": 2.600079467567299, "grad_norm": 0.1484375, "learning_rate": 0.000591997615972981, "loss": 0.5733, "step": 52350 }, { "epoch": 2.6005761398629184, "grad_norm": 0.0947265625, "learning_rate": 0.0005919578821893315, "loss": 0.5849, "step": 52360 }, { "epoch": 2.601072812158538, "grad_norm": 0.13671875, "learning_rate": 0.0005919181484056819, "loss": 0.5489, "step": 52370 }, { "epoch": 2.6015694844541573, "grad_norm": 0.087890625, "learning_rate": 0.0005918784146220323, "loss": 0.5192, "step": 52380 }, { "epoch": 2.6020661567497765, "grad_norm": 0.10009765625, "learning_rate": 0.0005918386808383829, "loss": 0.5245, "step": 52390 }, { "epoch": 2.6025628290453957, "grad_norm": 0.10986328125, "learning_rate": 0.0005917989470547333, "loss": 0.5205, "step": 52400 }, { "epoch": 2.603059501341015, "grad_norm": 0.103515625, "learning_rate": 0.0005917592132710837, "loss": 0.5696, "step": 52410 }, { "epoch": 2.6035561736366346, "grad_norm": 0.126953125, "learning_rate": 0.0005917194794874342, "loss": 0.5737, "step": 52420 }, { "epoch": 2.604052845932254, "grad_norm": 0.0947265625, "learning_rate": 0.0005916797457037846, "loss": 0.5474, "step": 52430 }, { "epoch": 2.6045495182278735, "grad_norm": 0.09814453125, "learning_rate": 0.0005916400119201352, "loss": 0.5651, "step": 52440 }, { "epoch": 2.6050461905234927, "grad_norm": 0.11767578125, "learning_rate": 0.0005916002781364856, "loss": 0.5621, "step": 52450 }, { "epoch": 2.605542862819112, "grad_norm": 0.1484375, "learning_rate": 0.000591560544352836, "loss": 0.5554, "step": 52460 }, { "epoch": 2.606039535114731, "grad_norm": 0.130859375, "learning_rate": 0.0005915208105691865, "loss": 0.5747, "step": 52470 }, { "epoch": 2.6065362074103504, "grad_norm": 0.10693359375, "learning_rate": 0.000591481076785537, "loss": 0.5467, "step": 52480 }, { "epoch": 2.60703287970597, "grad_norm": 0.09716796875, "learning_rate": 0.0005914413430018874, "loss": 0.5722, "step": 52490 }, { "epoch": 2.6075295520015893, "grad_norm": 0.1640625, "learning_rate": 0.0005914016092182379, "loss": 0.5646, "step": 52500 }, { "epoch": 2.6080262242972085, "grad_norm": 0.111328125, "learning_rate": 0.0005913618754345882, "loss": 0.6004, "step": 52510 }, { "epoch": 2.608522896592828, "grad_norm": 0.1552734375, "learning_rate": 0.0005913221416509387, "loss": 0.5948, "step": 52520 }, { "epoch": 2.6090195688884474, "grad_norm": 0.1767578125, "learning_rate": 0.0005912824078672893, "loss": 0.5654, "step": 52530 }, { "epoch": 2.6095162411840667, "grad_norm": 0.1044921875, "learning_rate": 0.0005912426740836396, "loss": 0.5787, "step": 52540 }, { "epoch": 2.610012913479686, "grad_norm": 0.09130859375, "learning_rate": 0.0005912029402999901, "loss": 0.5808, "step": 52550 }, { "epoch": 2.6105095857753056, "grad_norm": 0.1259765625, "learning_rate": 0.0005911632065163406, "loss": 0.5478, "step": 52560 }, { "epoch": 2.611006258070925, "grad_norm": 0.095703125, "learning_rate": 0.000591123472732691, "loss": 0.5723, "step": 52570 }, { "epoch": 2.611502930366544, "grad_norm": 0.0908203125, "learning_rate": 0.0005910837389490414, "loss": 0.541, "step": 52580 }, { "epoch": 2.6119996026621637, "grad_norm": 0.1123046875, "learning_rate": 0.0005910440051653919, "loss": 0.5594, "step": 52590 }, { "epoch": 2.612496274957783, "grad_norm": 0.125, "learning_rate": 0.0005910042713817424, "loss": 0.5633, "step": 52600 }, { "epoch": 2.612992947253402, "grad_norm": 0.1162109375, "learning_rate": 0.0005909645375980928, "loss": 0.5304, "step": 52610 }, { "epoch": 2.6134896195490214, "grad_norm": 0.111328125, "learning_rate": 0.0005909248038144432, "loss": 0.5781, "step": 52620 }, { "epoch": 2.613986291844641, "grad_norm": 0.11962890625, "learning_rate": 0.0005908850700307938, "loss": 0.525, "step": 52630 }, { "epoch": 2.6144829641402603, "grad_norm": 0.1171875, "learning_rate": 0.0005908453362471442, "loss": 0.551, "step": 52640 }, { "epoch": 2.6149796364358795, "grad_norm": 0.146484375, "learning_rate": 0.0005908056024634946, "loss": 0.5546, "step": 52650 }, { "epoch": 2.615476308731499, "grad_norm": 0.09423828125, "learning_rate": 0.0005907658686798451, "loss": 0.5552, "step": 52660 }, { "epoch": 2.6159729810271184, "grad_norm": 0.1005859375, "learning_rate": 0.0005907261348961955, "loss": 0.5338, "step": 52670 }, { "epoch": 2.6164696533227376, "grad_norm": 0.146484375, "learning_rate": 0.0005906864011125459, "loss": 0.5431, "step": 52680 }, { "epoch": 2.616966325618357, "grad_norm": 0.11083984375, "learning_rate": 0.0005906466673288965, "loss": 0.5516, "step": 52690 }, { "epoch": 2.6174629979139765, "grad_norm": 0.11279296875, "learning_rate": 0.0005906069335452468, "loss": 0.5614, "step": 52700 }, { "epoch": 2.6179596702095957, "grad_norm": 0.0927734375, "learning_rate": 0.0005905671997615973, "loss": 0.5881, "step": 52710 }, { "epoch": 2.618456342505215, "grad_norm": 0.1162109375, "learning_rate": 0.0005905274659779478, "loss": 0.5574, "step": 52720 }, { "epoch": 2.6189530148008346, "grad_norm": 0.11474609375, "learning_rate": 0.0005904877321942982, "loss": 0.5601, "step": 52730 }, { "epoch": 2.619449687096454, "grad_norm": 0.150390625, "learning_rate": 0.0005904479984106487, "loss": 0.5525, "step": 52740 }, { "epoch": 2.619946359392073, "grad_norm": 0.12109375, "learning_rate": 0.0005904082646269992, "loss": 0.5587, "step": 52750 }, { "epoch": 2.6204430316876923, "grad_norm": 0.09912109375, "learning_rate": 0.0005903685308433496, "loss": 0.5613, "step": 52760 }, { "epoch": 2.6209397039833116, "grad_norm": 0.09521484375, "learning_rate": 0.0005903287970597, "loss": 0.53, "step": 52770 }, { "epoch": 2.6214363762789312, "grad_norm": 0.11181640625, "learning_rate": 0.0005902890632760504, "loss": 0.571, "step": 52780 }, { "epoch": 2.6219330485745505, "grad_norm": 0.126953125, "learning_rate": 0.000590249329492401, "loss": 0.5459, "step": 52790 }, { "epoch": 2.62242972087017, "grad_norm": 0.095703125, "learning_rate": 0.0005902095957087514, "loss": 0.5785, "step": 52800 }, { "epoch": 2.6229263931657893, "grad_norm": 0.10546875, "learning_rate": 0.0005901698619251018, "loss": 0.5447, "step": 52810 }, { "epoch": 2.6234230654614086, "grad_norm": 0.103515625, "learning_rate": 0.0005901301281414523, "loss": 0.5534, "step": 52820 }, { "epoch": 2.623919737757028, "grad_norm": 0.111328125, "learning_rate": 0.0005900903943578027, "loss": 0.5597, "step": 52830 }, { "epoch": 2.624416410052647, "grad_norm": 0.22265625, "learning_rate": 0.0005900506605741532, "loss": 0.5841, "step": 52840 }, { "epoch": 2.6249130823482667, "grad_norm": 0.1298828125, "learning_rate": 0.0005900109267905037, "loss": 0.5437, "step": 52850 }, { "epoch": 2.625409754643886, "grad_norm": 0.11572265625, "learning_rate": 0.0005899711930068541, "loss": 0.5652, "step": 52860 }, { "epoch": 2.625906426939505, "grad_norm": 0.1884765625, "learning_rate": 0.0005899314592232045, "loss": 0.563, "step": 52870 }, { "epoch": 2.626403099235125, "grad_norm": 0.1357421875, "learning_rate": 0.000589891725439555, "loss": 0.5704, "step": 52880 }, { "epoch": 2.626899771530744, "grad_norm": 0.09912109375, "learning_rate": 0.0005898519916559055, "loss": 0.5478, "step": 52890 }, { "epoch": 2.6273964438263633, "grad_norm": 0.1142578125, "learning_rate": 0.0005898122578722559, "loss": 0.5751, "step": 52900 }, { "epoch": 2.6278931161219825, "grad_norm": 0.11376953125, "learning_rate": 0.0005897725240886064, "loss": 0.5358, "step": 52910 }, { "epoch": 2.628389788417602, "grad_norm": 0.1123046875, "learning_rate": 0.0005897327903049568, "loss": 0.5574, "step": 52920 }, { "epoch": 2.6288864607132214, "grad_norm": 0.19140625, "learning_rate": 0.0005896930565213072, "loss": 0.5408, "step": 52930 }, { "epoch": 2.6293831330088406, "grad_norm": 0.1884765625, "learning_rate": 0.0005896533227376578, "loss": 0.5505, "step": 52940 }, { "epoch": 2.6298798053044603, "grad_norm": 0.1640625, "learning_rate": 0.0005896135889540082, "loss": 0.5553, "step": 52950 }, { "epoch": 2.6303764776000795, "grad_norm": 0.0966796875, "learning_rate": 0.0005895738551703586, "loss": 0.5837, "step": 52960 }, { "epoch": 2.6308731498956988, "grad_norm": 0.125, "learning_rate": 0.000589534121386709, "loss": 0.5853, "step": 52970 }, { "epoch": 2.631369822191318, "grad_norm": 0.09619140625, "learning_rate": 0.0005894943876030595, "loss": 0.5668, "step": 52980 }, { "epoch": 2.6318664944869377, "grad_norm": 0.10498046875, "learning_rate": 0.00058945465381941, "loss": 0.5911, "step": 52990 }, { "epoch": 2.632363166782557, "grad_norm": 0.115234375, "learning_rate": 0.0005894149200357604, "loss": 0.5381, "step": 53000 }, { "epoch": 2.632859839078176, "grad_norm": 0.09375, "learning_rate": 0.0005893751862521109, "loss": 0.543, "step": 53010 }, { "epoch": 2.633356511373796, "grad_norm": 0.09619140625, "learning_rate": 0.0005893354524684614, "loss": 0.5565, "step": 53020 }, { "epoch": 2.633853183669415, "grad_norm": 0.1328125, "learning_rate": 0.0005892957186848117, "loss": 0.5544, "step": 53030 }, { "epoch": 2.6343498559650342, "grad_norm": 0.0927734375, "learning_rate": 0.0005892559849011623, "loss": 0.5533, "step": 53040 }, { "epoch": 2.6348465282606535, "grad_norm": 0.10498046875, "learning_rate": 0.0005892162511175127, "loss": 0.5711, "step": 53050 }, { "epoch": 2.635343200556273, "grad_norm": 0.103515625, "learning_rate": 0.0005891765173338631, "loss": 0.5413, "step": 53060 }, { "epoch": 2.6358398728518924, "grad_norm": 0.0927734375, "learning_rate": 0.0005891367835502136, "loss": 0.5541, "step": 53070 }, { "epoch": 2.6363365451475116, "grad_norm": 0.0927734375, "learning_rate": 0.000589097049766564, "loss": 0.6096, "step": 53080 }, { "epoch": 2.6368332174431313, "grad_norm": 0.10595703125, "learning_rate": 0.0005890573159829145, "loss": 0.5926, "step": 53090 }, { "epoch": 2.6373298897387505, "grad_norm": 0.1494140625, "learning_rate": 0.000589017582199265, "loss": 0.5598, "step": 53100 }, { "epoch": 2.6378265620343697, "grad_norm": 0.10205078125, "learning_rate": 0.0005889778484156154, "loss": 0.5967, "step": 53110 }, { "epoch": 2.638323234329989, "grad_norm": 0.111328125, "learning_rate": 0.0005889381146319658, "loss": 0.5706, "step": 53120 }, { "epoch": 2.638819906625608, "grad_norm": 0.125, "learning_rate": 0.0005888983808483163, "loss": 0.5699, "step": 53130 }, { "epoch": 2.639316578921228, "grad_norm": 0.21484375, "learning_rate": 0.0005888586470646668, "loss": 0.5778, "step": 53140 }, { "epoch": 2.639813251216847, "grad_norm": 0.09619140625, "learning_rate": 0.0005888189132810172, "loss": 0.5371, "step": 53150 }, { "epoch": 2.6403099235124667, "grad_norm": 0.1044921875, "learning_rate": 0.0005887791794973677, "loss": 0.5268, "step": 53160 }, { "epoch": 2.640806595808086, "grad_norm": 0.0927734375, "learning_rate": 0.0005887394457137181, "loss": 0.5741, "step": 53170 }, { "epoch": 2.641303268103705, "grad_norm": 0.09765625, "learning_rate": 0.0005886997119300686, "loss": 0.5917, "step": 53180 }, { "epoch": 2.6417999403993244, "grad_norm": 0.150390625, "learning_rate": 0.000588659978146419, "loss": 0.5463, "step": 53190 }, { "epoch": 2.6422966126949436, "grad_norm": 0.11767578125, "learning_rate": 0.0005886202443627695, "loss": 0.5711, "step": 53200 }, { "epoch": 2.6427932849905633, "grad_norm": 0.11328125, "learning_rate": 0.00058858051057912, "loss": 0.5452, "step": 53210 }, { "epoch": 2.6432899572861825, "grad_norm": 0.16796875, "learning_rate": 0.0005885407767954703, "loss": 0.5595, "step": 53220 }, { "epoch": 2.6437866295818018, "grad_norm": 0.0986328125, "learning_rate": 0.0005885010430118208, "loss": 0.5474, "step": 53230 }, { "epoch": 2.6442833018774214, "grad_norm": 0.134765625, "learning_rate": 0.0005884613092281713, "loss": 0.5801, "step": 53240 }, { "epoch": 2.6447799741730407, "grad_norm": 0.09814453125, "learning_rate": 0.0005884215754445217, "loss": 0.5728, "step": 53250 }, { "epoch": 2.64527664646866, "grad_norm": 0.12353515625, "learning_rate": 0.0005883818416608722, "loss": 0.5775, "step": 53260 }, { "epoch": 2.645773318764279, "grad_norm": 0.1611328125, "learning_rate": 0.0005883421078772226, "loss": 0.5507, "step": 53270 }, { "epoch": 2.646269991059899, "grad_norm": 0.09033203125, "learning_rate": 0.000588302374093573, "loss": 0.5794, "step": 53280 }, { "epoch": 2.646766663355518, "grad_norm": 0.1025390625, "learning_rate": 0.0005882626403099236, "loss": 0.5759, "step": 53290 }, { "epoch": 2.6472633356511373, "grad_norm": 0.11865234375, "learning_rate": 0.000588222906526274, "loss": 0.5717, "step": 53300 }, { "epoch": 2.647760007946757, "grad_norm": 0.09716796875, "learning_rate": 0.0005881831727426245, "loss": 0.5721, "step": 53310 }, { "epoch": 2.648256680242376, "grad_norm": 0.095703125, "learning_rate": 0.0005881434389589749, "loss": 0.5629, "step": 53320 }, { "epoch": 2.6487533525379954, "grad_norm": 0.10595703125, "learning_rate": 0.0005881037051753253, "loss": 0.5881, "step": 53330 }, { "epoch": 2.6492500248336146, "grad_norm": 0.11328125, "learning_rate": 0.0005880639713916759, "loss": 0.5568, "step": 53340 }, { "epoch": 2.6497466971292343, "grad_norm": 0.123046875, "learning_rate": 0.0005880242376080263, "loss": 0.5692, "step": 53350 }, { "epoch": 2.6502433694248535, "grad_norm": 0.1025390625, "learning_rate": 0.0005879845038243767, "loss": 0.5416, "step": 53360 }, { "epoch": 2.6507400417204727, "grad_norm": 0.1474609375, "learning_rate": 0.0005879447700407272, "loss": 0.5644, "step": 53370 }, { "epoch": 2.6512367140160924, "grad_norm": 0.10107421875, "learning_rate": 0.0005879050362570775, "loss": 0.5573, "step": 53380 }, { "epoch": 2.6517333863117116, "grad_norm": 0.1328125, "learning_rate": 0.0005878653024734281, "loss": 0.5575, "step": 53390 }, { "epoch": 2.652230058607331, "grad_norm": 0.134765625, "learning_rate": 0.0005878255686897786, "loss": 0.5463, "step": 53400 }, { "epoch": 2.65272673090295, "grad_norm": 0.09716796875, "learning_rate": 0.0005877858349061289, "loss": 0.5543, "step": 53410 }, { "epoch": 2.6532234031985693, "grad_norm": 0.12255859375, "learning_rate": 0.0005877461011224794, "loss": 0.5464, "step": 53420 }, { "epoch": 2.653720075494189, "grad_norm": 0.1357421875, "learning_rate": 0.0005877063673388299, "loss": 0.5422, "step": 53430 }, { "epoch": 2.654216747789808, "grad_norm": 0.1357421875, "learning_rate": 0.0005876666335551802, "loss": 0.5569, "step": 53440 }, { "epoch": 2.654713420085428, "grad_norm": 0.103515625, "learning_rate": 0.0005876268997715308, "loss": 0.5385, "step": 53450 }, { "epoch": 2.655210092381047, "grad_norm": 0.1640625, "learning_rate": 0.0005875871659878812, "loss": 0.5348, "step": 53460 }, { "epoch": 2.6557067646766663, "grad_norm": 0.09765625, "learning_rate": 0.0005875474322042317, "loss": 0.5475, "step": 53470 }, { "epoch": 2.6562034369722856, "grad_norm": 0.09423828125, "learning_rate": 0.0005875076984205821, "loss": 0.5525, "step": 53480 }, { "epoch": 2.656700109267905, "grad_norm": 0.14453125, "learning_rate": 0.0005874679646369326, "loss": 0.5494, "step": 53490 }, { "epoch": 2.6571967815635245, "grad_norm": 0.11572265625, "learning_rate": 0.0005874282308532831, "loss": 0.5446, "step": 53500 }, { "epoch": 2.6576934538591437, "grad_norm": 0.099609375, "learning_rate": 0.0005873884970696335, "loss": 0.5354, "step": 53510 }, { "epoch": 2.6581901261547634, "grad_norm": 0.11328125, "learning_rate": 0.0005873487632859839, "loss": 0.5875, "step": 53520 }, { "epoch": 2.6586867984503826, "grad_norm": 0.1806640625, "learning_rate": 0.0005873090295023344, "loss": 0.5827, "step": 53530 }, { "epoch": 2.659183470746002, "grad_norm": 0.1396484375, "learning_rate": 0.0005872692957186849, "loss": 0.5811, "step": 53540 }, { "epoch": 2.659680143041621, "grad_norm": 0.09521484375, "learning_rate": 0.0005872295619350353, "loss": 0.5338, "step": 53550 }, { "epoch": 2.6601768153372403, "grad_norm": 0.099609375, "learning_rate": 0.0005871898281513858, "loss": 0.5841, "step": 53560 }, { "epoch": 2.66067348763286, "grad_norm": 0.111328125, "learning_rate": 0.0005871500943677361, "loss": 0.5524, "step": 53570 }, { "epoch": 2.661170159928479, "grad_norm": 0.12109375, "learning_rate": 0.0005871103605840866, "loss": 0.5401, "step": 53580 }, { "epoch": 2.6616668322240984, "grad_norm": 0.12451171875, "learning_rate": 0.0005870706268004372, "loss": 0.5596, "step": 53590 }, { "epoch": 2.662163504519718, "grad_norm": 0.10302734375, "learning_rate": 0.0005870308930167875, "loss": 0.5294, "step": 53600 }, { "epoch": 2.6626601768153373, "grad_norm": 0.09423828125, "learning_rate": 0.000586991159233138, "loss": 0.5505, "step": 53610 }, { "epoch": 2.6631568491109565, "grad_norm": 0.103515625, "learning_rate": 0.0005869514254494885, "loss": 0.5808, "step": 53620 }, { "epoch": 2.6636535214065757, "grad_norm": 0.11083984375, "learning_rate": 0.0005869116916658389, "loss": 0.559, "step": 53630 }, { "epoch": 2.6641501937021954, "grad_norm": 0.103515625, "learning_rate": 0.0005868719578821894, "loss": 0.5737, "step": 53640 }, { "epoch": 2.6646468659978146, "grad_norm": 0.140625, "learning_rate": 0.0005868322240985398, "loss": 0.5706, "step": 53650 }, { "epoch": 2.665143538293434, "grad_norm": 0.111328125, "learning_rate": 0.0005867924903148903, "loss": 0.5827, "step": 53660 }, { "epoch": 2.6656402105890535, "grad_norm": 0.09765625, "learning_rate": 0.0005867527565312407, "loss": 0.5591, "step": 53670 }, { "epoch": 2.6661368828846728, "grad_norm": 0.11279296875, "learning_rate": 0.0005867130227475911, "loss": 0.5814, "step": 53680 }, { "epoch": 2.666633555180292, "grad_norm": 0.09619140625, "learning_rate": 0.0005866732889639417, "loss": 0.5339, "step": 53690 }, { "epoch": 2.667130227475911, "grad_norm": 0.11279296875, "learning_rate": 0.0005866335551802921, "loss": 0.5854, "step": 53700 }, { "epoch": 2.667626899771531, "grad_norm": 0.11279296875, "learning_rate": 0.0005865938213966425, "loss": 0.5793, "step": 53710 }, { "epoch": 2.66812357206715, "grad_norm": 0.10400390625, "learning_rate": 0.000586554087612993, "loss": 0.5616, "step": 53720 }, { "epoch": 2.6686202443627693, "grad_norm": 0.1396484375, "learning_rate": 0.0005865143538293434, "loss": 0.5383, "step": 53730 }, { "epoch": 2.669116916658389, "grad_norm": 0.125, "learning_rate": 0.0005864746200456938, "loss": 0.5441, "step": 53740 }, { "epoch": 2.6696135889540082, "grad_norm": 0.1279296875, "learning_rate": 0.0005864348862620444, "loss": 0.554, "step": 53750 }, { "epoch": 2.6701102612496275, "grad_norm": 0.09130859375, "learning_rate": 0.0005863951524783948, "loss": 0.5615, "step": 53760 }, { "epoch": 2.6706069335452467, "grad_norm": 0.158203125, "learning_rate": 0.0005863554186947452, "loss": 0.5313, "step": 53770 }, { "epoch": 2.671103605840866, "grad_norm": 0.1171875, "learning_rate": 0.0005863156849110957, "loss": 0.5787, "step": 53780 }, { "epoch": 2.6716002781364856, "grad_norm": 0.1025390625, "learning_rate": 0.0005862759511274462, "loss": 0.5651, "step": 53790 }, { "epoch": 2.672096950432105, "grad_norm": 0.1123046875, "learning_rate": 0.0005862362173437966, "loss": 0.5787, "step": 53800 }, { "epoch": 2.6725936227277245, "grad_norm": 0.1083984375, "learning_rate": 0.0005861964835601471, "loss": 0.554, "step": 53810 }, { "epoch": 2.6730902950233437, "grad_norm": 0.10400390625, "learning_rate": 0.0005861567497764975, "loss": 0.5671, "step": 53820 }, { "epoch": 2.673586967318963, "grad_norm": 0.1533203125, "learning_rate": 0.0005861170159928479, "loss": 0.5346, "step": 53830 }, { "epoch": 2.674083639614582, "grad_norm": 0.10986328125, "learning_rate": 0.0005860772822091983, "loss": 0.5534, "step": 53840 }, { "epoch": 2.6745803119102014, "grad_norm": 0.11962890625, "learning_rate": 0.0005860375484255489, "loss": 0.5411, "step": 53850 }, { "epoch": 2.675076984205821, "grad_norm": 0.1015625, "learning_rate": 0.0005859978146418993, "loss": 0.5475, "step": 53860 }, { "epoch": 2.6755736565014403, "grad_norm": 0.09814453125, "learning_rate": 0.0005859580808582497, "loss": 0.5515, "step": 53870 }, { "epoch": 2.67607032879706, "grad_norm": 0.11083984375, "learning_rate": 0.0005859183470746002, "loss": 0.575, "step": 53880 }, { "epoch": 2.676567001092679, "grad_norm": 0.1259765625, "learning_rate": 0.0005858786132909506, "loss": 0.531, "step": 53890 }, { "epoch": 2.6770636733882984, "grad_norm": 0.10400390625, "learning_rate": 0.0005858388795073011, "loss": 0.5366, "step": 53900 }, { "epoch": 2.6775603456839177, "grad_norm": 0.1650390625, "learning_rate": 0.0005857991457236516, "loss": 0.5634, "step": 53910 }, { "epoch": 2.678057017979537, "grad_norm": 0.12353515625, "learning_rate": 0.000585759411940002, "loss": 0.542, "step": 53920 }, { "epoch": 2.6785536902751566, "grad_norm": 0.09423828125, "learning_rate": 0.0005857196781563524, "loss": 0.5662, "step": 53930 }, { "epoch": 2.679050362570776, "grad_norm": 0.11572265625, "learning_rate": 0.000585679944372703, "loss": 0.5822, "step": 53940 }, { "epoch": 2.679547034866395, "grad_norm": 0.1279296875, "learning_rate": 0.0005856402105890534, "loss": 0.5319, "step": 53950 }, { "epoch": 2.6800437071620147, "grad_norm": 0.1884765625, "learning_rate": 0.0005856004768054038, "loss": 0.5512, "step": 53960 }, { "epoch": 2.680540379457634, "grad_norm": 0.09521484375, "learning_rate": 0.0005855607430217543, "loss": 0.5579, "step": 53970 }, { "epoch": 2.681037051753253, "grad_norm": 0.2099609375, "learning_rate": 0.0005855210092381047, "loss": 0.5863, "step": 53980 }, { "epoch": 2.6815337240488724, "grad_norm": 0.142578125, "learning_rate": 0.0005854812754544551, "loss": 0.5027, "step": 53990 }, { "epoch": 2.682030396344492, "grad_norm": 0.12060546875, "learning_rate": 0.0005854415416708057, "loss": 0.5308, "step": 54000 }, { "epoch": 2.6825270686401113, "grad_norm": 0.09375, "learning_rate": 0.0005854018078871561, "loss": 0.5463, "step": 54010 }, { "epoch": 2.6830237409357305, "grad_norm": 0.10400390625, "learning_rate": 0.0005853620741035065, "loss": 0.5609, "step": 54020 }, { "epoch": 2.68352041323135, "grad_norm": 0.10400390625, "learning_rate": 0.000585322340319857, "loss": 0.5414, "step": 54030 }, { "epoch": 2.6840170855269694, "grad_norm": 0.107421875, "learning_rate": 0.0005852826065362074, "loss": 0.556, "step": 54040 }, { "epoch": 2.6845137578225886, "grad_norm": 0.1005859375, "learning_rate": 0.0005852428727525579, "loss": 0.5399, "step": 54050 }, { "epoch": 2.685010430118208, "grad_norm": 0.1552734375, "learning_rate": 0.0005852031389689083, "loss": 0.5864, "step": 54060 }, { "epoch": 2.6855071024138275, "grad_norm": 0.1767578125, "learning_rate": 0.0005851634051852588, "loss": 0.5751, "step": 54070 }, { "epoch": 2.6860037747094467, "grad_norm": 0.099609375, "learning_rate": 0.0005851236714016093, "loss": 0.5782, "step": 54080 }, { "epoch": 2.686500447005066, "grad_norm": 0.1357421875, "learning_rate": 0.0005850839376179596, "loss": 0.5686, "step": 54090 }, { "epoch": 2.6869971193006856, "grad_norm": 0.09228515625, "learning_rate": 0.0005850442038343102, "loss": 0.5474, "step": 54100 }, { "epoch": 2.687493791596305, "grad_norm": 0.09716796875, "learning_rate": 0.0005850044700506606, "loss": 0.5418, "step": 54110 }, { "epoch": 2.687990463891924, "grad_norm": 0.0947265625, "learning_rate": 0.000584964736267011, "loss": 0.541, "step": 54120 }, { "epoch": 2.6884871361875433, "grad_norm": 0.12890625, "learning_rate": 0.0005849250024833615, "loss": 0.5756, "step": 54130 }, { "epoch": 2.6889838084831625, "grad_norm": 0.1181640625, "learning_rate": 0.0005848852686997119, "loss": 0.5503, "step": 54140 }, { "epoch": 2.689480480778782, "grad_norm": 0.1669921875, "learning_rate": 0.0005848455349160624, "loss": 0.551, "step": 54150 }, { "epoch": 2.6899771530744014, "grad_norm": 0.10205078125, "learning_rate": 0.0005848058011324129, "loss": 0.5432, "step": 54160 }, { "epoch": 2.690473825370021, "grad_norm": 0.11376953125, "learning_rate": 0.0005847660673487633, "loss": 0.5528, "step": 54170 }, { "epoch": 2.6909704976656403, "grad_norm": 0.1552734375, "learning_rate": 0.0005847263335651137, "loss": 0.5809, "step": 54180 }, { "epoch": 2.6914671699612596, "grad_norm": 0.103515625, "learning_rate": 0.0005846865997814642, "loss": 0.5633, "step": 54190 }, { "epoch": 2.691963842256879, "grad_norm": 0.09033203125, "learning_rate": 0.0005846468659978147, "loss": 0.5667, "step": 54200 }, { "epoch": 2.692460514552498, "grad_norm": 0.099609375, "learning_rate": 0.0005846071322141652, "loss": 0.5571, "step": 54210 }, { "epoch": 2.6929571868481177, "grad_norm": 0.10986328125, "learning_rate": 0.0005845673984305156, "loss": 0.5512, "step": 54220 }, { "epoch": 2.693453859143737, "grad_norm": 0.103515625, "learning_rate": 0.000584527664646866, "loss": 0.5544, "step": 54230 }, { "epoch": 2.6939505314393566, "grad_norm": 0.1376953125, "learning_rate": 0.0005844879308632165, "loss": 0.5588, "step": 54240 }, { "epoch": 2.694447203734976, "grad_norm": 0.298828125, "learning_rate": 0.0005844481970795669, "loss": 0.5605, "step": 54250 }, { "epoch": 2.694943876030595, "grad_norm": 0.12451171875, "learning_rate": 0.0005844084632959174, "loss": 0.5892, "step": 54260 }, { "epoch": 2.6954405483262143, "grad_norm": 0.10693359375, "learning_rate": 0.0005843687295122679, "loss": 0.5496, "step": 54270 }, { "epoch": 2.6959372206218335, "grad_norm": 0.09228515625, "learning_rate": 0.0005843289957286182, "loss": 0.5694, "step": 54280 }, { "epoch": 2.696433892917453, "grad_norm": 0.12158203125, "learning_rate": 0.0005842892619449687, "loss": 0.5944, "step": 54290 }, { "epoch": 2.6969305652130724, "grad_norm": 0.158203125, "learning_rate": 0.0005842495281613193, "loss": 0.5402, "step": 54300 }, { "epoch": 2.6974272375086916, "grad_norm": 0.1357421875, "learning_rate": 0.0005842097943776696, "loss": 0.5805, "step": 54310 }, { "epoch": 2.6979239098043113, "grad_norm": 0.11865234375, "learning_rate": 0.0005841700605940201, "loss": 0.5464, "step": 54320 }, { "epoch": 2.6984205820999305, "grad_norm": 0.09521484375, "learning_rate": 0.0005841303268103705, "loss": 0.5548, "step": 54330 }, { "epoch": 2.6989172543955497, "grad_norm": 0.10791015625, "learning_rate": 0.0005840905930267209, "loss": 0.5549, "step": 54340 }, { "epoch": 2.699413926691169, "grad_norm": 0.109375, "learning_rate": 0.0005840508592430715, "loss": 0.5741, "step": 54350 }, { "epoch": 2.6999105989867886, "grad_norm": 0.1162109375, "learning_rate": 0.0005840111254594219, "loss": 0.582, "step": 54360 }, { "epoch": 2.700407271282408, "grad_norm": 0.11181640625, "learning_rate": 0.0005839713916757724, "loss": 0.5709, "step": 54370 }, { "epoch": 2.700903943578027, "grad_norm": 0.1044921875, "learning_rate": 0.0005839316578921228, "loss": 0.5393, "step": 54380 }, { "epoch": 2.7014006158736468, "grad_norm": 0.1064453125, "learning_rate": 0.0005838919241084732, "loss": 0.5663, "step": 54390 }, { "epoch": 2.701897288169266, "grad_norm": 0.1162109375, "learning_rate": 0.0005838521903248238, "loss": 0.5497, "step": 54400 }, { "epoch": 2.7023939604648852, "grad_norm": 0.134765625, "learning_rate": 0.0005838124565411742, "loss": 0.5285, "step": 54410 }, { "epoch": 2.7028906327605045, "grad_norm": 0.10693359375, "learning_rate": 0.0005837727227575246, "loss": 0.5538, "step": 54420 }, { "epoch": 2.703387305056124, "grad_norm": 0.1083984375, "learning_rate": 0.0005837329889738751, "loss": 0.5732, "step": 54430 }, { "epoch": 2.7038839773517434, "grad_norm": 0.1376953125, "learning_rate": 0.0005836932551902254, "loss": 0.5686, "step": 54440 }, { "epoch": 2.7043806496473626, "grad_norm": 0.0947265625, "learning_rate": 0.000583653521406576, "loss": 0.5449, "step": 54450 }, { "epoch": 2.7048773219429822, "grad_norm": 0.107421875, "learning_rate": 0.0005836137876229265, "loss": 0.5793, "step": 54460 }, { "epoch": 2.7053739942386015, "grad_norm": 0.09326171875, "learning_rate": 0.0005835740538392768, "loss": 0.5356, "step": 54470 }, { "epoch": 2.7058706665342207, "grad_norm": 0.125, "learning_rate": 0.0005835343200556273, "loss": 0.5417, "step": 54480 }, { "epoch": 2.70636733882984, "grad_norm": 0.125, "learning_rate": 0.0005834945862719778, "loss": 0.5341, "step": 54490 }, { "epoch": 2.706864011125459, "grad_norm": 0.130859375, "learning_rate": 0.0005834548524883282, "loss": 0.5406, "step": 54500 }, { "epoch": 2.707360683421079, "grad_norm": 0.154296875, "learning_rate": 0.0005834151187046787, "loss": 0.586, "step": 54510 }, { "epoch": 2.707857355716698, "grad_norm": 0.08837890625, "learning_rate": 0.0005833753849210291, "loss": 0.5395, "step": 54520 }, { "epoch": 2.7083540280123177, "grad_norm": 0.1005859375, "learning_rate": 0.0005833356511373796, "loss": 0.5561, "step": 54530 }, { "epoch": 2.708850700307937, "grad_norm": 0.103515625, "learning_rate": 0.00058329591735373, "loss": 0.5365, "step": 54540 }, { "epoch": 2.709347372603556, "grad_norm": 0.115234375, "learning_rate": 0.0005832561835700805, "loss": 0.5619, "step": 54550 }, { "epoch": 2.7098440448991754, "grad_norm": 0.1337890625, "learning_rate": 0.000583216449786431, "loss": 0.5714, "step": 54560 }, { "epoch": 2.7103407171947946, "grad_norm": 0.091796875, "learning_rate": 0.0005831767160027814, "loss": 0.5312, "step": 54570 }, { "epoch": 2.7108373894904143, "grad_norm": 0.0986328125, "learning_rate": 0.0005831369822191318, "loss": 0.5659, "step": 54580 }, { "epoch": 2.7113340617860335, "grad_norm": 0.10888671875, "learning_rate": 0.0005830972484354823, "loss": 0.5533, "step": 54590 }, { "epoch": 2.7118307340816528, "grad_norm": 0.09912109375, "learning_rate": 0.0005830575146518328, "loss": 0.5633, "step": 54600 }, { "epoch": 2.7123274063772724, "grad_norm": 0.1435546875, "learning_rate": 0.0005830177808681832, "loss": 0.5577, "step": 54610 }, { "epoch": 2.7128240786728917, "grad_norm": 0.1416015625, "learning_rate": 0.0005829780470845337, "loss": 0.572, "step": 54620 }, { "epoch": 2.713320750968511, "grad_norm": 0.10546875, "learning_rate": 0.0005829383133008841, "loss": 0.5672, "step": 54630 }, { "epoch": 2.71381742326413, "grad_norm": 0.1142578125, "learning_rate": 0.0005828985795172345, "loss": 0.5401, "step": 54640 }, { "epoch": 2.71431409555975, "grad_norm": 0.1455078125, "learning_rate": 0.0005828588457335851, "loss": 0.5604, "step": 54650 }, { "epoch": 2.714810767855369, "grad_norm": 0.126953125, "learning_rate": 0.0005828191119499355, "loss": 0.5605, "step": 54660 }, { "epoch": 2.7153074401509882, "grad_norm": 0.095703125, "learning_rate": 0.0005827793781662859, "loss": 0.5624, "step": 54670 }, { "epoch": 2.715804112446608, "grad_norm": 0.1396484375, "learning_rate": 0.0005827396443826364, "loss": 0.5725, "step": 54680 }, { "epoch": 2.716300784742227, "grad_norm": 0.123046875, "learning_rate": 0.0005826999105989868, "loss": 0.5552, "step": 54690 }, { "epoch": 2.7167974570378464, "grad_norm": 0.1220703125, "learning_rate": 0.0005826601768153373, "loss": 0.5755, "step": 54700 }, { "epoch": 2.7172941293334656, "grad_norm": 0.10009765625, "learning_rate": 0.0005826204430316877, "loss": 0.5701, "step": 54710 }, { "epoch": 2.7177908016290853, "grad_norm": 0.10791015625, "learning_rate": 0.0005825807092480382, "loss": 0.5731, "step": 54720 }, { "epoch": 2.7182874739247045, "grad_norm": 0.140625, "learning_rate": 0.0005825409754643886, "loss": 0.5941, "step": 54730 }, { "epoch": 2.7187841462203237, "grad_norm": 0.1123046875, "learning_rate": 0.000582501241680739, "loss": 0.5641, "step": 54740 }, { "epoch": 2.7192808185159434, "grad_norm": 0.11669921875, "learning_rate": 0.0005824615078970896, "loss": 0.5417, "step": 54750 }, { "epoch": 2.7197774908115626, "grad_norm": 0.1728515625, "learning_rate": 0.00058242177411344, "loss": 0.5744, "step": 54760 }, { "epoch": 2.720274163107182, "grad_norm": 0.10107421875, "learning_rate": 0.0005823820403297904, "loss": 0.571, "step": 54770 }, { "epoch": 2.720770835402801, "grad_norm": 0.10791015625, "learning_rate": 0.0005823423065461409, "loss": 0.5549, "step": 54780 }, { "epoch": 2.7212675076984207, "grad_norm": 0.1337890625, "learning_rate": 0.0005823025727624913, "loss": 0.5888, "step": 54790 }, { "epoch": 2.72176417999404, "grad_norm": 0.11962890625, "learning_rate": 0.0005822628389788418, "loss": 0.5551, "step": 54800 }, { "epoch": 2.722260852289659, "grad_norm": 0.197265625, "learning_rate": 0.0005822231051951923, "loss": 0.5548, "step": 54810 }, { "epoch": 2.722757524585279, "grad_norm": 0.11181640625, "learning_rate": 0.0005821833714115427, "loss": 0.5363, "step": 54820 }, { "epoch": 2.723254196880898, "grad_norm": 0.12353515625, "learning_rate": 0.0005821436376278931, "loss": 0.5589, "step": 54830 }, { "epoch": 2.7237508691765173, "grad_norm": 0.1767578125, "learning_rate": 0.0005821039038442436, "loss": 0.5889, "step": 54840 }, { "epoch": 2.7242475414721365, "grad_norm": 0.10400390625, "learning_rate": 0.0005820641700605941, "loss": 0.5374, "step": 54850 }, { "epoch": 2.7247442137677558, "grad_norm": 0.09521484375, "learning_rate": 0.0005820244362769445, "loss": 0.5606, "step": 54860 }, { "epoch": 2.7252408860633754, "grad_norm": 0.10302734375, "learning_rate": 0.000581984702493295, "loss": 0.5569, "step": 54870 }, { "epoch": 2.7257375583589947, "grad_norm": 0.2158203125, "learning_rate": 0.0005819449687096454, "loss": 0.5761, "step": 54880 }, { "epoch": 2.7262342306546143, "grad_norm": 0.12158203125, "learning_rate": 0.0005819052349259958, "loss": 0.5551, "step": 54890 }, { "epoch": 2.7267309029502336, "grad_norm": 0.09619140625, "learning_rate": 0.0005818655011423464, "loss": 0.5721, "step": 54900 }, { "epoch": 2.727227575245853, "grad_norm": 0.11767578125, "learning_rate": 0.0005818257673586968, "loss": 0.5725, "step": 54910 }, { "epoch": 2.727724247541472, "grad_norm": 0.09130859375, "learning_rate": 0.0005817860335750472, "loss": 0.5593, "step": 54920 }, { "epoch": 2.7282209198370913, "grad_norm": 0.109375, "learning_rate": 0.0005817462997913976, "loss": 0.5551, "step": 54930 }, { "epoch": 2.728717592132711, "grad_norm": 0.09765625, "learning_rate": 0.0005817065660077481, "loss": 0.5658, "step": 54940 }, { "epoch": 2.72921426442833, "grad_norm": 0.11279296875, "learning_rate": 0.0005816668322240986, "loss": 0.5853, "step": 54950 }, { "epoch": 2.7297109367239494, "grad_norm": 0.154296875, "learning_rate": 0.000581627098440449, "loss": 0.5695, "step": 54960 }, { "epoch": 2.730207609019569, "grad_norm": 0.1005859375, "learning_rate": 0.0005815873646567995, "loss": 0.5309, "step": 54970 }, { "epoch": 2.7307042813151883, "grad_norm": 0.10400390625, "learning_rate": 0.0005815476308731499, "loss": 0.5484, "step": 54980 }, { "epoch": 2.7312009536108075, "grad_norm": 0.09619140625, "learning_rate": 0.0005815078970895003, "loss": 0.5488, "step": 54990 }, { "epoch": 2.7316976259064267, "grad_norm": 0.1357421875, "learning_rate": 0.0005814681633058509, "loss": 0.5306, "step": 55000 }, { "epoch": 2.7321942982020464, "grad_norm": 0.1162109375, "learning_rate": 0.0005814284295222013, "loss": 0.5647, "step": 55010 }, { "epoch": 2.7326909704976656, "grad_norm": 0.111328125, "learning_rate": 0.0005813886957385517, "loss": 0.5721, "step": 55020 }, { "epoch": 2.733187642793285, "grad_norm": 0.1044921875, "learning_rate": 0.0005813489619549022, "loss": 0.5647, "step": 55030 }, { "epoch": 2.7336843150889045, "grad_norm": 0.09228515625, "learning_rate": 0.0005813092281712526, "loss": 0.5479, "step": 55040 }, { "epoch": 2.7341809873845238, "grad_norm": 0.2578125, "learning_rate": 0.000581269494387603, "loss": 0.5689, "step": 55050 }, { "epoch": 2.734677659680143, "grad_norm": 0.0986328125, "learning_rate": 0.0005812297606039536, "loss": 0.535, "step": 55060 }, { "epoch": 2.735174331975762, "grad_norm": 0.140625, "learning_rate": 0.000581190026820304, "loss": 0.5574, "step": 55070 }, { "epoch": 2.735671004271382, "grad_norm": 0.11474609375, "learning_rate": 0.0005811502930366544, "loss": 0.5488, "step": 55080 }, { "epoch": 2.736167676567001, "grad_norm": 0.1083984375, "learning_rate": 0.0005811105592530049, "loss": 0.5373, "step": 55090 }, { "epoch": 2.7366643488626203, "grad_norm": 0.1064453125, "learning_rate": 0.0005810708254693554, "loss": 0.5725, "step": 55100 }, { "epoch": 2.73716102115824, "grad_norm": 0.12255859375, "learning_rate": 0.0005810310916857059, "loss": 0.5674, "step": 55110 }, { "epoch": 2.7376576934538592, "grad_norm": 0.1279296875, "learning_rate": 0.0005809913579020562, "loss": 0.5623, "step": 55120 }, { "epoch": 2.7381543657494785, "grad_norm": 0.15234375, "learning_rate": 0.0005809516241184067, "loss": 0.5248, "step": 55130 }, { "epoch": 2.7386510380450977, "grad_norm": 0.09814453125, "learning_rate": 0.0005809118903347572, "loss": 0.5304, "step": 55140 }, { "epoch": 2.7391477103407174, "grad_norm": 0.1845703125, "learning_rate": 0.0005808721565511075, "loss": 0.5724, "step": 55150 }, { "epoch": 2.7396443826363366, "grad_norm": 0.1064453125, "learning_rate": 0.0005808324227674581, "loss": 0.5655, "step": 55160 }, { "epoch": 2.740141054931956, "grad_norm": 0.0986328125, "learning_rate": 0.0005807926889838085, "loss": 0.5197, "step": 55170 }, { "epoch": 2.7406377272275755, "grad_norm": 0.126953125, "learning_rate": 0.0005807529552001589, "loss": 0.551, "step": 55180 }, { "epoch": 2.7411343995231947, "grad_norm": 0.1318359375, "learning_rate": 0.0005807132214165094, "loss": 0.5737, "step": 55190 }, { "epoch": 2.741631071818814, "grad_norm": 0.1162109375, "learning_rate": 0.0005806734876328598, "loss": 0.5701, "step": 55200 }, { "epoch": 2.742127744114433, "grad_norm": 0.1552734375, "learning_rate": 0.0005806337538492103, "loss": 0.5167, "step": 55210 }, { "epoch": 2.7426244164100524, "grad_norm": 0.10791015625, "learning_rate": 0.0005805940200655608, "loss": 0.5687, "step": 55220 }, { "epoch": 2.743121088705672, "grad_norm": 0.1611328125, "learning_rate": 0.0005805542862819112, "loss": 0.5623, "step": 55230 }, { "epoch": 2.7436177610012913, "grad_norm": 0.11669921875, "learning_rate": 0.0005805145524982616, "loss": 0.5348, "step": 55240 }, { "epoch": 2.744114433296911, "grad_norm": 0.1201171875, "learning_rate": 0.0005804748187146121, "loss": 0.549, "step": 55250 }, { "epoch": 2.74461110559253, "grad_norm": 0.09912109375, "learning_rate": 0.0005804350849309626, "loss": 0.5745, "step": 55260 }, { "epoch": 2.7451077778881494, "grad_norm": 0.10302734375, "learning_rate": 0.0005803953511473131, "loss": 0.5593, "step": 55270 }, { "epoch": 2.7456044501837686, "grad_norm": 0.1337890625, "learning_rate": 0.0005803556173636635, "loss": 0.5544, "step": 55280 }, { "epoch": 2.746101122479388, "grad_norm": 0.142578125, "learning_rate": 0.0005803158835800139, "loss": 0.5419, "step": 55290 }, { "epoch": 2.7465977947750075, "grad_norm": 0.1474609375, "learning_rate": 0.0005802761497963645, "loss": 0.6107, "step": 55300 }, { "epoch": 2.7470944670706268, "grad_norm": 0.11181640625, "learning_rate": 0.0005802364160127148, "loss": 0.5794, "step": 55310 }, { "epoch": 2.747591139366246, "grad_norm": 0.1201171875, "learning_rate": 0.0005801966822290653, "loss": 0.5429, "step": 55320 }, { "epoch": 2.7480878116618657, "grad_norm": 0.125, "learning_rate": 0.0005801569484454158, "loss": 0.5616, "step": 55330 }, { "epoch": 2.748584483957485, "grad_norm": 0.1640625, "learning_rate": 0.0005801172146617661, "loss": 0.5419, "step": 55340 }, { "epoch": 2.749081156253104, "grad_norm": 0.140625, "learning_rate": 0.0005800774808781166, "loss": 0.5558, "step": 55350 }, { "epoch": 2.7495778285487233, "grad_norm": 0.1064453125, "learning_rate": 0.0005800377470944672, "loss": 0.5258, "step": 55360 }, { "epoch": 2.750074500844343, "grad_norm": 0.09375, "learning_rate": 0.0005799980133108175, "loss": 0.5429, "step": 55370 }, { "epoch": 2.7505711731399622, "grad_norm": 0.1298828125, "learning_rate": 0.000579958279527168, "loss": 0.5463, "step": 55380 }, { "epoch": 2.7510678454355815, "grad_norm": 0.10888671875, "learning_rate": 0.0005799185457435184, "loss": 0.5475, "step": 55390 }, { "epoch": 2.751564517731201, "grad_norm": 0.130859375, "learning_rate": 0.000579878811959869, "loss": 0.5423, "step": 55400 }, { "epoch": 2.7520611900268204, "grad_norm": 0.158203125, "learning_rate": 0.0005798390781762194, "loss": 0.5488, "step": 55410 }, { "epoch": 2.7525578623224396, "grad_norm": 0.171875, "learning_rate": 0.0005797993443925698, "loss": 0.5434, "step": 55420 }, { "epoch": 2.753054534618059, "grad_norm": 0.130859375, "learning_rate": 0.0005797596106089203, "loss": 0.5555, "step": 55430 }, { "epoch": 2.7535512069136785, "grad_norm": 0.11328125, "learning_rate": 0.0005797198768252707, "loss": 0.5677, "step": 55440 }, { "epoch": 2.7540478792092977, "grad_norm": 0.09423828125, "learning_rate": 0.0005796801430416211, "loss": 0.5583, "step": 55450 }, { "epoch": 2.754544551504917, "grad_norm": 0.111328125, "learning_rate": 0.0005796404092579717, "loss": 0.5538, "step": 55460 }, { "epoch": 2.7550412238005366, "grad_norm": 0.091796875, "learning_rate": 0.0005796006754743221, "loss": 0.5444, "step": 55470 }, { "epoch": 2.755537896096156, "grad_norm": 0.13671875, "learning_rate": 0.0005795609416906725, "loss": 0.5492, "step": 55480 }, { "epoch": 2.756034568391775, "grad_norm": 0.1083984375, "learning_rate": 0.000579521207907023, "loss": 0.5969, "step": 55490 }, { "epoch": 2.7565312406873943, "grad_norm": 0.1220703125, "learning_rate": 0.0005794814741233733, "loss": 0.5599, "step": 55500 }, { "epoch": 2.757027912983014, "grad_norm": 0.107421875, "learning_rate": 0.0005794417403397239, "loss": 0.5281, "step": 55510 }, { "epoch": 2.757524585278633, "grad_norm": 0.10546875, "learning_rate": 0.0005794020065560744, "loss": 0.565, "step": 55520 }, { "epoch": 2.7580212575742524, "grad_norm": 0.1416015625, "learning_rate": 0.0005793622727724247, "loss": 0.5562, "step": 55530 }, { "epoch": 2.758517929869872, "grad_norm": 0.169921875, "learning_rate": 0.0005793225389887752, "loss": 0.5884, "step": 55540 }, { "epoch": 2.7590146021654913, "grad_norm": 0.12353515625, "learning_rate": 0.0005792828052051257, "loss": 0.5439, "step": 55550 }, { "epoch": 2.7595112744611106, "grad_norm": 0.107421875, "learning_rate": 0.0005792430714214762, "loss": 0.5555, "step": 55560 }, { "epoch": 2.76000794675673, "grad_norm": 0.16015625, "learning_rate": 0.0005792033376378266, "loss": 0.5547, "step": 55570 }, { "epoch": 2.760504619052349, "grad_norm": 0.1162109375, "learning_rate": 0.000579163603854177, "loss": 0.5592, "step": 55580 }, { "epoch": 2.7610012913479687, "grad_norm": 0.1005859375, "learning_rate": 0.0005791238700705275, "loss": 0.5555, "step": 55590 }, { "epoch": 2.761497963643588, "grad_norm": 0.1337890625, "learning_rate": 0.0005790841362868779, "loss": 0.5415, "step": 55600 }, { "epoch": 2.7619946359392076, "grad_norm": 0.09765625, "learning_rate": 0.0005790444025032284, "loss": 0.5891, "step": 55610 }, { "epoch": 2.762491308234827, "grad_norm": 0.103515625, "learning_rate": 0.0005790046687195789, "loss": 0.5656, "step": 55620 }, { "epoch": 2.762987980530446, "grad_norm": 0.10302734375, "learning_rate": 0.0005789649349359293, "loss": 0.5647, "step": 55630 }, { "epoch": 2.7634846528260653, "grad_norm": 0.1845703125, "learning_rate": 0.0005789252011522797, "loss": 0.5689, "step": 55640 }, { "epoch": 2.7639813251216845, "grad_norm": 0.1005859375, "learning_rate": 0.0005788854673686302, "loss": 0.5426, "step": 55650 }, { "epoch": 2.764477997417304, "grad_norm": 0.1923828125, "learning_rate": 0.0005788457335849807, "loss": 0.5417, "step": 55660 }, { "epoch": 2.7649746697129234, "grad_norm": 0.169921875, "learning_rate": 0.0005788059998013311, "loss": 0.5529, "step": 55670 }, { "epoch": 2.7654713420085426, "grad_norm": 0.11181640625, "learning_rate": 0.0005787662660176816, "loss": 0.5507, "step": 55680 }, { "epoch": 2.7659680143041623, "grad_norm": 0.11865234375, "learning_rate": 0.000578726532234032, "loss": 0.5881, "step": 55690 }, { "epoch": 2.7664646865997815, "grad_norm": 0.09814453125, "learning_rate": 0.0005786867984503824, "loss": 0.5548, "step": 55700 }, { "epoch": 2.7669613588954007, "grad_norm": 0.21484375, "learning_rate": 0.000578647064666733, "loss": 0.5443, "step": 55710 }, { "epoch": 2.76745803119102, "grad_norm": 0.1083984375, "learning_rate": 0.0005786073308830834, "loss": 0.5477, "step": 55720 }, { "epoch": 2.7679547034866396, "grad_norm": 0.1005859375, "learning_rate": 0.0005785675970994338, "loss": 0.5643, "step": 55730 }, { "epoch": 2.768451375782259, "grad_norm": 0.17578125, "learning_rate": 0.0005785278633157843, "loss": 0.5501, "step": 55740 }, { "epoch": 2.768948048077878, "grad_norm": 0.09375, "learning_rate": 0.0005784881295321347, "loss": 0.5595, "step": 55750 }, { "epoch": 2.7694447203734978, "grad_norm": 0.11328125, "learning_rate": 0.0005784483957484852, "loss": 0.5723, "step": 55760 }, { "epoch": 2.769941392669117, "grad_norm": 0.0986328125, "learning_rate": 0.0005784086619648356, "loss": 0.5765, "step": 55770 }, { "epoch": 2.770438064964736, "grad_norm": 0.1337890625, "learning_rate": 0.0005783689281811861, "loss": 0.5515, "step": 55780 }, { "epoch": 2.7709347372603554, "grad_norm": 0.1328125, "learning_rate": 0.0005783291943975365, "loss": 0.5348, "step": 55790 }, { "epoch": 2.771431409555975, "grad_norm": 0.115234375, "learning_rate": 0.0005782894606138869, "loss": 0.5507, "step": 55800 }, { "epoch": 2.7719280818515943, "grad_norm": 0.099609375, "learning_rate": 0.0005782497268302375, "loss": 0.5576, "step": 55810 }, { "epoch": 2.7724247541472136, "grad_norm": 0.1044921875, "learning_rate": 0.0005782099930465879, "loss": 0.5549, "step": 55820 }, { "epoch": 2.7729214264428332, "grad_norm": 0.09375, "learning_rate": 0.0005781702592629383, "loss": 0.5687, "step": 55830 }, { "epoch": 2.7734180987384525, "grad_norm": 0.09130859375, "learning_rate": 0.0005781305254792888, "loss": 0.5809, "step": 55840 }, { "epoch": 2.7739147710340717, "grad_norm": 0.1669921875, "learning_rate": 0.0005780907916956392, "loss": 0.5701, "step": 55850 }, { "epoch": 2.774411443329691, "grad_norm": 0.2890625, "learning_rate": 0.0005780510579119897, "loss": 0.568, "step": 55860 }, { "epoch": 2.77490811562531, "grad_norm": 0.09130859375, "learning_rate": 0.0005780113241283402, "loss": 0.5715, "step": 55870 }, { "epoch": 2.77540478792093, "grad_norm": 0.11962890625, "learning_rate": 0.0005779715903446906, "loss": 0.5823, "step": 55880 }, { "epoch": 2.775901460216549, "grad_norm": 0.1357421875, "learning_rate": 0.000577931856561041, "loss": 0.5197, "step": 55890 }, { "epoch": 2.7763981325121687, "grad_norm": 0.12060546875, "learning_rate": 0.0005778921227773915, "loss": 0.6089, "step": 55900 }, { "epoch": 2.776894804807788, "grad_norm": 0.087890625, "learning_rate": 0.000577852388993742, "loss": 0.5394, "step": 55910 }, { "epoch": 2.777391477103407, "grad_norm": 0.150390625, "learning_rate": 0.0005778126552100924, "loss": 0.5576, "step": 55920 }, { "epoch": 2.7778881493990264, "grad_norm": 0.08642578125, "learning_rate": 0.0005777729214264429, "loss": 0.5577, "step": 55930 }, { "epoch": 2.7783848216946456, "grad_norm": 0.146484375, "learning_rate": 0.0005777331876427933, "loss": 0.5627, "step": 55940 }, { "epoch": 2.7788814939902653, "grad_norm": 0.146484375, "learning_rate": 0.0005776934538591437, "loss": 0.5685, "step": 55950 }, { "epoch": 2.7793781662858845, "grad_norm": 0.09814453125, "learning_rate": 0.0005776537200754943, "loss": 0.5755, "step": 55960 }, { "epoch": 2.779874838581504, "grad_norm": 0.0947265625, "learning_rate": 0.0005776139862918447, "loss": 0.5418, "step": 55970 }, { "epoch": 2.7803715108771234, "grad_norm": 0.11328125, "learning_rate": 0.0005775742525081951, "loss": 0.5429, "step": 55980 }, { "epoch": 2.7808681831727426, "grad_norm": 0.1220703125, "learning_rate": 0.0005775345187245455, "loss": 0.5879, "step": 55990 }, { "epoch": 2.781364855468362, "grad_norm": 0.0947265625, "learning_rate": 0.000577494784940896, "loss": 0.5522, "step": 56000 }, { "epoch": 2.781861527763981, "grad_norm": 0.10107421875, "learning_rate": 0.0005774550511572466, "loss": 0.5791, "step": 56010 }, { "epoch": 2.7823582000596008, "grad_norm": 0.103515625, "learning_rate": 0.0005774153173735969, "loss": 0.5753, "step": 56020 }, { "epoch": 2.78285487235522, "grad_norm": 0.1572265625, "learning_rate": 0.0005773755835899474, "loss": 0.5485, "step": 56030 }, { "epoch": 2.7833515446508392, "grad_norm": 0.10009765625, "learning_rate": 0.0005773358498062978, "loss": 0.5378, "step": 56040 }, { "epoch": 2.783848216946459, "grad_norm": 0.126953125, "learning_rate": 0.0005772961160226482, "loss": 0.5962, "step": 56050 }, { "epoch": 2.784344889242078, "grad_norm": 0.10595703125, "learning_rate": 0.0005772563822389988, "loss": 0.5633, "step": 56060 }, { "epoch": 2.7848415615376974, "grad_norm": 0.091796875, "learning_rate": 0.0005772166484553492, "loss": 0.5915, "step": 56070 }, { "epoch": 2.7853382338333166, "grad_norm": 0.13671875, "learning_rate": 0.0005771769146716996, "loss": 0.565, "step": 56080 }, { "epoch": 2.7858349061289363, "grad_norm": 0.1171875, "learning_rate": 0.0005771371808880501, "loss": 0.544, "step": 56090 }, { "epoch": 2.7863315784245555, "grad_norm": 0.11328125, "learning_rate": 0.0005770974471044005, "loss": 0.5503, "step": 56100 }, { "epoch": 2.7868282507201747, "grad_norm": 0.0966796875, "learning_rate": 0.000577057713320751, "loss": 0.5926, "step": 56110 }, { "epoch": 2.7873249230157944, "grad_norm": 0.1142578125, "learning_rate": 0.0005770179795371015, "loss": 0.5907, "step": 56120 }, { "epoch": 2.7878215953114136, "grad_norm": 0.0986328125, "learning_rate": 0.0005769782457534519, "loss": 0.542, "step": 56130 }, { "epoch": 2.788318267607033, "grad_norm": 0.1591796875, "learning_rate": 0.0005769385119698023, "loss": 0.5577, "step": 56140 }, { "epoch": 2.788814939902652, "grad_norm": 0.10595703125, "learning_rate": 0.0005768987781861528, "loss": 0.5581, "step": 56150 }, { "epoch": 2.7893116121982717, "grad_norm": 0.091796875, "learning_rate": 0.0005768590444025033, "loss": 0.5403, "step": 56160 }, { "epoch": 2.789808284493891, "grad_norm": 0.11572265625, "learning_rate": 0.0005768193106188538, "loss": 0.5795, "step": 56170 }, { "epoch": 2.79030495678951, "grad_norm": 0.12890625, "learning_rate": 0.0005767795768352041, "loss": 0.529, "step": 56180 }, { "epoch": 2.79080162908513, "grad_norm": 0.12109375, "learning_rate": 0.0005767398430515546, "loss": 0.5354, "step": 56190 }, { "epoch": 2.791298301380749, "grad_norm": 0.1328125, "learning_rate": 0.0005767001092679051, "loss": 0.5647, "step": 56200 }, { "epoch": 2.7917949736763683, "grad_norm": 0.1171875, "learning_rate": 0.0005766603754842554, "loss": 0.5636, "step": 56210 }, { "epoch": 2.7922916459719875, "grad_norm": 0.08544921875, "learning_rate": 0.000576620641700606, "loss": 0.5823, "step": 56220 }, { "epoch": 2.7927883182676068, "grad_norm": 0.1513671875, "learning_rate": 0.0005765809079169565, "loss": 0.5592, "step": 56230 }, { "epoch": 2.7932849905632264, "grad_norm": 0.12060546875, "learning_rate": 0.0005765411741333068, "loss": 0.5779, "step": 56240 }, { "epoch": 2.7937816628588457, "grad_norm": 0.1533203125, "learning_rate": 0.0005765014403496573, "loss": 0.5916, "step": 56250 }, { "epoch": 2.7942783351544653, "grad_norm": 0.20703125, "learning_rate": 0.0005764617065660078, "loss": 0.567, "step": 56260 }, { "epoch": 2.7947750074500846, "grad_norm": 0.10595703125, "learning_rate": 0.0005764219727823582, "loss": 0.5717, "step": 56270 }, { "epoch": 2.795271679745704, "grad_norm": 0.115234375, "learning_rate": 0.0005763822389987087, "loss": 0.5514, "step": 56280 }, { "epoch": 2.795768352041323, "grad_norm": 0.09765625, "learning_rate": 0.0005763425052150591, "loss": 0.5704, "step": 56290 }, { "epoch": 2.7962650243369422, "grad_norm": 0.166015625, "learning_rate": 0.0005763027714314096, "loss": 0.5569, "step": 56300 }, { "epoch": 2.796761696632562, "grad_norm": 0.11669921875, "learning_rate": 0.00057626303764776, "loss": 0.5818, "step": 56310 }, { "epoch": 2.797258368928181, "grad_norm": 0.1337890625, "learning_rate": 0.0005762233038641105, "loss": 0.5351, "step": 56320 }, { "epoch": 2.797755041223801, "grad_norm": 0.126953125, "learning_rate": 0.000576183570080461, "loss": 0.5564, "step": 56330 }, { "epoch": 2.79825171351942, "grad_norm": 0.1904296875, "learning_rate": 0.0005761438362968114, "loss": 0.577, "step": 56340 }, { "epoch": 2.7987483858150393, "grad_norm": 0.0966796875, "learning_rate": 0.0005761041025131618, "loss": 0.5136, "step": 56350 }, { "epoch": 2.7992450581106585, "grad_norm": 0.12158203125, "learning_rate": 0.0005760643687295124, "loss": 0.5576, "step": 56360 }, { "epoch": 2.7997417304062777, "grad_norm": 0.10888671875, "learning_rate": 0.0005760246349458627, "loss": 0.5479, "step": 56370 }, { "epoch": 2.8002384027018974, "grad_norm": 0.09326171875, "learning_rate": 0.0005759849011622132, "loss": 0.5534, "step": 56380 }, { "epoch": 2.8007350749975166, "grad_norm": 0.0966796875, "learning_rate": 0.0005759451673785637, "loss": 0.5432, "step": 56390 }, { "epoch": 2.801231747293136, "grad_norm": 0.1025390625, "learning_rate": 0.000575905433594914, "loss": 0.5506, "step": 56400 }, { "epoch": 2.8017284195887555, "grad_norm": 0.107421875, "learning_rate": 0.0005758656998112646, "loss": 0.5494, "step": 56410 }, { "epoch": 2.8022250918843747, "grad_norm": 0.15625, "learning_rate": 0.0005758259660276151, "loss": 0.5985, "step": 56420 }, { "epoch": 2.802721764179994, "grad_norm": 0.103515625, "learning_rate": 0.0005757862322439654, "loss": 0.5345, "step": 56430 }, { "epoch": 2.803218436475613, "grad_norm": 0.12890625, "learning_rate": 0.0005757464984603159, "loss": 0.5927, "step": 56440 }, { "epoch": 2.803715108771233, "grad_norm": 0.1064453125, "learning_rate": 0.0005757067646766663, "loss": 0.5388, "step": 56450 }, { "epoch": 2.804211781066852, "grad_norm": 0.13671875, "learning_rate": 0.0005756670308930169, "loss": 0.5468, "step": 56460 }, { "epoch": 2.8047084533624713, "grad_norm": 0.111328125, "learning_rate": 0.0005756272971093673, "loss": 0.592, "step": 56470 }, { "epoch": 2.805205125658091, "grad_norm": 0.10986328125, "learning_rate": 0.0005755875633257177, "loss": 0.5579, "step": 56480 }, { "epoch": 2.80570179795371, "grad_norm": 0.1806640625, "learning_rate": 0.0005755478295420682, "loss": 0.556, "step": 56490 }, { "epoch": 2.8061984702493294, "grad_norm": 0.08642578125, "learning_rate": 0.0005755080957584186, "loss": 0.5605, "step": 56500 }, { "epoch": 2.8066951425449487, "grad_norm": 0.10009765625, "learning_rate": 0.000575468361974769, "loss": 0.5704, "step": 56510 }, { "epoch": 2.8071918148405683, "grad_norm": 0.11767578125, "learning_rate": 0.0005754286281911196, "loss": 0.538, "step": 56520 }, { "epoch": 2.8076884871361876, "grad_norm": 0.16015625, "learning_rate": 0.00057538889440747, "loss": 0.5666, "step": 56530 }, { "epoch": 2.808185159431807, "grad_norm": 0.1650390625, "learning_rate": 0.0005753491606238204, "loss": 0.579, "step": 56540 }, { "epoch": 2.8086818317274265, "grad_norm": 0.0927734375, "learning_rate": 0.0005753094268401709, "loss": 0.5407, "step": 56550 }, { "epoch": 2.8091785040230457, "grad_norm": 0.1259765625, "learning_rate": 0.0005752696930565213, "loss": 0.55, "step": 56560 }, { "epoch": 2.809675176318665, "grad_norm": 0.1337890625, "learning_rate": 0.0005752299592728718, "loss": 0.5505, "step": 56570 }, { "epoch": 2.810171848614284, "grad_norm": 0.11767578125, "learning_rate": 0.0005751902254892223, "loss": 0.5341, "step": 56580 }, { "epoch": 2.8106685209099034, "grad_norm": 0.134765625, "learning_rate": 0.0005751504917055726, "loss": 0.5854, "step": 56590 }, { "epoch": 2.811165193205523, "grad_norm": 0.09814453125, "learning_rate": 0.0005751107579219231, "loss": 0.6014, "step": 56600 }, { "epoch": 2.8116618655011423, "grad_norm": 0.146484375, "learning_rate": 0.0005750710241382737, "loss": 0.5923, "step": 56610 }, { "epoch": 2.812158537796762, "grad_norm": 0.130859375, "learning_rate": 0.0005750312903546241, "loss": 0.5599, "step": 56620 }, { "epoch": 2.812655210092381, "grad_norm": 0.09375, "learning_rate": 0.0005749915565709745, "loss": 0.5852, "step": 56630 }, { "epoch": 2.8131518823880004, "grad_norm": 0.09326171875, "learning_rate": 0.0005749518227873249, "loss": 0.5262, "step": 56640 }, { "epoch": 2.8136485546836196, "grad_norm": 0.10400390625, "learning_rate": 0.0005749120890036754, "loss": 0.5398, "step": 56650 }, { "epoch": 2.814145226979239, "grad_norm": 0.1171875, "learning_rate": 0.0005748723552200258, "loss": 0.5394, "step": 56660 }, { "epoch": 2.8146418992748585, "grad_norm": 0.119140625, "learning_rate": 0.0005748326214363763, "loss": 0.5383, "step": 56670 }, { "epoch": 2.8151385715704778, "grad_norm": 0.1328125, "learning_rate": 0.0005747928876527268, "loss": 0.5352, "step": 56680 }, { "epoch": 2.8156352438660974, "grad_norm": 0.19140625, "learning_rate": 0.0005747531538690772, "loss": 0.5325, "step": 56690 }, { "epoch": 2.8161319161617167, "grad_norm": 0.1875, "learning_rate": 0.0005747134200854276, "loss": 0.5311, "step": 56700 }, { "epoch": 2.816628588457336, "grad_norm": 0.169921875, "learning_rate": 0.0005746736863017781, "loss": 0.5797, "step": 56710 }, { "epoch": 2.817125260752955, "grad_norm": 0.1083984375, "learning_rate": 0.0005746339525181286, "loss": 0.5722, "step": 56720 }, { "epoch": 2.8176219330485743, "grad_norm": 0.130859375, "learning_rate": 0.000574594218734479, "loss": 0.5606, "step": 56730 }, { "epoch": 2.818118605344194, "grad_norm": 0.111328125, "learning_rate": 0.0005745544849508295, "loss": 0.5685, "step": 56740 }, { "epoch": 2.8186152776398132, "grad_norm": 0.1484375, "learning_rate": 0.0005745147511671799, "loss": 0.5442, "step": 56750 }, { "epoch": 2.8191119499354325, "grad_norm": 0.09814453125, "learning_rate": 0.0005744750173835303, "loss": 0.5349, "step": 56760 }, { "epoch": 2.819608622231052, "grad_norm": 0.1298828125, "learning_rate": 0.0005744352835998809, "loss": 0.5479, "step": 56770 }, { "epoch": 2.8201052945266714, "grad_norm": 0.09716796875, "learning_rate": 0.0005743955498162313, "loss": 0.5517, "step": 56780 }, { "epoch": 2.8206019668222906, "grad_norm": 0.11279296875, "learning_rate": 0.0005743558160325817, "loss": 0.5598, "step": 56790 }, { "epoch": 2.82109863911791, "grad_norm": 0.1611328125, "learning_rate": 0.0005743160822489322, "loss": 0.546, "step": 56800 }, { "epoch": 2.8215953114135295, "grad_norm": 0.08935546875, "learning_rate": 0.0005742763484652826, "loss": 0.5627, "step": 56810 }, { "epoch": 2.8220919837091487, "grad_norm": 0.10791015625, "learning_rate": 0.0005742366146816331, "loss": 0.5589, "step": 56820 }, { "epoch": 2.822588656004768, "grad_norm": 0.1728515625, "learning_rate": 0.0005741968808979836, "loss": 0.5491, "step": 56830 }, { "epoch": 2.8230853283003876, "grad_norm": 0.09765625, "learning_rate": 0.000574157147114334, "loss": 0.5563, "step": 56840 }, { "epoch": 2.823582000596007, "grad_norm": 0.0927734375, "learning_rate": 0.0005741174133306844, "loss": 0.5788, "step": 56850 }, { "epoch": 2.824078672891626, "grad_norm": 0.11279296875, "learning_rate": 0.0005740776795470348, "loss": 0.5798, "step": 56860 }, { "epoch": 2.8245753451872453, "grad_norm": 0.10205078125, "learning_rate": 0.0005740379457633854, "loss": 0.5597, "step": 56870 }, { "epoch": 2.825072017482865, "grad_norm": 0.1044921875, "learning_rate": 0.0005739982119797358, "loss": 0.5476, "step": 56880 }, { "epoch": 2.825568689778484, "grad_norm": 0.1044921875, "learning_rate": 0.0005739584781960862, "loss": 0.5343, "step": 56890 }, { "epoch": 2.8260653620741034, "grad_norm": 0.11328125, "learning_rate": 0.0005739187444124367, "loss": 0.5601, "step": 56900 }, { "epoch": 2.826562034369723, "grad_norm": 0.10498046875, "learning_rate": 0.0005738790106287871, "loss": 0.5578, "step": 56910 }, { "epoch": 2.8270587066653423, "grad_norm": 0.1015625, "learning_rate": 0.0005738392768451376, "loss": 0.5794, "step": 56920 }, { "epoch": 2.8275553789609615, "grad_norm": 0.1484375, "learning_rate": 0.0005737995430614881, "loss": 0.5499, "step": 56930 }, { "epoch": 2.8280520512565808, "grad_norm": 0.158203125, "learning_rate": 0.0005737598092778385, "loss": 0.5601, "step": 56940 }, { "epoch": 2.8285487235522, "grad_norm": 0.1259765625, "learning_rate": 0.0005737200754941889, "loss": 0.5375, "step": 56950 }, { "epoch": 2.8290453958478197, "grad_norm": 0.09521484375, "learning_rate": 0.0005736803417105394, "loss": 0.5793, "step": 56960 }, { "epoch": 2.829542068143439, "grad_norm": 0.1513671875, "learning_rate": 0.0005736406079268899, "loss": 0.5278, "step": 56970 }, { "epoch": 2.8300387404390586, "grad_norm": 0.1123046875, "learning_rate": 0.0005736008741432403, "loss": 0.5501, "step": 56980 }, { "epoch": 2.830535412734678, "grad_norm": 0.0947265625, "learning_rate": 0.0005735611403595908, "loss": 0.5422, "step": 56990 }, { "epoch": 2.831032085030297, "grad_norm": 0.08984375, "learning_rate": 0.0005735214065759412, "loss": 0.5629, "step": 57000 }, { "epoch": 2.8315287573259162, "grad_norm": 0.09375, "learning_rate": 0.0005734816727922916, "loss": 0.5634, "step": 57010 }, { "epoch": 2.8320254296215355, "grad_norm": 0.11767578125, "learning_rate": 0.0005734419390086422, "loss": 0.5477, "step": 57020 }, { "epoch": 2.832522101917155, "grad_norm": 0.134765625, "learning_rate": 0.0005734022052249926, "loss": 0.5744, "step": 57030 }, { "epoch": 2.8330187742127744, "grad_norm": 0.10888671875, "learning_rate": 0.000573362471441343, "loss": 0.5712, "step": 57040 }, { "epoch": 2.8335154465083936, "grad_norm": 0.09521484375, "learning_rate": 0.0005733227376576934, "loss": 0.513, "step": 57050 }, { "epoch": 2.8340121188040133, "grad_norm": 0.119140625, "learning_rate": 0.0005732830038740439, "loss": 0.5702, "step": 57060 }, { "epoch": 2.8345087910996325, "grad_norm": 0.09765625, "learning_rate": 0.0005732432700903945, "loss": 0.5636, "step": 57070 }, { "epoch": 2.8350054633952517, "grad_norm": 0.08837890625, "learning_rate": 0.0005732035363067448, "loss": 0.5351, "step": 57080 }, { "epoch": 2.835502135690871, "grad_norm": 0.1484375, "learning_rate": 0.0005731638025230953, "loss": 0.5575, "step": 57090 }, { "epoch": 2.8359988079864906, "grad_norm": 0.119140625, "learning_rate": 0.0005731240687394458, "loss": 0.5511, "step": 57100 }, { "epoch": 2.83649548028211, "grad_norm": 0.1025390625, "learning_rate": 0.0005730843349557961, "loss": 0.564, "step": 57110 }, { "epoch": 2.836992152577729, "grad_norm": 0.08447265625, "learning_rate": 0.0005730446011721467, "loss": 0.5408, "step": 57120 }, { "epoch": 2.8374888248733487, "grad_norm": 0.1396484375, "learning_rate": 0.0005730048673884971, "loss": 0.5589, "step": 57130 }, { "epoch": 2.837985497168968, "grad_norm": 0.10107421875, "learning_rate": 0.0005729651336048475, "loss": 0.5682, "step": 57140 }, { "epoch": 2.838482169464587, "grad_norm": 0.12060546875, "learning_rate": 0.000572925399821198, "loss": 0.5336, "step": 57150 }, { "epoch": 2.8389788417602064, "grad_norm": 0.1025390625, "learning_rate": 0.0005728856660375484, "loss": 0.5409, "step": 57160 }, { "epoch": 2.839475514055826, "grad_norm": 0.1298828125, "learning_rate": 0.0005728459322538989, "loss": 0.5335, "step": 57170 }, { "epoch": 2.8399721863514453, "grad_norm": 0.134765625, "learning_rate": 0.0005728061984702494, "loss": 0.5436, "step": 57180 }, { "epoch": 2.8404688586470646, "grad_norm": 0.09521484375, "learning_rate": 0.0005727664646865998, "loss": 0.5297, "step": 57190 }, { "epoch": 2.8409655309426842, "grad_norm": 0.09326171875, "learning_rate": 0.0005727267309029503, "loss": 0.5597, "step": 57200 }, { "epoch": 2.8414622032383035, "grad_norm": 0.08740234375, "learning_rate": 0.0005726869971193007, "loss": 0.5425, "step": 57210 }, { "epoch": 2.8419588755339227, "grad_norm": 0.1123046875, "learning_rate": 0.0005726472633356512, "loss": 0.5408, "step": 57220 }, { "epoch": 2.842455547829542, "grad_norm": 0.1162109375, "learning_rate": 0.0005726075295520017, "loss": 0.5352, "step": 57230 }, { "epoch": 2.8429522201251616, "grad_norm": 0.11181640625, "learning_rate": 0.000572567795768352, "loss": 0.574, "step": 57240 }, { "epoch": 2.843448892420781, "grad_norm": 0.109375, "learning_rate": 0.0005725280619847025, "loss": 0.5742, "step": 57250 }, { "epoch": 2.8439455647164, "grad_norm": 0.1484375, "learning_rate": 0.000572488328201053, "loss": 0.5515, "step": 57260 }, { "epoch": 2.8444422370120197, "grad_norm": 0.107421875, "learning_rate": 0.0005724485944174034, "loss": 0.593, "step": 57270 }, { "epoch": 2.844938909307639, "grad_norm": 0.150390625, "learning_rate": 0.0005724088606337539, "loss": 0.528, "step": 57280 }, { "epoch": 2.845435581603258, "grad_norm": 0.1826171875, "learning_rate": 0.0005723691268501044, "loss": 0.551, "step": 57290 }, { "epoch": 2.8459322538988774, "grad_norm": 0.1318359375, "learning_rate": 0.0005723293930664547, "loss": 0.5678, "step": 57300 }, { "epoch": 2.8464289261944966, "grad_norm": 0.11962890625, "learning_rate": 0.0005722896592828052, "loss": 0.5736, "step": 57310 }, { "epoch": 2.8469255984901163, "grad_norm": 0.169921875, "learning_rate": 0.0005722499254991557, "loss": 0.5452, "step": 57320 }, { "epoch": 2.8474222707857355, "grad_norm": 0.11083984375, "learning_rate": 0.0005722101917155061, "loss": 0.5354, "step": 57330 }, { "epoch": 2.847918943081355, "grad_norm": 0.08935546875, "learning_rate": 0.0005721704579318566, "loss": 0.5659, "step": 57340 }, { "epoch": 2.8484156153769744, "grad_norm": 0.0908203125, "learning_rate": 0.000572130724148207, "loss": 0.532, "step": 57350 }, { "epoch": 2.8489122876725936, "grad_norm": 0.12890625, "learning_rate": 0.0005720909903645575, "loss": 0.5466, "step": 57360 }, { "epoch": 2.849408959968213, "grad_norm": 0.154296875, "learning_rate": 0.000572051256580908, "loss": 0.5825, "step": 57370 }, { "epoch": 2.849905632263832, "grad_norm": 0.171875, "learning_rate": 0.0005720115227972584, "loss": 0.5741, "step": 57380 }, { "epoch": 2.8504023045594518, "grad_norm": 0.0927734375, "learning_rate": 0.0005719717890136089, "loss": 0.5571, "step": 57390 }, { "epoch": 2.850898976855071, "grad_norm": 0.10888671875, "learning_rate": 0.0005719320552299593, "loss": 0.5709, "step": 57400 }, { "epoch": 2.85139564915069, "grad_norm": 0.1376953125, "learning_rate": 0.0005718923214463097, "loss": 0.569, "step": 57410 }, { "epoch": 2.85189232144631, "grad_norm": 0.11083984375, "learning_rate": 0.0005718525876626603, "loss": 0.5289, "step": 57420 }, { "epoch": 2.852388993741929, "grad_norm": 0.1015625, "learning_rate": 0.0005718128538790107, "loss": 0.5553, "step": 57430 }, { "epoch": 2.8528856660375483, "grad_norm": 0.12060546875, "learning_rate": 0.0005717731200953611, "loss": 0.539, "step": 57440 }, { "epoch": 2.8533823383331676, "grad_norm": 0.10400390625, "learning_rate": 0.0005717333863117116, "loss": 0.5497, "step": 57450 }, { "epoch": 2.8538790106287872, "grad_norm": 0.091796875, "learning_rate": 0.0005716936525280619, "loss": 0.5593, "step": 57460 }, { "epoch": 2.8543756829244065, "grad_norm": 0.1123046875, "learning_rate": 0.0005716539187444125, "loss": 0.5529, "step": 57470 }, { "epoch": 2.8548723552200257, "grad_norm": 0.1396484375, "learning_rate": 0.000571614184960763, "loss": 0.557, "step": 57480 }, { "epoch": 2.8553690275156454, "grad_norm": 0.1015625, "learning_rate": 0.0005715744511771133, "loss": 0.5768, "step": 57490 }, { "epoch": 2.8558656998112646, "grad_norm": 0.11669921875, "learning_rate": 0.0005715347173934638, "loss": 0.5636, "step": 57500 }, { "epoch": 2.856362372106884, "grad_norm": 0.09912109375, "learning_rate": 0.0005714949836098142, "loss": 0.5743, "step": 57510 }, { "epoch": 2.856859044402503, "grad_norm": 0.1259765625, "learning_rate": 0.0005714552498261648, "loss": 0.559, "step": 57520 }, { "epoch": 2.8573557166981227, "grad_norm": 0.09423828125, "learning_rate": 0.0005714155160425152, "loss": 0.5599, "step": 57530 }, { "epoch": 2.857852388993742, "grad_norm": 0.1015625, "learning_rate": 0.0005713757822588656, "loss": 0.5598, "step": 57540 }, { "epoch": 2.858349061289361, "grad_norm": 0.0947265625, "learning_rate": 0.0005713360484752161, "loss": 0.5347, "step": 57550 }, { "epoch": 2.858845733584981, "grad_norm": 0.08447265625, "learning_rate": 0.0005712963146915665, "loss": 0.5707, "step": 57560 }, { "epoch": 2.8593424058806, "grad_norm": 0.115234375, "learning_rate": 0.000571256580907917, "loss": 0.528, "step": 57570 }, { "epoch": 2.8598390781762193, "grad_norm": 0.12890625, "learning_rate": 0.0005712168471242675, "loss": 0.5671, "step": 57580 }, { "epoch": 2.8603357504718385, "grad_norm": 0.10546875, "learning_rate": 0.0005711771133406179, "loss": 0.5585, "step": 57590 }, { "epoch": 2.860832422767458, "grad_norm": 0.10546875, "learning_rate": 0.0005711373795569683, "loss": 0.5669, "step": 57600 }, { "epoch": 2.8613290950630774, "grad_norm": 0.1005859375, "learning_rate": 0.0005710976457733188, "loss": 0.579, "step": 57610 }, { "epoch": 2.8618257673586966, "grad_norm": 0.1416015625, "learning_rate": 0.0005710579119896693, "loss": 0.5508, "step": 57620 }, { "epoch": 2.8623224396543163, "grad_norm": 0.08740234375, "learning_rate": 0.0005710181782060197, "loss": 0.5277, "step": 57630 }, { "epoch": 2.8628191119499355, "grad_norm": 0.12451171875, "learning_rate": 0.0005709784444223702, "loss": 0.5405, "step": 57640 }, { "epoch": 2.8633157842455548, "grad_norm": 0.09814453125, "learning_rate": 0.0005709387106387206, "loss": 0.5483, "step": 57650 }, { "epoch": 2.863812456541174, "grad_norm": 0.1455078125, "learning_rate": 0.000570898976855071, "loss": 0.5332, "step": 57660 }, { "epoch": 2.8643091288367932, "grad_norm": 0.119140625, "learning_rate": 0.0005708592430714216, "loss": 0.5571, "step": 57670 }, { "epoch": 2.864805801132413, "grad_norm": 0.09228515625, "learning_rate": 0.000570819509287772, "loss": 0.5255, "step": 57680 }, { "epoch": 2.865302473428032, "grad_norm": 0.1552734375, "learning_rate": 0.0005707797755041224, "loss": 0.5287, "step": 57690 }, { "epoch": 2.865799145723652, "grad_norm": 0.11474609375, "learning_rate": 0.0005707400417204729, "loss": 0.5412, "step": 57700 }, { "epoch": 2.866295818019271, "grad_norm": 0.16015625, "learning_rate": 0.0005707003079368233, "loss": 0.5548, "step": 57710 }, { "epoch": 2.8667924903148903, "grad_norm": 0.12158203125, "learning_rate": 0.0005706605741531738, "loss": 0.5363, "step": 57720 }, { "epoch": 2.8672891626105095, "grad_norm": 0.10888671875, "learning_rate": 0.0005706208403695242, "loss": 0.5702, "step": 57730 }, { "epoch": 2.8677858349061287, "grad_norm": 0.1494140625, "learning_rate": 0.0005705811065858747, "loss": 0.5496, "step": 57740 }, { "epoch": 2.8682825072017484, "grad_norm": 0.1259765625, "learning_rate": 0.0005705413728022251, "loss": 0.5126, "step": 57750 }, { "epoch": 2.8687791794973676, "grad_norm": 0.154296875, "learning_rate": 0.0005705016390185755, "loss": 0.5608, "step": 57760 }, { "epoch": 2.869275851792987, "grad_norm": 0.09521484375, "learning_rate": 0.000570461905234926, "loss": 0.5755, "step": 57770 }, { "epoch": 2.8697725240886065, "grad_norm": 0.140625, "learning_rate": 0.0005704221714512765, "loss": 0.5264, "step": 57780 }, { "epoch": 2.8702691963842257, "grad_norm": 0.1513671875, "learning_rate": 0.0005703824376676269, "loss": 0.5669, "step": 57790 }, { "epoch": 2.870765868679845, "grad_norm": 0.12890625, "learning_rate": 0.0005703427038839774, "loss": 0.564, "step": 57800 }, { "epoch": 2.871262540975464, "grad_norm": 0.1787109375, "learning_rate": 0.0005703029701003278, "loss": 0.5497, "step": 57810 }, { "epoch": 2.871759213271084, "grad_norm": 0.1357421875, "learning_rate": 0.0005702632363166782, "loss": 0.5965, "step": 57820 }, { "epoch": 2.872255885566703, "grad_norm": 0.134765625, "learning_rate": 0.0005702235025330288, "loss": 0.5384, "step": 57830 }, { "epoch": 2.8727525578623223, "grad_norm": 0.162109375, "learning_rate": 0.0005701837687493792, "loss": 0.5137, "step": 57840 }, { "epoch": 2.873249230157942, "grad_norm": 0.091796875, "learning_rate": 0.0005701440349657296, "loss": 0.562, "step": 57850 }, { "epoch": 2.873745902453561, "grad_norm": 0.09814453125, "learning_rate": 0.0005701043011820801, "loss": 0.5425, "step": 57860 }, { "epoch": 2.8742425747491804, "grad_norm": 0.10546875, "learning_rate": 0.0005700645673984305, "loss": 0.5806, "step": 57870 }, { "epoch": 2.8747392470447997, "grad_norm": 0.1376953125, "learning_rate": 0.000570024833614781, "loss": 0.5594, "step": 57880 }, { "epoch": 2.8752359193404193, "grad_norm": 0.12109375, "learning_rate": 0.0005699850998311315, "loss": 0.5513, "step": 57890 }, { "epoch": 2.8757325916360386, "grad_norm": 0.09521484375, "learning_rate": 0.0005699453660474819, "loss": 0.5623, "step": 57900 }, { "epoch": 2.876229263931658, "grad_norm": 0.2294921875, "learning_rate": 0.0005699056322638323, "loss": 0.5909, "step": 57910 }, { "epoch": 2.8767259362272775, "grad_norm": 0.10888671875, "learning_rate": 0.0005698658984801827, "loss": 0.5394, "step": 57920 }, { "epoch": 2.8772226085228967, "grad_norm": 0.0966796875, "learning_rate": 0.0005698261646965333, "loss": 0.556, "step": 57930 }, { "epoch": 2.877719280818516, "grad_norm": 0.10400390625, "learning_rate": 0.0005697864309128838, "loss": 0.576, "step": 57940 }, { "epoch": 2.878215953114135, "grad_norm": 0.12060546875, "learning_rate": 0.0005697466971292341, "loss": 0.555, "step": 57950 }, { "epoch": 2.8787126254097544, "grad_norm": 0.1259765625, "learning_rate": 0.0005697069633455846, "loss": 0.5515, "step": 57960 }, { "epoch": 2.879209297705374, "grad_norm": 0.0927734375, "learning_rate": 0.0005696672295619352, "loss": 0.568, "step": 57970 }, { "epoch": 2.8797059700009933, "grad_norm": 0.11572265625, "learning_rate": 0.0005696274957782855, "loss": 0.5729, "step": 57980 }, { "epoch": 2.880202642296613, "grad_norm": 0.158203125, "learning_rate": 0.000569587761994636, "loss": 0.5552, "step": 57990 }, { "epoch": 2.880699314592232, "grad_norm": 0.10302734375, "learning_rate": 0.0005695480282109864, "loss": 0.5485, "step": 58000 }, { "epoch": 2.8811959868878514, "grad_norm": 0.2041015625, "learning_rate": 0.0005695082944273368, "loss": 0.545, "step": 58010 }, { "epoch": 2.8816926591834706, "grad_norm": 0.166015625, "learning_rate": 0.0005694685606436873, "loss": 0.5598, "step": 58020 }, { "epoch": 2.88218933147909, "grad_norm": 0.0966796875, "learning_rate": 0.0005694288268600378, "loss": 0.5485, "step": 58030 }, { "epoch": 2.8826860037747095, "grad_norm": 0.09619140625, "learning_rate": 0.0005693890930763882, "loss": 0.5435, "step": 58040 }, { "epoch": 2.8831826760703287, "grad_norm": 0.09716796875, "learning_rate": 0.0005693493592927387, "loss": 0.5659, "step": 58050 }, { "epoch": 2.8836793483659484, "grad_norm": 0.0966796875, "learning_rate": 0.0005693096255090891, "loss": 0.5266, "step": 58060 }, { "epoch": 2.8841760206615676, "grad_norm": 0.12353515625, "learning_rate": 0.0005692698917254395, "loss": 0.5522, "step": 58070 }, { "epoch": 2.884672692957187, "grad_norm": 0.13671875, "learning_rate": 0.0005692301579417901, "loss": 0.5434, "step": 58080 }, { "epoch": 2.885169365252806, "grad_norm": 0.09619140625, "learning_rate": 0.0005691904241581405, "loss": 0.5619, "step": 58090 }, { "epoch": 2.8856660375484253, "grad_norm": 0.099609375, "learning_rate": 0.000569150690374491, "loss": 0.569, "step": 58100 }, { "epoch": 2.886162709844045, "grad_norm": 0.1142578125, "learning_rate": 0.0005691109565908413, "loss": 0.5799, "step": 58110 }, { "epoch": 2.886659382139664, "grad_norm": 0.10107421875, "learning_rate": 0.0005690712228071918, "loss": 0.572, "step": 58120 }, { "epoch": 2.8871560544352834, "grad_norm": 0.10546875, "learning_rate": 0.0005690314890235424, "loss": 0.5379, "step": 58130 }, { "epoch": 2.887652726730903, "grad_norm": 0.15625, "learning_rate": 0.0005689917552398927, "loss": 0.5252, "step": 58140 }, { "epoch": 2.8881493990265223, "grad_norm": 0.1123046875, "learning_rate": 0.0005689520214562432, "loss": 0.5574, "step": 58150 }, { "epoch": 2.8886460713221416, "grad_norm": 0.099609375, "learning_rate": 0.0005689122876725937, "loss": 0.551, "step": 58160 }, { "epoch": 2.889142743617761, "grad_norm": 0.1171875, "learning_rate": 0.000568872553888944, "loss": 0.5691, "step": 58170 }, { "epoch": 2.8896394159133805, "grad_norm": 0.1162109375, "learning_rate": 0.0005688328201052946, "loss": 0.5442, "step": 58180 }, { "epoch": 2.8901360882089997, "grad_norm": 0.11279296875, "learning_rate": 0.000568793086321645, "loss": 0.5618, "step": 58190 }, { "epoch": 2.890632760504619, "grad_norm": 0.10986328125, "learning_rate": 0.0005687533525379954, "loss": 0.5685, "step": 58200 }, { "epoch": 2.8911294328002386, "grad_norm": 0.1025390625, "learning_rate": 0.0005687136187543459, "loss": 0.5804, "step": 58210 }, { "epoch": 2.891626105095858, "grad_norm": 0.10205078125, "learning_rate": 0.0005686738849706963, "loss": 0.5615, "step": 58220 }, { "epoch": 2.892122777391477, "grad_norm": 0.09033203125, "learning_rate": 0.0005686341511870468, "loss": 0.5725, "step": 58230 }, { "epoch": 2.8926194496870963, "grad_norm": 0.166015625, "learning_rate": 0.0005685944174033973, "loss": 0.5545, "step": 58240 }, { "epoch": 2.893116121982716, "grad_norm": 0.09521484375, "learning_rate": 0.0005685546836197477, "loss": 0.5463, "step": 58250 }, { "epoch": 2.893612794278335, "grad_norm": 0.1806640625, "learning_rate": 0.0005685149498360982, "loss": 0.5872, "step": 58260 }, { "epoch": 2.8941094665739544, "grad_norm": 0.11181640625, "learning_rate": 0.0005684752160524486, "loss": 0.5727, "step": 58270 }, { "epoch": 2.894606138869574, "grad_norm": 0.1025390625, "learning_rate": 0.0005684354822687991, "loss": 0.5308, "step": 58280 }, { "epoch": 2.8951028111651933, "grad_norm": 0.10205078125, "learning_rate": 0.0005683957484851496, "loss": 0.5595, "step": 58290 }, { "epoch": 2.8955994834608125, "grad_norm": 0.1162109375, "learning_rate": 0.0005683560147015, "loss": 0.5706, "step": 58300 }, { "epoch": 2.8960961557564318, "grad_norm": 0.10986328125, "learning_rate": 0.0005683162809178504, "loss": 0.5375, "step": 58310 }, { "epoch": 2.896592828052051, "grad_norm": 0.10888671875, "learning_rate": 0.000568276547134201, "loss": 0.543, "step": 58320 }, { "epoch": 2.8970895003476707, "grad_norm": 0.11669921875, "learning_rate": 0.0005682368133505513, "loss": 0.5418, "step": 58330 }, { "epoch": 2.89758617264329, "grad_norm": 0.1572265625, "learning_rate": 0.0005681970795669018, "loss": 0.5714, "step": 58340 }, { "epoch": 2.8980828449389096, "grad_norm": 0.1201171875, "learning_rate": 0.0005681573457832523, "loss": 0.5453, "step": 58350 }, { "epoch": 2.898579517234529, "grad_norm": 0.09765625, "learning_rate": 0.0005681176119996026, "loss": 0.5533, "step": 58360 }, { "epoch": 2.899076189530148, "grad_norm": 0.10498046875, "learning_rate": 0.0005680778782159531, "loss": 0.5524, "step": 58370 }, { "epoch": 2.8995728618257672, "grad_norm": 0.09375, "learning_rate": 0.0005680381444323036, "loss": 0.5252, "step": 58380 }, { "epoch": 2.9000695341213865, "grad_norm": 0.138671875, "learning_rate": 0.0005679984106486541, "loss": 0.5223, "step": 58390 }, { "epoch": 2.900566206417006, "grad_norm": 0.09716796875, "learning_rate": 0.0005679586768650045, "loss": 0.553, "step": 58400 }, { "epoch": 2.9010628787126254, "grad_norm": 0.08837890625, "learning_rate": 0.0005679189430813549, "loss": 0.5507, "step": 58410 }, { "epoch": 2.901559551008245, "grad_norm": 0.1630859375, "learning_rate": 0.0005678792092977054, "loss": 0.5367, "step": 58420 }, { "epoch": 2.9020562233038643, "grad_norm": 0.10498046875, "learning_rate": 0.0005678394755140559, "loss": 0.5669, "step": 58430 }, { "epoch": 2.9025528955994835, "grad_norm": 0.087890625, "learning_rate": 0.0005677997417304063, "loss": 0.5491, "step": 58440 }, { "epoch": 2.9030495678951027, "grad_norm": 0.1494140625, "learning_rate": 0.0005677600079467568, "loss": 0.5746, "step": 58450 }, { "epoch": 2.903546240190722, "grad_norm": 0.1044921875, "learning_rate": 0.0005677202741631072, "loss": 0.5686, "step": 58460 }, { "epoch": 2.9040429124863416, "grad_norm": 0.1162109375, "learning_rate": 0.0005676805403794576, "loss": 0.5631, "step": 58470 }, { "epoch": 2.904539584781961, "grad_norm": 0.09375, "learning_rate": 0.0005676408065958082, "loss": 0.5275, "step": 58480 }, { "epoch": 2.90503625707758, "grad_norm": 0.1015625, "learning_rate": 0.0005676010728121586, "loss": 0.5573, "step": 58490 }, { "epoch": 2.9055329293731997, "grad_norm": 0.11181640625, "learning_rate": 0.000567561339028509, "loss": 0.5584, "step": 58500 }, { "epoch": 2.906029601668819, "grad_norm": 0.11083984375, "learning_rate": 0.0005675216052448595, "loss": 0.5452, "step": 58510 }, { "epoch": 2.906526273964438, "grad_norm": 0.10107421875, "learning_rate": 0.0005674818714612098, "loss": 0.5653, "step": 58520 }, { "epoch": 2.9070229462600574, "grad_norm": 0.1376953125, "learning_rate": 0.0005674421376775604, "loss": 0.5529, "step": 58530 }, { "epoch": 2.907519618555677, "grad_norm": 0.1025390625, "learning_rate": 0.0005674024038939109, "loss": 0.5543, "step": 58540 }, { "epoch": 2.9080162908512963, "grad_norm": 0.11767578125, "learning_rate": 0.0005673626701102613, "loss": 0.5662, "step": 58550 }, { "epoch": 2.9085129631469155, "grad_norm": 0.10302734375, "learning_rate": 0.0005673229363266117, "loss": 0.5571, "step": 58560 }, { "epoch": 2.909009635442535, "grad_norm": 0.119140625, "learning_rate": 0.0005672832025429621, "loss": 0.5496, "step": 58570 }, { "epoch": 2.9095063077381544, "grad_norm": 0.10205078125, "learning_rate": 0.0005672434687593127, "loss": 0.5188, "step": 58580 }, { "epoch": 2.9100029800337737, "grad_norm": 0.087890625, "learning_rate": 0.0005672037349756631, "loss": 0.5523, "step": 58590 }, { "epoch": 2.910499652329393, "grad_norm": 0.10546875, "learning_rate": 0.0005671640011920135, "loss": 0.5485, "step": 58600 }, { "epoch": 2.9109963246250126, "grad_norm": 0.0986328125, "learning_rate": 0.000567124267408364, "loss": 0.5503, "step": 58610 }, { "epoch": 2.911492996920632, "grad_norm": 0.10546875, "learning_rate": 0.0005670845336247144, "loss": 0.562, "step": 58620 }, { "epoch": 2.911989669216251, "grad_norm": 0.11474609375, "learning_rate": 0.0005670447998410649, "loss": 0.5759, "step": 58630 }, { "epoch": 2.9124863415118707, "grad_norm": 0.1240234375, "learning_rate": 0.0005670050660574154, "loss": 0.5494, "step": 58640 }, { "epoch": 2.91298301380749, "grad_norm": 0.11474609375, "learning_rate": 0.0005669653322737658, "loss": 0.5299, "step": 58650 }, { "epoch": 2.913479686103109, "grad_norm": 0.1005859375, "learning_rate": 0.0005669255984901162, "loss": 0.5516, "step": 58660 }, { "epoch": 2.9139763583987284, "grad_norm": 0.12158203125, "learning_rate": 0.0005668858647064667, "loss": 0.5365, "step": 58670 }, { "epoch": 2.9144730306943476, "grad_norm": 0.125, "learning_rate": 0.0005668461309228172, "loss": 0.5389, "step": 58680 }, { "epoch": 2.9149697029899673, "grad_norm": 0.099609375, "learning_rate": 0.0005668063971391676, "loss": 0.5402, "step": 58690 }, { "epoch": 2.9154663752855865, "grad_norm": 0.111328125, "learning_rate": 0.0005667666633555181, "loss": 0.5457, "step": 58700 }, { "epoch": 2.915963047581206, "grad_norm": 0.10986328125, "learning_rate": 0.0005667269295718685, "loss": 0.5668, "step": 58710 }, { "epoch": 2.9164597198768254, "grad_norm": 0.18359375, "learning_rate": 0.0005666871957882189, "loss": 0.5632, "step": 58720 }, { "epoch": 2.9169563921724446, "grad_norm": 0.1474609375, "learning_rate": 0.0005666474620045695, "loss": 0.5504, "step": 58730 }, { "epoch": 2.917453064468064, "grad_norm": 0.166015625, "learning_rate": 0.0005666077282209199, "loss": 0.5523, "step": 58740 }, { "epoch": 2.917949736763683, "grad_norm": 0.1591796875, "learning_rate": 0.0005665679944372703, "loss": 0.5607, "step": 58750 }, { "epoch": 2.9184464090593027, "grad_norm": 0.09423828125, "learning_rate": 0.0005665282606536208, "loss": 0.5626, "step": 58760 }, { "epoch": 2.918943081354922, "grad_norm": 0.11181640625, "learning_rate": 0.0005664885268699712, "loss": 0.5397, "step": 58770 }, { "epoch": 2.9194397536505416, "grad_norm": 0.1513671875, "learning_rate": 0.0005664487930863217, "loss": 0.5625, "step": 58780 }, { "epoch": 2.919936425946161, "grad_norm": 0.12890625, "learning_rate": 0.0005664090593026721, "loss": 0.5383, "step": 58790 }, { "epoch": 2.92043309824178, "grad_norm": 0.0966796875, "learning_rate": 0.0005663693255190226, "loss": 0.5734, "step": 58800 }, { "epoch": 2.9209297705373993, "grad_norm": 0.09521484375, "learning_rate": 0.000566329591735373, "loss": 0.5363, "step": 58810 }, { "epoch": 2.9214264428330186, "grad_norm": 0.1318359375, "learning_rate": 0.0005662898579517234, "loss": 0.5437, "step": 58820 }, { "epoch": 2.9219231151286382, "grad_norm": 0.138671875, "learning_rate": 0.000566250124168074, "loss": 0.5454, "step": 58830 }, { "epoch": 2.9224197874242575, "grad_norm": 0.1181640625, "learning_rate": 0.0005662103903844244, "loss": 0.5451, "step": 58840 }, { "epoch": 2.9229164597198767, "grad_norm": 0.103515625, "learning_rate": 0.0005661706566007748, "loss": 0.5632, "step": 58850 }, { "epoch": 2.9234131320154964, "grad_norm": 0.1474609375, "learning_rate": 0.0005661309228171253, "loss": 0.5585, "step": 58860 }, { "epoch": 2.9239098043111156, "grad_norm": 0.1298828125, "learning_rate": 0.0005660911890334757, "loss": 0.557, "step": 58870 }, { "epoch": 2.924406476606735, "grad_norm": 0.091796875, "learning_rate": 0.0005660514552498262, "loss": 0.5664, "step": 58880 }, { "epoch": 2.924903148902354, "grad_norm": 0.09619140625, "learning_rate": 0.0005660117214661767, "loss": 0.5604, "step": 58890 }, { "epoch": 2.9253998211979737, "grad_norm": 0.103515625, "learning_rate": 0.0005659719876825271, "loss": 0.6002, "step": 58900 }, { "epoch": 2.925896493493593, "grad_norm": 0.09326171875, "learning_rate": 0.0005659322538988775, "loss": 0.5526, "step": 58910 }, { "epoch": 2.926393165789212, "grad_norm": 0.095703125, "learning_rate": 0.000565892520115228, "loss": 0.5556, "step": 58920 }, { "epoch": 2.926889838084832, "grad_norm": 0.10107421875, "learning_rate": 0.0005658527863315785, "loss": 0.5418, "step": 58930 }, { "epoch": 2.927386510380451, "grad_norm": 0.138671875, "learning_rate": 0.0005658130525479289, "loss": 0.5495, "step": 58940 }, { "epoch": 2.9278831826760703, "grad_norm": 0.1259765625, "learning_rate": 0.0005657733187642794, "loss": 0.5517, "step": 58950 }, { "epoch": 2.9283798549716895, "grad_norm": 0.2099609375, "learning_rate": 0.0005657335849806298, "loss": 0.5698, "step": 58960 }, { "epoch": 2.928876527267309, "grad_norm": 0.1572265625, "learning_rate": 0.0005656938511969802, "loss": 0.5611, "step": 58970 }, { "epoch": 2.9293731995629284, "grad_norm": 0.1494140625, "learning_rate": 0.0005656541174133306, "loss": 0.5241, "step": 58980 }, { "epoch": 2.9298698718585476, "grad_norm": 0.11962890625, "learning_rate": 0.0005656143836296812, "loss": 0.5577, "step": 58990 }, { "epoch": 2.9303665441541673, "grad_norm": 0.138671875, "learning_rate": 0.0005655746498460317, "loss": 0.5671, "step": 59000 }, { "epoch": 2.9308632164497865, "grad_norm": 0.1181640625, "learning_rate": 0.000565534916062382, "loss": 0.5119, "step": 59010 }, { "epoch": 2.9313598887454058, "grad_norm": 0.10302734375, "learning_rate": 0.0005654951822787325, "loss": 0.5553, "step": 59020 }, { "epoch": 2.931856561041025, "grad_norm": 0.099609375, "learning_rate": 0.0005654554484950831, "loss": 0.5578, "step": 59030 }, { "epoch": 2.932353233336644, "grad_norm": 0.09814453125, "learning_rate": 0.0005654157147114334, "loss": 0.5429, "step": 59040 }, { "epoch": 2.932849905632264, "grad_norm": 0.1494140625, "learning_rate": 0.0005653759809277839, "loss": 0.5359, "step": 59050 }, { "epoch": 2.933346577927883, "grad_norm": 0.1181640625, "learning_rate": 0.0005653362471441343, "loss": 0.5667, "step": 59060 }, { "epoch": 2.933843250223503, "grad_norm": 0.09619140625, "learning_rate": 0.0005652965133604847, "loss": 0.5741, "step": 59070 }, { "epoch": 2.934339922519122, "grad_norm": 0.162109375, "learning_rate": 0.0005652567795768353, "loss": 0.5537, "step": 59080 }, { "epoch": 2.9348365948147412, "grad_norm": 0.1337890625, "learning_rate": 0.0005652170457931857, "loss": 0.5458, "step": 59090 }, { "epoch": 2.9353332671103605, "grad_norm": 0.10302734375, "learning_rate": 0.0005651773120095361, "loss": 0.597, "step": 59100 }, { "epoch": 2.9358299394059797, "grad_norm": 0.10595703125, "learning_rate": 0.0005651375782258866, "loss": 0.5725, "step": 59110 }, { "epoch": 2.9363266117015994, "grad_norm": 0.1279296875, "learning_rate": 0.000565097844442237, "loss": 0.5645, "step": 59120 }, { "epoch": 2.9368232839972186, "grad_norm": 0.1435546875, "learning_rate": 0.0005650581106585874, "loss": 0.5217, "step": 59130 }, { "epoch": 2.937319956292838, "grad_norm": 0.09814453125, "learning_rate": 0.000565018376874938, "loss": 0.5433, "step": 59140 }, { "epoch": 2.9378166285884575, "grad_norm": 0.12060546875, "learning_rate": 0.0005649786430912884, "loss": 0.5658, "step": 59150 }, { "epoch": 2.9383133008840767, "grad_norm": 0.1279296875, "learning_rate": 0.0005649389093076389, "loss": 0.5592, "step": 59160 }, { "epoch": 2.938809973179696, "grad_norm": 0.09912109375, "learning_rate": 0.0005648991755239892, "loss": 0.5329, "step": 59170 }, { "epoch": 2.939306645475315, "grad_norm": 0.10888671875, "learning_rate": 0.0005648594417403397, "loss": 0.5381, "step": 59180 }, { "epoch": 2.939803317770935, "grad_norm": 0.1181640625, "learning_rate": 0.0005648197079566903, "loss": 0.5439, "step": 59190 }, { "epoch": 2.940299990066554, "grad_norm": 0.09814453125, "learning_rate": 0.0005647799741730406, "loss": 0.5555, "step": 59200 }, { "epoch": 2.9407966623621733, "grad_norm": 0.1240234375, "learning_rate": 0.0005647402403893911, "loss": 0.5537, "step": 59210 }, { "epoch": 2.941293334657793, "grad_norm": 0.10546875, "learning_rate": 0.0005647005066057416, "loss": 0.5486, "step": 59220 }, { "epoch": 2.941790006953412, "grad_norm": 0.1044921875, "learning_rate": 0.0005646607728220919, "loss": 0.5506, "step": 59230 }, { "epoch": 2.9422866792490314, "grad_norm": 0.1298828125, "learning_rate": 0.0005646210390384425, "loss": 0.5734, "step": 59240 }, { "epoch": 2.9427833515446506, "grad_norm": 0.10888671875, "learning_rate": 0.0005645813052547929, "loss": 0.5873, "step": 59250 }, { "epoch": 2.9432800238402703, "grad_norm": 0.0986328125, "learning_rate": 0.0005645415714711433, "loss": 0.5386, "step": 59260 }, { "epoch": 2.9437766961358895, "grad_norm": 0.11962890625, "learning_rate": 0.0005645018376874938, "loss": 0.542, "step": 59270 }, { "epoch": 2.9442733684315088, "grad_norm": 0.10400390625, "learning_rate": 0.0005644621039038442, "loss": 0.5314, "step": 59280 }, { "epoch": 2.9447700407271284, "grad_norm": 0.10595703125, "learning_rate": 0.0005644223701201948, "loss": 0.5418, "step": 59290 }, { "epoch": 2.9452667130227477, "grad_norm": 0.11865234375, "learning_rate": 0.0005643826363365452, "loss": 0.5572, "step": 59300 }, { "epoch": 2.945763385318367, "grad_norm": 0.1474609375, "learning_rate": 0.0005643429025528956, "loss": 0.5328, "step": 59310 }, { "epoch": 2.946260057613986, "grad_norm": 0.09326171875, "learning_rate": 0.0005643031687692461, "loss": 0.5504, "step": 59320 }, { "epoch": 2.946756729909606, "grad_norm": 0.09619140625, "learning_rate": 0.0005642634349855965, "loss": 0.5546, "step": 59330 }, { "epoch": 2.947253402205225, "grad_norm": 0.09619140625, "learning_rate": 0.000564223701201947, "loss": 0.5449, "step": 59340 }, { "epoch": 2.9477500745008443, "grad_norm": 0.12890625, "learning_rate": 0.0005641839674182975, "loss": 0.5657, "step": 59350 }, { "epoch": 2.948246746796464, "grad_norm": 0.107421875, "learning_rate": 0.0005641442336346479, "loss": 0.5305, "step": 59360 }, { "epoch": 2.948743419092083, "grad_norm": 0.0927734375, "learning_rate": 0.0005641044998509983, "loss": 0.5326, "step": 59370 }, { "epoch": 2.9492400913877024, "grad_norm": 0.0927734375, "learning_rate": 0.0005640647660673489, "loss": 0.5455, "step": 59380 }, { "epoch": 2.9497367636833216, "grad_norm": 0.10205078125, "learning_rate": 0.0005640250322836992, "loss": 0.5653, "step": 59390 }, { "epoch": 2.950233435978941, "grad_norm": 0.0947265625, "learning_rate": 0.0005639852985000497, "loss": 0.5481, "step": 59400 }, { "epoch": 2.9507301082745605, "grad_norm": 0.111328125, "learning_rate": 0.0005639455647164002, "loss": 0.5329, "step": 59410 }, { "epoch": 2.9512267805701797, "grad_norm": 0.1328125, "learning_rate": 0.0005639058309327505, "loss": 0.5738, "step": 59420 }, { "epoch": 2.9517234528657994, "grad_norm": 0.0986328125, "learning_rate": 0.000563866097149101, "loss": 0.5695, "step": 59430 }, { "epoch": 2.9522201251614186, "grad_norm": 0.1240234375, "learning_rate": 0.0005638263633654515, "loss": 0.5291, "step": 59440 }, { "epoch": 2.952716797457038, "grad_norm": 0.1298828125, "learning_rate": 0.000563786629581802, "loss": 0.5616, "step": 59450 }, { "epoch": 2.953213469752657, "grad_norm": 0.12890625, "learning_rate": 0.0005637468957981524, "loss": 0.5414, "step": 59460 }, { "epoch": 2.9537101420482763, "grad_norm": 0.1337890625, "learning_rate": 0.0005637071620145028, "loss": 0.5406, "step": 59470 }, { "epoch": 2.954206814343896, "grad_norm": 0.15234375, "learning_rate": 0.0005636674282308533, "loss": 0.5475, "step": 59480 }, { "epoch": 2.954703486639515, "grad_norm": 0.15234375, "learning_rate": 0.0005636276944472038, "loss": 0.541, "step": 59490 }, { "epoch": 2.9552001589351344, "grad_norm": 0.1103515625, "learning_rate": 0.0005635879606635542, "loss": 0.5627, "step": 59500 }, { "epoch": 2.955696831230754, "grad_norm": 0.09912109375, "learning_rate": 0.0005635482268799047, "loss": 0.5566, "step": 59510 }, { "epoch": 2.9561935035263733, "grad_norm": 0.111328125, "learning_rate": 0.0005635084930962551, "loss": 0.5491, "step": 59520 }, { "epoch": 2.9566901758219926, "grad_norm": 0.119140625, "learning_rate": 0.0005634687593126055, "loss": 0.5696, "step": 59530 }, { "epoch": 2.957186848117612, "grad_norm": 0.0927734375, "learning_rate": 0.0005634290255289561, "loss": 0.5736, "step": 59540 }, { "epoch": 2.9576835204132315, "grad_norm": 0.14453125, "learning_rate": 0.0005633892917453065, "loss": 0.5498, "step": 59550 }, { "epoch": 2.9581801927088507, "grad_norm": 0.11181640625, "learning_rate": 0.0005633495579616569, "loss": 0.571, "step": 59560 }, { "epoch": 2.95867686500447, "grad_norm": 0.1181640625, "learning_rate": 0.0005633098241780074, "loss": 0.5617, "step": 59570 }, { "epoch": 2.9591735373000896, "grad_norm": 0.2373046875, "learning_rate": 0.0005632700903943577, "loss": 0.563, "step": 59580 }, { "epoch": 2.959670209595709, "grad_norm": 0.11279296875, "learning_rate": 0.0005632303566107083, "loss": 0.5737, "step": 59590 }, { "epoch": 2.960166881891328, "grad_norm": 0.11474609375, "learning_rate": 0.0005631906228270588, "loss": 0.5386, "step": 59600 }, { "epoch": 2.9606635541869473, "grad_norm": 0.107421875, "learning_rate": 0.0005631508890434092, "loss": 0.561, "step": 59610 }, { "epoch": 2.961160226482567, "grad_norm": 0.099609375, "learning_rate": 0.0005631111552597596, "loss": 0.5493, "step": 59620 }, { "epoch": 2.961656898778186, "grad_norm": 0.09814453125, "learning_rate": 0.0005630714214761101, "loss": 0.5367, "step": 59630 }, { "epoch": 2.9621535710738054, "grad_norm": 0.11474609375, "learning_rate": 0.0005630316876924606, "loss": 0.5491, "step": 59640 }, { "epoch": 2.962650243369425, "grad_norm": 0.0966796875, "learning_rate": 0.000562991953908811, "loss": 0.536, "step": 59650 }, { "epoch": 2.9631469156650443, "grad_norm": 0.1044921875, "learning_rate": 0.0005629522201251614, "loss": 0.5557, "step": 59660 }, { "epoch": 2.9636435879606635, "grad_norm": 0.125, "learning_rate": 0.0005629124863415119, "loss": 0.57, "step": 59670 }, { "epoch": 2.9641402602562827, "grad_norm": 0.150390625, "learning_rate": 0.0005628727525578623, "loss": 0.5497, "step": 59680 }, { "epoch": 2.9646369325519024, "grad_norm": 0.10302734375, "learning_rate": 0.0005628330187742128, "loss": 0.5465, "step": 59690 }, { "epoch": 2.9651336048475216, "grad_norm": 0.1435546875, "learning_rate": 0.0005627932849905633, "loss": 0.5469, "step": 59700 }, { "epoch": 2.965630277143141, "grad_norm": 0.0966796875, "learning_rate": 0.0005627535512069137, "loss": 0.5711, "step": 59710 }, { "epoch": 2.9661269494387605, "grad_norm": 0.2353515625, "learning_rate": 0.0005627138174232641, "loss": 0.5855, "step": 59720 }, { "epoch": 2.9666236217343798, "grad_norm": 0.1611328125, "learning_rate": 0.0005626740836396146, "loss": 0.5443, "step": 59730 }, { "epoch": 2.967120294029999, "grad_norm": 0.1083984375, "learning_rate": 0.0005626343498559651, "loss": 0.593, "step": 59740 }, { "epoch": 2.967616966325618, "grad_norm": 0.126953125, "learning_rate": 0.0005625946160723155, "loss": 0.5584, "step": 59750 }, { "epoch": 2.9681136386212374, "grad_norm": 0.1240234375, "learning_rate": 0.000562554882288666, "loss": 0.5535, "step": 59760 }, { "epoch": 2.968610310916857, "grad_norm": 0.09765625, "learning_rate": 0.0005625151485050164, "loss": 0.5354, "step": 59770 }, { "epoch": 2.9691069832124763, "grad_norm": 0.1064453125, "learning_rate": 0.0005624754147213668, "loss": 0.5449, "step": 59780 }, { "epoch": 2.969603655508096, "grad_norm": 0.09521484375, "learning_rate": 0.0005624356809377174, "loss": 0.5572, "step": 59790 }, { "epoch": 2.9701003278037152, "grad_norm": 0.1494140625, "learning_rate": 0.0005623959471540678, "loss": 0.5555, "step": 59800 }, { "epoch": 2.9705970000993345, "grad_norm": 0.10546875, "learning_rate": 0.0005623562133704182, "loss": 0.5341, "step": 59810 }, { "epoch": 2.9710936723949537, "grad_norm": 0.1484375, "learning_rate": 0.0005623164795867687, "loss": 0.5561, "step": 59820 }, { "epoch": 2.971590344690573, "grad_norm": 0.1962890625, "learning_rate": 0.0005622767458031191, "loss": 0.5475, "step": 59830 }, { "epoch": 2.9720870169861926, "grad_norm": 0.0947265625, "learning_rate": 0.0005622370120194696, "loss": 0.5521, "step": 59840 }, { "epoch": 2.972583689281812, "grad_norm": 0.1455078125, "learning_rate": 0.00056219727823582, "loss": 0.5514, "step": 59850 }, { "epoch": 2.973080361577431, "grad_norm": 0.10546875, "learning_rate": 0.0005621575444521705, "loss": 0.5566, "step": 59860 }, { "epoch": 2.9735770338730507, "grad_norm": 0.11669921875, "learning_rate": 0.0005621178106685209, "loss": 0.5545, "step": 59870 }, { "epoch": 2.97407370616867, "grad_norm": 0.1181640625, "learning_rate": 0.0005620780768848713, "loss": 0.5559, "step": 59880 }, { "epoch": 2.974570378464289, "grad_norm": 0.1875, "learning_rate": 0.0005620383431012219, "loss": 0.5307, "step": 59890 }, { "epoch": 2.9750670507599084, "grad_norm": 0.1220703125, "learning_rate": 0.0005619986093175724, "loss": 0.5339, "step": 59900 }, { "epoch": 2.975563723055528, "grad_norm": 0.11669921875, "learning_rate": 0.0005619588755339227, "loss": 0.5683, "step": 59910 }, { "epoch": 2.9760603953511473, "grad_norm": 0.09912109375, "learning_rate": 0.0005619191417502732, "loss": 0.5408, "step": 59920 }, { "epoch": 2.9765570676467665, "grad_norm": 0.11474609375, "learning_rate": 0.0005618794079666236, "loss": 0.5533, "step": 59930 }, { "epoch": 2.977053739942386, "grad_norm": 0.1513671875, "learning_rate": 0.000561839674182974, "loss": 0.5609, "step": 59940 }, { "epoch": 2.9775504122380054, "grad_norm": 0.103515625, "learning_rate": 0.0005617999403993246, "loss": 0.5143, "step": 59950 }, { "epoch": 2.9780470845336247, "grad_norm": 0.11376953125, "learning_rate": 0.000561760206615675, "loss": 0.5777, "step": 59960 }, { "epoch": 2.978543756829244, "grad_norm": 0.11328125, "learning_rate": 0.0005617204728320254, "loss": 0.5602, "step": 59970 }, { "epoch": 2.9790404291248636, "grad_norm": 0.099609375, "learning_rate": 0.0005616807390483759, "loss": 0.5576, "step": 59980 }, { "epoch": 2.979537101420483, "grad_norm": 0.0986328125, "learning_rate": 0.0005616410052647264, "loss": 0.5405, "step": 59990 }, { "epoch": 2.980033773716102, "grad_norm": 0.1171875, "learning_rate": 0.0005616012714810768, "loss": 0.5464, "step": 60000 }, { "epoch": 2.9805304460117217, "grad_norm": 0.12255859375, "learning_rate": 0.0005615615376974273, "loss": 0.557, "step": 60010 }, { "epoch": 2.981027118307341, "grad_norm": 0.0986328125, "learning_rate": 0.0005615218039137777, "loss": 0.5384, "step": 60020 }, { "epoch": 2.98152379060296, "grad_norm": 0.09814453125, "learning_rate": 0.0005614820701301282, "loss": 0.5597, "step": 60030 }, { "epoch": 2.9820204628985794, "grad_norm": 0.1298828125, "learning_rate": 0.0005614423363464786, "loss": 0.5859, "step": 60040 }, { "epoch": 2.982517135194199, "grad_norm": 0.11767578125, "learning_rate": 0.0005614026025628291, "loss": 0.5451, "step": 60050 }, { "epoch": 2.9830138074898183, "grad_norm": 0.1591796875, "learning_rate": 0.0005613628687791796, "loss": 0.5357, "step": 60060 }, { "epoch": 2.9835104797854375, "grad_norm": 0.1533203125, "learning_rate": 0.0005613231349955299, "loss": 0.5503, "step": 60070 }, { "epoch": 2.984007152081057, "grad_norm": 0.10302734375, "learning_rate": 0.0005612834012118804, "loss": 0.5272, "step": 60080 }, { "epoch": 2.9845038243766764, "grad_norm": 0.10986328125, "learning_rate": 0.000561243667428231, "loss": 0.5474, "step": 60090 }, { "epoch": 2.9850004966722956, "grad_norm": 0.11767578125, "learning_rate": 0.0005612039336445813, "loss": 0.5673, "step": 60100 }, { "epoch": 2.985497168967915, "grad_norm": 0.10400390625, "learning_rate": 0.0005611641998609318, "loss": 0.5401, "step": 60110 }, { "epoch": 2.985993841263534, "grad_norm": 0.1015625, "learning_rate": 0.0005611244660772822, "loss": 0.5523, "step": 60120 }, { "epoch": 2.9864905135591537, "grad_norm": 0.10107421875, "learning_rate": 0.0005610847322936326, "loss": 0.5665, "step": 60130 }, { "epoch": 2.986987185854773, "grad_norm": 0.146484375, "learning_rate": 0.0005610449985099832, "loss": 0.517, "step": 60140 }, { "epoch": 2.9874838581503926, "grad_norm": 0.095703125, "learning_rate": 0.0005610052647263336, "loss": 0.552, "step": 60150 }, { "epoch": 2.987980530446012, "grad_norm": 0.09521484375, "learning_rate": 0.000560965530942684, "loss": 0.5065, "step": 60160 }, { "epoch": 2.988477202741631, "grad_norm": 0.10546875, "learning_rate": 0.0005609257971590345, "loss": 0.542, "step": 60170 }, { "epoch": 2.9889738750372503, "grad_norm": 0.1669921875, "learning_rate": 0.0005608860633753849, "loss": 0.5447, "step": 60180 }, { "epoch": 2.9894705473328695, "grad_norm": 0.1142578125, "learning_rate": 0.0005608463295917355, "loss": 0.5466, "step": 60190 }, { "epoch": 2.989967219628489, "grad_norm": 0.11962890625, "learning_rate": 0.0005608065958080859, "loss": 0.5523, "step": 60200 }, { "epoch": 2.9904638919241084, "grad_norm": 0.099609375, "learning_rate": 0.0005607668620244363, "loss": 0.5566, "step": 60210 }, { "epoch": 2.9909605642197277, "grad_norm": 0.08740234375, "learning_rate": 0.0005607271282407868, "loss": 0.5332, "step": 60220 }, { "epoch": 2.9914572365153473, "grad_norm": 0.09375, "learning_rate": 0.0005606873944571372, "loss": 0.5049, "step": 60230 }, { "epoch": 2.9919539088109666, "grad_norm": 0.123046875, "learning_rate": 0.0005606476606734877, "loss": 0.5662, "step": 60240 }, { "epoch": 2.992450581106586, "grad_norm": 0.115234375, "learning_rate": 0.0005606079268898382, "loss": 0.5306, "step": 60250 }, { "epoch": 2.992947253402205, "grad_norm": 0.1171875, "learning_rate": 0.0005605681931061885, "loss": 0.5561, "step": 60260 }, { "epoch": 2.9934439256978247, "grad_norm": 0.1025390625, "learning_rate": 0.000560528459322539, "loss": 0.5283, "step": 60270 }, { "epoch": 2.993940597993444, "grad_norm": 0.1591796875, "learning_rate": 0.0005604887255388895, "loss": 0.5596, "step": 60280 }, { "epoch": 2.994437270289063, "grad_norm": 0.0986328125, "learning_rate": 0.0005604489917552398, "loss": 0.5754, "step": 60290 }, { "epoch": 2.994933942584683, "grad_norm": 0.1201171875, "learning_rate": 0.0005604092579715904, "loss": 0.5628, "step": 60300 }, { "epoch": 2.995430614880302, "grad_norm": 0.134765625, "learning_rate": 0.0005603695241879408, "loss": 0.5484, "step": 60310 }, { "epoch": 2.9959272871759213, "grad_norm": 0.12890625, "learning_rate": 0.0005603297904042912, "loss": 0.5422, "step": 60320 }, { "epoch": 2.9964239594715405, "grad_norm": 0.111328125, "learning_rate": 0.0005602900566206417, "loss": 0.5361, "step": 60330 }, { "epoch": 2.99692063176716, "grad_norm": 0.130859375, "learning_rate": 0.0005602503228369921, "loss": 0.5161, "step": 60340 }, { "epoch": 2.9974173040627794, "grad_norm": 0.109375, "learning_rate": 0.0005602105890533427, "loss": 0.5492, "step": 60350 }, { "epoch": 2.9979139763583986, "grad_norm": 0.10107421875, "learning_rate": 0.0005601708552696931, "loss": 0.5375, "step": 60360 }, { "epoch": 2.9984106486540183, "grad_norm": 0.099609375, "learning_rate": 0.0005601311214860435, "loss": 0.5327, "step": 60370 }, { "epoch": 2.9989073209496375, "grad_norm": 0.17578125, "learning_rate": 0.000560091387702394, "loss": 0.5367, "step": 60380 }, { "epoch": 2.9994039932452567, "grad_norm": 0.1103515625, "learning_rate": 0.0005600516539187445, "loss": 0.5471, "step": 60390 }, { "epoch": 2.999900665540876, "grad_norm": 0.1064453125, "learning_rate": 0.0005600119201350949, "loss": 0.5716, "step": 60400 }, { "epoch": 3.0003973378364956, "grad_norm": 0.1328125, "learning_rate": 0.0005599721863514454, "loss": 0.5505, "step": 60410 }, { "epoch": 3.000894010132115, "grad_norm": 0.111328125, "learning_rate": 0.0005599324525677958, "loss": 0.5274, "step": 60420 }, { "epoch": 3.001390682427734, "grad_norm": 0.11181640625, "learning_rate": 0.0005598927187841462, "loss": 0.5332, "step": 60430 }, { "epoch": 3.0018873547233533, "grad_norm": 0.08984375, "learning_rate": 0.0005598529850004968, "loss": 0.5394, "step": 60440 }, { "epoch": 3.002384027018973, "grad_norm": 0.0908203125, "learning_rate": 0.0005598132512168471, "loss": 0.5643, "step": 60450 }, { "epoch": 3.0028806993145922, "grad_norm": 0.09130859375, "learning_rate": 0.0005597735174331976, "loss": 0.5487, "step": 60460 }, { "epoch": 3.0033773716102115, "grad_norm": 0.1015625, "learning_rate": 0.0005597337836495481, "loss": 0.5516, "step": 60470 }, { "epoch": 3.003874043905831, "grad_norm": 0.1064453125, "learning_rate": 0.0005596940498658985, "loss": 0.5343, "step": 60480 }, { "epoch": 3.0043707162014504, "grad_norm": 0.1064453125, "learning_rate": 0.000559654316082249, "loss": 0.5349, "step": 60490 }, { "epoch": 3.0048673884970696, "grad_norm": 0.09619140625, "learning_rate": 0.0005596145822985995, "loss": 0.5361, "step": 60500 }, { "epoch": 3.005364060792689, "grad_norm": 0.09375, "learning_rate": 0.0005595748485149499, "loss": 0.5158, "step": 60510 }, { "epoch": 3.0058607330883085, "grad_norm": 0.109375, "learning_rate": 0.0005595351147313003, "loss": 0.5549, "step": 60520 }, { "epoch": 3.0063574053839277, "grad_norm": 0.10009765625, "learning_rate": 0.0005594953809476507, "loss": 0.5027, "step": 60530 }, { "epoch": 3.006854077679547, "grad_norm": 0.10498046875, "learning_rate": 0.0005594556471640013, "loss": 0.5512, "step": 60540 }, { "epoch": 3.0073507499751666, "grad_norm": 0.11328125, "learning_rate": 0.0005594159133803517, "loss": 0.5563, "step": 60550 }, { "epoch": 3.007847422270786, "grad_norm": 0.10546875, "learning_rate": 0.0005593761795967021, "loss": 0.551, "step": 60560 }, { "epoch": 3.008344094566405, "grad_norm": 0.09619140625, "learning_rate": 0.0005593364458130526, "loss": 0.5456, "step": 60570 }, { "epoch": 3.0088407668620243, "grad_norm": 0.162109375, "learning_rate": 0.000559296712029403, "loss": 0.5606, "step": 60580 }, { "epoch": 3.009337439157644, "grad_norm": 0.1201171875, "learning_rate": 0.0005592569782457534, "loss": 0.5347, "step": 60590 }, { "epoch": 3.009834111453263, "grad_norm": 0.138671875, "learning_rate": 0.000559217244462104, "loss": 0.5382, "step": 60600 }, { "epoch": 3.0103307837488824, "grad_norm": 0.140625, "learning_rate": 0.0005591775106784544, "loss": 0.5571, "step": 60610 }, { "epoch": 3.0108274560445016, "grad_norm": 0.1044921875, "learning_rate": 0.0005591377768948048, "loss": 0.5239, "step": 60620 }, { "epoch": 3.0113241283401213, "grad_norm": 0.1728515625, "learning_rate": 0.0005590980431111553, "loss": 0.5453, "step": 60630 }, { "epoch": 3.0118208006357405, "grad_norm": 0.09765625, "learning_rate": 0.0005590583093275057, "loss": 0.5146, "step": 60640 }, { "epoch": 3.0123174729313598, "grad_norm": 0.11572265625, "learning_rate": 0.0005590185755438562, "loss": 0.506, "step": 60650 }, { "epoch": 3.0128141452269794, "grad_norm": 0.09033203125, "learning_rate": 0.0005589788417602067, "loss": 0.5441, "step": 60660 }, { "epoch": 3.0133108175225987, "grad_norm": 0.12255859375, "learning_rate": 0.0005589391079765571, "loss": 0.5373, "step": 60670 }, { "epoch": 3.013807489818218, "grad_norm": 0.169921875, "learning_rate": 0.0005588993741929075, "loss": 0.5262, "step": 60680 }, { "epoch": 3.014304162113837, "grad_norm": 0.10009765625, "learning_rate": 0.000558859640409258, "loss": 0.5653, "step": 60690 }, { "epoch": 3.014800834409457, "grad_norm": 0.1064453125, "learning_rate": 0.0005588199066256085, "loss": 0.5368, "step": 60700 }, { "epoch": 3.015297506705076, "grad_norm": 0.1484375, "learning_rate": 0.0005587801728419589, "loss": 0.5373, "step": 60710 }, { "epoch": 3.0157941790006952, "grad_norm": 0.1337890625, "learning_rate": 0.0005587404390583093, "loss": 0.5432, "step": 60720 }, { "epoch": 3.016290851296315, "grad_norm": 0.1259765625, "learning_rate": 0.0005587007052746598, "loss": 0.5181, "step": 60730 }, { "epoch": 3.016787523591934, "grad_norm": 0.2177734375, "learning_rate": 0.0005586609714910102, "loss": 0.5347, "step": 60740 }, { "epoch": 3.0172841958875534, "grad_norm": 0.09814453125, "learning_rate": 0.0005586212377073607, "loss": 0.5342, "step": 60750 }, { "epoch": 3.0177808681831726, "grad_norm": 0.10009765625, "learning_rate": 0.0005585815039237112, "loss": 0.5126, "step": 60760 }, { "epoch": 3.0182775404787923, "grad_norm": 0.12451171875, "learning_rate": 0.0005585417701400616, "loss": 0.5561, "step": 60770 }, { "epoch": 3.0187742127744115, "grad_norm": 0.1083984375, "learning_rate": 0.000558502036356412, "loss": 0.5455, "step": 60780 }, { "epoch": 3.0192708850700307, "grad_norm": 0.1103515625, "learning_rate": 0.0005584623025727625, "loss": 0.5802, "step": 60790 }, { "epoch": 3.01976755736565, "grad_norm": 0.1328125, "learning_rate": 0.000558422568789113, "loss": 0.5601, "step": 60800 }, { "epoch": 3.0202642296612696, "grad_norm": 0.169921875, "learning_rate": 0.0005583828350054634, "loss": 0.5845, "step": 60810 }, { "epoch": 3.020760901956889, "grad_norm": 0.095703125, "learning_rate": 0.0005583431012218139, "loss": 0.5392, "step": 60820 }, { "epoch": 3.021257574252508, "grad_norm": 0.09814453125, "learning_rate": 0.0005583033674381643, "loss": 0.5334, "step": 60830 }, { "epoch": 3.0217542465481277, "grad_norm": 0.09912109375, "learning_rate": 0.0005582636336545147, "loss": 0.5474, "step": 60840 }, { "epoch": 3.022250918843747, "grad_norm": 0.09814453125, "learning_rate": 0.0005582238998708653, "loss": 0.5282, "step": 60850 }, { "epoch": 3.022747591139366, "grad_norm": 0.10498046875, "learning_rate": 0.0005581841660872157, "loss": 0.547, "step": 60860 }, { "epoch": 3.0232442634349854, "grad_norm": 0.1376953125, "learning_rate": 0.0005581444323035661, "loss": 0.5394, "step": 60870 }, { "epoch": 3.023740935730605, "grad_norm": 0.09716796875, "learning_rate": 0.0005581046985199166, "loss": 0.5465, "step": 60880 }, { "epoch": 3.0242376080262243, "grad_norm": 0.10595703125, "learning_rate": 0.000558064964736267, "loss": 0.5242, "step": 60890 }, { "epoch": 3.0247342803218435, "grad_norm": 0.09619140625, "learning_rate": 0.0005580252309526175, "loss": 0.5338, "step": 60900 }, { "epoch": 3.0252309526174628, "grad_norm": 0.09375, "learning_rate": 0.0005579854971689679, "loss": 0.5479, "step": 60910 }, { "epoch": 3.0257276249130824, "grad_norm": 0.10595703125, "learning_rate": 0.0005579457633853184, "loss": 0.5434, "step": 60920 }, { "epoch": 3.0262242972087017, "grad_norm": 0.11962890625, "learning_rate": 0.0005579060296016689, "loss": 0.5436, "step": 60930 }, { "epoch": 3.026720969504321, "grad_norm": 0.109375, "learning_rate": 0.0005578662958180192, "loss": 0.5053, "step": 60940 }, { "epoch": 3.0272176417999406, "grad_norm": 0.10107421875, "learning_rate": 0.0005578265620343698, "loss": 0.52, "step": 60950 }, { "epoch": 3.02771431409556, "grad_norm": 0.1015625, "learning_rate": 0.0005577868282507203, "loss": 0.5536, "step": 60960 }, { "epoch": 3.028210986391179, "grad_norm": 0.10546875, "learning_rate": 0.0005577470944670706, "loss": 0.54, "step": 60970 }, { "epoch": 3.0287076586867983, "grad_norm": 0.134765625, "learning_rate": 0.0005577073606834211, "loss": 0.5429, "step": 60980 }, { "epoch": 3.029204330982418, "grad_norm": 0.08935546875, "learning_rate": 0.0005576676268997715, "loss": 0.4954, "step": 60990 }, { "epoch": 3.029701003278037, "grad_norm": 0.1044921875, "learning_rate": 0.000557627893116122, "loss": 0.5265, "step": 61000 }, { "epoch": 3.0301976755736564, "grad_norm": 0.10888671875, "learning_rate": 0.0005575881593324725, "loss": 0.554, "step": 61010 }, { "epoch": 3.030694347869276, "grad_norm": 0.1123046875, "learning_rate": 0.0005575484255488229, "loss": 0.595, "step": 61020 }, { "epoch": 3.0311910201648953, "grad_norm": 0.14453125, "learning_rate": 0.0005575086917651733, "loss": 0.5713, "step": 61030 }, { "epoch": 3.0316876924605145, "grad_norm": 0.09521484375, "learning_rate": 0.0005574689579815238, "loss": 0.5409, "step": 61040 }, { "epoch": 3.0321843647561337, "grad_norm": 0.1064453125, "learning_rate": 0.0005574292241978743, "loss": 0.529, "step": 61050 }, { "epoch": 3.0326810370517534, "grad_norm": 0.109375, "learning_rate": 0.0005573894904142247, "loss": 0.5127, "step": 61060 }, { "epoch": 3.0331777093473726, "grad_norm": 0.10107421875, "learning_rate": 0.0005573497566305752, "loss": 0.5463, "step": 61070 }, { "epoch": 3.033674381642992, "grad_norm": 0.11572265625, "learning_rate": 0.0005573100228469256, "loss": 0.5276, "step": 61080 }, { "epoch": 3.0341710539386115, "grad_norm": 0.125, "learning_rate": 0.0005572702890632761, "loss": 0.5473, "step": 61090 }, { "epoch": 3.0346677262342308, "grad_norm": 0.0888671875, "learning_rate": 0.0005572305552796266, "loss": 0.5491, "step": 61100 }, { "epoch": 3.03516439852985, "grad_norm": 0.126953125, "learning_rate": 0.000557190821495977, "loss": 0.5376, "step": 61110 }, { "epoch": 3.035661070825469, "grad_norm": 0.10302734375, "learning_rate": 0.0005571510877123275, "loss": 0.5374, "step": 61120 }, { "epoch": 3.036157743121089, "grad_norm": 0.09619140625, "learning_rate": 0.0005571113539286778, "loss": 0.549, "step": 61130 }, { "epoch": 3.036654415416708, "grad_norm": 0.14453125, "learning_rate": 0.0005570716201450283, "loss": 0.5694, "step": 61140 }, { "epoch": 3.0371510877123273, "grad_norm": 0.16015625, "learning_rate": 0.0005570318863613789, "loss": 0.5125, "step": 61150 }, { "epoch": 3.0376477600079466, "grad_norm": 0.10595703125, "learning_rate": 0.0005569921525777292, "loss": 0.5519, "step": 61160 }, { "epoch": 3.0381444323035662, "grad_norm": 0.1416015625, "learning_rate": 0.0005569524187940797, "loss": 0.5451, "step": 61170 }, { "epoch": 3.0386411045991855, "grad_norm": 0.1298828125, "learning_rate": 0.0005569126850104301, "loss": 0.5599, "step": 61180 }, { "epoch": 3.0391377768948047, "grad_norm": 0.1357421875, "learning_rate": 0.0005568729512267805, "loss": 0.4976, "step": 61190 }, { "epoch": 3.0396344491904244, "grad_norm": 0.0986328125, "learning_rate": 0.0005568332174431311, "loss": 0.5451, "step": 61200 }, { "epoch": 3.0401311214860436, "grad_norm": 0.10498046875, "learning_rate": 0.0005567934836594815, "loss": 0.5469, "step": 61210 }, { "epoch": 3.040627793781663, "grad_norm": 0.126953125, "learning_rate": 0.0005567537498758319, "loss": 0.5476, "step": 61220 }, { "epoch": 3.041124466077282, "grad_norm": 0.1142578125, "learning_rate": 0.0005567140160921824, "loss": 0.5501, "step": 61230 }, { "epoch": 3.0416211383729017, "grad_norm": 0.09326171875, "learning_rate": 0.0005566742823085328, "loss": 0.5679, "step": 61240 }, { "epoch": 3.042117810668521, "grad_norm": 0.10595703125, "learning_rate": 0.0005566345485248834, "loss": 0.5431, "step": 61250 }, { "epoch": 3.04261448296414, "grad_norm": 0.1494140625, "learning_rate": 0.0005565948147412338, "loss": 0.5192, "step": 61260 }, { "epoch": 3.0431111552597594, "grad_norm": 0.11083984375, "learning_rate": 0.0005565550809575842, "loss": 0.5309, "step": 61270 }, { "epoch": 3.043607827555379, "grad_norm": 0.091796875, "learning_rate": 0.0005565153471739347, "loss": 0.5166, "step": 61280 }, { "epoch": 3.0441044998509983, "grad_norm": 0.1474609375, "learning_rate": 0.0005564756133902851, "loss": 0.5208, "step": 61290 }, { "epoch": 3.0446011721466175, "grad_norm": 0.130859375, "learning_rate": 0.0005564358796066356, "loss": 0.5551, "step": 61300 }, { "epoch": 3.045097844442237, "grad_norm": 0.11376953125, "learning_rate": 0.0005563961458229861, "loss": 0.5407, "step": 61310 }, { "epoch": 3.0455945167378564, "grad_norm": 0.1455078125, "learning_rate": 0.0005563564120393364, "loss": 0.5215, "step": 61320 }, { "epoch": 3.0460911890334756, "grad_norm": 0.1416015625, "learning_rate": 0.0005563166782556869, "loss": 0.5664, "step": 61330 }, { "epoch": 3.046587861329095, "grad_norm": 0.0986328125, "learning_rate": 0.0005562769444720374, "loss": 0.5375, "step": 61340 }, { "epoch": 3.0470845336247145, "grad_norm": 0.09765625, "learning_rate": 0.0005562372106883878, "loss": 0.5631, "step": 61350 }, { "epoch": 3.0475812059203338, "grad_norm": 0.099609375, "learning_rate": 0.0005561974769047383, "loss": 0.5521, "step": 61360 }, { "epoch": 3.048077878215953, "grad_norm": 0.1259765625, "learning_rate": 0.0005561577431210888, "loss": 0.5416, "step": 61370 }, { "epoch": 3.0485745505115727, "grad_norm": 0.16015625, "learning_rate": 0.0005561180093374392, "loss": 0.5347, "step": 61380 }, { "epoch": 3.049071222807192, "grad_norm": 0.10888671875, "learning_rate": 0.0005560782755537896, "loss": 0.5525, "step": 61390 }, { "epoch": 3.049567895102811, "grad_norm": 0.1337890625, "learning_rate": 0.00055603854177014, "loss": 0.5232, "step": 61400 }, { "epoch": 3.0500645673984303, "grad_norm": 0.1259765625, "learning_rate": 0.0005559988079864906, "loss": 0.5682, "step": 61410 }, { "epoch": 3.05056123969405, "grad_norm": 0.0966796875, "learning_rate": 0.000555959074202841, "loss": 0.5561, "step": 61420 }, { "epoch": 3.0510579119896692, "grad_norm": 0.09375, "learning_rate": 0.0005559193404191914, "loss": 0.547, "step": 61430 }, { "epoch": 3.0515545842852885, "grad_norm": 0.11083984375, "learning_rate": 0.0005558796066355419, "loss": 0.5549, "step": 61440 }, { "epoch": 3.0520512565809077, "grad_norm": 0.1181640625, "learning_rate": 0.0005558398728518924, "loss": 0.534, "step": 61450 }, { "epoch": 3.0525479288765274, "grad_norm": 0.10107421875, "learning_rate": 0.0005558001390682428, "loss": 0.5269, "step": 61460 }, { "epoch": 3.0530446011721466, "grad_norm": 0.107421875, "learning_rate": 0.0005557604052845933, "loss": 0.554, "step": 61470 }, { "epoch": 3.053541273467766, "grad_norm": 0.10498046875, "learning_rate": 0.0005557206715009437, "loss": 0.5262, "step": 61480 }, { "epoch": 3.0540379457633855, "grad_norm": 0.12451171875, "learning_rate": 0.0005556809377172941, "loss": 0.5439, "step": 61490 }, { "epoch": 3.0545346180590047, "grad_norm": 0.1533203125, "learning_rate": 0.0005556412039336447, "loss": 0.5188, "step": 61500 }, { "epoch": 3.055031290354624, "grad_norm": 0.10107421875, "learning_rate": 0.000555601470149995, "loss": 0.5862, "step": 61510 }, { "epoch": 3.055527962650243, "grad_norm": 0.0927734375, "learning_rate": 0.0005555617363663455, "loss": 0.5218, "step": 61520 }, { "epoch": 3.056024634945863, "grad_norm": 0.09326171875, "learning_rate": 0.000555522002582696, "loss": 0.5514, "step": 61530 }, { "epoch": 3.056521307241482, "grad_norm": 0.177734375, "learning_rate": 0.0005554822687990464, "loss": 0.5412, "step": 61540 }, { "epoch": 3.0570179795371013, "grad_norm": 0.11279296875, "learning_rate": 0.0005554425350153969, "loss": 0.5377, "step": 61550 }, { "epoch": 3.057514651832721, "grad_norm": 0.1162109375, "learning_rate": 0.0005554028012317474, "loss": 0.519, "step": 61560 }, { "epoch": 3.05801132412834, "grad_norm": 0.1376953125, "learning_rate": 0.0005553630674480978, "loss": 0.5234, "step": 61570 }, { "epoch": 3.0585079964239594, "grad_norm": 0.115234375, "learning_rate": 0.0005553233336644482, "loss": 0.5131, "step": 61580 }, { "epoch": 3.0590046687195787, "grad_norm": 0.138671875, "learning_rate": 0.0005552835998807986, "loss": 0.5561, "step": 61590 }, { "epoch": 3.0595013410151983, "grad_norm": 0.1474609375, "learning_rate": 0.0005552438660971492, "loss": 0.5365, "step": 61600 }, { "epoch": 3.0599980133108176, "grad_norm": 0.109375, "learning_rate": 0.0005552041323134996, "loss": 0.5246, "step": 61610 }, { "epoch": 3.060494685606437, "grad_norm": 0.142578125, "learning_rate": 0.00055516439852985, "loss": 0.5475, "step": 61620 }, { "epoch": 3.060991357902056, "grad_norm": 0.11328125, "learning_rate": 0.0005551246647462005, "loss": 0.5341, "step": 61630 }, { "epoch": 3.0614880301976757, "grad_norm": 0.10595703125, "learning_rate": 0.0005550849309625509, "loss": 0.529, "step": 61640 }, { "epoch": 3.061984702493295, "grad_norm": 0.10205078125, "learning_rate": 0.0005550451971789013, "loss": 0.5492, "step": 61650 }, { "epoch": 3.062481374788914, "grad_norm": 0.119140625, "learning_rate": 0.0005550054633952519, "loss": 0.5384, "step": 61660 }, { "epoch": 3.062978047084534, "grad_norm": 0.099609375, "learning_rate": 0.0005549657296116023, "loss": 0.5352, "step": 61670 }, { "epoch": 3.063474719380153, "grad_norm": 0.1103515625, "learning_rate": 0.0005549259958279527, "loss": 0.5321, "step": 61680 }, { "epoch": 3.0639713916757723, "grad_norm": 0.12255859375, "learning_rate": 0.0005548862620443032, "loss": 0.5378, "step": 61690 }, { "epoch": 3.0644680639713915, "grad_norm": 0.10595703125, "learning_rate": 0.0005548465282606537, "loss": 0.5709, "step": 61700 }, { "epoch": 3.064964736267011, "grad_norm": 0.11669921875, "learning_rate": 0.0005548067944770041, "loss": 0.5592, "step": 61710 }, { "epoch": 3.0654614085626304, "grad_norm": 0.1435546875, "learning_rate": 0.0005547670606933546, "loss": 0.5601, "step": 61720 }, { "epoch": 3.0659580808582496, "grad_norm": 0.1005859375, "learning_rate": 0.000554727326909705, "loss": 0.5452, "step": 61730 }, { "epoch": 3.0664547531538693, "grad_norm": 0.11279296875, "learning_rate": 0.0005546875931260554, "loss": 0.554, "step": 61740 }, { "epoch": 3.0669514254494885, "grad_norm": 0.10693359375, "learning_rate": 0.000554647859342406, "loss": 0.5515, "step": 61750 }, { "epoch": 3.0674480977451077, "grad_norm": 0.11669921875, "learning_rate": 0.0005546081255587564, "loss": 0.537, "step": 61760 }, { "epoch": 3.067944770040727, "grad_norm": 0.12353515625, "learning_rate": 0.0005545683917751068, "loss": 0.5719, "step": 61770 }, { "epoch": 3.0684414423363466, "grad_norm": 0.09228515625, "learning_rate": 0.0005545286579914572, "loss": 0.4947, "step": 61780 }, { "epoch": 3.068938114631966, "grad_norm": 0.103515625, "learning_rate": 0.0005544889242078077, "loss": 0.5461, "step": 61790 }, { "epoch": 3.069434786927585, "grad_norm": 0.1181640625, "learning_rate": 0.0005544491904241581, "loss": 0.5729, "step": 61800 }, { "epoch": 3.0699314592232043, "grad_norm": 0.1552734375, "learning_rate": 0.0005544094566405086, "loss": 0.5313, "step": 61810 }, { "epoch": 3.070428131518824, "grad_norm": 0.11669921875, "learning_rate": 0.0005543697228568591, "loss": 0.5505, "step": 61820 }, { "epoch": 3.070924803814443, "grad_norm": 0.1572265625, "learning_rate": 0.0005543299890732096, "loss": 0.5322, "step": 61830 }, { "epoch": 3.0714214761100624, "grad_norm": 0.12890625, "learning_rate": 0.0005542902552895599, "loss": 0.5548, "step": 61840 }, { "epoch": 3.071918148405682, "grad_norm": 0.1328125, "learning_rate": 0.0005542505215059105, "loss": 0.5299, "step": 61850 }, { "epoch": 3.0724148207013013, "grad_norm": 0.11181640625, "learning_rate": 0.0005542107877222609, "loss": 0.5366, "step": 61860 }, { "epoch": 3.0729114929969206, "grad_norm": 0.12060546875, "learning_rate": 0.0005541710539386113, "loss": 0.5327, "step": 61870 }, { "epoch": 3.07340816529254, "grad_norm": 0.138671875, "learning_rate": 0.0005541313201549618, "loss": 0.5327, "step": 61880 }, { "epoch": 3.0739048375881595, "grad_norm": 0.1220703125, "learning_rate": 0.0005540915863713122, "loss": 0.563, "step": 61890 }, { "epoch": 3.0744015098837787, "grad_norm": 0.09619140625, "learning_rate": 0.0005540518525876626, "loss": 0.5256, "step": 61900 }, { "epoch": 3.074898182179398, "grad_norm": 0.09912109375, "learning_rate": 0.0005540121188040132, "loss": 0.5415, "step": 61910 }, { "epoch": 3.0753948544750176, "grad_norm": 0.123046875, "learning_rate": 0.0005539723850203636, "loss": 0.5438, "step": 61920 }, { "epoch": 3.075891526770637, "grad_norm": 0.1279296875, "learning_rate": 0.000553932651236714, "loss": 0.5567, "step": 61930 }, { "epoch": 3.076388199066256, "grad_norm": 0.09765625, "learning_rate": 0.0005538929174530645, "loss": 0.5373, "step": 61940 }, { "epoch": 3.0768848713618753, "grad_norm": 0.11376953125, "learning_rate": 0.000553853183669415, "loss": 0.5203, "step": 61950 }, { "epoch": 3.077381543657495, "grad_norm": 0.154296875, "learning_rate": 0.0005538134498857654, "loss": 0.5388, "step": 61960 }, { "epoch": 3.077878215953114, "grad_norm": 0.10107421875, "learning_rate": 0.0005537737161021158, "loss": 0.5174, "step": 61970 }, { "epoch": 3.0783748882487334, "grad_norm": 0.109375, "learning_rate": 0.0005537339823184663, "loss": 0.5629, "step": 61980 }, { "epoch": 3.0788715605443526, "grad_norm": 0.11474609375, "learning_rate": 0.0005536942485348168, "loss": 0.5578, "step": 61990 }, { "epoch": 3.0793682328399723, "grad_norm": 0.1103515625, "learning_rate": 0.0005536545147511671, "loss": 0.5396, "step": 62000 }, { "epoch": 3.0798649051355915, "grad_norm": 0.1064453125, "learning_rate": 0.0005536147809675177, "loss": 0.5275, "step": 62010 }, { "epoch": 3.0803615774312108, "grad_norm": 0.109375, "learning_rate": 0.0005535750471838682, "loss": 0.5266, "step": 62020 }, { "epoch": 3.0808582497268304, "grad_norm": 0.1396484375, "learning_rate": 0.0005535353134002185, "loss": 0.5339, "step": 62030 }, { "epoch": 3.0813549220224496, "grad_norm": 0.1279296875, "learning_rate": 0.000553495579616569, "loss": 0.5053, "step": 62040 }, { "epoch": 3.081851594318069, "grad_norm": 0.095703125, "learning_rate": 0.0005534558458329194, "loss": 0.5093, "step": 62050 }, { "epoch": 3.082348266613688, "grad_norm": 0.08935546875, "learning_rate": 0.0005534161120492699, "loss": 0.5779, "step": 62060 }, { "epoch": 3.0828449389093078, "grad_norm": 0.1064453125, "learning_rate": 0.0005533763782656204, "loss": 0.5301, "step": 62070 }, { "epoch": 3.083341611204927, "grad_norm": 0.1533203125, "learning_rate": 0.0005533366444819708, "loss": 0.5422, "step": 62080 }, { "epoch": 3.0838382835005462, "grad_norm": 0.115234375, "learning_rate": 0.0005532969106983212, "loss": 0.5463, "step": 62090 }, { "epoch": 3.084334955796166, "grad_norm": 0.1083984375, "learning_rate": 0.0005532571769146717, "loss": 0.5811, "step": 62100 }, { "epoch": 3.084831628091785, "grad_norm": 0.1171875, "learning_rate": 0.0005532174431310222, "loss": 0.5423, "step": 62110 }, { "epoch": 3.0853283003874044, "grad_norm": 0.12353515625, "learning_rate": 0.0005531777093473726, "loss": 0.5375, "step": 62120 }, { "epoch": 3.0858249726830236, "grad_norm": 0.0986328125, "learning_rate": 0.0005531379755637231, "loss": 0.5421, "step": 62130 }, { "epoch": 3.0863216449786433, "grad_norm": 0.08984375, "learning_rate": 0.0005530982417800735, "loss": 0.5549, "step": 62140 }, { "epoch": 3.0868183172742625, "grad_norm": 0.1005859375, "learning_rate": 0.000553058507996424, "loss": 0.5281, "step": 62150 }, { "epoch": 3.0873149895698817, "grad_norm": 0.1171875, "learning_rate": 0.0005530187742127745, "loss": 0.5695, "step": 62160 }, { "epoch": 3.087811661865501, "grad_norm": 0.1279296875, "learning_rate": 0.0005529790404291249, "loss": 0.5456, "step": 62170 }, { "epoch": 3.0883083341611206, "grad_norm": 0.10400390625, "learning_rate": 0.0005529393066454754, "loss": 0.5652, "step": 62180 }, { "epoch": 3.08880500645674, "grad_norm": 0.1298828125, "learning_rate": 0.0005528995728618257, "loss": 0.5569, "step": 62190 }, { "epoch": 3.089301678752359, "grad_norm": 0.11083984375, "learning_rate": 0.0005528598390781762, "loss": 0.5453, "step": 62200 }, { "epoch": 3.0897983510479787, "grad_norm": 0.09912109375, "learning_rate": 0.0005528201052945268, "loss": 0.5413, "step": 62210 }, { "epoch": 3.090295023343598, "grad_norm": 0.10888671875, "learning_rate": 0.0005527803715108771, "loss": 0.545, "step": 62220 }, { "epoch": 3.090791695639217, "grad_norm": 0.12255859375, "learning_rate": 0.0005527406377272276, "loss": 0.5355, "step": 62230 }, { "epoch": 3.0912883679348364, "grad_norm": 0.10302734375, "learning_rate": 0.000552700903943578, "loss": 0.5564, "step": 62240 }, { "epoch": 3.091785040230456, "grad_norm": 0.11767578125, "learning_rate": 0.0005526611701599284, "loss": 0.5451, "step": 62250 }, { "epoch": 3.0922817125260753, "grad_norm": 0.1298828125, "learning_rate": 0.000552621436376279, "loss": 0.5192, "step": 62260 }, { "epoch": 3.0927783848216945, "grad_norm": 0.1396484375, "learning_rate": 0.0005525817025926294, "loss": 0.521, "step": 62270 }, { "epoch": 3.093275057117314, "grad_norm": 0.119140625, "learning_rate": 0.0005525419688089799, "loss": 0.5656, "step": 62280 }, { "epoch": 3.0937717294129334, "grad_norm": 0.103515625, "learning_rate": 0.0005525022350253303, "loss": 0.5135, "step": 62290 }, { "epoch": 3.0942684017085527, "grad_norm": 0.09912109375, "learning_rate": 0.0005524625012416807, "loss": 0.5452, "step": 62300 }, { "epoch": 3.094765074004172, "grad_norm": 0.10498046875, "learning_rate": 0.0005524227674580313, "loss": 0.5428, "step": 62310 }, { "epoch": 3.0952617462997916, "grad_norm": 0.1259765625, "learning_rate": 0.0005523830336743817, "loss": 0.5487, "step": 62320 }, { "epoch": 3.095758418595411, "grad_norm": 0.1630859375, "learning_rate": 0.0005523432998907321, "loss": 0.5499, "step": 62330 }, { "epoch": 3.09625509089103, "grad_norm": 0.09716796875, "learning_rate": 0.0005523035661070826, "loss": 0.55, "step": 62340 }, { "epoch": 3.0967517631866492, "grad_norm": 0.12158203125, "learning_rate": 0.000552263832323433, "loss": 0.5263, "step": 62350 }, { "epoch": 3.097248435482269, "grad_norm": 0.15625, "learning_rate": 0.0005522240985397835, "loss": 0.5364, "step": 62360 }, { "epoch": 3.097745107777888, "grad_norm": 0.11181640625, "learning_rate": 0.000552184364756134, "loss": 0.5405, "step": 62370 }, { "epoch": 3.0982417800735074, "grad_norm": 0.12353515625, "learning_rate": 0.0005521446309724843, "loss": 0.5298, "step": 62380 }, { "epoch": 3.098738452369127, "grad_norm": 0.11376953125, "learning_rate": 0.0005521048971888348, "loss": 0.5596, "step": 62390 }, { "epoch": 3.0992351246647463, "grad_norm": 0.095703125, "learning_rate": 0.0005520651634051853, "loss": 0.5401, "step": 62400 }, { "epoch": 3.0997317969603655, "grad_norm": 0.1015625, "learning_rate": 0.0005520254296215357, "loss": 0.5401, "step": 62410 }, { "epoch": 3.1002284692559847, "grad_norm": 0.1259765625, "learning_rate": 0.0005519856958378862, "loss": 0.5443, "step": 62420 }, { "epoch": 3.1007251415516044, "grad_norm": 0.11181640625, "learning_rate": 0.0005519459620542367, "loss": 0.538, "step": 62430 }, { "epoch": 3.1012218138472236, "grad_norm": 0.09814453125, "learning_rate": 0.0005519062282705871, "loss": 0.524, "step": 62440 }, { "epoch": 3.101718486142843, "grad_norm": 0.15234375, "learning_rate": 0.0005518664944869375, "loss": 0.5423, "step": 62450 }, { "epoch": 3.1022151584384625, "grad_norm": 0.1298828125, "learning_rate": 0.000551826760703288, "loss": 0.5312, "step": 62460 }, { "epoch": 3.1027118307340817, "grad_norm": 0.1376953125, "learning_rate": 0.0005517870269196385, "loss": 0.5245, "step": 62470 }, { "epoch": 3.103208503029701, "grad_norm": 0.12158203125, "learning_rate": 0.0005517472931359889, "loss": 0.5601, "step": 62480 }, { "epoch": 3.10370517532532, "grad_norm": 0.10693359375, "learning_rate": 0.0005517075593523393, "loss": 0.5267, "step": 62490 }, { "epoch": 3.10420184762094, "grad_norm": 0.0947265625, "learning_rate": 0.0005516678255686898, "loss": 0.569, "step": 62500 }, { "epoch": 3.104698519916559, "grad_norm": 0.12060546875, "learning_rate": 0.0005516280917850403, "loss": 0.5465, "step": 62510 }, { "epoch": 3.1051951922121783, "grad_norm": 0.08642578125, "learning_rate": 0.0005515883580013907, "loss": 0.5337, "step": 62520 }, { "epoch": 3.1056918645077976, "grad_norm": 0.15625, "learning_rate": 0.0005515486242177412, "loss": 0.5241, "step": 62530 }, { "epoch": 3.106188536803417, "grad_norm": 0.1826171875, "learning_rate": 0.0005515088904340916, "loss": 0.5715, "step": 62540 }, { "epoch": 3.1066852090990364, "grad_norm": 0.12890625, "learning_rate": 0.000551469156650442, "loss": 0.5474, "step": 62550 }, { "epoch": 3.1071818813946557, "grad_norm": 0.111328125, "learning_rate": 0.0005514294228667926, "loss": 0.5265, "step": 62560 }, { "epoch": 3.1076785536902753, "grad_norm": 0.0966796875, "learning_rate": 0.000551389689083143, "loss": 0.5357, "step": 62570 }, { "epoch": 3.1081752259858946, "grad_norm": 0.0888671875, "learning_rate": 0.0005513499552994934, "loss": 0.5471, "step": 62580 }, { "epoch": 3.108671898281514, "grad_norm": 0.1240234375, "learning_rate": 0.0005513102215158439, "loss": 0.5478, "step": 62590 }, { "epoch": 3.109168570577133, "grad_norm": 0.11279296875, "learning_rate": 0.0005512704877321943, "loss": 0.5785, "step": 62600 }, { "epoch": 3.1096652428727527, "grad_norm": 0.109375, "learning_rate": 0.0005512307539485448, "loss": 0.5711, "step": 62610 }, { "epoch": 3.110161915168372, "grad_norm": 0.12158203125, "learning_rate": 0.0005511910201648953, "loss": 0.533, "step": 62620 }, { "epoch": 3.110658587463991, "grad_norm": 0.09375, "learning_rate": 0.0005511512863812457, "loss": 0.5451, "step": 62630 }, { "epoch": 3.1111552597596104, "grad_norm": 0.109375, "learning_rate": 0.0005511115525975961, "loss": 0.544, "step": 62640 }, { "epoch": 3.11165193205523, "grad_norm": 0.10546875, "learning_rate": 0.0005510718188139465, "loss": 0.529, "step": 62650 }, { "epoch": 3.1121486043508493, "grad_norm": 0.09912109375, "learning_rate": 0.0005510320850302971, "loss": 0.5538, "step": 62660 }, { "epoch": 3.1126452766464685, "grad_norm": 0.103515625, "learning_rate": 0.0005509923512466475, "loss": 0.5443, "step": 62670 }, { "epoch": 3.113141948942088, "grad_norm": 0.1650390625, "learning_rate": 0.0005509526174629979, "loss": 0.5412, "step": 62680 }, { "epoch": 3.1136386212377074, "grad_norm": 0.10546875, "learning_rate": 0.0005509128836793484, "loss": 0.5474, "step": 62690 }, { "epoch": 3.1141352935333266, "grad_norm": 0.10888671875, "learning_rate": 0.0005508731498956988, "loss": 0.5529, "step": 62700 }, { "epoch": 3.114631965828946, "grad_norm": 0.119140625, "learning_rate": 0.0005508334161120493, "loss": 0.5555, "step": 62710 }, { "epoch": 3.1151286381245655, "grad_norm": 0.17578125, "learning_rate": 0.0005507936823283998, "loss": 0.5342, "step": 62720 }, { "epoch": 3.1156253104201848, "grad_norm": 0.08935546875, "learning_rate": 0.0005507539485447502, "loss": 0.5442, "step": 62730 }, { "epoch": 3.116121982715804, "grad_norm": 0.10205078125, "learning_rate": 0.0005507142147611006, "loss": 0.5198, "step": 62740 }, { "epoch": 3.1166186550114237, "grad_norm": 0.162109375, "learning_rate": 0.0005506744809774511, "loss": 0.5384, "step": 62750 }, { "epoch": 3.117115327307043, "grad_norm": 0.10546875, "learning_rate": 0.0005506347471938016, "loss": 0.5622, "step": 62760 }, { "epoch": 3.117611999602662, "grad_norm": 0.1572265625, "learning_rate": 0.000550595013410152, "loss": 0.5312, "step": 62770 }, { "epoch": 3.1181086718982813, "grad_norm": 0.1337890625, "learning_rate": 0.0005505552796265025, "loss": 0.5747, "step": 62780 }, { "epoch": 3.118605344193901, "grad_norm": 0.10888671875, "learning_rate": 0.0005505155458428529, "loss": 0.5556, "step": 62790 }, { "epoch": 3.1191020164895202, "grad_norm": 0.0966796875, "learning_rate": 0.0005504758120592033, "loss": 0.5464, "step": 62800 }, { "epoch": 3.1195986887851395, "grad_norm": 0.1162109375, "learning_rate": 0.0005504360782755539, "loss": 0.5591, "step": 62810 }, { "epoch": 3.120095361080759, "grad_norm": 0.095703125, "learning_rate": 0.0005503963444919043, "loss": 0.5534, "step": 62820 }, { "epoch": 3.1205920333763784, "grad_norm": 0.1005859375, "learning_rate": 0.0005503566107082547, "loss": 0.5294, "step": 62830 }, { "epoch": 3.1210887056719976, "grad_norm": 0.10205078125, "learning_rate": 0.0005503168769246051, "loss": 0.5288, "step": 62840 }, { "epoch": 3.121585377967617, "grad_norm": 0.10546875, "learning_rate": 0.0005502771431409556, "loss": 0.5457, "step": 62850 }, { "epoch": 3.1220820502632365, "grad_norm": 0.10693359375, "learning_rate": 0.000550237409357306, "loss": 0.5185, "step": 62860 }, { "epoch": 3.1225787225588557, "grad_norm": 0.1025390625, "learning_rate": 0.0005501976755736565, "loss": 0.5331, "step": 62870 }, { "epoch": 3.123075394854475, "grad_norm": 0.10302734375, "learning_rate": 0.000550157941790007, "loss": 0.5471, "step": 62880 }, { "epoch": 3.123572067150094, "grad_norm": 0.11328125, "learning_rate": 0.0005501182080063575, "loss": 0.5435, "step": 62890 }, { "epoch": 3.124068739445714, "grad_norm": 0.150390625, "learning_rate": 0.0005500784742227078, "loss": 0.5571, "step": 62900 }, { "epoch": 3.124565411741333, "grad_norm": 0.11181640625, "learning_rate": 0.0005500387404390584, "loss": 0.5406, "step": 62910 }, { "epoch": 3.1250620840369523, "grad_norm": 0.1162109375, "learning_rate": 0.0005499990066554088, "loss": 0.5539, "step": 62920 }, { "epoch": 3.125558756332572, "grad_norm": 0.10400390625, "learning_rate": 0.0005499592728717592, "loss": 0.5627, "step": 62930 }, { "epoch": 3.126055428628191, "grad_norm": 0.10498046875, "learning_rate": 0.0005499195390881097, "loss": 0.5457, "step": 62940 }, { "epoch": 3.1265521009238104, "grad_norm": 0.109375, "learning_rate": 0.0005498798053044601, "loss": 0.5293, "step": 62950 }, { "epoch": 3.1270487732194296, "grad_norm": 0.099609375, "learning_rate": 0.0005498400715208105, "loss": 0.5657, "step": 62960 }, { "epoch": 3.1275454455150493, "grad_norm": 0.099609375, "learning_rate": 0.0005498003377371611, "loss": 0.5352, "step": 62970 }, { "epoch": 3.1280421178106685, "grad_norm": 0.1162109375, "learning_rate": 0.0005497606039535115, "loss": 0.5144, "step": 62980 }, { "epoch": 3.1285387901062878, "grad_norm": 0.126953125, "learning_rate": 0.0005497208701698619, "loss": 0.5005, "step": 62990 }, { "epoch": 3.129035462401907, "grad_norm": 0.1171875, "learning_rate": 0.0005496811363862124, "loss": 0.5394, "step": 63000 }, { "epoch": 3.1295321346975267, "grad_norm": 0.10009765625, "learning_rate": 0.0005496414026025629, "loss": 0.5577, "step": 63010 }, { "epoch": 3.130028806993146, "grad_norm": 0.09619140625, "learning_rate": 0.0005496016688189134, "loss": 0.5583, "step": 63020 }, { "epoch": 3.130525479288765, "grad_norm": 0.10107421875, "learning_rate": 0.0005495619350352638, "loss": 0.5588, "step": 63030 }, { "epoch": 3.131022151584385, "grad_norm": 0.1025390625, "learning_rate": 0.0005495222012516142, "loss": 0.5226, "step": 63040 }, { "epoch": 3.131518823880004, "grad_norm": 0.11767578125, "learning_rate": 0.0005494824674679647, "loss": 0.5239, "step": 63050 }, { "epoch": 3.1320154961756232, "grad_norm": 0.09130859375, "learning_rate": 0.000549442733684315, "loss": 0.5049, "step": 63060 }, { "epoch": 3.1325121684712425, "grad_norm": 0.10986328125, "learning_rate": 0.0005494029999006656, "loss": 0.5572, "step": 63070 }, { "epoch": 3.133008840766862, "grad_norm": 0.134765625, "learning_rate": 0.0005493632661170161, "loss": 0.5536, "step": 63080 }, { "epoch": 3.1335055130624814, "grad_norm": 0.115234375, "learning_rate": 0.0005493235323333664, "loss": 0.5535, "step": 63090 }, { "epoch": 3.1340021853581006, "grad_norm": 0.2490234375, "learning_rate": 0.0005492837985497169, "loss": 0.5422, "step": 63100 }, { "epoch": 3.1344988576537203, "grad_norm": 0.1416015625, "learning_rate": 0.0005492440647660673, "loss": 0.569, "step": 63110 }, { "epoch": 3.1349955299493395, "grad_norm": 0.111328125, "learning_rate": 0.0005492043309824178, "loss": 0.5438, "step": 63120 }, { "epoch": 3.1354922022449587, "grad_norm": 0.158203125, "learning_rate": 0.0005491645971987683, "loss": 0.5527, "step": 63130 }, { "epoch": 3.135988874540578, "grad_norm": 0.0986328125, "learning_rate": 0.0005491248634151187, "loss": 0.5441, "step": 63140 }, { "epoch": 3.1364855468361976, "grad_norm": 0.1083984375, "learning_rate": 0.0005490851296314691, "loss": 0.5532, "step": 63150 }, { "epoch": 3.136982219131817, "grad_norm": 0.1337890625, "learning_rate": 0.0005490453958478197, "loss": 0.5299, "step": 63160 }, { "epoch": 3.137478891427436, "grad_norm": 0.1064453125, "learning_rate": 0.0005490056620641701, "loss": 0.5217, "step": 63170 }, { "epoch": 3.1379755637230557, "grad_norm": 0.1689453125, "learning_rate": 0.0005489659282805206, "loss": 0.5396, "step": 63180 }, { "epoch": 3.138472236018675, "grad_norm": 0.109375, "learning_rate": 0.000548926194496871, "loss": 0.5253, "step": 63190 }, { "epoch": 3.138968908314294, "grad_norm": 0.166015625, "learning_rate": 0.0005488864607132214, "loss": 0.5396, "step": 63200 }, { "epoch": 3.1394655806099134, "grad_norm": 0.10595703125, "learning_rate": 0.000548846726929572, "loss": 0.5891, "step": 63210 }, { "epoch": 3.139962252905533, "grad_norm": 0.10498046875, "learning_rate": 0.0005488069931459224, "loss": 0.5388, "step": 63220 }, { "epoch": 3.1404589252011523, "grad_norm": 0.11767578125, "learning_rate": 0.0005487672593622728, "loss": 0.5178, "step": 63230 }, { "epoch": 3.1409555974967716, "grad_norm": 0.09521484375, "learning_rate": 0.0005487275255786233, "loss": 0.5149, "step": 63240 }, { "epoch": 3.141452269792391, "grad_norm": 0.09765625, "learning_rate": 0.0005486877917949736, "loss": 0.5216, "step": 63250 }, { "epoch": 3.1419489420880105, "grad_norm": 0.1064453125, "learning_rate": 0.0005486480580113241, "loss": 0.5156, "step": 63260 }, { "epoch": 3.1424456143836297, "grad_norm": 0.115234375, "learning_rate": 0.0005486083242276747, "loss": 0.5361, "step": 63270 }, { "epoch": 3.142942286679249, "grad_norm": 0.197265625, "learning_rate": 0.000548568590444025, "loss": 0.5553, "step": 63280 }, { "epoch": 3.1434389589748686, "grad_norm": 0.0947265625, "learning_rate": 0.0005485288566603755, "loss": 0.5313, "step": 63290 }, { "epoch": 3.143935631270488, "grad_norm": 0.11962890625, "learning_rate": 0.000548489122876726, "loss": 0.5264, "step": 63300 }, { "epoch": 3.144432303566107, "grad_norm": 0.1923828125, "learning_rate": 0.0005484493890930763, "loss": 0.5595, "step": 63310 }, { "epoch": 3.1449289758617263, "grad_norm": 0.11572265625, "learning_rate": 0.0005484096553094269, "loss": 0.526, "step": 63320 }, { "epoch": 3.145425648157346, "grad_norm": 0.134765625, "learning_rate": 0.0005483699215257773, "loss": 0.5358, "step": 63330 }, { "epoch": 3.145922320452965, "grad_norm": 0.11767578125, "learning_rate": 0.0005483301877421278, "loss": 0.5369, "step": 63340 }, { "epoch": 3.1464189927485844, "grad_norm": 0.140625, "learning_rate": 0.0005482904539584782, "loss": 0.537, "step": 63350 }, { "epoch": 3.1469156650442036, "grad_norm": 0.1181640625, "learning_rate": 0.0005482507201748286, "loss": 0.5195, "step": 63360 }, { "epoch": 3.1474123373398233, "grad_norm": 0.10009765625, "learning_rate": 0.0005482109863911792, "loss": 0.5355, "step": 63370 }, { "epoch": 3.1479090096354425, "grad_norm": 0.138671875, "learning_rate": 0.0005481712526075296, "loss": 0.5421, "step": 63380 }, { "epoch": 3.1484056819310617, "grad_norm": 0.125, "learning_rate": 0.00054813151882388, "loss": 0.5275, "step": 63390 }, { "epoch": 3.1489023542266814, "grad_norm": 0.11279296875, "learning_rate": 0.0005480917850402305, "loss": 0.5508, "step": 63400 }, { "epoch": 3.1493990265223006, "grad_norm": 0.10986328125, "learning_rate": 0.000548052051256581, "loss": 0.5531, "step": 63410 }, { "epoch": 3.14989569881792, "grad_norm": 0.1025390625, "learning_rate": 0.0005480123174729314, "loss": 0.5266, "step": 63420 }, { "epoch": 3.150392371113539, "grad_norm": 0.10400390625, "learning_rate": 0.0005479725836892819, "loss": 0.5375, "step": 63430 }, { "epoch": 3.1508890434091588, "grad_norm": 0.10205078125, "learning_rate": 0.0005479328499056322, "loss": 0.5275, "step": 63440 }, { "epoch": 3.151385715704778, "grad_norm": 0.142578125, "learning_rate": 0.0005478931161219827, "loss": 0.5394, "step": 63450 }, { "epoch": 3.151882388000397, "grad_norm": 0.125, "learning_rate": 0.0005478533823383332, "loss": 0.5159, "step": 63460 }, { "epoch": 3.152379060296017, "grad_norm": 0.158203125, "learning_rate": 0.0005478136485546837, "loss": 0.5586, "step": 63470 }, { "epoch": 3.152875732591636, "grad_norm": 0.1279296875, "learning_rate": 0.0005477739147710341, "loss": 0.5405, "step": 63480 }, { "epoch": 3.1533724048872553, "grad_norm": 0.1318359375, "learning_rate": 0.0005477341809873846, "loss": 0.5514, "step": 63490 }, { "epoch": 3.1538690771828746, "grad_norm": 0.1015625, "learning_rate": 0.000547694447203735, "loss": 0.5254, "step": 63500 }, { "epoch": 3.1543657494784942, "grad_norm": 0.115234375, "learning_rate": 0.0005476547134200854, "loss": 0.5192, "step": 63510 }, { "epoch": 3.1548624217741135, "grad_norm": 0.10986328125, "learning_rate": 0.0005476149796364359, "loss": 0.5334, "step": 63520 }, { "epoch": 3.1553590940697327, "grad_norm": 0.12060546875, "learning_rate": 0.0005475752458527864, "loss": 0.5305, "step": 63530 }, { "epoch": 3.1558557663653524, "grad_norm": 0.1337890625, "learning_rate": 0.0005475355120691368, "loss": 0.5516, "step": 63540 }, { "epoch": 3.1563524386609716, "grad_norm": 0.11474609375, "learning_rate": 0.0005474957782854872, "loss": 0.5457, "step": 63550 }, { "epoch": 3.156849110956591, "grad_norm": 0.095703125, "learning_rate": 0.0005474560445018377, "loss": 0.5303, "step": 63560 }, { "epoch": 3.15734578325221, "grad_norm": 0.08935546875, "learning_rate": 0.0005474163107181882, "loss": 0.5292, "step": 63570 }, { "epoch": 3.1578424555478297, "grad_norm": 0.10400390625, "learning_rate": 0.0005473765769345386, "loss": 0.5164, "step": 63580 }, { "epoch": 3.158339127843449, "grad_norm": 0.11376953125, "learning_rate": 0.0005473368431508891, "loss": 0.5831, "step": 63590 }, { "epoch": 3.158835800139068, "grad_norm": 0.10693359375, "learning_rate": 0.0005472971093672395, "loss": 0.5374, "step": 63600 }, { "epoch": 3.1593324724346874, "grad_norm": 0.1318359375, "learning_rate": 0.0005472573755835899, "loss": 0.5068, "step": 63610 }, { "epoch": 3.159829144730307, "grad_norm": 0.1923828125, "learning_rate": 0.0005472176417999405, "loss": 0.5486, "step": 63620 }, { "epoch": 3.1603258170259263, "grad_norm": 0.11083984375, "learning_rate": 0.0005471779080162909, "loss": 0.5547, "step": 63630 }, { "epoch": 3.1608224893215455, "grad_norm": 0.1259765625, "learning_rate": 0.0005471381742326413, "loss": 0.5387, "step": 63640 }, { "epoch": 3.161319161617165, "grad_norm": 0.1162109375, "learning_rate": 0.0005470984404489918, "loss": 0.5184, "step": 63650 }, { "epoch": 3.1618158339127844, "grad_norm": 0.10400390625, "learning_rate": 0.0005470587066653422, "loss": 0.5584, "step": 63660 }, { "epoch": 3.1623125062084037, "grad_norm": 0.10595703125, "learning_rate": 0.0005470189728816927, "loss": 0.5666, "step": 63670 }, { "epoch": 3.162809178504023, "grad_norm": 0.111328125, "learning_rate": 0.0005469792390980432, "loss": 0.552, "step": 63680 }, { "epoch": 3.1633058507996425, "grad_norm": 0.099609375, "learning_rate": 0.0005469395053143936, "loss": 0.564, "step": 63690 }, { "epoch": 3.1638025230952618, "grad_norm": 0.11669921875, "learning_rate": 0.000546899771530744, "loss": 0.5487, "step": 63700 }, { "epoch": 3.164299195390881, "grad_norm": 0.1005859375, "learning_rate": 0.0005468600377470944, "loss": 0.5383, "step": 63710 }, { "epoch": 3.1647958676865002, "grad_norm": 0.11962890625, "learning_rate": 0.000546820303963445, "loss": 0.5467, "step": 63720 }, { "epoch": 3.16529253998212, "grad_norm": 0.1533203125, "learning_rate": 0.0005467805701797954, "loss": 0.573, "step": 63730 }, { "epoch": 3.165789212277739, "grad_norm": 0.1953125, "learning_rate": 0.0005467408363961458, "loss": 0.5421, "step": 63740 }, { "epoch": 3.1662858845733584, "grad_norm": 0.0927734375, "learning_rate": 0.0005467011026124963, "loss": 0.5125, "step": 63750 }, { "epoch": 3.166782556868978, "grad_norm": 0.09716796875, "learning_rate": 0.0005466613688288467, "loss": 0.5599, "step": 63760 }, { "epoch": 3.1672792291645973, "grad_norm": 0.11328125, "learning_rate": 0.0005466216350451972, "loss": 0.5077, "step": 63770 }, { "epoch": 3.1677759014602165, "grad_norm": 0.1337890625, "learning_rate": 0.0005465819012615477, "loss": 0.5331, "step": 63780 }, { "epoch": 3.1682725737558357, "grad_norm": 0.11181640625, "learning_rate": 0.0005465421674778981, "loss": 0.5454, "step": 63790 }, { "epoch": 3.1687692460514554, "grad_norm": 0.12158203125, "learning_rate": 0.0005465024336942485, "loss": 0.5786, "step": 63800 }, { "epoch": 3.1692659183470746, "grad_norm": 0.091796875, "learning_rate": 0.000546462699910599, "loss": 0.5214, "step": 63810 }, { "epoch": 3.169762590642694, "grad_norm": 0.09423828125, "learning_rate": 0.0005464229661269495, "loss": 0.5239, "step": 63820 }, { "epoch": 3.1702592629383135, "grad_norm": 0.119140625, "learning_rate": 0.0005463832323432999, "loss": 0.5442, "step": 63830 }, { "epoch": 3.1707559352339327, "grad_norm": 0.1123046875, "learning_rate": 0.0005463434985596504, "loss": 0.5585, "step": 63840 }, { "epoch": 3.171252607529552, "grad_norm": 0.1005859375, "learning_rate": 0.0005463037647760008, "loss": 0.5462, "step": 63850 }, { "epoch": 3.171749279825171, "grad_norm": 0.10400390625, "learning_rate": 0.0005462640309923512, "loss": 0.5352, "step": 63860 }, { "epoch": 3.172245952120791, "grad_norm": 0.10107421875, "learning_rate": 0.0005462242972087018, "loss": 0.5169, "step": 63870 }, { "epoch": 3.17274262441641, "grad_norm": 0.11328125, "learning_rate": 0.0005461845634250522, "loss": 0.5455, "step": 63880 }, { "epoch": 3.1732392967120293, "grad_norm": 0.10107421875, "learning_rate": 0.0005461448296414026, "loss": 0.5168, "step": 63890 }, { "epoch": 3.173735969007649, "grad_norm": 0.1376953125, "learning_rate": 0.0005461050958577531, "loss": 0.5427, "step": 63900 }, { "epoch": 3.174232641303268, "grad_norm": 0.1240234375, "learning_rate": 0.0005460653620741035, "loss": 0.5546, "step": 63910 }, { "epoch": 3.1747293135988874, "grad_norm": 0.10498046875, "learning_rate": 0.0005460256282904541, "loss": 0.5372, "step": 63920 }, { "epoch": 3.1752259858945067, "grad_norm": 0.125, "learning_rate": 0.0005459858945068044, "loss": 0.5347, "step": 63930 }, { "epoch": 3.1757226581901263, "grad_norm": 0.115234375, "learning_rate": 0.0005459461607231549, "loss": 0.5307, "step": 63940 }, { "epoch": 3.1762193304857456, "grad_norm": 0.10205078125, "learning_rate": 0.0005459064269395054, "loss": 0.5004, "step": 63950 }, { "epoch": 3.176716002781365, "grad_norm": 0.1015625, "learning_rate": 0.0005458666931558557, "loss": 0.5764, "step": 63960 }, { "epoch": 3.177212675076984, "grad_norm": 0.1591796875, "learning_rate": 0.0005458269593722063, "loss": 0.5565, "step": 63970 }, { "epoch": 3.1777093473726037, "grad_norm": 0.11865234375, "learning_rate": 0.0005457872255885567, "loss": 0.5638, "step": 63980 }, { "epoch": 3.178206019668223, "grad_norm": 0.10205078125, "learning_rate": 0.0005457474918049071, "loss": 0.5408, "step": 63990 }, { "epoch": 3.178702691963842, "grad_norm": 0.09814453125, "learning_rate": 0.0005457077580212576, "loss": 0.5744, "step": 64000 }, { "epoch": 3.179199364259462, "grad_norm": 0.1298828125, "learning_rate": 0.000545668024237608, "loss": 0.5473, "step": 64010 }, { "epoch": 3.179696036555081, "grad_norm": 0.146484375, "learning_rate": 0.0005456282904539585, "loss": 0.5372, "step": 64020 }, { "epoch": 3.1801927088507003, "grad_norm": 0.1337890625, "learning_rate": 0.000545588556670309, "loss": 0.5091, "step": 64030 }, { "epoch": 3.1806893811463195, "grad_norm": 0.1171875, "learning_rate": 0.0005455488228866594, "loss": 0.5632, "step": 64040 }, { "epoch": 3.181186053441939, "grad_norm": 0.0966796875, "learning_rate": 0.0005455090891030098, "loss": 0.5374, "step": 64050 }, { "epoch": 3.1816827257375584, "grad_norm": 0.1279296875, "learning_rate": 0.0005454693553193603, "loss": 0.5519, "step": 64060 }, { "epoch": 3.1821793980331776, "grad_norm": 0.11474609375, "learning_rate": 0.0005454296215357108, "loss": 0.5184, "step": 64070 }, { "epoch": 3.182676070328797, "grad_norm": 0.1806640625, "learning_rate": 0.0005453898877520613, "loss": 0.5471, "step": 64080 }, { "epoch": 3.1831727426244165, "grad_norm": 0.08740234375, "learning_rate": 0.0005453501539684117, "loss": 0.5274, "step": 64090 }, { "epoch": 3.1836694149200357, "grad_norm": 0.10205078125, "learning_rate": 0.0005453104201847621, "loss": 0.5229, "step": 64100 }, { "epoch": 3.184166087215655, "grad_norm": 0.10546875, "learning_rate": 0.0005452706864011126, "loss": 0.5344, "step": 64110 }, { "epoch": 3.1846627595112746, "grad_norm": 0.1416015625, "learning_rate": 0.000545230952617463, "loss": 0.5281, "step": 64120 }, { "epoch": 3.185159431806894, "grad_norm": 0.0966796875, "learning_rate": 0.0005451912188338135, "loss": 0.5332, "step": 64130 }, { "epoch": 3.185656104102513, "grad_norm": 0.1884765625, "learning_rate": 0.000545151485050164, "loss": 0.5792, "step": 64140 }, { "epoch": 3.1861527763981323, "grad_norm": 0.109375, "learning_rate": 0.0005451117512665143, "loss": 0.5684, "step": 64150 }, { "epoch": 3.186649448693752, "grad_norm": 0.1201171875, "learning_rate": 0.0005450720174828648, "loss": 0.5583, "step": 64160 }, { "epoch": 3.1871461209893712, "grad_norm": 0.10888671875, "learning_rate": 0.0005450322836992154, "loss": 0.5471, "step": 64170 }, { "epoch": 3.1876427932849905, "grad_norm": 0.1376953125, "learning_rate": 0.0005449925499155657, "loss": 0.5191, "step": 64180 }, { "epoch": 3.18813946558061, "grad_norm": 0.10888671875, "learning_rate": 0.0005449528161319162, "loss": 0.5254, "step": 64190 }, { "epoch": 3.1886361378762293, "grad_norm": 0.0927734375, "learning_rate": 0.0005449130823482666, "loss": 0.5393, "step": 64200 }, { "epoch": 3.1891328101718486, "grad_norm": 0.11669921875, "learning_rate": 0.000544873348564617, "loss": 0.5409, "step": 64210 }, { "epoch": 3.189629482467468, "grad_norm": 0.1318359375, "learning_rate": 0.0005448336147809676, "loss": 0.5351, "step": 64220 }, { "epoch": 3.1901261547630875, "grad_norm": 0.142578125, "learning_rate": 0.000544793880997318, "loss": 0.5417, "step": 64230 }, { "epoch": 3.1906228270587067, "grad_norm": 0.109375, "learning_rate": 0.0005447541472136685, "loss": 0.5297, "step": 64240 }, { "epoch": 3.191119499354326, "grad_norm": 0.11376953125, "learning_rate": 0.0005447144134300189, "loss": 0.5272, "step": 64250 }, { "epoch": 3.1916161716499456, "grad_norm": 0.10205078125, "learning_rate": 0.0005446746796463693, "loss": 0.5497, "step": 64260 }, { "epoch": 3.192112843945565, "grad_norm": 0.10498046875, "learning_rate": 0.0005446349458627199, "loss": 0.5246, "step": 64270 }, { "epoch": 3.192609516241184, "grad_norm": 0.11767578125, "learning_rate": 0.0005445952120790703, "loss": 0.5257, "step": 64280 }, { "epoch": 3.1931061885368033, "grad_norm": 0.10009765625, "learning_rate": 0.0005445554782954207, "loss": 0.5019, "step": 64290 }, { "epoch": 3.193602860832423, "grad_norm": 0.1298828125, "learning_rate": 0.0005445157445117712, "loss": 0.5745, "step": 64300 }, { "epoch": 3.194099533128042, "grad_norm": 0.1513671875, "learning_rate": 0.0005444760107281215, "loss": 0.5407, "step": 64310 }, { "epoch": 3.1945962054236614, "grad_norm": 0.1513671875, "learning_rate": 0.000544436276944472, "loss": 0.5295, "step": 64320 }, { "epoch": 3.1950928777192806, "grad_norm": 0.099609375, "learning_rate": 0.0005443965431608226, "loss": 0.5248, "step": 64330 }, { "epoch": 3.1955895500149003, "grad_norm": 0.12109375, "learning_rate": 0.0005443568093771729, "loss": 0.5247, "step": 64340 }, { "epoch": 3.1960862223105195, "grad_norm": 0.107421875, "learning_rate": 0.0005443170755935234, "loss": 0.5307, "step": 64350 }, { "epoch": 3.1965828946061388, "grad_norm": 0.1162109375, "learning_rate": 0.0005442773418098739, "loss": 0.5546, "step": 64360 }, { "epoch": 3.197079566901758, "grad_norm": 0.1064453125, "learning_rate": 0.0005442376080262244, "loss": 0.5415, "step": 64370 }, { "epoch": 3.1975762391973777, "grad_norm": 0.12890625, "learning_rate": 0.0005441978742425748, "loss": 0.5384, "step": 64380 }, { "epoch": 3.198072911492997, "grad_norm": 0.1044921875, "learning_rate": 0.0005441581404589252, "loss": 0.5277, "step": 64390 }, { "epoch": 3.198569583788616, "grad_norm": 0.12060546875, "learning_rate": 0.0005441184066752757, "loss": 0.5105, "step": 64400 }, { "epoch": 3.199066256084236, "grad_norm": 0.1279296875, "learning_rate": 0.0005440786728916261, "loss": 0.526, "step": 64410 }, { "epoch": 3.199562928379855, "grad_norm": 0.11572265625, "learning_rate": 0.0005440389391079765, "loss": 0.5225, "step": 64420 }, { "epoch": 3.2000596006754742, "grad_norm": 0.1181640625, "learning_rate": 0.0005439992053243271, "loss": 0.5477, "step": 64430 }, { "epoch": 3.2005562729710935, "grad_norm": 0.1376953125, "learning_rate": 0.0005439594715406775, "loss": 0.5536, "step": 64440 }, { "epoch": 3.201052945266713, "grad_norm": 0.09716796875, "learning_rate": 0.0005439197377570279, "loss": 0.5606, "step": 64450 }, { "epoch": 3.2015496175623324, "grad_norm": 0.12060546875, "learning_rate": 0.0005438800039733784, "loss": 0.5213, "step": 64460 }, { "epoch": 3.2020462898579516, "grad_norm": 0.11572265625, "learning_rate": 0.0005438402701897289, "loss": 0.5197, "step": 64470 }, { "epoch": 3.2025429621535713, "grad_norm": 0.10986328125, "learning_rate": 0.0005438005364060793, "loss": 0.5368, "step": 64480 }, { "epoch": 3.2030396344491905, "grad_norm": 0.1103515625, "learning_rate": 0.0005437608026224298, "loss": 0.5127, "step": 64490 }, { "epoch": 3.2035363067448097, "grad_norm": 0.12109375, "learning_rate": 0.0005437210688387802, "loss": 0.5114, "step": 64500 }, { "epoch": 3.204032979040429, "grad_norm": 0.12255859375, "learning_rate": 0.0005436813350551306, "loss": 0.5162, "step": 64510 }, { "epoch": 3.2045296513360486, "grad_norm": 0.09375, "learning_rate": 0.0005436416012714812, "loss": 0.5653, "step": 64520 }, { "epoch": 3.205026323631668, "grad_norm": 0.173828125, "learning_rate": 0.0005436018674878316, "loss": 0.56, "step": 64530 }, { "epoch": 3.205522995927287, "grad_norm": 0.103515625, "learning_rate": 0.000543562133704182, "loss": 0.5731, "step": 64540 }, { "epoch": 3.2060196682229067, "grad_norm": 0.09619140625, "learning_rate": 0.0005435223999205325, "loss": 0.5533, "step": 64550 }, { "epoch": 3.206516340518526, "grad_norm": 0.11474609375, "learning_rate": 0.0005434826661368829, "loss": 0.5644, "step": 64560 }, { "epoch": 3.207013012814145, "grad_norm": 0.0908203125, "learning_rate": 0.0005434429323532333, "loss": 0.5294, "step": 64570 }, { "epoch": 3.2075096851097644, "grad_norm": 0.1044921875, "learning_rate": 0.0005434031985695838, "loss": 0.5623, "step": 64580 }, { "epoch": 3.208006357405384, "grad_norm": 0.10205078125, "learning_rate": 0.0005433634647859343, "loss": 0.5674, "step": 64590 }, { "epoch": 3.2085030297010033, "grad_norm": 0.11328125, "learning_rate": 0.0005433237310022847, "loss": 0.5413, "step": 64600 }, { "epoch": 3.2089997019966225, "grad_norm": 0.10986328125, "learning_rate": 0.0005432839972186351, "loss": 0.517, "step": 64610 }, { "epoch": 3.2094963742922418, "grad_norm": 0.11279296875, "learning_rate": 0.0005432442634349856, "loss": 0.5401, "step": 64620 }, { "epoch": 3.2099930465878614, "grad_norm": 0.0966796875, "learning_rate": 0.0005432045296513361, "loss": 0.5611, "step": 64630 }, { "epoch": 3.2104897188834807, "grad_norm": 0.1103515625, "learning_rate": 0.0005431647958676865, "loss": 0.5207, "step": 64640 }, { "epoch": 3.2109863911791, "grad_norm": 0.0966796875, "learning_rate": 0.000543125062084037, "loss": 0.5277, "step": 64650 }, { "epoch": 3.2114830634747196, "grad_norm": 0.10693359375, "learning_rate": 0.0005430853283003874, "loss": 0.5313, "step": 64660 }, { "epoch": 3.211979735770339, "grad_norm": 0.171875, "learning_rate": 0.0005430455945167378, "loss": 0.5264, "step": 64670 }, { "epoch": 3.212476408065958, "grad_norm": 0.09228515625, "learning_rate": 0.0005430058607330884, "loss": 0.5481, "step": 64680 }, { "epoch": 3.2129730803615772, "grad_norm": 0.111328125, "learning_rate": 0.0005429661269494388, "loss": 0.5533, "step": 64690 }, { "epoch": 3.213469752657197, "grad_norm": 0.10205078125, "learning_rate": 0.0005429263931657892, "loss": 0.5321, "step": 64700 }, { "epoch": 3.213966424952816, "grad_norm": 0.1201171875, "learning_rate": 0.0005428866593821397, "loss": 0.5544, "step": 64710 }, { "epoch": 3.2144630972484354, "grad_norm": 0.09814453125, "learning_rate": 0.0005428469255984901, "loss": 0.5066, "step": 64720 }, { "epoch": 3.2149597695440546, "grad_norm": 0.1015625, "learning_rate": 0.0005428071918148406, "loss": 0.5334, "step": 64730 }, { "epoch": 3.2154564418396743, "grad_norm": 0.18359375, "learning_rate": 0.0005427674580311911, "loss": 0.5496, "step": 64740 }, { "epoch": 3.2159531141352935, "grad_norm": 0.10595703125, "learning_rate": 0.0005427277242475415, "loss": 0.5501, "step": 64750 }, { "epoch": 3.2164497864309127, "grad_norm": 0.12060546875, "learning_rate": 0.0005426879904638919, "loss": 0.5431, "step": 64760 }, { "epoch": 3.2169464587265324, "grad_norm": 0.12109375, "learning_rate": 0.0005426482566802424, "loss": 0.5598, "step": 64770 }, { "epoch": 3.2174431310221516, "grad_norm": 0.16796875, "learning_rate": 0.0005426085228965929, "loss": 0.5536, "step": 64780 }, { "epoch": 3.217939803317771, "grad_norm": 0.095703125, "learning_rate": 0.0005425687891129433, "loss": 0.5343, "step": 64790 }, { "epoch": 3.21843647561339, "grad_norm": 0.099609375, "learning_rate": 0.0005425290553292937, "loss": 0.5431, "step": 64800 }, { "epoch": 3.2189331479090098, "grad_norm": 0.10107421875, "learning_rate": 0.0005424893215456442, "loss": 0.5393, "step": 64810 }, { "epoch": 3.219429820204629, "grad_norm": 0.150390625, "learning_rate": 0.0005424495877619948, "loss": 0.5208, "step": 64820 }, { "epoch": 3.219926492500248, "grad_norm": 0.1728515625, "learning_rate": 0.0005424098539783451, "loss": 0.5303, "step": 64830 }, { "epoch": 3.220423164795868, "grad_norm": 0.1875, "learning_rate": 0.0005423701201946956, "loss": 0.5394, "step": 64840 }, { "epoch": 3.220919837091487, "grad_norm": 0.1396484375, "learning_rate": 0.000542330386411046, "loss": 0.5353, "step": 64850 }, { "epoch": 3.2214165093871063, "grad_norm": 0.13671875, "learning_rate": 0.0005422906526273964, "loss": 0.5584, "step": 64860 }, { "epoch": 3.2219131816827256, "grad_norm": 0.10595703125, "learning_rate": 0.0005422509188437469, "loss": 0.5353, "step": 64870 }, { "epoch": 3.2224098539783452, "grad_norm": 0.11279296875, "learning_rate": 0.0005422111850600974, "loss": 0.524, "step": 64880 }, { "epoch": 3.2229065262739645, "grad_norm": 0.1123046875, "learning_rate": 0.0005421714512764478, "loss": 0.5158, "step": 64890 }, { "epoch": 3.2234031985695837, "grad_norm": 0.115234375, "learning_rate": 0.0005421317174927983, "loss": 0.537, "step": 64900 }, { "epoch": 3.2238998708652034, "grad_norm": 0.1201171875, "learning_rate": 0.0005420919837091487, "loss": 0.511, "step": 64910 }, { "epoch": 3.2243965431608226, "grad_norm": 0.126953125, "learning_rate": 0.0005420522499254991, "loss": 0.5867, "step": 64920 }, { "epoch": 3.224893215456442, "grad_norm": 0.0986328125, "learning_rate": 0.0005420125161418497, "loss": 0.5472, "step": 64930 }, { "epoch": 3.225389887752061, "grad_norm": 0.1357421875, "learning_rate": 0.0005419727823582001, "loss": 0.5355, "step": 64940 }, { "epoch": 3.2258865600476807, "grad_norm": 0.10595703125, "learning_rate": 0.0005419330485745505, "loss": 0.5414, "step": 64950 }, { "epoch": 3.2263832323433, "grad_norm": 0.1787109375, "learning_rate": 0.000541893314790901, "loss": 0.5407, "step": 64960 }, { "epoch": 3.226879904638919, "grad_norm": 0.09814453125, "learning_rate": 0.0005418535810072514, "loss": 0.5396, "step": 64970 }, { "epoch": 3.2273765769345384, "grad_norm": 0.09375, "learning_rate": 0.000541813847223602, "loss": 0.5581, "step": 64980 }, { "epoch": 3.227873249230158, "grad_norm": 0.1162109375, "learning_rate": 0.0005417741134399523, "loss": 0.5349, "step": 64990 }, { "epoch": 3.2283699215257773, "grad_norm": 0.091796875, "learning_rate": 0.0005417343796563028, "loss": 0.5391, "step": 65000 }, { "epoch": 3.2288665938213965, "grad_norm": 0.1064453125, "learning_rate": 0.0005416946458726533, "loss": 0.5329, "step": 65010 }, { "epoch": 3.229363266117016, "grad_norm": 0.111328125, "learning_rate": 0.0005416549120890036, "loss": 0.5257, "step": 65020 }, { "epoch": 3.2298599384126354, "grad_norm": 0.1298828125, "learning_rate": 0.0005416151783053542, "loss": 0.5322, "step": 65030 }, { "epoch": 3.2303566107082546, "grad_norm": 0.12353515625, "learning_rate": 0.0005415754445217047, "loss": 0.5467, "step": 65040 }, { "epoch": 3.230853283003874, "grad_norm": 0.10498046875, "learning_rate": 0.000541535710738055, "loss": 0.5571, "step": 65050 }, { "epoch": 3.2313499552994935, "grad_norm": 0.1142578125, "learning_rate": 0.0005414959769544055, "loss": 0.5597, "step": 65060 }, { "epoch": 3.2318466275951128, "grad_norm": 0.1083984375, "learning_rate": 0.0005414562431707559, "loss": 0.5388, "step": 65070 }, { "epoch": 3.232343299890732, "grad_norm": 0.12158203125, "learning_rate": 0.0005414165093871064, "loss": 0.5341, "step": 65080 }, { "epoch": 3.232839972186351, "grad_norm": 0.1015625, "learning_rate": 0.0005413767756034569, "loss": 0.5413, "step": 65090 }, { "epoch": 3.233336644481971, "grad_norm": 0.1025390625, "learning_rate": 0.0005413370418198073, "loss": 0.5701, "step": 65100 }, { "epoch": 3.23383331677759, "grad_norm": 0.109375, "learning_rate": 0.0005412973080361578, "loss": 0.5333, "step": 65110 }, { "epoch": 3.2343299890732093, "grad_norm": 0.099609375, "learning_rate": 0.0005412575742525082, "loss": 0.5367, "step": 65120 }, { "epoch": 3.234826661368829, "grad_norm": 0.1318359375, "learning_rate": 0.0005412178404688587, "loss": 0.5415, "step": 65130 }, { "epoch": 3.2353233336644482, "grad_norm": 0.10400390625, "learning_rate": 0.0005411781066852092, "loss": 0.5212, "step": 65140 }, { "epoch": 3.2358200059600675, "grad_norm": 0.130859375, "learning_rate": 0.0005411383729015596, "loss": 0.529, "step": 65150 }, { "epoch": 3.2363166782556867, "grad_norm": 0.1005859375, "learning_rate": 0.00054109863911791, "loss": 0.5667, "step": 65160 }, { "epoch": 3.2368133505513064, "grad_norm": 0.08984375, "learning_rate": 0.0005410589053342605, "loss": 0.5229, "step": 65170 }, { "epoch": 3.2373100228469256, "grad_norm": 0.1171875, "learning_rate": 0.0005410191715506109, "loss": 0.5514, "step": 65180 }, { "epoch": 3.237806695142545, "grad_norm": 0.12158203125, "learning_rate": 0.0005409794377669614, "loss": 0.5677, "step": 65190 }, { "epoch": 3.2383033674381645, "grad_norm": 0.103515625, "learning_rate": 0.0005409397039833119, "loss": 0.5259, "step": 65200 }, { "epoch": 3.2388000397337837, "grad_norm": 0.1240234375, "learning_rate": 0.0005408999701996622, "loss": 0.5209, "step": 65210 }, { "epoch": 3.239296712029403, "grad_norm": 0.10400390625, "learning_rate": 0.0005408602364160127, "loss": 0.532, "step": 65220 }, { "epoch": 3.239793384325022, "grad_norm": 0.10107421875, "learning_rate": 0.0005408205026323633, "loss": 0.5373, "step": 65230 }, { "epoch": 3.240290056620642, "grad_norm": 0.123046875, "learning_rate": 0.0005407807688487136, "loss": 0.5476, "step": 65240 }, { "epoch": 3.240786728916261, "grad_norm": 0.0966796875, "learning_rate": 0.0005407410350650641, "loss": 0.5462, "step": 65250 }, { "epoch": 3.2412834012118803, "grad_norm": 0.109375, "learning_rate": 0.0005407013012814145, "loss": 0.5377, "step": 65260 }, { "epoch": 3.2417800735075, "grad_norm": 0.12060546875, "learning_rate": 0.000540661567497765, "loss": 0.557, "step": 65270 }, { "epoch": 3.242276745803119, "grad_norm": 0.09521484375, "learning_rate": 0.0005406218337141155, "loss": 0.5376, "step": 65280 }, { "epoch": 3.2427734180987384, "grad_norm": 0.0927734375, "learning_rate": 0.0005405820999304659, "loss": 0.5319, "step": 65290 }, { "epoch": 3.2432700903943577, "grad_norm": 0.10400390625, "learning_rate": 0.0005405423661468164, "loss": 0.536, "step": 65300 }, { "epoch": 3.2437667626899773, "grad_norm": 0.11328125, "learning_rate": 0.0005405026323631668, "loss": 0.5568, "step": 65310 }, { "epoch": 3.2442634349855966, "grad_norm": 0.10986328125, "learning_rate": 0.0005404628985795172, "loss": 0.5463, "step": 65320 }, { "epoch": 3.2447601072812158, "grad_norm": 0.11474609375, "learning_rate": 0.0005404231647958678, "loss": 0.5409, "step": 65330 }, { "epoch": 3.245256779576835, "grad_norm": 0.14453125, "learning_rate": 0.0005403834310122182, "loss": 0.5473, "step": 65340 }, { "epoch": 3.2457534518724547, "grad_norm": 0.09765625, "learning_rate": 0.0005403436972285686, "loss": 0.5728, "step": 65350 }, { "epoch": 3.246250124168074, "grad_norm": 0.10546875, "learning_rate": 0.0005403039634449191, "loss": 0.5339, "step": 65360 }, { "epoch": 3.246746796463693, "grad_norm": 0.1455078125, "learning_rate": 0.0005402642296612694, "loss": 0.57, "step": 65370 }, { "epoch": 3.247243468759313, "grad_norm": 0.10888671875, "learning_rate": 0.00054022449587762, "loss": 0.5259, "step": 65380 }, { "epoch": 3.247740141054932, "grad_norm": 0.1240234375, "learning_rate": 0.0005401847620939705, "loss": 0.5278, "step": 65390 }, { "epoch": 3.2482368133505513, "grad_norm": 0.1064453125, "learning_rate": 0.0005401450283103208, "loss": 0.5341, "step": 65400 }, { "epoch": 3.2487334856461705, "grad_norm": 0.1015625, "learning_rate": 0.0005401052945266713, "loss": 0.5482, "step": 65410 }, { "epoch": 3.24923015794179, "grad_norm": 0.1396484375, "learning_rate": 0.0005400655607430218, "loss": 0.5626, "step": 65420 }, { "epoch": 3.2497268302374094, "grad_norm": 0.1435546875, "learning_rate": 0.0005400258269593723, "loss": 0.5739, "step": 65430 }, { "epoch": 3.2502235025330286, "grad_norm": 0.1416015625, "learning_rate": 0.0005399860931757227, "loss": 0.5266, "step": 65440 }, { "epoch": 3.250720174828648, "grad_norm": 0.158203125, "learning_rate": 0.0005399463593920731, "loss": 0.541, "step": 65450 }, { "epoch": 3.2512168471242675, "grad_norm": 0.1318359375, "learning_rate": 0.0005399066256084236, "loss": 0.5476, "step": 65460 }, { "epoch": 3.2517135194198867, "grad_norm": 0.0986328125, "learning_rate": 0.000539866891824774, "loss": 0.5393, "step": 65470 }, { "epoch": 3.252210191715506, "grad_norm": 0.125, "learning_rate": 0.0005398271580411245, "loss": 0.5299, "step": 65480 }, { "epoch": 3.2527068640111256, "grad_norm": 0.1259765625, "learning_rate": 0.000539787424257475, "loss": 0.5222, "step": 65490 }, { "epoch": 3.253203536306745, "grad_norm": 0.13671875, "learning_rate": 0.0005397476904738254, "loss": 0.544, "step": 65500 }, { "epoch": 3.253700208602364, "grad_norm": 0.1708984375, "learning_rate": 0.0005397079566901758, "loss": 0.547, "step": 65510 }, { "epoch": 3.2541968808979833, "grad_norm": 0.1630859375, "learning_rate": 0.0005396682229065263, "loss": 0.5586, "step": 65520 }, { "epoch": 3.254693553193603, "grad_norm": 0.09619140625, "learning_rate": 0.0005396284891228768, "loss": 0.55, "step": 65530 }, { "epoch": 3.255190225489222, "grad_norm": 0.1064453125, "learning_rate": 0.0005395887553392272, "loss": 0.5558, "step": 65540 }, { "epoch": 3.2556868977848414, "grad_norm": 0.185546875, "learning_rate": 0.0005395490215555777, "loss": 0.5421, "step": 65550 }, { "epoch": 3.256183570080461, "grad_norm": 0.130859375, "learning_rate": 0.0005395092877719281, "loss": 0.5712, "step": 65560 }, { "epoch": 3.2566802423760803, "grad_norm": 0.10791015625, "learning_rate": 0.0005394695539882785, "loss": 0.5159, "step": 65570 }, { "epoch": 3.2571769146716996, "grad_norm": 0.11865234375, "learning_rate": 0.0005394298202046291, "loss": 0.545, "step": 65580 }, { "epoch": 3.257673586967319, "grad_norm": 0.1376953125, "learning_rate": 0.0005393900864209795, "loss": 0.535, "step": 65590 }, { "epoch": 3.2581702592629385, "grad_norm": 0.1298828125, "learning_rate": 0.0005393503526373299, "loss": 0.5352, "step": 65600 }, { "epoch": 3.2586669315585577, "grad_norm": 0.1083984375, "learning_rate": 0.0005393106188536804, "loss": 0.5546, "step": 65610 }, { "epoch": 3.259163603854177, "grad_norm": 0.1552734375, "learning_rate": 0.0005392708850700308, "loss": 0.5228, "step": 65620 }, { "epoch": 3.2596602761497966, "grad_norm": 0.109375, "learning_rate": 0.0005392311512863813, "loss": 0.5123, "step": 65630 }, { "epoch": 3.260156948445416, "grad_norm": 0.1611328125, "learning_rate": 0.0005391914175027317, "loss": 0.5049, "step": 65640 }, { "epoch": 3.260653620741035, "grad_norm": 0.11279296875, "learning_rate": 0.0005391516837190822, "loss": 0.5455, "step": 65650 }, { "epoch": 3.2611502930366543, "grad_norm": 0.1005859375, "learning_rate": 0.0005391119499354326, "loss": 0.5342, "step": 65660 }, { "epoch": 3.261646965332274, "grad_norm": 0.171875, "learning_rate": 0.000539072216151783, "loss": 0.5565, "step": 65670 }, { "epoch": 3.262143637627893, "grad_norm": 0.11962890625, "learning_rate": 0.0005390324823681336, "loss": 0.5447, "step": 65680 }, { "epoch": 3.2626403099235124, "grad_norm": 0.130859375, "learning_rate": 0.000538992748584484, "loss": 0.5521, "step": 65690 }, { "epoch": 3.263136982219132, "grad_norm": 0.103515625, "learning_rate": 0.0005389530148008344, "loss": 0.5454, "step": 65700 }, { "epoch": 3.2636336545147513, "grad_norm": 0.1943359375, "learning_rate": 0.0005389132810171849, "loss": 0.5164, "step": 65710 }, { "epoch": 3.2641303268103705, "grad_norm": 0.09228515625, "learning_rate": 0.0005388735472335353, "loss": 0.5159, "step": 65720 }, { "epoch": 3.2646269991059897, "grad_norm": 0.10302734375, "learning_rate": 0.0005388338134498857, "loss": 0.5367, "step": 65730 }, { "epoch": 3.265123671401609, "grad_norm": 0.1748046875, "learning_rate": 0.0005387940796662363, "loss": 0.5297, "step": 65740 }, { "epoch": 3.2656203436972286, "grad_norm": 0.13671875, "learning_rate": 0.0005387543458825867, "loss": 0.5748, "step": 65750 }, { "epoch": 3.266117015992848, "grad_norm": 0.1328125, "learning_rate": 0.0005387146120989371, "loss": 0.5504, "step": 65760 }, { "epoch": 3.266613688288467, "grad_norm": 0.10302734375, "learning_rate": 0.0005386748783152876, "loss": 0.4994, "step": 65770 }, { "epoch": 3.2671103605840868, "grad_norm": 0.1845703125, "learning_rate": 0.000538635144531638, "loss": 0.5118, "step": 65780 }, { "epoch": 3.267607032879706, "grad_norm": 0.1328125, "learning_rate": 0.0005385954107479885, "loss": 0.529, "step": 65790 }, { "epoch": 3.2681037051753252, "grad_norm": 0.1318359375, "learning_rate": 0.000538555676964339, "loss": 0.5371, "step": 65800 }, { "epoch": 3.2686003774709445, "grad_norm": 0.10791015625, "learning_rate": 0.0005385159431806894, "loss": 0.5454, "step": 65810 }, { "epoch": 3.269097049766564, "grad_norm": 0.09619140625, "learning_rate": 0.0005384762093970398, "loss": 0.5322, "step": 65820 }, { "epoch": 3.2695937220621833, "grad_norm": 0.2080078125, "learning_rate": 0.0005384364756133904, "loss": 0.561, "step": 65830 }, { "epoch": 3.2700903943578026, "grad_norm": 0.1181640625, "learning_rate": 0.0005383967418297408, "loss": 0.5195, "step": 65840 }, { "epoch": 3.2705870666534222, "grad_norm": 0.146484375, "learning_rate": 0.0005383570080460912, "loss": 0.5318, "step": 65850 }, { "epoch": 3.2710837389490415, "grad_norm": 0.09765625, "learning_rate": 0.0005383172742624416, "loss": 0.5518, "step": 65860 }, { "epoch": 3.2715804112446607, "grad_norm": 0.10009765625, "learning_rate": 0.0005382775404787921, "loss": 0.5417, "step": 65870 }, { "epoch": 3.27207708354028, "grad_norm": 0.123046875, "learning_rate": 0.0005382378066951427, "loss": 0.5254, "step": 65880 }, { "epoch": 3.2725737558358996, "grad_norm": 0.1025390625, "learning_rate": 0.000538198072911493, "loss": 0.5294, "step": 65890 }, { "epoch": 3.273070428131519, "grad_norm": 0.142578125, "learning_rate": 0.0005381583391278435, "loss": 0.5571, "step": 65900 }, { "epoch": 3.273567100427138, "grad_norm": 0.138671875, "learning_rate": 0.0005381186053441939, "loss": 0.5311, "step": 65910 }, { "epoch": 3.2740637727227577, "grad_norm": 0.109375, "learning_rate": 0.0005380788715605443, "loss": 0.5349, "step": 65920 }, { "epoch": 3.274560445018377, "grad_norm": 0.1357421875, "learning_rate": 0.0005380391377768948, "loss": 0.5339, "step": 65930 }, { "epoch": 3.275057117313996, "grad_norm": 0.125, "learning_rate": 0.0005379994039932453, "loss": 0.5454, "step": 65940 }, { "epoch": 3.2755537896096154, "grad_norm": 0.09814453125, "learning_rate": 0.0005379596702095957, "loss": 0.5472, "step": 65950 }, { "epoch": 3.276050461905235, "grad_norm": 0.158203125, "learning_rate": 0.0005379199364259462, "loss": 0.5065, "step": 65960 }, { "epoch": 3.2765471342008543, "grad_norm": 0.11474609375, "learning_rate": 0.0005378802026422966, "loss": 0.5536, "step": 65970 }, { "epoch": 3.2770438064964735, "grad_norm": 0.16796875, "learning_rate": 0.000537840468858647, "loss": 0.5362, "step": 65980 }, { "epoch": 3.277540478792093, "grad_norm": 0.10498046875, "learning_rate": 0.0005378007350749976, "loss": 0.5506, "step": 65990 }, { "epoch": 3.2780371510877124, "grad_norm": 0.1259765625, "learning_rate": 0.000537761001291348, "loss": 0.5296, "step": 66000 }, { "epoch": 3.2785338233833317, "grad_norm": 0.162109375, "learning_rate": 0.0005377212675076985, "loss": 0.5581, "step": 66010 }, { "epoch": 3.279030495678951, "grad_norm": 0.10546875, "learning_rate": 0.0005376815337240489, "loss": 0.5496, "step": 66020 }, { "epoch": 3.2795271679745706, "grad_norm": 0.1044921875, "learning_rate": 0.0005376417999403993, "loss": 0.5502, "step": 66030 }, { "epoch": 3.28002384027019, "grad_norm": 0.19140625, "learning_rate": 0.0005376020661567499, "loss": 0.5442, "step": 66040 }, { "epoch": 3.280520512565809, "grad_norm": 0.10546875, "learning_rate": 0.0005375623323731002, "loss": 0.5387, "step": 66050 }, { "epoch": 3.2810171848614282, "grad_norm": 0.10986328125, "learning_rate": 0.0005375225985894507, "loss": 0.5739, "step": 66060 }, { "epoch": 3.281513857157048, "grad_norm": 0.099609375, "learning_rate": 0.0005374828648058012, "loss": 0.5438, "step": 66070 }, { "epoch": 3.282010529452667, "grad_norm": 0.10205078125, "learning_rate": 0.0005374431310221515, "loss": 0.5437, "step": 66080 }, { "epoch": 3.2825072017482864, "grad_norm": 0.111328125, "learning_rate": 0.0005374033972385021, "loss": 0.5223, "step": 66090 }, { "epoch": 3.2830038740439056, "grad_norm": 0.1279296875, "learning_rate": 0.0005373636634548526, "loss": 0.5179, "step": 66100 }, { "epoch": 3.2835005463395253, "grad_norm": 0.11767578125, "learning_rate": 0.0005373239296712029, "loss": 0.5414, "step": 66110 }, { "epoch": 3.2839972186351445, "grad_norm": 0.10302734375, "learning_rate": 0.0005372841958875534, "loss": 0.5291, "step": 66120 }, { "epoch": 3.2844938909307637, "grad_norm": 0.1435546875, "learning_rate": 0.0005372444621039038, "loss": 0.5523, "step": 66130 }, { "epoch": 3.2849905632263834, "grad_norm": 0.11083984375, "learning_rate": 0.0005372047283202543, "loss": 0.5257, "step": 66140 }, { "epoch": 3.2854872355220026, "grad_norm": 0.19921875, "learning_rate": 0.0005371649945366048, "loss": 0.5845, "step": 66150 }, { "epoch": 3.285983907817622, "grad_norm": 0.150390625, "learning_rate": 0.0005371252607529552, "loss": 0.5372, "step": 66160 }, { "epoch": 3.286480580113241, "grad_norm": 0.1396484375, "learning_rate": 0.0005370855269693057, "loss": 0.5626, "step": 66170 }, { "epoch": 3.2869772524088607, "grad_norm": 0.1201171875, "learning_rate": 0.0005370457931856561, "loss": 0.5303, "step": 66180 }, { "epoch": 3.28747392470448, "grad_norm": 0.103515625, "learning_rate": 0.0005370060594020066, "loss": 0.5337, "step": 66190 }, { "epoch": 3.287970597000099, "grad_norm": 0.1083984375, "learning_rate": 0.0005369663256183571, "loss": 0.5547, "step": 66200 }, { "epoch": 3.288467269295719, "grad_norm": 0.146484375, "learning_rate": 0.0005369265918347075, "loss": 0.5274, "step": 66210 }, { "epoch": 3.288963941591338, "grad_norm": 0.09765625, "learning_rate": 0.0005368868580510579, "loss": 0.5411, "step": 66220 }, { "epoch": 3.2894606138869573, "grad_norm": 0.10009765625, "learning_rate": 0.0005368471242674084, "loss": 0.5588, "step": 66230 }, { "epoch": 3.2899572861825765, "grad_norm": 0.11572265625, "learning_rate": 0.0005368073904837588, "loss": 0.5181, "step": 66240 }, { "epoch": 3.290453958478196, "grad_norm": 0.14453125, "learning_rate": 0.0005367676567001093, "loss": 0.5378, "step": 66250 }, { "epoch": 3.2909506307738154, "grad_norm": 0.1572265625, "learning_rate": 0.0005367279229164598, "loss": 0.5639, "step": 66260 }, { "epoch": 3.2914473030694347, "grad_norm": 0.12109375, "learning_rate": 0.0005366881891328101, "loss": 0.5596, "step": 66270 }, { "epoch": 3.2919439753650543, "grad_norm": 0.119140625, "learning_rate": 0.0005366484553491606, "loss": 0.557, "step": 66280 }, { "epoch": 3.2924406476606736, "grad_norm": 0.10302734375, "learning_rate": 0.0005366087215655112, "loss": 0.55, "step": 66290 }, { "epoch": 3.292937319956293, "grad_norm": 0.11572265625, "learning_rate": 0.0005365689877818615, "loss": 0.5637, "step": 66300 }, { "epoch": 3.293433992251912, "grad_norm": 0.134765625, "learning_rate": 0.000536529253998212, "loss": 0.5482, "step": 66310 }, { "epoch": 3.2939306645475317, "grad_norm": 0.11279296875, "learning_rate": 0.0005364895202145624, "loss": 0.5404, "step": 66320 }, { "epoch": 3.294427336843151, "grad_norm": 0.17578125, "learning_rate": 0.0005364497864309129, "loss": 0.5382, "step": 66330 }, { "epoch": 3.29492400913877, "grad_norm": 0.10791015625, "learning_rate": 0.0005364100526472634, "loss": 0.5647, "step": 66340 }, { "epoch": 3.29542068143439, "grad_norm": 0.11962890625, "learning_rate": 0.0005363703188636138, "loss": 0.5365, "step": 66350 }, { "epoch": 3.295917353730009, "grad_norm": 0.138671875, "learning_rate": 0.0005363305850799643, "loss": 0.5564, "step": 66360 }, { "epoch": 3.2964140260256283, "grad_norm": 0.11279296875, "learning_rate": 0.0005362908512963147, "loss": 0.531, "step": 66370 }, { "epoch": 3.2969106983212475, "grad_norm": 0.08984375, "learning_rate": 0.0005362511175126651, "loss": 0.5241, "step": 66380 }, { "epoch": 3.297407370616867, "grad_norm": 0.0986328125, "learning_rate": 0.0005362113837290157, "loss": 0.528, "step": 66390 }, { "epoch": 3.2979040429124864, "grad_norm": 0.1435546875, "learning_rate": 0.0005361716499453661, "loss": 0.5528, "step": 66400 }, { "epoch": 3.2984007152081056, "grad_norm": 0.125, "learning_rate": 0.0005361319161617165, "loss": 0.5735, "step": 66410 }, { "epoch": 3.298897387503725, "grad_norm": 0.10595703125, "learning_rate": 0.000536092182378067, "loss": 0.5501, "step": 66420 }, { "epoch": 3.2993940597993445, "grad_norm": 0.0986328125, "learning_rate": 0.0005360524485944174, "loss": 0.5369, "step": 66430 }, { "epoch": 3.2998907320949638, "grad_norm": 0.10546875, "learning_rate": 0.0005360127148107679, "loss": 0.5435, "step": 66440 }, { "epoch": 3.300387404390583, "grad_norm": 0.0947265625, "learning_rate": 0.0005359729810271184, "loss": 0.5274, "step": 66450 }, { "epoch": 3.300884076686202, "grad_norm": 0.1416015625, "learning_rate": 0.0005359332472434688, "loss": 0.5446, "step": 66460 }, { "epoch": 3.301380748981822, "grad_norm": 0.15625, "learning_rate": 0.0005358935134598192, "loss": 0.5579, "step": 66470 }, { "epoch": 3.301877421277441, "grad_norm": 0.11767578125, "learning_rate": 0.0005358537796761697, "loss": 0.5127, "step": 66480 }, { "epoch": 3.3023740935730603, "grad_norm": 0.10546875, "learning_rate": 0.0005358140458925202, "loss": 0.5156, "step": 66490 }, { "epoch": 3.30287076586868, "grad_norm": 0.1142578125, "learning_rate": 0.0005357743121088706, "loss": 0.5647, "step": 66500 }, { "epoch": 3.3033674381642992, "grad_norm": 0.1298828125, "learning_rate": 0.000535734578325221, "loss": 0.5426, "step": 66510 }, { "epoch": 3.3038641104599185, "grad_norm": 0.099609375, "learning_rate": 0.0005356948445415715, "loss": 0.5492, "step": 66520 }, { "epoch": 3.3043607827555377, "grad_norm": 0.12060546875, "learning_rate": 0.0005356551107579219, "loss": 0.5311, "step": 66530 }, { "epoch": 3.3048574550511574, "grad_norm": 0.1005859375, "learning_rate": 0.0005356153769742724, "loss": 0.5401, "step": 66540 }, { "epoch": 3.3053541273467766, "grad_norm": 0.1572265625, "learning_rate": 0.0005355756431906229, "loss": 0.5331, "step": 66550 }, { "epoch": 3.305850799642396, "grad_norm": 0.10302734375, "learning_rate": 0.0005355359094069733, "loss": 0.5468, "step": 66560 }, { "epoch": 3.3063474719380155, "grad_norm": 0.1162109375, "learning_rate": 0.0005354961756233237, "loss": 0.5447, "step": 66570 }, { "epoch": 3.3068441442336347, "grad_norm": 0.10595703125, "learning_rate": 0.0005354564418396742, "loss": 0.5458, "step": 66580 }, { "epoch": 3.307340816529254, "grad_norm": 0.1025390625, "learning_rate": 0.0005354167080560247, "loss": 0.5301, "step": 66590 }, { "epoch": 3.307837488824873, "grad_norm": 0.1591796875, "learning_rate": 0.0005353769742723751, "loss": 0.5473, "step": 66600 }, { "epoch": 3.308334161120493, "grad_norm": 0.08984375, "learning_rate": 0.0005353372404887256, "loss": 0.5619, "step": 66610 }, { "epoch": 3.308830833416112, "grad_norm": 0.10205078125, "learning_rate": 0.000535297506705076, "loss": 0.5796, "step": 66620 }, { "epoch": 3.3093275057117313, "grad_norm": 0.10205078125, "learning_rate": 0.0005352577729214264, "loss": 0.5296, "step": 66630 }, { "epoch": 3.309824178007351, "grad_norm": 0.1279296875, "learning_rate": 0.000535218039137777, "loss": 0.5319, "step": 66640 }, { "epoch": 3.31032085030297, "grad_norm": 0.1513671875, "learning_rate": 0.0005351783053541274, "loss": 0.5339, "step": 66650 }, { "epoch": 3.3108175225985894, "grad_norm": 0.109375, "learning_rate": 0.0005351385715704778, "loss": 0.535, "step": 66660 }, { "epoch": 3.3113141948942086, "grad_norm": 0.10595703125, "learning_rate": 0.0005350988377868283, "loss": 0.5615, "step": 66670 }, { "epoch": 3.3118108671898283, "grad_norm": 0.1376953125, "learning_rate": 0.0005350591040031787, "loss": 0.5534, "step": 66680 }, { "epoch": 3.3123075394854475, "grad_norm": 0.12060546875, "learning_rate": 0.0005350193702195292, "loss": 0.5532, "step": 66690 }, { "epoch": 3.3128042117810668, "grad_norm": 0.125, "learning_rate": 0.0005349796364358797, "loss": 0.5475, "step": 66700 }, { "epoch": 3.3133008840766864, "grad_norm": 0.126953125, "learning_rate": 0.0005349399026522301, "loss": 0.5217, "step": 66710 }, { "epoch": 3.3137975563723057, "grad_norm": 0.09423828125, "learning_rate": 0.0005349001688685805, "loss": 0.5623, "step": 66720 }, { "epoch": 3.314294228667925, "grad_norm": 0.1318359375, "learning_rate": 0.0005348604350849309, "loss": 0.5609, "step": 66730 }, { "epoch": 3.314790900963544, "grad_norm": 0.1015625, "learning_rate": 0.0005348207013012815, "loss": 0.5325, "step": 66740 }, { "epoch": 3.315287573259164, "grad_norm": 0.10009765625, "learning_rate": 0.0005347809675176319, "loss": 0.516, "step": 66750 }, { "epoch": 3.315784245554783, "grad_norm": 0.0947265625, "learning_rate": 0.0005347412337339823, "loss": 0.5524, "step": 66760 }, { "epoch": 3.3162809178504022, "grad_norm": 0.09814453125, "learning_rate": 0.0005347014999503328, "loss": 0.549, "step": 66770 }, { "epoch": 3.3167775901460215, "grad_norm": 0.099609375, "learning_rate": 0.0005346617661666832, "loss": 0.5395, "step": 66780 }, { "epoch": 3.317274262441641, "grad_norm": 0.1298828125, "learning_rate": 0.0005346220323830337, "loss": 0.5433, "step": 66790 }, { "epoch": 3.3177709347372604, "grad_norm": 0.1103515625, "learning_rate": 0.0005345822985993842, "loss": 0.526, "step": 66800 }, { "epoch": 3.3182676070328796, "grad_norm": 0.09912109375, "learning_rate": 0.0005345425648157346, "loss": 0.5638, "step": 66810 }, { "epoch": 3.318764279328499, "grad_norm": 0.162109375, "learning_rate": 0.000534502831032085, "loss": 0.5345, "step": 66820 }, { "epoch": 3.3192609516241185, "grad_norm": 0.09521484375, "learning_rate": 0.0005344630972484355, "loss": 0.5446, "step": 66830 }, { "epoch": 3.3197576239197377, "grad_norm": 0.14453125, "learning_rate": 0.000534423363464786, "loss": 0.5389, "step": 66840 }, { "epoch": 3.320254296215357, "grad_norm": 0.1044921875, "learning_rate": 0.0005343836296811364, "loss": 0.5606, "step": 66850 }, { "epoch": 3.3207509685109766, "grad_norm": 0.09716796875, "learning_rate": 0.0005343438958974869, "loss": 0.528, "step": 66860 }, { "epoch": 3.321247640806596, "grad_norm": 0.10986328125, "learning_rate": 0.0005343041621138373, "loss": 0.556, "step": 66870 }, { "epoch": 3.321744313102215, "grad_norm": 0.09228515625, "learning_rate": 0.0005342644283301877, "loss": 0.5319, "step": 66880 }, { "epoch": 3.3222409853978343, "grad_norm": 0.0966796875, "learning_rate": 0.0005342246945465383, "loss": 0.523, "step": 66890 }, { "epoch": 3.322737657693454, "grad_norm": 0.09619140625, "learning_rate": 0.0005341849607628887, "loss": 0.5343, "step": 66900 }, { "epoch": 3.323234329989073, "grad_norm": 0.1123046875, "learning_rate": 0.0005341452269792392, "loss": 0.5325, "step": 66910 }, { "epoch": 3.3237310022846924, "grad_norm": 0.138671875, "learning_rate": 0.0005341054931955895, "loss": 0.5168, "step": 66920 }, { "epoch": 3.324227674580312, "grad_norm": 0.1435546875, "learning_rate": 0.00053406575941194, "loss": 0.5139, "step": 66930 }, { "epoch": 3.3247243468759313, "grad_norm": 0.10498046875, "learning_rate": 0.0005340260256282906, "loss": 0.5396, "step": 66940 }, { "epoch": 3.3252210191715506, "grad_norm": 0.115234375, "learning_rate": 0.0005339862918446409, "loss": 0.5139, "step": 66950 }, { "epoch": 3.32571769146717, "grad_norm": 0.10595703125, "learning_rate": 0.0005339465580609914, "loss": 0.5107, "step": 66960 }, { "epoch": 3.3262143637627894, "grad_norm": 0.14453125, "learning_rate": 0.0005339068242773419, "loss": 0.5205, "step": 66970 }, { "epoch": 3.3267110360584087, "grad_norm": 0.0986328125, "learning_rate": 0.0005338670904936922, "loss": 0.5306, "step": 66980 }, { "epoch": 3.327207708354028, "grad_norm": 0.1083984375, "learning_rate": 0.0005338273567100428, "loss": 0.5516, "step": 66990 }, { "epoch": 3.3277043806496476, "grad_norm": 0.0966796875, "learning_rate": 0.0005337876229263932, "loss": 0.5457, "step": 67000 }, { "epoch": 3.328201052945267, "grad_norm": 0.150390625, "learning_rate": 0.0005337478891427436, "loss": 0.5139, "step": 67010 }, { "epoch": 3.328697725240886, "grad_norm": 0.12158203125, "learning_rate": 0.0005337081553590941, "loss": 0.5391, "step": 67020 }, { "epoch": 3.3291943975365053, "grad_norm": 0.14453125, "learning_rate": 0.0005336684215754445, "loss": 0.5683, "step": 67030 }, { "epoch": 3.329691069832125, "grad_norm": 0.1416015625, "learning_rate": 0.000533628687791795, "loss": 0.5214, "step": 67040 }, { "epoch": 3.330187742127744, "grad_norm": 0.1103515625, "learning_rate": 0.0005335889540081455, "loss": 0.5251, "step": 67050 }, { "epoch": 3.3306844144233634, "grad_norm": 0.10009765625, "learning_rate": 0.0005335492202244959, "loss": 0.5435, "step": 67060 }, { "epoch": 3.331181086718983, "grad_norm": 0.1865234375, "learning_rate": 0.0005335094864408464, "loss": 0.5237, "step": 67070 }, { "epoch": 3.3316777590146023, "grad_norm": 0.0966796875, "learning_rate": 0.0005334697526571968, "loss": 0.5006, "step": 67080 }, { "epoch": 3.3321744313102215, "grad_norm": 0.1103515625, "learning_rate": 0.0005334300188735472, "loss": 0.5426, "step": 67090 }, { "epoch": 3.3326711036058407, "grad_norm": 0.11083984375, "learning_rate": 0.0005333902850898978, "loss": 0.5197, "step": 67100 }, { "epoch": 3.3331677759014604, "grad_norm": 0.09423828125, "learning_rate": 0.0005333505513062481, "loss": 0.5393, "step": 67110 }, { "epoch": 3.3336644481970796, "grad_norm": 0.10205078125, "learning_rate": 0.0005333108175225986, "loss": 0.5338, "step": 67120 }, { "epoch": 3.334161120492699, "grad_norm": 0.13671875, "learning_rate": 0.0005332710837389491, "loss": 0.554, "step": 67130 }, { "epoch": 3.334657792788318, "grad_norm": 0.10888671875, "learning_rate": 0.0005332313499552994, "loss": 0.536, "step": 67140 }, { "epoch": 3.3351544650839378, "grad_norm": 0.10986328125, "learning_rate": 0.00053319161617165, "loss": 0.5317, "step": 67150 }, { "epoch": 3.335651137379557, "grad_norm": 0.1474609375, "learning_rate": 0.0005331518823880005, "loss": 0.5181, "step": 67160 }, { "epoch": 3.336147809675176, "grad_norm": 0.2080078125, "learning_rate": 0.0005331121486043508, "loss": 0.5273, "step": 67170 }, { "epoch": 3.3366444819707954, "grad_norm": 0.12060546875, "learning_rate": 0.0005330724148207013, "loss": 0.5372, "step": 67180 }, { "epoch": 3.337141154266415, "grad_norm": 0.10107421875, "learning_rate": 0.0005330326810370517, "loss": 0.5223, "step": 67190 }, { "epoch": 3.3376378265620343, "grad_norm": 0.1787109375, "learning_rate": 0.0005329929472534023, "loss": 0.5198, "step": 67200 }, { "epoch": 3.3381344988576536, "grad_norm": 0.11572265625, "learning_rate": 0.0005329532134697527, "loss": 0.5214, "step": 67210 }, { "epoch": 3.3386311711532732, "grad_norm": 0.11376953125, "learning_rate": 0.0005329134796861031, "loss": 0.5321, "step": 67220 }, { "epoch": 3.3391278434488925, "grad_norm": 0.12060546875, "learning_rate": 0.0005328737459024536, "loss": 0.553, "step": 67230 }, { "epoch": 3.3396245157445117, "grad_norm": 0.109375, "learning_rate": 0.000532834012118804, "loss": 0.5799, "step": 67240 }, { "epoch": 3.340121188040131, "grad_norm": 0.1416015625, "learning_rate": 0.0005327942783351545, "loss": 0.5631, "step": 67250 }, { "epoch": 3.3406178603357506, "grad_norm": 0.10009765625, "learning_rate": 0.000532754544551505, "loss": 0.5336, "step": 67260 }, { "epoch": 3.34111453263137, "grad_norm": 0.11328125, "learning_rate": 0.0005327148107678554, "loss": 0.527, "step": 67270 }, { "epoch": 3.341611204926989, "grad_norm": 0.1259765625, "learning_rate": 0.0005326750769842058, "loss": 0.5445, "step": 67280 }, { "epoch": 3.3421078772226087, "grad_norm": 0.1259765625, "learning_rate": 0.0005326353432005564, "loss": 0.5429, "step": 67290 }, { "epoch": 3.342604549518228, "grad_norm": 0.1259765625, "learning_rate": 0.0005325956094169068, "loss": 0.5717, "step": 67300 }, { "epoch": 3.343101221813847, "grad_norm": 0.1142578125, "learning_rate": 0.0005325558756332572, "loss": 0.5346, "step": 67310 }, { "epoch": 3.3435978941094664, "grad_norm": 0.11572265625, "learning_rate": 0.0005325161418496077, "loss": 0.518, "step": 67320 }, { "epoch": 3.344094566405086, "grad_norm": 0.169921875, "learning_rate": 0.000532476408065958, "loss": 0.5257, "step": 67330 }, { "epoch": 3.3445912387007053, "grad_norm": 0.10693359375, "learning_rate": 0.0005324366742823085, "loss": 0.5612, "step": 67340 }, { "epoch": 3.3450879109963245, "grad_norm": 0.12890625, "learning_rate": 0.0005323969404986591, "loss": 0.5445, "step": 67350 }, { "epoch": 3.345584583291944, "grad_norm": 0.11474609375, "learning_rate": 0.0005323572067150095, "loss": 0.5257, "step": 67360 }, { "epoch": 3.3460812555875634, "grad_norm": 0.1005859375, "learning_rate": 0.0005323174729313599, "loss": 0.5268, "step": 67370 }, { "epoch": 3.3465779278831826, "grad_norm": 0.10595703125, "learning_rate": 0.0005322777391477103, "loss": 0.5465, "step": 67380 }, { "epoch": 3.347074600178802, "grad_norm": 0.1669921875, "learning_rate": 0.0005322380053640608, "loss": 0.5473, "step": 67390 }, { "epoch": 3.3475712724744215, "grad_norm": 0.10009765625, "learning_rate": 0.0005321982715804113, "loss": 0.5397, "step": 67400 }, { "epoch": 3.3480679447700408, "grad_norm": 0.11572265625, "learning_rate": 0.0005321585377967617, "loss": 0.5467, "step": 67410 }, { "epoch": 3.34856461706566, "grad_norm": 0.1083984375, "learning_rate": 0.0005321188040131122, "loss": 0.5278, "step": 67420 }, { "epoch": 3.3490612893612797, "grad_norm": 0.107421875, "learning_rate": 0.0005320790702294626, "loss": 0.5759, "step": 67430 }, { "epoch": 3.349557961656899, "grad_norm": 0.1015625, "learning_rate": 0.000532039336445813, "loss": 0.5251, "step": 67440 }, { "epoch": 3.350054633952518, "grad_norm": 0.1552734375, "learning_rate": 0.0005319996026621636, "loss": 0.5322, "step": 67450 }, { "epoch": 3.3505513062481374, "grad_norm": 0.09326171875, "learning_rate": 0.000531959868878514, "loss": 0.5063, "step": 67460 }, { "epoch": 3.351047978543757, "grad_norm": 0.15625, "learning_rate": 0.0005319201350948644, "loss": 0.5257, "step": 67470 }, { "epoch": 3.3515446508393762, "grad_norm": 0.1953125, "learning_rate": 0.0005318804013112149, "loss": 0.5474, "step": 67480 }, { "epoch": 3.3520413231349955, "grad_norm": 0.10400390625, "learning_rate": 0.0005318406675275653, "loss": 0.5393, "step": 67490 }, { "epoch": 3.3525379954306147, "grad_norm": 0.1650390625, "learning_rate": 0.0005318009337439158, "loss": 0.5335, "step": 67500 }, { "epoch": 3.3530346677262344, "grad_norm": 0.1435546875, "learning_rate": 0.0005317611999602663, "loss": 0.5153, "step": 67510 }, { "epoch": 3.3535313400218536, "grad_norm": 0.119140625, "learning_rate": 0.0005317214661766167, "loss": 0.528, "step": 67520 }, { "epoch": 3.354028012317473, "grad_norm": 0.1279296875, "learning_rate": 0.0005316817323929671, "loss": 0.5356, "step": 67530 }, { "epoch": 3.354524684613092, "grad_norm": 0.12255859375, "learning_rate": 0.0005316419986093176, "loss": 0.5734, "step": 67540 }, { "epoch": 3.3550213569087117, "grad_norm": 0.1015625, "learning_rate": 0.0005316022648256681, "loss": 0.5431, "step": 67550 }, { "epoch": 3.355518029204331, "grad_norm": 0.1005859375, "learning_rate": 0.0005315625310420185, "loss": 0.5309, "step": 67560 }, { "epoch": 3.35601470149995, "grad_norm": 0.1376953125, "learning_rate": 0.000531522797258369, "loss": 0.5435, "step": 67570 }, { "epoch": 3.35651137379557, "grad_norm": 0.1142578125, "learning_rate": 0.0005314830634747194, "loss": 0.552, "step": 67580 }, { "epoch": 3.357008046091189, "grad_norm": 0.1337890625, "learning_rate": 0.0005314433296910698, "loss": 0.5352, "step": 67590 }, { "epoch": 3.3575047183868083, "grad_norm": 0.10302734375, "learning_rate": 0.0005314035959074203, "loss": 0.5418, "step": 67600 }, { "epoch": 3.3580013906824275, "grad_norm": 0.2080078125, "learning_rate": 0.0005313638621237708, "loss": 0.5286, "step": 67610 }, { "epoch": 3.358498062978047, "grad_norm": 0.11083984375, "learning_rate": 0.0005313241283401212, "loss": 0.5356, "step": 67620 }, { "epoch": 3.3589947352736664, "grad_norm": 0.0966796875, "learning_rate": 0.0005312843945564716, "loss": 0.5358, "step": 67630 }, { "epoch": 3.3594914075692857, "grad_norm": 0.1123046875, "learning_rate": 0.0005312446607728221, "loss": 0.5482, "step": 67640 }, { "epoch": 3.3599880798649053, "grad_norm": 0.1845703125, "learning_rate": 0.0005312049269891726, "loss": 0.5531, "step": 67650 }, { "epoch": 3.3604847521605246, "grad_norm": 0.12060546875, "learning_rate": 0.000531165193205523, "loss": 0.5543, "step": 67660 }, { "epoch": 3.360981424456144, "grad_norm": 0.1103515625, "learning_rate": 0.0005311254594218735, "loss": 0.5504, "step": 67670 }, { "epoch": 3.361478096751763, "grad_norm": 0.1337890625, "learning_rate": 0.0005310857256382239, "loss": 0.5481, "step": 67680 }, { "epoch": 3.3619747690473827, "grad_norm": 0.10693359375, "learning_rate": 0.0005310459918545743, "loss": 0.5184, "step": 67690 }, { "epoch": 3.362471441343002, "grad_norm": 0.10791015625, "learning_rate": 0.0005310062580709249, "loss": 0.5619, "step": 67700 }, { "epoch": 3.362968113638621, "grad_norm": 0.11572265625, "learning_rate": 0.0005309665242872753, "loss": 0.5395, "step": 67710 }, { "epoch": 3.363464785934241, "grad_norm": 0.10986328125, "learning_rate": 0.0005309267905036257, "loss": 0.5428, "step": 67720 }, { "epoch": 3.36396145822986, "grad_norm": 0.11181640625, "learning_rate": 0.0005308870567199762, "loss": 0.538, "step": 67730 }, { "epoch": 3.3644581305254793, "grad_norm": 0.09130859375, "learning_rate": 0.0005308473229363266, "loss": 0.5436, "step": 67740 }, { "epoch": 3.3649548028210985, "grad_norm": 0.16015625, "learning_rate": 0.0005308075891526771, "loss": 0.5553, "step": 67750 }, { "epoch": 3.365451475116718, "grad_norm": 0.09521484375, "learning_rate": 0.0005307678553690276, "loss": 0.5249, "step": 67760 }, { "epoch": 3.3659481474123374, "grad_norm": 0.11279296875, "learning_rate": 0.000530728121585378, "loss": 0.5369, "step": 67770 }, { "epoch": 3.3664448197079566, "grad_norm": 0.142578125, "learning_rate": 0.0005306883878017284, "loss": 0.5338, "step": 67780 }, { "epoch": 3.3669414920035763, "grad_norm": 0.130859375, "learning_rate": 0.0005306486540180788, "loss": 0.5266, "step": 67790 }, { "epoch": 3.3674381642991955, "grad_norm": 0.107421875, "learning_rate": 0.0005306089202344294, "loss": 0.5549, "step": 67800 }, { "epoch": 3.3679348365948147, "grad_norm": 0.1376953125, "learning_rate": 0.0005305691864507799, "loss": 0.5414, "step": 67810 }, { "epoch": 3.368431508890434, "grad_norm": 0.162109375, "learning_rate": 0.0005305294526671302, "loss": 0.5242, "step": 67820 }, { "epoch": 3.368928181186053, "grad_norm": 0.107421875, "learning_rate": 0.0005304897188834807, "loss": 0.5392, "step": 67830 }, { "epoch": 3.369424853481673, "grad_norm": 0.1484375, "learning_rate": 0.0005304499850998312, "loss": 0.5174, "step": 67840 }, { "epoch": 3.369921525777292, "grad_norm": 0.115234375, "learning_rate": 0.0005304102513161816, "loss": 0.5748, "step": 67850 }, { "epoch": 3.3704181980729113, "grad_norm": 0.1201171875, "learning_rate": 0.0005303705175325321, "loss": 0.5387, "step": 67860 }, { "epoch": 3.370914870368531, "grad_norm": 0.099609375, "learning_rate": 0.0005303307837488825, "loss": 0.5072, "step": 67870 }, { "epoch": 3.37141154266415, "grad_norm": 0.1630859375, "learning_rate": 0.0005302910499652329, "loss": 0.5303, "step": 67880 }, { "epoch": 3.3719082149597694, "grad_norm": 0.09765625, "learning_rate": 0.0005302513161815834, "loss": 0.5631, "step": 67890 }, { "epoch": 3.3724048872553887, "grad_norm": 0.09326171875, "learning_rate": 0.0005302115823979339, "loss": 0.5304, "step": 67900 }, { "epoch": 3.3729015595510083, "grad_norm": 0.169921875, "learning_rate": 0.0005301718486142843, "loss": 0.5107, "step": 67910 }, { "epoch": 3.3733982318466276, "grad_norm": 0.1025390625, "learning_rate": 0.0005301321148306348, "loss": 0.5199, "step": 67920 }, { "epoch": 3.373894904142247, "grad_norm": 0.10400390625, "learning_rate": 0.0005300923810469852, "loss": 0.5309, "step": 67930 }, { "epoch": 3.3743915764378665, "grad_norm": 0.10546875, "learning_rate": 0.0005300526472633356, "loss": 0.5479, "step": 67940 }, { "epoch": 3.3748882487334857, "grad_norm": 0.107421875, "learning_rate": 0.0005300129134796862, "loss": 0.513, "step": 67950 }, { "epoch": 3.375384921029105, "grad_norm": 0.11669921875, "learning_rate": 0.0005299731796960366, "loss": 0.5582, "step": 67960 }, { "epoch": 3.375881593324724, "grad_norm": 0.11865234375, "learning_rate": 0.0005299334459123871, "loss": 0.5594, "step": 67970 }, { "epoch": 3.376378265620344, "grad_norm": 0.09716796875, "learning_rate": 0.0005298937121287374, "loss": 0.5227, "step": 67980 }, { "epoch": 3.376874937915963, "grad_norm": 0.1318359375, "learning_rate": 0.0005298539783450879, "loss": 0.5474, "step": 67990 }, { "epoch": 3.3773716102115823, "grad_norm": 0.10205078125, "learning_rate": 0.0005298142445614385, "loss": 0.5357, "step": 68000 }, { "epoch": 3.377868282507202, "grad_norm": 0.134765625, "learning_rate": 0.0005297745107777888, "loss": 0.5372, "step": 68010 }, { "epoch": 3.378364954802821, "grad_norm": 0.09326171875, "learning_rate": 0.0005297347769941393, "loss": 0.5519, "step": 68020 }, { "epoch": 3.3788616270984404, "grad_norm": 0.126953125, "learning_rate": 0.0005296950432104898, "loss": 0.5776, "step": 68030 }, { "epoch": 3.3793582993940596, "grad_norm": 0.1630859375, "learning_rate": 0.0005296553094268401, "loss": 0.537, "step": 68040 }, { "epoch": 3.3798549716896793, "grad_norm": 0.11376953125, "learning_rate": 0.0005296155756431907, "loss": 0.5529, "step": 68050 }, { "epoch": 3.3803516439852985, "grad_norm": 0.10791015625, "learning_rate": 0.0005295758418595411, "loss": 0.5543, "step": 68060 }, { "epoch": 3.3808483162809178, "grad_norm": 0.1279296875, "learning_rate": 0.0005295361080758915, "loss": 0.5728, "step": 68070 }, { "epoch": 3.3813449885765374, "grad_norm": 0.1318359375, "learning_rate": 0.000529496374292242, "loss": 0.525, "step": 68080 }, { "epoch": 3.3818416608721567, "grad_norm": 0.10693359375, "learning_rate": 0.0005294566405085924, "loss": 0.5424, "step": 68090 }, { "epoch": 3.382338333167776, "grad_norm": 0.1259765625, "learning_rate": 0.000529416906724943, "loss": 0.5249, "step": 68100 }, { "epoch": 3.382835005463395, "grad_norm": 0.1142578125, "learning_rate": 0.0005293771729412934, "loss": 0.559, "step": 68110 }, { "epoch": 3.3833316777590148, "grad_norm": 0.10107421875, "learning_rate": 0.0005293374391576438, "loss": 0.52, "step": 68120 }, { "epoch": 3.383828350054634, "grad_norm": 0.09912109375, "learning_rate": 0.0005292977053739943, "loss": 0.5467, "step": 68130 }, { "epoch": 3.3843250223502532, "grad_norm": 0.11279296875, "learning_rate": 0.0005292579715903447, "loss": 0.5558, "step": 68140 }, { "epoch": 3.384821694645873, "grad_norm": 0.11083984375, "learning_rate": 0.0005292182378066952, "loss": 0.554, "step": 68150 }, { "epoch": 3.385318366941492, "grad_norm": 0.134765625, "learning_rate": 0.0005291785040230457, "loss": 0.5432, "step": 68160 }, { "epoch": 3.3858150392371114, "grad_norm": 0.095703125, "learning_rate": 0.0005291387702393961, "loss": 0.5391, "step": 68170 }, { "epoch": 3.3863117115327306, "grad_norm": 0.10546875, "learning_rate": 0.0005290990364557465, "loss": 0.5326, "step": 68180 }, { "epoch": 3.38680838382835, "grad_norm": 0.09765625, "learning_rate": 0.000529059302672097, "loss": 0.5463, "step": 68190 }, { "epoch": 3.3873050561239695, "grad_norm": 0.111328125, "learning_rate": 0.0005290195688884473, "loss": 0.5734, "step": 68200 }, { "epoch": 3.3878017284195887, "grad_norm": 0.1298828125, "learning_rate": 0.0005289798351047979, "loss": 0.521, "step": 68210 }, { "epoch": 3.388298400715208, "grad_norm": 0.11328125, "learning_rate": 0.0005289401013211484, "loss": 0.5451, "step": 68220 }, { "epoch": 3.3887950730108276, "grad_norm": 0.1123046875, "learning_rate": 0.0005289003675374987, "loss": 0.5402, "step": 68230 }, { "epoch": 3.389291745306447, "grad_norm": 0.1259765625, "learning_rate": 0.0005288606337538492, "loss": 0.5204, "step": 68240 }, { "epoch": 3.389788417602066, "grad_norm": 0.103515625, "learning_rate": 0.0005288208999701997, "loss": 0.5358, "step": 68250 }, { "epoch": 3.3902850898976853, "grad_norm": 0.09814453125, "learning_rate": 0.0005287811661865502, "loss": 0.5163, "step": 68260 }, { "epoch": 3.390781762193305, "grad_norm": 0.12890625, "learning_rate": 0.0005287414324029006, "loss": 0.5371, "step": 68270 }, { "epoch": 3.391278434488924, "grad_norm": 0.1044921875, "learning_rate": 0.000528701698619251, "loss": 0.5256, "step": 68280 }, { "epoch": 3.3917751067845434, "grad_norm": 0.11865234375, "learning_rate": 0.0005286619648356015, "loss": 0.5205, "step": 68290 }, { "epoch": 3.392271779080163, "grad_norm": 0.189453125, "learning_rate": 0.000528622231051952, "loss": 0.496, "step": 68300 }, { "epoch": 3.3927684513757823, "grad_norm": 0.16015625, "learning_rate": 0.0005285824972683024, "loss": 0.5303, "step": 68310 }, { "epoch": 3.3932651236714015, "grad_norm": 0.095703125, "learning_rate": 0.0005285427634846529, "loss": 0.5417, "step": 68320 }, { "epoch": 3.3937617959670208, "grad_norm": 0.109375, "learning_rate": 0.0005285030297010033, "loss": 0.5569, "step": 68330 }, { "epoch": 3.3942584682626404, "grad_norm": 0.1240234375, "learning_rate": 0.0005284632959173537, "loss": 0.5545, "step": 68340 }, { "epoch": 3.3947551405582597, "grad_norm": 0.095703125, "learning_rate": 0.0005284235621337043, "loss": 0.5759, "step": 68350 }, { "epoch": 3.395251812853879, "grad_norm": 0.0966796875, "learning_rate": 0.0005283838283500547, "loss": 0.5187, "step": 68360 }, { "epoch": 3.3957484851494986, "grad_norm": 0.1376953125, "learning_rate": 0.0005283440945664051, "loss": 0.5299, "step": 68370 }, { "epoch": 3.396245157445118, "grad_norm": 0.10302734375, "learning_rate": 0.0005283043607827556, "loss": 0.5582, "step": 68380 }, { "epoch": 3.396741829740737, "grad_norm": 0.10888671875, "learning_rate": 0.0005282646269991059, "loss": 0.5287, "step": 68390 }, { "epoch": 3.3972385020363562, "grad_norm": 0.0947265625, "learning_rate": 0.0005282248932154564, "loss": 0.5188, "step": 68400 }, { "epoch": 3.397735174331976, "grad_norm": 0.10205078125, "learning_rate": 0.000528185159431807, "loss": 0.5522, "step": 68410 }, { "epoch": 3.398231846627595, "grad_norm": 0.173828125, "learning_rate": 0.0005281454256481574, "loss": 0.5268, "step": 68420 }, { "epoch": 3.3987285189232144, "grad_norm": 0.11474609375, "learning_rate": 0.0005281056918645078, "loss": 0.5343, "step": 68430 }, { "epoch": 3.399225191218834, "grad_norm": 0.1845703125, "learning_rate": 0.0005280659580808582, "loss": 0.5571, "step": 68440 }, { "epoch": 3.3997218635144533, "grad_norm": 0.10595703125, "learning_rate": 0.0005280262242972088, "loss": 0.5443, "step": 68450 }, { "epoch": 3.4002185358100725, "grad_norm": 0.0986328125, "learning_rate": 0.0005279864905135592, "loss": 0.5339, "step": 68460 }, { "epoch": 3.4007152081056917, "grad_norm": 0.15234375, "learning_rate": 0.0005279467567299096, "loss": 0.5359, "step": 68470 }, { "epoch": 3.4012118804013114, "grad_norm": 0.150390625, "learning_rate": 0.0005279070229462601, "loss": 0.5478, "step": 68480 }, { "epoch": 3.4017085526969306, "grad_norm": 0.10595703125, "learning_rate": 0.0005278672891626105, "loss": 0.5385, "step": 68490 }, { "epoch": 3.40220522499255, "grad_norm": 0.09912109375, "learning_rate": 0.000527827555378961, "loss": 0.5467, "step": 68500 }, { "epoch": 3.402701897288169, "grad_norm": 0.0927734375, "learning_rate": 0.0005277878215953115, "loss": 0.5592, "step": 68510 }, { "epoch": 3.4031985695837887, "grad_norm": 0.10546875, "learning_rate": 0.0005277480878116619, "loss": 0.5589, "step": 68520 }, { "epoch": 3.403695241879408, "grad_norm": 0.11962890625, "learning_rate": 0.0005277083540280123, "loss": 0.5371, "step": 68530 }, { "epoch": 3.404191914175027, "grad_norm": 0.1259765625, "learning_rate": 0.0005276686202443628, "loss": 0.5365, "step": 68540 }, { "epoch": 3.4046885864706464, "grad_norm": 0.11474609375, "learning_rate": 0.0005276288864607132, "loss": 0.5139, "step": 68550 }, { "epoch": 3.405185258766266, "grad_norm": 0.1240234375, "learning_rate": 0.0005275891526770637, "loss": 0.546, "step": 68560 }, { "epoch": 3.4056819310618853, "grad_norm": 0.10986328125, "learning_rate": 0.0005275494188934142, "loss": 0.4999, "step": 68570 }, { "epoch": 3.4061786033575046, "grad_norm": 0.14453125, "learning_rate": 0.0005275096851097646, "loss": 0.5094, "step": 68580 }, { "epoch": 3.4066752756531242, "grad_norm": 0.11181640625, "learning_rate": 0.000527469951326115, "loss": 0.5602, "step": 68590 }, { "epoch": 3.4071719479487435, "grad_norm": 0.10302734375, "learning_rate": 0.0005274302175424656, "loss": 0.5446, "step": 68600 }, { "epoch": 3.4076686202443627, "grad_norm": 0.10400390625, "learning_rate": 0.000527390483758816, "loss": 0.5177, "step": 68610 }, { "epoch": 3.408165292539982, "grad_norm": 0.2216796875, "learning_rate": 0.0005273507499751664, "loss": 0.5689, "step": 68620 }, { "epoch": 3.4086619648356016, "grad_norm": 0.1015625, "learning_rate": 0.0005273110161915169, "loss": 0.5237, "step": 68630 }, { "epoch": 3.409158637131221, "grad_norm": 0.09716796875, "learning_rate": 0.0005272712824078673, "loss": 0.5226, "step": 68640 }, { "epoch": 3.40965530942684, "grad_norm": 0.09375, "learning_rate": 0.0005272315486242177, "loss": 0.5478, "step": 68650 }, { "epoch": 3.4101519817224597, "grad_norm": 0.16015625, "learning_rate": 0.0005271918148405682, "loss": 0.5422, "step": 68660 }, { "epoch": 3.410648654018079, "grad_norm": 0.130859375, "learning_rate": 0.0005271520810569187, "loss": 0.5278, "step": 68670 }, { "epoch": 3.411145326313698, "grad_norm": 0.09716796875, "learning_rate": 0.0005271123472732691, "loss": 0.5128, "step": 68680 }, { "epoch": 3.4116419986093174, "grad_norm": 0.09423828125, "learning_rate": 0.0005270726134896195, "loss": 0.5396, "step": 68690 }, { "epoch": 3.412138670904937, "grad_norm": 0.111328125, "learning_rate": 0.00052703287970597, "loss": 0.523, "step": 68700 }, { "epoch": 3.4126353432005563, "grad_norm": 0.10693359375, "learning_rate": 0.0005269931459223205, "loss": 0.5256, "step": 68710 }, { "epoch": 3.4131320154961755, "grad_norm": 0.1357421875, "learning_rate": 0.0005269534121386709, "loss": 0.5363, "step": 68720 }, { "epoch": 3.413628687791795, "grad_norm": 0.1279296875, "learning_rate": 0.0005269136783550214, "loss": 0.5413, "step": 68730 }, { "epoch": 3.4141253600874144, "grad_norm": 0.11083984375, "learning_rate": 0.0005268739445713718, "loss": 0.544, "step": 68740 }, { "epoch": 3.4146220323830336, "grad_norm": 0.134765625, "learning_rate": 0.0005268342107877222, "loss": 0.541, "step": 68750 }, { "epoch": 3.415118704678653, "grad_norm": 0.1044921875, "learning_rate": 0.0005267944770040728, "loss": 0.5474, "step": 68760 }, { "epoch": 3.4156153769742725, "grad_norm": 0.1484375, "learning_rate": 0.0005267547432204232, "loss": 0.5393, "step": 68770 }, { "epoch": 3.4161120492698918, "grad_norm": 0.103515625, "learning_rate": 0.0005267150094367736, "loss": 0.5744, "step": 68780 }, { "epoch": 3.416608721565511, "grad_norm": 0.16015625, "learning_rate": 0.0005266752756531241, "loss": 0.5633, "step": 68790 }, { "epoch": 3.4171053938611307, "grad_norm": 0.126953125, "learning_rate": 0.0005266355418694745, "loss": 0.5428, "step": 68800 }, { "epoch": 3.41760206615675, "grad_norm": 0.1171875, "learning_rate": 0.000526595808085825, "loss": 0.5468, "step": 68810 }, { "epoch": 3.418098738452369, "grad_norm": 0.10302734375, "learning_rate": 0.0005265560743021755, "loss": 0.5306, "step": 68820 }, { "epoch": 3.4185954107479883, "grad_norm": 0.10693359375, "learning_rate": 0.0005265163405185259, "loss": 0.5672, "step": 68830 }, { "epoch": 3.419092083043608, "grad_norm": 0.11083984375, "learning_rate": 0.0005264766067348763, "loss": 0.5292, "step": 68840 }, { "epoch": 3.4195887553392272, "grad_norm": 0.10986328125, "learning_rate": 0.0005264368729512267, "loss": 0.5451, "step": 68850 }, { "epoch": 3.4200854276348465, "grad_norm": 0.1630859375, "learning_rate": 0.0005263971391675773, "loss": 0.5447, "step": 68860 }, { "epoch": 3.4205820999304657, "grad_norm": 0.10693359375, "learning_rate": 0.0005263574053839278, "loss": 0.5387, "step": 68870 }, { "epoch": 3.4210787722260854, "grad_norm": 0.10205078125, "learning_rate": 0.0005263176716002781, "loss": 0.5183, "step": 68880 }, { "epoch": 3.4215754445217046, "grad_norm": 0.10693359375, "learning_rate": 0.0005262779378166286, "loss": 0.5336, "step": 68890 }, { "epoch": 3.422072116817324, "grad_norm": 0.0986328125, "learning_rate": 0.0005262382040329792, "loss": 0.5533, "step": 68900 }, { "epoch": 3.422568789112943, "grad_norm": 0.1455078125, "learning_rate": 0.0005261984702493295, "loss": 0.5835, "step": 68910 }, { "epoch": 3.4230654614085627, "grad_norm": 0.10498046875, "learning_rate": 0.00052615873646568, "loss": 0.5446, "step": 68920 }, { "epoch": 3.423562133704182, "grad_norm": 0.12060546875, "learning_rate": 0.0005261190026820304, "loss": 0.548, "step": 68930 }, { "epoch": 3.424058805999801, "grad_norm": 0.1328125, "learning_rate": 0.0005260792688983808, "loss": 0.5148, "step": 68940 }, { "epoch": 3.424555478295421, "grad_norm": 0.1630859375, "learning_rate": 0.0005260395351147313, "loss": 0.5478, "step": 68950 }, { "epoch": 3.42505215059104, "grad_norm": 0.1171875, "learning_rate": 0.0005259998013310818, "loss": 0.541, "step": 68960 }, { "epoch": 3.4255488228866593, "grad_norm": 0.1181640625, "learning_rate": 0.0005259600675474322, "loss": 0.5277, "step": 68970 }, { "epoch": 3.4260454951822785, "grad_norm": 0.107421875, "learning_rate": 0.0005259203337637827, "loss": 0.537, "step": 68980 }, { "epoch": 3.426542167477898, "grad_norm": 0.1376953125, "learning_rate": 0.0005258805999801331, "loss": 0.551, "step": 68990 }, { "epoch": 3.4270388397735174, "grad_norm": 0.11474609375, "learning_rate": 0.0005258408661964836, "loss": 0.5412, "step": 69000 }, { "epoch": 3.4275355120691366, "grad_norm": 0.09521484375, "learning_rate": 0.0005258011324128341, "loss": 0.5622, "step": 69010 }, { "epoch": 3.4280321843647563, "grad_norm": 0.1044921875, "learning_rate": 0.0005257613986291845, "loss": 0.5339, "step": 69020 }, { "epoch": 3.4285288566603755, "grad_norm": 0.1494140625, "learning_rate": 0.000525721664845535, "loss": 0.5559, "step": 69030 }, { "epoch": 3.4290255289559948, "grad_norm": 0.125, "learning_rate": 0.0005256819310618853, "loss": 0.5526, "step": 69040 }, { "epoch": 3.429522201251614, "grad_norm": 0.10888671875, "learning_rate": 0.0005256421972782358, "loss": 0.5512, "step": 69050 }, { "epoch": 3.4300188735472337, "grad_norm": 0.134765625, "learning_rate": 0.0005256024634945864, "loss": 0.5645, "step": 69060 }, { "epoch": 3.430515545842853, "grad_norm": 0.201171875, "learning_rate": 0.0005255627297109367, "loss": 0.538, "step": 69070 }, { "epoch": 3.431012218138472, "grad_norm": 0.0966796875, "learning_rate": 0.0005255229959272872, "loss": 0.5065, "step": 69080 }, { "epoch": 3.431508890434092, "grad_norm": 0.140625, "learning_rate": 0.0005254832621436377, "loss": 0.5269, "step": 69090 }, { "epoch": 3.432005562729711, "grad_norm": 0.1025390625, "learning_rate": 0.000525443528359988, "loss": 0.5125, "step": 69100 }, { "epoch": 3.4325022350253303, "grad_norm": 0.11767578125, "learning_rate": 0.0005254037945763386, "loss": 0.5389, "step": 69110 }, { "epoch": 3.4329989073209495, "grad_norm": 0.11474609375, "learning_rate": 0.000525364060792689, "loss": 0.5265, "step": 69120 }, { "epoch": 3.433495579616569, "grad_norm": 0.1435546875, "learning_rate": 0.0005253243270090394, "loss": 0.5164, "step": 69130 }, { "epoch": 3.4339922519121884, "grad_norm": 0.1484375, "learning_rate": 0.0005252845932253899, "loss": 0.5255, "step": 69140 }, { "epoch": 3.4344889242078076, "grad_norm": 0.1484375, "learning_rate": 0.0005252448594417403, "loss": 0.5521, "step": 69150 }, { "epoch": 3.4349855965034273, "grad_norm": 0.1064453125, "learning_rate": 0.0005252051256580909, "loss": 0.5428, "step": 69160 }, { "epoch": 3.4354822687990465, "grad_norm": 0.1865234375, "learning_rate": 0.0005251653918744413, "loss": 0.5408, "step": 69170 }, { "epoch": 3.4359789410946657, "grad_norm": 0.09521484375, "learning_rate": 0.0005251256580907917, "loss": 0.547, "step": 69180 }, { "epoch": 3.436475613390285, "grad_norm": 0.11669921875, "learning_rate": 0.0005250859243071422, "loss": 0.5285, "step": 69190 }, { "epoch": 3.4369722856859046, "grad_norm": 0.173828125, "learning_rate": 0.0005250461905234926, "loss": 0.5264, "step": 69200 }, { "epoch": 3.437468957981524, "grad_norm": 0.109375, "learning_rate": 0.0005250064567398431, "loss": 0.512, "step": 69210 }, { "epoch": 3.437965630277143, "grad_norm": 0.1025390625, "learning_rate": 0.0005249667229561936, "loss": 0.535, "step": 69220 }, { "epoch": 3.4384623025727623, "grad_norm": 0.126953125, "learning_rate": 0.000524926989172544, "loss": 0.5482, "step": 69230 }, { "epoch": 3.438958974868382, "grad_norm": 0.10888671875, "learning_rate": 0.0005248872553888944, "loss": 0.5437, "step": 69240 }, { "epoch": 3.439455647164001, "grad_norm": 0.09765625, "learning_rate": 0.0005248475216052449, "loss": 0.543, "step": 69250 }, { "epoch": 3.4399523194596204, "grad_norm": 0.13671875, "learning_rate": 0.0005248077878215953, "loss": 0.5419, "step": 69260 }, { "epoch": 3.4404489917552397, "grad_norm": 0.10693359375, "learning_rate": 0.0005247680540379458, "loss": 0.5374, "step": 69270 }, { "epoch": 3.4409456640508593, "grad_norm": 0.115234375, "learning_rate": 0.0005247283202542963, "loss": 0.5166, "step": 69280 }, { "epoch": 3.4414423363464786, "grad_norm": 0.095703125, "learning_rate": 0.0005246885864706466, "loss": 0.5189, "step": 69290 }, { "epoch": 3.441939008642098, "grad_norm": 0.115234375, "learning_rate": 0.0005246488526869971, "loss": 0.5521, "step": 69300 }, { "epoch": 3.4424356809377175, "grad_norm": 0.12109375, "learning_rate": 0.0005246091189033476, "loss": 0.5179, "step": 69310 }, { "epoch": 3.4429323532333367, "grad_norm": 0.10546875, "learning_rate": 0.0005245693851196981, "loss": 0.5458, "step": 69320 }, { "epoch": 3.443429025528956, "grad_norm": 0.1015625, "learning_rate": 0.0005245296513360485, "loss": 0.5593, "step": 69330 }, { "epoch": 3.443925697824575, "grad_norm": 0.2119140625, "learning_rate": 0.0005244899175523989, "loss": 0.5407, "step": 69340 }, { "epoch": 3.444422370120195, "grad_norm": 0.10595703125, "learning_rate": 0.0005244501837687494, "loss": 0.5337, "step": 69350 }, { "epoch": 3.444919042415814, "grad_norm": 0.10986328125, "learning_rate": 0.0005244104499850999, "loss": 0.5273, "step": 69360 }, { "epoch": 3.4454157147114333, "grad_norm": 0.119140625, "learning_rate": 0.0005243707162014503, "loss": 0.5145, "step": 69370 }, { "epoch": 3.445912387007053, "grad_norm": 0.11669921875, "learning_rate": 0.0005243309824178008, "loss": 0.5227, "step": 69380 }, { "epoch": 3.446409059302672, "grad_norm": 0.1142578125, "learning_rate": 0.0005242912486341512, "loss": 0.5793, "step": 69390 }, { "epoch": 3.4469057315982914, "grad_norm": 0.0986328125, "learning_rate": 0.0005242515148505016, "loss": 0.5365, "step": 69400 }, { "epoch": 3.4474024038939106, "grad_norm": 0.146484375, "learning_rate": 0.0005242117810668522, "loss": 0.5376, "step": 69410 }, { "epoch": 3.4478990761895303, "grad_norm": 0.103515625, "learning_rate": 0.0005241720472832026, "loss": 0.5581, "step": 69420 }, { "epoch": 3.4483957484851495, "grad_norm": 0.1357421875, "learning_rate": 0.000524132313499553, "loss": 0.5289, "step": 69430 }, { "epoch": 3.4488924207807687, "grad_norm": 0.1240234375, "learning_rate": 0.0005240925797159035, "loss": 0.5134, "step": 69440 }, { "epoch": 3.4493890930763884, "grad_norm": 0.1630859375, "learning_rate": 0.0005240528459322539, "loss": 0.5416, "step": 69450 }, { "epoch": 3.4498857653720076, "grad_norm": 0.1005859375, "learning_rate": 0.0005240131121486044, "loss": 0.5462, "step": 69460 }, { "epoch": 3.450382437667627, "grad_norm": 0.1318359375, "learning_rate": 0.0005239733783649549, "loss": 0.5286, "step": 69470 }, { "epoch": 3.450879109963246, "grad_norm": 0.09912109375, "learning_rate": 0.0005239336445813053, "loss": 0.5301, "step": 69480 }, { "epoch": 3.4513757822588658, "grad_norm": 0.1279296875, "learning_rate": 0.0005238939107976557, "loss": 0.5386, "step": 69490 }, { "epoch": 3.451872454554485, "grad_norm": 0.10888671875, "learning_rate": 0.0005238541770140062, "loss": 0.5514, "step": 69500 }, { "epoch": 3.452369126850104, "grad_norm": 0.1025390625, "learning_rate": 0.0005238144432303567, "loss": 0.5283, "step": 69510 }, { "epoch": 3.452865799145724, "grad_norm": 0.111328125, "learning_rate": 0.0005237747094467071, "loss": 0.5174, "step": 69520 }, { "epoch": 3.453362471441343, "grad_norm": 0.1572265625, "learning_rate": 0.0005237349756630575, "loss": 0.53, "step": 69530 }, { "epoch": 3.4538591437369623, "grad_norm": 0.12158203125, "learning_rate": 0.000523695241879408, "loss": 0.5549, "step": 69540 }, { "epoch": 3.4543558160325816, "grad_norm": 0.12255859375, "learning_rate": 0.0005236555080957584, "loss": 0.5244, "step": 69550 }, { "epoch": 3.4548524883282012, "grad_norm": 0.10986328125, "learning_rate": 0.0005236157743121089, "loss": 0.5421, "step": 69560 }, { "epoch": 3.4553491606238205, "grad_norm": 0.166015625, "learning_rate": 0.0005235760405284594, "loss": 0.5856, "step": 69570 }, { "epoch": 3.4558458329194397, "grad_norm": 0.10400390625, "learning_rate": 0.0005235363067448098, "loss": 0.5223, "step": 69580 }, { "epoch": 3.456342505215059, "grad_norm": 0.142578125, "learning_rate": 0.0005234965729611602, "loss": 0.5458, "step": 69590 }, { "epoch": 3.4568391775106786, "grad_norm": 0.1123046875, "learning_rate": 0.0005234568391775107, "loss": 0.5358, "step": 69600 }, { "epoch": 3.457335849806298, "grad_norm": 0.12353515625, "learning_rate": 0.0005234171053938612, "loss": 0.5253, "step": 69610 }, { "epoch": 3.457832522101917, "grad_norm": 0.1259765625, "learning_rate": 0.0005233773716102116, "loss": 0.5263, "step": 69620 }, { "epoch": 3.4583291943975363, "grad_norm": 0.10791015625, "learning_rate": 0.0005233376378265621, "loss": 0.5653, "step": 69630 }, { "epoch": 3.458825866693156, "grad_norm": 0.169921875, "learning_rate": 0.0005232979040429125, "loss": 0.5114, "step": 69640 }, { "epoch": 3.459322538988775, "grad_norm": 0.11767578125, "learning_rate": 0.0005232581702592629, "loss": 0.5446, "step": 69650 }, { "epoch": 3.4598192112843944, "grad_norm": 0.13671875, "learning_rate": 0.0005232184364756135, "loss": 0.5211, "step": 69660 }, { "epoch": 3.460315883580014, "grad_norm": 0.12060546875, "learning_rate": 0.0005231787026919639, "loss": 0.5337, "step": 69670 }, { "epoch": 3.4608125558756333, "grad_norm": 0.09228515625, "learning_rate": 0.0005231389689083143, "loss": 0.5658, "step": 69680 }, { "epoch": 3.4613092281712525, "grad_norm": 0.1337890625, "learning_rate": 0.0005230992351246648, "loss": 0.5337, "step": 69690 }, { "epoch": 3.4618059004668718, "grad_norm": 0.10986328125, "learning_rate": 0.0005230595013410152, "loss": 0.5631, "step": 69700 }, { "epoch": 3.4623025727624914, "grad_norm": 0.134765625, "learning_rate": 0.0005230197675573656, "loss": 0.5428, "step": 69710 }, { "epoch": 3.4627992450581107, "grad_norm": 0.18359375, "learning_rate": 0.0005229800337737161, "loss": 0.5287, "step": 69720 }, { "epoch": 3.46329591735373, "grad_norm": 0.177734375, "learning_rate": 0.0005229402999900666, "loss": 0.5648, "step": 69730 }, { "epoch": 3.4637925896493496, "grad_norm": 0.146484375, "learning_rate": 0.0005229005662064171, "loss": 0.5414, "step": 69740 }, { "epoch": 3.464289261944969, "grad_norm": 0.1240234375, "learning_rate": 0.0005228608324227674, "loss": 0.5192, "step": 69750 }, { "epoch": 3.464785934240588, "grad_norm": 0.1396484375, "learning_rate": 0.000522821098639118, "loss": 0.538, "step": 69760 }, { "epoch": 3.4652826065362072, "grad_norm": 0.1083984375, "learning_rate": 0.0005227813648554685, "loss": 0.5175, "step": 69770 }, { "epoch": 3.465779278831827, "grad_norm": 0.11376953125, "learning_rate": 0.0005227416310718188, "loss": 0.548, "step": 69780 }, { "epoch": 3.466275951127446, "grad_norm": 0.15234375, "learning_rate": 0.0005227018972881693, "loss": 0.5178, "step": 69790 }, { "epoch": 3.4667726234230654, "grad_norm": 0.11474609375, "learning_rate": 0.0005226621635045197, "loss": 0.5473, "step": 69800 }, { "epoch": 3.467269295718685, "grad_norm": 0.11767578125, "learning_rate": 0.0005226224297208701, "loss": 0.5624, "step": 69810 }, { "epoch": 3.4677659680143043, "grad_norm": 0.1298828125, "learning_rate": 0.0005225826959372207, "loss": 0.5316, "step": 69820 }, { "epoch": 3.4682626403099235, "grad_norm": 0.1181640625, "learning_rate": 0.0005225429621535711, "loss": 0.5235, "step": 69830 }, { "epoch": 3.4687593126055427, "grad_norm": 0.10107421875, "learning_rate": 0.0005225032283699215, "loss": 0.5579, "step": 69840 }, { "epoch": 3.4692559849011624, "grad_norm": 0.10302734375, "learning_rate": 0.000522463494586272, "loss": 0.525, "step": 69850 }, { "epoch": 3.4697526571967816, "grad_norm": 0.1015625, "learning_rate": 0.0005224237608026224, "loss": 0.5302, "step": 69860 }, { "epoch": 3.470249329492401, "grad_norm": 0.10205078125, "learning_rate": 0.0005223840270189729, "loss": 0.5348, "step": 69870 }, { "epoch": 3.4707460017880205, "grad_norm": 0.1201171875, "learning_rate": 0.0005223442932353234, "loss": 0.5421, "step": 69880 }, { "epoch": 3.4712426740836397, "grad_norm": 0.1630859375, "learning_rate": 0.0005223045594516738, "loss": 0.5356, "step": 69890 }, { "epoch": 3.471739346379259, "grad_norm": 0.11181640625, "learning_rate": 0.0005222648256680243, "loss": 0.5201, "step": 69900 }, { "epoch": 3.472236018674878, "grad_norm": 0.177734375, "learning_rate": 0.0005222250918843746, "loss": 0.5484, "step": 69910 }, { "epoch": 3.472732690970498, "grad_norm": 0.154296875, "learning_rate": 0.0005221853581007252, "loss": 0.5577, "step": 69920 }, { "epoch": 3.473229363266117, "grad_norm": 0.11767578125, "learning_rate": 0.0005221456243170757, "loss": 0.5364, "step": 69930 }, { "epoch": 3.4737260355617363, "grad_norm": 0.125, "learning_rate": 0.000522105890533426, "loss": 0.5182, "step": 69940 }, { "epoch": 3.4742227078573555, "grad_norm": 0.142578125, "learning_rate": 0.0005220661567497765, "loss": 0.5141, "step": 69950 }, { "epoch": 3.474719380152975, "grad_norm": 0.18359375, "learning_rate": 0.000522026422966127, "loss": 0.5548, "step": 69960 }, { "epoch": 3.4752160524485944, "grad_norm": 0.107421875, "learning_rate": 0.0005219866891824774, "loss": 0.5866, "step": 69970 }, { "epoch": 3.4757127247442137, "grad_norm": 0.1845703125, "learning_rate": 0.0005219469553988279, "loss": 0.503, "step": 69980 }, { "epoch": 3.476209397039833, "grad_norm": 0.11376953125, "learning_rate": 0.0005219072216151783, "loss": 0.5192, "step": 69990 }, { "epoch": 3.4767060693354526, "grad_norm": 0.1279296875, "learning_rate": 0.0005218674878315287, "loss": 0.5409, "step": 70000 }, { "epoch": 3.477202741631072, "grad_norm": 0.1474609375, "learning_rate": 0.0005218277540478792, "loss": 0.5133, "step": 70010 }, { "epoch": 3.477699413926691, "grad_norm": 0.11767578125, "learning_rate": 0.0005217880202642297, "loss": 0.5512, "step": 70020 }, { "epoch": 3.4781960862223107, "grad_norm": 0.10546875, "learning_rate": 0.0005217482864805801, "loss": 0.5461, "step": 70030 }, { "epoch": 3.47869275851793, "grad_norm": 0.1103515625, "learning_rate": 0.0005217085526969306, "loss": 0.5825, "step": 70040 }, { "epoch": 3.479189430813549, "grad_norm": 0.1240234375, "learning_rate": 0.000521668818913281, "loss": 0.5304, "step": 70050 }, { "epoch": 3.4796861031091684, "grad_norm": 0.1328125, "learning_rate": 0.0005216290851296316, "loss": 0.5333, "step": 70060 }, { "epoch": 3.480182775404788, "grad_norm": 0.1484375, "learning_rate": 0.000521589351345982, "loss": 0.5195, "step": 70070 }, { "epoch": 3.4806794477004073, "grad_norm": 0.1015625, "learning_rate": 0.0005215496175623324, "loss": 0.5215, "step": 70080 }, { "epoch": 3.4811761199960265, "grad_norm": 0.12060546875, "learning_rate": 0.0005215098837786829, "loss": 0.5377, "step": 70090 }, { "epoch": 3.481672792291646, "grad_norm": 0.12451171875, "learning_rate": 0.0005214701499950333, "loss": 0.5759, "step": 70100 }, { "epoch": 3.4821694645872654, "grad_norm": 0.98046875, "learning_rate": 0.0005214304162113837, "loss": 0.5493, "step": 70110 }, { "epoch": 3.4826661368828846, "grad_norm": 0.125, "learning_rate": 0.0005213906824277343, "loss": 0.5331, "step": 70120 }, { "epoch": 3.483162809178504, "grad_norm": 0.10498046875, "learning_rate": 0.0005213509486440846, "loss": 0.5143, "step": 70130 }, { "epoch": 3.4836594814741235, "grad_norm": 0.09912109375, "learning_rate": 0.0005213112148604351, "loss": 0.5208, "step": 70140 }, { "epoch": 3.4841561537697427, "grad_norm": 0.12890625, "learning_rate": 0.0005212714810767856, "loss": 0.5189, "step": 70150 }, { "epoch": 3.484652826065362, "grad_norm": 0.10400390625, "learning_rate": 0.0005212317472931359, "loss": 0.5287, "step": 70160 }, { "epoch": 3.4851494983609816, "grad_norm": 0.15625, "learning_rate": 0.0005211920135094865, "loss": 0.5186, "step": 70170 }, { "epoch": 3.485646170656601, "grad_norm": 0.1005859375, "learning_rate": 0.0005211522797258369, "loss": 0.5472, "step": 70180 }, { "epoch": 3.48614284295222, "grad_norm": 0.099609375, "learning_rate": 0.0005211125459421874, "loss": 0.5225, "step": 70190 }, { "epoch": 3.4866395152478393, "grad_norm": 0.140625, "learning_rate": 0.0005210728121585378, "loss": 0.5379, "step": 70200 }, { "epoch": 3.487136187543459, "grad_norm": 0.1044921875, "learning_rate": 0.0005210330783748882, "loss": 0.5436, "step": 70210 }, { "epoch": 3.4876328598390782, "grad_norm": 0.1240234375, "learning_rate": 0.0005209933445912388, "loss": 0.5352, "step": 70220 }, { "epoch": 3.4881295321346975, "grad_norm": 0.1259765625, "learning_rate": 0.0005209536108075892, "loss": 0.5112, "step": 70230 }, { "epoch": 3.488626204430317, "grad_norm": 0.11181640625, "learning_rate": 0.0005209138770239396, "loss": 0.5121, "step": 70240 }, { "epoch": 3.4891228767259364, "grad_norm": 0.10302734375, "learning_rate": 0.0005208741432402901, "loss": 0.5324, "step": 70250 }, { "epoch": 3.4896195490215556, "grad_norm": 0.09619140625, "learning_rate": 0.0005208344094566405, "loss": 0.5386, "step": 70260 }, { "epoch": 3.490116221317175, "grad_norm": 0.11328125, "learning_rate": 0.000520794675672991, "loss": 0.5745, "step": 70270 }, { "epoch": 3.490612893612794, "grad_norm": 0.1259765625, "learning_rate": 0.0005207549418893415, "loss": 0.5429, "step": 70280 }, { "epoch": 3.4911095659084137, "grad_norm": 0.12158203125, "learning_rate": 0.0005207152081056919, "loss": 0.567, "step": 70290 }, { "epoch": 3.491606238204033, "grad_norm": 0.1103515625, "learning_rate": 0.0005206754743220423, "loss": 0.5386, "step": 70300 }, { "epoch": 3.492102910499652, "grad_norm": 0.103515625, "learning_rate": 0.0005206357405383928, "loss": 0.5078, "step": 70310 }, { "epoch": 3.492599582795272, "grad_norm": 0.0966796875, "learning_rate": 0.0005205960067547432, "loss": 0.5088, "step": 70320 }, { "epoch": 3.493096255090891, "grad_norm": 0.09423828125, "learning_rate": 0.0005205562729710937, "loss": 0.5491, "step": 70330 }, { "epoch": 3.4935929273865103, "grad_norm": 0.10693359375, "learning_rate": 0.0005205165391874442, "loss": 0.5201, "step": 70340 }, { "epoch": 3.4940895996821295, "grad_norm": 0.099609375, "learning_rate": 0.0005204768054037946, "loss": 0.5218, "step": 70350 }, { "epoch": 3.494586271977749, "grad_norm": 0.1640625, "learning_rate": 0.000520437071620145, "loss": 0.5051, "step": 70360 }, { "epoch": 3.4950829442733684, "grad_norm": 0.10595703125, "learning_rate": 0.0005203973378364956, "loss": 0.5346, "step": 70370 }, { "epoch": 3.4955796165689876, "grad_norm": 0.107421875, "learning_rate": 0.000520357604052846, "loss": 0.5233, "step": 70380 }, { "epoch": 3.4960762888646073, "grad_norm": 0.11083984375, "learning_rate": 0.0005203178702691964, "loss": 0.526, "step": 70390 }, { "epoch": 3.4965729611602265, "grad_norm": 0.1650390625, "learning_rate": 0.0005202781364855468, "loss": 0.5111, "step": 70400 }, { "epoch": 3.4970696334558458, "grad_norm": 0.11083984375, "learning_rate": 0.0005202384027018973, "loss": 0.5377, "step": 70410 }, { "epoch": 3.497566305751465, "grad_norm": 0.16015625, "learning_rate": 0.0005201986689182478, "loss": 0.5488, "step": 70420 }, { "epoch": 3.4980629780470847, "grad_norm": 0.12255859375, "learning_rate": 0.0005201589351345982, "loss": 0.5356, "step": 70430 }, { "epoch": 3.498559650342704, "grad_norm": 0.1025390625, "learning_rate": 0.0005201192013509487, "loss": 0.5158, "step": 70440 }, { "epoch": 3.499056322638323, "grad_norm": 0.1064453125, "learning_rate": 0.0005200794675672991, "loss": 0.545, "step": 70450 }, { "epoch": 3.499552994933943, "grad_norm": 0.10888671875, "learning_rate": 0.0005200397337836495, "loss": 0.5401, "step": 70460 }, { "epoch": 3.500049667229562, "grad_norm": 0.173828125, "learning_rate": 0.0005200000000000001, "loss": 0.519, "step": 70470 }, { "epoch": 3.5005463395251812, "grad_norm": 0.1396484375, "learning_rate": 0.0005199602662163505, "loss": 0.5401, "step": 70480 }, { "epoch": 3.5010430118208005, "grad_norm": 0.1083984375, "learning_rate": 0.0005199205324327009, "loss": 0.5255, "step": 70490 }, { "epoch": 3.50153968411642, "grad_norm": 0.1259765625, "learning_rate": 0.0005198807986490514, "loss": 0.5162, "step": 70500 }, { "epoch": 3.5020363564120394, "grad_norm": 0.18359375, "learning_rate": 0.0005198410648654018, "loss": 0.5138, "step": 70510 }, { "epoch": 3.5025330287076586, "grad_norm": 0.10595703125, "learning_rate": 0.0005198013310817523, "loss": 0.5377, "step": 70520 }, { "epoch": 3.5030297010032783, "grad_norm": 0.11083984375, "learning_rate": 0.0005197615972981028, "loss": 0.534, "step": 70530 }, { "epoch": 3.5035263732988975, "grad_norm": 0.1015625, "learning_rate": 0.0005197218635144532, "loss": 0.557, "step": 70540 }, { "epoch": 3.5040230455945167, "grad_norm": 0.123046875, "learning_rate": 0.0005196821297308036, "loss": 0.5508, "step": 70550 }, { "epoch": 3.504519717890136, "grad_norm": 0.11474609375, "learning_rate": 0.0005196423959471541, "loss": 0.5263, "step": 70560 }, { "epoch": 3.505016390185755, "grad_norm": 0.11669921875, "learning_rate": 0.0005196026621635046, "loss": 0.5132, "step": 70570 }, { "epoch": 3.505513062481375, "grad_norm": 0.11083984375, "learning_rate": 0.000519562928379855, "loss": 0.5644, "step": 70580 }, { "epoch": 3.506009734776994, "grad_norm": 0.10693359375, "learning_rate": 0.0005195231945962054, "loss": 0.501, "step": 70590 }, { "epoch": 3.5065064070726137, "grad_norm": 0.10009765625, "learning_rate": 0.0005194834608125559, "loss": 0.5329, "step": 70600 }, { "epoch": 3.507003079368233, "grad_norm": 0.1064453125, "learning_rate": 0.0005194437270289063, "loss": 0.5443, "step": 70610 }, { "epoch": 3.507499751663852, "grad_norm": 0.11572265625, "learning_rate": 0.0005194039932452568, "loss": 0.5276, "step": 70620 }, { "epoch": 3.5079964239594714, "grad_norm": 0.126953125, "learning_rate": 0.0005193642594616073, "loss": 0.5405, "step": 70630 }, { "epoch": 3.5084930962550906, "grad_norm": 0.11474609375, "learning_rate": 0.0005193245256779578, "loss": 0.5529, "step": 70640 }, { "epoch": 3.5089897685507103, "grad_norm": 0.13671875, "learning_rate": 0.0005192847918943081, "loss": 0.5094, "step": 70650 }, { "epoch": 3.5094864408463295, "grad_norm": 0.1044921875, "learning_rate": 0.0005192450581106586, "loss": 0.5102, "step": 70660 }, { "epoch": 3.509983113141949, "grad_norm": 0.11572265625, "learning_rate": 0.0005192053243270091, "loss": 0.5402, "step": 70670 }, { "epoch": 3.5104797854375684, "grad_norm": 0.10986328125, "learning_rate": 0.0005191655905433595, "loss": 0.5365, "step": 70680 }, { "epoch": 3.5109764577331877, "grad_norm": 0.1328125, "learning_rate": 0.00051912585675971, "loss": 0.554, "step": 70690 }, { "epoch": 3.511473130028807, "grad_norm": 0.10498046875, "learning_rate": 0.0005190861229760604, "loss": 0.5333, "step": 70700 }, { "epoch": 3.511969802324426, "grad_norm": 0.107421875, "learning_rate": 0.0005190463891924108, "loss": 0.5351, "step": 70710 }, { "epoch": 3.512466474620046, "grad_norm": 0.1005859375, "learning_rate": 0.0005190066554087614, "loss": 0.5524, "step": 70720 }, { "epoch": 3.512963146915665, "grad_norm": 0.1025390625, "learning_rate": 0.0005189669216251118, "loss": 0.5221, "step": 70730 }, { "epoch": 3.5134598192112843, "grad_norm": 0.11328125, "learning_rate": 0.0005189271878414622, "loss": 0.5439, "step": 70740 }, { "epoch": 3.513956491506904, "grad_norm": 0.11328125, "learning_rate": 0.0005188874540578127, "loss": 0.534, "step": 70750 }, { "epoch": 3.514453163802523, "grad_norm": 0.1337890625, "learning_rate": 0.0005188477202741631, "loss": 0.5449, "step": 70760 }, { "epoch": 3.5149498360981424, "grad_norm": 0.09619140625, "learning_rate": 0.0005188079864905136, "loss": 0.5213, "step": 70770 }, { "epoch": 3.5154465083937616, "grad_norm": 0.1123046875, "learning_rate": 0.000518768252706864, "loss": 0.5238, "step": 70780 }, { "epoch": 3.5159431806893813, "grad_norm": 0.1533203125, "learning_rate": 0.0005187285189232145, "loss": 0.5503, "step": 70790 }, { "epoch": 3.5164398529850005, "grad_norm": 0.1572265625, "learning_rate": 0.000518688785139565, "loss": 0.5157, "step": 70800 }, { "epoch": 3.5169365252806197, "grad_norm": 0.10595703125, "learning_rate": 0.0005186490513559153, "loss": 0.5357, "step": 70810 }, { "epoch": 3.5174331975762394, "grad_norm": 0.140625, "learning_rate": 0.0005186093175722659, "loss": 0.5286, "step": 70820 }, { "epoch": 3.5179298698718586, "grad_norm": 0.1025390625, "learning_rate": 0.0005185695837886164, "loss": 0.556, "step": 70830 }, { "epoch": 3.518426542167478, "grad_norm": 0.154296875, "learning_rate": 0.0005185298500049667, "loss": 0.5277, "step": 70840 }, { "epoch": 3.518923214463097, "grad_norm": 0.1083984375, "learning_rate": 0.0005184901162213172, "loss": 0.5423, "step": 70850 }, { "epoch": 3.5194198867587168, "grad_norm": 0.10400390625, "learning_rate": 0.0005184503824376676, "loss": 0.5051, "step": 70860 }, { "epoch": 3.519916559054336, "grad_norm": 0.1962890625, "learning_rate": 0.000518410648654018, "loss": 0.5264, "step": 70870 }, { "epoch": 3.520413231349955, "grad_norm": 0.1240234375, "learning_rate": 0.0005183709148703686, "loss": 0.5475, "step": 70880 }, { "epoch": 3.520909903645575, "grad_norm": 0.12890625, "learning_rate": 0.000518331181086719, "loss": 0.5207, "step": 70890 }, { "epoch": 3.521406575941194, "grad_norm": 0.1630859375, "learning_rate": 0.0005182914473030694, "loss": 0.532, "step": 70900 }, { "epoch": 3.5219032482368133, "grad_norm": 0.09130859375, "learning_rate": 0.0005182517135194199, "loss": 0.5313, "step": 70910 }, { "epoch": 3.5223999205324326, "grad_norm": 0.1005859375, "learning_rate": 0.0005182119797357704, "loss": 0.5218, "step": 70920 }, { "epoch": 3.522896592828052, "grad_norm": 0.08935546875, "learning_rate": 0.0005181722459521208, "loss": 0.5331, "step": 70930 }, { "epoch": 3.5233932651236715, "grad_norm": 0.09912109375, "learning_rate": 0.0005181325121684713, "loss": 0.5286, "step": 70940 }, { "epoch": 3.5238899374192907, "grad_norm": 0.1044921875, "learning_rate": 0.0005180927783848217, "loss": 0.534, "step": 70950 }, { "epoch": 3.5243866097149104, "grad_norm": 0.1220703125, "learning_rate": 0.0005180530446011722, "loss": 0.5566, "step": 70960 }, { "epoch": 3.5248832820105296, "grad_norm": 0.10107421875, "learning_rate": 0.0005180133108175227, "loss": 0.5477, "step": 70970 }, { "epoch": 3.525379954306149, "grad_norm": 0.10009765625, "learning_rate": 0.0005179735770338731, "loss": 0.512, "step": 70980 }, { "epoch": 3.525876626601768, "grad_norm": 0.0986328125, "learning_rate": 0.0005179338432502236, "loss": 0.5373, "step": 70990 }, { "epoch": 3.5263732988973873, "grad_norm": 0.1259765625, "learning_rate": 0.0005178941094665739, "loss": 0.5523, "step": 71000 }, { "epoch": 3.526869971193007, "grad_norm": 0.11669921875, "learning_rate": 0.0005178543756829244, "loss": 0.5356, "step": 71010 }, { "epoch": 3.527366643488626, "grad_norm": 0.1064453125, "learning_rate": 0.000517814641899275, "loss": 0.4895, "step": 71020 }, { "epoch": 3.5278633157842454, "grad_norm": 0.11572265625, "learning_rate": 0.0005177749081156253, "loss": 0.5625, "step": 71030 }, { "epoch": 3.528359988079865, "grad_norm": 0.1416015625, "learning_rate": 0.0005177351743319758, "loss": 0.5389, "step": 71040 }, { "epoch": 3.5288566603754843, "grad_norm": 0.177734375, "learning_rate": 0.0005176954405483262, "loss": 0.5461, "step": 71050 }, { "epoch": 3.5293533326711035, "grad_norm": 0.1005859375, "learning_rate": 0.0005176557067646766, "loss": 0.5489, "step": 71060 }, { "epoch": 3.5298500049667227, "grad_norm": 0.109375, "learning_rate": 0.0005176159729810272, "loss": 0.5162, "step": 71070 }, { "epoch": 3.5303466772623424, "grad_norm": 0.109375, "learning_rate": 0.0005175762391973776, "loss": 0.5644, "step": 71080 }, { "epoch": 3.5308433495579616, "grad_norm": 0.1103515625, "learning_rate": 0.0005175365054137281, "loss": 0.5453, "step": 71090 }, { "epoch": 3.531340021853581, "grad_norm": 0.12451171875, "learning_rate": 0.0005174967716300785, "loss": 0.5447, "step": 71100 }, { "epoch": 3.5318366941492005, "grad_norm": 0.134765625, "learning_rate": 0.0005174570378464289, "loss": 0.5287, "step": 71110 }, { "epoch": 3.5323333664448198, "grad_norm": 0.0908203125, "learning_rate": 0.0005174173040627795, "loss": 0.5363, "step": 71120 }, { "epoch": 3.532830038740439, "grad_norm": 0.103515625, "learning_rate": 0.0005173775702791299, "loss": 0.5309, "step": 71130 }, { "epoch": 3.533326711036058, "grad_norm": 0.10595703125, "learning_rate": 0.0005173378364954803, "loss": 0.5407, "step": 71140 }, { "epoch": 3.533823383331678, "grad_norm": 0.1572265625, "learning_rate": 0.0005172981027118308, "loss": 0.5334, "step": 71150 }, { "epoch": 3.534320055627297, "grad_norm": 0.09814453125, "learning_rate": 0.0005172583689281812, "loss": 0.5472, "step": 71160 }, { "epoch": 3.5348167279229163, "grad_norm": 0.10107421875, "learning_rate": 0.0005172186351445316, "loss": 0.5391, "step": 71170 }, { "epoch": 3.535313400218536, "grad_norm": 0.1455078125, "learning_rate": 0.0005171789013608822, "loss": 0.5256, "step": 71180 }, { "epoch": 3.5358100725141552, "grad_norm": 0.09765625, "learning_rate": 0.0005171391675772325, "loss": 0.5316, "step": 71190 }, { "epoch": 3.5363067448097745, "grad_norm": 0.11376953125, "learning_rate": 0.000517099433793583, "loss": 0.5215, "step": 71200 }, { "epoch": 3.5368034171053937, "grad_norm": 0.11669921875, "learning_rate": 0.0005170597000099335, "loss": 0.5602, "step": 71210 }, { "epoch": 3.5373000894010134, "grad_norm": 0.115234375, "learning_rate": 0.0005170199662262838, "loss": 0.5639, "step": 71220 }, { "epoch": 3.5377967616966326, "grad_norm": 0.1357421875, "learning_rate": 0.0005169802324426344, "loss": 0.5334, "step": 71230 }, { "epoch": 3.538293433992252, "grad_norm": 0.11962890625, "learning_rate": 0.0005169404986589849, "loss": 0.5257, "step": 71240 }, { "epoch": 3.5387901062878715, "grad_norm": 0.130859375, "learning_rate": 0.0005169007648753353, "loss": 0.5557, "step": 71250 }, { "epoch": 3.5392867785834907, "grad_norm": 0.1123046875, "learning_rate": 0.0005168610310916857, "loss": 0.526, "step": 71260 }, { "epoch": 3.53978345087911, "grad_norm": 0.12060546875, "learning_rate": 0.0005168212973080361, "loss": 0.5291, "step": 71270 }, { "epoch": 3.540280123174729, "grad_norm": 0.1435546875, "learning_rate": 0.0005167815635243867, "loss": 0.5306, "step": 71280 }, { "epoch": 3.5407767954703484, "grad_norm": 0.14453125, "learning_rate": 0.0005167418297407371, "loss": 0.5884, "step": 71290 }, { "epoch": 3.541273467765968, "grad_norm": 0.09765625, "learning_rate": 0.0005167020959570875, "loss": 0.54, "step": 71300 }, { "epoch": 3.5417701400615873, "grad_norm": 0.10595703125, "learning_rate": 0.000516662362173438, "loss": 0.5544, "step": 71310 }, { "epoch": 3.542266812357207, "grad_norm": 0.09765625, "learning_rate": 0.0005166226283897884, "loss": 0.5319, "step": 71320 }, { "epoch": 3.542763484652826, "grad_norm": 0.1240234375, "learning_rate": 0.0005165828946061389, "loss": 0.5144, "step": 71330 }, { "epoch": 3.5432601569484454, "grad_norm": 0.1015625, "learning_rate": 0.0005165431608224894, "loss": 0.5351, "step": 71340 }, { "epoch": 3.5437568292440647, "grad_norm": 0.1015625, "learning_rate": 0.0005165034270388398, "loss": 0.528, "step": 71350 }, { "epoch": 3.544253501539684, "grad_norm": 0.09912109375, "learning_rate": 0.0005164636932551902, "loss": 0.5571, "step": 71360 }, { "epoch": 3.5447501738353036, "grad_norm": 0.10498046875, "learning_rate": 0.0005164239594715408, "loss": 0.5561, "step": 71370 }, { "epoch": 3.545246846130923, "grad_norm": 0.1533203125, "learning_rate": 0.0005163842256878911, "loss": 0.5146, "step": 71380 }, { "epoch": 3.545743518426542, "grad_norm": 0.115234375, "learning_rate": 0.0005163444919042416, "loss": 0.5854, "step": 71390 }, { "epoch": 3.5462401907221617, "grad_norm": 0.10107421875, "learning_rate": 0.0005163047581205921, "loss": 0.5365, "step": 71400 }, { "epoch": 3.546736863017781, "grad_norm": 0.11083984375, "learning_rate": 0.0005162650243369425, "loss": 0.5505, "step": 71410 }, { "epoch": 3.5472335353134, "grad_norm": 0.09619140625, "learning_rate": 0.0005162252905532929, "loss": 0.5291, "step": 71420 }, { "epoch": 3.5477302076090194, "grad_norm": 0.11474609375, "learning_rate": 0.0005161855567696435, "loss": 0.5225, "step": 71430 }, { "epoch": 3.548226879904639, "grad_norm": 0.1201171875, "learning_rate": 0.0005161458229859939, "loss": 0.5633, "step": 71440 }, { "epoch": 3.5487235522002583, "grad_norm": 0.1123046875, "learning_rate": 0.0005161060892023443, "loss": 0.5341, "step": 71450 }, { "epoch": 3.5492202244958775, "grad_norm": 0.1337890625, "learning_rate": 0.0005160663554186947, "loss": 0.5525, "step": 71460 }, { "epoch": 3.549716896791497, "grad_norm": 0.1103515625, "learning_rate": 0.0005160266216350452, "loss": 0.5279, "step": 71470 }, { "epoch": 3.5502135690871164, "grad_norm": 0.1298828125, "learning_rate": 0.0005159868878513957, "loss": 0.5233, "step": 71480 }, { "epoch": 3.5507102413827356, "grad_norm": 0.11376953125, "learning_rate": 0.0005159471540677461, "loss": 0.5602, "step": 71490 }, { "epoch": 3.551206913678355, "grad_norm": 0.177734375, "learning_rate": 0.0005159074202840966, "loss": 0.5283, "step": 71500 }, { "epoch": 3.5517035859739745, "grad_norm": 0.1376953125, "learning_rate": 0.000515867686500447, "loss": 0.5276, "step": 71510 }, { "epoch": 3.5522002582695937, "grad_norm": 0.09521484375, "learning_rate": 0.0005158279527167974, "loss": 0.5337, "step": 71520 }, { "epoch": 3.552696930565213, "grad_norm": 0.123046875, "learning_rate": 0.000515788218933148, "loss": 0.5452, "step": 71530 }, { "epoch": 3.5531936028608326, "grad_norm": 0.1669921875, "learning_rate": 0.0005157484851494984, "loss": 0.5402, "step": 71540 }, { "epoch": 3.553690275156452, "grad_norm": 0.1025390625, "learning_rate": 0.0005157087513658488, "loss": 0.5368, "step": 71550 }, { "epoch": 3.554186947452071, "grad_norm": 0.11181640625, "learning_rate": 0.0005156690175821993, "loss": 0.5163, "step": 71560 }, { "epoch": 3.5546836197476903, "grad_norm": 0.11328125, "learning_rate": 0.0005156292837985497, "loss": 0.5423, "step": 71570 }, { "epoch": 3.55518029204331, "grad_norm": 0.11083984375, "learning_rate": 0.0005155895500149002, "loss": 0.5379, "step": 71580 }, { "epoch": 3.555676964338929, "grad_norm": 0.1318359375, "learning_rate": 0.0005155498162312507, "loss": 0.5347, "step": 71590 }, { "epoch": 3.5561736366345484, "grad_norm": 0.10107421875, "learning_rate": 0.0005155100824476011, "loss": 0.5431, "step": 71600 }, { "epoch": 3.556670308930168, "grad_norm": 0.1298828125, "learning_rate": 0.0005154703486639515, "loss": 0.5104, "step": 71610 }, { "epoch": 3.5571669812257873, "grad_norm": 0.1640625, "learning_rate": 0.000515430614880302, "loss": 0.5474, "step": 71620 }, { "epoch": 3.5576636535214066, "grad_norm": 0.1064453125, "learning_rate": 0.0005153908810966525, "loss": 0.552, "step": 71630 }, { "epoch": 3.558160325817026, "grad_norm": 0.1259765625, "learning_rate": 0.0005153511473130029, "loss": 0.534, "step": 71640 }, { "epoch": 3.558656998112645, "grad_norm": 0.11279296875, "learning_rate": 0.0005153114135293533, "loss": 0.5133, "step": 71650 }, { "epoch": 3.5591536704082647, "grad_norm": 0.146484375, "learning_rate": 0.0005152716797457038, "loss": 0.5526, "step": 71660 }, { "epoch": 3.559650342703884, "grad_norm": 0.11767578125, "learning_rate": 0.0005152319459620542, "loss": 0.5265, "step": 71670 }, { "epoch": 3.5601470149995036, "grad_norm": 0.107421875, "learning_rate": 0.0005151922121784047, "loss": 0.5408, "step": 71680 }, { "epoch": 3.560643687295123, "grad_norm": 0.12109375, "learning_rate": 0.0005151524783947552, "loss": 0.5153, "step": 71690 }, { "epoch": 3.561140359590742, "grad_norm": 0.1142578125, "learning_rate": 0.0005151127446111057, "loss": 0.5274, "step": 71700 }, { "epoch": 3.5616370318863613, "grad_norm": 0.09619140625, "learning_rate": 0.000515073010827456, "loss": 0.5479, "step": 71710 }, { "epoch": 3.5621337041819805, "grad_norm": 0.146484375, "learning_rate": 0.0005150332770438065, "loss": 0.5359, "step": 71720 }, { "epoch": 3.5626303764776, "grad_norm": 0.1005859375, "learning_rate": 0.000514993543260157, "loss": 0.5266, "step": 71730 }, { "epoch": 3.5631270487732194, "grad_norm": 0.1279296875, "learning_rate": 0.0005149538094765074, "loss": 0.5389, "step": 71740 }, { "epoch": 3.5636237210688386, "grad_norm": 0.09912109375, "learning_rate": 0.0005149140756928579, "loss": 0.5269, "step": 71750 }, { "epoch": 3.5641203933644583, "grad_norm": 0.11865234375, "learning_rate": 0.0005148743419092083, "loss": 0.5298, "step": 71760 }, { "epoch": 3.5646170656600775, "grad_norm": 0.1416015625, "learning_rate": 0.0005148346081255587, "loss": 0.5324, "step": 71770 }, { "epoch": 3.5651137379556967, "grad_norm": 0.09130859375, "learning_rate": 0.0005147948743419093, "loss": 0.5094, "step": 71780 }, { "epoch": 3.565610410251316, "grad_norm": 0.12158203125, "learning_rate": 0.0005147551405582597, "loss": 0.541, "step": 71790 }, { "epoch": 3.5661070825469356, "grad_norm": 0.1591796875, "learning_rate": 0.0005147154067746101, "loss": 0.5141, "step": 71800 }, { "epoch": 3.566603754842555, "grad_norm": 0.1474609375, "learning_rate": 0.0005146756729909606, "loss": 0.5217, "step": 71810 }, { "epoch": 3.567100427138174, "grad_norm": 0.1669921875, "learning_rate": 0.000514635939207311, "loss": 0.5366, "step": 71820 }, { "epoch": 3.5675970994337938, "grad_norm": 0.130859375, "learning_rate": 0.0005145962054236616, "loss": 0.542, "step": 71830 }, { "epoch": 3.568093771729413, "grad_norm": 0.115234375, "learning_rate": 0.0005145564716400119, "loss": 0.5197, "step": 71840 }, { "epoch": 3.5685904440250322, "grad_norm": 0.169921875, "learning_rate": 0.0005145167378563624, "loss": 0.5421, "step": 71850 }, { "epoch": 3.5690871163206515, "grad_norm": 0.11962890625, "learning_rate": 0.0005144770040727129, "loss": 0.5638, "step": 71860 }, { "epoch": 3.569583788616271, "grad_norm": 0.11083984375, "learning_rate": 0.0005144372702890632, "loss": 0.501, "step": 71870 }, { "epoch": 3.5700804609118904, "grad_norm": 0.1318359375, "learning_rate": 0.0005143975365054138, "loss": 0.5251, "step": 71880 }, { "epoch": 3.5705771332075096, "grad_norm": 0.1123046875, "learning_rate": 0.0005143578027217643, "loss": 0.5545, "step": 71890 }, { "epoch": 3.5710738055031293, "grad_norm": 0.11328125, "learning_rate": 0.0005143180689381146, "loss": 0.5231, "step": 71900 }, { "epoch": 3.5715704777987485, "grad_norm": 0.1025390625, "learning_rate": 0.0005142783351544651, "loss": 0.5064, "step": 71910 }, { "epoch": 3.5720671500943677, "grad_norm": 0.11376953125, "learning_rate": 0.0005142386013708155, "loss": 0.5371, "step": 71920 }, { "epoch": 3.572563822389987, "grad_norm": 0.1328125, "learning_rate": 0.000514198867587166, "loss": 0.5559, "step": 71930 }, { "epoch": 3.5730604946856066, "grad_norm": 0.142578125, "learning_rate": 0.0005141591338035165, "loss": 0.5532, "step": 71940 }, { "epoch": 3.573557166981226, "grad_norm": 0.09521484375, "learning_rate": 0.0005141194000198669, "loss": 0.5337, "step": 71950 }, { "epoch": 3.574053839276845, "grad_norm": 0.12158203125, "learning_rate": 0.0005140796662362173, "loss": 0.5438, "step": 71960 }, { "epoch": 3.5745505115724647, "grad_norm": 0.09716796875, "learning_rate": 0.0005140399324525678, "loss": 0.507, "step": 71970 }, { "epoch": 3.575047183868084, "grad_norm": 0.0986328125, "learning_rate": 0.0005140001986689183, "loss": 0.5545, "step": 71980 }, { "epoch": 3.575543856163703, "grad_norm": 0.1455078125, "learning_rate": 0.0005139604648852688, "loss": 0.5334, "step": 71990 }, { "epoch": 3.5760405284593224, "grad_norm": 0.11572265625, "learning_rate": 0.0005139207311016192, "loss": 0.5445, "step": 72000 }, { "epoch": 3.5765372007549416, "grad_norm": 0.12060546875, "learning_rate": 0.0005138809973179696, "loss": 0.5251, "step": 72010 }, { "epoch": 3.5770338730505613, "grad_norm": 0.1669921875, "learning_rate": 0.0005138412635343201, "loss": 0.5447, "step": 72020 }, { "epoch": 3.5775305453461805, "grad_norm": 0.09765625, "learning_rate": 0.0005138015297506706, "loss": 0.5429, "step": 72030 }, { "epoch": 3.5780272176418, "grad_norm": 0.111328125, "learning_rate": 0.000513761795967021, "loss": 0.5255, "step": 72040 }, { "epoch": 3.5785238899374194, "grad_norm": 0.1259765625, "learning_rate": 0.0005137220621833715, "loss": 0.5466, "step": 72050 }, { "epoch": 3.5790205622330387, "grad_norm": 0.10400390625, "learning_rate": 0.0005136823283997218, "loss": 0.5773, "step": 72060 }, { "epoch": 3.579517234528658, "grad_norm": 0.10009765625, "learning_rate": 0.0005136425946160723, "loss": 0.5379, "step": 72070 }, { "epoch": 3.580013906824277, "grad_norm": 0.111328125, "learning_rate": 0.0005136028608324229, "loss": 0.5583, "step": 72080 }, { "epoch": 3.580510579119897, "grad_norm": 0.1279296875, "learning_rate": 0.0005135631270487732, "loss": 0.5559, "step": 72090 }, { "epoch": 3.581007251415516, "grad_norm": 0.11572265625, "learning_rate": 0.0005135233932651237, "loss": 0.5188, "step": 72100 }, { "epoch": 3.5815039237111352, "grad_norm": 0.099609375, "learning_rate": 0.0005134836594814741, "loss": 0.5299, "step": 72110 }, { "epoch": 3.582000596006755, "grad_norm": 0.107421875, "learning_rate": 0.0005134439256978245, "loss": 0.522, "step": 72120 }, { "epoch": 3.582497268302374, "grad_norm": 0.1259765625, "learning_rate": 0.0005134041919141751, "loss": 0.5496, "step": 72130 }, { "epoch": 3.5829939405979934, "grad_norm": 0.11083984375, "learning_rate": 0.0005133644581305255, "loss": 0.5553, "step": 72140 }, { "epoch": 3.5834906128936126, "grad_norm": 0.1337890625, "learning_rate": 0.000513324724346876, "loss": 0.5767, "step": 72150 }, { "epoch": 3.5839872851892323, "grad_norm": 0.10400390625, "learning_rate": 0.0005132849905632264, "loss": 0.5259, "step": 72160 }, { "epoch": 3.5844839574848515, "grad_norm": 0.10009765625, "learning_rate": 0.0005132452567795768, "loss": 0.546, "step": 72170 }, { "epoch": 3.5849806297804707, "grad_norm": 0.1474609375, "learning_rate": 0.0005132055229959274, "loss": 0.5524, "step": 72180 }, { "epoch": 3.5854773020760904, "grad_norm": 0.146484375, "learning_rate": 0.0005131657892122778, "loss": 0.5292, "step": 72190 }, { "epoch": 3.5859739743717096, "grad_norm": 0.1083984375, "learning_rate": 0.0005131260554286282, "loss": 0.5343, "step": 72200 }, { "epoch": 3.586470646667329, "grad_norm": 0.12158203125, "learning_rate": 0.0005130863216449787, "loss": 0.5208, "step": 72210 }, { "epoch": 3.586967318962948, "grad_norm": 0.09521484375, "learning_rate": 0.0005130465878613291, "loss": 0.5311, "step": 72220 }, { "epoch": 3.5874639912585677, "grad_norm": 0.203125, "learning_rate": 0.0005130068540776796, "loss": 0.5472, "step": 72230 }, { "epoch": 3.587960663554187, "grad_norm": 0.1044921875, "learning_rate": 0.0005129671202940301, "loss": 0.5617, "step": 72240 }, { "epoch": 3.588457335849806, "grad_norm": 0.1123046875, "learning_rate": 0.0005129273865103804, "loss": 0.5349, "step": 72250 }, { "epoch": 3.588954008145426, "grad_norm": 0.099609375, "learning_rate": 0.0005128876527267309, "loss": 0.5385, "step": 72260 }, { "epoch": 3.589450680441045, "grad_norm": 0.09619140625, "learning_rate": 0.0005128479189430814, "loss": 0.5568, "step": 72270 }, { "epoch": 3.5899473527366643, "grad_norm": 0.115234375, "learning_rate": 0.0005128081851594319, "loss": 0.5277, "step": 72280 }, { "epoch": 3.5904440250322835, "grad_norm": 0.103515625, "learning_rate": 0.0005127684513757823, "loss": 0.5236, "step": 72290 }, { "epoch": 3.5909406973279028, "grad_norm": 0.12890625, "learning_rate": 0.0005127287175921328, "loss": 0.5263, "step": 72300 }, { "epoch": 3.5914373696235224, "grad_norm": 0.099609375, "learning_rate": 0.0005126889838084832, "loss": 0.5068, "step": 72310 }, { "epoch": 3.5919340419191417, "grad_norm": 0.1513671875, "learning_rate": 0.0005126492500248336, "loss": 0.5226, "step": 72320 }, { "epoch": 3.5924307142147613, "grad_norm": 0.1162109375, "learning_rate": 0.000512609516241184, "loss": 0.526, "step": 72330 }, { "epoch": 3.5929273865103806, "grad_norm": 0.0986328125, "learning_rate": 0.0005125697824575346, "loss": 0.5258, "step": 72340 }, { "epoch": 3.593424058806, "grad_norm": 0.14453125, "learning_rate": 0.000512530048673885, "loss": 0.5363, "step": 72350 }, { "epoch": 3.593920731101619, "grad_norm": 0.10986328125, "learning_rate": 0.0005124903148902354, "loss": 0.5559, "step": 72360 }, { "epoch": 3.5944174033972383, "grad_norm": 0.1259765625, "learning_rate": 0.0005124505811065859, "loss": 0.5435, "step": 72370 }, { "epoch": 3.594914075692858, "grad_norm": 0.1552734375, "learning_rate": 0.0005124108473229364, "loss": 0.5365, "step": 72380 }, { "epoch": 3.595410747988477, "grad_norm": 0.12158203125, "learning_rate": 0.0005123711135392868, "loss": 0.5493, "step": 72390 }, { "epoch": 3.595907420284097, "grad_norm": 0.0986328125, "learning_rate": 0.0005123313797556373, "loss": 0.4943, "step": 72400 }, { "epoch": 3.596404092579716, "grad_norm": 0.1337890625, "learning_rate": 0.0005122916459719877, "loss": 0.5471, "step": 72410 }, { "epoch": 3.5969007648753353, "grad_norm": 0.12060546875, "learning_rate": 0.0005122519121883381, "loss": 0.5154, "step": 72420 }, { "epoch": 3.5973974371709545, "grad_norm": 0.1396484375, "learning_rate": 0.0005122121784046887, "loss": 0.5237, "step": 72430 }, { "epoch": 3.5978941094665737, "grad_norm": 0.099609375, "learning_rate": 0.0005121724446210391, "loss": 0.5129, "step": 72440 }, { "epoch": 3.5983907817621934, "grad_norm": 0.1396484375, "learning_rate": 0.0005121327108373895, "loss": 0.5398, "step": 72450 }, { "epoch": 3.5988874540578126, "grad_norm": 0.09423828125, "learning_rate": 0.00051209297705374, "loss": 0.5368, "step": 72460 }, { "epoch": 3.599384126353432, "grad_norm": 0.130859375, "learning_rate": 0.0005120532432700904, "loss": 0.5295, "step": 72470 }, { "epoch": 3.5998807986490515, "grad_norm": 0.11328125, "learning_rate": 0.0005120135094864408, "loss": 0.5352, "step": 72480 }, { "epoch": 3.6003774709446708, "grad_norm": 0.11962890625, "learning_rate": 0.0005119737757027914, "loss": 0.5417, "step": 72490 }, { "epoch": 3.60087414324029, "grad_norm": 0.09326171875, "learning_rate": 0.0005119340419191418, "loss": 0.5551, "step": 72500 }, { "epoch": 3.601370815535909, "grad_norm": 0.099609375, "learning_rate": 0.0005118943081354922, "loss": 0.5224, "step": 72510 }, { "epoch": 3.601867487831529, "grad_norm": 0.1474609375, "learning_rate": 0.0005118545743518426, "loss": 0.5196, "step": 72520 }, { "epoch": 3.602364160127148, "grad_norm": 0.1064453125, "learning_rate": 0.0005118148405681932, "loss": 0.5537, "step": 72530 }, { "epoch": 3.6028608324227673, "grad_norm": 0.09716796875, "learning_rate": 0.0005117751067845436, "loss": 0.5389, "step": 72540 }, { "epoch": 3.603357504718387, "grad_norm": 0.10205078125, "learning_rate": 0.000511735373000894, "loss": 0.5216, "step": 72550 }, { "epoch": 3.6038541770140062, "grad_norm": 0.0966796875, "learning_rate": 0.0005116956392172445, "loss": 0.5322, "step": 72560 }, { "epoch": 3.6043508493096255, "grad_norm": 0.12255859375, "learning_rate": 0.0005116559054335949, "loss": 0.5167, "step": 72570 }, { "epoch": 3.6048475216052447, "grad_norm": 0.10498046875, "learning_rate": 0.0005116161716499453, "loss": 0.5366, "step": 72580 }, { "epoch": 3.6053441939008644, "grad_norm": 0.1220703125, "learning_rate": 0.0005115764378662959, "loss": 0.5253, "step": 72590 }, { "epoch": 3.6058408661964836, "grad_norm": 0.1396484375, "learning_rate": 0.0005115367040826463, "loss": 0.5323, "step": 72600 }, { "epoch": 3.606337538492103, "grad_norm": 0.09326171875, "learning_rate": 0.0005114969702989967, "loss": 0.5361, "step": 72610 }, { "epoch": 3.6068342107877225, "grad_norm": 0.09912109375, "learning_rate": 0.0005114572365153472, "loss": 0.5282, "step": 72620 }, { "epoch": 3.6073308830833417, "grad_norm": 0.10693359375, "learning_rate": 0.0005114175027316976, "loss": 0.5132, "step": 72630 }, { "epoch": 3.607827555378961, "grad_norm": 0.10107421875, "learning_rate": 0.0005113777689480481, "loss": 0.5314, "step": 72640 }, { "epoch": 3.60832422767458, "grad_norm": 0.126953125, "learning_rate": 0.0005113380351643986, "loss": 0.5729, "step": 72650 }, { "epoch": 3.6088208999701994, "grad_norm": 0.09765625, "learning_rate": 0.000511298301380749, "loss": 0.5214, "step": 72660 }, { "epoch": 3.609317572265819, "grad_norm": 0.138671875, "learning_rate": 0.0005112585675970994, "loss": 0.5506, "step": 72670 }, { "epoch": 3.6098142445614383, "grad_norm": 0.10205078125, "learning_rate": 0.00051121883381345, "loss": 0.5271, "step": 72680 }, { "epoch": 3.610310916857058, "grad_norm": 0.09814453125, "learning_rate": 0.0005111791000298004, "loss": 0.5279, "step": 72690 }, { "epoch": 3.610807589152677, "grad_norm": 0.11328125, "learning_rate": 0.0005111393662461508, "loss": 0.5279, "step": 72700 }, { "epoch": 3.6113042614482964, "grad_norm": 0.11279296875, "learning_rate": 0.0005110996324625012, "loss": 0.5447, "step": 72710 }, { "epoch": 3.6118009337439156, "grad_norm": 0.12890625, "learning_rate": 0.0005110598986788517, "loss": 0.5431, "step": 72720 }, { "epoch": 3.612297606039535, "grad_norm": 0.119140625, "learning_rate": 0.0005110201648952023, "loss": 0.5097, "step": 72730 }, { "epoch": 3.6127942783351545, "grad_norm": 0.1044921875, "learning_rate": 0.0005109804311115526, "loss": 0.5035, "step": 72740 }, { "epoch": 3.6132909506307738, "grad_norm": 0.15234375, "learning_rate": 0.0005109406973279031, "loss": 0.5387, "step": 72750 }, { "epoch": 3.6137876229263934, "grad_norm": 0.109375, "learning_rate": 0.0005109009635442536, "loss": 0.5378, "step": 72760 }, { "epoch": 3.6142842952220127, "grad_norm": 0.1220703125, "learning_rate": 0.0005108612297606039, "loss": 0.5338, "step": 72770 }, { "epoch": 3.614780967517632, "grad_norm": 0.099609375, "learning_rate": 0.0005108214959769544, "loss": 0.5323, "step": 72780 }, { "epoch": 3.615277639813251, "grad_norm": 0.1064453125, "learning_rate": 0.0005107817621933049, "loss": 0.54, "step": 72790 }, { "epoch": 3.6157743121088703, "grad_norm": 0.1025390625, "learning_rate": 0.0005107420284096553, "loss": 0.5103, "step": 72800 }, { "epoch": 3.61627098440449, "grad_norm": 0.1279296875, "learning_rate": 0.0005107022946260058, "loss": 0.5156, "step": 72810 }, { "epoch": 3.6167676567001092, "grad_norm": 0.1416015625, "learning_rate": 0.0005106625608423562, "loss": 0.5502, "step": 72820 }, { "epoch": 3.6172643289957285, "grad_norm": 0.09765625, "learning_rate": 0.0005106228270587066, "loss": 0.5266, "step": 72830 }, { "epoch": 3.617761001291348, "grad_norm": 0.1005859375, "learning_rate": 0.0005105830932750572, "loss": 0.564, "step": 72840 }, { "epoch": 3.6182576735869674, "grad_norm": 0.10009765625, "learning_rate": 0.0005105433594914076, "loss": 0.55, "step": 72850 }, { "epoch": 3.6187543458825866, "grad_norm": 0.115234375, "learning_rate": 0.000510503625707758, "loss": 0.5557, "step": 72860 }, { "epoch": 3.619251018178206, "grad_norm": 0.11474609375, "learning_rate": 0.0005104638919241085, "loss": 0.5395, "step": 72870 }, { "epoch": 3.6197476904738255, "grad_norm": 0.130859375, "learning_rate": 0.0005104241581404589, "loss": 0.5113, "step": 72880 }, { "epoch": 3.6202443627694447, "grad_norm": 0.10595703125, "learning_rate": 0.0005103844243568095, "loss": 0.5233, "step": 72890 }, { "epoch": 3.620741035065064, "grad_norm": 0.1005859375, "learning_rate": 0.0005103446905731599, "loss": 0.5474, "step": 72900 }, { "epoch": 3.6212377073606836, "grad_norm": 0.0966796875, "learning_rate": 0.0005103049567895103, "loss": 0.5339, "step": 72910 }, { "epoch": 3.621734379656303, "grad_norm": 0.1240234375, "learning_rate": 0.0005102652230058608, "loss": 0.5733, "step": 72920 }, { "epoch": 3.622231051951922, "grad_norm": 0.10400390625, "learning_rate": 0.0005102254892222111, "loss": 0.5275, "step": 72930 }, { "epoch": 3.6227277242475413, "grad_norm": 0.1357421875, "learning_rate": 0.0005101857554385617, "loss": 0.5442, "step": 72940 }, { "epoch": 3.623224396543161, "grad_norm": 0.0986328125, "learning_rate": 0.0005101460216549122, "loss": 0.5314, "step": 72950 }, { "epoch": 3.62372106883878, "grad_norm": 0.1025390625, "learning_rate": 0.0005101062878712625, "loss": 0.5269, "step": 72960 }, { "epoch": 3.6242177411343994, "grad_norm": 0.1025390625, "learning_rate": 0.000510066554087613, "loss": 0.5378, "step": 72970 }, { "epoch": 3.624714413430019, "grad_norm": 0.11962890625, "learning_rate": 0.0005100268203039634, "loss": 0.5608, "step": 72980 }, { "epoch": 3.6252110857256383, "grad_norm": 0.10888671875, "learning_rate": 0.0005099870865203139, "loss": 0.5212, "step": 72990 }, { "epoch": 3.6257077580212576, "grad_norm": 0.10400390625, "learning_rate": 0.0005099473527366644, "loss": 0.5278, "step": 73000 }, { "epoch": 3.626204430316877, "grad_norm": 0.09716796875, "learning_rate": 0.0005099076189530148, "loss": 0.5407, "step": 73010 }, { "epoch": 3.626701102612496, "grad_norm": 0.0986328125, "learning_rate": 0.0005098678851693652, "loss": 0.5108, "step": 73020 }, { "epoch": 3.6271977749081157, "grad_norm": 0.1025390625, "learning_rate": 0.0005098281513857157, "loss": 0.553, "step": 73030 }, { "epoch": 3.627694447203735, "grad_norm": 0.11328125, "learning_rate": 0.0005097884176020662, "loss": 0.5371, "step": 73040 }, { "epoch": 3.6281911194993546, "grad_norm": 0.1005859375, "learning_rate": 0.0005097486838184167, "loss": 0.5203, "step": 73050 }, { "epoch": 3.628687791794974, "grad_norm": 0.1005859375, "learning_rate": 0.0005097089500347671, "loss": 0.5108, "step": 73060 }, { "epoch": 3.629184464090593, "grad_norm": 0.1455078125, "learning_rate": 0.0005096692162511175, "loss": 0.5054, "step": 73070 }, { "epoch": 3.6296811363862123, "grad_norm": 0.1103515625, "learning_rate": 0.000509629482467468, "loss": 0.5246, "step": 73080 }, { "epoch": 3.6301778086818315, "grad_norm": 0.0927734375, "learning_rate": 0.0005095897486838185, "loss": 0.5111, "step": 73090 }, { "epoch": 3.630674480977451, "grad_norm": 0.1181640625, "learning_rate": 0.0005095500149001689, "loss": 0.5068, "step": 73100 }, { "epoch": 3.6311711532730704, "grad_norm": 0.11669921875, "learning_rate": 0.0005095102811165194, "loss": 0.5182, "step": 73110 }, { "epoch": 3.63166782556869, "grad_norm": 0.1357421875, "learning_rate": 0.0005094705473328697, "loss": 0.5182, "step": 73120 }, { "epoch": 3.6321644978643093, "grad_norm": 0.10400390625, "learning_rate": 0.0005094308135492202, "loss": 0.5534, "step": 73130 }, { "epoch": 3.6326611701599285, "grad_norm": 0.1337890625, "learning_rate": 0.0005093910797655708, "loss": 0.553, "step": 73140 }, { "epoch": 3.6331578424555477, "grad_norm": 0.125, "learning_rate": 0.0005093513459819211, "loss": 0.5526, "step": 73150 }, { "epoch": 3.633654514751167, "grad_norm": 0.10009765625, "learning_rate": 0.0005093116121982716, "loss": 0.5402, "step": 73160 }, { "epoch": 3.6341511870467866, "grad_norm": 0.1318359375, "learning_rate": 0.0005092718784146221, "loss": 0.515, "step": 73170 }, { "epoch": 3.634647859342406, "grad_norm": 0.126953125, "learning_rate": 0.0005092321446309725, "loss": 0.5568, "step": 73180 }, { "epoch": 3.635144531638025, "grad_norm": 0.1171875, "learning_rate": 0.000509192410847323, "loss": 0.5495, "step": 73190 }, { "epoch": 3.6356412039336448, "grad_norm": 0.11669921875, "learning_rate": 0.0005091526770636734, "loss": 0.5464, "step": 73200 }, { "epoch": 3.636137876229264, "grad_norm": 0.1298828125, "learning_rate": 0.0005091129432800239, "loss": 0.5332, "step": 73210 }, { "epoch": 3.636634548524883, "grad_norm": 0.23828125, "learning_rate": 0.0005090732094963743, "loss": 0.5506, "step": 73220 }, { "epoch": 3.6371312208205024, "grad_norm": 0.1513671875, "learning_rate": 0.0005090334757127247, "loss": 0.5535, "step": 73230 }, { "epoch": 3.637627893116122, "grad_norm": 0.109375, "learning_rate": 0.0005089937419290753, "loss": 0.5442, "step": 73240 }, { "epoch": 3.6381245654117413, "grad_norm": 0.1142578125, "learning_rate": 0.0005089540081454257, "loss": 0.5157, "step": 73250 }, { "epoch": 3.6386212377073606, "grad_norm": 0.1181640625, "learning_rate": 0.0005089142743617761, "loss": 0.5133, "step": 73260 }, { "epoch": 3.6391179100029802, "grad_norm": 0.12158203125, "learning_rate": 0.0005088745405781266, "loss": 0.5598, "step": 73270 }, { "epoch": 3.6396145822985995, "grad_norm": 0.11279296875, "learning_rate": 0.000508834806794477, "loss": 0.5106, "step": 73280 }, { "epoch": 3.6401112545942187, "grad_norm": 0.11181640625, "learning_rate": 0.0005087950730108275, "loss": 0.5276, "step": 73290 }, { "epoch": 3.640607926889838, "grad_norm": 0.138671875, "learning_rate": 0.000508755339227178, "loss": 0.5294, "step": 73300 }, { "epoch": 3.6411045991854576, "grad_norm": 0.10009765625, "learning_rate": 0.0005087156054435283, "loss": 0.5231, "step": 73310 }, { "epoch": 3.641601271481077, "grad_norm": 0.1025390625, "learning_rate": 0.0005086758716598788, "loss": 0.5216, "step": 73320 }, { "epoch": 3.642097943776696, "grad_norm": 0.138671875, "learning_rate": 0.0005086361378762293, "loss": 0.536, "step": 73330 }, { "epoch": 3.6425946160723157, "grad_norm": 0.10009765625, "learning_rate": 0.0005085964040925798, "loss": 0.5273, "step": 73340 }, { "epoch": 3.643091288367935, "grad_norm": 0.1416015625, "learning_rate": 0.0005085566703089302, "loss": 0.5331, "step": 73350 }, { "epoch": 3.643587960663554, "grad_norm": 0.09423828125, "learning_rate": 0.0005085169365252807, "loss": 0.5148, "step": 73360 }, { "epoch": 3.6440846329591734, "grad_norm": 0.11865234375, "learning_rate": 0.0005084772027416311, "loss": 0.5313, "step": 73370 }, { "epoch": 3.6445813052547926, "grad_norm": 0.10498046875, "learning_rate": 0.0005084374689579815, "loss": 0.5294, "step": 73380 }, { "epoch": 3.6450779775504123, "grad_norm": 0.0927734375, "learning_rate": 0.000508397735174332, "loss": 0.5306, "step": 73390 }, { "epoch": 3.6455746498460315, "grad_norm": 0.10888671875, "learning_rate": 0.0005083580013906825, "loss": 0.5003, "step": 73400 }, { "epoch": 3.646071322141651, "grad_norm": 0.09619140625, "learning_rate": 0.0005083182676070329, "loss": 0.5395, "step": 73410 }, { "epoch": 3.6465679944372704, "grad_norm": 0.11865234375, "learning_rate": 0.0005082785338233833, "loss": 0.5017, "step": 73420 }, { "epoch": 3.6470646667328896, "grad_norm": 0.11279296875, "learning_rate": 0.0005082388000397338, "loss": 0.5296, "step": 73430 }, { "epoch": 3.647561339028509, "grad_norm": 0.0947265625, "learning_rate": 0.0005081990662560843, "loss": 0.5262, "step": 73440 }, { "epoch": 3.648058011324128, "grad_norm": 0.0966796875, "learning_rate": 0.0005081593324724347, "loss": 0.5517, "step": 73450 }, { "epoch": 3.6485546836197478, "grad_norm": 0.10205078125, "learning_rate": 0.0005081195986887852, "loss": 0.5157, "step": 73460 }, { "epoch": 3.649051355915367, "grad_norm": 0.09814453125, "learning_rate": 0.0005080798649051356, "loss": 0.5272, "step": 73470 }, { "epoch": 3.6495480282109862, "grad_norm": 0.0947265625, "learning_rate": 0.000508040131121486, "loss": 0.5168, "step": 73480 }, { "epoch": 3.650044700506606, "grad_norm": 0.1083984375, "learning_rate": 0.0005080003973378366, "loss": 0.5559, "step": 73490 }, { "epoch": 3.650541372802225, "grad_norm": 0.09912109375, "learning_rate": 0.000507960663554187, "loss": 0.4941, "step": 73500 }, { "epoch": 3.6510380450978444, "grad_norm": 0.11181640625, "learning_rate": 0.0005079209297705374, "loss": 0.5256, "step": 73510 }, { "epoch": 3.6515347173934636, "grad_norm": 0.11328125, "learning_rate": 0.0005078811959868879, "loss": 0.5269, "step": 73520 }, { "epoch": 3.6520313896890833, "grad_norm": 0.11474609375, "learning_rate": 0.0005078414622032383, "loss": 0.5148, "step": 73530 }, { "epoch": 3.6525280619847025, "grad_norm": 0.10205078125, "learning_rate": 0.0005078017284195888, "loss": 0.5282, "step": 73540 }, { "epoch": 3.6530247342803217, "grad_norm": 0.1279296875, "learning_rate": 0.0005077619946359393, "loss": 0.5345, "step": 73550 }, { "epoch": 3.6535214065759414, "grad_norm": 0.103515625, "learning_rate": 0.0005077222608522897, "loss": 0.5298, "step": 73560 }, { "epoch": 3.6540180788715606, "grad_norm": 0.10595703125, "learning_rate": 0.0005076825270686401, "loss": 0.5444, "step": 73570 }, { "epoch": 3.65451475116718, "grad_norm": 0.189453125, "learning_rate": 0.0005076427932849905, "loss": 0.5165, "step": 73580 }, { "epoch": 3.655011423462799, "grad_norm": 0.11572265625, "learning_rate": 0.0005076030595013411, "loss": 0.5404, "step": 73590 }, { "epoch": 3.6555080957584187, "grad_norm": 0.1142578125, "learning_rate": 0.0005075633257176915, "loss": 0.5279, "step": 73600 }, { "epoch": 3.656004768054038, "grad_norm": 0.16796875, "learning_rate": 0.0005075235919340419, "loss": 0.5495, "step": 73610 }, { "epoch": 3.656501440349657, "grad_norm": 0.11767578125, "learning_rate": 0.0005074838581503924, "loss": 0.5339, "step": 73620 }, { "epoch": 3.656998112645277, "grad_norm": 0.140625, "learning_rate": 0.0005074441243667429, "loss": 0.4827, "step": 73630 }, { "epoch": 3.657494784940896, "grad_norm": 0.1484375, "learning_rate": 0.0005074043905830932, "loss": 0.5288, "step": 73640 }, { "epoch": 3.6579914572365153, "grad_norm": 0.11279296875, "learning_rate": 0.0005073646567994438, "loss": 0.5439, "step": 73650 }, { "epoch": 3.6584881295321345, "grad_norm": 0.1650390625, "learning_rate": 0.0005073249230157942, "loss": 0.5207, "step": 73660 }, { "epoch": 3.658984801827754, "grad_norm": 0.1357421875, "learning_rate": 0.0005072851892321446, "loss": 0.5458, "step": 73670 }, { "epoch": 3.6594814741233734, "grad_norm": 0.12255859375, "learning_rate": 0.0005072454554484951, "loss": 0.5007, "step": 73680 }, { "epoch": 3.6599781464189927, "grad_norm": 0.10009765625, "learning_rate": 0.0005072057216648456, "loss": 0.5203, "step": 73690 }, { "epoch": 3.6604748187146123, "grad_norm": 0.1376953125, "learning_rate": 0.000507165987881196, "loss": 0.5408, "step": 73700 }, { "epoch": 3.6609714910102316, "grad_norm": 0.11181640625, "learning_rate": 0.0005071262540975465, "loss": 0.5383, "step": 73710 }, { "epoch": 3.661468163305851, "grad_norm": 0.103515625, "learning_rate": 0.0005070865203138969, "loss": 0.5536, "step": 73720 }, { "epoch": 3.66196483560147, "grad_norm": 0.11376953125, "learning_rate": 0.0005070467865302473, "loss": 0.5546, "step": 73730 }, { "epoch": 3.6624615078970892, "grad_norm": 0.11083984375, "learning_rate": 0.0005070070527465979, "loss": 0.5166, "step": 73740 }, { "epoch": 3.662958180192709, "grad_norm": 0.0986328125, "learning_rate": 0.0005069673189629483, "loss": 0.5149, "step": 73750 }, { "epoch": 3.663454852488328, "grad_norm": 0.099609375, "learning_rate": 0.0005069275851792987, "loss": 0.5278, "step": 73760 }, { "epoch": 3.663951524783948, "grad_norm": 0.12353515625, "learning_rate": 0.0005068878513956492, "loss": 0.5433, "step": 73770 }, { "epoch": 3.664448197079567, "grad_norm": 0.09814453125, "learning_rate": 0.0005068481176119996, "loss": 0.515, "step": 73780 }, { "epoch": 3.6649448693751863, "grad_norm": 0.1064453125, "learning_rate": 0.0005068083838283502, "loss": 0.5293, "step": 73790 }, { "epoch": 3.6654415416708055, "grad_norm": 0.13671875, "learning_rate": 0.0005067686500447005, "loss": 0.5491, "step": 73800 }, { "epoch": 3.6659382139664247, "grad_norm": 0.1083984375, "learning_rate": 0.000506728916261051, "loss": 0.5319, "step": 73810 }, { "epoch": 3.6664348862620444, "grad_norm": 0.11083984375, "learning_rate": 0.0005066891824774015, "loss": 0.5304, "step": 73820 }, { "epoch": 3.6669315585576636, "grad_norm": 0.1435546875, "learning_rate": 0.0005066494486937518, "loss": 0.5555, "step": 73830 }, { "epoch": 3.667428230853283, "grad_norm": 0.10986328125, "learning_rate": 0.0005066097149101024, "loss": 0.5552, "step": 73840 }, { "epoch": 3.6679249031489025, "grad_norm": 0.11376953125, "learning_rate": 0.0005065699811264528, "loss": 0.531, "step": 73850 }, { "epoch": 3.6684215754445217, "grad_norm": 0.130859375, "learning_rate": 0.0005065302473428032, "loss": 0.5315, "step": 73860 }, { "epoch": 3.668918247740141, "grad_norm": 0.10107421875, "learning_rate": 0.0005064905135591537, "loss": 0.5336, "step": 73870 }, { "epoch": 3.66941492003576, "grad_norm": 0.10791015625, "learning_rate": 0.0005064507797755041, "loss": 0.5378, "step": 73880 }, { "epoch": 3.66991159233138, "grad_norm": 0.1416015625, "learning_rate": 0.0005064110459918545, "loss": 0.5245, "step": 73890 }, { "epoch": 3.670408264626999, "grad_norm": 0.10595703125, "learning_rate": 0.0005063713122082051, "loss": 0.5215, "step": 73900 }, { "epoch": 3.6709049369226183, "grad_norm": 0.1103515625, "learning_rate": 0.0005063315784245555, "loss": 0.526, "step": 73910 }, { "epoch": 3.671401609218238, "grad_norm": 0.11279296875, "learning_rate": 0.0005062918446409059, "loss": 0.5362, "step": 73920 }, { "epoch": 3.671898281513857, "grad_norm": 0.09716796875, "learning_rate": 0.0005062521108572564, "loss": 0.5355, "step": 73930 }, { "epoch": 3.6723949538094764, "grad_norm": 0.115234375, "learning_rate": 0.0005062123770736068, "loss": 0.5274, "step": 73940 }, { "epoch": 3.6728916261050957, "grad_norm": 0.1171875, "learning_rate": 0.0005061726432899574, "loss": 0.5348, "step": 73950 }, { "epoch": 3.6733882984007153, "grad_norm": 0.18359375, "learning_rate": 0.0005061329095063078, "loss": 0.4846, "step": 73960 }, { "epoch": 3.6738849706963346, "grad_norm": 0.126953125, "learning_rate": 0.0005060931757226582, "loss": 0.5432, "step": 73970 }, { "epoch": 3.674381642991954, "grad_norm": 0.1142578125, "learning_rate": 0.0005060534419390087, "loss": 0.5665, "step": 73980 }, { "epoch": 3.6748783152875735, "grad_norm": 0.09814453125, "learning_rate": 0.000506013708155359, "loss": 0.5178, "step": 73990 }, { "epoch": 3.6753749875831927, "grad_norm": 0.1494140625, "learning_rate": 0.0005059739743717096, "loss": 0.5502, "step": 74000 }, { "epoch": 3.675871659878812, "grad_norm": 0.150390625, "learning_rate": 0.0005059342405880601, "loss": 0.5316, "step": 74010 }, { "epoch": 3.676368332174431, "grad_norm": 0.0986328125, "learning_rate": 0.0005058945068044104, "loss": 0.521, "step": 74020 }, { "epoch": 3.676865004470051, "grad_norm": 0.1748046875, "learning_rate": 0.0005058547730207609, "loss": 0.5134, "step": 74030 }, { "epoch": 3.67736167676567, "grad_norm": 0.142578125, "learning_rate": 0.0005058150392371115, "loss": 0.5453, "step": 74040 }, { "epoch": 3.6778583490612893, "grad_norm": 0.115234375, "learning_rate": 0.0005057753054534618, "loss": 0.5439, "step": 74050 }, { "epoch": 3.678355021356909, "grad_norm": 0.150390625, "learning_rate": 0.0005057355716698123, "loss": 0.5378, "step": 74060 }, { "epoch": 3.678851693652528, "grad_norm": 0.11767578125, "learning_rate": 0.0005056958378861627, "loss": 0.5359, "step": 74070 }, { "epoch": 3.6793483659481474, "grad_norm": 0.1318359375, "learning_rate": 0.0005056561041025132, "loss": 0.5216, "step": 74080 }, { "epoch": 3.6798450382437666, "grad_norm": 0.09765625, "learning_rate": 0.0005056163703188636, "loss": 0.5172, "step": 74090 }, { "epoch": 3.680341710539386, "grad_norm": 0.1181640625, "learning_rate": 0.0005055766365352141, "loss": 0.5365, "step": 74100 }, { "epoch": 3.6808383828350055, "grad_norm": 0.11181640625, "learning_rate": 0.0005055369027515646, "loss": 0.5234, "step": 74110 }, { "epoch": 3.6813350551306248, "grad_norm": 0.11279296875, "learning_rate": 0.000505497168967915, "loss": 0.5581, "step": 74120 }, { "epoch": 3.6818317274262444, "grad_norm": 0.10546875, "learning_rate": 0.0005054574351842654, "loss": 0.5291, "step": 74130 }, { "epoch": 3.6823283997218637, "grad_norm": 0.10498046875, "learning_rate": 0.000505417701400616, "loss": 0.5433, "step": 74140 }, { "epoch": 3.682825072017483, "grad_norm": 0.10693359375, "learning_rate": 0.0005053779676169664, "loss": 0.5088, "step": 74150 }, { "epoch": 3.683321744313102, "grad_norm": 0.11572265625, "learning_rate": 0.0005053382338333168, "loss": 0.552, "step": 74160 }, { "epoch": 3.6838184166087213, "grad_norm": 0.1015625, "learning_rate": 0.0005052985000496673, "loss": 0.5277, "step": 74170 }, { "epoch": 3.684315088904341, "grad_norm": 0.11279296875, "learning_rate": 0.0005052587662660176, "loss": 0.5309, "step": 74180 }, { "epoch": 3.6848117611999602, "grad_norm": 0.1318359375, "learning_rate": 0.0005052190324823681, "loss": 0.5292, "step": 74190 }, { "epoch": 3.6853084334955795, "grad_norm": 0.1875, "learning_rate": 0.0005051792986987187, "loss": 0.5, "step": 74200 }, { "epoch": 3.685805105791199, "grad_norm": 0.09814453125, "learning_rate": 0.000505139564915069, "loss": 0.5177, "step": 74210 }, { "epoch": 3.6863017780868184, "grad_norm": 0.12060546875, "learning_rate": 0.0005050998311314195, "loss": 0.5462, "step": 74220 }, { "epoch": 3.6867984503824376, "grad_norm": 0.12353515625, "learning_rate": 0.00050506009734777, "loss": 0.5618, "step": 74230 }, { "epoch": 3.687295122678057, "grad_norm": 0.1025390625, "learning_rate": 0.0005050203635641204, "loss": 0.5204, "step": 74240 }, { "epoch": 3.6877917949736765, "grad_norm": 0.107421875, "learning_rate": 0.0005049806297804709, "loss": 0.5339, "step": 74250 }, { "epoch": 3.6882884672692957, "grad_norm": 0.10400390625, "learning_rate": 0.0005049408959968213, "loss": 0.5595, "step": 74260 }, { "epoch": 3.688785139564915, "grad_norm": 0.1708984375, "learning_rate": 0.0005049011622131718, "loss": 0.5333, "step": 74270 }, { "epoch": 3.6892818118605346, "grad_norm": 0.1123046875, "learning_rate": 0.0005048614284295222, "loss": 0.5175, "step": 74280 }, { "epoch": 3.689778484156154, "grad_norm": 0.109375, "learning_rate": 0.0005048216946458726, "loss": 0.5102, "step": 74290 }, { "epoch": 3.690275156451773, "grad_norm": 0.1044921875, "learning_rate": 0.0005047819608622232, "loss": 0.5456, "step": 74300 }, { "epoch": 3.6907718287473923, "grad_norm": 0.10400390625, "learning_rate": 0.0005047422270785736, "loss": 0.5277, "step": 74310 }, { "epoch": 3.691268501043012, "grad_norm": 0.10400390625, "learning_rate": 0.000504702493294924, "loss": 0.5348, "step": 74320 }, { "epoch": 3.691765173338631, "grad_norm": 0.2021484375, "learning_rate": 0.0005046627595112745, "loss": 0.5225, "step": 74330 }, { "epoch": 3.6922618456342504, "grad_norm": 0.1533203125, "learning_rate": 0.0005046230257276249, "loss": 0.5521, "step": 74340 }, { "epoch": 3.69275851792987, "grad_norm": 0.1357421875, "learning_rate": 0.0005045832919439754, "loss": 0.5586, "step": 74350 }, { "epoch": 3.6932551902254893, "grad_norm": 0.10693359375, "learning_rate": 0.0005045435581603259, "loss": 0.524, "step": 74360 }, { "epoch": 3.6937518625211085, "grad_norm": 0.10546875, "learning_rate": 0.0005045038243766763, "loss": 0.5222, "step": 74370 }, { "epoch": 3.6942485348167278, "grad_norm": 0.150390625, "learning_rate": 0.0005044640905930267, "loss": 0.5279, "step": 74380 }, { "epoch": 3.694745207112347, "grad_norm": 0.09912109375, "learning_rate": 0.0005044243568093772, "loss": 0.5513, "step": 74390 }, { "epoch": 3.6952418794079667, "grad_norm": 0.130859375, "learning_rate": 0.0005043846230257277, "loss": 0.5075, "step": 74400 }, { "epoch": 3.695738551703586, "grad_norm": 0.09423828125, "learning_rate": 0.0005043448892420781, "loss": 0.5501, "step": 74410 }, { "epoch": 3.6962352239992056, "grad_norm": 0.1396484375, "learning_rate": 0.0005043051554584286, "loss": 0.5157, "step": 74420 }, { "epoch": 3.696731896294825, "grad_norm": 0.0966796875, "learning_rate": 0.000504265421674779, "loss": 0.5792, "step": 74430 }, { "epoch": 3.697228568590444, "grad_norm": 0.109375, "learning_rate": 0.0005042256878911294, "loss": 0.5353, "step": 74440 }, { "epoch": 3.6977252408860632, "grad_norm": 0.103515625, "learning_rate": 0.0005041859541074799, "loss": 0.5202, "step": 74450 }, { "epoch": 3.6982219131816825, "grad_norm": 0.1181640625, "learning_rate": 0.0005041462203238304, "loss": 0.5369, "step": 74460 }, { "epoch": 3.698718585477302, "grad_norm": 0.10791015625, "learning_rate": 0.0005041064865401808, "loss": 0.5329, "step": 74470 }, { "epoch": 3.6992152577729214, "grad_norm": 0.142578125, "learning_rate": 0.0005040667527565312, "loss": 0.5314, "step": 74480 }, { "epoch": 3.699711930068541, "grad_norm": 0.154296875, "learning_rate": 0.0005040270189728817, "loss": 0.5389, "step": 74490 }, { "epoch": 3.7002086023641603, "grad_norm": 0.1025390625, "learning_rate": 0.0005039872851892322, "loss": 0.55, "step": 74500 }, { "epoch": 3.7007052746597795, "grad_norm": 0.09619140625, "learning_rate": 0.0005039475514055826, "loss": 0.5334, "step": 74510 }, { "epoch": 3.7012019469553987, "grad_norm": 0.13671875, "learning_rate": 0.0005039078176219331, "loss": 0.5203, "step": 74520 }, { "epoch": 3.701698619251018, "grad_norm": 0.1611328125, "learning_rate": 0.0005038680838382835, "loss": 0.5108, "step": 74530 }, { "epoch": 3.7021952915466376, "grad_norm": 0.1328125, "learning_rate": 0.0005038283500546339, "loss": 0.5133, "step": 74540 }, { "epoch": 3.702691963842257, "grad_norm": 0.10400390625, "learning_rate": 0.0005037886162709845, "loss": 0.5255, "step": 74550 }, { "epoch": 3.703188636137876, "grad_norm": 0.115234375, "learning_rate": 0.0005037488824873349, "loss": 0.4999, "step": 74560 }, { "epoch": 3.7036853084334957, "grad_norm": 0.130859375, "learning_rate": 0.0005037091487036853, "loss": 0.531, "step": 74570 }, { "epoch": 3.704181980729115, "grad_norm": 0.1201171875, "learning_rate": 0.0005036694149200358, "loss": 0.5445, "step": 74580 }, { "epoch": 3.704678653024734, "grad_norm": 0.1025390625, "learning_rate": 0.0005036296811363862, "loss": 0.5408, "step": 74590 }, { "epoch": 3.7051753253203534, "grad_norm": 0.10791015625, "learning_rate": 0.0005035899473527367, "loss": 0.5694, "step": 74600 }, { "epoch": 3.705671997615973, "grad_norm": 0.11328125, "learning_rate": 0.0005035502135690872, "loss": 0.5388, "step": 74610 }, { "epoch": 3.7061686699115923, "grad_norm": 0.1611328125, "learning_rate": 0.0005035104797854376, "loss": 0.5405, "step": 74620 }, { "epoch": 3.7066653422072116, "grad_norm": 0.140625, "learning_rate": 0.000503470746001788, "loss": 0.5164, "step": 74630 }, { "epoch": 3.7071620145028312, "grad_norm": 0.1318359375, "learning_rate": 0.0005034310122181385, "loss": 0.5371, "step": 74640 }, { "epoch": 3.7076586867984505, "grad_norm": 0.11572265625, "learning_rate": 0.000503391278434489, "loss": 0.536, "step": 74650 }, { "epoch": 3.7081553590940697, "grad_norm": 0.1162109375, "learning_rate": 0.0005033515446508394, "loss": 0.5447, "step": 74660 }, { "epoch": 3.708652031389689, "grad_norm": 0.1484375, "learning_rate": 0.0005033118108671898, "loss": 0.5733, "step": 74670 }, { "epoch": 3.7091487036853086, "grad_norm": 0.1650390625, "learning_rate": 0.0005032720770835403, "loss": 0.5171, "step": 74680 }, { "epoch": 3.709645375980928, "grad_norm": 0.11474609375, "learning_rate": 0.0005032323432998908, "loss": 0.5562, "step": 74690 }, { "epoch": 3.710142048276547, "grad_norm": 0.13671875, "learning_rate": 0.0005031926095162412, "loss": 0.5388, "step": 74700 }, { "epoch": 3.7106387205721667, "grad_norm": 0.0986328125, "learning_rate": 0.0005031528757325917, "loss": 0.5385, "step": 74710 }, { "epoch": 3.711135392867786, "grad_norm": 0.09912109375, "learning_rate": 0.0005031131419489421, "loss": 0.5191, "step": 74720 }, { "epoch": 3.711632065163405, "grad_norm": 0.111328125, "learning_rate": 0.0005030734081652925, "loss": 0.5243, "step": 74730 }, { "epoch": 3.7121287374590244, "grad_norm": 0.09765625, "learning_rate": 0.000503033674381643, "loss": 0.5402, "step": 74740 }, { "epoch": 3.7126254097546436, "grad_norm": 0.10302734375, "learning_rate": 0.0005029939405979935, "loss": 0.5371, "step": 74750 }, { "epoch": 3.7131220820502633, "grad_norm": 0.0927734375, "learning_rate": 0.0005029542068143439, "loss": 0.5397, "step": 74760 }, { "epoch": 3.7136187543458825, "grad_norm": 0.1337890625, "learning_rate": 0.0005029144730306944, "loss": 0.5406, "step": 74770 }, { "epoch": 3.714115426641502, "grad_norm": 0.1337890625, "learning_rate": 0.0005028747392470448, "loss": 0.5853, "step": 74780 }, { "epoch": 3.7146120989371214, "grad_norm": 0.10400390625, "learning_rate": 0.0005028350054633952, "loss": 0.5466, "step": 74790 }, { "epoch": 3.7151087712327406, "grad_norm": 0.126953125, "learning_rate": 0.0005027952716797458, "loss": 0.5634, "step": 74800 }, { "epoch": 3.71560544352836, "grad_norm": 0.1376953125, "learning_rate": 0.0005027555378960962, "loss": 0.526, "step": 74810 }, { "epoch": 3.716102115823979, "grad_norm": 0.11279296875, "learning_rate": 0.0005027158041124467, "loss": 0.5252, "step": 74820 }, { "epoch": 3.7165987881195988, "grad_norm": 0.1533203125, "learning_rate": 0.0005026760703287971, "loss": 0.5454, "step": 74830 }, { "epoch": 3.717095460415218, "grad_norm": 0.1181640625, "learning_rate": 0.0005026363365451475, "loss": 0.5045, "step": 74840 }, { "epoch": 3.7175921327108377, "grad_norm": 0.12451171875, "learning_rate": 0.0005025966027614981, "loss": 0.5203, "step": 74850 }, { "epoch": 3.718088805006457, "grad_norm": 0.10986328125, "learning_rate": 0.0005025568689778484, "loss": 0.5681, "step": 74860 }, { "epoch": 3.718585477302076, "grad_norm": 0.1044921875, "learning_rate": 0.0005025171351941989, "loss": 0.5234, "step": 74870 }, { "epoch": 3.7190821495976953, "grad_norm": 0.123046875, "learning_rate": 0.0005024774014105494, "loss": 0.5626, "step": 74880 }, { "epoch": 3.7195788218933146, "grad_norm": 0.173828125, "learning_rate": 0.0005024376676268997, "loss": 0.5342, "step": 74890 }, { "epoch": 3.7200754941889342, "grad_norm": 0.099609375, "learning_rate": 0.0005023979338432503, "loss": 0.5172, "step": 74900 }, { "epoch": 3.7205721664845535, "grad_norm": 0.125, "learning_rate": 0.0005023582000596008, "loss": 0.5401, "step": 74910 }, { "epoch": 3.7210688387801727, "grad_norm": 0.09716796875, "learning_rate": 0.0005023184662759511, "loss": 0.5451, "step": 74920 }, { "epoch": 3.7215655110757924, "grad_norm": 0.1982421875, "learning_rate": 0.0005022787324923016, "loss": 0.534, "step": 74930 }, { "epoch": 3.7220621833714116, "grad_norm": 0.10009765625, "learning_rate": 0.000502238998708652, "loss": 0.5271, "step": 74940 }, { "epoch": 3.722558855667031, "grad_norm": 0.1142578125, "learning_rate": 0.0005021992649250024, "loss": 0.5351, "step": 74950 }, { "epoch": 3.72305552796265, "grad_norm": 0.10595703125, "learning_rate": 0.000502159531141353, "loss": 0.5425, "step": 74960 }, { "epoch": 3.7235522002582697, "grad_norm": 0.11376953125, "learning_rate": 0.0005021197973577034, "loss": 0.5562, "step": 74970 }, { "epoch": 3.724048872553889, "grad_norm": 0.11181640625, "learning_rate": 0.0005020800635740539, "loss": 0.5429, "step": 74980 }, { "epoch": 3.724545544849508, "grad_norm": 0.134765625, "learning_rate": 0.0005020403297904043, "loss": 0.5185, "step": 74990 }, { "epoch": 3.725042217145128, "grad_norm": 0.109375, "learning_rate": 0.0005020005960067548, "loss": 0.5465, "step": 75000 }, { "epoch": 3.725538889440747, "grad_norm": 0.1474609375, "learning_rate": 0.0005019608622231053, "loss": 0.5251, "step": 75010 }, { "epoch": 3.7260355617363663, "grad_norm": 0.1435546875, "learning_rate": 0.0005019211284394557, "loss": 0.5146, "step": 75020 }, { "epoch": 3.7265322340319855, "grad_norm": 0.1064453125, "learning_rate": 0.0005018813946558061, "loss": 0.499, "step": 75030 }, { "epoch": 3.727028906327605, "grad_norm": 0.123046875, "learning_rate": 0.0005018416608721566, "loss": 0.5298, "step": 75040 }, { "epoch": 3.7275255786232244, "grad_norm": 0.123046875, "learning_rate": 0.0005018019270885069, "loss": 0.5222, "step": 75050 }, { "epoch": 3.7280222509188436, "grad_norm": 0.1298828125, "learning_rate": 0.0005017621933048575, "loss": 0.5474, "step": 75060 }, { "epoch": 3.7285189232144633, "grad_norm": 0.10888671875, "learning_rate": 0.000501722459521208, "loss": 0.5366, "step": 75070 }, { "epoch": 3.7290155955100825, "grad_norm": 0.09521484375, "learning_rate": 0.0005016827257375583, "loss": 0.5366, "step": 75080 }, { "epoch": 3.7295122678057018, "grad_norm": 0.103515625, "learning_rate": 0.0005016429919539088, "loss": 0.5356, "step": 75090 }, { "epoch": 3.730008940101321, "grad_norm": 0.11669921875, "learning_rate": 0.0005016032581702594, "loss": 0.5207, "step": 75100 }, { "epoch": 3.7305056123969402, "grad_norm": 0.15625, "learning_rate": 0.0005015635243866097, "loss": 0.5548, "step": 75110 }, { "epoch": 3.73100228469256, "grad_norm": 0.1357421875, "learning_rate": 0.0005015237906029602, "loss": 0.558, "step": 75120 }, { "epoch": 3.731498956988179, "grad_norm": 0.1083984375, "learning_rate": 0.0005014840568193106, "loss": 0.5615, "step": 75130 }, { "epoch": 3.731995629283799, "grad_norm": 0.11962890625, "learning_rate": 0.0005014443230356611, "loss": 0.5319, "step": 75140 }, { "epoch": 3.732492301579418, "grad_norm": 0.134765625, "learning_rate": 0.0005014045892520116, "loss": 0.5452, "step": 75150 }, { "epoch": 3.7329889738750373, "grad_norm": 0.0947265625, "learning_rate": 0.000501364855468362, "loss": 0.5418, "step": 75160 }, { "epoch": 3.7334856461706565, "grad_norm": 0.1259765625, "learning_rate": 0.0005013251216847125, "loss": 0.5408, "step": 75170 }, { "epoch": 3.7339823184662757, "grad_norm": 0.1279296875, "learning_rate": 0.0005012853879010629, "loss": 0.5412, "step": 75180 }, { "epoch": 3.7344789907618954, "grad_norm": 0.11474609375, "learning_rate": 0.0005012456541174133, "loss": 0.5283, "step": 75190 }, { "epoch": 3.7349756630575146, "grad_norm": 0.140625, "learning_rate": 0.0005012059203337639, "loss": 0.5539, "step": 75200 }, { "epoch": 3.7354723353531343, "grad_norm": 0.12109375, "learning_rate": 0.0005011661865501143, "loss": 0.5243, "step": 75210 }, { "epoch": 3.7359690076487535, "grad_norm": 0.1552734375, "learning_rate": 0.0005011264527664647, "loss": 0.5207, "step": 75220 }, { "epoch": 3.7364656799443727, "grad_norm": 0.1015625, "learning_rate": 0.0005010867189828152, "loss": 0.5092, "step": 75230 }, { "epoch": 3.736962352239992, "grad_norm": 0.134765625, "learning_rate": 0.0005010469851991655, "loss": 0.5549, "step": 75240 }, { "epoch": 3.737459024535611, "grad_norm": 0.10107421875, "learning_rate": 0.000501007251415516, "loss": 0.5267, "step": 75250 }, { "epoch": 3.737955696831231, "grad_norm": 0.09912109375, "learning_rate": 0.0005009675176318666, "loss": 0.504, "step": 75260 }, { "epoch": 3.73845236912685, "grad_norm": 0.109375, "learning_rate": 0.000500927783848217, "loss": 0.5222, "step": 75270 }, { "epoch": 3.7389490414224693, "grad_norm": 0.10986328125, "learning_rate": 0.0005008880500645674, "loss": 0.5661, "step": 75280 }, { "epoch": 3.739445713718089, "grad_norm": 0.10205078125, "learning_rate": 0.0005008483162809179, "loss": 0.5216, "step": 75290 }, { "epoch": 3.739942386013708, "grad_norm": 0.0986328125, "learning_rate": 0.0005008085824972683, "loss": 0.5375, "step": 75300 }, { "epoch": 3.7404390583093274, "grad_norm": 0.1259765625, "learning_rate": 0.0005007688487136188, "loss": 0.5243, "step": 75310 }, { "epoch": 3.7409357306049467, "grad_norm": 0.1044921875, "learning_rate": 0.0005007291149299692, "loss": 0.5113, "step": 75320 }, { "epoch": 3.7414324029005663, "grad_norm": 0.1923828125, "learning_rate": 0.0005006893811463197, "loss": 0.5147, "step": 75330 }, { "epoch": 3.7419290751961856, "grad_norm": 0.181640625, "learning_rate": 0.0005006496473626701, "loss": 0.535, "step": 75340 }, { "epoch": 3.742425747491805, "grad_norm": 0.1259765625, "learning_rate": 0.0005006099135790205, "loss": 0.5412, "step": 75350 }, { "epoch": 3.7429224197874245, "grad_norm": 0.111328125, "learning_rate": 0.0005005701797953711, "loss": 0.5162, "step": 75360 }, { "epoch": 3.7434190920830437, "grad_norm": 0.1142578125, "learning_rate": 0.0005005304460117215, "loss": 0.5369, "step": 75370 }, { "epoch": 3.743915764378663, "grad_norm": 0.11279296875, "learning_rate": 0.0005004907122280719, "loss": 0.5315, "step": 75380 }, { "epoch": 3.744412436674282, "grad_norm": 0.1064453125, "learning_rate": 0.0005004509784444224, "loss": 0.538, "step": 75390 }, { "epoch": 3.744909108969902, "grad_norm": 0.09912109375, "learning_rate": 0.0005004112446607728, "loss": 0.5474, "step": 75400 }, { "epoch": 3.745405781265521, "grad_norm": 0.09326171875, "learning_rate": 0.0005003715108771233, "loss": 0.5329, "step": 75410 }, { "epoch": 3.7459024535611403, "grad_norm": 0.11572265625, "learning_rate": 0.0005003317770934738, "loss": 0.5152, "step": 75420 }, { "epoch": 3.74639912585676, "grad_norm": 0.11376953125, "learning_rate": 0.0005002920433098242, "loss": 0.5356, "step": 75430 }, { "epoch": 3.746895798152379, "grad_norm": 0.1337890625, "learning_rate": 0.0005002523095261746, "loss": 0.5321, "step": 75440 }, { "epoch": 3.7473924704479984, "grad_norm": 0.09765625, "learning_rate": 0.0005002125757425251, "loss": 0.5199, "step": 75450 }, { "epoch": 3.7478891427436176, "grad_norm": 0.107421875, "learning_rate": 0.0005001728419588756, "loss": 0.5606, "step": 75460 }, { "epoch": 3.748385815039237, "grad_norm": 0.12060546875, "learning_rate": 0.000500133108175226, "loss": 0.5587, "step": 75470 }, { "epoch": 3.7488824873348565, "grad_norm": 0.12109375, "learning_rate": 0.0005000933743915765, "loss": 0.5451, "step": 75480 }, { "epoch": 3.7493791596304757, "grad_norm": 0.12109375, "learning_rate": 0.0005000536406079269, "loss": 0.5322, "step": 75490 }, { "epoch": 3.7498758319260954, "grad_norm": 0.10986328125, "learning_rate": 0.0005000139068242773, "loss": 0.5317, "step": 75500 }, { "epoch": 3.7503725042217146, "grad_norm": 0.1455078125, "learning_rate": 0.0004999741730406278, "loss": 0.5438, "step": 75510 }, { "epoch": 3.750869176517334, "grad_norm": 0.1630859375, "learning_rate": 0.0004999344392569783, "loss": 0.5394, "step": 75520 }, { "epoch": 3.751365848812953, "grad_norm": 0.103515625, "learning_rate": 0.0004998947054733287, "loss": 0.5357, "step": 75530 }, { "epoch": 3.7518625211085723, "grad_norm": 0.13671875, "learning_rate": 0.0004998549716896791, "loss": 0.524, "step": 75540 }, { "epoch": 3.752359193404192, "grad_norm": 0.11767578125, "learning_rate": 0.0004998152379060296, "loss": 0.5782, "step": 75550 }, { "epoch": 3.752855865699811, "grad_norm": 0.1298828125, "learning_rate": 0.0004997755041223801, "loss": 0.523, "step": 75560 }, { "epoch": 3.7533525379954304, "grad_norm": 0.11376953125, "learning_rate": 0.0004997357703387305, "loss": 0.5524, "step": 75570 }, { "epoch": 3.75384921029105, "grad_norm": 0.10498046875, "learning_rate": 0.000499696036555081, "loss": 0.5031, "step": 75580 }, { "epoch": 3.7543458825866693, "grad_norm": 0.10595703125, "learning_rate": 0.0004996563027714314, "loss": 0.5556, "step": 75590 }, { "epoch": 3.7548425548822886, "grad_norm": 0.10498046875, "learning_rate": 0.0004996165689877818, "loss": 0.5152, "step": 75600 }, { "epoch": 3.755339227177908, "grad_norm": 0.11865234375, "learning_rate": 0.0004995768352041324, "loss": 0.5316, "step": 75610 }, { "epoch": 3.7558358994735275, "grad_norm": 0.15625, "learning_rate": 0.0004995371014204828, "loss": 0.5134, "step": 75620 }, { "epoch": 3.7563325717691467, "grad_norm": 0.1181640625, "learning_rate": 0.0004994973676368332, "loss": 0.5617, "step": 75630 }, { "epoch": 3.756829244064766, "grad_norm": 0.134765625, "learning_rate": 0.0004994576338531837, "loss": 0.528, "step": 75640 }, { "epoch": 3.7573259163603856, "grad_norm": 0.1298828125, "learning_rate": 0.0004994179000695341, "loss": 0.5361, "step": 75650 }, { "epoch": 3.757822588656005, "grad_norm": 0.15625, "learning_rate": 0.0004993781662858846, "loss": 0.5295, "step": 75660 }, { "epoch": 3.758319260951624, "grad_norm": 0.11181640625, "learning_rate": 0.0004993384325022351, "loss": 0.5404, "step": 75670 }, { "epoch": 3.7588159332472433, "grad_norm": 0.1181640625, "learning_rate": 0.0004992986987185855, "loss": 0.5221, "step": 75680 }, { "epoch": 3.759312605542863, "grad_norm": 0.162109375, "learning_rate": 0.0004992589649349359, "loss": 0.5524, "step": 75690 }, { "epoch": 3.759809277838482, "grad_norm": 0.185546875, "learning_rate": 0.0004992192311512864, "loss": 0.5441, "step": 75700 }, { "epoch": 3.7603059501341014, "grad_norm": 0.10009765625, "learning_rate": 0.0004991794973676369, "loss": 0.5298, "step": 75710 }, { "epoch": 3.760802622429721, "grad_norm": 0.1123046875, "learning_rate": 0.0004991397635839874, "loss": 0.5343, "step": 75720 }, { "epoch": 3.7612992947253403, "grad_norm": 0.12451171875, "learning_rate": 0.0004991000298003377, "loss": 0.5252, "step": 75730 }, { "epoch": 3.7617959670209595, "grad_norm": 0.10546875, "learning_rate": 0.0004990602960166882, "loss": 0.5356, "step": 75740 }, { "epoch": 3.7622926393165788, "grad_norm": 0.115234375, "learning_rate": 0.0004990205622330387, "loss": 0.5323, "step": 75750 }, { "epoch": 3.7627893116121984, "grad_norm": 0.11962890625, "learning_rate": 0.0004989808284493891, "loss": 0.5198, "step": 75760 }, { "epoch": 3.7632859839078177, "grad_norm": 0.1533203125, "learning_rate": 0.0004989410946657396, "loss": 0.5332, "step": 75770 }, { "epoch": 3.763782656203437, "grad_norm": 0.11376953125, "learning_rate": 0.00049890136088209, "loss": 0.5421, "step": 75780 }, { "epoch": 3.7642793284990566, "grad_norm": 0.10595703125, "learning_rate": 0.0004988616270984404, "loss": 0.4969, "step": 75790 }, { "epoch": 3.764776000794676, "grad_norm": 0.10498046875, "learning_rate": 0.0004988218933147909, "loss": 0.5289, "step": 75800 }, { "epoch": 3.765272673090295, "grad_norm": 0.134765625, "learning_rate": 0.0004987821595311414, "loss": 0.5358, "step": 75810 }, { "epoch": 3.7657693453859142, "grad_norm": 0.10400390625, "learning_rate": 0.0004987424257474918, "loss": 0.5283, "step": 75820 }, { "epoch": 3.7662660176815335, "grad_norm": 0.10205078125, "learning_rate": 0.0004987026919638423, "loss": 0.5582, "step": 75830 }, { "epoch": 3.766762689977153, "grad_norm": 0.10498046875, "learning_rate": 0.0004986629581801927, "loss": 0.526, "step": 75840 }, { "epoch": 3.7672593622727724, "grad_norm": 0.10107421875, "learning_rate": 0.0004986232243965431, "loss": 0.5216, "step": 75850 }, { "epoch": 3.767756034568392, "grad_norm": 0.0947265625, "learning_rate": 0.0004985834906128937, "loss": 0.5394, "step": 75860 }, { "epoch": 3.7682527068640113, "grad_norm": 0.11669921875, "learning_rate": 0.0004985437568292441, "loss": 0.5392, "step": 75870 }, { "epoch": 3.7687493791596305, "grad_norm": 0.11083984375, "learning_rate": 0.0004985040230455946, "loss": 0.5583, "step": 75880 }, { "epoch": 3.7692460514552497, "grad_norm": 0.134765625, "learning_rate": 0.000498464289261945, "loss": 0.5372, "step": 75890 }, { "epoch": 3.769742723750869, "grad_norm": 0.1435546875, "learning_rate": 0.0004984245554782954, "loss": 0.5437, "step": 75900 }, { "epoch": 3.7702393960464886, "grad_norm": 0.1005859375, "learning_rate": 0.000498384821694646, "loss": 0.5374, "step": 75910 }, { "epoch": 3.770736068342108, "grad_norm": 0.1240234375, "learning_rate": 0.0004983450879109963, "loss": 0.5581, "step": 75920 }, { "epoch": 3.771232740637727, "grad_norm": 0.1005859375, "learning_rate": 0.0004983053541273468, "loss": 0.5358, "step": 75930 }, { "epoch": 3.7717294129333467, "grad_norm": 0.1015625, "learning_rate": 0.0004982656203436973, "loss": 0.5459, "step": 75940 }, { "epoch": 3.772226085228966, "grad_norm": 0.10107421875, "learning_rate": 0.0004982258865600476, "loss": 0.5242, "step": 75950 }, { "epoch": 3.772722757524585, "grad_norm": 0.1279296875, "learning_rate": 0.0004981861527763982, "loss": 0.5467, "step": 75960 }, { "epoch": 3.7732194298202044, "grad_norm": 0.12890625, "learning_rate": 0.0004981464189927487, "loss": 0.5221, "step": 75970 }, { "epoch": 3.773716102115824, "grad_norm": 0.11962890625, "learning_rate": 0.000498106685209099, "loss": 0.5415, "step": 75980 }, { "epoch": 3.7742127744114433, "grad_norm": 0.09814453125, "learning_rate": 0.0004980669514254495, "loss": 0.5212, "step": 75990 }, { "epoch": 3.7747094467070625, "grad_norm": 0.11376953125, "learning_rate": 0.0004980272176417999, "loss": 0.5402, "step": 76000 }, { "epoch": 3.775206119002682, "grad_norm": 0.11279296875, "learning_rate": 0.0004979874838581504, "loss": 0.5187, "step": 76010 }, { "epoch": 3.7757027912983014, "grad_norm": 0.12060546875, "learning_rate": 0.0004979477500745009, "loss": 0.5301, "step": 76020 }, { "epoch": 3.7761994635939207, "grad_norm": 0.12890625, "learning_rate": 0.0004979080162908513, "loss": 0.5241, "step": 76030 }, { "epoch": 3.77669613588954, "grad_norm": 0.1171875, "learning_rate": 0.0004978682825072018, "loss": 0.5217, "step": 76040 }, { "epoch": 3.7771928081851596, "grad_norm": 0.1474609375, "learning_rate": 0.0004978285487235522, "loss": 0.5234, "step": 76050 }, { "epoch": 3.777689480480779, "grad_norm": 0.11328125, "learning_rate": 0.0004977888149399027, "loss": 0.5231, "step": 76060 }, { "epoch": 3.778186152776398, "grad_norm": 0.10400390625, "learning_rate": 0.0004977490811562532, "loss": 0.5186, "step": 76070 }, { "epoch": 3.7786828250720177, "grad_norm": 0.1318359375, "learning_rate": 0.0004977093473726036, "loss": 0.5255, "step": 76080 }, { "epoch": 3.779179497367637, "grad_norm": 0.12890625, "learning_rate": 0.000497669613588954, "loss": 0.5878, "step": 76090 }, { "epoch": 3.779676169663256, "grad_norm": 0.1259765625, "learning_rate": 0.0004976298798053045, "loss": 0.5512, "step": 76100 }, { "epoch": 3.7801728419588754, "grad_norm": 0.1357421875, "learning_rate": 0.0004975901460216548, "loss": 0.5578, "step": 76110 }, { "epoch": 3.780669514254495, "grad_norm": 0.1103515625, "learning_rate": 0.0004975504122380054, "loss": 0.5282, "step": 76120 }, { "epoch": 3.7811661865501143, "grad_norm": 0.09130859375, "learning_rate": 0.0004975106784543559, "loss": 0.5399, "step": 76130 }, { "epoch": 3.7816628588457335, "grad_norm": 0.1025390625, "learning_rate": 0.0004974709446707062, "loss": 0.5211, "step": 76140 }, { "epoch": 3.782159531141353, "grad_norm": 0.1064453125, "learning_rate": 0.0004974312108870567, "loss": 0.4971, "step": 76150 }, { "epoch": 3.7826562034369724, "grad_norm": 0.126953125, "learning_rate": 0.0004973914771034073, "loss": 0.5228, "step": 76160 }, { "epoch": 3.7831528757325916, "grad_norm": 0.119140625, "learning_rate": 0.0004973517433197577, "loss": 0.5268, "step": 76170 }, { "epoch": 3.783649548028211, "grad_norm": 0.11083984375, "learning_rate": 0.0004973120095361081, "loss": 0.5358, "step": 76180 }, { "epoch": 3.78414622032383, "grad_norm": 0.10546875, "learning_rate": 0.0004972722757524585, "loss": 0.5051, "step": 76190 }, { "epoch": 3.7846428926194497, "grad_norm": 0.10400390625, "learning_rate": 0.000497232541968809, "loss": 0.552, "step": 76200 }, { "epoch": 3.785139564915069, "grad_norm": 0.111328125, "learning_rate": 0.0004971928081851595, "loss": 0.5174, "step": 76210 }, { "epoch": 3.7856362372106886, "grad_norm": 0.1103515625, "learning_rate": 0.0004971530744015099, "loss": 0.5515, "step": 76220 }, { "epoch": 3.786132909506308, "grad_norm": 0.10986328125, "learning_rate": 0.0004971133406178604, "loss": 0.4994, "step": 76230 }, { "epoch": 3.786629581801927, "grad_norm": 0.1650390625, "learning_rate": 0.0004970736068342108, "loss": 0.5371, "step": 76240 }, { "epoch": 3.7871262540975463, "grad_norm": 0.208984375, "learning_rate": 0.0004970338730505612, "loss": 0.5338, "step": 76250 }, { "epoch": 3.7876229263931656, "grad_norm": 0.09423828125, "learning_rate": 0.0004969941392669118, "loss": 0.536, "step": 76260 }, { "epoch": 3.7881195986887852, "grad_norm": 0.12109375, "learning_rate": 0.0004969544054832622, "loss": 0.5196, "step": 76270 }, { "epoch": 3.7886162709844045, "grad_norm": 0.1005859375, "learning_rate": 0.0004969146716996126, "loss": 0.5345, "step": 76280 }, { "epoch": 3.7891129432800237, "grad_norm": 0.11083984375, "learning_rate": 0.0004968749379159631, "loss": 0.517, "step": 76290 }, { "epoch": 3.7896096155756434, "grad_norm": 0.103515625, "learning_rate": 0.0004968352041323135, "loss": 0.5606, "step": 76300 }, { "epoch": 3.7901062878712626, "grad_norm": 0.1064453125, "learning_rate": 0.000496795470348664, "loss": 0.5667, "step": 76310 }, { "epoch": 3.790602960166882, "grad_norm": 0.11962890625, "learning_rate": 0.0004967557365650145, "loss": 0.5437, "step": 76320 }, { "epoch": 3.791099632462501, "grad_norm": 0.11865234375, "learning_rate": 0.0004967160027813649, "loss": 0.5455, "step": 76330 }, { "epoch": 3.7915963047581207, "grad_norm": 0.1484375, "learning_rate": 0.0004966762689977153, "loss": 0.5301, "step": 76340 }, { "epoch": 3.79209297705374, "grad_norm": 0.1435546875, "learning_rate": 0.0004966365352140658, "loss": 0.5488, "step": 76350 }, { "epoch": 3.792589649349359, "grad_norm": 0.1298828125, "learning_rate": 0.0004965968014304163, "loss": 0.5309, "step": 76360 }, { "epoch": 3.793086321644979, "grad_norm": 0.1103515625, "learning_rate": 0.0004965570676467667, "loss": 0.5257, "step": 76370 }, { "epoch": 3.793582993940598, "grad_norm": 0.09912109375, "learning_rate": 0.0004965173338631171, "loss": 0.5463, "step": 76380 }, { "epoch": 3.7940796662362173, "grad_norm": 0.11181640625, "learning_rate": 0.0004964776000794676, "loss": 0.5615, "step": 76390 }, { "epoch": 3.7945763385318365, "grad_norm": 0.1015625, "learning_rate": 0.000496437866295818, "loss": 0.5293, "step": 76400 }, { "epoch": 3.795073010827456, "grad_norm": 0.1064453125, "learning_rate": 0.0004963981325121684, "loss": 0.5131, "step": 76410 }, { "epoch": 3.7955696831230754, "grad_norm": 0.09716796875, "learning_rate": 0.000496358398728519, "loss": 0.5213, "step": 76420 }, { "epoch": 3.7960663554186946, "grad_norm": 0.12255859375, "learning_rate": 0.0004963186649448694, "loss": 0.5412, "step": 76430 }, { "epoch": 3.7965630277143143, "grad_norm": 0.1025390625, "learning_rate": 0.0004962789311612198, "loss": 0.5338, "step": 76440 }, { "epoch": 3.7970597000099335, "grad_norm": 0.10009765625, "learning_rate": 0.0004962391973775703, "loss": 0.5537, "step": 76450 }, { "epoch": 3.7975563723055528, "grad_norm": 0.11767578125, "learning_rate": 0.0004961994635939207, "loss": 0.5441, "step": 76460 }, { "epoch": 3.798053044601172, "grad_norm": 0.123046875, "learning_rate": 0.0004961597298102712, "loss": 0.5135, "step": 76470 }, { "epoch": 3.7985497168967917, "grad_norm": 0.1083984375, "learning_rate": 0.0004961199960266217, "loss": 0.5597, "step": 76480 }, { "epoch": 3.799046389192411, "grad_norm": 0.109375, "learning_rate": 0.0004960802622429721, "loss": 0.5418, "step": 76490 }, { "epoch": 3.79954306148803, "grad_norm": 0.11181640625, "learning_rate": 0.0004960405284593225, "loss": 0.5386, "step": 76500 }, { "epoch": 3.80003973378365, "grad_norm": 0.130859375, "learning_rate": 0.000496000794675673, "loss": 0.5257, "step": 76510 }, { "epoch": 3.800536406079269, "grad_norm": 0.10400390625, "learning_rate": 0.0004959610608920235, "loss": 0.5425, "step": 76520 }, { "epoch": 3.8010330783748882, "grad_norm": 0.10205078125, "learning_rate": 0.0004959213271083739, "loss": 0.5369, "step": 76530 }, { "epoch": 3.8015297506705075, "grad_norm": 0.20703125, "learning_rate": 0.0004958815933247244, "loss": 0.5424, "step": 76540 }, { "epoch": 3.8020264229661267, "grad_norm": 0.09375, "learning_rate": 0.0004958418595410748, "loss": 0.5137, "step": 76550 }, { "epoch": 3.8025230952617464, "grad_norm": 0.1171875, "learning_rate": 0.0004958021257574252, "loss": 0.5386, "step": 76560 }, { "epoch": 3.8030197675573656, "grad_norm": 0.10498046875, "learning_rate": 0.0004957623919737758, "loss": 0.514, "step": 76570 }, { "epoch": 3.8035164398529853, "grad_norm": 0.1015625, "learning_rate": 0.0004957226581901262, "loss": 0.535, "step": 76580 }, { "epoch": 3.8040131121486045, "grad_norm": 0.10498046875, "learning_rate": 0.0004956829244064766, "loss": 0.5503, "step": 76590 }, { "epoch": 3.8045097844442237, "grad_norm": 0.1279296875, "learning_rate": 0.000495643190622827, "loss": 0.5156, "step": 76600 }, { "epoch": 3.805006456739843, "grad_norm": 0.126953125, "learning_rate": 0.0004956034568391775, "loss": 0.5681, "step": 76610 }, { "epoch": 3.805503129035462, "grad_norm": 0.150390625, "learning_rate": 0.0004955637230555281, "loss": 0.5347, "step": 76620 }, { "epoch": 3.805999801331082, "grad_norm": 0.1357421875, "learning_rate": 0.0004955239892718784, "loss": 0.5676, "step": 76630 }, { "epoch": 3.806496473626701, "grad_norm": 0.11181640625, "learning_rate": 0.0004954842554882289, "loss": 0.5308, "step": 76640 }, { "epoch": 3.8069931459223203, "grad_norm": 0.111328125, "learning_rate": 0.0004954445217045793, "loss": 0.5314, "step": 76650 }, { "epoch": 3.80748981821794, "grad_norm": 0.11279296875, "learning_rate": 0.0004954047879209297, "loss": 0.5216, "step": 76660 }, { "epoch": 3.807986490513559, "grad_norm": 0.1806640625, "learning_rate": 0.0004953650541372803, "loss": 0.5433, "step": 76670 }, { "epoch": 3.8084831628091784, "grad_norm": 0.1904296875, "learning_rate": 0.0004953253203536307, "loss": 0.5521, "step": 76680 }, { "epoch": 3.8089798351047977, "grad_norm": 0.126953125, "learning_rate": 0.0004952855865699811, "loss": 0.5234, "step": 76690 }, { "epoch": 3.8094765074004173, "grad_norm": 0.1494140625, "learning_rate": 0.0004952458527863316, "loss": 0.4972, "step": 76700 }, { "epoch": 3.8099731796960365, "grad_norm": 0.16796875, "learning_rate": 0.000495206119002682, "loss": 0.5387, "step": 76710 }, { "epoch": 3.8104698519916558, "grad_norm": 0.1162109375, "learning_rate": 0.0004951663852190325, "loss": 0.5169, "step": 76720 }, { "epoch": 3.8109665242872754, "grad_norm": 0.15625, "learning_rate": 0.000495126651435383, "loss": 0.5121, "step": 76730 }, { "epoch": 3.8114631965828947, "grad_norm": 0.11572265625, "learning_rate": 0.0004950869176517334, "loss": 0.5538, "step": 76740 }, { "epoch": 3.811959868878514, "grad_norm": 0.1279296875, "learning_rate": 0.0004950471838680838, "loss": 0.529, "step": 76750 }, { "epoch": 3.812456541174133, "grad_norm": 0.10107421875, "learning_rate": 0.0004950074500844343, "loss": 0.5366, "step": 76760 }, { "epoch": 3.812953213469753, "grad_norm": 0.10693359375, "learning_rate": 0.0004949677163007848, "loss": 0.5343, "step": 76770 }, { "epoch": 3.813449885765372, "grad_norm": 0.1552734375, "learning_rate": 0.0004949279825171353, "loss": 0.5588, "step": 76780 }, { "epoch": 3.8139465580609913, "grad_norm": 0.1162109375, "learning_rate": 0.0004948882487334856, "loss": 0.5383, "step": 76790 }, { "epoch": 3.814443230356611, "grad_norm": 0.095703125, "learning_rate": 0.0004948485149498361, "loss": 0.5263, "step": 76800 }, { "epoch": 3.81493990265223, "grad_norm": 0.12255859375, "learning_rate": 0.0004948087811661867, "loss": 0.5515, "step": 76810 }, { "epoch": 3.8154365749478494, "grad_norm": 0.142578125, "learning_rate": 0.000494769047382537, "loss": 0.515, "step": 76820 }, { "epoch": 3.8159332472434686, "grad_norm": 0.11474609375, "learning_rate": 0.0004947293135988875, "loss": 0.5327, "step": 76830 }, { "epoch": 3.816429919539088, "grad_norm": 0.15234375, "learning_rate": 0.000494689579815238, "loss": 0.5308, "step": 76840 }, { "epoch": 3.8169265918347075, "grad_norm": 0.1337890625, "learning_rate": 0.0004946498460315883, "loss": 0.5392, "step": 76850 }, { "epoch": 3.8174232641303267, "grad_norm": 0.10546875, "learning_rate": 0.0004946101122479388, "loss": 0.5223, "step": 76860 }, { "epoch": 3.8179199364259464, "grad_norm": 0.095703125, "learning_rate": 0.0004945703784642893, "loss": 0.5221, "step": 76870 }, { "epoch": 3.8184166087215656, "grad_norm": 0.09375, "learning_rate": 0.0004945306446806397, "loss": 0.5134, "step": 76880 }, { "epoch": 3.818913281017185, "grad_norm": 0.20703125, "learning_rate": 0.0004944909108969902, "loss": 0.5067, "step": 76890 }, { "epoch": 3.819409953312804, "grad_norm": 0.1474609375, "learning_rate": 0.0004944511771133406, "loss": 0.5321, "step": 76900 }, { "epoch": 3.8199066256084233, "grad_norm": 0.1337890625, "learning_rate": 0.0004944114433296911, "loss": 0.5426, "step": 76910 }, { "epoch": 3.820403297904043, "grad_norm": 0.09619140625, "learning_rate": 0.0004943717095460416, "loss": 0.515, "step": 76920 }, { "epoch": 3.820899970199662, "grad_norm": 0.11328125, "learning_rate": 0.000494331975762392, "loss": 0.5162, "step": 76930 }, { "epoch": 3.821396642495282, "grad_norm": 0.1455078125, "learning_rate": 0.0004942922419787425, "loss": 0.5413, "step": 76940 }, { "epoch": 3.821893314790901, "grad_norm": 0.1220703125, "learning_rate": 0.0004942525081950929, "loss": 0.5547, "step": 76950 }, { "epoch": 3.8223899870865203, "grad_norm": 0.189453125, "learning_rate": 0.0004942127744114433, "loss": 0.5419, "step": 76960 }, { "epoch": 3.8228866593821396, "grad_norm": 0.126953125, "learning_rate": 0.0004941730406277939, "loss": 0.5224, "step": 76970 }, { "epoch": 3.823383331677759, "grad_norm": 0.1494140625, "learning_rate": 0.0004941333068441442, "loss": 0.5259, "step": 76980 }, { "epoch": 3.8238800039733785, "grad_norm": 0.1015625, "learning_rate": 0.0004940935730604947, "loss": 0.5216, "step": 76990 }, { "epoch": 3.8243766762689977, "grad_norm": 0.10498046875, "learning_rate": 0.0004940538392768452, "loss": 0.5639, "step": 77000 }, { "epoch": 3.824873348564617, "grad_norm": 0.10986328125, "learning_rate": 0.0004940141054931955, "loss": 0.5427, "step": 77010 }, { "epoch": 3.8253700208602366, "grad_norm": 0.12109375, "learning_rate": 0.0004939743717095461, "loss": 0.5237, "step": 77020 }, { "epoch": 3.825866693155856, "grad_norm": 0.10546875, "learning_rate": 0.0004939346379258966, "loss": 0.5508, "step": 77030 }, { "epoch": 3.826363365451475, "grad_norm": 0.177734375, "learning_rate": 0.0004938949041422469, "loss": 0.5257, "step": 77040 }, { "epoch": 3.8268600377470943, "grad_norm": 0.10107421875, "learning_rate": 0.0004938551703585974, "loss": 0.5381, "step": 77050 }, { "epoch": 3.827356710042714, "grad_norm": 0.1650390625, "learning_rate": 0.0004938154365749478, "loss": 0.5421, "step": 77060 }, { "epoch": 3.827853382338333, "grad_norm": 0.103515625, "learning_rate": 0.0004937757027912984, "loss": 0.525, "step": 77070 }, { "epoch": 3.8283500546339524, "grad_norm": 0.109375, "learning_rate": 0.0004937359690076488, "loss": 0.5108, "step": 77080 }, { "epoch": 3.828846726929572, "grad_norm": 0.103515625, "learning_rate": 0.0004936962352239992, "loss": 0.5251, "step": 77090 }, { "epoch": 3.8293433992251913, "grad_norm": 0.10400390625, "learning_rate": 0.0004936565014403497, "loss": 0.5566, "step": 77100 }, { "epoch": 3.8298400715208105, "grad_norm": 0.1064453125, "learning_rate": 0.0004936167676567001, "loss": 0.5185, "step": 77110 }, { "epoch": 3.8303367438164297, "grad_norm": 0.099609375, "learning_rate": 0.0004935770338730506, "loss": 0.5233, "step": 77120 }, { "epoch": 3.8308334161120494, "grad_norm": 0.11669921875, "learning_rate": 0.0004935373000894011, "loss": 0.5356, "step": 77130 }, { "epoch": 3.8313300884076686, "grad_norm": 0.11083984375, "learning_rate": 0.0004934975663057515, "loss": 0.5334, "step": 77140 }, { "epoch": 3.831826760703288, "grad_norm": 0.09912109375, "learning_rate": 0.0004934578325221019, "loss": 0.5327, "step": 77150 }, { "epoch": 3.8323234329989075, "grad_norm": 0.10107421875, "learning_rate": 0.0004934180987384524, "loss": 0.5342, "step": 77160 }, { "epoch": 3.8328201052945268, "grad_norm": 0.12158203125, "learning_rate": 0.0004933783649548029, "loss": 0.5361, "step": 77170 }, { "epoch": 3.833316777590146, "grad_norm": 0.103515625, "learning_rate": 0.0004933386311711533, "loss": 0.5545, "step": 77180 }, { "epoch": 3.8338134498857652, "grad_norm": 0.1357421875, "learning_rate": 0.0004932988973875038, "loss": 0.5256, "step": 77190 }, { "epoch": 3.8343101221813845, "grad_norm": 0.1201171875, "learning_rate": 0.0004932591636038541, "loss": 0.5355, "step": 77200 }, { "epoch": 3.834806794477004, "grad_norm": 0.0966796875, "learning_rate": 0.0004932194298202046, "loss": 0.5366, "step": 77210 }, { "epoch": 3.8353034667726233, "grad_norm": 0.1318359375, "learning_rate": 0.0004931796960365552, "loss": 0.5469, "step": 77220 }, { "epoch": 3.835800139068243, "grad_norm": 0.1533203125, "learning_rate": 0.0004931399622529056, "loss": 0.5382, "step": 77230 }, { "epoch": 3.8362968113638622, "grad_norm": 0.11669921875, "learning_rate": 0.000493100228469256, "loss": 0.5042, "step": 77240 }, { "epoch": 3.8367934836594815, "grad_norm": 0.1396484375, "learning_rate": 0.0004930604946856064, "loss": 0.5104, "step": 77250 }, { "epoch": 3.8372901559551007, "grad_norm": 0.1640625, "learning_rate": 0.0004930207609019569, "loss": 0.5509, "step": 77260 }, { "epoch": 3.83778682825072, "grad_norm": 0.10498046875, "learning_rate": 0.0004929810271183074, "loss": 0.5261, "step": 77270 }, { "epoch": 3.8382835005463396, "grad_norm": 0.10791015625, "learning_rate": 0.0004929412933346578, "loss": 0.5002, "step": 77280 }, { "epoch": 3.838780172841959, "grad_norm": 0.1259765625, "learning_rate": 0.0004929015595510083, "loss": 0.5322, "step": 77290 }, { "epoch": 3.8392768451375785, "grad_norm": 0.11865234375, "learning_rate": 0.0004928618257673587, "loss": 0.5219, "step": 77300 }, { "epoch": 3.8397735174331977, "grad_norm": 0.13671875, "learning_rate": 0.0004928220919837091, "loss": 0.5465, "step": 77310 }, { "epoch": 3.840270189728817, "grad_norm": 0.1015625, "learning_rate": 0.0004927823582000597, "loss": 0.5752, "step": 77320 }, { "epoch": 3.840766862024436, "grad_norm": 0.1650390625, "learning_rate": 0.0004927426244164101, "loss": 0.5246, "step": 77330 }, { "epoch": 3.8412635343200554, "grad_norm": 0.1337890625, "learning_rate": 0.0004927028906327605, "loss": 0.5423, "step": 77340 }, { "epoch": 3.841760206615675, "grad_norm": 0.0947265625, "learning_rate": 0.000492663156849111, "loss": 0.5225, "step": 77350 }, { "epoch": 3.8422568789112943, "grad_norm": 0.099609375, "learning_rate": 0.0004926234230654614, "loss": 0.5334, "step": 77360 }, { "epoch": 3.8427535512069135, "grad_norm": 0.09130859375, "learning_rate": 0.0004925836892818119, "loss": 0.505, "step": 77370 }, { "epoch": 3.843250223502533, "grad_norm": 0.11572265625, "learning_rate": 0.0004925439554981624, "loss": 0.5224, "step": 77380 }, { "epoch": 3.8437468957981524, "grad_norm": 0.11376953125, "learning_rate": 0.0004925042217145128, "loss": 0.5149, "step": 77390 }, { "epoch": 3.8442435680937717, "grad_norm": 0.1298828125, "learning_rate": 0.0004924644879308632, "loss": 0.5327, "step": 77400 }, { "epoch": 3.844740240389391, "grad_norm": 0.0966796875, "learning_rate": 0.0004924247541472137, "loss": 0.5212, "step": 77410 }, { "epoch": 3.8452369126850106, "grad_norm": 0.0966796875, "learning_rate": 0.0004923850203635642, "loss": 0.5236, "step": 77420 }, { "epoch": 3.84573358498063, "grad_norm": 0.11865234375, "learning_rate": 0.0004923452865799146, "loss": 0.5518, "step": 77430 }, { "epoch": 3.846230257276249, "grad_norm": 0.111328125, "learning_rate": 0.0004923055527962651, "loss": 0.5539, "step": 77440 }, { "epoch": 3.8467269295718687, "grad_norm": 0.12890625, "learning_rate": 0.0004922658190126155, "loss": 0.5373, "step": 77450 }, { "epoch": 3.847223601867488, "grad_norm": 0.10888671875, "learning_rate": 0.0004922260852289659, "loss": 0.5529, "step": 77460 }, { "epoch": 3.847720274163107, "grad_norm": 0.09716796875, "learning_rate": 0.0004921863514453164, "loss": 0.5682, "step": 77470 }, { "epoch": 3.8482169464587264, "grad_norm": 0.09912109375, "learning_rate": 0.0004921466176616669, "loss": 0.5279, "step": 77480 }, { "epoch": 3.848713618754346, "grad_norm": 0.1279296875, "learning_rate": 0.0004921068838780173, "loss": 0.5535, "step": 77490 }, { "epoch": 3.8492102910499653, "grad_norm": 0.12890625, "learning_rate": 0.0004920671500943677, "loss": 0.5249, "step": 77500 }, { "epoch": 3.8497069633455845, "grad_norm": 0.138671875, "learning_rate": 0.0004920274163107182, "loss": 0.5058, "step": 77510 }, { "epoch": 3.850203635641204, "grad_norm": 0.2138671875, "learning_rate": 0.0004919876825270687, "loss": 0.5522, "step": 77520 }, { "epoch": 3.8507003079368234, "grad_norm": 0.1044921875, "learning_rate": 0.0004919479487434191, "loss": 0.5331, "step": 77530 }, { "epoch": 3.8511969802324426, "grad_norm": 0.138671875, "learning_rate": 0.0004919082149597696, "loss": 0.5452, "step": 77540 }, { "epoch": 3.851693652528062, "grad_norm": 0.09912109375, "learning_rate": 0.00049186848117612, "loss": 0.5229, "step": 77550 }, { "epoch": 3.852190324823681, "grad_norm": 0.1376953125, "learning_rate": 0.0004918287473924704, "loss": 0.5111, "step": 77560 }, { "epoch": 3.8526869971193007, "grad_norm": 0.11669921875, "learning_rate": 0.000491789013608821, "loss": 0.5245, "step": 77570 }, { "epoch": 3.85318366941492, "grad_norm": 0.12451171875, "learning_rate": 0.0004917492798251714, "loss": 0.5325, "step": 77580 }, { "epoch": 3.8536803417105396, "grad_norm": 0.177734375, "learning_rate": 0.0004917095460415218, "loss": 0.5324, "step": 77590 }, { "epoch": 3.854177014006159, "grad_norm": 0.12060546875, "learning_rate": 0.0004916698122578723, "loss": 0.523, "step": 77600 }, { "epoch": 3.854673686301778, "grad_norm": 0.1220703125, "learning_rate": 0.0004916300784742227, "loss": 0.5764, "step": 77610 }, { "epoch": 3.8551703585973973, "grad_norm": 0.1494140625, "learning_rate": 0.0004915903446905732, "loss": 0.5273, "step": 77620 }, { "epoch": 3.8556670308930165, "grad_norm": 0.11376953125, "learning_rate": 0.0004915506109069237, "loss": 0.5281, "step": 77630 }, { "epoch": 3.856163703188636, "grad_norm": 0.1025390625, "learning_rate": 0.0004915108771232741, "loss": 0.5493, "step": 77640 }, { "epoch": 3.8566603754842554, "grad_norm": 0.11767578125, "learning_rate": 0.0004914711433396245, "loss": 0.53, "step": 77650 }, { "epoch": 3.857157047779875, "grad_norm": 0.09716796875, "learning_rate": 0.0004914314095559749, "loss": 0.5514, "step": 77660 }, { "epoch": 3.8576537200754943, "grad_norm": 0.1201171875, "learning_rate": 0.0004913916757723255, "loss": 0.5638, "step": 77670 }, { "epoch": 3.8581503923711136, "grad_norm": 0.12060546875, "learning_rate": 0.000491351941988676, "loss": 0.5153, "step": 77680 }, { "epoch": 3.858647064666733, "grad_norm": 0.11328125, "learning_rate": 0.0004913122082050263, "loss": 0.5267, "step": 77690 }, { "epoch": 3.859143736962352, "grad_norm": 0.1279296875, "learning_rate": 0.0004912724744213768, "loss": 0.563, "step": 77700 }, { "epoch": 3.8596404092579717, "grad_norm": 0.12890625, "learning_rate": 0.0004912327406377273, "loss": 0.5209, "step": 77710 }, { "epoch": 3.860137081553591, "grad_norm": 0.125, "learning_rate": 0.0004911930068540776, "loss": 0.5381, "step": 77720 }, { "epoch": 3.86063375384921, "grad_norm": 0.1376953125, "learning_rate": 0.0004911532730704282, "loss": 0.5184, "step": 77730 }, { "epoch": 3.86113042614483, "grad_norm": 0.142578125, "learning_rate": 0.0004911135392867786, "loss": 0.5145, "step": 77740 }, { "epoch": 3.861627098440449, "grad_norm": 0.1123046875, "learning_rate": 0.000491073805503129, "loss": 0.5214, "step": 77750 }, { "epoch": 3.8621237707360683, "grad_norm": 0.1259765625, "learning_rate": 0.0004910340717194795, "loss": 0.5344, "step": 77760 }, { "epoch": 3.8626204430316875, "grad_norm": 0.1201171875, "learning_rate": 0.00049099433793583, "loss": 0.5234, "step": 77770 }, { "epoch": 3.863117115327307, "grad_norm": 0.18359375, "learning_rate": 0.0004909546041521804, "loss": 0.5339, "step": 77780 }, { "epoch": 3.8636137876229264, "grad_norm": 0.1015625, "learning_rate": 0.0004909148703685309, "loss": 0.5405, "step": 77790 }, { "epoch": 3.8641104599185456, "grad_norm": 0.12060546875, "learning_rate": 0.0004908751365848813, "loss": 0.5536, "step": 77800 }, { "epoch": 3.8646071322141653, "grad_norm": 0.10595703125, "learning_rate": 0.0004908354028012318, "loss": 0.5374, "step": 77810 }, { "epoch": 3.8651038045097845, "grad_norm": 0.1162109375, "learning_rate": 0.0004907956690175823, "loss": 0.5438, "step": 77820 }, { "epoch": 3.8656004768054038, "grad_norm": 0.119140625, "learning_rate": 0.0004907559352339327, "loss": 0.5263, "step": 77830 }, { "epoch": 3.866097149101023, "grad_norm": 0.11767578125, "learning_rate": 0.0004907162014502832, "loss": 0.5228, "step": 77840 }, { "epoch": 3.8665938213966426, "grad_norm": 0.205078125, "learning_rate": 0.0004906764676666335, "loss": 0.5644, "step": 77850 }, { "epoch": 3.867090493692262, "grad_norm": 0.1259765625, "learning_rate": 0.000490636733882984, "loss": 0.5252, "step": 77860 }, { "epoch": 3.867587165987881, "grad_norm": 0.11474609375, "learning_rate": 0.0004905970000993346, "loss": 0.5538, "step": 77870 }, { "epoch": 3.8680838382835008, "grad_norm": 0.1318359375, "learning_rate": 0.0004905572663156849, "loss": 0.5115, "step": 77880 }, { "epoch": 3.86858051057912, "grad_norm": 0.1171875, "learning_rate": 0.0004905175325320354, "loss": 0.5239, "step": 77890 }, { "epoch": 3.8690771828747392, "grad_norm": 0.111328125, "learning_rate": 0.0004904777987483859, "loss": 0.4906, "step": 77900 }, { "epoch": 3.8695738551703585, "grad_norm": 0.11669921875, "learning_rate": 0.0004904380649647362, "loss": 0.551, "step": 77910 }, { "epoch": 3.8700705274659777, "grad_norm": 0.14453125, "learning_rate": 0.0004903983311810867, "loss": 0.5334, "step": 77920 }, { "epoch": 3.8705671997615974, "grad_norm": 0.0947265625, "learning_rate": 0.0004903585973974372, "loss": 0.5479, "step": 77930 }, { "epoch": 3.8710638720572166, "grad_norm": 0.1005859375, "learning_rate": 0.0004903188636137876, "loss": 0.5058, "step": 77940 }, { "epoch": 3.8715605443528363, "grad_norm": 0.1171875, "learning_rate": 0.0004902791298301381, "loss": 0.54, "step": 77950 }, { "epoch": 3.8720572166484555, "grad_norm": 0.111328125, "learning_rate": 0.0004902393960464885, "loss": 0.533, "step": 77960 }, { "epoch": 3.8725538889440747, "grad_norm": 0.109375, "learning_rate": 0.000490199662262839, "loss": 0.5098, "step": 77970 }, { "epoch": 3.873050561239694, "grad_norm": 0.11181640625, "learning_rate": 0.0004901599284791895, "loss": 0.5435, "step": 77980 }, { "epoch": 3.873547233535313, "grad_norm": 0.10498046875, "learning_rate": 0.0004901201946955399, "loss": 0.5652, "step": 77990 }, { "epoch": 3.874043905830933, "grad_norm": 0.11181640625, "learning_rate": 0.0004900804609118904, "loss": 0.5213, "step": 78000 }, { "epoch": 3.874540578126552, "grad_norm": 0.1201171875, "learning_rate": 0.0004900407271282408, "loss": 0.5487, "step": 78010 }, { "epoch": 3.8750372504221713, "grad_norm": 0.1328125, "learning_rate": 0.0004900009933445912, "loss": 0.5225, "step": 78020 }, { "epoch": 3.875533922717791, "grad_norm": 0.11328125, "learning_rate": 0.0004899612595609418, "loss": 0.5417, "step": 78030 }, { "epoch": 3.87603059501341, "grad_norm": 0.1015625, "learning_rate": 0.0004899215257772922, "loss": 0.5715, "step": 78040 }, { "epoch": 3.8765272673090294, "grad_norm": 0.1318359375, "learning_rate": 0.0004898817919936426, "loss": 0.5074, "step": 78050 }, { "epoch": 3.8770239396046486, "grad_norm": 0.126953125, "learning_rate": 0.0004898420582099931, "loss": 0.5138, "step": 78060 }, { "epoch": 3.8775206119002683, "grad_norm": 0.1357421875, "learning_rate": 0.0004898023244263434, "loss": 0.513, "step": 78070 }, { "epoch": 3.8780172841958875, "grad_norm": 0.1318359375, "learning_rate": 0.000489762590642694, "loss": 0.5218, "step": 78080 }, { "epoch": 3.8785139564915068, "grad_norm": 0.10888671875, "learning_rate": 0.0004897228568590445, "loss": 0.5516, "step": 78090 }, { "epoch": 3.8790106287871264, "grad_norm": 0.1689453125, "learning_rate": 0.0004896831230753948, "loss": 0.5456, "step": 78100 }, { "epoch": 3.8795073010827457, "grad_norm": 0.10546875, "learning_rate": 0.0004896433892917453, "loss": 0.5356, "step": 78110 }, { "epoch": 3.880003973378365, "grad_norm": 0.11083984375, "learning_rate": 0.0004896036555080957, "loss": 0.5203, "step": 78120 }, { "epoch": 3.880500645673984, "grad_norm": 0.11376953125, "learning_rate": 0.0004895639217244463, "loss": 0.5382, "step": 78130 }, { "epoch": 3.880997317969604, "grad_norm": 0.0966796875, "learning_rate": 0.0004895241879407967, "loss": 0.5274, "step": 78140 }, { "epoch": 3.881493990265223, "grad_norm": 0.1064453125, "learning_rate": 0.0004894844541571471, "loss": 0.5278, "step": 78150 }, { "epoch": 3.8819906625608422, "grad_norm": 0.130859375, "learning_rate": 0.0004894447203734976, "loss": 0.5564, "step": 78160 }, { "epoch": 3.882487334856462, "grad_norm": 0.11181640625, "learning_rate": 0.000489404986589848, "loss": 0.5444, "step": 78170 }, { "epoch": 3.882984007152081, "grad_norm": 0.099609375, "learning_rate": 0.0004893652528061985, "loss": 0.5267, "step": 78180 }, { "epoch": 3.8834806794477004, "grad_norm": 0.10400390625, "learning_rate": 0.000489325519022549, "loss": 0.5116, "step": 78190 }, { "epoch": 3.8839773517433196, "grad_norm": 0.10791015625, "learning_rate": 0.0004892857852388994, "loss": 0.5024, "step": 78200 }, { "epoch": 3.8844740240389393, "grad_norm": 0.11962890625, "learning_rate": 0.0004892460514552498, "loss": 0.5304, "step": 78210 }, { "epoch": 3.8849706963345585, "grad_norm": 0.12890625, "learning_rate": 0.0004892063176716003, "loss": 0.5474, "step": 78220 }, { "epoch": 3.8854673686301777, "grad_norm": 0.158203125, "learning_rate": 0.0004891665838879508, "loss": 0.5243, "step": 78230 }, { "epoch": 3.8859640409257974, "grad_norm": 0.1298828125, "learning_rate": 0.0004891268501043012, "loss": 0.5531, "step": 78240 }, { "epoch": 3.8864607132214166, "grad_norm": 0.109375, "learning_rate": 0.0004890871163206517, "loss": 0.5401, "step": 78250 }, { "epoch": 3.886957385517036, "grad_norm": 0.14453125, "learning_rate": 0.0004890473825370021, "loss": 0.5397, "step": 78260 }, { "epoch": 3.887454057812655, "grad_norm": 0.10302734375, "learning_rate": 0.0004890076487533525, "loss": 0.5461, "step": 78270 }, { "epoch": 3.8879507301082743, "grad_norm": 0.09912109375, "learning_rate": 0.0004889679149697031, "loss": 0.5115, "step": 78280 }, { "epoch": 3.888447402403894, "grad_norm": 0.12353515625, "learning_rate": 0.0004889281811860535, "loss": 0.519, "step": 78290 }, { "epoch": 3.888944074699513, "grad_norm": 0.1083984375, "learning_rate": 0.0004888884474024039, "loss": 0.5557, "step": 78300 }, { "epoch": 3.889440746995133, "grad_norm": 0.10498046875, "learning_rate": 0.0004888487136187543, "loss": 0.5311, "step": 78310 }, { "epoch": 3.889937419290752, "grad_norm": 0.1123046875, "learning_rate": 0.0004888089798351048, "loss": 0.5394, "step": 78320 }, { "epoch": 3.8904340915863713, "grad_norm": 0.10546875, "learning_rate": 0.0004887692460514553, "loss": 0.5154, "step": 78330 }, { "epoch": 3.8909307638819906, "grad_norm": 0.09912109375, "learning_rate": 0.0004887295122678057, "loss": 0.5444, "step": 78340 }, { "epoch": 3.8914274361776098, "grad_norm": 0.1328125, "learning_rate": 0.0004886897784841562, "loss": 0.5237, "step": 78350 }, { "epoch": 3.8919241084732294, "grad_norm": 0.111328125, "learning_rate": 0.0004886500447005066, "loss": 0.5175, "step": 78360 }, { "epoch": 3.8924207807688487, "grad_norm": 0.10986328125, "learning_rate": 0.000488610310916857, "loss": 0.5257, "step": 78370 }, { "epoch": 3.892917453064468, "grad_norm": 0.10107421875, "learning_rate": 0.0004885705771332076, "loss": 0.5148, "step": 78380 }, { "epoch": 3.8934141253600876, "grad_norm": 0.1083984375, "learning_rate": 0.000488530843349558, "loss": 0.5445, "step": 78390 }, { "epoch": 3.893910797655707, "grad_norm": 0.1123046875, "learning_rate": 0.0004884911095659084, "loss": 0.5255, "step": 78400 }, { "epoch": 3.894407469951326, "grad_norm": 0.123046875, "learning_rate": 0.0004884513757822589, "loss": 0.538, "step": 78410 }, { "epoch": 3.8949041422469453, "grad_norm": 0.12255859375, "learning_rate": 0.0004884116419986093, "loss": 0.5602, "step": 78420 }, { "epoch": 3.895400814542565, "grad_norm": 0.10498046875, "learning_rate": 0.0004883719082149598, "loss": 0.5426, "step": 78430 }, { "epoch": 3.895897486838184, "grad_norm": 0.10009765625, "learning_rate": 0.0004883321744313103, "loss": 0.5264, "step": 78440 }, { "epoch": 3.8963941591338034, "grad_norm": 0.1162109375, "learning_rate": 0.0004882924406476607, "loss": 0.5425, "step": 78450 }, { "epoch": 3.896890831429423, "grad_norm": 0.1767578125, "learning_rate": 0.0004882527068640111, "loss": 0.5515, "step": 78460 }, { "epoch": 3.8973875037250423, "grad_norm": 0.1162109375, "learning_rate": 0.0004882129730803616, "loss": 0.5495, "step": 78470 }, { "epoch": 3.8978841760206615, "grad_norm": 0.10693359375, "learning_rate": 0.00048817323929671206, "loss": 0.5271, "step": 78480 }, { "epoch": 3.8983808483162807, "grad_norm": 0.166015625, "learning_rate": 0.0004881335055130625, "loss": 0.5404, "step": 78490 }, { "epoch": 3.8988775206119004, "grad_norm": 0.1142578125, "learning_rate": 0.00048809377172941295, "loss": 0.5227, "step": 78500 }, { "epoch": 3.8993741929075196, "grad_norm": 0.099609375, "learning_rate": 0.0004880540379457634, "loss": 0.5285, "step": 78510 }, { "epoch": 3.899870865203139, "grad_norm": 0.09521484375, "learning_rate": 0.00048801430416211384, "loss": 0.5331, "step": 78520 }, { "epoch": 3.9003675374987585, "grad_norm": 0.1455078125, "learning_rate": 0.0004879745703784643, "loss": 0.5292, "step": 78530 }, { "epoch": 3.9008642097943778, "grad_norm": 0.10595703125, "learning_rate": 0.0004879348365948148, "loss": 0.4952, "step": 78540 }, { "epoch": 3.901360882089997, "grad_norm": 0.12353515625, "learning_rate": 0.0004878951028111652, "loss": 0.5172, "step": 78550 }, { "epoch": 3.901857554385616, "grad_norm": 0.11572265625, "learning_rate": 0.00048785536902751567, "loss": 0.5235, "step": 78560 }, { "epoch": 3.902354226681236, "grad_norm": 0.11328125, "learning_rate": 0.00048781563524386614, "loss": 0.5048, "step": 78570 }, { "epoch": 3.902850898976855, "grad_norm": 0.11279296875, "learning_rate": 0.0004877759014602166, "loss": 0.5437, "step": 78580 }, { "epoch": 3.9033475712724743, "grad_norm": 0.1259765625, "learning_rate": 0.00048773616767656703, "loss": 0.5178, "step": 78590 }, { "epoch": 3.903844243568094, "grad_norm": 0.10302734375, "learning_rate": 0.0004876964338929175, "loss": 0.5071, "step": 78600 }, { "epoch": 3.9043409158637132, "grad_norm": 0.10205078125, "learning_rate": 0.00048765670010926797, "loss": 0.5203, "step": 78610 }, { "epoch": 3.9048375881593325, "grad_norm": 0.1103515625, "learning_rate": 0.00048761696632561833, "loss": 0.5194, "step": 78620 }, { "epoch": 3.9053342604549517, "grad_norm": 0.11083984375, "learning_rate": 0.0004875772325419688, "loss": 0.515, "step": 78630 }, { "epoch": 3.905830932750571, "grad_norm": 0.1337890625, "learning_rate": 0.00048753749875831933, "loss": 0.533, "step": 78640 }, { "epoch": 3.9063276050461906, "grad_norm": 0.1123046875, "learning_rate": 0.0004874977649746697, "loss": 0.5588, "step": 78650 }, { "epoch": 3.90682427734181, "grad_norm": 0.10888671875, "learning_rate": 0.00048745803119102017, "loss": 0.5296, "step": 78660 }, { "epoch": 3.9073209496374295, "grad_norm": 0.1396484375, "learning_rate": 0.00048741829740737064, "loss": 0.5109, "step": 78670 }, { "epoch": 3.9078176219330487, "grad_norm": 0.12158203125, "learning_rate": 0.00048737856362372105, "loss": 0.5399, "step": 78680 }, { "epoch": 3.908314294228668, "grad_norm": 0.10302734375, "learning_rate": 0.0004873388298400715, "loss": 0.509, "step": 78690 }, { "epoch": 3.908810966524287, "grad_norm": 0.115234375, "learning_rate": 0.000487299096056422, "loss": 0.52, "step": 78700 }, { "epoch": 3.9093076388199064, "grad_norm": 0.11328125, "learning_rate": 0.00048725936227277247, "loss": 0.5361, "step": 78710 }, { "epoch": 3.909804311115526, "grad_norm": 0.1025390625, "learning_rate": 0.0004872196284891229, "loss": 0.5012, "step": 78720 }, { "epoch": 3.9103009834111453, "grad_norm": 0.1220703125, "learning_rate": 0.00048717989470547336, "loss": 0.5238, "step": 78730 }, { "epoch": 3.9107976557067645, "grad_norm": 0.11865234375, "learning_rate": 0.00048714016092182383, "loss": 0.5273, "step": 78740 }, { "epoch": 3.911294328002384, "grad_norm": 0.10546875, "learning_rate": 0.00048710042713817424, "loss": 0.532, "step": 78750 }, { "epoch": 3.9117910002980034, "grad_norm": 0.10498046875, "learning_rate": 0.0004870606933545247, "loss": 0.5236, "step": 78760 }, { "epoch": 3.9122876725936226, "grad_norm": 0.11279296875, "learning_rate": 0.0004870209595708752, "loss": 0.5262, "step": 78770 }, { "epoch": 3.912784344889242, "grad_norm": 0.1142578125, "learning_rate": 0.0004869812257872256, "loss": 0.5073, "step": 78780 }, { "epoch": 3.9132810171848615, "grad_norm": 0.11181640625, "learning_rate": 0.0004869414920035761, "loss": 0.5161, "step": 78790 }, { "epoch": 3.9137776894804808, "grad_norm": 0.10400390625, "learning_rate": 0.00048690175821992655, "loss": 0.5226, "step": 78800 }, { "epoch": 3.9142743617761, "grad_norm": 0.1005859375, "learning_rate": 0.0004868620244362769, "loss": 0.5043, "step": 78810 }, { "epoch": 3.9147710340717197, "grad_norm": 0.1064453125, "learning_rate": 0.00048682229065262744, "loss": 0.5119, "step": 78820 }, { "epoch": 3.915267706367339, "grad_norm": 0.201171875, "learning_rate": 0.0004867825568689779, "loss": 0.5354, "step": 78830 }, { "epoch": 3.915764378662958, "grad_norm": 0.09765625, "learning_rate": 0.00048674282308532827, "loss": 0.5004, "step": 78840 }, { "epoch": 3.9162610509585773, "grad_norm": 0.1455078125, "learning_rate": 0.00048670308930167874, "loss": 0.5131, "step": 78850 }, { "epoch": 3.916757723254197, "grad_norm": 0.09912109375, "learning_rate": 0.00048666335551802927, "loss": 0.5342, "step": 78860 }, { "epoch": 3.9172543955498162, "grad_norm": 0.1376953125, "learning_rate": 0.00048662362173437974, "loss": 0.5517, "step": 78870 }, { "epoch": 3.9177510678454355, "grad_norm": 0.10009765625, "learning_rate": 0.0004865838879507301, "loss": 0.5119, "step": 78880 }, { "epoch": 3.918247740141055, "grad_norm": 0.08935546875, "learning_rate": 0.00048654415416708057, "loss": 0.5285, "step": 78890 }, { "epoch": 3.9187444124366744, "grad_norm": 0.11181640625, "learning_rate": 0.00048650442038343104, "loss": 0.5466, "step": 78900 }, { "epoch": 3.9192410847322936, "grad_norm": 0.095703125, "learning_rate": 0.00048646468659978146, "loss": 0.5246, "step": 78910 }, { "epoch": 3.919737757027913, "grad_norm": 0.1337890625, "learning_rate": 0.00048642495281613193, "loss": 0.5565, "step": 78920 }, { "epoch": 3.9202344293235325, "grad_norm": 0.10888671875, "learning_rate": 0.0004863852190324824, "loss": 0.5123, "step": 78930 }, { "epoch": 3.9207311016191517, "grad_norm": 0.11181640625, "learning_rate": 0.0004863454852488328, "loss": 0.522, "step": 78940 }, { "epoch": 3.921227773914771, "grad_norm": 0.10107421875, "learning_rate": 0.0004863057514651833, "loss": 0.5093, "step": 78950 }, { "epoch": 3.9217244462103906, "grad_norm": 0.11572265625, "learning_rate": 0.00048626601768153376, "loss": 0.511, "step": 78960 }, { "epoch": 3.92222111850601, "grad_norm": 0.11376953125, "learning_rate": 0.0004862262838978842, "loss": 0.5285, "step": 78970 }, { "epoch": 3.922717790801629, "grad_norm": 0.1162109375, "learning_rate": 0.00048618655011423465, "loss": 0.5343, "step": 78980 }, { "epoch": 3.9232144630972483, "grad_norm": 0.107421875, "learning_rate": 0.0004861468163305851, "loss": 0.5646, "step": 78990 }, { "epoch": 3.9237111353928675, "grad_norm": 0.1162109375, "learning_rate": 0.0004861070825469356, "loss": 0.5277, "step": 79000 }, { "epoch": 3.924207807688487, "grad_norm": 0.1650390625, "learning_rate": 0.000486067348763286, "loss": 0.5393, "step": 79010 }, { "epoch": 3.9247044799841064, "grad_norm": 0.11279296875, "learning_rate": 0.0004860276149796365, "loss": 0.5405, "step": 79020 }, { "epoch": 3.925201152279726, "grad_norm": 0.1005859375, "learning_rate": 0.00048598788119598696, "loss": 0.5444, "step": 79030 }, { "epoch": 3.9256978245753453, "grad_norm": 0.115234375, "learning_rate": 0.0004859481474123373, "loss": 0.5462, "step": 79040 }, { "epoch": 3.9261944968709646, "grad_norm": 0.0966796875, "learning_rate": 0.00048590841362868784, "loss": 0.5064, "step": 79050 }, { "epoch": 3.926691169166584, "grad_norm": 0.09375, "learning_rate": 0.0004858686798450383, "loss": 0.519, "step": 79060 }, { "epoch": 3.927187841462203, "grad_norm": 0.1142578125, "learning_rate": 0.0004858289460613887, "loss": 0.5443, "step": 79070 }, { "epoch": 3.9276845137578227, "grad_norm": 0.1630859375, "learning_rate": 0.00048578921227773915, "loss": 0.511, "step": 79080 }, { "epoch": 3.928181186053442, "grad_norm": 0.10546875, "learning_rate": 0.0004857494784940897, "loss": 0.5439, "step": 79090 }, { "epoch": 3.928677858349061, "grad_norm": 0.10302734375, "learning_rate": 0.00048570974471044004, "loss": 0.5368, "step": 79100 }, { "epoch": 3.929174530644681, "grad_norm": 0.140625, "learning_rate": 0.0004856700109267905, "loss": 0.5425, "step": 79110 }, { "epoch": 3.9296712029403, "grad_norm": 0.0986328125, "learning_rate": 0.000485630277143141, "loss": 0.5535, "step": 79120 }, { "epoch": 3.9301678752359193, "grad_norm": 0.1162109375, "learning_rate": 0.0004855905433594914, "loss": 0.5268, "step": 79130 }, { "epoch": 3.9306645475315385, "grad_norm": 0.10009765625, "learning_rate": 0.00048555080957584187, "loss": 0.523, "step": 79140 }, { "epoch": 3.931161219827158, "grad_norm": 0.1318359375, "learning_rate": 0.00048551107579219234, "loss": 0.537, "step": 79150 }, { "epoch": 3.9316578921227774, "grad_norm": 0.1162109375, "learning_rate": 0.0004854713420085428, "loss": 0.5496, "step": 79160 }, { "epoch": 3.9321545644183966, "grad_norm": 0.1201171875, "learning_rate": 0.00048543160822489323, "loss": 0.5504, "step": 79170 }, { "epoch": 3.9326512367140163, "grad_norm": 0.1630859375, "learning_rate": 0.0004853918744412437, "loss": 0.5735, "step": 79180 }, { "epoch": 3.9331479090096355, "grad_norm": 0.1083984375, "learning_rate": 0.00048535214065759417, "loss": 0.5313, "step": 79190 }, { "epoch": 3.9336445813052547, "grad_norm": 0.09814453125, "learning_rate": 0.0004853124068739446, "loss": 0.5333, "step": 79200 }, { "epoch": 3.934141253600874, "grad_norm": 0.1337890625, "learning_rate": 0.00048527267309029506, "loss": 0.5218, "step": 79210 }, { "epoch": 3.9346379258964936, "grad_norm": 0.1328125, "learning_rate": 0.00048523293930664553, "loss": 0.5266, "step": 79220 }, { "epoch": 3.935134598192113, "grad_norm": 0.0966796875, "learning_rate": 0.0004851932055229959, "loss": 0.5356, "step": 79230 }, { "epoch": 3.935631270487732, "grad_norm": 0.125, "learning_rate": 0.0004851534717393464, "loss": 0.5371, "step": 79240 }, { "epoch": 3.9361279427833518, "grad_norm": 0.1259765625, "learning_rate": 0.0004851137379556969, "loss": 0.509, "step": 79250 }, { "epoch": 3.936624615078971, "grad_norm": 0.109375, "learning_rate": 0.00048507400417204725, "loss": 0.5251, "step": 79260 }, { "epoch": 3.93712128737459, "grad_norm": 0.1220703125, "learning_rate": 0.0004850342703883977, "loss": 0.5085, "step": 79270 }, { "epoch": 3.9376179596702094, "grad_norm": 0.125, "learning_rate": 0.00048499453660474825, "loss": 0.5571, "step": 79280 }, { "epoch": 3.9381146319658287, "grad_norm": 0.138671875, "learning_rate": 0.0004849548028210986, "loss": 0.5364, "step": 79290 }, { "epoch": 3.9386113042614483, "grad_norm": 0.11279296875, "learning_rate": 0.0004849150690374491, "loss": 0.5451, "step": 79300 }, { "epoch": 3.9391079765570676, "grad_norm": 0.1025390625, "learning_rate": 0.00048487533525379956, "loss": 0.5003, "step": 79310 }, { "epoch": 3.9396046488526872, "grad_norm": 0.1484375, "learning_rate": 0.0004848356014701501, "loss": 0.5263, "step": 79320 }, { "epoch": 3.9401013211483065, "grad_norm": 0.10546875, "learning_rate": 0.00048479586768650045, "loss": 0.523, "step": 79330 }, { "epoch": 3.9405979934439257, "grad_norm": 0.1279296875, "learning_rate": 0.0004847561339028509, "loss": 0.5346, "step": 79340 }, { "epoch": 3.941094665739545, "grad_norm": 0.11474609375, "learning_rate": 0.0004847164001192014, "loss": 0.5233, "step": 79350 }, { "epoch": 3.941591338035164, "grad_norm": 0.1123046875, "learning_rate": 0.0004846766663355518, "loss": 0.5381, "step": 79360 }, { "epoch": 3.942088010330784, "grad_norm": 0.11328125, "learning_rate": 0.0004846369325519023, "loss": 0.5275, "step": 79370 }, { "epoch": 3.942584682626403, "grad_norm": 0.109375, "learning_rate": 0.00048459719876825275, "loss": 0.5336, "step": 79380 }, { "epoch": 3.9430813549220227, "grad_norm": 0.12353515625, "learning_rate": 0.00048455746498460316, "loss": 0.5401, "step": 79390 }, { "epoch": 3.943578027217642, "grad_norm": 0.11083984375, "learning_rate": 0.00048451773120095364, "loss": 0.524, "step": 79400 }, { "epoch": 3.944074699513261, "grad_norm": 0.1142578125, "learning_rate": 0.0004844779974173041, "loss": 0.514, "step": 79410 }, { "epoch": 3.9445713718088804, "grad_norm": 0.10595703125, "learning_rate": 0.0004844382636336545, "loss": 0.5181, "step": 79420 }, { "epoch": 3.9450680441044996, "grad_norm": 0.12353515625, "learning_rate": 0.000484398529850005, "loss": 0.5247, "step": 79430 }, { "epoch": 3.9455647164001193, "grad_norm": 0.09716796875, "learning_rate": 0.00048435879606635547, "loss": 0.5204, "step": 79440 }, { "epoch": 3.9460613886957385, "grad_norm": 0.10400390625, "learning_rate": 0.00048431906228270594, "loss": 0.4897, "step": 79450 }, { "epoch": 3.9465580609913578, "grad_norm": 0.1748046875, "learning_rate": 0.0004842793284990563, "loss": 0.5227, "step": 79460 }, { "epoch": 3.9470547332869774, "grad_norm": 0.130859375, "learning_rate": 0.00048423959471540683, "loss": 0.5644, "step": 79470 }, { "epoch": 3.9475514055825967, "grad_norm": 0.10986328125, "learning_rate": 0.0004841998609317573, "loss": 0.5391, "step": 79480 }, { "epoch": 3.948048077878216, "grad_norm": 0.1083984375, "learning_rate": 0.00048416012714810766, "loss": 0.5092, "step": 79490 }, { "epoch": 3.948544750173835, "grad_norm": 0.1044921875, "learning_rate": 0.00048412039336445813, "loss": 0.5233, "step": 79500 }, { "epoch": 3.9490414224694548, "grad_norm": 0.1083984375, "learning_rate": 0.00048408065958080866, "loss": 0.5487, "step": 79510 }, { "epoch": 3.949538094765074, "grad_norm": 0.1240234375, "learning_rate": 0.000484040925797159, "loss": 0.5287, "step": 79520 }, { "epoch": 3.9500347670606932, "grad_norm": 0.107421875, "learning_rate": 0.0004840011920135095, "loss": 0.5458, "step": 79530 }, { "epoch": 3.950531439356313, "grad_norm": 0.138671875, "learning_rate": 0.00048396145822985996, "loss": 0.522, "step": 79540 }, { "epoch": 3.951028111651932, "grad_norm": 0.115234375, "learning_rate": 0.0004839217244462104, "loss": 0.5134, "step": 79550 }, { "epoch": 3.9515247839475514, "grad_norm": 0.1083984375, "learning_rate": 0.00048388199066256085, "loss": 0.5682, "step": 79560 }, { "epoch": 3.9520214562431706, "grad_norm": 0.099609375, "learning_rate": 0.0004838422568789113, "loss": 0.4989, "step": 79570 }, { "epoch": 3.9525181285387903, "grad_norm": 0.1572265625, "learning_rate": 0.00048380252309526174, "loss": 0.5383, "step": 79580 }, { "epoch": 3.9530148008344095, "grad_norm": 0.11572265625, "learning_rate": 0.0004837627893116122, "loss": 0.5253, "step": 79590 }, { "epoch": 3.9535114731300287, "grad_norm": 0.095703125, "learning_rate": 0.0004837230555279627, "loss": 0.5204, "step": 79600 }, { "epoch": 3.9540081454256484, "grad_norm": 0.1318359375, "learning_rate": 0.00048368332174431316, "loss": 0.5299, "step": 79610 }, { "epoch": 3.9545048177212676, "grad_norm": 0.1416015625, "learning_rate": 0.00048364358796066357, "loss": 0.5158, "step": 79620 }, { "epoch": 3.955001490016887, "grad_norm": 0.1435546875, "learning_rate": 0.00048360385417701404, "loss": 0.5451, "step": 79630 }, { "epoch": 3.955498162312506, "grad_norm": 0.12158203125, "learning_rate": 0.0004835641203933645, "loss": 0.5338, "step": 79640 }, { "epoch": 3.9559948346081253, "grad_norm": 0.1318359375, "learning_rate": 0.00048352438660971493, "loss": 0.5416, "step": 79650 }, { "epoch": 3.956491506903745, "grad_norm": 0.109375, "learning_rate": 0.0004834846528260654, "loss": 0.5345, "step": 79660 }, { "epoch": 3.956988179199364, "grad_norm": 0.1298828125, "learning_rate": 0.0004834449190424159, "loss": 0.549, "step": 79670 }, { "epoch": 3.957484851494984, "grad_norm": 0.11962890625, "learning_rate": 0.00048340518525876624, "loss": 0.5493, "step": 79680 }, { "epoch": 3.957981523790603, "grad_norm": 0.10546875, "learning_rate": 0.00048336545147511676, "loss": 0.5484, "step": 79690 }, { "epoch": 3.9584781960862223, "grad_norm": 0.11669921875, "learning_rate": 0.00048332571769146723, "loss": 0.5111, "step": 79700 }, { "epoch": 3.9589748683818415, "grad_norm": 0.10986328125, "learning_rate": 0.0004832859839078176, "loss": 0.5525, "step": 79710 }, { "epoch": 3.9594715406774608, "grad_norm": 0.12060546875, "learning_rate": 0.00048324625012416807, "loss": 0.516, "step": 79720 }, { "epoch": 3.9599682129730804, "grad_norm": 0.1376953125, "learning_rate": 0.00048320651634051854, "loss": 0.5382, "step": 79730 }, { "epoch": 3.9604648852686997, "grad_norm": 0.10546875, "learning_rate": 0.00048316678255686896, "loss": 0.5643, "step": 79740 }, { "epoch": 3.9609615575643193, "grad_norm": 0.1123046875, "learning_rate": 0.00048312704877321943, "loss": 0.5073, "step": 79750 }, { "epoch": 3.9614582298599386, "grad_norm": 0.1171875, "learning_rate": 0.0004830873149895699, "loss": 0.5077, "step": 79760 }, { "epoch": 3.961954902155558, "grad_norm": 0.123046875, "learning_rate": 0.00048304758120592037, "loss": 0.5649, "step": 79770 }, { "epoch": 3.962451574451177, "grad_norm": 0.0966796875, "learning_rate": 0.0004830078474222708, "loss": 0.5168, "step": 79780 }, { "epoch": 3.9629482467467962, "grad_norm": 0.09912109375, "learning_rate": 0.00048296811363862126, "loss": 0.5375, "step": 79790 }, { "epoch": 3.963444919042416, "grad_norm": 0.1494140625, "learning_rate": 0.00048292837985497173, "loss": 0.5512, "step": 79800 }, { "epoch": 3.963941591338035, "grad_norm": 0.10009765625, "learning_rate": 0.00048288864607132215, "loss": 0.5251, "step": 79810 }, { "epoch": 3.9644382636336544, "grad_norm": 0.1865234375, "learning_rate": 0.0004828489122876726, "loss": 0.5405, "step": 79820 }, { "epoch": 3.964934935929274, "grad_norm": 0.10498046875, "learning_rate": 0.0004828091785040231, "loss": 0.5521, "step": 79830 }, { "epoch": 3.9654316082248933, "grad_norm": 0.1103515625, "learning_rate": 0.0004827694447203735, "loss": 0.5328, "step": 79840 }, { "epoch": 3.9659282805205125, "grad_norm": 0.12060546875, "learning_rate": 0.000482729710936724, "loss": 0.5432, "step": 79850 }, { "epoch": 3.9664249528161317, "grad_norm": 0.14453125, "learning_rate": 0.00048268997715307445, "loss": 0.542, "step": 79860 }, { "epoch": 3.9669216251117514, "grad_norm": 0.138671875, "learning_rate": 0.0004826502433694248, "loss": 0.5276, "step": 79870 }, { "epoch": 3.9674182974073706, "grad_norm": 0.0986328125, "learning_rate": 0.00048261050958577534, "loss": 0.5499, "step": 79880 }, { "epoch": 3.96791496970299, "grad_norm": 0.1201171875, "learning_rate": 0.0004825707758021258, "loss": 0.5069, "step": 79890 }, { "epoch": 3.9684116419986095, "grad_norm": 0.10302734375, "learning_rate": 0.0004825310420184763, "loss": 0.5312, "step": 79900 }, { "epoch": 3.9689083142942287, "grad_norm": 0.125, "learning_rate": 0.00048249130823482665, "loss": 0.5259, "step": 79910 }, { "epoch": 3.969404986589848, "grad_norm": 0.11181640625, "learning_rate": 0.00048245157445117717, "loss": 0.5166, "step": 79920 }, { "epoch": 3.969901658885467, "grad_norm": 0.107421875, "learning_rate": 0.00048241184066752764, "loss": 0.5314, "step": 79930 }, { "epoch": 3.970398331181087, "grad_norm": 0.1455078125, "learning_rate": 0.000482372106883878, "loss": 0.5214, "step": 79940 }, { "epoch": 3.970895003476706, "grad_norm": 0.0888671875, "learning_rate": 0.0004823323731002285, "loss": 0.5146, "step": 79950 }, { "epoch": 3.9713916757723253, "grad_norm": 0.11279296875, "learning_rate": 0.000482292639316579, "loss": 0.538, "step": 79960 }, { "epoch": 3.971888348067945, "grad_norm": 0.0986328125, "learning_rate": 0.00048225290553292937, "loss": 0.5395, "step": 79970 }, { "epoch": 3.9723850203635642, "grad_norm": 0.09912109375, "learning_rate": 0.00048221317174927984, "loss": 0.5594, "step": 79980 }, { "epoch": 3.9728816926591835, "grad_norm": 0.158203125, "learning_rate": 0.0004821734379656303, "loss": 0.5511, "step": 79990 }, { "epoch": 3.9733783649548027, "grad_norm": 0.1416015625, "learning_rate": 0.0004821337041819807, "loss": 0.5274, "step": 80000 }, { "epoch": 3.973875037250422, "grad_norm": 0.0966796875, "learning_rate": 0.0004820939703983312, "loss": 0.5179, "step": 80010 }, { "epoch": 3.9743717095460416, "grad_norm": 0.162109375, "learning_rate": 0.00048205423661468167, "loss": 0.5516, "step": 80020 }, { "epoch": 3.974868381841661, "grad_norm": 0.1806640625, "learning_rate": 0.0004820145028310321, "loss": 0.5319, "step": 80030 }, { "epoch": 3.9753650541372805, "grad_norm": 0.10546875, "learning_rate": 0.00048197476904738256, "loss": 0.5173, "step": 80040 }, { "epoch": 3.9758617264328997, "grad_norm": 0.1201171875, "learning_rate": 0.00048193503526373303, "loss": 0.5147, "step": 80050 }, { "epoch": 3.976358398728519, "grad_norm": 0.1689453125, "learning_rate": 0.0004818953014800835, "loss": 0.5243, "step": 80060 }, { "epoch": 3.976855071024138, "grad_norm": 0.1416015625, "learning_rate": 0.0004818555676964339, "loss": 0.4932, "step": 80070 }, { "epoch": 3.9773517433197574, "grad_norm": 0.107421875, "learning_rate": 0.0004818158339127844, "loss": 0.5149, "step": 80080 }, { "epoch": 3.977848415615377, "grad_norm": 0.13671875, "learning_rate": 0.00048177610012913486, "loss": 0.5575, "step": 80090 }, { "epoch": 3.9783450879109963, "grad_norm": 0.13671875, "learning_rate": 0.0004817363663454852, "loss": 0.5325, "step": 80100 }, { "epoch": 3.978841760206616, "grad_norm": 0.1181640625, "learning_rate": 0.00048169663256183575, "loss": 0.5211, "step": 80110 }, { "epoch": 3.979338432502235, "grad_norm": 0.1123046875, "learning_rate": 0.0004816568987781862, "loss": 0.4939, "step": 80120 }, { "epoch": 3.9798351047978544, "grad_norm": 0.11328125, "learning_rate": 0.0004816171649945366, "loss": 0.5304, "step": 80130 }, { "epoch": 3.9803317770934736, "grad_norm": 0.111328125, "learning_rate": 0.00048157743121088705, "loss": 0.5244, "step": 80140 }, { "epoch": 3.980828449389093, "grad_norm": 0.10595703125, "learning_rate": 0.0004815376974272376, "loss": 0.5435, "step": 80150 }, { "epoch": 3.9813251216847125, "grad_norm": 0.12890625, "learning_rate": 0.00048149796364358794, "loss": 0.5387, "step": 80160 }, { "epoch": 3.9818217939803318, "grad_norm": 0.12451171875, "learning_rate": 0.0004814582298599384, "loss": 0.5371, "step": 80170 }, { "epoch": 3.982318466275951, "grad_norm": 0.1181640625, "learning_rate": 0.0004814184960762889, "loss": 0.5091, "step": 80180 }, { "epoch": 3.9828151385715707, "grad_norm": 0.11083984375, "learning_rate": 0.0004813787622926393, "loss": 0.552, "step": 80190 }, { "epoch": 3.98331181086719, "grad_norm": 0.142578125, "learning_rate": 0.00048133902850898977, "loss": 0.4959, "step": 80200 }, { "epoch": 3.983808483162809, "grad_norm": 0.15625, "learning_rate": 0.00048129929472534024, "loss": 0.5101, "step": 80210 }, { "epoch": 3.9843051554584283, "grad_norm": 0.1240234375, "learning_rate": 0.0004812595609416907, "loss": 0.5362, "step": 80220 }, { "epoch": 3.984801827754048, "grad_norm": 0.10791015625, "learning_rate": 0.00048121982715804113, "loss": 0.5268, "step": 80230 }, { "epoch": 3.9852985000496672, "grad_norm": 0.169921875, "learning_rate": 0.0004811800933743916, "loss": 0.5353, "step": 80240 }, { "epoch": 3.9857951723452865, "grad_norm": 0.10546875, "learning_rate": 0.0004811403595907421, "loss": 0.5479, "step": 80250 }, { "epoch": 3.986291844640906, "grad_norm": 0.1044921875, "learning_rate": 0.0004811006258070925, "loss": 0.5581, "step": 80260 }, { "epoch": 3.9867885169365254, "grad_norm": 0.125, "learning_rate": 0.00048106089202344296, "loss": 0.5255, "step": 80270 }, { "epoch": 3.9872851892321446, "grad_norm": 0.1181640625, "learning_rate": 0.00048102115823979344, "loss": 0.5291, "step": 80280 }, { "epoch": 3.987781861527764, "grad_norm": 0.10693359375, "learning_rate": 0.00048098142445614385, "loss": 0.5187, "step": 80290 }, { "epoch": 3.9882785338233835, "grad_norm": 0.142578125, "learning_rate": 0.0004809416906724943, "loss": 0.5122, "step": 80300 }, { "epoch": 3.9887752061190027, "grad_norm": 0.1298828125, "learning_rate": 0.0004809019568888448, "loss": 0.512, "step": 80310 }, { "epoch": 3.989271878414622, "grad_norm": 0.107421875, "learning_rate": 0.00048086222310519516, "loss": 0.5211, "step": 80320 }, { "epoch": 3.9897685507102416, "grad_norm": 0.1259765625, "learning_rate": 0.00048082248932154563, "loss": 0.5828, "step": 80330 }, { "epoch": 3.990265223005861, "grad_norm": 0.146484375, "learning_rate": 0.00048078275553789615, "loss": 0.5629, "step": 80340 }, { "epoch": 3.99076189530148, "grad_norm": 0.11083984375, "learning_rate": 0.0004807430217542466, "loss": 0.544, "step": 80350 }, { "epoch": 3.9912585675970993, "grad_norm": 0.1064453125, "learning_rate": 0.000480703287970597, "loss": 0.5288, "step": 80360 }, { "epoch": 3.9917552398927185, "grad_norm": 0.1083984375, "learning_rate": 0.00048066355418694746, "loss": 0.5364, "step": 80370 }, { "epoch": 3.992251912188338, "grad_norm": 0.1259765625, "learning_rate": 0.000480623820403298, "loss": 0.5254, "step": 80380 }, { "epoch": 3.9927485844839574, "grad_norm": 0.09912109375, "learning_rate": 0.00048058408661964835, "loss": 0.5298, "step": 80390 }, { "epoch": 3.993245256779577, "grad_norm": 0.1025390625, "learning_rate": 0.0004805443528359988, "loss": 0.5143, "step": 80400 }, { "epoch": 3.9937419290751963, "grad_norm": 0.2021484375, "learning_rate": 0.0004805046190523493, "loss": 0.5279, "step": 80410 }, { "epoch": 3.9942386013708155, "grad_norm": 0.109375, "learning_rate": 0.0004804648852686997, "loss": 0.543, "step": 80420 }, { "epoch": 3.9947352736664348, "grad_norm": 0.1005859375, "learning_rate": 0.0004804251514850502, "loss": 0.5356, "step": 80430 }, { "epoch": 3.995231945962054, "grad_norm": 0.1044921875, "learning_rate": 0.00048038541770140065, "loss": 0.5297, "step": 80440 }, { "epoch": 3.9957286182576737, "grad_norm": 0.11376953125, "learning_rate": 0.00048034568391775107, "loss": 0.5392, "step": 80450 }, { "epoch": 3.996225290553293, "grad_norm": 0.1123046875, "learning_rate": 0.00048030595013410154, "loss": 0.5283, "step": 80460 }, { "epoch": 3.996721962848912, "grad_norm": 0.1064453125, "learning_rate": 0.000480266216350452, "loss": 0.5134, "step": 80470 }, { "epoch": 3.997218635144532, "grad_norm": 0.166015625, "learning_rate": 0.00048022648256680243, "loss": 0.5283, "step": 80480 }, { "epoch": 3.997715307440151, "grad_norm": 0.10595703125, "learning_rate": 0.0004801867487831529, "loss": 0.4861, "step": 80490 }, { "epoch": 3.9982119797357702, "grad_norm": 0.10205078125, "learning_rate": 0.00048014701499950337, "loss": 0.488, "step": 80500 }, { "epoch": 3.9987086520313895, "grad_norm": 0.09765625, "learning_rate": 0.00048010728121585384, "loss": 0.5418, "step": 80510 }, { "epoch": 3.999205324327009, "grad_norm": 0.09814453125, "learning_rate": 0.00048006754743220426, "loss": 0.5178, "step": 80520 }, { "epoch": 3.9997019966226284, "grad_norm": 0.134765625, "learning_rate": 0.00048002781364855473, "loss": 0.563, "step": 80530 }, { "epoch": 4.000198668918248, "grad_norm": 0.1064453125, "learning_rate": 0.0004799880798649052, "loss": 0.5175, "step": 80540 }, { "epoch": 4.000695341213867, "grad_norm": 0.12158203125, "learning_rate": 0.00047994834608125557, "loss": 0.5267, "step": 80550 }, { "epoch": 4.0011920135094865, "grad_norm": 0.146484375, "learning_rate": 0.0004799086122976061, "loss": 0.5212, "step": 80560 }, { "epoch": 4.001688685805106, "grad_norm": 0.1435546875, "learning_rate": 0.00047986887851395656, "loss": 0.5415, "step": 80570 }, { "epoch": 4.002185358100725, "grad_norm": 0.1015625, "learning_rate": 0.0004798291447303069, "loss": 0.5447, "step": 80580 }, { "epoch": 4.002682030396344, "grad_norm": 0.11181640625, "learning_rate": 0.0004797894109466574, "loss": 0.5089, "step": 80590 }, { "epoch": 4.003178702691963, "grad_norm": 0.123046875, "learning_rate": 0.00047974967716300787, "loss": 0.5216, "step": 80600 }, { "epoch": 4.0036753749875835, "grad_norm": 0.09912109375, "learning_rate": 0.0004797099433793583, "loss": 0.5156, "step": 80610 }, { "epoch": 4.004172047283203, "grad_norm": 0.1171875, "learning_rate": 0.00047967020959570876, "loss": 0.5018, "step": 80620 }, { "epoch": 4.004668719578822, "grad_norm": 0.1015625, "learning_rate": 0.00047963047581205923, "loss": 0.5122, "step": 80630 }, { "epoch": 4.005165391874441, "grad_norm": 0.1083984375, "learning_rate": 0.00047959074202840964, "loss": 0.5377, "step": 80640 }, { "epoch": 4.00566206417006, "grad_norm": 0.1005859375, "learning_rate": 0.0004795510082447601, "loss": 0.5088, "step": 80650 }, { "epoch": 4.00615873646568, "grad_norm": 0.11328125, "learning_rate": 0.0004795112744611106, "loss": 0.5172, "step": 80660 }, { "epoch": 4.006655408761299, "grad_norm": 0.1494140625, "learning_rate": 0.00047947154067746106, "loss": 0.5396, "step": 80670 }, { "epoch": 4.007152081056919, "grad_norm": 0.1328125, "learning_rate": 0.0004794318068938115, "loss": 0.5247, "step": 80680 }, { "epoch": 4.007648753352538, "grad_norm": 0.10888671875, "learning_rate": 0.00047939207311016195, "loss": 0.5022, "step": 80690 }, { "epoch": 4.0081454256481575, "grad_norm": 0.2138671875, "learning_rate": 0.0004793523393265124, "loss": 0.499, "step": 80700 }, { "epoch": 4.008642097943777, "grad_norm": 0.09765625, "learning_rate": 0.00047931260554286284, "loss": 0.4814, "step": 80710 }, { "epoch": 4.009138770239396, "grad_norm": 0.1396484375, "learning_rate": 0.0004792728717592133, "loss": 0.5266, "step": 80720 }, { "epoch": 4.009635442535015, "grad_norm": 0.1328125, "learning_rate": 0.0004792331379755638, "loss": 0.5145, "step": 80730 }, { "epoch": 4.010132114830634, "grad_norm": 0.1162109375, "learning_rate": 0.00047919340419191414, "loss": 0.4964, "step": 80740 }, { "epoch": 4.0106287871262545, "grad_norm": 0.1630859375, "learning_rate": 0.00047915367040826467, "loss": 0.5007, "step": 80750 }, { "epoch": 4.011125459421874, "grad_norm": 0.11474609375, "learning_rate": 0.00047911393662461514, "loss": 0.5183, "step": 80760 }, { "epoch": 4.011622131717493, "grad_norm": 0.1005859375, "learning_rate": 0.0004790742028409655, "loss": 0.5162, "step": 80770 }, { "epoch": 4.012118804013112, "grad_norm": 0.10400390625, "learning_rate": 0.00047903446905731597, "loss": 0.5259, "step": 80780 }, { "epoch": 4.012615476308731, "grad_norm": 0.11962890625, "learning_rate": 0.0004789947352736665, "loss": 0.5069, "step": 80790 }, { "epoch": 4.013112148604351, "grad_norm": 0.1142578125, "learning_rate": 0.00047895500149001697, "loss": 0.4948, "step": 80800 }, { "epoch": 4.01360882089997, "grad_norm": 0.146484375, "learning_rate": 0.00047891526770636733, "loss": 0.5593, "step": 80810 }, { "epoch": 4.01410549319559, "grad_norm": 0.1103515625, "learning_rate": 0.0004788755339227178, "loss": 0.5431, "step": 80820 }, { "epoch": 4.014602165491209, "grad_norm": 0.130859375, "learning_rate": 0.0004788358001390683, "loss": 0.5352, "step": 80830 }, { "epoch": 4.015098837786828, "grad_norm": 0.150390625, "learning_rate": 0.0004787960663554187, "loss": 0.5245, "step": 80840 }, { "epoch": 4.015595510082448, "grad_norm": 0.1162109375, "learning_rate": 0.00047875633257176916, "loss": 0.5241, "step": 80850 }, { "epoch": 4.016092182378067, "grad_norm": 0.10888671875, "learning_rate": 0.00047871659878811964, "loss": 0.5128, "step": 80860 }, { "epoch": 4.016588854673686, "grad_norm": 0.12353515625, "learning_rate": 0.00047867686500447005, "loss": 0.5192, "step": 80870 }, { "epoch": 4.017085526969305, "grad_norm": 0.099609375, "learning_rate": 0.0004786371312208205, "loss": 0.5221, "step": 80880 }, { "epoch": 4.017582199264925, "grad_norm": 0.1474609375, "learning_rate": 0.000478597397437171, "loss": 0.5048, "step": 80890 }, { "epoch": 4.018078871560545, "grad_norm": 0.10546875, "learning_rate": 0.0004785576636535214, "loss": 0.5098, "step": 80900 }, { "epoch": 4.018575543856164, "grad_norm": 0.1337890625, "learning_rate": 0.0004785179298698719, "loss": 0.5253, "step": 80910 }, { "epoch": 4.019072216151783, "grad_norm": 0.09912109375, "learning_rate": 0.00047847819608622235, "loss": 0.5029, "step": 80920 }, { "epoch": 4.019568888447402, "grad_norm": 0.12890625, "learning_rate": 0.0004784384623025727, "loss": 0.5064, "step": 80930 }, { "epoch": 4.020065560743022, "grad_norm": 0.10205078125, "learning_rate": 0.00047839872851892324, "loss": 0.5097, "step": 80940 }, { "epoch": 4.020562233038641, "grad_norm": 0.0986328125, "learning_rate": 0.0004783589947352737, "loss": 0.4999, "step": 80950 }, { "epoch": 4.02105890533426, "grad_norm": 0.115234375, "learning_rate": 0.0004783192609516242, "loss": 0.5068, "step": 80960 }, { "epoch": 4.02155557762988, "grad_norm": 0.1298828125, "learning_rate": 0.00047827952716797455, "loss": 0.4947, "step": 80970 }, { "epoch": 4.022052249925499, "grad_norm": 0.1142578125, "learning_rate": 0.0004782397933843251, "loss": 0.5178, "step": 80980 }, { "epoch": 4.022548922221119, "grad_norm": 0.2177734375, "learning_rate": 0.00047820005960067555, "loss": 0.5396, "step": 80990 }, { "epoch": 4.023045594516738, "grad_norm": 0.10302734375, "learning_rate": 0.0004781603258170259, "loss": 0.561, "step": 81000 }, { "epoch": 4.023542266812357, "grad_norm": 0.123046875, "learning_rate": 0.0004781205920333764, "loss": 0.5209, "step": 81010 }, { "epoch": 4.024038939107976, "grad_norm": 0.1025390625, "learning_rate": 0.0004780808582497269, "loss": 0.5166, "step": 81020 }, { "epoch": 4.0245356114035955, "grad_norm": 0.11376953125, "learning_rate": 0.00047804112446607727, "loss": 0.5519, "step": 81030 }, { "epoch": 4.025032283699216, "grad_norm": 0.125, "learning_rate": 0.00047800139068242774, "loss": 0.5737, "step": 81040 }, { "epoch": 4.025528955994835, "grad_norm": 0.1142578125, "learning_rate": 0.0004779616568987782, "loss": 0.5398, "step": 81050 }, { "epoch": 4.026025628290454, "grad_norm": 0.1015625, "learning_rate": 0.00047792192311512863, "loss": 0.5005, "step": 81060 }, { "epoch": 4.026522300586073, "grad_norm": 0.1142578125, "learning_rate": 0.0004778821893314791, "loss": 0.536, "step": 81070 }, { "epoch": 4.0270189728816925, "grad_norm": 0.1591796875, "learning_rate": 0.00047784245554782957, "loss": 0.5223, "step": 81080 }, { "epoch": 4.027515645177312, "grad_norm": 0.1591796875, "learning_rate": 0.00047780272176418004, "loss": 0.505, "step": 81090 }, { "epoch": 4.028012317472931, "grad_norm": 0.119140625, "learning_rate": 0.00047776298798053046, "loss": 0.499, "step": 81100 }, { "epoch": 4.028508989768551, "grad_norm": 0.11181640625, "learning_rate": 0.00047772325419688093, "loss": 0.5178, "step": 81110 }, { "epoch": 4.02900566206417, "grad_norm": 0.12255859375, "learning_rate": 0.0004776835204132314, "loss": 0.5239, "step": 81120 }, { "epoch": 4.0295023343597896, "grad_norm": 0.1103515625, "learning_rate": 0.0004776437866295818, "loss": 0.5163, "step": 81130 }, { "epoch": 4.029999006655409, "grad_norm": 0.154296875, "learning_rate": 0.0004776040528459323, "loss": 0.5398, "step": 81140 }, { "epoch": 4.030495678951028, "grad_norm": 0.11279296875, "learning_rate": 0.00047756431906228276, "loss": 0.5174, "step": 81150 }, { "epoch": 4.030992351246647, "grad_norm": 0.09423828125, "learning_rate": 0.0004775245852786331, "loss": 0.5257, "step": 81160 }, { "epoch": 4.0314890235422665, "grad_norm": 0.142578125, "learning_rate": 0.00047748485149498365, "loss": 0.493, "step": 81170 }, { "epoch": 4.031985695837887, "grad_norm": 0.1240234375, "learning_rate": 0.0004774451177113341, "loss": 0.5354, "step": 81180 }, { "epoch": 4.032482368133506, "grad_norm": 0.126953125, "learning_rate": 0.0004774053839276845, "loss": 0.5447, "step": 81190 }, { "epoch": 4.032979040429125, "grad_norm": 0.09326171875, "learning_rate": 0.00047736565014403496, "loss": 0.5177, "step": 81200 }, { "epoch": 4.033475712724744, "grad_norm": 0.11376953125, "learning_rate": 0.0004773259163603855, "loss": 0.5483, "step": 81210 }, { "epoch": 4.0339723850203635, "grad_norm": 0.1044921875, "learning_rate": 0.00047728618257673585, "loss": 0.5108, "step": 81220 }, { "epoch": 4.034469057315983, "grad_norm": 0.1513671875, "learning_rate": 0.0004772464487930863, "loss": 0.528, "step": 81230 }, { "epoch": 4.034965729611602, "grad_norm": 0.138671875, "learning_rate": 0.0004772067150094368, "loss": 0.5305, "step": 81240 }, { "epoch": 4.035462401907222, "grad_norm": 0.10107421875, "learning_rate": 0.0004771669812257873, "loss": 0.5205, "step": 81250 }, { "epoch": 4.035959074202841, "grad_norm": 0.1650390625, "learning_rate": 0.0004771272474421377, "loss": 0.5339, "step": 81260 }, { "epoch": 4.0364557464984605, "grad_norm": 0.1435546875, "learning_rate": 0.00047708751365848815, "loss": 0.4839, "step": 81270 }, { "epoch": 4.03695241879408, "grad_norm": 0.134765625, "learning_rate": 0.0004770477798748386, "loss": 0.525, "step": 81280 }, { "epoch": 4.037449091089699, "grad_norm": 0.107421875, "learning_rate": 0.00047700804609118904, "loss": 0.5113, "step": 81290 }, { "epoch": 4.037945763385318, "grad_norm": 0.10302734375, "learning_rate": 0.0004769683123075395, "loss": 0.5052, "step": 81300 }, { "epoch": 4.038442435680937, "grad_norm": 0.1328125, "learning_rate": 0.00047692857852389, "loss": 0.522, "step": 81310 }, { "epoch": 4.038939107976557, "grad_norm": 0.11572265625, "learning_rate": 0.0004768888447402404, "loss": 0.5277, "step": 81320 }, { "epoch": 4.039435780272177, "grad_norm": 0.123046875, "learning_rate": 0.00047684911095659087, "loss": 0.5198, "step": 81330 }, { "epoch": 4.039932452567796, "grad_norm": 0.11767578125, "learning_rate": 0.00047680937717294134, "loss": 0.4995, "step": 81340 }, { "epoch": 4.040429124863415, "grad_norm": 0.1005859375, "learning_rate": 0.00047676964338929176, "loss": 0.4999, "step": 81350 }, { "epoch": 4.040925797159034, "grad_norm": 0.125, "learning_rate": 0.00047672990960564223, "loss": 0.5285, "step": 81360 }, { "epoch": 4.041422469454654, "grad_norm": 0.11474609375, "learning_rate": 0.0004766901758219927, "loss": 0.5227, "step": 81370 }, { "epoch": 4.041919141750273, "grad_norm": 0.1416015625, "learning_rate": 0.00047665044203834306, "loss": 0.5269, "step": 81380 }, { "epoch": 4.042415814045892, "grad_norm": 0.09716796875, "learning_rate": 0.0004766107082546936, "loss": 0.4916, "step": 81390 }, { "epoch": 4.042912486341512, "grad_norm": 0.09521484375, "learning_rate": 0.00047657097447104406, "loss": 0.5073, "step": 81400 }, { "epoch": 4.0434091586371315, "grad_norm": 0.10498046875, "learning_rate": 0.00047653124068739453, "loss": 0.5292, "step": 81410 }, { "epoch": 4.043905830932751, "grad_norm": 0.1220703125, "learning_rate": 0.0004764915069037449, "loss": 0.5153, "step": 81420 }, { "epoch": 4.04440250322837, "grad_norm": 0.1591796875, "learning_rate": 0.00047645177312009536, "loss": 0.5215, "step": 81430 }, { "epoch": 4.044899175523989, "grad_norm": 0.1181640625, "learning_rate": 0.0004764120393364459, "loss": 0.5353, "step": 81440 }, { "epoch": 4.045395847819608, "grad_norm": 0.1259765625, "learning_rate": 0.00047637230555279625, "loss": 0.5035, "step": 81450 }, { "epoch": 4.045892520115228, "grad_norm": 0.10107421875, "learning_rate": 0.0004763325717691467, "loss": 0.5241, "step": 81460 }, { "epoch": 4.046389192410848, "grad_norm": 0.115234375, "learning_rate": 0.0004762928379854972, "loss": 0.4885, "step": 81470 }, { "epoch": 4.046885864706467, "grad_norm": 0.1298828125, "learning_rate": 0.0004762531042018476, "loss": 0.5404, "step": 81480 }, { "epoch": 4.047382537002086, "grad_norm": 0.1298828125, "learning_rate": 0.0004762133704181981, "loss": 0.5116, "step": 81490 }, { "epoch": 4.047879209297705, "grad_norm": 0.10546875, "learning_rate": 0.00047617363663454856, "loss": 0.5134, "step": 81500 }, { "epoch": 4.048375881593325, "grad_norm": 0.10009765625, "learning_rate": 0.00047613390285089897, "loss": 0.511, "step": 81510 }, { "epoch": 4.048872553888944, "grad_norm": 0.13671875, "learning_rate": 0.00047609416906724944, "loss": 0.5603, "step": 81520 }, { "epoch": 4.049369226184563, "grad_norm": 0.12890625, "learning_rate": 0.0004760544352835999, "loss": 0.5238, "step": 81530 }, { "epoch": 4.049865898480183, "grad_norm": 0.12255859375, "learning_rate": 0.0004760147014999504, "loss": 0.5093, "step": 81540 }, { "epoch": 4.050362570775802, "grad_norm": 0.123046875, "learning_rate": 0.0004759749677163008, "loss": 0.5349, "step": 81550 }, { "epoch": 4.050859243071422, "grad_norm": 0.13671875, "learning_rate": 0.0004759352339326513, "loss": 0.5305, "step": 81560 }, { "epoch": 4.051355915367041, "grad_norm": 0.10205078125, "learning_rate": 0.00047589550014900175, "loss": 0.5223, "step": 81570 }, { "epoch": 4.05185258766266, "grad_norm": 0.1591796875, "learning_rate": 0.00047585576636535216, "loss": 0.5055, "step": 81580 }, { "epoch": 4.052349259958279, "grad_norm": 0.099609375, "learning_rate": 0.00047581603258170263, "loss": 0.5241, "step": 81590 }, { "epoch": 4.0528459322538986, "grad_norm": 0.11376953125, "learning_rate": 0.0004757762987980531, "loss": 0.5139, "step": 81600 }, { "epoch": 4.053342604549519, "grad_norm": 0.10693359375, "learning_rate": 0.00047573656501440347, "loss": 0.5414, "step": 81610 }, { "epoch": 4.053839276845138, "grad_norm": 0.1171875, "learning_rate": 0.000475696831230754, "loss": 0.5034, "step": 81620 }, { "epoch": 4.054335949140757, "grad_norm": 0.10498046875, "learning_rate": 0.00047565709744710447, "loss": 0.513, "step": 81630 }, { "epoch": 4.054832621436376, "grad_norm": 0.10791015625, "learning_rate": 0.00047561736366345483, "loss": 0.5004, "step": 81640 }, { "epoch": 4.055329293731996, "grad_norm": 0.10498046875, "learning_rate": 0.0004755776298798053, "loss": 0.5158, "step": 81650 }, { "epoch": 4.055825966027615, "grad_norm": 0.142578125, "learning_rate": 0.0004755378960961558, "loss": 0.5057, "step": 81660 }, { "epoch": 4.056322638323234, "grad_norm": 0.11962890625, "learning_rate": 0.0004754981623125062, "loss": 0.503, "step": 81670 }, { "epoch": 4.056819310618853, "grad_norm": 0.10986328125, "learning_rate": 0.00047545842852885666, "loss": 0.5453, "step": 81680 }, { "epoch": 4.057315982914473, "grad_norm": 0.15625, "learning_rate": 0.00047541869474520713, "loss": 0.5194, "step": 81690 }, { "epoch": 4.057812655210093, "grad_norm": 0.1357421875, "learning_rate": 0.0004753789609615576, "loss": 0.5501, "step": 81700 }, { "epoch": 4.058309327505712, "grad_norm": 0.162109375, "learning_rate": 0.000475339227177908, "loss": 0.5369, "step": 81710 }, { "epoch": 4.058805999801331, "grad_norm": 0.10546875, "learning_rate": 0.0004752994933942585, "loss": 0.5098, "step": 81720 }, { "epoch": 4.05930267209695, "grad_norm": 0.1162109375, "learning_rate": 0.00047525975961060896, "loss": 0.5112, "step": 81730 }, { "epoch": 4.0597993443925695, "grad_norm": 0.1064453125, "learning_rate": 0.0004752200258269594, "loss": 0.5086, "step": 81740 }, { "epoch": 4.060296016688189, "grad_norm": 0.11474609375, "learning_rate": 0.00047518029204330985, "loss": 0.5346, "step": 81750 }, { "epoch": 4.060792688983809, "grad_norm": 0.1015625, "learning_rate": 0.0004751405582596603, "loss": 0.5093, "step": 81760 }, { "epoch": 4.061289361279428, "grad_norm": 0.1171875, "learning_rate": 0.00047510082447601074, "loss": 0.4806, "step": 81770 }, { "epoch": 4.061786033575047, "grad_norm": 0.0986328125, "learning_rate": 0.0004750610906923612, "loss": 0.5136, "step": 81780 }, { "epoch": 4.0622827058706665, "grad_norm": 0.1240234375, "learning_rate": 0.0004750213569087117, "loss": 0.4983, "step": 81790 }, { "epoch": 4.062779378166286, "grad_norm": 0.107421875, "learning_rate": 0.00047498162312506205, "loss": 0.5402, "step": 81800 }, { "epoch": 4.063276050461905, "grad_norm": 0.1474609375, "learning_rate": 0.00047494188934141257, "loss": 0.4821, "step": 81810 }, { "epoch": 4.063772722757524, "grad_norm": 0.1396484375, "learning_rate": 0.00047490215555776304, "loss": 0.521, "step": 81820 }, { "epoch": 4.064269395053144, "grad_norm": 0.10546875, "learning_rate": 0.0004748624217741134, "loss": 0.4949, "step": 81830 }, { "epoch": 4.064766067348764, "grad_norm": 0.1611328125, "learning_rate": 0.0004748226879904639, "loss": 0.5308, "step": 81840 }, { "epoch": 4.065262739644383, "grad_norm": 0.111328125, "learning_rate": 0.0004747829542068144, "loss": 0.5349, "step": 81850 }, { "epoch": 4.065759411940002, "grad_norm": 0.10595703125, "learning_rate": 0.0004747432204231649, "loss": 0.5077, "step": 81860 }, { "epoch": 4.066256084235621, "grad_norm": 0.11767578125, "learning_rate": 0.00047470348663951524, "loss": 0.5036, "step": 81870 }, { "epoch": 4.0667527565312405, "grad_norm": 0.10791015625, "learning_rate": 0.0004746637528558657, "loss": 0.5353, "step": 81880 }, { "epoch": 4.06724942882686, "grad_norm": 0.11669921875, "learning_rate": 0.00047462401907221623, "loss": 0.5128, "step": 81890 }, { "epoch": 4.06774610112248, "grad_norm": 0.13671875, "learning_rate": 0.0004745842852885666, "loss": 0.5283, "step": 81900 }, { "epoch": 4.068242773418099, "grad_norm": 0.1328125, "learning_rate": 0.00047454455150491707, "loss": 0.532, "step": 81910 }, { "epoch": 4.068739445713718, "grad_norm": 0.1376953125, "learning_rate": 0.00047450481772126754, "loss": 0.5092, "step": 81920 }, { "epoch": 4.0692361180093375, "grad_norm": 0.10986328125, "learning_rate": 0.00047446508393761796, "loss": 0.5129, "step": 81930 }, { "epoch": 4.069732790304957, "grad_norm": 0.11279296875, "learning_rate": 0.00047442535015396843, "loss": 0.5507, "step": 81940 }, { "epoch": 4.070229462600576, "grad_norm": 0.10009765625, "learning_rate": 0.0004743856163703189, "loss": 0.5067, "step": 81950 }, { "epoch": 4.070726134896195, "grad_norm": 0.11962890625, "learning_rate": 0.0004743458825866693, "loss": 0.5173, "step": 81960 }, { "epoch": 4.071222807191814, "grad_norm": 0.1142578125, "learning_rate": 0.0004743061488030198, "loss": 0.5237, "step": 81970 }, { "epoch": 4.0717194794874345, "grad_norm": 0.123046875, "learning_rate": 0.00047426641501937026, "loss": 0.529, "step": 81980 }, { "epoch": 4.072216151783054, "grad_norm": 0.10693359375, "learning_rate": 0.00047422668123572073, "loss": 0.497, "step": 81990 }, { "epoch": 4.072712824078673, "grad_norm": 0.11279296875, "learning_rate": 0.00047418694745207115, "loss": 0.4832, "step": 82000 }, { "epoch": 4.073209496374292, "grad_norm": 0.1201171875, "learning_rate": 0.0004741472136684216, "loss": 0.5452, "step": 82010 }, { "epoch": 4.073706168669911, "grad_norm": 0.1650390625, "learning_rate": 0.0004741074798847721, "loss": 0.501, "step": 82020 }, { "epoch": 4.074202840965531, "grad_norm": 0.13671875, "learning_rate": 0.00047406774610112245, "loss": 0.5312, "step": 82030 }, { "epoch": 4.07469951326115, "grad_norm": 0.11083984375, "learning_rate": 0.000474028012317473, "loss": 0.521, "step": 82040 }, { "epoch": 4.07519618555677, "grad_norm": 0.12060546875, "learning_rate": 0.00047398827853382345, "loss": 0.5259, "step": 82050 }, { "epoch": 4.075692857852389, "grad_norm": 0.1103515625, "learning_rate": 0.0004739485447501738, "loss": 0.5127, "step": 82060 }, { "epoch": 4.0761895301480084, "grad_norm": 0.111328125, "learning_rate": 0.0004739088109665243, "loss": 0.5266, "step": 82070 }, { "epoch": 4.076686202443628, "grad_norm": 0.12109375, "learning_rate": 0.0004738690771828748, "loss": 0.5267, "step": 82080 }, { "epoch": 4.077182874739247, "grad_norm": 0.10546875, "learning_rate": 0.00047382934339922517, "loss": 0.5366, "step": 82090 }, { "epoch": 4.077679547034866, "grad_norm": 0.1171875, "learning_rate": 0.00047378960961557564, "loss": 0.505, "step": 82100 }, { "epoch": 4.078176219330485, "grad_norm": 0.1572265625, "learning_rate": 0.0004737498758319261, "loss": 0.5062, "step": 82110 }, { "epoch": 4.0786728916261055, "grad_norm": 0.1103515625, "learning_rate": 0.00047371014204827653, "loss": 0.5252, "step": 82120 }, { "epoch": 4.079169563921725, "grad_norm": 0.1015625, "learning_rate": 0.000473670408264627, "loss": 0.5392, "step": 82130 }, { "epoch": 4.079666236217344, "grad_norm": 0.1357421875, "learning_rate": 0.0004736306744809775, "loss": 0.5166, "step": 82140 }, { "epoch": 4.080162908512963, "grad_norm": 0.177734375, "learning_rate": 0.00047359094069732795, "loss": 0.5286, "step": 82150 }, { "epoch": 4.080659580808582, "grad_norm": 0.130859375, "learning_rate": 0.00047355120691367836, "loss": 0.4974, "step": 82160 }, { "epoch": 4.081156253104202, "grad_norm": 0.1162109375, "learning_rate": 0.00047351147313002883, "loss": 0.5004, "step": 82170 }, { "epoch": 4.081652925399821, "grad_norm": 0.1328125, "learning_rate": 0.0004734717393463793, "loss": 0.5137, "step": 82180 }, { "epoch": 4.082149597695441, "grad_norm": 0.09912109375, "learning_rate": 0.0004734320055627297, "loss": 0.4974, "step": 82190 }, { "epoch": 4.08264626999106, "grad_norm": 0.140625, "learning_rate": 0.0004733922717790802, "loss": 0.5247, "step": 82200 }, { "epoch": 4.083142942286679, "grad_norm": 0.10107421875, "learning_rate": 0.00047335253799543067, "loss": 0.5155, "step": 82210 }, { "epoch": 4.083639614582299, "grad_norm": 0.1171875, "learning_rate": 0.0004733128042117811, "loss": 0.4963, "step": 82220 }, { "epoch": 4.084136286877918, "grad_norm": 0.142578125, "learning_rate": 0.00047327307042813155, "loss": 0.5233, "step": 82230 }, { "epoch": 4.084632959173537, "grad_norm": 0.109375, "learning_rate": 0.000473233336644482, "loss": 0.5229, "step": 82240 }, { "epoch": 4.085129631469156, "grad_norm": 0.1064453125, "learning_rate": 0.0004731936028608324, "loss": 0.491, "step": 82250 }, { "epoch": 4.085626303764776, "grad_norm": 0.10693359375, "learning_rate": 0.0004731538690771829, "loss": 0.5364, "step": 82260 }, { "epoch": 4.086122976060396, "grad_norm": 0.119140625, "learning_rate": 0.0004731141352935334, "loss": 0.4996, "step": 82270 }, { "epoch": 4.086619648356015, "grad_norm": 0.11669921875, "learning_rate": 0.00047307440150988375, "loss": 0.5088, "step": 82280 }, { "epoch": 4.087116320651634, "grad_norm": 0.11328125, "learning_rate": 0.0004730346677262342, "loss": 0.5234, "step": 82290 }, { "epoch": 4.087612992947253, "grad_norm": 0.1025390625, "learning_rate": 0.0004729949339425847, "loss": 0.5018, "step": 82300 }, { "epoch": 4.088109665242873, "grad_norm": 0.166015625, "learning_rate": 0.0004729552001589352, "loss": 0.5146, "step": 82310 }, { "epoch": 4.088606337538492, "grad_norm": 0.11572265625, "learning_rate": 0.0004729154663752856, "loss": 0.5191, "step": 82320 }, { "epoch": 4.089103009834112, "grad_norm": 0.11767578125, "learning_rate": 0.00047287573259163605, "loss": 0.5376, "step": 82330 }, { "epoch": 4.089599682129731, "grad_norm": 0.1259765625, "learning_rate": 0.0004728359988079865, "loss": 0.5175, "step": 82340 }, { "epoch": 4.09009635442535, "grad_norm": 0.10400390625, "learning_rate": 0.00047279626502433694, "loss": 0.5092, "step": 82350 }, { "epoch": 4.09059302672097, "grad_norm": 0.12353515625, "learning_rate": 0.0004727565312406874, "loss": 0.5021, "step": 82360 }, { "epoch": 4.091089699016589, "grad_norm": 0.111328125, "learning_rate": 0.0004727167974570379, "loss": 0.5344, "step": 82370 }, { "epoch": 4.091586371312208, "grad_norm": 0.10791015625, "learning_rate": 0.0004726770636733883, "loss": 0.4935, "step": 82380 }, { "epoch": 4.092083043607827, "grad_norm": 0.1376953125, "learning_rate": 0.00047263732988973877, "loss": 0.491, "step": 82390 }, { "epoch": 4.0925797159034465, "grad_norm": 0.162109375, "learning_rate": 0.00047259759610608924, "loss": 0.5281, "step": 82400 }, { "epoch": 4.093076388199067, "grad_norm": 0.11865234375, "learning_rate": 0.00047255786232243966, "loss": 0.4877, "step": 82410 }, { "epoch": 4.093573060494686, "grad_norm": 0.10498046875, "learning_rate": 0.00047251812853879013, "loss": 0.5048, "step": 82420 }, { "epoch": 4.094069732790305, "grad_norm": 0.1376953125, "learning_rate": 0.0004724783947551406, "loss": 0.5371, "step": 82430 }, { "epoch": 4.094566405085924, "grad_norm": 0.1533203125, "learning_rate": 0.0004724386609714911, "loss": 0.5233, "step": 82440 }, { "epoch": 4.0950630773815435, "grad_norm": 0.134765625, "learning_rate": 0.0004723989271878415, "loss": 0.5133, "step": 82450 }, { "epoch": 4.095559749677163, "grad_norm": 0.1337890625, "learning_rate": 0.00047235919340419196, "loss": 0.5158, "step": 82460 }, { "epoch": 4.096056421972782, "grad_norm": 0.1279296875, "learning_rate": 0.00047231945962054243, "loss": 0.517, "step": 82470 }, { "epoch": 4.096553094268402, "grad_norm": 0.10498046875, "learning_rate": 0.0004722797258368928, "loss": 0.4954, "step": 82480 }, { "epoch": 4.097049766564021, "grad_norm": 0.11572265625, "learning_rate": 0.0004722399920532433, "loss": 0.5403, "step": 82490 }, { "epoch": 4.0975464388596405, "grad_norm": 0.474609375, "learning_rate": 0.0004722002582695938, "loss": 0.5425, "step": 82500 }, { "epoch": 4.09804311115526, "grad_norm": 0.09716796875, "learning_rate": 0.00047216052448594416, "loss": 0.524, "step": 82510 }, { "epoch": 4.098539783450879, "grad_norm": 0.12158203125, "learning_rate": 0.00047212079070229463, "loss": 0.4932, "step": 82520 }, { "epoch": 4.099036455746498, "grad_norm": 0.10693359375, "learning_rate": 0.0004720810569186451, "loss": 0.5, "step": 82530 }, { "epoch": 4.0995331280421174, "grad_norm": 0.10888671875, "learning_rate": 0.0004720413231349955, "loss": 0.5181, "step": 82540 }, { "epoch": 4.100029800337738, "grad_norm": 0.1083984375, "learning_rate": 0.000472001589351346, "loss": 0.502, "step": 82550 }, { "epoch": 4.100526472633357, "grad_norm": 0.12158203125, "learning_rate": 0.00047196185556769646, "loss": 0.5251, "step": 82560 }, { "epoch": 4.101023144928976, "grad_norm": 0.1064453125, "learning_rate": 0.0004719221217840469, "loss": 0.5354, "step": 82570 }, { "epoch": 4.101519817224595, "grad_norm": 0.1083984375, "learning_rate": 0.00047188238800039735, "loss": 0.5215, "step": 82580 }, { "epoch": 4.1020164895202145, "grad_norm": 0.1767578125, "learning_rate": 0.0004718426542167478, "loss": 0.5062, "step": 82590 }, { "epoch": 4.102513161815834, "grad_norm": 0.10595703125, "learning_rate": 0.0004718029204330983, "loss": 0.5057, "step": 82600 }, { "epoch": 4.103009834111453, "grad_norm": 0.1171875, "learning_rate": 0.0004717631866494487, "loss": 0.5163, "step": 82610 }, { "epoch": 4.103506506407073, "grad_norm": 0.12109375, "learning_rate": 0.0004717234528657992, "loss": 0.5181, "step": 82620 }, { "epoch": 4.104003178702692, "grad_norm": 0.166015625, "learning_rate": 0.00047168371908214965, "loss": 0.4976, "step": 82630 }, { "epoch": 4.1044998509983115, "grad_norm": 0.11865234375, "learning_rate": 0.00047164398529850007, "loss": 0.5093, "step": 82640 }, { "epoch": 4.104996523293931, "grad_norm": 0.11767578125, "learning_rate": 0.00047160425151485054, "loss": 0.5185, "step": 82650 }, { "epoch": 4.10549319558955, "grad_norm": 0.12158203125, "learning_rate": 0.000471564517731201, "loss": 0.4915, "step": 82660 }, { "epoch": 4.105989867885169, "grad_norm": 0.1064453125, "learning_rate": 0.00047152478394755137, "loss": 0.5473, "step": 82670 }, { "epoch": 4.106486540180788, "grad_norm": 0.1162109375, "learning_rate": 0.0004714850501639019, "loss": 0.5313, "step": 82680 }, { "epoch": 4.106983212476408, "grad_norm": 0.1357421875, "learning_rate": 0.00047144531638025237, "loss": 0.5496, "step": 82690 }, { "epoch": 4.107479884772028, "grad_norm": 0.11328125, "learning_rate": 0.00047140558259660273, "loss": 0.5321, "step": 82700 }, { "epoch": 4.107976557067647, "grad_norm": 0.099609375, "learning_rate": 0.0004713658488129532, "loss": 0.5213, "step": 82710 }, { "epoch": 4.108473229363266, "grad_norm": 0.130859375, "learning_rate": 0.00047132611502930373, "loss": 0.5279, "step": 82720 }, { "epoch": 4.108969901658885, "grad_norm": 0.11083984375, "learning_rate": 0.0004712863812456541, "loss": 0.4876, "step": 82730 }, { "epoch": 4.109466573954505, "grad_norm": 0.1015625, "learning_rate": 0.00047124664746200456, "loss": 0.5223, "step": 82740 }, { "epoch": 4.109963246250124, "grad_norm": 0.11669921875, "learning_rate": 0.00047120691367835504, "loss": 0.5293, "step": 82750 }, { "epoch": 4.110459918545743, "grad_norm": 0.1611328125, "learning_rate": 0.00047116717989470556, "loss": 0.5532, "step": 82760 }, { "epoch": 4.110956590841363, "grad_norm": 0.11083984375, "learning_rate": 0.0004711274461110559, "loss": 0.5227, "step": 82770 }, { "epoch": 4.1114532631369825, "grad_norm": 0.107421875, "learning_rate": 0.0004710877123274064, "loss": 0.5163, "step": 82780 }, { "epoch": 4.111949935432602, "grad_norm": 0.119140625, "learning_rate": 0.00047104797854375687, "loss": 0.4817, "step": 82790 }, { "epoch": 4.112446607728221, "grad_norm": 0.10986328125, "learning_rate": 0.0004710082447601073, "loss": 0.5271, "step": 82800 }, { "epoch": 4.11294328002384, "grad_norm": 0.1201171875, "learning_rate": 0.00047096851097645775, "loss": 0.5263, "step": 82810 }, { "epoch": 4.113439952319459, "grad_norm": 0.09765625, "learning_rate": 0.0004709287771928082, "loss": 0.489, "step": 82820 }, { "epoch": 4.113936624615079, "grad_norm": 0.10693359375, "learning_rate": 0.00047088904340915864, "loss": 0.5237, "step": 82830 }, { "epoch": 4.114433296910699, "grad_norm": 0.1328125, "learning_rate": 0.0004708493096255091, "loss": 0.5245, "step": 82840 }, { "epoch": 4.114929969206318, "grad_norm": 0.1298828125, "learning_rate": 0.0004708095758418596, "loss": 0.5509, "step": 82850 }, { "epoch": 4.115426641501937, "grad_norm": 0.10400390625, "learning_rate": 0.00047076984205820995, "loss": 0.5182, "step": 82860 }, { "epoch": 4.115923313797556, "grad_norm": 0.1005859375, "learning_rate": 0.0004707301082745605, "loss": 0.5243, "step": 82870 }, { "epoch": 4.116419986093176, "grad_norm": 0.1103515625, "learning_rate": 0.00047069037449091095, "loss": 0.497, "step": 82880 }, { "epoch": 4.116916658388795, "grad_norm": 0.10546875, "learning_rate": 0.0004706506407072614, "loss": 0.532, "step": 82890 }, { "epoch": 4.117413330684414, "grad_norm": 0.142578125, "learning_rate": 0.0004706109069236118, "loss": 0.5535, "step": 82900 }, { "epoch": 4.117910002980034, "grad_norm": 0.0947265625, "learning_rate": 0.0004705711731399623, "loss": 0.522, "step": 82910 }, { "epoch": 4.118406675275653, "grad_norm": 0.11962890625, "learning_rate": 0.0004705314393563128, "loss": 0.4981, "step": 82920 }, { "epoch": 4.118903347571273, "grad_norm": 0.1044921875, "learning_rate": 0.00047049170557266314, "loss": 0.5042, "step": 82930 }, { "epoch": 4.119400019866892, "grad_norm": 0.111328125, "learning_rate": 0.0004704519717890136, "loss": 0.5407, "step": 82940 }, { "epoch": 4.119896692162511, "grad_norm": 0.1279296875, "learning_rate": 0.00047041223800536414, "loss": 0.495, "step": 82950 }, { "epoch": 4.12039336445813, "grad_norm": 0.1181640625, "learning_rate": 0.0004703725042217145, "loss": 0.4886, "step": 82960 }, { "epoch": 4.1208900367537495, "grad_norm": 0.134765625, "learning_rate": 0.00047033277043806497, "loss": 0.5248, "step": 82970 }, { "epoch": 4.12138670904937, "grad_norm": 0.1416015625, "learning_rate": 0.00047029303665441544, "loss": 0.5313, "step": 82980 }, { "epoch": 4.121883381344989, "grad_norm": 0.10107421875, "learning_rate": 0.00047025330287076586, "loss": 0.4949, "step": 82990 }, { "epoch": 4.122380053640608, "grad_norm": 0.0966796875, "learning_rate": 0.00047021356908711633, "loss": 0.4992, "step": 83000 }, { "epoch": 4.122876725936227, "grad_norm": 0.12890625, "learning_rate": 0.0004701738353034668, "loss": 0.541, "step": 83010 }, { "epoch": 4.123373398231847, "grad_norm": 0.146484375, "learning_rate": 0.0004701341015198172, "loss": 0.5312, "step": 83020 }, { "epoch": 4.123870070527466, "grad_norm": 0.11083984375, "learning_rate": 0.0004700943677361677, "loss": 0.513, "step": 83030 }, { "epoch": 4.124366742823085, "grad_norm": 0.1640625, "learning_rate": 0.00047005463395251816, "loss": 0.5379, "step": 83040 }, { "epoch": 4.124863415118704, "grad_norm": 0.1796875, "learning_rate": 0.00047001490016886863, "loss": 0.508, "step": 83050 }, { "epoch": 4.125360087414324, "grad_norm": 0.1337890625, "learning_rate": 0.00046997516638521905, "loss": 0.5071, "step": 83060 }, { "epoch": 4.125856759709944, "grad_norm": 0.10498046875, "learning_rate": 0.0004699354326015695, "loss": 0.4966, "step": 83070 }, { "epoch": 4.126353432005563, "grad_norm": 0.1103515625, "learning_rate": 0.00046989569881792, "loss": 0.5498, "step": 83080 }, { "epoch": 4.126850104301182, "grad_norm": 0.1142578125, "learning_rate": 0.0004698559650342704, "loss": 0.5128, "step": 83090 }, { "epoch": 4.127346776596801, "grad_norm": 0.126953125, "learning_rate": 0.0004698162312506209, "loss": 0.534, "step": 83100 }, { "epoch": 4.1278434488924205, "grad_norm": 0.11962890625, "learning_rate": 0.00046977649746697135, "loss": 0.5198, "step": 83110 }, { "epoch": 4.12834012118804, "grad_norm": 0.109375, "learning_rate": 0.0004697367636833217, "loss": 0.5052, "step": 83120 }, { "epoch": 4.12883679348366, "grad_norm": 0.111328125, "learning_rate": 0.0004696970298996722, "loss": 0.528, "step": 83130 }, { "epoch": 4.129333465779279, "grad_norm": 0.10107421875, "learning_rate": 0.0004696572961160227, "loss": 0.5558, "step": 83140 }, { "epoch": 4.129830138074898, "grad_norm": 0.12890625, "learning_rate": 0.0004696175623323731, "loss": 0.5222, "step": 83150 }, { "epoch": 4.1303268103705175, "grad_norm": 0.1044921875, "learning_rate": 0.00046957782854872355, "loss": 0.5299, "step": 83160 }, { "epoch": 4.130823482666137, "grad_norm": 0.1533203125, "learning_rate": 0.000469538094765074, "loss": 0.5209, "step": 83170 }, { "epoch": 4.131320154961756, "grad_norm": 0.11865234375, "learning_rate": 0.00046949836098142444, "loss": 0.528, "step": 83180 }, { "epoch": 4.131816827257375, "grad_norm": 0.1005859375, "learning_rate": 0.0004694586271977749, "loss": 0.5078, "step": 83190 }, { "epoch": 4.132313499552995, "grad_norm": 0.142578125, "learning_rate": 0.0004694188934141254, "loss": 0.5392, "step": 83200 }, { "epoch": 4.1328101718486145, "grad_norm": 0.158203125, "learning_rate": 0.00046937915963047585, "loss": 0.4914, "step": 83210 }, { "epoch": 4.133306844144234, "grad_norm": 0.10693359375, "learning_rate": 0.00046933942584682627, "loss": 0.5129, "step": 83220 }, { "epoch": 4.133803516439853, "grad_norm": 0.1044921875, "learning_rate": 0.00046929969206317674, "loss": 0.531, "step": 83230 }, { "epoch": 4.134300188735472, "grad_norm": 0.11376953125, "learning_rate": 0.0004692599582795272, "loss": 0.5036, "step": 83240 }, { "epoch": 4.1347968610310915, "grad_norm": 0.099609375, "learning_rate": 0.00046922022449587763, "loss": 0.5092, "step": 83250 }, { "epoch": 4.135293533326711, "grad_norm": 0.1435546875, "learning_rate": 0.0004691804907122281, "loss": 0.5096, "step": 83260 }, { "epoch": 4.135790205622331, "grad_norm": 0.126953125, "learning_rate": 0.00046914075692857857, "loss": 0.5065, "step": 83270 }, { "epoch": 4.13628687791795, "grad_norm": 0.11474609375, "learning_rate": 0.000469101023144929, "loss": 0.5523, "step": 83280 }, { "epoch": 4.136783550213569, "grad_norm": 0.10986328125, "learning_rate": 0.00046906128936127946, "loss": 0.5279, "step": 83290 }, { "epoch": 4.1372802225091885, "grad_norm": 0.10009765625, "learning_rate": 0.00046902155557762993, "loss": 0.5352, "step": 83300 }, { "epoch": 4.137776894804808, "grad_norm": 0.1298828125, "learning_rate": 0.0004689818217939803, "loss": 0.5355, "step": 83310 }, { "epoch": 4.138273567100427, "grad_norm": 0.1337890625, "learning_rate": 0.0004689420880103308, "loss": 0.563, "step": 83320 }, { "epoch": 4.138770239396046, "grad_norm": 0.1044921875, "learning_rate": 0.0004689023542266813, "loss": 0.5084, "step": 83330 }, { "epoch": 4.139266911691665, "grad_norm": 0.107421875, "learning_rate": 0.00046886262044303176, "loss": 0.5208, "step": 83340 }, { "epoch": 4.1397635839872855, "grad_norm": 0.115234375, "learning_rate": 0.0004688228866593821, "loss": 0.5138, "step": 83350 }, { "epoch": 4.140260256282905, "grad_norm": 0.1259765625, "learning_rate": 0.00046878315287573265, "loss": 0.5155, "step": 83360 }, { "epoch": 4.140756928578524, "grad_norm": 0.11474609375, "learning_rate": 0.0004687434190920831, "loss": 0.5194, "step": 83370 }, { "epoch": 4.141253600874143, "grad_norm": 0.1142578125, "learning_rate": 0.0004687036853084335, "loss": 0.5232, "step": 83380 }, { "epoch": 4.141750273169762, "grad_norm": 0.11279296875, "learning_rate": 0.00046866395152478396, "loss": 0.5192, "step": 83390 }, { "epoch": 4.142246945465382, "grad_norm": 0.109375, "learning_rate": 0.0004686242177411344, "loss": 0.532, "step": 83400 }, { "epoch": 4.142743617761001, "grad_norm": 0.13671875, "learning_rate": 0.00046858448395748484, "loss": 0.5369, "step": 83410 }, { "epoch": 4.143240290056621, "grad_norm": 0.103515625, "learning_rate": 0.0004685447501738353, "loss": 0.5194, "step": 83420 }, { "epoch": 4.14373696235224, "grad_norm": 0.11669921875, "learning_rate": 0.0004685050163901858, "loss": 0.5188, "step": 83430 }, { "epoch": 4.144233634647859, "grad_norm": 0.1123046875, "learning_rate": 0.0004684652826065362, "loss": 0.4746, "step": 83440 }, { "epoch": 4.144730306943479, "grad_norm": 0.11083984375, "learning_rate": 0.0004684255488228867, "loss": 0.5332, "step": 83450 }, { "epoch": 4.145226979239098, "grad_norm": 0.1162109375, "learning_rate": 0.00046838581503923715, "loss": 0.4914, "step": 83460 }, { "epoch": 4.145723651534717, "grad_norm": 0.10791015625, "learning_rate": 0.00046834608125558756, "loss": 0.5398, "step": 83470 }, { "epoch": 4.146220323830336, "grad_norm": 0.11279296875, "learning_rate": 0.00046830634747193803, "loss": 0.5198, "step": 83480 }, { "epoch": 4.1467169961259565, "grad_norm": 0.10791015625, "learning_rate": 0.0004682666136882885, "loss": 0.5076, "step": 83490 }, { "epoch": 4.147213668421576, "grad_norm": 0.1103515625, "learning_rate": 0.000468226879904639, "loss": 0.5263, "step": 83500 }, { "epoch": 4.147710340717195, "grad_norm": 0.1298828125, "learning_rate": 0.0004681871461209894, "loss": 0.5078, "step": 83510 }, { "epoch": 4.148207013012814, "grad_norm": 0.1162109375, "learning_rate": 0.00046814741233733987, "loss": 0.5411, "step": 83520 }, { "epoch": 4.148703685308433, "grad_norm": 0.11572265625, "learning_rate": 0.00046810767855369034, "loss": 0.5165, "step": 83530 }, { "epoch": 4.149200357604053, "grad_norm": 0.142578125, "learning_rate": 0.0004680679447700407, "loss": 0.5239, "step": 83540 }, { "epoch": 4.149697029899672, "grad_norm": 0.0986328125, "learning_rate": 0.0004680282109863912, "loss": 0.5162, "step": 83550 }, { "epoch": 4.150193702195292, "grad_norm": 0.1064453125, "learning_rate": 0.0004679884772027417, "loss": 0.5268, "step": 83560 }, { "epoch": 4.150690374490911, "grad_norm": 0.1103515625, "learning_rate": 0.00046794874341909206, "loss": 0.4695, "step": 83570 }, { "epoch": 4.15118704678653, "grad_norm": 0.1181640625, "learning_rate": 0.00046790900963544253, "loss": 0.5221, "step": 83580 }, { "epoch": 4.15168371908215, "grad_norm": 0.11767578125, "learning_rate": 0.00046786927585179306, "loss": 0.5106, "step": 83590 }, { "epoch": 4.152180391377769, "grad_norm": 0.12255859375, "learning_rate": 0.0004678295420681434, "loss": 0.5034, "step": 83600 }, { "epoch": 4.152677063673388, "grad_norm": 0.111328125, "learning_rate": 0.0004677898082844939, "loss": 0.5582, "step": 83610 }, { "epoch": 4.153173735969007, "grad_norm": 0.11181640625, "learning_rate": 0.00046775007450084436, "loss": 0.5301, "step": 83620 }, { "epoch": 4.153670408264627, "grad_norm": 0.1318359375, "learning_rate": 0.0004677103407171949, "loss": 0.5215, "step": 83630 }, { "epoch": 4.154167080560247, "grad_norm": 0.130859375, "learning_rate": 0.00046767060693354525, "loss": 0.512, "step": 83640 }, { "epoch": 4.154663752855866, "grad_norm": 0.095703125, "learning_rate": 0.0004676308731498957, "loss": 0.5255, "step": 83650 }, { "epoch": 4.155160425151485, "grad_norm": 0.1279296875, "learning_rate": 0.0004675911393662462, "loss": 0.5147, "step": 83660 }, { "epoch": 4.155657097447104, "grad_norm": 0.1494140625, "learning_rate": 0.0004675514055825966, "loss": 0.5296, "step": 83670 }, { "epoch": 4.1561537697427235, "grad_norm": 0.09912109375, "learning_rate": 0.0004675116717989471, "loss": 0.5289, "step": 83680 }, { "epoch": 4.156650442038343, "grad_norm": 0.1064453125, "learning_rate": 0.00046747193801529755, "loss": 0.5453, "step": 83690 }, { "epoch": 4.157147114333963, "grad_norm": 0.15625, "learning_rate": 0.00046743220423164797, "loss": 0.5377, "step": 83700 }, { "epoch": 4.157643786629582, "grad_norm": 0.0986328125, "learning_rate": 0.00046739247044799844, "loss": 0.4831, "step": 83710 }, { "epoch": 4.158140458925201, "grad_norm": 0.10498046875, "learning_rate": 0.0004673527366643489, "loss": 0.5165, "step": 83720 }, { "epoch": 4.158637131220821, "grad_norm": 0.150390625, "learning_rate": 0.0004673130028806993, "loss": 0.532, "step": 83730 }, { "epoch": 4.15913380351644, "grad_norm": 0.1279296875, "learning_rate": 0.0004672732690970498, "loss": 0.5285, "step": 83740 }, { "epoch": 4.159630475812059, "grad_norm": 0.1162109375, "learning_rate": 0.0004672335353134003, "loss": 0.5117, "step": 83750 }, { "epoch": 4.160127148107678, "grad_norm": 0.1083984375, "learning_rate": 0.00046719380152975064, "loss": 0.5011, "step": 83760 }, { "epoch": 4.1606238204032975, "grad_norm": 0.146484375, "learning_rate": 0.0004671540677461011, "loss": 0.541, "step": 83770 }, { "epoch": 4.161120492698918, "grad_norm": 0.12158203125, "learning_rate": 0.00046711433396245163, "loss": 0.5228, "step": 83780 }, { "epoch": 4.161617164994537, "grad_norm": 0.1201171875, "learning_rate": 0.0004670746001788021, "loss": 0.4957, "step": 83790 }, { "epoch": 4.162113837290156, "grad_norm": 0.11865234375, "learning_rate": 0.00046703486639515247, "loss": 0.4934, "step": 83800 }, { "epoch": 4.162610509585775, "grad_norm": 0.1572265625, "learning_rate": 0.00046699513261150294, "loss": 0.5274, "step": 83810 }, { "epoch": 4.1631071818813945, "grad_norm": 0.1279296875, "learning_rate": 0.00046695539882785346, "loss": 0.5108, "step": 83820 }, { "epoch": 4.163603854177014, "grad_norm": 0.150390625, "learning_rate": 0.00046691566504420383, "loss": 0.5514, "step": 83830 }, { "epoch": 4.164100526472633, "grad_norm": 0.1044921875, "learning_rate": 0.0004668759312605543, "loss": 0.5133, "step": 83840 }, { "epoch": 4.164597198768253, "grad_norm": 0.11962890625, "learning_rate": 0.00046683619747690477, "loss": 0.5386, "step": 83850 }, { "epoch": 4.165093871063872, "grad_norm": 0.1083984375, "learning_rate": 0.0004667964636932552, "loss": 0.5187, "step": 83860 }, { "epoch": 4.1655905433594915, "grad_norm": 0.1484375, "learning_rate": 0.00046675672990960566, "loss": 0.5123, "step": 83870 }, { "epoch": 4.166087215655111, "grad_norm": 0.11962890625, "learning_rate": 0.00046671699612595613, "loss": 0.5165, "step": 83880 }, { "epoch": 4.16658388795073, "grad_norm": 0.166015625, "learning_rate": 0.00046667726234230655, "loss": 0.5761, "step": 83890 }, { "epoch": 4.167080560246349, "grad_norm": 0.1044921875, "learning_rate": 0.000466637528558657, "loss": 0.5479, "step": 83900 }, { "epoch": 4.167577232541968, "grad_norm": 0.10205078125, "learning_rate": 0.0004665977947750075, "loss": 0.5462, "step": 83910 }, { "epoch": 4.1680739048375886, "grad_norm": 0.1123046875, "learning_rate": 0.0004665580609913579, "loss": 0.5091, "step": 83920 }, { "epoch": 4.168570577133208, "grad_norm": 0.099609375, "learning_rate": 0.0004665183272077084, "loss": 0.5157, "step": 83930 }, { "epoch": 4.169067249428827, "grad_norm": 0.115234375, "learning_rate": 0.00046647859342405885, "loss": 0.5151, "step": 83940 }, { "epoch": 4.169563921724446, "grad_norm": 0.126953125, "learning_rate": 0.0004664388596404093, "loss": 0.5114, "step": 83950 }, { "epoch": 4.1700605940200655, "grad_norm": 0.11083984375, "learning_rate": 0.00046639912585675974, "loss": 0.5499, "step": 83960 }, { "epoch": 4.170557266315685, "grad_norm": 0.130859375, "learning_rate": 0.0004663593920731102, "loss": 0.5134, "step": 83970 }, { "epoch": 4.171053938611304, "grad_norm": 0.1357421875, "learning_rate": 0.0004663196582894607, "loss": 0.53, "step": 83980 }, { "epoch": 4.171550610906924, "grad_norm": 0.10986328125, "learning_rate": 0.00046627992450581104, "loss": 0.5353, "step": 83990 }, { "epoch": 4.172047283202543, "grad_norm": 0.11083984375, "learning_rate": 0.0004662401907221615, "loss": 0.5044, "step": 84000 }, { "epoch": 4.1725439554981625, "grad_norm": 0.115234375, "learning_rate": 0.00046620045693851204, "loss": 0.5158, "step": 84010 }, { "epoch": 4.173040627793782, "grad_norm": 0.1298828125, "learning_rate": 0.0004661607231548624, "loss": 0.5087, "step": 84020 }, { "epoch": 4.173537300089401, "grad_norm": 0.1669921875, "learning_rate": 0.0004661209893712129, "loss": 0.5213, "step": 84030 }, { "epoch": 4.17403397238502, "grad_norm": 0.10888671875, "learning_rate": 0.00046608125558756335, "loss": 0.5164, "step": 84040 }, { "epoch": 4.174530644680639, "grad_norm": 0.10302734375, "learning_rate": 0.00046604152180391376, "loss": 0.5075, "step": 84050 }, { "epoch": 4.175027316976259, "grad_norm": 0.11083984375, "learning_rate": 0.00046600178802026423, "loss": 0.4927, "step": 84060 }, { "epoch": 4.175523989271879, "grad_norm": 0.1279296875, "learning_rate": 0.0004659620542366147, "loss": 0.5201, "step": 84070 }, { "epoch": 4.176020661567498, "grad_norm": 0.10498046875, "learning_rate": 0.0004659223204529652, "loss": 0.5072, "step": 84080 }, { "epoch": 4.176517333863117, "grad_norm": 0.10693359375, "learning_rate": 0.0004658825866693156, "loss": 0.5163, "step": 84090 }, { "epoch": 4.177014006158736, "grad_norm": 0.1162109375, "learning_rate": 0.00046584285288566607, "loss": 0.5076, "step": 84100 }, { "epoch": 4.177510678454356, "grad_norm": 0.0986328125, "learning_rate": 0.00046580311910201654, "loss": 0.4845, "step": 84110 }, { "epoch": 4.178007350749975, "grad_norm": 0.107421875, "learning_rate": 0.00046576338531836695, "loss": 0.5261, "step": 84120 }, { "epoch": 4.178504023045594, "grad_norm": 0.1337890625, "learning_rate": 0.0004657236515347174, "loss": 0.5091, "step": 84130 }, { "epoch": 4.179000695341214, "grad_norm": 0.09814453125, "learning_rate": 0.0004656839177510679, "loss": 0.5522, "step": 84140 }, { "epoch": 4.179497367636833, "grad_norm": 0.0986328125, "learning_rate": 0.0004656441839674183, "loss": 0.5152, "step": 84150 }, { "epoch": 4.179994039932453, "grad_norm": 0.10986328125, "learning_rate": 0.0004656044501837688, "loss": 0.5298, "step": 84160 }, { "epoch": 4.180490712228072, "grad_norm": 0.1103515625, "learning_rate": 0.00046556471640011926, "loss": 0.514, "step": 84170 }, { "epoch": 4.180987384523691, "grad_norm": 0.09912109375, "learning_rate": 0.0004655249826164696, "loss": 0.5026, "step": 84180 }, { "epoch": 4.18148405681931, "grad_norm": 0.12353515625, "learning_rate": 0.00046548524883282015, "loss": 0.5257, "step": 84190 }, { "epoch": 4.18198072911493, "grad_norm": 0.1357421875, "learning_rate": 0.0004654455150491706, "loss": 0.5155, "step": 84200 }, { "epoch": 4.18247740141055, "grad_norm": 0.11572265625, "learning_rate": 0.000465405781265521, "loss": 0.5175, "step": 84210 }, { "epoch": 4.182974073706169, "grad_norm": 0.109375, "learning_rate": 0.00046536604748187145, "loss": 0.5337, "step": 84220 }, { "epoch": 4.183470746001788, "grad_norm": 0.10400390625, "learning_rate": 0.0004653263136982219, "loss": 0.5357, "step": 84230 }, { "epoch": 4.183967418297407, "grad_norm": 0.1044921875, "learning_rate": 0.00046528657991457245, "loss": 0.5226, "step": 84240 }, { "epoch": 4.184464090593027, "grad_norm": 0.16015625, "learning_rate": 0.0004652468461309228, "loss": 0.5126, "step": 84250 }, { "epoch": 4.184960762888646, "grad_norm": 0.10205078125, "learning_rate": 0.0004652071123472733, "loss": 0.5091, "step": 84260 }, { "epoch": 4.185457435184265, "grad_norm": 0.1259765625, "learning_rate": 0.00046516737856362375, "loss": 0.5189, "step": 84270 }, { "epoch": 4.185954107479885, "grad_norm": 0.10986328125, "learning_rate": 0.00046512764477997417, "loss": 0.5327, "step": 84280 }, { "epoch": 4.186450779775504, "grad_norm": 0.10791015625, "learning_rate": 0.00046508791099632464, "loss": 0.5098, "step": 84290 }, { "epoch": 4.186947452071124, "grad_norm": 0.1455078125, "learning_rate": 0.0004650481772126751, "loss": 0.5159, "step": 84300 }, { "epoch": 4.187444124366743, "grad_norm": 0.1044921875, "learning_rate": 0.00046500844342902553, "loss": 0.5371, "step": 84310 }, { "epoch": 4.187940796662362, "grad_norm": 0.115234375, "learning_rate": 0.000464968709645376, "loss": 0.5184, "step": 84320 }, { "epoch": 4.188437468957981, "grad_norm": 0.1220703125, "learning_rate": 0.0004649289758617265, "loss": 0.4931, "step": 84330 }, { "epoch": 4.1889341412536005, "grad_norm": 0.10205078125, "learning_rate": 0.0004648892420780769, "loss": 0.5019, "step": 84340 }, { "epoch": 4.189430813549221, "grad_norm": 0.126953125, "learning_rate": 0.00046484950829442736, "loss": 0.5593, "step": 84350 }, { "epoch": 4.18992748584484, "grad_norm": 0.10791015625, "learning_rate": 0.00046480977451077783, "loss": 0.5058, "step": 84360 }, { "epoch": 4.190424158140459, "grad_norm": 0.1337890625, "learning_rate": 0.0004647700407271282, "loss": 0.526, "step": 84370 }, { "epoch": 4.190920830436078, "grad_norm": 0.130859375, "learning_rate": 0.0004647303069434787, "loss": 0.5303, "step": 84380 }, { "epoch": 4.1914175027316976, "grad_norm": 0.1591796875, "learning_rate": 0.0004646905731598292, "loss": 0.5006, "step": 84390 }, { "epoch": 4.191914175027317, "grad_norm": 0.1064453125, "learning_rate": 0.00046465083937617966, "loss": 0.5384, "step": 84400 }, { "epoch": 4.192410847322936, "grad_norm": 0.10107421875, "learning_rate": 0.00046461110559253003, "loss": 0.539, "step": 84410 }, { "epoch": 4.192907519618556, "grad_norm": 0.138671875, "learning_rate": 0.00046457137180888055, "loss": 0.5146, "step": 84420 }, { "epoch": 4.193404191914175, "grad_norm": 0.130859375, "learning_rate": 0.000464531638025231, "loss": 0.5103, "step": 84430 }, { "epoch": 4.193900864209795, "grad_norm": 0.111328125, "learning_rate": 0.0004644919042415814, "loss": 0.5426, "step": 84440 }, { "epoch": 4.194397536505414, "grad_norm": 0.1083984375, "learning_rate": 0.00046445217045793186, "loss": 0.5138, "step": 84450 }, { "epoch": 4.194894208801033, "grad_norm": 0.10693359375, "learning_rate": 0.0004644124366742824, "loss": 0.521, "step": 84460 }, { "epoch": 4.195390881096652, "grad_norm": 0.11474609375, "learning_rate": 0.00046437270289063275, "loss": 0.4961, "step": 84470 }, { "epoch": 4.1958875533922715, "grad_norm": 0.12890625, "learning_rate": 0.0004643329691069832, "loss": 0.5285, "step": 84480 }, { "epoch": 4.196384225687891, "grad_norm": 0.12255859375, "learning_rate": 0.0004642932353233337, "loss": 0.5304, "step": 84490 }, { "epoch": 4.196880897983511, "grad_norm": 0.158203125, "learning_rate": 0.0004642535015396841, "loss": 0.5183, "step": 84500 }, { "epoch": 4.19737757027913, "grad_norm": 0.125, "learning_rate": 0.0004642137677560346, "loss": 0.5008, "step": 84510 }, { "epoch": 4.197874242574749, "grad_norm": 0.1298828125, "learning_rate": 0.00046417403397238505, "loss": 0.527, "step": 84520 }, { "epoch": 4.1983709148703685, "grad_norm": 0.1767578125, "learning_rate": 0.0004641343001887355, "loss": 0.5472, "step": 84530 }, { "epoch": 4.198867587165988, "grad_norm": 0.10498046875, "learning_rate": 0.00046409456640508594, "loss": 0.5314, "step": 84540 }, { "epoch": 4.199364259461607, "grad_norm": 0.11376953125, "learning_rate": 0.0004640548326214364, "loss": 0.5252, "step": 84550 }, { "epoch": 4.199860931757226, "grad_norm": 0.099609375, "learning_rate": 0.0004640150988377869, "loss": 0.5077, "step": 84560 }, { "epoch": 4.200357604052846, "grad_norm": 0.119140625, "learning_rate": 0.0004639753650541373, "loss": 0.5551, "step": 84570 }, { "epoch": 4.2008542763484655, "grad_norm": 0.1123046875, "learning_rate": 0.00046393563127048777, "loss": 0.5278, "step": 84580 }, { "epoch": 4.201350948644085, "grad_norm": 0.11865234375, "learning_rate": 0.00046389589748683824, "loss": 0.5052, "step": 84590 }, { "epoch": 4.201847620939704, "grad_norm": 0.154296875, "learning_rate": 0.0004638561637031886, "loss": 0.5159, "step": 84600 }, { "epoch": 4.202344293235323, "grad_norm": 0.10888671875, "learning_rate": 0.00046381642991953913, "loss": 0.536, "step": 84610 }, { "epoch": 4.202840965530942, "grad_norm": 0.1220703125, "learning_rate": 0.0004637766961358896, "loss": 0.5078, "step": 84620 }, { "epoch": 4.203337637826562, "grad_norm": 0.11376953125, "learning_rate": 0.00046373696235223996, "loss": 0.5414, "step": 84630 }, { "epoch": 4.203834310122182, "grad_norm": 0.12109375, "learning_rate": 0.00046369722856859044, "loss": 0.5545, "step": 84640 }, { "epoch": 4.204330982417801, "grad_norm": 0.1416015625, "learning_rate": 0.00046365749478494096, "loss": 0.5656, "step": 84650 }, { "epoch": 4.20482765471342, "grad_norm": 0.1298828125, "learning_rate": 0.0004636177610012913, "loss": 0.5252, "step": 84660 }, { "epoch": 4.2053243270090395, "grad_norm": 0.1103515625, "learning_rate": 0.0004635780272176418, "loss": 0.5175, "step": 84670 }, { "epoch": 4.205820999304659, "grad_norm": 0.12060546875, "learning_rate": 0.00046353829343399227, "loss": 0.5261, "step": 84680 }, { "epoch": 4.206317671600278, "grad_norm": 0.1259765625, "learning_rate": 0.0004634985596503428, "loss": 0.5269, "step": 84690 }, { "epoch": 4.206814343895897, "grad_norm": 0.1220703125, "learning_rate": 0.00046345882586669315, "loss": 0.5254, "step": 84700 }, { "epoch": 4.207311016191517, "grad_norm": 0.10693359375, "learning_rate": 0.0004634190920830436, "loss": 0.5144, "step": 84710 }, { "epoch": 4.2078076884871365, "grad_norm": 0.1064453125, "learning_rate": 0.0004633793582993941, "loss": 0.5286, "step": 84720 }, { "epoch": 4.208304360782756, "grad_norm": 0.12353515625, "learning_rate": 0.0004633396245157445, "loss": 0.5184, "step": 84730 }, { "epoch": 4.208801033078375, "grad_norm": 0.1484375, "learning_rate": 0.000463299890732095, "loss": 0.5175, "step": 84740 }, { "epoch": 4.209297705373994, "grad_norm": 0.0986328125, "learning_rate": 0.00046326015694844546, "loss": 0.4948, "step": 84750 }, { "epoch": 4.209794377669613, "grad_norm": 0.10546875, "learning_rate": 0.0004632204231647959, "loss": 0.5161, "step": 84760 }, { "epoch": 4.210291049965233, "grad_norm": 0.09375, "learning_rate": 0.00046318068938114635, "loss": 0.5169, "step": 84770 }, { "epoch": 4.210787722260852, "grad_norm": 0.10986328125, "learning_rate": 0.0004631409555974968, "loss": 0.5402, "step": 84780 }, { "epoch": 4.211284394556472, "grad_norm": 0.1259765625, "learning_rate": 0.00046310122181384723, "loss": 0.5178, "step": 84790 }, { "epoch": 4.211781066852091, "grad_norm": 0.11962890625, "learning_rate": 0.0004630614880301977, "loss": 0.5004, "step": 84800 }, { "epoch": 4.21227773914771, "grad_norm": 0.158203125, "learning_rate": 0.0004630217542465482, "loss": 0.5539, "step": 84810 }, { "epoch": 4.21277441144333, "grad_norm": 0.11572265625, "learning_rate": 0.00046298202046289854, "loss": 0.5342, "step": 84820 }, { "epoch": 4.213271083738949, "grad_norm": 0.0986328125, "learning_rate": 0.000462942286679249, "loss": 0.4905, "step": 84830 }, { "epoch": 4.213767756034568, "grad_norm": 0.115234375, "learning_rate": 0.00046290255289559954, "loss": 0.5354, "step": 84840 }, { "epoch": 4.214264428330187, "grad_norm": 0.1982421875, "learning_rate": 0.00046286281911195, "loss": 0.522, "step": 84850 }, { "epoch": 4.214761100625807, "grad_norm": 0.1220703125, "learning_rate": 0.00046282308532830037, "loss": 0.5274, "step": 84860 }, { "epoch": 4.215257772921427, "grad_norm": 0.1005859375, "learning_rate": 0.00046278335154465084, "loss": 0.4981, "step": 84870 }, { "epoch": 4.215754445217046, "grad_norm": 0.1494140625, "learning_rate": 0.00046274361776100137, "loss": 0.4926, "step": 84880 }, { "epoch": 4.216251117512665, "grad_norm": 0.10595703125, "learning_rate": 0.00046270388397735173, "loss": 0.5332, "step": 84890 }, { "epoch": 4.216747789808284, "grad_norm": 0.1298828125, "learning_rate": 0.0004626641501937022, "loss": 0.5076, "step": 84900 }, { "epoch": 4.217244462103904, "grad_norm": 0.0986328125, "learning_rate": 0.0004626244164100527, "loss": 0.514, "step": 84910 }, { "epoch": 4.217741134399523, "grad_norm": 0.1240234375, "learning_rate": 0.0004625846826264031, "loss": 0.5241, "step": 84920 }, { "epoch": 4.218237806695143, "grad_norm": 0.11962890625, "learning_rate": 0.00046254494884275356, "loss": 0.5204, "step": 84930 }, { "epoch": 4.218734478990762, "grad_norm": 0.1435546875, "learning_rate": 0.00046250521505910403, "loss": 0.5244, "step": 84940 }, { "epoch": 4.219231151286381, "grad_norm": 0.1201171875, "learning_rate": 0.00046246548127545445, "loss": 0.5049, "step": 84950 }, { "epoch": 4.219727823582001, "grad_norm": 0.09619140625, "learning_rate": 0.0004624257474918049, "loss": 0.5101, "step": 84960 }, { "epoch": 4.22022449587762, "grad_norm": 0.1103515625, "learning_rate": 0.0004623860137081554, "loss": 0.5321, "step": 84970 }, { "epoch": 4.220721168173239, "grad_norm": 0.1435546875, "learning_rate": 0.00046234627992450586, "loss": 0.5455, "step": 84980 }, { "epoch": 4.221217840468858, "grad_norm": 0.12158203125, "learning_rate": 0.0004623065461408563, "loss": 0.4932, "step": 84990 }, { "epoch": 4.221714512764478, "grad_norm": 0.1484375, "learning_rate": 0.00046226681235720675, "loss": 0.5315, "step": 85000 }, { "epoch": 4.222211185060098, "grad_norm": 0.130859375, "learning_rate": 0.0004622270785735572, "loss": 0.5236, "step": 85010 }, { "epoch": 4.222707857355717, "grad_norm": 0.10302734375, "learning_rate": 0.00046218734478990764, "loss": 0.5112, "step": 85020 }, { "epoch": 4.223204529651336, "grad_norm": 0.12255859375, "learning_rate": 0.0004621476110062581, "loss": 0.513, "step": 85030 }, { "epoch": 4.223701201946955, "grad_norm": 0.1162109375, "learning_rate": 0.0004621078772226086, "loss": 0.4923, "step": 85040 }, { "epoch": 4.2241978742425745, "grad_norm": 0.10888671875, "learning_rate": 0.00046206814343895895, "loss": 0.5019, "step": 85050 }, { "epoch": 4.224694546538194, "grad_norm": 0.13671875, "learning_rate": 0.0004620284096553095, "loss": 0.4975, "step": 85060 }, { "epoch": 4.225191218833814, "grad_norm": 0.11279296875, "learning_rate": 0.00046198867587165994, "loss": 0.5384, "step": 85070 }, { "epoch": 4.225687891129433, "grad_norm": 0.1298828125, "learning_rate": 0.0004619489420880103, "loss": 0.5359, "step": 85080 }, { "epoch": 4.226184563425052, "grad_norm": 0.1328125, "learning_rate": 0.0004619092083043608, "loss": 0.5235, "step": 85090 }, { "epoch": 4.226681235720672, "grad_norm": 0.091796875, "learning_rate": 0.00046186947452071125, "loss": 0.5044, "step": 85100 }, { "epoch": 4.227177908016291, "grad_norm": 0.10546875, "learning_rate": 0.00046182974073706167, "loss": 0.5421, "step": 85110 }, { "epoch": 4.22767458031191, "grad_norm": 0.12109375, "learning_rate": 0.00046179000695341214, "loss": 0.5294, "step": 85120 }, { "epoch": 4.228171252607529, "grad_norm": 0.12109375, "learning_rate": 0.0004617502731697626, "loss": 0.5175, "step": 85130 }, { "epoch": 4.228667924903149, "grad_norm": 0.1259765625, "learning_rate": 0.0004617105393861131, "loss": 0.5488, "step": 85140 }, { "epoch": 4.229164597198769, "grad_norm": 0.1611328125, "learning_rate": 0.0004616708056024635, "loss": 0.4906, "step": 85150 }, { "epoch": 4.229661269494388, "grad_norm": 0.10595703125, "learning_rate": 0.00046163107181881397, "loss": 0.5223, "step": 85160 }, { "epoch": 4.230157941790007, "grad_norm": 0.11865234375, "learning_rate": 0.00046159133803516444, "loss": 0.5071, "step": 85170 }, { "epoch": 4.230654614085626, "grad_norm": 0.10546875, "learning_rate": 0.00046155160425151486, "loss": 0.5137, "step": 85180 }, { "epoch": 4.2311512863812455, "grad_norm": 0.11083984375, "learning_rate": 0.00046151187046786533, "loss": 0.5056, "step": 85190 }, { "epoch": 4.231647958676865, "grad_norm": 0.10107421875, "learning_rate": 0.0004614721366842158, "loss": 0.505, "step": 85200 }, { "epoch": 4.232144630972484, "grad_norm": 0.12060546875, "learning_rate": 0.0004614324029005662, "loss": 0.5275, "step": 85210 }, { "epoch": 4.232641303268104, "grad_norm": 0.169921875, "learning_rate": 0.0004613926691169167, "loss": 0.5189, "step": 85220 }, { "epoch": 4.233137975563723, "grad_norm": 0.12255859375, "learning_rate": 0.00046135293533326716, "loss": 0.5391, "step": 85230 }, { "epoch": 4.2336346478593425, "grad_norm": 0.1484375, "learning_rate": 0.0004613132015496175, "loss": 0.5331, "step": 85240 }, { "epoch": 4.234131320154962, "grad_norm": 0.1083984375, "learning_rate": 0.00046127346776596805, "loss": 0.5216, "step": 85250 }, { "epoch": 4.234627992450581, "grad_norm": 0.10693359375, "learning_rate": 0.0004612337339823185, "loss": 0.5352, "step": 85260 }, { "epoch": 4.2351246647462, "grad_norm": 0.10693359375, "learning_rate": 0.0004611940001986689, "loss": 0.5044, "step": 85270 }, { "epoch": 4.235621337041819, "grad_norm": 0.0966796875, "learning_rate": 0.00046115426641501936, "loss": 0.5219, "step": 85280 }, { "epoch": 4.2361180093374395, "grad_norm": 0.11181640625, "learning_rate": 0.0004611145326313699, "loss": 0.5064, "step": 85290 }, { "epoch": 4.236614681633059, "grad_norm": 0.10498046875, "learning_rate": 0.00046107479884772035, "loss": 0.5503, "step": 85300 }, { "epoch": 4.237111353928678, "grad_norm": 0.1123046875, "learning_rate": 0.0004610350650640707, "loss": 0.5075, "step": 85310 }, { "epoch": 4.237608026224297, "grad_norm": 0.166015625, "learning_rate": 0.0004609953312804212, "loss": 0.5491, "step": 85320 }, { "epoch": 4.2381046985199164, "grad_norm": 0.1123046875, "learning_rate": 0.0004609555974967717, "loss": 0.5304, "step": 85330 }, { "epoch": 4.238601370815536, "grad_norm": 0.09912109375, "learning_rate": 0.0004609158637131221, "loss": 0.5417, "step": 85340 }, { "epoch": 4.239098043111155, "grad_norm": 0.10205078125, "learning_rate": 0.00046087612992947255, "loss": 0.5318, "step": 85350 }, { "epoch": 4.239594715406775, "grad_norm": 0.1376953125, "learning_rate": 0.000460836396145823, "loss": 0.5274, "step": 85360 }, { "epoch": 4.240091387702394, "grad_norm": 0.11572265625, "learning_rate": 0.00046079666236217343, "loss": 0.5192, "step": 85370 }, { "epoch": 4.2405880599980135, "grad_norm": 0.099609375, "learning_rate": 0.0004607569285785239, "loss": 0.5049, "step": 85380 }, { "epoch": 4.241084732293633, "grad_norm": 0.11865234375, "learning_rate": 0.0004607171947948744, "loss": 0.5139, "step": 85390 }, { "epoch": 4.241581404589252, "grad_norm": 0.11767578125, "learning_rate": 0.0004606774610112248, "loss": 0.5182, "step": 85400 }, { "epoch": 4.242078076884871, "grad_norm": 0.109375, "learning_rate": 0.00046063772722757527, "loss": 0.5096, "step": 85410 }, { "epoch": 4.24257474918049, "grad_norm": 0.1376953125, "learning_rate": 0.00046059799344392574, "loss": 0.5349, "step": 85420 }, { "epoch": 4.24307142147611, "grad_norm": 0.134765625, "learning_rate": 0.0004605582596602762, "loss": 0.5377, "step": 85430 }, { "epoch": 4.24356809377173, "grad_norm": 0.134765625, "learning_rate": 0.0004605185258766266, "loss": 0.5205, "step": 85440 }, { "epoch": 4.244064766067349, "grad_norm": 0.107421875, "learning_rate": 0.0004604787920929771, "loss": 0.5251, "step": 85450 }, { "epoch": 4.244561438362968, "grad_norm": 0.1201171875, "learning_rate": 0.00046043905830932757, "loss": 0.5031, "step": 85460 }, { "epoch": 4.245058110658587, "grad_norm": 0.1240234375, "learning_rate": 0.00046039932452567793, "loss": 0.5041, "step": 85470 }, { "epoch": 4.245554782954207, "grad_norm": 0.0986328125, "learning_rate": 0.00046035959074202846, "loss": 0.5095, "step": 85480 }, { "epoch": 4.246051455249826, "grad_norm": 0.11767578125, "learning_rate": 0.00046031985695837893, "loss": 0.5163, "step": 85490 }, { "epoch": 4.246548127545445, "grad_norm": 0.12353515625, "learning_rate": 0.0004602801231747293, "loss": 0.5149, "step": 85500 }, { "epoch": 4.247044799841065, "grad_norm": 0.123046875, "learning_rate": 0.00046024038939107976, "loss": 0.523, "step": 85510 }, { "epoch": 4.247541472136684, "grad_norm": 0.11328125, "learning_rate": 0.0004602006556074303, "loss": 0.5251, "step": 85520 }, { "epoch": 4.248038144432304, "grad_norm": 0.09716796875, "learning_rate": 0.00046016092182378065, "loss": 0.4988, "step": 85530 }, { "epoch": 4.248534816727923, "grad_norm": 0.126953125, "learning_rate": 0.0004601211880401311, "loss": 0.4956, "step": 85540 }, { "epoch": 4.249031489023542, "grad_norm": 0.12060546875, "learning_rate": 0.0004600814542564816, "loss": 0.5315, "step": 85550 }, { "epoch": 4.249528161319161, "grad_norm": 0.11181640625, "learning_rate": 0.000460041720472832, "loss": 0.5234, "step": 85560 }, { "epoch": 4.250024833614781, "grad_norm": 0.11767578125, "learning_rate": 0.0004600019866891825, "loss": 0.5087, "step": 85570 }, { "epoch": 4.250521505910401, "grad_norm": 0.1083984375, "learning_rate": 0.00045996225290553295, "loss": 0.4927, "step": 85580 }, { "epoch": 4.25101817820602, "grad_norm": 0.1513671875, "learning_rate": 0.0004599225191218834, "loss": 0.5326, "step": 85590 }, { "epoch": 4.251514850501639, "grad_norm": 0.10302734375, "learning_rate": 0.00045988278533823384, "loss": 0.4948, "step": 85600 }, { "epoch": 4.252011522797258, "grad_norm": 0.1171875, "learning_rate": 0.0004598430515545843, "loss": 0.521, "step": 85610 }, { "epoch": 4.252508195092878, "grad_norm": 0.115234375, "learning_rate": 0.0004598033177709348, "loss": 0.4986, "step": 85620 }, { "epoch": 4.253004867388497, "grad_norm": 0.1044921875, "learning_rate": 0.0004597635839872852, "loss": 0.512, "step": 85630 }, { "epoch": 4.253501539684116, "grad_norm": 0.15234375, "learning_rate": 0.0004597238502036357, "loss": 0.5244, "step": 85640 }, { "epoch": 4.253998211979736, "grad_norm": 0.107421875, "learning_rate": 0.00045968411641998614, "loss": 0.4869, "step": 85650 }, { "epoch": 4.254494884275355, "grad_norm": 0.10986328125, "learning_rate": 0.0004596443826363365, "loss": 0.5067, "step": 85660 }, { "epoch": 4.254991556570975, "grad_norm": 0.12060546875, "learning_rate": 0.00045960464885268703, "loss": 0.5391, "step": 85670 }, { "epoch": 4.255488228866594, "grad_norm": 0.15625, "learning_rate": 0.0004595649150690375, "loss": 0.4976, "step": 85680 }, { "epoch": 4.255984901162213, "grad_norm": 0.12451171875, "learning_rate": 0.00045952518128538787, "loss": 0.5479, "step": 85690 }, { "epoch": 4.256481573457832, "grad_norm": 0.0986328125, "learning_rate": 0.00045948544750173834, "loss": 0.5283, "step": 85700 }, { "epoch": 4.2569782457534515, "grad_norm": 0.09228515625, "learning_rate": 0.00045944571371808886, "loss": 0.5109, "step": 85710 }, { "epoch": 4.257474918049072, "grad_norm": 0.1005859375, "learning_rate": 0.00045940597993443934, "loss": 0.5107, "step": 85720 }, { "epoch": 4.257971590344691, "grad_norm": 0.216796875, "learning_rate": 0.0004593662461507897, "loss": 0.5134, "step": 85730 }, { "epoch": 4.25846826264031, "grad_norm": 0.12451171875, "learning_rate": 0.00045932651236714017, "loss": 0.5107, "step": 85740 }, { "epoch": 4.258964934935929, "grad_norm": 0.1005859375, "learning_rate": 0.0004592867785834907, "loss": 0.5183, "step": 85750 }, { "epoch": 4.2594616072315485, "grad_norm": 0.11767578125, "learning_rate": 0.00045924704479984106, "loss": 0.54, "step": 85760 }, { "epoch": 4.259958279527168, "grad_norm": 0.10986328125, "learning_rate": 0.00045920731101619153, "loss": 0.5028, "step": 85770 }, { "epoch": 4.260454951822787, "grad_norm": 0.1640625, "learning_rate": 0.000459167577232542, "loss": 0.5201, "step": 85780 }, { "epoch": 4.260951624118407, "grad_norm": 0.1083984375, "learning_rate": 0.0004591278434488924, "loss": 0.4954, "step": 85790 }, { "epoch": 4.261448296414026, "grad_norm": 0.111328125, "learning_rate": 0.0004590881096652429, "loss": 0.5304, "step": 85800 }, { "epoch": 4.261944968709646, "grad_norm": 0.1416015625, "learning_rate": 0.00045904837588159336, "loss": 0.5535, "step": 85810 }, { "epoch": 4.262441641005265, "grad_norm": 0.12255859375, "learning_rate": 0.0004590086420979438, "loss": 0.5143, "step": 85820 }, { "epoch": 4.262938313300884, "grad_norm": 0.1015625, "learning_rate": 0.00045896890831429425, "loss": 0.5171, "step": 85830 }, { "epoch": 4.263434985596503, "grad_norm": 0.10302734375, "learning_rate": 0.0004589291745306447, "loss": 0.5081, "step": 85840 }, { "epoch": 4.2639316578921225, "grad_norm": 0.1064453125, "learning_rate": 0.00045888944074699514, "loss": 0.5469, "step": 85850 }, { "epoch": 4.264428330187743, "grad_norm": 0.1328125, "learning_rate": 0.0004588497069633456, "loss": 0.5263, "step": 85860 }, { "epoch": 4.264925002483362, "grad_norm": 0.11279296875, "learning_rate": 0.0004588099731796961, "loss": 0.5449, "step": 85870 }, { "epoch": 4.265421674778981, "grad_norm": 0.12158203125, "learning_rate": 0.00045877023939604655, "loss": 0.5172, "step": 85880 }, { "epoch": 4.2659183470746, "grad_norm": 0.140625, "learning_rate": 0.00045873050561239697, "loss": 0.5357, "step": 85890 }, { "epoch": 4.2664150193702195, "grad_norm": 0.1640625, "learning_rate": 0.00045869077182874744, "loss": 0.5344, "step": 85900 }, { "epoch": 4.266911691665839, "grad_norm": 0.115234375, "learning_rate": 0.0004586510380450979, "loss": 0.5317, "step": 85910 }, { "epoch": 4.267408363961458, "grad_norm": 0.126953125, "learning_rate": 0.0004586113042614483, "loss": 0.5079, "step": 85920 }, { "epoch": 4.267905036257077, "grad_norm": 0.10791015625, "learning_rate": 0.00045857157047779875, "loss": 0.4963, "step": 85930 }, { "epoch": 4.268401708552697, "grad_norm": 0.1416015625, "learning_rate": 0.00045853183669414927, "loss": 0.5111, "step": 85940 }, { "epoch": 4.2688983808483165, "grad_norm": 0.1689453125, "learning_rate": 0.00045849210291049963, "loss": 0.5146, "step": 85950 }, { "epoch": 4.269395053143936, "grad_norm": 0.1552734375, "learning_rate": 0.0004584523691268501, "loss": 0.5579, "step": 85960 }, { "epoch": 4.269891725439555, "grad_norm": 0.11767578125, "learning_rate": 0.0004584126353432006, "loss": 0.5281, "step": 85970 }, { "epoch": 4.270388397735174, "grad_norm": 0.107421875, "learning_rate": 0.000458372901559551, "loss": 0.5497, "step": 85980 }, { "epoch": 4.270885070030793, "grad_norm": 0.103515625, "learning_rate": 0.00045833316777590147, "loss": 0.4916, "step": 85990 }, { "epoch": 4.271381742326413, "grad_norm": 0.1171875, "learning_rate": 0.00045829343399225194, "loss": 0.5393, "step": 86000 }, { "epoch": 4.271878414622033, "grad_norm": 0.11181640625, "learning_rate": 0.00045825370020860235, "loss": 0.545, "step": 86010 }, { "epoch": 4.272375086917652, "grad_norm": 0.1162109375, "learning_rate": 0.0004582139664249528, "loss": 0.5428, "step": 86020 }, { "epoch": 4.272871759213271, "grad_norm": 0.140625, "learning_rate": 0.0004581742326413033, "loss": 0.5207, "step": 86030 }, { "epoch": 4.2733684315088905, "grad_norm": 0.10986328125, "learning_rate": 0.00045813449885765377, "loss": 0.5163, "step": 86040 }, { "epoch": 4.27386510380451, "grad_norm": 0.1123046875, "learning_rate": 0.0004580947650740042, "loss": 0.515, "step": 86050 }, { "epoch": 4.274361776100129, "grad_norm": 0.1064453125, "learning_rate": 0.00045805503129035466, "loss": 0.549, "step": 86060 }, { "epoch": 4.274858448395748, "grad_norm": 0.146484375, "learning_rate": 0.00045801529750670513, "loss": 0.535, "step": 86070 }, { "epoch": 4.275355120691368, "grad_norm": 0.10595703125, "learning_rate": 0.00045797556372305555, "loss": 0.5278, "step": 86080 }, { "epoch": 4.2758517929869875, "grad_norm": 0.12451171875, "learning_rate": 0.000457935829939406, "loss": 0.5455, "step": 86090 }, { "epoch": 4.276348465282607, "grad_norm": 0.134765625, "learning_rate": 0.0004578960961557565, "loss": 0.5381, "step": 86100 }, { "epoch": 4.276845137578226, "grad_norm": 0.11181640625, "learning_rate": 0.00045785636237210685, "loss": 0.526, "step": 86110 }, { "epoch": 4.277341809873845, "grad_norm": 0.1416015625, "learning_rate": 0.0004578166285884574, "loss": 0.5111, "step": 86120 }, { "epoch": 4.277838482169464, "grad_norm": 0.1064453125, "learning_rate": 0.00045777689480480785, "loss": 0.507, "step": 86130 }, { "epoch": 4.278335154465084, "grad_norm": 0.1171875, "learning_rate": 0.0004577371610211582, "loss": 0.5182, "step": 86140 }, { "epoch": 4.278831826760703, "grad_norm": 0.119140625, "learning_rate": 0.0004576974272375087, "loss": 0.5155, "step": 86150 }, { "epoch": 4.279328499056323, "grad_norm": 0.12109375, "learning_rate": 0.0004576576934538592, "loss": 0.5027, "step": 86160 }, { "epoch": 4.279825171351942, "grad_norm": 0.1064453125, "learning_rate": 0.0004576179596702097, "loss": 0.5301, "step": 86170 }, { "epoch": 4.280321843647561, "grad_norm": 0.1435546875, "learning_rate": 0.00045757822588656004, "loss": 0.5158, "step": 86180 }, { "epoch": 4.280818515943181, "grad_norm": 0.1572265625, "learning_rate": 0.0004575384921029105, "loss": 0.5059, "step": 86190 }, { "epoch": 4.2813151882388, "grad_norm": 0.1240234375, "learning_rate": 0.000457498758319261, "loss": 0.5142, "step": 86200 }, { "epoch": 4.281811860534419, "grad_norm": 0.1162109375, "learning_rate": 0.0004574590245356114, "loss": 0.5515, "step": 86210 }, { "epoch": 4.282308532830038, "grad_norm": 0.1005859375, "learning_rate": 0.0004574192907519619, "loss": 0.5063, "step": 86220 }, { "epoch": 4.282805205125658, "grad_norm": 0.12109375, "learning_rate": 0.00045737955696831234, "loss": 0.4999, "step": 86230 }, { "epoch": 4.283301877421278, "grad_norm": 0.12353515625, "learning_rate": 0.00045733982318466276, "loss": 0.5308, "step": 86240 }, { "epoch": 4.283798549716897, "grad_norm": 0.10546875, "learning_rate": 0.00045730008940101323, "loss": 0.526, "step": 86250 }, { "epoch": 4.284295222012516, "grad_norm": 0.12109375, "learning_rate": 0.0004572603556173637, "loss": 0.5213, "step": 86260 }, { "epoch": 4.284791894308135, "grad_norm": 0.142578125, "learning_rate": 0.0004572206218337141, "loss": 0.539, "step": 86270 }, { "epoch": 4.285288566603755, "grad_norm": 0.11767578125, "learning_rate": 0.0004571808880500646, "loss": 0.5036, "step": 86280 }, { "epoch": 4.285785238899374, "grad_norm": 0.1142578125, "learning_rate": 0.00045714115426641506, "loss": 0.5239, "step": 86290 }, { "epoch": 4.286281911194994, "grad_norm": 0.10009765625, "learning_rate": 0.00045710142048276543, "loss": 0.5123, "step": 86300 }, { "epoch": 4.286778583490613, "grad_norm": 0.11279296875, "learning_rate": 0.00045706168669911595, "loss": 0.5036, "step": 86310 }, { "epoch": 4.287275255786232, "grad_norm": 0.1298828125, "learning_rate": 0.0004570219529154664, "loss": 0.5138, "step": 86320 }, { "epoch": 4.287771928081852, "grad_norm": 0.11328125, "learning_rate": 0.0004569822191318169, "loss": 0.4928, "step": 86330 }, { "epoch": 4.288268600377471, "grad_norm": 0.10546875, "learning_rate": 0.00045694248534816726, "loss": 0.5393, "step": 86340 }, { "epoch": 4.28876527267309, "grad_norm": 0.125, "learning_rate": 0.0004569027515645178, "loss": 0.5234, "step": 86350 }, { "epoch": 4.289261944968709, "grad_norm": 0.115234375, "learning_rate": 0.00045686301778086826, "loss": 0.5304, "step": 86360 }, { "epoch": 4.289758617264329, "grad_norm": 0.203125, "learning_rate": 0.0004568232839972186, "loss": 0.5225, "step": 86370 }, { "epoch": 4.290255289559949, "grad_norm": 0.1083984375, "learning_rate": 0.0004567835502135691, "loss": 0.5119, "step": 86380 }, { "epoch": 4.290751961855568, "grad_norm": 0.1220703125, "learning_rate": 0.0004567438164299196, "loss": 0.5613, "step": 86390 }, { "epoch": 4.291248634151187, "grad_norm": 0.138671875, "learning_rate": 0.00045670408264627, "loss": 0.538, "step": 86400 }, { "epoch": 4.291745306446806, "grad_norm": 0.1259765625, "learning_rate": 0.00045666434886262045, "loss": 0.5313, "step": 86410 }, { "epoch": 4.2922419787424255, "grad_norm": 0.11669921875, "learning_rate": 0.0004566246150789709, "loss": 0.5336, "step": 86420 }, { "epoch": 4.292738651038045, "grad_norm": 0.1494140625, "learning_rate": 0.00045658488129532134, "loss": 0.4929, "step": 86430 }, { "epoch": 4.293235323333665, "grad_norm": 0.134765625, "learning_rate": 0.0004565451475116718, "loss": 0.5121, "step": 86440 }, { "epoch": 4.293731995629284, "grad_norm": 0.123046875, "learning_rate": 0.0004565054137280223, "loss": 0.54, "step": 86450 }, { "epoch": 4.294228667924903, "grad_norm": 0.11962890625, "learning_rate": 0.0004564656799443727, "loss": 0.5073, "step": 86460 }, { "epoch": 4.2947253402205225, "grad_norm": 0.1982421875, "learning_rate": 0.00045642594616072317, "loss": 0.5126, "step": 86470 }, { "epoch": 4.295222012516142, "grad_norm": 0.109375, "learning_rate": 0.00045638621237707364, "loss": 0.5324, "step": 86480 }, { "epoch": 4.295718684811761, "grad_norm": 0.123046875, "learning_rate": 0.0004563464785934241, "loss": 0.52, "step": 86490 }, { "epoch": 4.29621535710738, "grad_norm": 0.1171875, "learning_rate": 0.00045630674480977453, "loss": 0.5243, "step": 86500 }, { "epoch": 4.296712029403, "grad_norm": 0.12353515625, "learning_rate": 0.000456267011026125, "loss": 0.513, "step": 86510 }, { "epoch": 4.29720870169862, "grad_norm": 0.10986328125, "learning_rate": 0.00045622727724247547, "loss": 0.4888, "step": 86520 }, { "epoch": 4.297705373994239, "grad_norm": 0.1611328125, "learning_rate": 0.00045618754345882584, "loss": 0.5306, "step": 86530 }, { "epoch": 4.298202046289858, "grad_norm": 0.111328125, "learning_rate": 0.00045614780967517636, "loss": 0.5291, "step": 86540 }, { "epoch": 4.298698718585477, "grad_norm": 0.1123046875, "learning_rate": 0.00045610807589152683, "loss": 0.5164, "step": 86550 }, { "epoch": 4.2991953908810965, "grad_norm": 0.1337890625, "learning_rate": 0.0004560683421078772, "loss": 0.5422, "step": 86560 }, { "epoch": 4.299692063176716, "grad_norm": 0.1240234375, "learning_rate": 0.00045602860832422767, "loss": 0.5098, "step": 86570 }, { "epoch": 4.300188735472336, "grad_norm": 0.166015625, "learning_rate": 0.0004559888745405782, "loss": 0.5123, "step": 86580 }, { "epoch": 4.300685407767955, "grad_norm": 0.13671875, "learning_rate": 0.00045594914075692855, "loss": 0.4986, "step": 86590 }, { "epoch": 4.301182080063574, "grad_norm": 0.1123046875, "learning_rate": 0.000455909406973279, "loss": 0.5292, "step": 86600 }, { "epoch": 4.3016787523591935, "grad_norm": 0.10107421875, "learning_rate": 0.0004558696731896295, "loss": 0.5057, "step": 86610 }, { "epoch": 4.302175424654813, "grad_norm": 0.1630859375, "learning_rate": 0.00045582993940598, "loss": 0.5015, "step": 86620 }, { "epoch": 4.302672096950432, "grad_norm": 0.19921875, "learning_rate": 0.0004557902056223304, "loss": 0.5086, "step": 86630 }, { "epoch": 4.303168769246051, "grad_norm": 0.11279296875, "learning_rate": 0.00045575047183868086, "loss": 0.5087, "step": 86640 }, { "epoch": 4.30366544154167, "grad_norm": 0.11328125, "learning_rate": 0.00045571073805503133, "loss": 0.4946, "step": 86650 }, { "epoch": 4.3041621138372905, "grad_norm": 0.1201171875, "learning_rate": 0.00045567100427138175, "loss": 0.5068, "step": 86660 }, { "epoch": 4.30465878613291, "grad_norm": 0.1142578125, "learning_rate": 0.0004556312704877322, "loss": 0.4953, "step": 86670 }, { "epoch": 4.305155458428529, "grad_norm": 0.1240234375, "learning_rate": 0.0004555915367040827, "loss": 0.5288, "step": 86680 }, { "epoch": 4.305652130724148, "grad_norm": 0.1396484375, "learning_rate": 0.0004555518029204331, "loss": 0.507, "step": 86690 }, { "epoch": 4.306148803019767, "grad_norm": 0.115234375, "learning_rate": 0.0004555120691367836, "loss": 0.5239, "step": 86700 }, { "epoch": 4.306645475315387, "grad_norm": 0.1279296875, "learning_rate": 0.00045547233535313405, "loss": 0.5222, "step": 86710 }, { "epoch": 4.307142147611006, "grad_norm": 0.119140625, "learning_rate": 0.00045543260156948447, "loss": 0.5368, "step": 86720 }, { "epoch": 4.307638819906626, "grad_norm": 0.10595703125, "learning_rate": 0.00045539286778583494, "loss": 0.5294, "step": 86730 }, { "epoch": 4.308135492202245, "grad_norm": 0.115234375, "learning_rate": 0.0004553531340021854, "loss": 0.5032, "step": 86740 }, { "epoch": 4.3086321644978645, "grad_norm": 0.140625, "learning_rate": 0.00045531340021853577, "loss": 0.5411, "step": 86750 }, { "epoch": 4.309128836793484, "grad_norm": 0.275390625, "learning_rate": 0.0004552736664348863, "loss": 0.5447, "step": 86760 }, { "epoch": 4.309625509089103, "grad_norm": 0.10595703125, "learning_rate": 0.00045523393265123677, "loss": 0.5329, "step": 86770 }, { "epoch": 4.310122181384722, "grad_norm": 0.1513671875, "learning_rate": 0.00045519419886758724, "loss": 0.4954, "step": 86780 }, { "epoch": 4.310618853680341, "grad_norm": 0.103515625, "learning_rate": 0.0004551544650839376, "loss": 0.504, "step": 86790 }, { "epoch": 4.311115525975961, "grad_norm": 0.1015625, "learning_rate": 0.0004551147313002881, "loss": 0.5425, "step": 86800 }, { "epoch": 4.311612198271581, "grad_norm": 0.11962890625, "learning_rate": 0.0004550749975166386, "loss": 0.5113, "step": 86810 }, { "epoch": 4.3121088705672, "grad_norm": 0.1064453125, "learning_rate": 0.00045503526373298896, "loss": 0.5203, "step": 86820 }, { "epoch": 4.312605542862819, "grad_norm": 0.1162109375, "learning_rate": 0.00045499552994933943, "loss": 0.5085, "step": 86830 }, { "epoch": 4.313102215158438, "grad_norm": 0.10693359375, "learning_rate": 0.0004549557961656899, "loss": 0.5188, "step": 86840 }, { "epoch": 4.313598887454058, "grad_norm": 0.1396484375, "learning_rate": 0.0004549160623820403, "loss": 0.5094, "step": 86850 }, { "epoch": 4.314095559749677, "grad_norm": 0.10546875, "learning_rate": 0.0004548763285983908, "loss": 0.5057, "step": 86860 }, { "epoch": 4.314592232045296, "grad_norm": 0.11279296875, "learning_rate": 0.00045483659481474126, "loss": 0.4869, "step": 86870 }, { "epoch": 4.315088904340916, "grad_norm": 0.16015625, "learning_rate": 0.0004547968610310917, "loss": 0.5005, "step": 86880 }, { "epoch": 4.315585576636535, "grad_norm": 0.1328125, "learning_rate": 0.00045475712724744215, "loss": 0.5301, "step": 86890 }, { "epoch": 4.316082248932155, "grad_norm": 0.138671875, "learning_rate": 0.0004547173934637926, "loss": 0.53, "step": 86900 }, { "epoch": 4.316578921227774, "grad_norm": 0.181640625, "learning_rate": 0.00045467765968014304, "loss": 0.5241, "step": 86910 }, { "epoch": 4.317075593523393, "grad_norm": 0.11865234375, "learning_rate": 0.0004546379258964935, "loss": 0.5442, "step": 86920 }, { "epoch": 4.317572265819012, "grad_norm": 0.1044921875, "learning_rate": 0.000454598192112844, "loss": 0.4976, "step": 86930 }, { "epoch": 4.3180689381146315, "grad_norm": 0.1044921875, "learning_rate": 0.00045455845832919446, "loss": 0.5249, "step": 86940 }, { "epoch": 4.318565610410252, "grad_norm": 0.1044921875, "learning_rate": 0.0004545187245455449, "loss": 0.5007, "step": 86950 }, { "epoch": 4.319062282705871, "grad_norm": 0.1201171875, "learning_rate": 0.00045447899076189534, "loss": 0.528, "step": 86960 }, { "epoch": 4.31955895500149, "grad_norm": 0.20703125, "learning_rate": 0.0004544392569782458, "loss": 0.5246, "step": 86970 }, { "epoch": 4.320055627297109, "grad_norm": 0.10546875, "learning_rate": 0.0004543995231945962, "loss": 0.505, "step": 86980 }, { "epoch": 4.320552299592729, "grad_norm": 0.1337890625, "learning_rate": 0.0004543597894109467, "loss": 0.4948, "step": 86990 }, { "epoch": 4.321048971888348, "grad_norm": 0.11328125, "learning_rate": 0.0004543200556272972, "loss": 0.5177, "step": 87000 }, { "epoch": 4.321545644183967, "grad_norm": 0.1904296875, "learning_rate": 0.00045428032184364754, "loss": 0.5274, "step": 87010 }, { "epoch": 4.322042316479587, "grad_norm": 0.1171875, "learning_rate": 0.000454240588059998, "loss": 0.5335, "step": 87020 }, { "epoch": 4.322538988775206, "grad_norm": 0.1328125, "learning_rate": 0.00045420085427634854, "loss": 0.5448, "step": 87030 }, { "epoch": 4.323035661070826, "grad_norm": 0.103515625, "learning_rate": 0.0004541611204926989, "loss": 0.5349, "step": 87040 }, { "epoch": 4.323532333366445, "grad_norm": 0.11865234375, "learning_rate": 0.00045412138670904937, "loss": 0.4996, "step": 87050 }, { "epoch": 4.324029005662064, "grad_norm": 0.1103515625, "learning_rate": 0.00045408165292539984, "loss": 0.516, "step": 87060 }, { "epoch": 4.324525677957683, "grad_norm": 0.11572265625, "learning_rate": 0.0004540419191417503, "loss": 0.5177, "step": 87070 }, { "epoch": 4.3250223502533025, "grad_norm": 0.1083984375, "learning_rate": 0.00045400218535810073, "loss": 0.5055, "step": 87080 }, { "epoch": 4.325519022548923, "grad_norm": 0.10693359375, "learning_rate": 0.0004539624515744512, "loss": 0.5124, "step": 87090 }, { "epoch": 4.326015694844542, "grad_norm": 0.10498046875, "learning_rate": 0.00045392271779080167, "loss": 0.5161, "step": 87100 }, { "epoch": 4.326512367140161, "grad_norm": 0.0966796875, "learning_rate": 0.0004538829840071521, "loss": 0.5411, "step": 87110 }, { "epoch": 4.32700903943578, "grad_norm": 0.12109375, "learning_rate": 0.00045384325022350256, "loss": 0.5097, "step": 87120 }, { "epoch": 4.3275057117313995, "grad_norm": 0.1689453125, "learning_rate": 0.00045380351643985303, "loss": 0.5181, "step": 87130 }, { "epoch": 4.328002384027019, "grad_norm": 0.1025390625, "learning_rate": 0.00045376378265620345, "loss": 0.5099, "step": 87140 }, { "epoch": 4.328499056322638, "grad_norm": 0.1025390625, "learning_rate": 0.0004537240488725539, "loss": 0.5018, "step": 87150 }, { "epoch": 4.328995728618258, "grad_norm": 0.1171875, "learning_rate": 0.0004536843150889044, "loss": 0.5034, "step": 87160 }, { "epoch": 4.329492400913877, "grad_norm": 0.10888671875, "learning_rate": 0.00045364458130525475, "loss": 0.5121, "step": 87170 }, { "epoch": 4.3299890732094966, "grad_norm": 0.1025390625, "learning_rate": 0.0004536048475216053, "loss": 0.5178, "step": 87180 }, { "epoch": 4.330485745505116, "grad_norm": 0.11181640625, "learning_rate": 0.00045356511373795575, "loss": 0.5198, "step": 87190 }, { "epoch": 4.330982417800735, "grad_norm": 0.0966796875, "learning_rate": 0.0004535253799543061, "loss": 0.5295, "step": 87200 }, { "epoch": 4.331479090096354, "grad_norm": 0.140625, "learning_rate": 0.0004534856461706566, "loss": 0.5136, "step": 87210 }, { "epoch": 4.3319757623919735, "grad_norm": 0.12255859375, "learning_rate": 0.0004534459123870071, "loss": 0.5361, "step": 87220 }, { "epoch": 4.332472434687594, "grad_norm": 0.126953125, "learning_rate": 0.0004534061786033576, "loss": 0.5476, "step": 87230 }, { "epoch": 4.332969106983213, "grad_norm": 0.1396484375, "learning_rate": 0.00045336644481970795, "loss": 0.511, "step": 87240 }, { "epoch": 4.333465779278832, "grad_norm": 0.19140625, "learning_rate": 0.0004533267110360584, "loss": 0.5044, "step": 87250 }, { "epoch": 4.333962451574451, "grad_norm": 0.11962890625, "learning_rate": 0.00045328697725240894, "loss": 0.5091, "step": 87260 }, { "epoch": 4.3344591238700705, "grad_norm": 0.1865234375, "learning_rate": 0.0004532472434687593, "loss": 0.5155, "step": 87270 }, { "epoch": 4.33495579616569, "grad_norm": 0.11083984375, "learning_rate": 0.0004532075096851098, "loss": 0.523, "step": 87280 }, { "epoch": 4.335452468461309, "grad_norm": 0.1201171875, "learning_rate": 0.00045316777590146025, "loss": 0.5077, "step": 87290 }, { "epoch": 4.335949140756928, "grad_norm": 0.1005859375, "learning_rate": 0.00045312804211781067, "loss": 0.5133, "step": 87300 }, { "epoch": 4.336445813052548, "grad_norm": 0.09228515625, "learning_rate": 0.00045308830833416114, "loss": 0.504, "step": 87310 }, { "epoch": 4.3369424853481675, "grad_norm": 0.126953125, "learning_rate": 0.0004530485745505116, "loss": 0.5102, "step": 87320 }, { "epoch": 4.337439157643787, "grad_norm": 0.11181640625, "learning_rate": 0.000453008840766862, "loss": 0.528, "step": 87330 }, { "epoch": 4.337935829939406, "grad_norm": 0.169921875, "learning_rate": 0.0004529691069832125, "loss": 0.5072, "step": 87340 }, { "epoch": 4.338432502235025, "grad_norm": 0.12158203125, "learning_rate": 0.00045292937319956297, "loss": 0.5125, "step": 87350 }, { "epoch": 4.338929174530644, "grad_norm": 0.12890625, "learning_rate": 0.00045288963941591333, "loss": 0.517, "step": 87360 }, { "epoch": 4.339425846826264, "grad_norm": 0.1083984375, "learning_rate": 0.00045284990563226386, "loss": 0.5224, "step": 87370 }, { "epoch": 4.339922519121884, "grad_norm": 0.10546875, "learning_rate": 0.00045281017184861433, "loss": 0.5199, "step": 87380 }, { "epoch": 4.340419191417503, "grad_norm": 0.10400390625, "learning_rate": 0.0004527704380649648, "loss": 0.5326, "step": 87390 }, { "epoch": 4.340915863713122, "grad_norm": 0.1220703125, "learning_rate": 0.00045273070428131516, "loss": 0.5324, "step": 87400 }, { "epoch": 4.341412536008741, "grad_norm": 0.10498046875, "learning_rate": 0.0004526909704976657, "loss": 0.5096, "step": 87410 }, { "epoch": 4.341909208304361, "grad_norm": 0.103515625, "learning_rate": 0.00045265123671401616, "loss": 0.526, "step": 87420 }, { "epoch": 4.34240588059998, "grad_norm": 0.10986328125, "learning_rate": 0.0004526115029303665, "loss": 0.5329, "step": 87430 }, { "epoch": 4.342902552895599, "grad_norm": 0.10546875, "learning_rate": 0.000452571769146717, "loss": 0.5089, "step": 87440 }, { "epoch": 4.343399225191219, "grad_norm": 0.10546875, "learning_rate": 0.0004525320353630675, "loss": 0.4873, "step": 87450 }, { "epoch": 4.3438958974868385, "grad_norm": 0.12109375, "learning_rate": 0.0004524923015794179, "loss": 0.5222, "step": 87460 }, { "epoch": 4.344392569782458, "grad_norm": 0.1162109375, "learning_rate": 0.00045245256779576835, "loss": 0.5066, "step": 87470 }, { "epoch": 4.344889242078077, "grad_norm": 0.166015625, "learning_rate": 0.0004524128340121188, "loss": 0.5389, "step": 87480 }, { "epoch": 4.345385914373696, "grad_norm": 0.1162109375, "learning_rate": 0.00045237310022846924, "loss": 0.5425, "step": 87490 }, { "epoch": 4.345882586669315, "grad_norm": 0.1005859375, "learning_rate": 0.0004523333664448197, "loss": 0.5212, "step": 87500 }, { "epoch": 4.346379258964935, "grad_norm": 0.1279296875, "learning_rate": 0.0004522936326611702, "loss": 0.5493, "step": 87510 }, { "epoch": 4.346875931260554, "grad_norm": 0.10595703125, "learning_rate": 0.00045225389887752066, "loss": 0.5508, "step": 87520 }, { "epoch": 4.347372603556174, "grad_norm": 0.1396484375, "learning_rate": 0.0004522141650938711, "loss": 0.5234, "step": 87530 }, { "epoch": 4.347869275851793, "grad_norm": 0.1484375, "learning_rate": 0.00045217443131022154, "loss": 0.5188, "step": 87540 }, { "epoch": 4.348365948147412, "grad_norm": 0.1640625, "learning_rate": 0.000452134697526572, "loss": 0.5179, "step": 87550 }, { "epoch": 4.348862620443032, "grad_norm": 0.142578125, "learning_rate": 0.00045209496374292243, "loss": 0.4952, "step": 87560 }, { "epoch": 4.349359292738651, "grad_norm": 0.11865234375, "learning_rate": 0.0004520552299592729, "loss": 0.5231, "step": 87570 }, { "epoch": 4.34985596503427, "grad_norm": 0.10986328125, "learning_rate": 0.0004520154961756234, "loss": 0.5128, "step": 87580 }, { "epoch": 4.350352637329889, "grad_norm": 0.11572265625, "learning_rate": 0.0004519757623919738, "loss": 0.4745, "step": 87590 }, { "epoch": 4.350849309625509, "grad_norm": 0.10205078125, "learning_rate": 0.00045193602860832426, "loss": 0.5124, "step": 87600 }, { "epoch": 4.351345981921129, "grad_norm": 0.11279296875, "learning_rate": 0.00045189629482467474, "loss": 0.514, "step": 87610 }, { "epoch": 4.351842654216748, "grad_norm": 0.1005859375, "learning_rate": 0.0004518565610410251, "loss": 0.5281, "step": 87620 }, { "epoch": 4.352339326512367, "grad_norm": 0.19140625, "learning_rate": 0.00045181682725737557, "loss": 0.5177, "step": 87630 }, { "epoch": 4.352835998807986, "grad_norm": 0.1201171875, "learning_rate": 0.0004517770934737261, "loss": 0.5217, "step": 87640 }, { "epoch": 4.3533326711036056, "grad_norm": 0.11376953125, "learning_rate": 0.00045173735969007646, "loss": 0.536, "step": 87650 }, { "epoch": 4.353829343399225, "grad_norm": 0.1328125, "learning_rate": 0.00045169762590642693, "loss": 0.5201, "step": 87660 }, { "epoch": 4.354326015694845, "grad_norm": 0.158203125, "learning_rate": 0.0004516578921227774, "loss": 0.531, "step": 87670 }, { "epoch": 4.354822687990464, "grad_norm": 0.1220703125, "learning_rate": 0.0004516181583391279, "loss": 0.5379, "step": 87680 }, { "epoch": 4.355319360286083, "grad_norm": 0.09228515625, "learning_rate": 0.0004515784245554783, "loss": 0.5195, "step": 87690 }, { "epoch": 4.355816032581703, "grad_norm": 0.11572265625, "learning_rate": 0.00045153869077182876, "loss": 0.533, "step": 87700 }, { "epoch": 4.356312704877322, "grad_norm": 0.1162109375, "learning_rate": 0.00045149895698817923, "loss": 0.5403, "step": 87710 }, { "epoch": 4.356809377172941, "grad_norm": 0.1123046875, "learning_rate": 0.00045145922320452965, "loss": 0.5178, "step": 87720 }, { "epoch": 4.35730604946856, "grad_norm": 0.107421875, "learning_rate": 0.0004514194894208801, "loss": 0.525, "step": 87730 }, { "epoch": 4.35780272176418, "grad_norm": 0.11767578125, "learning_rate": 0.0004513797556372306, "loss": 0.5164, "step": 87740 }, { "epoch": 4.3582993940598, "grad_norm": 0.17578125, "learning_rate": 0.000451340021853581, "loss": 0.5465, "step": 87750 }, { "epoch": 4.358796066355419, "grad_norm": 0.11376953125, "learning_rate": 0.0004513002880699315, "loss": 0.5248, "step": 87760 }, { "epoch": 4.359292738651038, "grad_norm": 0.1416015625, "learning_rate": 0.00045126055428628195, "loss": 0.5562, "step": 87770 }, { "epoch": 4.359789410946657, "grad_norm": 0.130859375, "learning_rate": 0.00045122082050263237, "loss": 0.5066, "step": 87780 }, { "epoch": 4.3602860832422765, "grad_norm": 0.0927734375, "learning_rate": 0.00045118108671898284, "loss": 0.4942, "step": 87790 }, { "epoch": 4.360782755537896, "grad_norm": 0.1513671875, "learning_rate": 0.0004511413529353333, "loss": 0.5248, "step": 87800 }, { "epoch": 4.361279427833516, "grad_norm": 0.1279296875, "learning_rate": 0.0004511016191516837, "loss": 0.5215, "step": 87810 }, { "epoch": 4.361776100129135, "grad_norm": 0.12451171875, "learning_rate": 0.0004510618853680342, "loss": 0.5075, "step": 87820 }, { "epoch": 4.362272772424754, "grad_norm": 0.1064453125, "learning_rate": 0.00045102215158438467, "loss": 0.5233, "step": 87830 }, { "epoch": 4.3627694447203735, "grad_norm": 0.11865234375, "learning_rate": 0.00045098241780073514, "loss": 0.5262, "step": 87840 }, { "epoch": 4.363266117015993, "grad_norm": 0.11181640625, "learning_rate": 0.0004509426840170855, "loss": 0.5257, "step": 87850 }, { "epoch": 4.363762789311612, "grad_norm": 0.166015625, "learning_rate": 0.00045090295023343603, "loss": 0.5001, "step": 87860 }, { "epoch": 4.364259461607231, "grad_norm": 0.134765625, "learning_rate": 0.0004508632164497865, "loss": 0.531, "step": 87870 }, { "epoch": 4.364756133902851, "grad_norm": 0.1298828125, "learning_rate": 0.00045082348266613687, "loss": 0.5427, "step": 87880 }, { "epoch": 4.365252806198471, "grad_norm": 0.1044921875, "learning_rate": 0.00045078374888248734, "loss": 0.5221, "step": 87890 }, { "epoch": 4.36574947849409, "grad_norm": 0.13671875, "learning_rate": 0.0004507440150988378, "loss": 0.5203, "step": 87900 }, { "epoch": 4.366246150789709, "grad_norm": 0.15234375, "learning_rate": 0.0004507042813151882, "loss": 0.5099, "step": 87910 }, { "epoch": 4.366742823085328, "grad_norm": 0.1552734375, "learning_rate": 0.0004506645475315387, "loss": 0.5242, "step": 87920 }, { "epoch": 4.3672394953809475, "grad_norm": 0.09765625, "learning_rate": 0.00045062481374788917, "loss": 0.5017, "step": 87930 }, { "epoch": 4.367736167676567, "grad_norm": 0.11865234375, "learning_rate": 0.0004505850799642396, "loss": 0.4999, "step": 87940 }, { "epoch": 4.368232839972187, "grad_norm": 0.1689453125, "learning_rate": 0.00045054534618059006, "loss": 0.5277, "step": 87950 }, { "epoch": 4.368729512267806, "grad_norm": 0.1318359375, "learning_rate": 0.00045050561239694053, "loss": 0.5373, "step": 87960 }, { "epoch": 4.369226184563425, "grad_norm": 0.1865234375, "learning_rate": 0.000450465878613291, "loss": 0.5431, "step": 87970 }, { "epoch": 4.3697228568590445, "grad_norm": 0.125, "learning_rate": 0.0004504261448296414, "loss": 0.5276, "step": 87980 }, { "epoch": 4.370219529154664, "grad_norm": 0.11083984375, "learning_rate": 0.0004503864110459919, "loss": 0.5384, "step": 87990 }, { "epoch": 4.370716201450283, "grad_norm": 0.126953125, "learning_rate": 0.00045034667726234236, "loss": 0.5363, "step": 88000 }, { "epoch": 4.371212873745902, "grad_norm": 0.130859375, "learning_rate": 0.0004503069434786928, "loss": 0.5175, "step": 88010 }, { "epoch": 4.371709546041521, "grad_norm": 0.10595703125, "learning_rate": 0.00045026720969504325, "loss": 0.5321, "step": 88020 }, { "epoch": 4.3722062183371415, "grad_norm": 0.1533203125, "learning_rate": 0.0004502274759113937, "loss": 0.5007, "step": 88030 }, { "epoch": 4.372702890632761, "grad_norm": 0.1328125, "learning_rate": 0.0004501877421277441, "loss": 0.5155, "step": 88040 }, { "epoch": 4.37319956292838, "grad_norm": 0.1279296875, "learning_rate": 0.0004501480083440946, "loss": 0.5093, "step": 88050 }, { "epoch": 4.373696235223999, "grad_norm": 0.134765625, "learning_rate": 0.0004501082745604451, "loss": 0.5215, "step": 88060 }, { "epoch": 4.374192907519618, "grad_norm": 0.09423828125, "learning_rate": 0.00045006854077679544, "loss": 0.4951, "step": 88070 }, { "epoch": 4.374689579815238, "grad_norm": 0.10888671875, "learning_rate": 0.0004500288069931459, "loss": 0.5061, "step": 88080 }, { "epoch": 4.375186252110857, "grad_norm": 0.130859375, "learning_rate": 0.00044998907320949644, "loss": 0.5246, "step": 88090 }, { "epoch": 4.375682924406477, "grad_norm": 0.10205078125, "learning_rate": 0.0004499493394258468, "loss": 0.5417, "step": 88100 }, { "epoch": 4.376179596702096, "grad_norm": 0.11767578125, "learning_rate": 0.0004499096056421973, "loss": 0.552, "step": 88110 }, { "epoch": 4.3766762689977154, "grad_norm": 0.11767578125, "learning_rate": 0.00044986987185854774, "loss": 0.5474, "step": 88120 }, { "epoch": 4.377172941293335, "grad_norm": 0.1201171875, "learning_rate": 0.00044983013807489827, "loss": 0.5339, "step": 88130 }, { "epoch": 4.377669613588954, "grad_norm": 0.1337890625, "learning_rate": 0.00044979040429124863, "loss": 0.5036, "step": 88140 }, { "epoch": 4.378166285884573, "grad_norm": 0.11962890625, "learning_rate": 0.0004497506705075991, "loss": 0.5204, "step": 88150 }, { "epoch": 4.378662958180192, "grad_norm": 0.1123046875, "learning_rate": 0.0004497109367239496, "loss": 0.5123, "step": 88160 }, { "epoch": 4.3791596304758125, "grad_norm": 0.1201171875, "learning_rate": 0.0004496712029403, "loss": 0.5498, "step": 88170 }, { "epoch": 4.379656302771432, "grad_norm": 0.123046875, "learning_rate": 0.00044963146915665046, "loss": 0.5111, "step": 88180 }, { "epoch": 4.380152975067051, "grad_norm": 0.142578125, "learning_rate": 0.00044959173537300094, "loss": 0.5254, "step": 88190 }, { "epoch": 4.38064964736267, "grad_norm": 0.11328125, "learning_rate": 0.00044955200158935135, "loss": 0.5242, "step": 88200 }, { "epoch": 4.381146319658289, "grad_norm": 0.17578125, "learning_rate": 0.0004495122678057018, "loss": 0.5093, "step": 88210 }, { "epoch": 4.381642991953909, "grad_norm": 0.1220703125, "learning_rate": 0.0004494725340220523, "loss": 0.5439, "step": 88220 }, { "epoch": 4.382139664249528, "grad_norm": 0.1396484375, "learning_rate": 0.00044943280023840266, "loss": 0.5357, "step": 88230 }, { "epoch": 4.382636336545147, "grad_norm": 0.091796875, "learning_rate": 0.0004493930664547532, "loss": 0.5, "step": 88240 }, { "epoch": 4.383133008840767, "grad_norm": 0.1123046875, "learning_rate": 0.00044935333267110366, "loss": 0.5414, "step": 88250 }, { "epoch": 4.383629681136386, "grad_norm": 0.2080078125, "learning_rate": 0.0004493135988874541, "loss": 0.5203, "step": 88260 }, { "epoch": 4.384126353432006, "grad_norm": 0.1357421875, "learning_rate": 0.0004492738651038045, "loss": 0.5286, "step": 88270 }, { "epoch": 4.384623025727625, "grad_norm": 0.12451171875, "learning_rate": 0.000449234131320155, "loss": 0.5242, "step": 88280 }, { "epoch": 4.385119698023244, "grad_norm": 0.1435546875, "learning_rate": 0.0004491943975365055, "loss": 0.5384, "step": 88290 }, { "epoch": 4.385616370318863, "grad_norm": 0.123046875, "learning_rate": 0.00044915466375285585, "loss": 0.5252, "step": 88300 }, { "epoch": 4.3861130426144825, "grad_norm": 0.1142578125, "learning_rate": 0.0004491149299692063, "loss": 0.5113, "step": 88310 }, { "epoch": 4.386609714910103, "grad_norm": 0.09619140625, "learning_rate": 0.00044907519618555685, "loss": 0.5088, "step": 88320 }, { "epoch": 4.387106387205722, "grad_norm": 0.11962890625, "learning_rate": 0.0004490354624019072, "loss": 0.5234, "step": 88330 }, { "epoch": 4.387603059501341, "grad_norm": 0.1162109375, "learning_rate": 0.0004489957286182577, "loss": 0.488, "step": 88340 }, { "epoch": 4.38809973179696, "grad_norm": 0.1279296875, "learning_rate": 0.00044895599483460815, "loss": 0.5555, "step": 88350 }, { "epoch": 4.38859640409258, "grad_norm": 0.10888671875, "learning_rate": 0.00044891626105095857, "loss": 0.5122, "step": 88360 }, { "epoch": 4.389093076388199, "grad_norm": 0.1435546875, "learning_rate": 0.00044887652726730904, "loss": 0.5252, "step": 88370 }, { "epoch": 4.389589748683818, "grad_norm": 0.111328125, "learning_rate": 0.0004488367934836595, "loss": 0.538, "step": 88380 }, { "epoch": 4.390086420979438, "grad_norm": 0.123046875, "learning_rate": 0.00044879705970000993, "loss": 0.4978, "step": 88390 }, { "epoch": 4.390583093275057, "grad_norm": 0.1279296875, "learning_rate": 0.0004487573259163604, "loss": 0.5182, "step": 88400 }, { "epoch": 4.391079765570677, "grad_norm": 0.12451171875, "learning_rate": 0.00044871759213271087, "loss": 0.5699, "step": 88410 }, { "epoch": 4.391576437866296, "grad_norm": 0.1279296875, "learning_rate": 0.00044867785834906134, "loss": 0.5379, "step": 88420 }, { "epoch": 4.392073110161915, "grad_norm": 0.11572265625, "learning_rate": 0.00044863812456541176, "loss": 0.5178, "step": 88430 }, { "epoch": 4.392569782457534, "grad_norm": 0.140625, "learning_rate": 0.00044859839078176223, "loss": 0.5005, "step": 88440 }, { "epoch": 4.3930664547531535, "grad_norm": 0.11767578125, "learning_rate": 0.0004485586569981127, "loss": 0.516, "step": 88450 }, { "epoch": 4.393563127048774, "grad_norm": 0.11083984375, "learning_rate": 0.0004485189232144631, "loss": 0.538, "step": 88460 }, { "epoch": 4.394059799344393, "grad_norm": 0.115234375, "learning_rate": 0.0004484791894308136, "loss": 0.5167, "step": 88470 }, { "epoch": 4.394556471640012, "grad_norm": 0.11181640625, "learning_rate": 0.00044843945564716406, "loss": 0.5185, "step": 88480 }, { "epoch": 4.395053143935631, "grad_norm": 0.09912109375, "learning_rate": 0.0004483997218635144, "loss": 0.508, "step": 88490 }, { "epoch": 4.3955498162312505, "grad_norm": 0.10546875, "learning_rate": 0.0004483599880798649, "loss": 0.5249, "step": 88500 }, { "epoch": 4.39604648852687, "grad_norm": 0.134765625, "learning_rate": 0.0004483202542962154, "loss": 0.4992, "step": 88510 }, { "epoch": 4.396543160822489, "grad_norm": 0.1298828125, "learning_rate": 0.0004482805205125658, "loss": 0.5404, "step": 88520 }, { "epoch": 4.397039833118109, "grad_norm": 0.10546875, "learning_rate": 0.00044824078672891626, "loss": 0.5283, "step": 88530 }, { "epoch": 4.397536505413728, "grad_norm": 0.1181640625, "learning_rate": 0.00044820105294526673, "loss": 0.4998, "step": 88540 }, { "epoch": 4.3980331777093475, "grad_norm": 0.11669921875, "learning_rate": 0.00044816131916161715, "loss": 0.5101, "step": 88550 }, { "epoch": 4.398529850004967, "grad_norm": 0.1103515625, "learning_rate": 0.0004481215853779676, "loss": 0.5001, "step": 88560 }, { "epoch": 4.399026522300586, "grad_norm": 0.1162109375, "learning_rate": 0.0004480818515943181, "loss": 0.5308, "step": 88570 }, { "epoch": 4.399523194596205, "grad_norm": 0.109375, "learning_rate": 0.00044804211781066856, "loss": 0.5087, "step": 88580 }, { "epoch": 4.4000198668918244, "grad_norm": 0.0986328125, "learning_rate": 0.000448002384027019, "loss": 0.5077, "step": 88590 }, { "epoch": 4.400516539187445, "grad_norm": 0.126953125, "learning_rate": 0.00044796265024336945, "loss": 0.5311, "step": 88600 }, { "epoch": 4.401013211483064, "grad_norm": 0.1259765625, "learning_rate": 0.0004479229164597199, "loss": 0.5168, "step": 88610 }, { "epoch": 4.401509883778683, "grad_norm": 0.1162109375, "learning_rate": 0.00044788318267607034, "loss": 0.5317, "step": 88620 }, { "epoch": 4.402006556074302, "grad_norm": 0.142578125, "learning_rate": 0.0004478434488924208, "loss": 0.5134, "step": 88630 }, { "epoch": 4.4025032283699215, "grad_norm": 0.10986328125, "learning_rate": 0.0004478037151087713, "loss": 0.5415, "step": 88640 }, { "epoch": 4.402999900665541, "grad_norm": 0.1162109375, "learning_rate": 0.0004477639813251217, "loss": 0.4927, "step": 88650 }, { "epoch": 4.40349657296116, "grad_norm": 0.1474609375, "learning_rate": 0.00044772424754147217, "loss": 0.5059, "step": 88660 }, { "epoch": 4.40399324525678, "grad_norm": 0.1669921875, "learning_rate": 0.00044768451375782264, "loss": 0.5184, "step": 88670 }, { "epoch": 4.404489917552399, "grad_norm": 0.10498046875, "learning_rate": 0.000447644779974173, "loss": 0.5118, "step": 88680 }, { "epoch": 4.4049865898480185, "grad_norm": 0.1630859375, "learning_rate": 0.00044760504619052353, "loss": 0.5293, "step": 88690 }, { "epoch": 4.405483262143638, "grad_norm": 0.11083984375, "learning_rate": 0.000447565312406874, "loss": 0.5356, "step": 88700 }, { "epoch": 4.405979934439257, "grad_norm": 0.1201171875, "learning_rate": 0.00044752557862322447, "loss": 0.5143, "step": 88710 }, { "epoch": 4.406476606734876, "grad_norm": 0.1201171875, "learning_rate": 0.00044748584483957483, "loss": 0.5378, "step": 88720 }, { "epoch": 4.406973279030495, "grad_norm": 0.138671875, "learning_rate": 0.00044744611105592536, "loss": 0.5285, "step": 88730 }, { "epoch": 4.407469951326115, "grad_norm": 0.11572265625, "learning_rate": 0.00044740637727227583, "loss": 0.5536, "step": 88740 }, { "epoch": 4.407966623621735, "grad_norm": 0.09912109375, "learning_rate": 0.0004473666434886262, "loss": 0.5143, "step": 88750 }, { "epoch": 4.408463295917354, "grad_norm": 0.126953125, "learning_rate": 0.00044732690970497666, "loss": 0.4971, "step": 88760 }, { "epoch": 4.408959968212973, "grad_norm": 0.10546875, "learning_rate": 0.00044728717592132714, "loss": 0.5375, "step": 88770 }, { "epoch": 4.409456640508592, "grad_norm": 0.1181640625, "learning_rate": 0.00044724744213767755, "loss": 0.5175, "step": 88780 }, { "epoch": 4.409953312804212, "grad_norm": 0.1337890625, "learning_rate": 0.000447207708354028, "loss": 0.5282, "step": 88790 }, { "epoch": 4.410449985099831, "grad_norm": 0.1201171875, "learning_rate": 0.0004471679745703785, "loss": 0.5248, "step": 88800 }, { "epoch": 4.41094665739545, "grad_norm": 0.1044921875, "learning_rate": 0.0004471282407867289, "loss": 0.5178, "step": 88810 }, { "epoch": 4.41144332969107, "grad_norm": 0.1025390625, "learning_rate": 0.0004470885070030794, "loss": 0.4972, "step": 88820 }, { "epoch": 4.4119400019866895, "grad_norm": 0.0966796875, "learning_rate": 0.00044704877321942986, "loss": 0.5102, "step": 88830 }, { "epoch": 4.412436674282309, "grad_norm": 0.11962890625, "learning_rate": 0.0004470090394357803, "loss": 0.5329, "step": 88840 }, { "epoch": 4.412933346577928, "grad_norm": 0.11181640625, "learning_rate": 0.00044696930565213074, "loss": 0.5102, "step": 88850 }, { "epoch": 4.413430018873547, "grad_norm": 0.1064453125, "learning_rate": 0.0004469295718684812, "loss": 0.4967, "step": 88860 }, { "epoch": 4.413926691169166, "grad_norm": 0.140625, "learning_rate": 0.0004468898380848317, "loss": 0.504, "step": 88870 }, { "epoch": 4.414423363464786, "grad_norm": 0.11328125, "learning_rate": 0.0004468501043011821, "loss": 0.5204, "step": 88880 }, { "epoch": 4.414920035760405, "grad_norm": 0.1064453125, "learning_rate": 0.0004468103705175326, "loss": 0.4997, "step": 88890 }, { "epoch": 4.415416708056025, "grad_norm": 0.11376953125, "learning_rate": 0.00044677063673388305, "loss": 0.5183, "step": 88900 }, { "epoch": 4.415913380351644, "grad_norm": 0.130859375, "learning_rate": 0.0004467309029502334, "loss": 0.5336, "step": 88910 }, { "epoch": 4.416410052647263, "grad_norm": 0.1640625, "learning_rate": 0.00044669116916658394, "loss": 0.4998, "step": 88920 }, { "epoch": 4.416906724942883, "grad_norm": 0.1318359375, "learning_rate": 0.0004466514353829344, "loss": 0.4927, "step": 88930 }, { "epoch": 4.417403397238502, "grad_norm": 0.1279296875, "learning_rate": 0.00044661170159928477, "loss": 0.5184, "step": 88940 }, { "epoch": 4.417900069534121, "grad_norm": 0.10595703125, "learning_rate": 0.00044657196781563524, "loss": 0.5013, "step": 88950 }, { "epoch": 4.41839674182974, "grad_norm": 0.099609375, "learning_rate": 0.00044653223403198577, "loss": 0.4943, "step": 88960 }, { "epoch": 4.41889341412536, "grad_norm": 0.12060546875, "learning_rate": 0.00044649250024833613, "loss": 0.5071, "step": 88970 }, { "epoch": 4.41939008642098, "grad_norm": 0.107421875, "learning_rate": 0.0004464527664646866, "loss": 0.5252, "step": 88980 }, { "epoch": 4.419886758716599, "grad_norm": 0.11083984375, "learning_rate": 0.00044641303268103707, "loss": 0.5049, "step": 88990 }, { "epoch": 4.420383431012218, "grad_norm": 0.10693359375, "learning_rate": 0.0004463732988973875, "loss": 0.507, "step": 89000 }, { "epoch": 4.420880103307837, "grad_norm": 0.10791015625, "learning_rate": 0.00044633356511373796, "loss": 0.4965, "step": 89010 }, { "epoch": 4.4213767756034565, "grad_norm": 0.12158203125, "learning_rate": 0.00044629383133008843, "loss": 0.5358, "step": 89020 }, { "epoch": 4.421873447899076, "grad_norm": 0.12109375, "learning_rate": 0.0004462540975464389, "loss": 0.5142, "step": 89030 }, { "epoch": 4.422370120194696, "grad_norm": 0.111328125, "learning_rate": 0.0004462143637627893, "loss": 0.5253, "step": 89040 }, { "epoch": 4.422866792490315, "grad_norm": 0.10302734375, "learning_rate": 0.0004461746299791398, "loss": 0.512, "step": 89050 }, { "epoch": 4.423363464785934, "grad_norm": 0.107421875, "learning_rate": 0.00044613489619549026, "loss": 0.5219, "step": 89060 }, { "epoch": 4.423860137081554, "grad_norm": 0.1123046875, "learning_rate": 0.0004460951624118407, "loss": 0.5303, "step": 89070 }, { "epoch": 4.424356809377173, "grad_norm": 0.11767578125, "learning_rate": 0.00044605542862819115, "loss": 0.5194, "step": 89080 }, { "epoch": 4.424853481672792, "grad_norm": 0.1279296875, "learning_rate": 0.0004460156948445416, "loss": 0.5159, "step": 89090 }, { "epoch": 4.425350153968411, "grad_norm": 0.130859375, "learning_rate": 0.000445975961060892, "loss": 0.5096, "step": 89100 }, { "epoch": 4.425846826264031, "grad_norm": 0.1025390625, "learning_rate": 0.0004459362272772425, "loss": 0.5184, "step": 89110 }, { "epoch": 4.426343498559651, "grad_norm": 0.1162109375, "learning_rate": 0.000445896493493593, "loss": 0.541, "step": 89120 }, { "epoch": 4.42684017085527, "grad_norm": 0.12353515625, "learning_rate": 0.00044585675970994335, "loss": 0.5351, "step": 89130 }, { "epoch": 4.427336843150889, "grad_norm": 0.11865234375, "learning_rate": 0.0004458170259262938, "loss": 0.4929, "step": 89140 }, { "epoch": 4.427833515446508, "grad_norm": 0.1318359375, "learning_rate": 0.00044577729214264434, "loss": 0.4825, "step": 89150 }, { "epoch": 4.4283301877421275, "grad_norm": 0.11083984375, "learning_rate": 0.0004457375583589948, "loss": 0.5049, "step": 89160 }, { "epoch": 4.428826860037747, "grad_norm": 0.107421875, "learning_rate": 0.0004456978245753452, "loss": 0.5024, "step": 89170 }, { "epoch": 4.429323532333367, "grad_norm": 0.1220703125, "learning_rate": 0.00044565809079169565, "loss": 0.5139, "step": 89180 }, { "epoch": 4.429820204628986, "grad_norm": 0.107421875, "learning_rate": 0.0004456183570080462, "loss": 0.516, "step": 89190 }, { "epoch": 4.430316876924605, "grad_norm": 0.13671875, "learning_rate": 0.00044557862322439654, "loss": 0.5034, "step": 89200 }, { "epoch": 4.4308135492202245, "grad_norm": 0.17578125, "learning_rate": 0.000445538889440747, "loss": 0.5133, "step": 89210 }, { "epoch": 4.431310221515844, "grad_norm": 0.130859375, "learning_rate": 0.0004454991556570975, "loss": 0.4981, "step": 89220 }, { "epoch": 4.431806893811463, "grad_norm": 0.11376953125, "learning_rate": 0.0004454594218734479, "loss": 0.5328, "step": 89230 }, { "epoch": 4.432303566107082, "grad_norm": 0.1416015625, "learning_rate": 0.00044541968808979837, "loss": 0.5556, "step": 89240 }, { "epoch": 4.432800238402702, "grad_norm": 0.134765625, "learning_rate": 0.00044537995430614884, "loss": 0.5132, "step": 89250 }, { "epoch": 4.4332969106983215, "grad_norm": 0.1416015625, "learning_rate": 0.00044534022052249926, "loss": 0.519, "step": 89260 }, { "epoch": 4.433793582993941, "grad_norm": 0.11376953125, "learning_rate": 0.00044530048673884973, "loss": 0.5153, "step": 89270 }, { "epoch": 4.43429025528956, "grad_norm": 0.1044921875, "learning_rate": 0.0004452607529552002, "loss": 0.5048, "step": 89280 }, { "epoch": 4.434786927585179, "grad_norm": 0.171875, "learning_rate": 0.0004452210191715506, "loss": 0.5422, "step": 89290 }, { "epoch": 4.4352835998807985, "grad_norm": 0.10302734375, "learning_rate": 0.0004451812853879011, "loss": 0.5017, "step": 89300 }, { "epoch": 4.435780272176418, "grad_norm": 0.140625, "learning_rate": 0.00044514155160425156, "loss": 0.516, "step": 89310 }, { "epoch": 4.436276944472038, "grad_norm": 0.1025390625, "learning_rate": 0.00044510181782060203, "loss": 0.5387, "step": 89320 }, { "epoch": 4.436773616767657, "grad_norm": 0.10546875, "learning_rate": 0.0004450620840369524, "loss": 0.5209, "step": 89330 }, { "epoch": 4.437270289063276, "grad_norm": 0.11669921875, "learning_rate": 0.0004450223502533029, "loss": 0.5306, "step": 89340 }, { "epoch": 4.4377669613588955, "grad_norm": 0.1220703125, "learning_rate": 0.0004449826164696534, "loss": 0.4843, "step": 89350 }, { "epoch": 4.438263633654515, "grad_norm": 0.11669921875, "learning_rate": 0.00044494288268600375, "loss": 0.5431, "step": 89360 }, { "epoch": 4.438760305950134, "grad_norm": 0.11474609375, "learning_rate": 0.0004449031489023542, "loss": 0.538, "step": 89370 }, { "epoch": 4.439256978245753, "grad_norm": 0.12451171875, "learning_rate": 0.00044486341511870475, "loss": 0.5177, "step": 89380 }, { "epoch": 4.439753650541372, "grad_norm": 0.10302734375, "learning_rate": 0.0004448236813350551, "loss": 0.5403, "step": 89390 }, { "epoch": 4.4402503228369925, "grad_norm": 0.12451171875, "learning_rate": 0.0004447839475514056, "loss": 0.5181, "step": 89400 }, { "epoch": 4.440746995132612, "grad_norm": 0.10888671875, "learning_rate": 0.00044474421376775606, "loss": 0.5358, "step": 89410 }, { "epoch": 4.441243667428231, "grad_norm": 0.13671875, "learning_rate": 0.0004447044799841065, "loss": 0.5469, "step": 89420 }, { "epoch": 4.44174033972385, "grad_norm": 0.1259765625, "learning_rate": 0.00044466474620045694, "loss": 0.4976, "step": 89430 }, { "epoch": 4.442237012019469, "grad_norm": 0.1064453125, "learning_rate": 0.0004446250124168074, "loss": 0.5204, "step": 89440 }, { "epoch": 4.442733684315089, "grad_norm": 0.11279296875, "learning_rate": 0.00044458527863315783, "loss": 0.4834, "step": 89450 }, { "epoch": 4.443230356610708, "grad_norm": 0.1171875, "learning_rate": 0.0004445455448495083, "loss": 0.5473, "step": 89460 }, { "epoch": 4.443727028906328, "grad_norm": 0.1328125, "learning_rate": 0.0004445058110658588, "loss": 0.522, "step": 89470 }, { "epoch": 4.444223701201947, "grad_norm": 0.10205078125, "learning_rate": 0.00044446607728220925, "loss": 0.517, "step": 89480 }, { "epoch": 4.444720373497566, "grad_norm": 0.10693359375, "learning_rate": 0.00044442634349855966, "loss": 0.5171, "step": 89490 }, { "epoch": 4.445217045793186, "grad_norm": 0.130859375, "learning_rate": 0.00044438660971491014, "loss": 0.5147, "step": 89500 }, { "epoch": 4.445713718088805, "grad_norm": 0.12109375, "learning_rate": 0.0004443468759312606, "loss": 0.5131, "step": 89510 }, { "epoch": 4.446210390384424, "grad_norm": 0.1259765625, "learning_rate": 0.000444307142147611, "loss": 0.5493, "step": 89520 }, { "epoch": 4.446707062680043, "grad_norm": 0.134765625, "learning_rate": 0.0004442674083639615, "loss": 0.5042, "step": 89530 }, { "epoch": 4.4472037349756635, "grad_norm": 0.11083984375, "learning_rate": 0.00044422767458031197, "loss": 0.5174, "step": 89540 }, { "epoch": 4.447700407271283, "grad_norm": 0.11669921875, "learning_rate": 0.00044418794079666233, "loss": 0.5088, "step": 89550 }, { "epoch": 4.448197079566902, "grad_norm": 0.10498046875, "learning_rate": 0.00044414820701301286, "loss": 0.5215, "step": 89560 }, { "epoch": 4.448693751862521, "grad_norm": 0.1748046875, "learning_rate": 0.0004441084732293633, "loss": 0.5181, "step": 89570 }, { "epoch": 4.44919042415814, "grad_norm": 0.10888671875, "learning_rate": 0.0004440687394457137, "loss": 0.5321, "step": 89580 }, { "epoch": 4.44968709645376, "grad_norm": 0.1376953125, "learning_rate": 0.00044402900566206416, "loss": 0.5109, "step": 89590 }, { "epoch": 4.450183768749379, "grad_norm": 0.1025390625, "learning_rate": 0.00044398927187841463, "loss": 0.5235, "step": 89600 }, { "epoch": 4.450680441044998, "grad_norm": 0.10498046875, "learning_rate": 0.00044394953809476516, "loss": 0.5096, "step": 89610 }, { "epoch": 4.451177113340618, "grad_norm": 0.11279296875, "learning_rate": 0.0004439098043111155, "loss": 0.5184, "step": 89620 }, { "epoch": 4.451673785636237, "grad_norm": 0.142578125, "learning_rate": 0.000443870070527466, "loss": 0.5522, "step": 89630 }, { "epoch": 4.452170457931857, "grad_norm": 0.1201171875, "learning_rate": 0.00044383033674381646, "loss": 0.5298, "step": 89640 }, { "epoch": 4.452667130227476, "grad_norm": 0.11669921875, "learning_rate": 0.0004437906029601669, "loss": 0.5082, "step": 89650 }, { "epoch": 4.453163802523095, "grad_norm": 0.1103515625, "learning_rate": 0.00044375086917651735, "loss": 0.5256, "step": 89660 }, { "epoch": 4.453660474818714, "grad_norm": 0.099609375, "learning_rate": 0.0004437111353928678, "loss": 0.4979, "step": 89670 }, { "epoch": 4.4541571471143335, "grad_norm": 0.1142578125, "learning_rate": 0.00044367140160921824, "loss": 0.5271, "step": 89680 }, { "epoch": 4.454653819409954, "grad_norm": 0.154296875, "learning_rate": 0.0004436316678255687, "loss": 0.5414, "step": 89690 }, { "epoch": 4.455150491705573, "grad_norm": 0.134765625, "learning_rate": 0.0004435919340419192, "loss": 0.5237, "step": 89700 }, { "epoch": 4.455647164001192, "grad_norm": 0.10400390625, "learning_rate": 0.0004435522002582696, "loss": 0.4943, "step": 89710 }, { "epoch": 4.456143836296811, "grad_norm": 0.11376953125, "learning_rate": 0.00044351246647462007, "loss": 0.5289, "step": 89720 }, { "epoch": 4.4566405085924305, "grad_norm": 0.125, "learning_rate": 0.00044347273269097054, "loss": 0.5164, "step": 89730 }, { "epoch": 4.45713718088805, "grad_norm": 0.09521484375, "learning_rate": 0.0004434329989073209, "loss": 0.4989, "step": 89740 }, { "epoch": 4.457633853183669, "grad_norm": 0.1328125, "learning_rate": 0.00044339326512367143, "loss": 0.5271, "step": 89750 }, { "epoch": 4.458130525479289, "grad_norm": 0.1533203125, "learning_rate": 0.0004433535313400219, "loss": 0.504, "step": 89760 }, { "epoch": 4.458627197774908, "grad_norm": 0.12060546875, "learning_rate": 0.0004433137975563724, "loss": 0.5256, "step": 89770 }, { "epoch": 4.459123870070528, "grad_norm": 0.142578125, "learning_rate": 0.00044327406377272274, "loss": 0.5199, "step": 89780 }, { "epoch": 4.459620542366147, "grad_norm": 0.11767578125, "learning_rate": 0.00044323432998907326, "loss": 0.5165, "step": 89790 }, { "epoch": 4.460117214661766, "grad_norm": 0.12451171875, "learning_rate": 0.00044319459620542373, "loss": 0.4881, "step": 89800 }, { "epoch": 4.460613886957385, "grad_norm": 0.111328125, "learning_rate": 0.0004431548624217741, "loss": 0.5022, "step": 89810 }, { "epoch": 4.4611105592530045, "grad_norm": 0.1103515625, "learning_rate": 0.00044311512863812457, "loss": 0.5424, "step": 89820 }, { "epoch": 4.461607231548625, "grad_norm": 0.109375, "learning_rate": 0.0004430753948544751, "loss": 0.4923, "step": 89830 }, { "epoch": 4.462103903844244, "grad_norm": 0.10791015625, "learning_rate": 0.00044303566107082546, "loss": 0.4948, "step": 89840 }, { "epoch": 4.462600576139863, "grad_norm": 0.103515625, "learning_rate": 0.00044299592728717593, "loss": 0.5069, "step": 89850 }, { "epoch": 4.463097248435482, "grad_norm": 0.1533203125, "learning_rate": 0.0004429561935035264, "loss": 0.5038, "step": 89860 }, { "epoch": 4.4635939207311015, "grad_norm": 0.11328125, "learning_rate": 0.0004429164597198768, "loss": 0.5163, "step": 89870 }, { "epoch": 4.464090593026721, "grad_norm": 0.15234375, "learning_rate": 0.0004428767259362273, "loss": 0.526, "step": 89880 }, { "epoch": 4.46458726532234, "grad_norm": 0.11767578125, "learning_rate": 0.00044283699215257776, "loss": 0.5336, "step": 89890 }, { "epoch": 4.46508393761796, "grad_norm": 0.11962890625, "learning_rate": 0.0004427972583689282, "loss": 0.5185, "step": 89900 }, { "epoch": 4.465580609913579, "grad_norm": 0.1474609375, "learning_rate": 0.00044275752458527865, "loss": 0.494, "step": 89910 }, { "epoch": 4.4660772822091985, "grad_norm": 0.11669921875, "learning_rate": 0.0004427177908016291, "loss": 0.5058, "step": 89920 }, { "epoch": 4.466573954504818, "grad_norm": 0.1171875, "learning_rate": 0.0004426780570179796, "loss": 0.5049, "step": 89930 }, { "epoch": 4.467070626800437, "grad_norm": 0.1103515625, "learning_rate": 0.00044263832323433, "loss": 0.5247, "step": 89940 }, { "epoch": 4.467567299096056, "grad_norm": 0.11572265625, "learning_rate": 0.0004425985894506805, "loss": 0.5206, "step": 89950 }, { "epoch": 4.468063971391675, "grad_norm": 0.1416015625, "learning_rate": 0.00044255885566703095, "loss": 0.5468, "step": 89960 }, { "epoch": 4.4685606436872956, "grad_norm": 0.123046875, "learning_rate": 0.0004425191218833813, "loss": 0.5069, "step": 89970 }, { "epoch": 4.469057315982915, "grad_norm": 0.10498046875, "learning_rate": 0.00044247938809973184, "loss": 0.5266, "step": 89980 }, { "epoch": 4.469553988278534, "grad_norm": 0.10888671875, "learning_rate": 0.0004424396543160823, "loss": 0.4976, "step": 89990 }, { "epoch": 4.470050660574153, "grad_norm": 0.119140625, "learning_rate": 0.0004423999205324327, "loss": 0.5109, "step": 90000 }, { "epoch": 4.4705473328697725, "grad_norm": 0.1005859375, "learning_rate": 0.00044236018674878314, "loss": 0.5179, "step": 90010 }, { "epoch": 4.471044005165392, "grad_norm": 0.10107421875, "learning_rate": 0.00044232045296513367, "loss": 0.5047, "step": 90020 }, { "epoch": 4.471540677461011, "grad_norm": 0.1513671875, "learning_rate": 0.00044228071918148403, "loss": 0.528, "step": 90030 }, { "epoch": 4.472037349756631, "grad_norm": 0.1474609375, "learning_rate": 0.0004422409853978345, "loss": 0.5362, "step": 90040 }, { "epoch": 4.47253402205225, "grad_norm": 0.1015625, "learning_rate": 0.000442201251614185, "loss": 0.5376, "step": 90050 }, { "epoch": 4.4730306943478695, "grad_norm": 0.1201171875, "learning_rate": 0.0004421615178305355, "loss": 0.532, "step": 90060 }, { "epoch": 4.473527366643489, "grad_norm": 0.09814453125, "learning_rate": 0.00044212178404688586, "loss": 0.5163, "step": 90070 }, { "epoch": 4.474024038939108, "grad_norm": 0.130859375, "learning_rate": 0.00044208205026323634, "loss": 0.5136, "step": 90080 }, { "epoch": 4.474520711234727, "grad_norm": 0.1279296875, "learning_rate": 0.0004420423164795868, "loss": 0.5068, "step": 90090 }, { "epoch": 4.475017383530346, "grad_norm": 0.1416015625, "learning_rate": 0.0004420025826959372, "loss": 0.5269, "step": 90100 }, { "epoch": 4.475514055825966, "grad_norm": 0.10546875, "learning_rate": 0.0004419628489122877, "loss": 0.5006, "step": 90110 }, { "epoch": 4.476010728121586, "grad_norm": 0.158203125, "learning_rate": 0.00044192311512863817, "loss": 0.4995, "step": 90120 }, { "epoch": 4.476507400417205, "grad_norm": 0.134765625, "learning_rate": 0.0004418833813449886, "loss": 0.5093, "step": 90130 }, { "epoch": 4.477004072712824, "grad_norm": 0.1015625, "learning_rate": 0.00044184364756133906, "loss": 0.5181, "step": 90140 }, { "epoch": 4.477500745008443, "grad_norm": 0.1220703125, "learning_rate": 0.0004418039137776895, "loss": 0.5198, "step": 90150 }, { "epoch": 4.477997417304063, "grad_norm": 0.1220703125, "learning_rate": 0.00044176417999403994, "loss": 0.5444, "step": 90160 }, { "epoch": 4.478494089599682, "grad_norm": 0.126953125, "learning_rate": 0.0004417244462103904, "loss": 0.5314, "step": 90170 }, { "epoch": 4.478990761895301, "grad_norm": 0.142578125, "learning_rate": 0.0004416847124267409, "loss": 0.4931, "step": 90180 }, { "epoch": 4.479487434190921, "grad_norm": 0.1181640625, "learning_rate": 0.00044164497864309125, "loss": 0.536, "step": 90190 }, { "epoch": 4.47998410648654, "grad_norm": 0.1279296875, "learning_rate": 0.0004416052448594417, "loss": 0.4823, "step": 90200 }, { "epoch": 4.48048077878216, "grad_norm": 0.109375, "learning_rate": 0.00044156551107579225, "loss": 0.5047, "step": 90210 }, { "epoch": 4.480977451077779, "grad_norm": 0.11474609375, "learning_rate": 0.0004415257772921427, "loss": 0.531, "step": 90220 }, { "epoch": 4.481474123373398, "grad_norm": 0.11474609375, "learning_rate": 0.0004414860435084931, "loss": 0.5304, "step": 90230 }, { "epoch": 4.481970795669017, "grad_norm": 0.12109375, "learning_rate": 0.00044144630972484355, "loss": 0.4967, "step": 90240 }, { "epoch": 4.482467467964637, "grad_norm": 0.1298828125, "learning_rate": 0.0004414065759411941, "loss": 0.531, "step": 90250 }, { "epoch": 4.482964140260257, "grad_norm": 0.1396484375, "learning_rate": 0.00044136684215754444, "loss": 0.5198, "step": 90260 }, { "epoch": 4.483460812555876, "grad_norm": 0.1455078125, "learning_rate": 0.0004413271083738949, "loss": 0.5248, "step": 90270 }, { "epoch": 4.483957484851495, "grad_norm": 0.1337890625, "learning_rate": 0.0004412873745902454, "loss": 0.5253, "step": 90280 }, { "epoch": 4.484454157147114, "grad_norm": 0.11083984375, "learning_rate": 0.0004412476408065958, "loss": 0.5224, "step": 90290 }, { "epoch": 4.484950829442734, "grad_norm": 0.1123046875, "learning_rate": 0.00044120790702294627, "loss": 0.4993, "step": 90300 }, { "epoch": 4.485447501738353, "grad_norm": 0.10791015625, "learning_rate": 0.00044116817323929674, "loss": 0.543, "step": 90310 }, { "epoch": 4.485944174033972, "grad_norm": 0.1201171875, "learning_rate": 0.00044112843945564716, "loss": 0.5356, "step": 90320 }, { "epoch": 4.486440846329591, "grad_norm": 0.1552734375, "learning_rate": 0.00044108870567199763, "loss": 0.5123, "step": 90330 }, { "epoch": 4.486937518625211, "grad_norm": 0.1025390625, "learning_rate": 0.0004410489718883481, "loss": 0.5289, "step": 90340 }, { "epoch": 4.487434190920831, "grad_norm": 0.10498046875, "learning_rate": 0.0004410092381046985, "loss": 0.5183, "step": 90350 }, { "epoch": 4.48793086321645, "grad_norm": 0.12890625, "learning_rate": 0.000440969504321049, "loss": 0.5404, "step": 90360 }, { "epoch": 4.488427535512069, "grad_norm": 0.125, "learning_rate": 0.00044092977053739946, "loss": 0.5353, "step": 90370 }, { "epoch": 4.488924207807688, "grad_norm": 0.123046875, "learning_rate": 0.00044089003675374993, "loss": 0.5108, "step": 90380 }, { "epoch": 4.4894208801033075, "grad_norm": 0.1064453125, "learning_rate": 0.00044085030297010035, "loss": 0.5424, "step": 90390 }, { "epoch": 4.489917552398927, "grad_norm": 0.15625, "learning_rate": 0.0004408105691864508, "loss": 0.5061, "step": 90400 }, { "epoch": 4.490414224694547, "grad_norm": 0.11181640625, "learning_rate": 0.0004407708354028013, "loss": 0.5274, "step": 90410 }, { "epoch": 4.490910896990166, "grad_norm": 0.1435546875, "learning_rate": 0.00044073110161915166, "loss": 0.5364, "step": 90420 }, { "epoch": 4.491407569285785, "grad_norm": 0.09814453125, "learning_rate": 0.0004406913678355022, "loss": 0.5274, "step": 90430 }, { "epoch": 4.4919042415814046, "grad_norm": 0.150390625, "learning_rate": 0.00044065163405185265, "loss": 0.5184, "step": 90440 }, { "epoch": 4.492400913877024, "grad_norm": 0.107421875, "learning_rate": 0.000440611900268203, "loss": 0.5215, "step": 90450 }, { "epoch": 4.492897586172643, "grad_norm": 0.1083984375, "learning_rate": 0.0004405721664845535, "loss": 0.528, "step": 90460 }, { "epoch": 4.493394258468262, "grad_norm": 0.1044921875, "learning_rate": 0.00044053243270090396, "loss": 0.5049, "step": 90470 }, { "epoch": 4.493890930763882, "grad_norm": 0.10107421875, "learning_rate": 0.0004404926989172544, "loss": 0.5208, "step": 90480 }, { "epoch": 4.494387603059502, "grad_norm": 0.103515625, "learning_rate": 0.00044045296513360485, "loss": 0.5114, "step": 90490 }, { "epoch": 4.494884275355121, "grad_norm": 0.1103515625, "learning_rate": 0.0004404132313499553, "loss": 0.5216, "step": 90500 }, { "epoch": 4.49538094765074, "grad_norm": 0.1259765625, "learning_rate": 0.0004403734975663058, "loss": 0.5159, "step": 90510 }, { "epoch": 4.495877619946359, "grad_norm": 0.12353515625, "learning_rate": 0.0004403337637826562, "loss": 0.5308, "step": 90520 }, { "epoch": 4.4963742922419785, "grad_norm": 0.1318359375, "learning_rate": 0.0004402940299990067, "loss": 0.5129, "step": 90530 }, { "epoch": 4.496870964537598, "grad_norm": 0.1171875, "learning_rate": 0.00044025429621535715, "loss": 0.5012, "step": 90540 }, { "epoch": 4.497367636833218, "grad_norm": 0.16015625, "learning_rate": 0.00044021456243170757, "loss": 0.5065, "step": 90550 }, { "epoch": 4.497864309128837, "grad_norm": 0.10498046875, "learning_rate": 0.00044017482864805804, "loss": 0.5324, "step": 90560 }, { "epoch": 4.498360981424456, "grad_norm": 0.1240234375, "learning_rate": 0.0004401350948644085, "loss": 0.5212, "step": 90570 }, { "epoch": 4.4988576537200755, "grad_norm": 0.11181640625, "learning_rate": 0.00044009536108075893, "loss": 0.4977, "step": 90580 }, { "epoch": 4.499354326015695, "grad_norm": 0.11572265625, "learning_rate": 0.0004400556272971094, "loss": 0.5069, "step": 90590 }, { "epoch": 4.499850998311314, "grad_norm": 0.1181640625, "learning_rate": 0.00044001589351345987, "loss": 0.5481, "step": 90600 }, { "epoch": 4.500347670606933, "grad_norm": 0.11376953125, "learning_rate": 0.00043997615972981023, "loss": 0.5016, "step": 90610 }, { "epoch": 4.500844342902553, "grad_norm": 0.130859375, "learning_rate": 0.00043993642594616076, "loss": 0.5354, "step": 90620 }, { "epoch": 4.5013410151981725, "grad_norm": 0.12109375, "learning_rate": 0.00043989669216251123, "loss": 0.5329, "step": 90630 }, { "epoch": 4.501837687493792, "grad_norm": 0.1103515625, "learning_rate": 0.0004398569583788616, "loss": 0.523, "step": 90640 }, { "epoch": 4.502334359789411, "grad_norm": 0.130859375, "learning_rate": 0.00043981722459521206, "loss": 0.5129, "step": 90650 }, { "epoch": 4.50283103208503, "grad_norm": 0.1201171875, "learning_rate": 0.0004397774908115626, "loss": 0.4894, "step": 90660 }, { "epoch": 4.503327704380649, "grad_norm": 0.1123046875, "learning_rate": 0.00043973775702791306, "loss": 0.5067, "step": 90670 }, { "epoch": 4.503824376676269, "grad_norm": 0.10888671875, "learning_rate": 0.0004396980232442634, "loss": 0.5217, "step": 90680 }, { "epoch": 4.504321048971889, "grad_norm": 0.119140625, "learning_rate": 0.0004396582894606139, "loss": 0.519, "step": 90690 }, { "epoch": 4.504817721267508, "grad_norm": 0.1240234375, "learning_rate": 0.00043961855567696437, "loss": 0.4938, "step": 90700 }, { "epoch": 4.505314393563127, "grad_norm": 0.11376953125, "learning_rate": 0.0004395788218933148, "loss": 0.5179, "step": 90710 }, { "epoch": 4.5058110658587465, "grad_norm": 0.1357421875, "learning_rate": 0.00043953908810966526, "loss": 0.4878, "step": 90720 }, { "epoch": 4.506307738154366, "grad_norm": 0.14453125, "learning_rate": 0.0004394993543260157, "loss": 0.5139, "step": 90730 }, { "epoch": 4.506804410449985, "grad_norm": 0.12158203125, "learning_rate": 0.00043945962054236614, "loss": 0.5121, "step": 90740 }, { "epoch": 4.507301082745604, "grad_norm": 0.11767578125, "learning_rate": 0.0004394198867587166, "loss": 0.4962, "step": 90750 }, { "epoch": 4.507797755041224, "grad_norm": 0.12060546875, "learning_rate": 0.0004393801529750671, "loss": 0.4949, "step": 90760 }, { "epoch": 4.5082944273368435, "grad_norm": 0.1484375, "learning_rate": 0.0004393404191914175, "loss": 0.5008, "step": 90770 }, { "epoch": 4.508791099632463, "grad_norm": 0.1005859375, "learning_rate": 0.000439300685407768, "loss": 0.5548, "step": 90780 }, { "epoch": 4.509287771928082, "grad_norm": 0.11376953125, "learning_rate": 0.00043926095162411845, "loss": 0.5179, "step": 90790 }, { "epoch": 4.509784444223701, "grad_norm": 0.10498046875, "learning_rate": 0.0004392212178404689, "loss": 0.518, "step": 90800 }, { "epoch": 4.51028111651932, "grad_norm": 0.1103515625, "learning_rate": 0.00043918148405681934, "loss": 0.53, "step": 90810 }, { "epoch": 4.51077778881494, "grad_norm": 0.12060546875, "learning_rate": 0.0004391417502731698, "loss": 0.4934, "step": 90820 }, { "epoch": 4.51127446111056, "grad_norm": 0.1318359375, "learning_rate": 0.0004391020164895203, "loss": 0.5173, "step": 90830 }, { "epoch": 4.511771133406179, "grad_norm": 0.1064453125, "learning_rate": 0.00043906228270587064, "loss": 0.5262, "step": 90840 }, { "epoch": 4.512267805701798, "grad_norm": 0.09814453125, "learning_rate": 0.00043902254892222117, "loss": 0.4987, "step": 90850 }, { "epoch": 4.512764477997417, "grad_norm": 0.1123046875, "learning_rate": 0.00043898281513857164, "loss": 0.5014, "step": 90860 }, { "epoch": 4.513261150293037, "grad_norm": 0.115234375, "learning_rate": 0.000438943081354922, "loss": 0.5175, "step": 90870 }, { "epoch": 4.513757822588656, "grad_norm": 0.146484375, "learning_rate": 0.00043890334757127247, "loss": 0.5199, "step": 90880 }, { "epoch": 4.514254494884275, "grad_norm": 0.12060546875, "learning_rate": 0.000438863613787623, "loss": 0.5094, "step": 90890 }, { "epoch": 4.514751167179894, "grad_norm": 0.12890625, "learning_rate": 0.00043882388000397336, "loss": 0.4907, "step": 90900 }, { "epoch": 4.515247839475514, "grad_norm": 0.1064453125, "learning_rate": 0.00043878414622032383, "loss": 0.5088, "step": 90910 }, { "epoch": 4.515744511771134, "grad_norm": 0.10888671875, "learning_rate": 0.0004387444124366743, "loss": 0.506, "step": 90920 }, { "epoch": 4.516241184066753, "grad_norm": 0.11279296875, "learning_rate": 0.0004387046786530247, "loss": 0.504, "step": 90930 }, { "epoch": 4.516737856362372, "grad_norm": 0.1484375, "learning_rate": 0.0004386649448693752, "loss": 0.5338, "step": 90940 }, { "epoch": 4.517234528657991, "grad_norm": 0.103515625, "learning_rate": 0.00043862521108572566, "loss": 0.5308, "step": 90950 }, { "epoch": 4.517731200953611, "grad_norm": 0.125, "learning_rate": 0.00043858547730207613, "loss": 0.521, "step": 90960 }, { "epoch": 4.51822787324923, "grad_norm": 0.16015625, "learning_rate": 0.00043854574351842655, "loss": 0.5086, "step": 90970 }, { "epoch": 4.518724545544849, "grad_norm": 0.142578125, "learning_rate": 0.000438506009734777, "loss": 0.5313, "step": 90980 }, { "epoch": 4.519221217840469, "grad_norm": 0.109375, "learning_rate": 0.0004384662759511275, "loss": 0.5356, "step": 90990 }, { "epoch": 4.519717890136088, "grad_norm": 0.1337890625, "learning_rate": 0.0004384265421674779, "loss": 0.5374, "step": 91000 }, { "epoch": 4.520214562431708, "grad_norm": 0.103515625, "learning_rate": 0.0004383868083838284, "loss": 0.5043, "step": 91010 }, { "epoch": 4.520711234727327, "grad_norm": 0.11962890625, "learning_rate": 0.00043834707460017885, "loss": 0.5101, "step": 91020 }, { "epoch": 4.521207907022946, "grad_norm": 0.11376953125, "learning_rate": 0.0004383073408165292, "loss": 0.5411, "step": 91030 }, { "epoch": 4.521704579318565, "grad_norm": 0.1357421875, "learning_rate": 0.00043826760703287974, "loss": 0.5164, "step": 91040 }, { "epoch": 4.5222012516141845, "grad_norm": 0.1142578125, "learning_rate": 0.0004382278732492302, "loss": 0.5594, "step": 91050 }, { "epoch": 4.522697923909805, "grad_norm": 0.109375, "learning_rate": 0.0004381881394655806, "loss": 0.4987, "step": 91060 }, { "epoch": 4.523194596205424, "grad_norm": 0.134765625, "learning_rate": 0.00043814840568193105, "loss": 0.5175, "step": 91070 }, { "epoch": 4.523691268501043, "grad_norm": 0.10986328125, "learning_rate": 0.0004381086718982816, "loss": 0.5346, "step": 91080 }, { "epoch": 4.524187940796662, "grad_norm": 0.1767578125, "learning_rate": 0.00043806893811463194, "loss": 0.5303, "step": 91090 }, { "epoch": 4.5246846130922815, "grad_norm": 0.1103515625, "learning_rate": 0.0004380292043309824, "loss": 0.5066, "step": 91100 }, { "epoch": 4.525181285387901, "grad_norm": 0.1162109375, "learning_rate": 0.0004379894705473329, "loss": 0.5155, "step": 91110 }, { "epoch": 4.52567795768352, "grad_norm": 0.10986328125, "learning_rate": 0.0004379497367636834, "loss": 0.5041, "step": 91120 }, { "epoch": 4.52617462997914, "grad_norm": 0.10205078125, "learning_rate": 0.00043791000298003377, "loss": 0.4968, "step": 91130 }, { "epoch": 4.526671302274759, "grad_norm": 0.11474609375, "learning_rate": 0.00043787026919638424, "loss": 0.5027, "step": 91140 }, { "epoch": 4.527167974570379, "grad_norm": 0.1943359375, "learning_rate": 0.0004378305354127347, "loss": 0.5014, "step": 91150 }, { "epoch": 4.527664646865998, "grad_norm": 0.1123046875, "learning_rate": 0.00043779080162908513, "loss": 0.5132, "step": 91160 }, { "epoch": 4.528161319161617, "grad_norm": 0.10595703125, "learning_rate": 0.0004377510678454356, "loss": 0.5287, "step": 91170 }, { "epoch": 4.528657991457236, "grad_norm": 0.0986328125, "learning_rate": 0.00043771133406178607, "loss": 0.5218, "step": 91180 }, { "epoch": 4.5291546637528555, "grad_norm": 0.1044921875, "learning_rate": 0.0004376716002781365, "loss": 0.532, "step": 91190 }, { "epoch": 4.529651336048476, "grad_norm": 0.130859375, "learning_rate": 0.00043763186649448696, "loss": 0.5352, "step": 91200 }, { "epoch": 4.530148008344095, "grad_norm": 0.228515625, "learning_rate": 0.00043759213271083743, "loss": 0.5376, "step": 91210 }, { "epoch": 4.530644680639714, "grad_norm": 0.140625, "learning_rate": 0.00043755239892718785, "loss": 0.5245, "step": 91220 }, { "epoch": 4.531141352935333, "grad_norm": 0.10400390625, "learning_rate": 0.0004375126651435383, "loss": 0.535, "step": 91230 }, { "epoch": 4.5316380252309525, "grad_norm": 0.12353515625, "learning_rate": 0.0004374729313598888, "loss": 0.5091, "step": 91240 }, { "epoch": 4.532134697526572, "grad_norm": 0.1474609375, "learning_rate": 0.00043743319757623926, "loss": 0.5041, "step": 91250 }, { "epoch": 4.532631369822191, "grad_norm": 0.1259765625, "learning_rate": 0.0004373934637925897, "loss": 0.5173, "step": 91260 }, { "epoch": 4.533128042117811, "grad_norm": 0.11376953125, "learning_rate": 0.00043735373000894015, "loss": 0.5157, "step": 91270 }, { "epoch": 4.53362471441343, "grad_norm": 0.11767578125, "learning_rate": 0.0004373139962252906, "loss": 0.5, "step": 91280 }, { "epoch": 4.5341213867090495, "grad_norm": 0.109375, "learning_rate": 0.000437274262441641, "loss": 0.532, "step": 91290 }, { "epoch": 4.534618059004669, "grad_norm": 0.10888671875, "learning_rate": 0.00043723452865799146, "loss": 0.5365, "step": 91300 }, { "epoch": 4.535114731300288, "grad_norm": 0.10595703125, "learning_rate": 0.000437194794874342, "loss": 0.5145, "step": 91310 }, { "epoch": 4.535611403595907, "grad_norm": 0.10693359375, "learning_rate": 0.00043715506109069234, "loss": 0.5214, "step": 91320 }, { "epoch": 4.536108075891526, "grad_norm": 0.10595703125, "learning_rate": 0.0004371153273070428, "loss": 0.512, "step": 91330 }, { "epoch": 4.5366047481871465, "grad_norm": 0.12109375, "learning_rate": 0.0004370755935233933, "loss": 0.5114, "step": 91340 }, { "epoch": 4.537101420482766, "grad_norm": 0.10595703125, "learning_rate": 0.0004370358597397437, "loss": 0.5316, "step": 91350 }, { "epoch": 4.537598092778385, "grad_norm": 0.150390625, "learning_rate": 0.0004369961259560942, "loss": 0.5055, "step": 91360 }, { "epoch": 4.538094765074004, "grad_norm": 0.119140625, "learning_rate": 0.00043695639217244465, "loss": 0.5538, "step": 91370 }, { "epoch": 4.5385914373696234, "grad_norm": 0.1396484375, "learning_rate": 0.00043691665838879506, "loss": 0.5058, "step": 91380 }, { "epoch": 4.539088109665243, "grad_norm": 0.12353515625, "learning_rate": 0.00043687692460514554, "loss": 0.52, "step": 91390 }, { "epoch": 4.539584781960862, "grad_norm": 0.107421875, "learning_rate": 0.000436837190821496, "loss": 0.5364, "step": 91400 }, { "epoch": 4.540081454256482, "grad_norm": 0.11376953125, "learning_rate": 0.0004367974570378465, "loss": 0.5169, "step": 91410 }, { "epoch": 4.540578126552101, "grad_norm": 0.11376953125, "learning_rate": 0.0004367577232541969, "loss": 0.5479, "step": 91420 }, { "epoch": 4.5410747988477205, "grad_norm": 0.107421875, "learning_rate": 0.00043671798947054737, "loss": 0.5127, "step": 91430 }, { "epoch": 4.54157147114334, "grad_norm": 0.1845703125, "learning_rate": 0.00043667825568689784, "loss": 0.5051, "step": 91440 }, { "epoch": 4.542068143438959, "grad_norm": 0.10791015625, "learning_rate": 0.00043663852190324826, "loss": 0.5273, "step": 91450 }, { "epoch": 4.542564815734578, "grad_norm": 0.1279296875, "learning_rate": 0.0004365987881195987, "loss": 0.549, "step": 91460 }, { "epoch": 4.543061488030197, "grad_norm": 0.12255859375, "learning_rate": 0.0004365590543359492, "loss": 0.5372, "step": 91470 }, { "epoch": 4.5435581603258175, "grad_norm": 0.1552734375, "learning_rate": 0.00043651932055229956, "loss": 0.4916, "step": 91480 }, { "epoch": 4.544054832621437, "grad_norm": 0.1025390625, "learning_rate": 0.0004364795867686501, "loss": 0.5001, "step": 91490 }, { "epoch": 4.544551504917056, "grad_norm": 0.1083984375, "learning_rate": 0.00043643985298500056, "loss": 0.5066, "step": 91500 }, { "epoch": 4.545048177212675, "grad_norm": 0.1328125, "learning_rate": 0.0004364001192013509, "loss": 0.481, "step": 91510 }, { "epoch": 4.545544849508294, "grad_norm": 0.1484375, "learning_rate": 0.0004363603854177014, "loss": 0.5047, "step": 91520 }, { "epoch": 4.546041521803914, "grad_norm": 0.1611328125, "learning_rate": 0.0004363206516340519, "loss": 0.5525, "step": 91530 }, { "epoch": 4.546538194099533, "grad_norm": 0.119140625, "learning_rate": 0.0004362809178504023, "loss": 0.52, "step": 91540 }, { "epoch": 4.547034866395153, "grad_norm": 0.10400390625, "learning_rate": 0.00043624118406675275, "loss": 0.4791, "step": 91550 }, { "epoch": 4.547531538690772, "grad_norm": 0.1357421875, "learning_rate": 0.0004362014502831032, "loss": 0.5197, "step": 91560 }, { "epoch": 4.548028210986391, "grad_norm": 0.11279296875, "learning_rate": 0.0004361617164994537, "loss": 0.4996, "step": 91570 }, { "epoch": 4.548524883282011, "grad_norm": 0.1201171875, "learning_rate": 0.0004361219827158041, "loss": 0.5043, "step": 91580 }, { "epoch": 4.54902155557763, "grad_norm": 0.1015625, "learning_rate": 0.0004360822489321546, "loss": 0.5015, "step": 91590 }, { "epoch": 4.549518227873249, "grad_norm": 0.1171875, "learning_rate": 0.00043604251514850505, "loss": 0.5149, "step": 91600 }, { "epoch": 4.550014900168868, "grad_norm": 0.1005859375, "learning_rate": 0.00043600278136485547, "loss": 0.5205, "step": 91610 }, { "epoch": 4.550511572464488, "grad_norm": 0.1484375, "learning_rate": 0.00043596304758120594, "loss": 0.5549, "step": 91620 }, { "epoch": 4.551008244760107, "grad_norm": 0.1201171875, "learning_rate": 0.0004359233137975564, "loss": 0.4934, "step": 91630 }, { "epoch": 4.551504917055727, "grad_norm": 0.1123046875, "learning_rate": 0.00043588358001390683, "loss": 0.5018, "step": 91640 }, { "epoch": 4.552001589351346, "grad_norm": 0.1064453125, "learning_rate": 0.0004358438462302573, "loss": 0.5088, "step": 91650 }, { "epoch": 4.552498261646965, "grad_norm": 0.14453125, "learning_rate": 0.0004358041124466078, "loss": 0.5304, "step": 91660 }, { "epoch": 4.552994933942585, "grad_norm": 0.1044921875, "learning_rate": 0.00043576437866295814, "loss": 0.5237, "step": 91670 }, { "epoch": 4.553491606238204, "grad_norm": 0.15625, "learning_rate": 0.00043572464487930866, "loss": 0.4961, "step": 91680 }, { "epoch": 4.553988278533823, "grad_norm": 0.140625, "learning_rate": 0.00043568491109565913, "loss": 0.507, "step": 91690 }, { "epoch": 4.554484950829442, "grad_norm": 0.107421875, "learning_rate": 0.0004356451773120096, "loss": 0.493, "step": 91700 }, { "epoch": 4.554981623125062, "grad_norm": 0.11279296875, "learning_rate": 0.00043560544352835997, "loss": 0.5198, "step": 91710 }, { "epoch": 4.555478295420682, "grad_norm": 0.130859375, "learning_rate": 0.0004355657097447105, "loss": 0.4914, "step": 91720 }, { "epoch": 4.555974967716301, "grad_norm": 0.12353515625, "learning_rate": 0.00043552597596106097, "loss": 0.5304, "step": 91730 }, { "epoch": 4.55647164001192, "grad_norm": 0.103515625, "learning_rate": 0.00043548624217741133, "loss": 0.5141, "step": 91740 }, { "epoch": 4.556968312307539, "grad_norm": 0.0986328125, "learning_rate": 0.0004354465083937618, "loss": 0.5095, "step": 91750 }, { "epoch": 4.5574649846031585, "grad_norm": 0.09130859375, "learning_rate": 0.0004354067746101123, "loss": 0.5133, "step": 91760 }, { "epoch": 4.557961656898778, "grad_norm": 0.10595703125, "learning_rate": 0.0004353670408264627, "loss": 0.4957, "step": 91770 }, { "epoch": 4.558458329194398, "grad_norm": 0.115234375, "learning_rate": 0.00043532730704281316, "loss": 0.5412, "step": 91780 }, { "epoch": 4.558955001490017, "grad_norm": 0.0986328125, "learning_rate": 0.00043528757325916363, "loss": 0.5023, "step": 91790 }, { "epoch": 4.559451673785636, "grad_norm": 0.11376953125, "learning_rate": 0.00043524783947551405, "loss": 0.5153, "step": 91800 }, { "epoch": 4.5599483460812555, "grad_norm": 0.1064453125, "learning_rate": 0.0004352081056918645, "loss": 0.5302, "step": 91810 }, { "epoch": 4.560445018376875, "grad_norm": 0.1640625, "learning_rate": 0.000435168371908215, "loss": 0.5443, "step": 91820 }, { "epoch": 4.560941690672494, "grad_norm": 0.1328125, "learning_rate": 0.0004351286381245654, "loss": 0.5454, "step": 91830 }, { "epoch": 4.561438362968113, "grad_norm": 0.12109375, "learning_rate": 0.0004350889043409159, "loss": 0.5493, "step": 91840 }, { "epoch": 4.561935035263733, "grad_norm": 0.1064453125, "learning_rate": 0.00043504917055726635, "loss": 0.5348, "step": 91850 }, { "epoch": 4.562431707559353, "grad_norm": 0.10302734375, "learning_rate": 0.0004350094367736168, "loss": 0.5078, "step": 91860 }, { "epoch": 4.562928379854972, "grad_norm": 0.1142578125, "learning_rate": 0.00043496970298996724, "loss": 0.5058, "step": 91870 }, { "epoch": 4.563425052150591, "grad_norm": 0.13671875, "learning_rate": 0.0004349299692063177, "loss": 0.509, "step": 91880 }, { "epoch": 4.56392172444621, "grad_norm": 0.12158203125, "learning_rate": 0.0004348902354226682, "loss": 0.5261, "step": 91890 }, { "epoch": 4.5644183967418295, "grad_norm": 0.12353515625, "learning_rate": 0.00043485050163901854, "loss": 0.5089, "step": 91900 }, { "epoch": 4.564915069037449, "grad_norm": 0.1279296875, "learning_rate": 0.00043481076785536907, "loss": 0.5048, "step": 91910 }, { "epoch": 4.565411741333069, "grad_norm": 0.11181640625, "learning_rate": 0.00043477103407171954, "loss": 0.5006, "step": 91920 }, { "epoch": 4.565908413628688, "grad_norm": 0.1064453125, "learning_rate": 0.0004347313002880699, "loss": 0.4904, "step": 91930 }, { "epoch": 4.566405085924307, "grad_norm": 0.103515625, "learning_rate": 0.0004346915665044204, "loss": 0.5116, "step": 91940 }, { "epoch": 4.5669017582199265, "grad_norm": 0.12353515625, "learning_rate": 0.0004346518327207709, "loss": 0.5094, "step": 91950 }, { "epoch": 4.567398430515546, "grad_norm": 0.1162109375, "learning_rate": 0.00043461209893712126, "loss": 0.4969, "step": 91960 }, { "epoch": 4.567895102811165, "grad_norm": 0.10888671875, "learning_rate": 0.00043457236515347174, "loss": 0.512, "step": 91970 }, { "epoch": 4.568391775106784, "grad_norm": 0.111328125, "learning_rate": 0.0004345326313698222, "loss": 0.5013, "step": 91980 }, { "epoch": 4.568888447402404, "grad_norm": 0.10791015625, "learning_rate": 0.0004344928975861726, "loss": 0.504, "step": 91990 }, { "epoch": 4.5693851196980235, "grad_norm": 0.10498046875, "learning_rate": 0.0004344531638025231, "loss": 0.5093, "step": 92000 }, { "epoch": 4.569881791993643, "grad_norm": 0.126953125, "learning_rate": 0.00043441343001887357, "loss": 0.5301, "step": 92010 }, { "epoch": 4.570378464289262, "grad_norm": 0.126953125, "learning_rate": 0.00043437369623522404, "loss": 0.5294, "step": 92020 }, { "epoch": 4.570875136584881, "grad_norm": 0.1123046875, "learning_rate": 0.00043433396245157446, "loss": 0.501, "step": 92030 }, { "epoch": 4.5713718088805, "grad_norm": 0.1591796875, "learning_rate": 0.0004342942286679249, "loss": 0.5225, "step": 92040 }, { "epoch": 4.57186848117612, "grad_norm": 0.1455078125, "learning_rate": 0.0004342544948842754, "loss": 0.5365, "step": 92050 }, { "epoch": 4.57236515347174, "grad_norm": 0.1103515625, "learning_rate": 0.0004342147611006258, "loss": 0.5126, "step": 92060 }, { "epoch": 4.572861825767359, "grad_norm": 0.1025390625, "learning_rate": 0.0004341750273169763, "loss": 0.5127, "step": 92070 }, { "epoch": 4.573358498062978, "grad_norm": 0.15625, "learning_rate": 0.00043413529353332676, "loss": 0.5216, "step": 92080 }, { "epoch": 4.5738551703585975, "grad_norm": 0.15625, "learning_rate": 0.0004340955597496772, "loss": 0.4977, "step": 92090 }, { "epoch": 4.574351842654217, "grad_norm": 0.11572265625, "learning_rate": 0.00043405582596602765, "loss": 0.5137, "step": 92100 }, { "epoch": 4.574848514949836, "grad_norm": 0.10546875, "learning_rate": 0.0004340160921823781, "loss": 0.5194, "step": 92110 }, { "epoch": 4.575345187245455, "grad_norm": 0.1259765625, "learning_rate": 0.0004339763583987285, "loss": 0.5342, "step": 92120 }, { "epoch": 4.575841859541075, "grad_norm": 0.1357421875, "learning_rate": 0.000433936624615079, "loss": 0.5041, "step": 92130 }, { "epoch": 4.5763385318366945, "grad_norm": 0.12158203125, "learning_rate": 0.0004338968908314295, "loss": 0.5216, "step": 92140 }, { "epoch": 4.576835204132314, "grad_norm": 0.10595703125, "learning_rate": 0.00043385715704777995, "loss": 0.494, "step": 92150 }, { "epoch": 4.577331876427933, "grad_norm": 0.1240234375, "learning_rate": 0.0004338174232641303, "loss": 0.5261, "step": 92160 }, { "epoch": 4.577828548723552, "grad_norm": 0.150390625, "learning_rate": 0.0004337776894804808, "loss": 0.5084, "step": 92170 }, { "epoch": 4.578325221019171, "grad_norm": 0.130859375, "learning_rate": 0.0004337379556968313, "loss": 0.5247, "step": 92180 }, { "epoch": 4.578821893314791, "grad_norm": 0.10400390625, "learning_rate": 0.00043369822191318167, "loss": 0.5207, "step": 92190 }, { "epoch": 4.579318565610411, "grad_norm": 0.1044921875, "learning_rate": 0.00043365848812953214, "loss": 0.5027, "step": 92200 }, { "epoch": 4.57981523790603, "grad_norm": 0.11083984375, "learning_rate": 0.0004336187543458826, "loss": 0.5522, "step": 92210 }, { "epoch": 4.580311910201649, "grad_norm": 0.11474609375, "learning_rate": 0.00043357902056223303, "loss": 0.5035, "step": 92220 }, { "epoch": 4.580808582497268, "grad_norm": 0.10400390625, "learning_rate": 0.0004335392867785835, "loss": 0.5145, "step": 92230 }, { "epoch": 4.581305254792888, "grad_norm": 0.1123046875, "learning_rate": 0.000433499552994934, "loss": 0.5112, "step": 92240 }, { "epoch": 4.581801927088507, "grad_norm": 0.10400390625, "learning_rate": 0.0004334598192112844, "loss": 0.5067, "step": 92250 }, { "epoch": 4.582298599384126, "grad_norm": 0.1064453125, "learning_rate": 0.00043342008542763486, "loss": 0.5239, "step": 92260 }, { "epoch": 4.582795271679745, "grad_norm": 0.125, "learning_rate": 0.00043338035164398533, "loss": 0.5027, "step": 92270 }, { "epoch": 4.583291943975365, "grad_norm": 0.1513671875, "learning_rate": 0.00043334061786033575, "loss": 0.48, "step": 92280 }, { "epoch": 4.583788616270985, "grad_norm": 0.115234375, "learning_rate": 0.0004333008840766862, "loss": 0.5186, "step": 92290 }, { "epoch": 4.584285288566604, "grad_norm": 0.1259765625, "learning_rate": 0.0004332611502930367, "loss": 0.5166, "step": 92300 }, { "epoch": 4.584781960862223, "grad_norm": 0.109375, "learning_rate": 0.00043322141650938717, "loss": 0.5059, "step": 92310 }, { "epoch": 4.585278633157842, "grad_norm": 0.1328125, "learning_rate": 0.0004331816827257376, "loss": 0.5462, "step": 92320 }, { "epoch": 4.585775305453462, "grad_norm": 0.1572265625, "learning_rate": 0.00043314194894208805, "loss": 0.4904, "step": 92330 }, { "epoch": 4.586271977749081, "grad_norm": 0.12890625, "learning_rate": 0.0004331022151584385, "loss": 0.5082, "step": 92340 }, { "epoch": 4.5867686500447, "grad_norm": 0.1181640625, "learning_rate": 0.0004330624813747889, "loss": 0.5404, "step": 92350 }, { "epoch": 4.58726532234032, "grad_norm": 0.1474609375, "learning_rate": 0.0004330227475911394, "loss": 0.5415, "step": 92360 }, { "epoch": 4.587761994635939, "grad_norm": 0.1259765625, "learning_rate": 0.0004329830138074899, "loss": 0.5209, "step": 92370 }, { "epoch": 4.588258666931559, "grad_norm": 0.130859375, "learning_rate": 0.00043294328002384025, "loss": 0.5144, "step": 92380 }, { "epoch": 4.588755339227178, "grad_norm": 0.10888671875, "learning_rate": 0.0004329035462401907, "loss": 0.4951, "step": 92390 }, { "epoch": 4.589252011522797, "grad_norm": 0.10986328125, "learning_rate": 0.0004328638124565412, "loss": 0.506, "step": 92400 }, { "epoch": 4.589748683818416, "grad_norm": 0.11328125, "learning_rate": 0.0004328240786728916, "loss": 0.4986, "step": 92410 }, { "epoch": 4.5902453561140355, "grad_norm": 0.1435546875, "learning_rate": 0.0004327843448892421, "loss": 0.5077, "step": 92420 }, { "epoch": 4.590742028409656, "grad_norm": 0.15625, "learning_rate": 0.00043274461110559255, "loss": 0.5016, "step": 92430 }, { "epoch": 4.591238700705275, "grad_norm": 0.10400390625, "learning_rate": 0.00043270487732194297, "loss": 0.5008, "step": 92440 }, { "epoch": 4.591735373000894, "grad_norm": 0.10986328125, "learning_rate": 0.00043266514353829344, "loss": 0.5352, "step": 92450 }, { "epoch": 4.592232045296513, "grad_norm": 0.1083984375, "learning_rate": 0.0004326254097546439, "loss": 0.5004, "step": 92460 }, { "epoch": 4.5927287175921325, "grad_norm": 0.111328125, "learning_rate": 0.0004325856759709944, "loss": 0.5296, "step": 92470 }, { "epoch": 4.593225389887752, "grad_norm": 0.1337890625, "learning_rate": 0.0004325459421873448, "loss": 0.5047, "step": 92480 }, { "epoch": 4.593722062183371, "grad_norm": 0.1220703125, "learning_rate": 0.00043250620840369527, "loss": 0.5274, "step": 92490 }, { "epoch": 4.594218734478991, "grad_norm": 0.1259765625, "learning_rate": 0.00043246647462004574, "loss": 0.5117, "step": 92500 }, { "epoch": 4.59471540677461, "grad_norm": 0.130859375, "learning_rate": 0.00043242674083639616, "loss": 0.5121, "step": 92510 }, { "epoch": 4.5952120790702295, "grad_norm": 0.1162109375, "learning_rate": 0.00043238700705274663, "loss": 0.5202, "step": 92520 }, { "epoch": 4.595708751365849, "grad_norm": 0.125, "learning_rate": 0.0004323472732690971, "loss": 0.5194, "step": 92530 }, { "epoch": 4.596205423661468, "grad_norm": 0.1396484375, "learning_rate": 0.00043230753948544746, "loss": 0.5228, "step": 92540 }, { "epoch": 4.596702095957087, "grad_norm": 0.09521484375, "learning_rate": 0.000432267805701798, "loss": 0.5171, "step": 92550 }, { "epoch": 4.5971987682527065, "grad_norm": 0.12451171875, "learning_rate": 0.00043222807191814846, "loss": 0.4623, "step": 92560 }, { "epoch": 4.597695440548327, "grad_norm": 0.1171875, "learning_rate": 0.0004321883381344988, "loss": 0.5129, "step": 92570 }, { "epoch": 4.598192112843946, "grad_norm": 0.1572265625, "learning_rate": 0.0004321486043508493, "loss": 0.5342, "step": 92580 }, { "epoch": 4.598688785139565, "grad_norm": 0.11669921875, "learning_rate": 0.0004321088705671998, "loss": 0.523, "step": 92590 }, { "epoch": 4.599185457435184, "grad_norm": 0.1064453125, "learning_rate": 0.0004320691367835503, "loss": 0.5319, "step": 92600 }, { "epoch": 4.5996821297308035, "grad_norm": 0.130859375, "learning_rate": 0.00043202940299990066, "loss": 0.5162, "step": 92610 }, { "epoch": 4.600178802026423, "grad_norm": 0.1201171875, "learning_rate": 0.0004319896692162511, "loss": 0.486, "step": 92620 }, { "epoch": 4.600675474322042, "grad_norm": 0.126953125, "learning_rate": 0.00043194993543260165, "loss": 0.5066, "step": 92630 }, { "epoch": 4.601172146617662, "grad_norm": 0.123046875, "learning_rate": 0.000431910201648952, "loss": 0.5231, "step": 92640 }, { "epoch": 4.601668818913281, "grad_norm": 0.1142578125, "learning_rate": 0.0004318704678653025, "loss": 0.5092, "step": 92650 }, { "epoch": 4.6021654912089005, "grad_norm": 0.115234375, "learning_rate": 0.00043183073408165296, "loss": 0.5104, "step": 92660 }, { "epoch": 4.60266216350452, "grad_norm": 0.12109375, "learning_rate": 0.0004317910002980034, "loss": 0.4988, "step": 92670 }, { "epoch": 4.603158835800139, "grad_norm": 0.11962890625, "learning_rate": 0.00043175126651435385, "loss": 0.5284, "step": 92680 }, { "epoch": 4.603655508095758, "grad_norm": 0.13671875, "learning_rate": 0.0004317115327307043, "loss": 0.5235, "step": 92690 }, { "epoch": 4.604152180391377, "grad_norm": 0.123046875, "learning_rate": 0.00043167179894705474, "loss": 0.5051, "step": 92700 }, { "epoch": 4.6046488526869975, "grad_norm": 0.1025390625, "learning_rate": 0.0004316320651634052, "loss": 0.4786, "step": 92710 }, { "epoch": 4.605145524982617, "grad_norm": 0.1318359375, "learning_rate": 0.0004315923313797557, "loss": 0.4958, "step": 92720 }, { "epoch": 4.605642197278236, "grad_norm": 0.11767578125, "learning_rate": 0.00043155259759610604, "loss": 0.5089, "step": 92730 }, { "epoch": 4.606138869573855, "grad_norm": 0.15234375, "learning_rate": 0.00043151286381245657, "loss": 0.5285, "step": 92740 }, { "epoch": 4.606635541869474, "grad_norm": 0.11572265625, "learning_rate": 0.00043147313002880704, "loss": 0.5246, "step": 92750 }, { "epoch": 4.607132214165094, "grad_norm": 0.111328125, "learning_rate": 0.0004314333962451575, "loss": 0.5399, "step": 92760 }, { "epoch": 4.607628886460713, "grad_norm": 0.12158203125, "learning_rate": 0.00043139366246150787, "loss": 0.5304, "step": 92770 }, { "epoch": 4.608125558756333, "grad_norm": 0.142578125, "learning_rate": 0.0004313539286778584, "loss": 0.5089, "step": 92780 }, { "epoch": 4.608622231051952, "grad_norm": 0.12451171875, "learning_rate": 0.00043131419489420887, "loss": 0.5307, "step": 92790 }, { "epoch": 4.6091189033475715, "grad_norm": 0.125, "learning_rate": 0.00043127446111055923, "loss": 0.5138, "step": 92800 }, { "epoch": 4.609615575643191, "grad_norm": 0.1044921875, "learning_rate": 0.0004312347273269097, "loss": 0.5108, "step": 92810 }, { "epoch": 4.61011224793881, "grad_norm": 0.12060546875, "learning_rate": 0.00043119499354326023, "loss": 0.4865, "step": 92820 }, { "epoch": 4.610608920234429, "grad_norm": 0.1171875, "learning_rate": 0.0004311552597596106, "loss": 0.5165, "step": 92830 }, { "epoch": 4.611105592530048, "grad_norm": 0.1025390625, "learning_rate": 0.00043111552597596106, "loss": 0.491, "step": 92840 }, { "epoch": 4.6116022648256685, "grad_norm": 0.11669921875, "learning_rate": 0.00043107579219231153, "loss": 0.5152, "step": 92850 }, { "epoch": 4.612098937121288, "grad_norm": 0.12890625, "learning_rate": 0.00043103605840866195, "loss": 0.5158, "step": 92860 }, { "epoch": 4.612595609416907, "grad_norm": 0.1416015625, "learning_rate": 0.0004309963246250124, "loss": 0.5234, "step": 92870 }, { "epoch": 4.613092281712526, "grad_norm": 0.1787109375, "learning_rate": 0.0004309565908413629, "loss": 0.5254, "step": 92880 }, { "epoch": 4.613588954008145, "grad_norm": 0.1162109375, "learning_rate": 0.00043091685705771337, "loss": 0.497, "step": 92890 }, { "epoch": 4.614085626303765, "grad_norm": 0.10986328125, "learning_rate": 0.0004308771232740638, "loss": 0.5365, "step": 92900 }, { "epoch": 4.614582298599384, "grad_norm": 0.12109375, "learning_rate": 0.00043083738949041425, "loss": 0.5452, "step": 92910 }, { "epoch": 4.615078970895004, "grad_norm": 0.142578125, "learning_rate": 0.0004307976557067647, "loss": 0.5492, "step": 92920 }, { "epoch": 4.615575643190623, "grad_norm": 0.1103515625, "learning_rate": 0.00043075792192311514, "loss": 0.5096, "step": 92930 }, { "epoch": 4.616072315486242, "grad_norm": 0.1142578125, "learning_rate": 0.0004307181881394656, "loss": 0.5493, "step": 92940 }, { "epoch": 4.616568987781862, "grad_norm": 0.1181640625, "learning_rate": 0.0004306784543558161, "loss": 0.5185, "step": 92950 }, { "epoch": 4.617065660077481, "grad_norm": 0.11669921875, "learning_rate": 0.0004306387205721665, "loss": 0.5305, "step": 92960 }, { "epoch": 4.6175623323731, "grad_norm": 0.1044921875, "learning_rate": 0.000430598986788517, "loss": 0.5127, "step": 92970 }, { "epoch": 4.618059004668719, "grad_norm": 0.109375, "learning_rate": 0.00043055925300486745, "loss": 0.528, "step": 92980 }, { "epoch": 4.6185556769643386, "grad_norm": 0.12890625, "learning_rate": 0.0004305195192212178, "loss": 0.5339, "step": 92990 }, { "epoch": 4.619052349259959, "grad_norm": 0.109375, "learning_rate": 0.0004304797854375683, "loss": 0.5293, "step": 93000 }, { "epoch": 4.619549021555578, "grad_norm": 0.111328125, "learning_rate": 0.0004304400516539188, "loss": 0.5201, "step": 93010 }, { "epoch": 4.620045693851197, "grad_norm": 0.10888671875, "learning_rate": 0.00043040031787026917, "loss": 0.505, "step": 93020 }, { "epoch": 4.620542366146816, "grad_norm": 0.1064453125, "learning_rate": 0.00043036058408661964, "loss": 0.5059, "step": 93030 }, { "epoch": 4.621039038442436, "grad_norm": 0.11279296875, "learning_rate": 0.0004303208503029701, "loss": 0.5123, "step": 93040 }, { "epoch": 4.621535710738055, "grad_norm": 0.10986328125, "learning_rate": 0.00043028111651932064, "loss": 0.5062, "step": 93050 }, { "epoch": 4.622032383033674, "grad_norm": 0.10595703125, "learning_rate": 0.000430241382735671, "loss": 0.5308, "step": 93060 }, { "epoch": 4.622529055329293, "grad_norm": 0.1201171875, "learning_rate": 0.00043020164895202147, "loss": 0.517, "step": 93070 }, { "epoch": 4.623025727624913, "grad_norm": 0.11474609375, "learning_rate": 0.00043016191516837194, "loss": 0.5132, "step": 93080 }, { "epoch": 4.623522399920533, "grad_norm": 0.12060546875, "learning_rate": 0.00043012218138472236, "loss": 0.5055, "step": 93090 }, { "epoch": 4.624019072216152, "grad_norm": 0.10888671875, "learning_rate": 0.00043008244760107283, "loss": 0.4954, "step": 93100 }, { "epoch": 4.624515744511771, "grad_norm": 0.10693359375, "learning_rate": 0.0004300427138174233, "loss": 0.4991, "step": 93110 }, { "epoch": 4.62501241680739, "grad_norm": 0.11865234375, "learning_rate": 0.0004300029800337737, "loss": 0.5172, "step": 93120 }, { "epoch": 4.6255090891030095, "grad_norm": 0.11376953125, "learning_rate": 0.0004299632462501242, "loss": 0.5386, "step": 93130 }, { "epoch": 4.626005761398629, "grad_norm": 0.1298828125, "learning_rate": 0.00042992351246647466, "loss": 0.5271, "step": 93140 }, { "epoch": 4.626502433694249, "grad_norm": 0.11279296875, "learning_rate": 0.0004298837786828251, "loss": 0.5245, "step": 93150 }, { "epoch": 4.626999105989868, "grad_norm": 0.103515625, "learning_rate": 0.00042984404489917555, "loss": 0.541, "step": 93160 }, { "epoch": 4.627495778285487, "grad_norm": 0.11767578125, "learning_rate": 0.000429804311115526, "loss": 0.4988, "step": 93170 }, { "epoch": 4.6279924505811065, "grad_norm": 0.10546875, "learning_rate": 0.0004297645773318764, "loss": 0.5171, "step": 93180 }, { "epoch": 4.628489122876726, "grad_norm": 0.109375, "learning_rate": 0.0004297248435482269, "loss": 0.5038, "step": 93190 }, { "epoch": 4.628985795172345, "grad_norm": 0.12890625, "learning_rate": 0.0004296851097645774, "loss": 0.4955, "step": 93200 }, { "epoch": 4.629482467467964, "grad_norm": 0.11669921875, "learning_rate": 0.00042964537598092785, "loss": 0.5067, "step": 93210 }, { "epoch": 4.629979139763584, "grad_norm": 0.10546875, "learning_rate": 0.0004296056421972782, "loss": 0.5226, "step": 93220 }, { "epoch": 4.6304758120592036, "grad_norm": 0.11279296875, "learning_rate": 0.00042956590841362874, "loss": 0.5224, "step": 93230 }, { "epoch": 4.630972484354823, "grad_norm": 0.1103515625, "learning_rate": 0.0004295261746299792, "loss": 0.5154, "step": 93240 }, { "epoch": 4.631469156650442, "grad_norm": 0.1455078125, "learning_rate": 0.0004294864408463296, "loss": 0.5042, "step": 93250 }, { "epoch": 4.631965828946061, "grad_norm": 0.109375, "learning_rate": 0.00042944670706268005, "loss": 0.5015, "step": 93260 }, { "epoch": 4.6324625012416805, "grad_norm": 0.10205078125, "learning_rate": 0.0004294069732790305, "loss": 0.4962, "step": 93270 }, { "epoch": 4.6329591735373, "grad_norm": 0.1220703125, "learning_rate": 0.00042936723949538094, "loss": 0.5325, "step": 93280 }, { "epoch": 4.63345584583292, "grad_norm": 0.193359375, "learning_rate": 0.0004293275057117314, "loss": 0.5356, "step": 93290 }, { "epoch": 4.633952518128539, "grad_norm": 0.1357421875, "learning_rate": 0.0004292877719280819, "loss": 0.5387, "step": 93300 }, { "epoch": 4.634449190424158, "grad_norm": 0.1220703125, "learning_rate": 0.0004292480381444323, "loss": 0.5287, "step": 93310 }, { "epoch": 4.6349458627197775, "grad_norm": 0.10888671875, "learning_rate": 0.00042920830436078277, "loss": 0.4945, "step": 93320 }, { "epoch": 4.635442535015397, "grad_norm": 0.1064453125, "learning_rate": 0.00042916857057713324, "loss": 0.5056, "step": 93330 }, { "epoch": 4.635939207311016, "grad_norm": 0.1376953125, "learning_rate": 0.0004291288367934837, "loss": 0.5392, "step": 93340 }, { "epoch": 4.636435879606635, "grad_norm": 0.10888671875, "learning_rate": 0.0004290891030098341, "loss": 0.5309, "step": 93350 }, { "epoch": 4.636932551902255, "grad_norm": 0.1064453125, "learning_rate": 0.0004290493692261846, "loss": 0.5161, "step": 93360 }, { "epoch": 4.6374292241978745, "grad_norm": 0.1220703125, "learning_rate": 0.00042900963544253507, "loss": 0.5358, "step": 93370 }, { "epoch": 4.637925896493494, "grad_norm": 0.10693359375, "learning_rate": 0.0004289699016588855, "loss": 0.5189, "step": 93380 }, { "epoch": 4.638422568789113, "grad_norm": 0.09814453125, "learning_rate": 0.00042893016787523596, "loss": 0.5122, "step": 93390 }, { "epoch": 4.638919241084732, "grad_norm": 0.10498046875, "learning_rate": 0.00042889043409158643, "loss": 0.4765, "step": 93400 }, { "epoch": 4.639415913380351, "grad_norm": 0.1728515625, "learning_rate": 0.0004288507003079368, "loss": 0.4987, "step": 93410 }, { "epoch": 4.639912585675971, "grad_norm": 0.099609375, "learning_rate": 0.0004288109665242873, "loss": 0.5058, "step": 93420 }, { "epoch": 4.640409257971591, "grad_norm": 0.12451171875, "learning_rate": 0.0004287712327406378, "loss": 0.4966, "step": 93430 }, { "epoch": 4.64090593026721, "grad_norm": 0.1015625, "learning_rate": 0.00042873149895698815, "loss": 0.5476, "step": 93440 }, { "epoch": 4.641402602562829, "grad_norm": 0.1171875, "learning_rate": 0.0004286917651733386, "loss": 0.5086, "step": 93450 }, { "epoch": 4.641899274858448, "grad_norm": 0.10595703125, "learning_rate": 0.00042865203138968915, "loss": 0.515, "step": 93460 }, { "epoch": 4.642395947154068, "grad_norm": 0.12890625, "learning_rate": 0.0004286122976060395, "loss": 0.4866, "step": 93470 }, { "epoch": 4.642892619449687, "grad_norm": 0.1044921875, "learning_rate": 0.00042857256382239, "loss": 0.4974, "step": 93480 }, { "epoch": 4.643389291745306, "grad_norm": 0.1337890625, "learning_rate": 0.00042853283003874045, "loss": 0.5037, "step": 93490 }, { "epoch": 4.643885964040926, "grad_norm": 0.10302734375, "learning_rate": 0.000428493096255091, "loss": 0.5221, "step": 93500 }, { "epoch": 4.6443826363365455, "grad_norm": 0.1318359375, "learning_rate": 0.00042845336247144134, "loss": 0.4928, "step": 93510 }, { "epoch": 4.644879308632165, "grad_norm": 0.11474609375, "learning_rate": 0.0004284136286877918, "loss": 0.5028, "step": 93520 }, { "epoch": 4.645375980927784, "grad_norm": 0.12158203125, "learning_rate": 0.0004283738949041423, "loss": 0.5423, "step": 93530 }, { "epoch": 4.645872653223403, "grad_norm": 0.1376953125, "learning_rate": 0.0004283341611204927, "loss": 0.4969, "step": 93540 }, { "epoch": 4.646369325519022, "grad_norm": 0.1279296875, "learning_rate": 0.0004282944273368432, "loss": 0.5313, "step": 93550 }, { "epoch": 4.646865997814642, "grad_norm": 0.1064453125, "learning_rate": 0.00042825469355319365, "loss": 0.5243, "step": 93560 }, { "epoch": 4.647362670110262, "grad_norm": 0.140625, "learning_rate": 0.00042821495976954406, "loss": 0.5266, "step": 93570 }, { "epoch": 4.647859342405881, "grad_norm": 0.10595703125, "learning_rate": 0.00042817522598589453, "loss": 0.4925, "step": 93580 }, { "epoch": 4.6483560147015, "grad_norm": 0.11865234375, "learning_rate": 0.000428135492202245, "loss": 0.5117, "step": 93590 }, { "epoch": 4.648852686997119, "grad_norm": 0.1416015625, "learning_rate": 0.00042809575841859537, "loss": 0.502, "step": 93600 }, { "epoch": 4.649349359292739, "grad_norm": 0.12353515625, "learning_rate": 0.0004280560246349459, "loss": 0.5174, "step": 93610 }, { "epoch": 4.649846031588358, "grad_norm": 0.109375, "learning_rate": 0.00042801629085129637, "loss": 0.4945, "step": 93620 }, { "epoch": 4.650342703883977, "grad_norm": 0.12890625, "learning_rate": 0.00042797655706764673, "loss": 0.5148, "step": 93630 }, { "epoch": 4.650839376179597, "grad_norm": 0.1279296875, "learning_rate": 0.0004279368232839972, "loss": 0.5034, "step": 93640 }, { "epoch": 4.651336048475216, "grad_norm": 0.13671875, "learning_rate": 0.0004278970895003477, "loss": 0.5312, "step": 93650 }, { "epoch": 4.651832720770836, "grad_norm": 0.10888671875, "learning_rate": 0.0004278573557166982, "loss": 0.4967, "step": 93660 }, { "epoch": 4.652329393066455, "grad_norm": 0.1474609375, "learning_rate": 0.00042781762193304856, "loss": 0.5173, "step": 93670 }, { "epoch": 4.652826065362074, "grad_norm": 0.1171875, "learning_rate": 0.00042777788814939903, "loss": 0.5002, "step": 93680 }, { "epoch": 4.653322737657693, "grad_norm": 0.10302734375, "learning_rate": 0.00042773815436574956, "loss": 0.5336, "step": 93690 }, { "epoch": 4.653819409953313, "grad_norm": 0.11376953125, "learning_rate": 0.0004276984205820999, "loss": 0.5114, "step": 93700 }, { "epoch": 4.654316082248932, "grad_norm": 0.1083984375, "learning_rate": 0.0004276586867984504, "loss": 0.4867, "step": 93710 }, { "epoch": 4.654812754544551, "grad_norm": 0.12060546875, "learning_rate": 0.00042761895301480086, "loss": 0.4993, "step": 93720 }, { "epoch": 4.655309426840171, "grad_norm": 0.11962890625, "learning_rate": 0.0004275792192311513, "loss": 0.5041, "step": 93730 }, { "epoch": 4.65580609913579, "grad_norm": 0.125, "learning_rate": 0.00042753948544750175, "loss": 0.5209, "step": 93740 }, { "epoch": 4.65630277143141, "grad_norm": 0.2021484375, "learning_rate": 0.0004274997516638522, "loss": 0.5342, "step": 93750 }, { "epoch": 4.656799443727029, "grad_norm": 0.1201171875, "learning_rate": 0.00042746001788020264, "loss": 0.5215, "step": 93760 }, { "epoch": 4.657296116022648, "grad_norm": 0.1279296875, "learning_rate": 0.0004274202840965531, "loss": 0.5045, "step": 93770 }, { "epoch": 4.657792788318267, "grad_norm": 0.1083984375, "learning_rate": 0.0004273805503129036, "loss": 0.5057, "step": 93780 }, { "epoch": 4.6582894606138865, "grad_norm": 0.12890625, "learning_rate": 0.00042734081652925405, "loss": 0.511, "step": 93790 }, { "epoch": 4.658786132909507, "grad_norm": 0.0966796875, "learning_rate": 0.00042730108274560447, "loss": 0.5015, "step": 93800 }, { "epoch": 4.659282805205126, "grad_norm": 0.13671875, "learning_rate": 0.00042726134896195494, "loss": 0.5054, "step": 93810 }, { "epoch": 4.659779477500745, "grad_norm": 0.11376953125, "learning_rate": 0.0004272216151783054, "loss": 0.5361, "step": 93820 }, { "epoch": 4.660276149796364, "grad_norm": 0.1640625, "learning_rate": 0.00042718188139465583, "loss": 0.5188, "step": 93830 }, { "epoch": 4.6607728220919835, "grad_norm": 0.1025390625, "learning_rate": 0.0004271421476110063, "loss": 0.4968, "step": 93840 }, { "epoch": 4.661269494387603, "grad_norm": 0.12353515625, "learning_rate": 0.00042710241382735677, "loss": 0.4918, "step": 93850 }, { "epoch": 4.661766166683222, "grad_norm": 0.13671875, "learning_rate": 0.00042706268004370714, "loss": 0.533, "step": 93860 }, { "epoch": 4.662262838978842, "grad_norm": 0.220703125, "learning_rate": 0.0004270229462600576, "loss": 0.4973, "step": 93870 }, { "epoch": 4.662759511274461, "grad_norm": 0.1181640625, "learning_rate": 0.00042698321247640813, "loss": 0.4874, "step": 93880 }, { "epoch": 4.6632561835700805, "grad_norm": 0.119140625, "learning_rate": 0.0004269434786927585, "loss": 0.5093, "step": 93890 }, { "epoch": 4.6637528558657, "grad_norm": 0.12255859375, "learning_rate": 0.00042690374490910897, "loss": 0.4838, "step": 93900 }, { "epoch": 4.664249528161319, "grad_norm": 0.1279296875, "learning_rate": 0.00042686401112545944, "loss": 0.5036, "step": 93910 }, { "epoch": 4.664746200456938, "grad_norm": 0.1298828125, "learning_rate": 0.00042682427734180986, "loss": 0.5148, "step": 93920 }, { "epoch": 4.6652428727525574, "grad_norm": 0.1064453125, "learning_rate": 0.0004267845435581603, "loss": 0.4976, "step": 93930 }, { "epoch": 4.665739545048178, "grad_norm": 0.1083984375, "learning_rate": 0.0004267448097745108, "loss": 0.5301, "step": 93940 }, { "epoch": 4.666236217343797, "grad_norm": 0.1220703125, "learning_rate": 0.00042670507599086127, "loss": 0.5311, "step": 93950 }, { "epoch": 4.666732889639416, "grad_norm": 0.1123046875, "learning_rate": 0.0004266653422072117, "loss": 0.5232, "step": 93960 }, { "epoch": 4.667229561935035, "grad_norm": 0.12109375, "learning_rate": 0.00042662560842356216, "loss": 0.5074, "step": 93970 }, { "epoch": 4.6677262342306545, "grad_norm": 0.1142578125, "learning_rate": 0.00042658587463991263, "loss": 0.5078, "step": 93980 }, { "epoch": 4.668222906526274, "grad_norm": 0.103515625, "learning_rate": 0.00042654614085626305, "loss": 0.5122, "step": 93990 }, { "epoch": 4.668719578821893, "grad_norm": 0.10400390625, "learning_rate": 0.0004265064070726135, "loss": 0.5162, "step": 94000 }, { "epoch": 4.669216251117513, "grad_norm": 0.1279296875, "learning_rate": 0.000426466673288964, "loss": 0.5243, "step": 94010 }, { "epoch": 4.669712923413132, "grad_norm": 0.125, "learning_rate": 0.0004264269395053144, "loss": 0.5339, "step": 94020 }, { "epoch": 4.6702095957087515, "grad_norm": 0.123046875, "learning_rate": 0.0004263872057216649, "loss": 0.5147, "step": 94030 }, { "epoch": 4.670706268004371, "grad_norm": 0.146484375, "learning_rate": 0.00042634747193801535, "loss": 0.5363, "step": 94040 }, { "epoch": 4.67120294029999, "grad_norm": 0.1181640625, "learning_rate": 0.0004263077381543657, "loss": 0.5158, "step": 94050 }, { "epoch": 4.671699612595609, "grad_norm": 0.11767578125, "learning_rate": 0.00042626800437071624, "loss": 0.5312, "step": 94060 }, { "epoch": 4.672196284891228, "grad_norm": 0.10009765625, "learning_rate": 0.0004262282705870667, "loss": 0.529, "step": 94070 }, { "epoch": 4.6726929571868485, "grad_norm": 0.10546875, "learning_rate": 0.00042618853680341707, "loss": 0.5017, "step": 94080 }, { "epoch": 4.673189629482468, "grad_norm": 0.12890625, "learning_rate": 0.00042614880301976754, "loss": 0.5209, "step": 94090 }, { "epoch": 4.673686301778087, "grad_norm": 0.1982421875, "learning_rate": 0.000426109069236118, "loss": 0.5108, "step": 94100 }, { "epoch": 4.674182974073706, "grad_norm": 0.1552734375, "learning_rate": 0.00042606933545246854, "loss": 0.5438, "step": 94110 }, { "epoch": 4.674679646369325, "grad_norm": 0.126953125, "learning_rate": 0.0004260296016688189, "loss": 0.5113, "step": 94120 }, { "epoch": 4.675176318664945, "grad_norm": 0.138671875, "learning_rate": 0.0004259898678851694, "loss": 0.4907, "step": 94130 }, { "epoch": 4.675672990960564, "grad_norm": 0.11865234375, "learning_rate": 0.00042595013410151985, "loss": 0.528, "step": 94140 }, { "epoch": 4.676169663256184, "grad_norm": 0.158203125, "learning_rate": 0.00042591040031787026, "loss": 0.53, "step": 94150 }, { "epoch": 4.676666335551803, "grad_norm": 0.119140625, "learning_rate": 0.00042587066653422073, "loss": 0.5285, "step": 94160 }, { "epoch": 4.6771630078474224, "grad_norm": 0.1298828125, "learning_rate": 0.0004258309327505712, "loss": 0.5242, "step": 94170 }, { "epoch": 4.677659680143042, "grad_norm": 0.10693359375, "learning_rate": 0.0004257911989669216, "loss": 0.5066, "step": 94180 }, { "epoch": 4.678156352438661, "grad_norm": 0.1806640625, "learning_rate": 0.0004257514651832721, "loss": 0.5107, "step": 94190 }, { "epoch": 4.67865302473428, "grad_norm": 0.11669921875, "learning_rate": 0.00042571173139962257, "loss": 0.5478, "step": 94200 }, { "epoch": 4.679149697029899, "grad_norm": 0.1630859375, "learning_rate": 0.000425671997615973, "loss": 0.5088, "step": 94210 }, { "epoch": 4.6796463693255195, "grad_norm": 0.1298828125, "learning_rate": 0.00042563226383232345, "loss": 0.5017, "step": 94220 }, { "epoch": 4.680143041621139, "grad_norm": 0.1181640625, "learning_rate": 0.0004255925300486739, "loss": 0.5478, "step": 94230 }, { "epoch": 4.680639713916758, "grad_norm": 0.107421875, "learning_rate": 0.0004255527962650244, "loss": 0.5269, "step": 94240 }, { "epoch": 4.681136386212377, "grad_norm": 0.10693359375, "learning_rate": 0.0004255130624813748, "loss": 0.506, "step": 94250 }, { "epoch": 4.681633058507996, "grad_norm": 0.1337890625, "learning_rate": 0.0004254733286977253, "loss": 0.533, "step": 94260 }, { "epoch": 4.682129730803616, "grad_norm": 0.1083984375, "learning_rate": 0.00042543359491407576, "loss": 0.5428, "step": 94270 }, { "epoch": 4.682626403099235, "grad_norm": 0.10595703125, "learning_rate": 0.0004253938611304261, "loss": 0.51, "step": 94280 }, { "epoch": 4.683123075394855, "grad_norm": 0.142578125, "learning_rate": 0.00042535412734677664, "loss": 0.5113, "step": 94290 }, { "epoch": 4.683619747690474, "grad_norm": 0.1103515625, "learning_rate": 0.0004253143935631271, "loss": 0.5103, "step": 94300 }, { "epoch": 4.684116419986093, "grad_norm": 0.11474609375, "learning_rate": 0.0004252746597794775, "loss": 0.4983, "step": 94310 }, { "epoch": 4.684613092281713, "grad_norm": 0.1474609375, "learning_rate": 0.00042523492599582795, "loss": 0.506, "step": 94320 }, { "epoch": 4.685109764577332, "grad_norm": 0.1845703125, "learning_rate": 0.0004251951922121785, "loss": 0.5123, "step": 94330 }, { "epoch": 4.685606436872951, "grad_norm": 0.130859375, "learning_rate": 0.00042515545842852884, "loss": 0.5111, "step": 94340 }, { "epoch": 4.68610310916857, "grad_norm": 0.1201171875, "learning_rate": 0.0004251157246448793, "loss": 0.5294, "step": 94350 }, { "epoch": 4.6865997814641895, "grad_norm": 0.11572265625, "learning_rate": 0.0004250759908612298, "loss": 0.5078, "step": 94360 }, { "epoch": 4.68709645375981, "grad_norm": 0.10986328125, "learning_rate": 0.0004250362570775802, "loss": 0.5218, "step": 94370 }, { "epoch": 4.687593126055429, "grad_norm": 0.12060546875, "learning_rate": 0.00042499652329393067, "loss": 0.5118, "step": 94380 }, { "epoch": 4.688089798351048, "grad_norm": 0.1240234375, "learning_rate": 0.00042495678951028114, "loss": 0.5267, "step": 94390 }, { "epoch": 4.688586470646667, "grad_norm": 0.10009765625, "learning_rate": 0.0004249170557266316, "loss": 0.5034, "step": 94400 }, { "epoch": 4.689083142942287, "grad_norm": 0.12109375, "learning_rate": 0.00042487732194298203, "loss": 0.502, "step": 94410 }, { "epoch": 4.689579815237906, "grad_norm": 0.11083984375, "learning_rate": 0.0004248375881593325, "loss": 0.514, "step": 94420 }, { "epoch": 4.690076487533525, "grad_norm": 0.10595703125, "learning_rate": 0.000424797854375683, "loss": 0.5047, "step": 94430 }, { "epoch": 4.690573159829144, "grad_norm": 0.1123046875, "learning_rate": 0.0004247581205920334, "loss": 0.5045, "step": 94440 }, { "epoch": 4.691069832124764, "grad_norm": 0.11669921875, "learning_rate": 0.00042471838680838386, "loss": 0.5306, "step": 94450 }, { "epoch": 4.691566504420384, "grad_norm": 0.10302734375, "learning_rate": 0.00042467865302473433, "loss": 0.4941, "step": 94460 }, { "epoch": 4.692063176716003, "grad_norm": 0.1171875, "learning_rate": 0.0004246389192410847, "loss": 0.5001, "step": 94470 }, { "epoch": 4.692559849011622, "grad_norm": 0.1298828125, "learning_rate": 0.0004245991854574352, "loss": 0.5268, "step": 94480 }, { "epoch": 4.693056521307241, "grad_norm": 0.1435546875, "learning_rate": 0.0004245594516737857, "loss": 0.5235, "step": 94490 }, { "epoch": 4.6935531936028605, "grad_norm": 0.10546875, "learning_rate": 0.00042451971789013606, "loss": 0.5035, "step": 94500 }, { "epoch": 4.69404986589848, "grad_norm": 0.10205078125, "learning_rate": 0.0004244799841064865, "loss": 0.5141, "step": 94510 }, { "epoch": 4.6945465381941, "grad_norm": 0.126953125, "learning_rate": 0.00042444025032283705, "loss": 0.5439, "step": 94520 }, { "epoch": 4.695043210489719, "grad_norm": 0.1220703125, "learning_rate": 0.0004244005165391874, "loss": 0.5259, "step": 94530 }, { "epoch": 4.695539882785338, "grad_norm": 0.10400390625, "learning_rate": 0.0004243607827555379, "loss": 0.5355, "step": 94540 }, { "epoch": 4.6960365550809575, "grad_norm": 0.10205078125, "learning_rate": 0.00042432104897188836, "loss": 0.5214, "step": 94550 }, { "epoch": 4.696533227376577, "grad_norm": 0.11328125, "learning_rate": 0.0004242813151882389, "loss": 0.5157, "step": 94560 }, { "epoch": 4.697029899672196, "grad_norm": 0.10400390625, "learning_rate": 0.00042424158140458925, "loss": 0.5294, "step": 94570 }, { "epoch": 4.697526571967815, "grad_norm": 0.11962890625, "learning_rate": 0.0004242018476209397, "loss": 0.535, "step": 94580 }, { "epoch": 4.698023244263435, "grad_norm": 0.1279296875, "learning_rate": 0.0004241621138372902, "loss": 0.5182, "step": 94590 }, { "epoch": 4.6985199165590545, "grad_norm": 0.11181640625, "learning_rate": 0.0004241223800536406, "loss": 0.5396, "step": 94600 }, { "epoch": 4.699016588854674, "grad_norm": 0.1220703125, "learning_rate": 0.0004240826462699911, "loss": 0.4946, "step": 94610 }, { "epoch": 4.699513261150293, "grad_norm": 0.10498046875, "learning_rate": 0.00042404291248634155, "loss": 0.5151, "step": 94620 }, { "epoch": 4.700009933445912, "grad_norm": 0.13671875, "learning_rate": 0.00042400317870269197, "loss": 0.514, "step": 94630 }, { "epoch": 4.7005066057415315, "grad_norm": 0.1162109375, "learning_rate": 0.00042396344491904244, "loss": 0.5305, "step": 94640 }, { "epoch": 4.701003278037151, "grad_norm": 0.171875, "learning_rate": 0.0004239237111353929, "loss": 0.5106, "step": 94650 }, { "epoch": 4.701499950332771, "grad_norm": 0.10400390625, "learning_rate": 0.0004238839773517433, "loss": 0.5304, "step": 94660 }, { "epoch": 4.70199662262839, "grad_norm": 0.12060546875, "learning_rate": 0.0004238442435680938, "loss": 0.5195, "step": 94670 }, { "epoch": 4.702493294924009, "grad_norm": 0.185546875, "learning_rate": 0.00042380450978444427, "loss": 0.5279, "step": 94680 }, { "epoch": 4.7029899672196285, "grad_norm": 0.12353515625, "learning_rate": 0.00042376477600079474, "loss": 0.5219, "step": 94690 }, { "epoch": 4.703486639515248, "grad_norm": 0.11376953125, "learning_rate": 0.0004237250422171451, "loss": 0.5455, "step": 94700 }, { "epoch": 4.703983311810867, "grad_norm": 0.1142578125, "learning_rate": 0.00042368530843349563, "loss": 0.5194, "step": 94710 }, { "epoch": 4.704479984106486, "grad_norm": 0.111328125, "learning_rate": 0.0004236455746498461, "loss": 0.5275, "step": 94720 }, { "epoch": 4.704976656402106, "grad_norm": 0.1181640625, "learning_rate": 0.00042360584086619646, "loss": 0.5154, "step": 94730 }, { "epoch": 4.7054733286977255, "grad_norm": 0.1123046875, "learning_rate": 0.00042356610708254693, "loss": 0.5294, "step": 94740 }, { "epoch": 4.705970000993345, "grad_norm": 0.1259765625, "learning_rate": 0.00042352637329889746, "loss": 0.5077, "step": 94750 }, { "epoch": 4.706466673288964, "grad_norm": 0.1728515625, "learning_rate": 0.0004234866395152478, "loss": 0.5258, "step": 94760 }, { "epoch": 4.706963345584583, "grad_norm": 0.177734375, "learning_rate": 0.0004234469057315983, "loss": 0.5071, "step": 94770 }, { "epoch": 4.707460017880202, "grad_norm": 0.119140625, "learning_rate": 0.00042340717194794877, "loss": 0.5164, "step": 94780 }, { "epoch": 4.707956690175822, "grad_norm": 0.142578125, "learning_rate": 0.0004233674381642992, "loss": 0.4936, "step": 94790 }, { "epoch": 4.708453362471442, "grad_norm": 0.1064453125, "learning_rate": 0.00042332770438064965, "loss": 0.5188, "step": 94800 }, { "epoch": 4.708950034767061, "grad_norm": 0.10693359375, "learning_rate": 0.0004232879705970001, "loss": 0.4913, "step": 94810 }, { "epoch": 4.70944670706268, "grad_norm": 0.1298828125, "learning_rate": 0.00042324823681335054, "loss": 0.543, "step": 94820 }, { "epoch": 4.709943379358299, "grad_norm": 0.10888671875, "learning_rate": 0.000423208503029701, "loss": 0.5066, "step": 94830 }, { "epoch": 4.710440051653919, "grad_norm": 0.1083984375, "learning_rate": 0.0004231687692460515, "loss": 0.5207, "step": 94840 }, { "epoch": 4.710936723949538, "grad_norm": 0.142578125, "learning_rate": 0.00042312903546240196, "loss": 0.5341, "step": 94850 }, { "epoch": 4.711433396245157, "grad_norm": 0.1181640625, "learning_rate": 0.0004230893016787524, "loss": 0.5013, "step": 94860 }, { "epoch": 4.711930068540777, "grad_norm": 0.158203125, "learning_rate": 0.00042304956789510285, "loss": 0.5352, "step": 94870 }, { "epoch": 4.7124267408363965, "grad_norm": 0.11181640625, "learning_rate": 0.0004230098341114533, "loss": 0.5075, "step": 94880 }, { "epoch": 4.712923413132016, "grad_norm": 0.1279296875, "learning_rate": 0.00042297010032780373, "loss": 0.4926, "step": 94890 }, { "epoch": 4.713420085427635, "grad_norm": 0.10595703125, "learning_rate": 0.0004229303665441542, "loss": 0.5439, "step": 94900 }, { "epoch": 4.713916757723254, "grad_norm": 0.1455078125, "learning_rate": 0.0004228906327605047, "loss": 0.5166, "step": 94910 }, { "epoch": 4.714413430018873, "grad_norm": 0.173828125, "learning_rate": 0.00042285089897685504, "loss": 0.5406, "step": 94920 }, { "epoch": 4.714910102314493, "grad_norm": 0.12158203125, "learning_rate": 0.00042281116519320556, "loss": 0.5206, "step": 94930 }, { "epoch": 4.715406774610113, "grad_norm": 0.15234375, "learning_rate": 0.00042277143140955604, "loss": 0.4995, "step": 94940 }, { "epoch": 4.715903446905732, "grad_norm": 0.1630859375, "learning_rate": 0.0004227316976259064, "loss": 0.4979, "step": 94950 }, { "epoch": 4.716400119201351, "grad_norm": 0.13671875, "learning_rate": 0.00042269196384225687, "loss": 0.5015, "step": 94960 }, { "epoch": 4.71689679149697, "grad_norm": 0.1376953125, "learning_rate": 0.00042265223005860734, "loss": 0.5038, "step": 94970 }, { "epoch": 4.71739346379259, "grad_norm": 0.11181640625, "learning_rate": 0.00042261249627495776, "loss": 0.5304, "step": 94980 }, { "epoch": 4.717890136088209, "grad_norm": 0.11376953125, "learning_rate": 0.00042257276249130823, "loss": 0.5447, "step": 94990 }, { "epoch": 4.718386808383828, "grad_norm": 0.1298828125, "learning_rate": 0.0004225330287076587, "loss": 0.4964, "step": 95000 }, { "epoch": 4.718883480679448, "grad_norm": 0.1533203125, "learning_rate": 0.0004224932949240092, "loss": 0.5399, "step": 95010 }, { "epoch": 4.719380152975067, "grad_norm": 0.11572265625, "learning_rate": 0.0004224535611403596, "loss": 0.5241, "step": 95020 }, { "epoch": 4.719876825270687, "grad_norm": 0.126953125, "learning_rate": 0.00042241382735671006, "loss": 0.5011, "step": 95030 }, { "epoch": 4.720373497566306, "grad_norm": 0.10693359375, "learning_rate": 0.00042237409357306053, "loss": 0.5227, "step": 95040 }, { "epoch": 4.720870169861925, "grad_norm": 0.12109375, "learning_rate": 0.00042233435978941095, "loss": 0.4831, "step": 95050 }, { "epoch": 4.721366842157544, "grad_norm": 0.10693359375, "learning_rate": 0.0004222946260057614, "loss": 0.5311, "step": 95060 }, { "epoch": 4.7218635144531635, "grad_norm": 0.11279296875, "learning_rate": 0.0004222548922221119, "loss": 0.5486, "step": 95070 }, { "epoch": 4.722360186748783, "grad_norm": 0.10791015625, "learning_rate": 0.0004222151584384623, "loss": 0.4966, "step": 95080 }, { "epoch": 4.722856859044403, "grad_norm": 0.1240234375, "learning_rate": 0.0004221754246548128, "loss": 0.5321, "step": 95090 }, { "epoch": 4.723353531340022, "grad_norm": 0.10693359375, "learning_rate": 0.00042213569087116325, "loss": 0.5146, "step": 95100 }, { "epoch": 4.723850203635641, "grad_norm": 0.12353515625, "learning_rate": 0.0004220959570875136, "loss": 0.5115, "step": 95110 }, { "epoch": 4.724346875931261, "grad_norm": 0.1435546875, "learning_rate": 0.00042205622330386414, "loss": 0.5373, "step": 95120 }, { "epoch": 4.72484354822688, "grad_norm": 0.1005859375, "learning_rate": 0.0004220164895202146, "loss": 0.5361, "step": 95130 }, { "epoch": 4.725340220522499, "grad_norm": 0.134765625, "learning_rate": 0.0004219767557365651, "loss": 0.5529, "step": 95140 }, { "epoch": 4.725836892818118, "grad_norm": 0.11083984375, "learning_rate": 0.00042193702195291545, "loss": 0.5089, "step": 95150 }, { "epoch": 4.7263335651137375, "grad_norm": 0.1240234375, "learning_rate": 0.00042189728816926597, "loss": 0.5577, "step": 95160 }, { "epoch": 4.726830237409358, "grad_norm": 0.12451171875, "learning_rate": 0.00042185755438561644, "loss": 0.5262, "step": 95170 }, { "epoch": 4.727326909704977, "grad_norm": 0.1640625, "learning_rate": 0.0004218178206019668, "loss": 0.5136, "step": 95180 }, { "epoch": 4.727823582000596, "grad_norm": 0.18359375, "learning_rate": 0.0004217780868183173, "loss": 0.4981, "step": 95190 }, { "epoch": 4.728320254296215, "grad_norm": 0.1259765625, "learning_rate": 0.0004217383530346678, "loss": 0.5209, "step": 95200 }, { "epoch": 4.7288169265918345, "grad_norm": 0.1328125, "learning_rate": 0.00042169861925101817, "loss": 0.5366, "step": 95210 }, { "epoch": 4.729313598887454, "grad_norm": 0.1181640625, "learning_rate": 0.00042165888546736864, "loss": 0.4795, "step": 95220 }, { "epoch": 4.729810271183073, "grad_norm": 0.11328125, "learning_rate": 0.0004216191516837191, "loss": 0.5345, "step": 95230 }, { "epoch": 4.730306943478693, "grad_norm": 0.11962890625, "learning_rate": 0.0004215794179000695, "loss": 0.5245, "step": 95240 }, { "epoch": 4.730803615774312, "grad_norm": 0.1279296875, "learning_rate": 0.00042153968411642, "loss": 0.5295, "step": 95250 }, { "epoch": 4.7313002880699315, "grad_norm": 0.12060546875, "learning_rate": 0.00042149995033277047, "loss": 0.5018, "step": 95260 }, { "epoch": 4.731796960365551, "grad_norm": 0.11181640625, "learning_rate": 0.0004214602165491209, "loss": 0.4959, "step": 95270 }, { "epoch": 4.73229363266117, "grad_norm": 0.10107421875, "learning_rate": 0.00042142048276547136, "loss": 0.517, "step": 95280 }, { "epoch": 4.732790304956789, "grad_norm": 0.1396484375, "learning_rate": 0.00042138074898182183, "loss": 0.5057, "step": 95290 }, { "epoch": 4.733286977252408, "grad_norm": 0.1240234375, "learning_rate": 0.0004213410151981723, "loss": 0.5063, "step": 95300 }, { "epoch": 4.7337836495480285, "grad_norm": 0.11279296875, "learning_rate": 0.0004213012814145227, "loss": 0.5184, "step": 95310 }, { "epoch": 4.734280321843648, "grad_norm": 0.10888671875, "learning_rate": 0.0004212615476308732, "loss": 0.5132, "step": 95320 }, { "epoch": 4.734776994139267, "grad_norm": 0.11865234375, "learning_rate": 0.00042122181384722366, "loss": 0.5021, "step": 95330 }, { "epoch": 4.735273666434886, "grad_norm": 0.15234375, "learning_rate": 0.000421182080063574, "loss": 0.4903, "step": 95340 }, { "epoch": 4.7357703387305055, "grad_norm": 0.11962890625, "learning_rate": 0.00042114234627992455, "loss": 0.5328, "step": 95350 }, { "epoch": 4.736267011026125, "grad_norm": 0.11376953125, "learning_rate": 0.000421102612496275, "loss": 0.5182, "step": 95360 }, { "epoch": 4.736763683321744, "grad_norm": 0.11669921875, "learning_rate": 0.0004210628787126254, "loss": 0.5032, "step": 95370 }, { "epoch": 4.737260355617364, "grad_norm": 0.11572265625, "learning_rate": 0.00042102314492897585, "loss": 0.5062, "step": 95380 }, { "epoch": 4.737757027912983, "grad_norm": 0.10986328125, "learning_rate": 0.0004209834111453264, "loss": 0.5156, "step": 95390 }, { "epoch": 4.7382537002086025, "grad_norm": 0.1005859375, "learning_rate": 0.00042094367736167674, "loss": 0.5158, "step": 95400 }, { "epoch": 4.738750372504222, "grad_norm": 0.125, "learning_rate": 0.0004209039435780272, "loss": 0.5502, "step": 95410 }, { "epoch": 4.739247044799841, "grad_norm": 0.1220703125, "learning_rate": 0.0004208642097943777, "loss": 0.5035, "step": 95420 }, { "epoch": 4.73974371709546, "grad_norm": 0.1298828125, "learning_rate": 0.0004208244760107282, "loss": 0.5091, "step": 95430 }, { "epoch": 4.740240389391079, "grad_norm": 0.1103515625, "learning_rate": 0.0004207847422270786, "loss": 0.5248, "step": 95440 }, { "epoch": 4.7407370616866995, "grad_norm": 0.1162109375, "learning_rate": 0.00042074500844342905, "loss": 0.5515, "step": 95450 }, { "epoch": 4.741233733982319, "grad_norm": 0.10009765625, "learning_rate": 0.0004207052746597795, "loss": 0.5064, "step": 95460 }, { "epoch": 4.741730406277938, "grad_norm": 0.1103515625, "learning_rate": 0.00042066554087612993, "loss": 0.506, "step": 95470 }, { "epoch": 4.742227078573557, "grad_norm": 0.11474609375, "learning_rate": 0.0004206258070924804, "loss": 0.4984, "step": 95480 }, { "epoch": 4.742723750869176, "grad_norm": 0.138671875, "learning_rate": 0.0004205860733088309, "loss": 0.5167, "step": 95490 }, { "epoch": 4.743220423164796, "grad_norm": 0.12060546875, "learning_rate": 0.0004205463395251813, "loss": 0.5086, "step": 95500 }, { "epoch": 4.743717095460415, "grad_norm": 0.1279296875, "learning_rate": 0.00042050660574153177, "loss": 0.5237, "step": 95510 }, { "epoch": 4.744213767756035, "grad_norm": 0.140625, "learning_rate": 0.00042046687195788224, "loss": 0.5113, "step": 95520 }, { "epoch": 4.744710440051654, "grad_norm": 0.130859375, "learning_rate": 0.0004204271381742326, "loss": 0.5052, "step": 95530 }, { "epoch": 4.745207112347273, "grad_norm": 0.1005859375, "learning_rate": 0.0004203874043905831, "loss": 0.5447, "step": 95540 }, { "epoch": 4.745703784642893, "grad_norm": 0.109375, "learning_rate": 0.0004203476706069336, "loss": 0.5118, "step": 95550 }, { "epoch": 4.746200456938512, "grad_norm": 0.10400390625, "learning_rate": 0.00042030793682328396, "loss": 0.4928, "step": 95560 }, { "epoch": 4.746697129234131, "grad_norm": 0.126953125, "learning_rate": 0.00042026820303963443, "loss": 0.5002, "step": 95570 }, { "epoch": 4.74719380152975, "grad_norm": 0.166015625, "learning_rate": 0.00042022846925598496, "loss": 0.5171, "step": 95580 }, { "epoch": 4.7476904738253705, "grad_norm": 0.177734375, "learning_rate": 0.00042018873547233543, "loss": 0.5324, "step": 95590 }, { "epoch": 4.74818714612099, "grad_norm": 0.11083984375, "learning_rate": 0.0004201490016886858, "loss": 0.5299, "step": 95600 }, { "epoch": 4.748683818416609, "grad_norm": 0.11279296875, "learning_rate": 0.00042010926790503626, "loss": 0.5255, "step": 95610 }, { "epoch": 4.749180490712228, "grad_norm": 0.109375, "learning_rate": 0.0004200695341213868, "loss": 0.5217, "step": 95620 }, { "epoch": 4.749677163007847, "grad_norm": 0.12255859375, "learning_rate": 0.00042002980033773715, "loss": 0.4949, "step": 95630 }, { "epoch": 4.750173835303467, "grad_norm": 0.123046875, "learning_rate": 0.0004199900665540876, "loss": 0.5195, "step": 95640 }, { "epoch": 4.750670507599086, "grad_norm": 0.12451171875, "learning_rate": 0.0004199503327704381, "loss": 0.5092, "step": 95650 }, { "epoch": 4.751167179894706, "grad_norm": 0.11767578125, "learning_rate": 0.0004199105989867885, "loss": 0.5452, "step": 95660 }, { "epoch": 4.751663852190325, "grad_norm": 0.10546875, "learning_rate": 0.000419870865203139, "loss": 0.5041, "step": 95670 }, { "epoch": 4.752160524485944, "grad_norm": 0.12255859375, "learning_rate": 0.00041983113141948945, "loss": 0.5251, "step": 95680 }, { "epoch": 4.752657196781564, "grad_norm": 0.1171875, "learning_rate": 0.00041979139763583987, "loss": 0.5251, "step": 95690 }, { "epoch": 4.753153869077183, "grad_norm": 0.11083984375, "learning_rate": 0.00041975166385219034, "loss": 0.4777, "step": 95700 }, { "epoch": 4.753650541372802, "grad_norm": 0.1103515625, "learning_rate": 0.0004197119300685408, "loss": 0.5222, "step": 95710 }, { "epoch": 4.754147213668421, "grad_norm": 0.107421875, "learning_rate": 0.00041967219628489123, "loss": 0.5147, "step": 95720 }, { "epoch": 4.754643885964041, "grad_norm": 0.115234375, "learning_rate": 0.0004196324625012417, "loss": 0.5202, "step": 95730 }, { "epoch": 4.755140558259661, "grad_norm": 0.11376953125, "learning_rate": 0.00041959272871759217, "loss": 0.4835, "step": 95740 }, { "epoch": 4.75563723055528, "grad_norm": 0.11962890625, "learning_rate": 0.00041955299493394264, "loss": 0.4836, "step": 95750 }, { "epoch": 4.756133902850899, "grad_norm": 0.14453125, "learning_rate": 0.00041951326115029306, "loss": 0.5481, "step": 95760 }, { "epoch": 4.756630575146518, "grad_norm": 0.11376953125, "learning_rate": 0.00041947352736664353, "loss": 0.5153, "step": 95770 }, { "epoch": 4.7571272474421376, "grad_norm": 0.1474609375, "learning_rate": 0.000419433793582994, "loss": 0.5352, "step": 95780 }, { "epoch": 4.757623919737757, "grad_norm": 0.10888671875, "learning_rate": 0.00041939405979934437, "loss": 0.5043, "step": 95790 }, { "epoch": 4.758120592033376, "grad_norm": 0.12109375, "learning_rate": 0.00041935432601569484, "loss": 0.5119, "step": 95800 }, { "epoch": 4.758617264328995, "grad_norm": 0.12890625, "learning_rate": 0.00041931459223204536, "loss": 0.5162, "step": 95810 }, { "epoch": 4.759113936624615, "grad_norm": 0.10986328125, "learning_rate": 0.0004192748584483957, "loss": 0.5127, "step": 95820 }, { "epoch": 4.759610608920235, "grad_norm": 0.12451171875, "learning_rate": 0.0004192351246647462, "loss": 0.5209, "step": 95830 }, { "epoch": 4.760107281215854, "grad_norm": 0.1298828125, "learning_rate": 0.00041919539088109667, "loss": 0.5126, "step": 95840 }, { "epoch": 4.760603953511473, "grad_norm": 0.111328125, "learning_rate": 0.0004191556570974471, "loss": 0.4896, "step": 95850 }, { "epoch": 4.761100625807092, "grad_norm": 0.11083984375, "learning_rate": 0.00041911592331379756, "loss": 0.5022, "step": 95860 }, { "epoch": 4.7615972981027115, "grad_norm": 0.10888671875, "learning_rate": 0.00041907618953014803, "loss": 0.5168, "step": 95870 }, { "epoch": 4.762093970398331, "grad_norm": 0.1376953125, "learning_rate": 0.0004190364557464985, "loss": 0.5049, "step": 95880 }, { "epoch": 4.762590642693951, "grad_norm": 0.11279296875, "learning_rate": 0.0004189967219628489, "loss": 0.4866, "step": 95890 }, { "epoch": 4.76308731498957, "grad_norm": 0.1259765625, "learning_rate": 0.0004189569881791994, "loss": 0.5106, "step": 95900 }, { "epoch": 4.763583987285189, "grad_norm": 0.1044921875, "learning_rate": 0.00041891725439554986, "loss": 0.4877, "step": 95910 }, { "epoch": 4.7640806595808085, "grad_norm": 0.10400390625, "learning_rate": 0.0004188775206119003, "loss": 0.513, "step": 95920 }, { "epoch": 4.764577331876428, "grad_norm": 0.126953125, "learning_rate": 0.00041883778682825075, "loss": 0.4947, "step": 95930 }, { "epoch": 4.765074004172047, "grad_norm": 0.1591796875, "learning_rate": 0.0004187980530446012, "loss": 0.5392, "step": 95940 }, { "epoch": 4.765570676467666, "grad_norm": 0.154296875, "learning_rate": 0.00041875831926095164, "loss": 0.5238, "step": 95950 }, { "epoch": 4.766067348763286, "grad_norm": 0.107421875, "learning_rate": 0.0004187185854773021, "loss": 0.5083, "step": 95960 }, { "epoch": 4.7665640210589055, "grad_norm": 0.1552734375, "learning_rate": 0.0004186788516936526, "loss": 0.5545, "step": 95970 }, { "epoch": 4.767060693354525, "grad_norm": 0.11962890625, "learning_rate": 0.00041863911791000294, "loss": 0.5336, "step": 95980 }, { "epoch": 4.767557365650144, "grad_norm": 0.111328125, "learning_rate": 0.00041859938412635347, "loss": 0.5086, "step": 95990 }, { "epoch": 4.768054037945763, "grad_norm": 0.11328125, "learning_rate": 0.00041855965034270394, "loss": 0.5403, "step": 96000 }, { "epoch": 4.768550710241382, "grad_norm": 0.107421875, "learning_rate": 0.0004185199165590543, "loss": 0.5288, "step": 96010 }, { "epoch": 4.769047382537002, "grad_norm": 0.103515625, "learning_rate": 0.0004184801827754048, "loss": 0.5142, "step": 96020 }, { "epoch": 4.769544054832622, "grad_norm": 0.12255859375, "learning_rate": 0.0004184404489917553, "loss": 0.509, "step": 96030 }, { "epoch": 4.770040727128241, "grad_norm": 0.1298828125, "learning_rate": 0.00041840071520810577, "loss": 0.5234, "step": 96040 }, { "epoch": 4.77053739942386, "grad_norm": 0.11376953125, "learning_rate": 0.00041836098142445613, "loss": 0.522, "step": 96050 }, { "epoch": 4.7710340717194795, "grad_norm": 0.126953125, "learning_rate": 0.0004183212476408066, "loss": 0.5065, "step": 96060 }, { "epoch": 4.771530744015099, "grad_norm": 0.1279296875, "learning_rate": 0.0004182815138571571, "loss": 0.519, "step": 96070 }, { "epoch": 4.772027416310718, "grad_norm": 0.12890625, "learning_rate": 0.0004182417800735075, "loss": 0.5165, "step": 96080 }, { "epoch": 4.772524088606337, "grad_norm": 0.11669921875, "learning_rate": 0.00041820204628985797, "loss": 0.5168, "step": 96090 }, { "epoch": 4.773020760901957, "grad_norm": 0.09814453125, "learning_rate": 0.00041816231250620844, "loss": 0.4887, "step": 96100 }, { "epoch": 4.7735174331975765, "grad_norm": 0.130859375, "learning_rate": 0.00041812257872255885, "loss": 0.5135, "step": 96110 }, { "epoch": 4.774014105493196, "grad_norm": 0.1572265625, "learning_rate": 0.0004180828449389093, "loss": 0.4996, "step": 96120 }, { "epoch": 4.774510777788815, "grad_norm": 0.10546875, "learning_rate": 0.0004180431111552598, "loss": 0.5189, "step": 96130 }, { "epoch": 4.775007450084434, "grad_norm": 0.111328125, "learning_rate": 0.0004180033773716102, "loss": 0.5156, "step": 96140 }, { "epoch": 4.775504122380053, "grad_norm": 0.12890625, "learning_rate": 0.0004179636435879607, "loss": 0.5192, "step": 96150 }, { "epoch": 4.776000794675673, "grad_norm": 0.1552734375, "learning_rate": 0.00041792390980431116, "loss": 0.5164, "step": 96160 }, { "epoch": 4.776497466971293, "grad_norm": 0.109375, "learning_rate": 0.0004178841760206615, "loss": 0.5318, "step": 96170 }, { "epoch": 4.776994139266912, "grad_norm": 0.10888671875, "learning_rate": 0.00041784444223701204, "loss": 0.5487, "step": 96180 }, { "epoch": 4.777490811562531, "grad_norm": 0.1396484375, "learning_rate": 0.0004178047084533625, "loss": 0.553, "step": 96190 }, { "epoch": 4.77798748385815, "grad_norm": 0.1103515625, "learning_rate": 0.000417764974669713, "loss": 0.5055, "step": 96200 }, { "epoch": 4.77848415615377, "grad_norm": 0.12255859375, "learning_rate": 0.00041772524088606335, "loss": 0.4895, "step": 96210 }, { "epoch": 4.778980828449389, "grad_norm": 0.107421875, "learning_rate": 0.0004176855071024139, "loss": 0.5172, "step": 96220 }, { "epoch": 4.779477500745008, "grad_norm": 0.11376953125, "learning_rate": 0.00041764577331876435, "loss": 0.4994, "step": 96230 }, { "epoch": 4.779974173040628, "grad_norm": 0.126953125, "learning_rate": 0.0004176060395351147, "loss": 0.5072, "step": 96240 }, { "epoch": 4.780470845336247, "grad_norm": 0.11181640625, "learning_rate": 0.0004175663057514652, "loss": 0.5237, "step": 96250 }, { "epoch": 4.780967517631867, "grad_norm": 0.1396484375, "learning_rate": 0.0004175265719678157, "loss": 0.5339, "step": 96260 }, { "epoch": 4.781464189927486, "grad_norm": 0.1396484375, "learning_rate": 0.00041748683818416607, "loss": 0.5089, "step": 96270 }, { "epoch": 4.781960862223105, "grad_norm": 0.16796875, "learning_rate": 0.00041744710440051654, "loss": 0.4891, "step": 96280 }, { "epoch": 4.782457534518724, "grad_norm": 0.126953125, "learning_rate": 0.000417407370616867, "loss": 0.5031, "step": 96290 }, { "epoch": 4.782954206814344, "grad_norm": 0.123046875, "learning_rate": 0.00041736763683321743, "loss": 0.5108, "step": 96300 }, { "epoch": 4.783450879109964, "grad_norm": 0.1142578125, "learning_rate": 0.0004173279030495679, "loss": 0.5089, "step": 96310 }, { "epoch": 4.783947551405583, "grad_norm": 0.1123046875, "learning_rate": 0.00041728816926591837, "loss": 0.5291, "step": 96320 }, { "epoch": 4.784444223701202, "grad_norm": 0.1259765625, "learning_rate": 0.00041724843548226884, "loss": 0.51, "step": 96330 }, { "epoch": 4.784940895996821, "grad_norm": 0.197265625, "learning_rate": 0.00041720870169861926, "loss": 0.4878, "step": 96340 }, { "epoch": 4.785437568292441, "grad_norm": 0.12353515625, "learning_rate": 0.00041716896791496973, "loss": 0.5025, "step": 96350 }, { "epoch": 4.78593424058806, "grad_norm": 0.138671875, "learning_rate": 0.0004171292341313202, "loss": 0.5162, "step": 96360 }, { "epoch": 4.786430912883679, "grad_norm": 0.1416015625, "learning_rate": 0.0004170895003476706, "loss": 0.5003, "step": 96370 }, { "epoch": 4.786927585179299, "grad_norm": 0.12060546875, "learning_rate": 0.0004170497665640211, "loss": 0.5095, "step": 96380 }, { "epoch": 4.787424257474918, "grad_norm": 0.1328125, "learning_rate": 0.00041701003278037156, "loss": 0.4919, "step": 96390 }, { "epoch": 4.787920929770538, "grad_norm": 0.1630859375, "learning_rate": 0.0004169702989967219, "loss": 0.5275, "step": 96400 }, { "epoch": 4.788417602066157, "grad_norm": 0.1044921875, "learning_rate": 0.00041693056521307245, "loss": 0.5021, "step": 96410 }, { "epoch": 4.788914274361776, "grad_norm": 0.125, "learning_rate": 0.0004168908314294229, "loss": 0.5032, "step": 96420 }, { "epoch": 4.789410946657395, "grad_norm": 0.193359375, "learning_rate": 0.0004168510976457733, "loss": 0.519, "step": 96430 }, { "epoch": 4.7899076189530145, "grad_norm": 0.10400390625, "learning_rate": 0.00041681136386212376, "loss": 0.5152, "step": 96440 }, { "epoch": 4.790404291248635, "grad_norm": 0.11083984375, "learning_rate": 0.0004167716300784743, "loss": 0.5233, "step": 96450 }, { "epoch": 4.790900963544254, "grad_norm": 0.1484375, "learning_rate": 0.00041673189629482465, "loss": 0.4867, "step": 96460 }, { "epoch": 4.791397635839873, "grad_norm": 0.10791015625, "learning_rate": 0.0004166921625111751, "loss": 0.5125, "step": 96470 }, { "epoch": 4.791894308135492, "grad_norm": 0.1240234375, "learning_rate": 0.0004166524287275256, "loss": 0.5196, "step": 96480 }, { "epoch": 4.792390980431112, "grad_norm": 0.1328125, "learning_rate": 0.0004166126949438761, "loss": 0.5539, "step": 96490 }, { "epoch": 4.792887652726731, "grad_norm": 0.130859375, "learning_rate": 0.0004165729611602265, "loss": 0.5388, "step": 96500 }, { "epoch": 4.79338432502235, "grad_norm": 0.11865234375, "learning_rate": 0.00041653322737657695, "loss": 0.4887, "step": 96510 }, { "epoch": 4.793880997317969, "grad_norm": 0.1533203125, "learning_rate": 0.0004164934935929274, "loss": 0.495, "step": 96520 }, { "epoch": 4.7943776696135885, "grad_norm": 0.13671875, "learning_rate": 0.00041645375980927784, "loss": 0.4969, "step": 96530 }, { "epoch": 4.794874341909209, "grad_norm": 0.1357421875, "learning_rate": 0.0004164140260256283, "loss": 0.4896, "step": 96540 }, { "epoch": 4.795371014204828, "grad_norm": 0.11328125, "learning_rate": 0.0004163742922419788, "loss": 0.503, "step": 96550 }, { "epoch": 4.795867686500447, "grad_norm": 0.11279296875, "learning_rate": 0.0004163345584583292, "loss": 0.5325, "step": 96560 }, { "epoch": 4.796364358796066, "grad_norm": 0.1171875, "learning_rate": 0.00041629482467467967, "loss": 0.5173, "step": 96570 }, { "epoch": 4.7968610310916855, "grad_norm": 0.13671875, "learning_rate": 0.00041625509089103014, "loss": 0.5358, "step": 96580 }, { "epoch": 4.797357703387305, "grad_norm": 0.1044921875, "learning_rate": 0.00041621535710738056, "loss": 0.535, "step": 96590 }, { "epoch": 4.797854375682924, "grad_norm": 0.11865234375, "learning_rate": 0.00041617562332373103, "loss": 0.522, "step": 96600 }, { "epoch": 4.798351047978544, "grad_norm": 0.171875, "learning_rate": 0.0004161358895400815, "loss": 0.5228, "step": 96610 }, { "epoch": 4.798847720274163, "grad_norm": 0.11279296875, "learning_rate": 0.00041609615575643186, "loss": 0.507, "step": 96620 }, { "epoch": 4.7993443925697825, "grad_norm": 0.11376953125, "learning_rate": 0.0004160564219727824, "loss": 0.4998, "step": 96630 }, { "epoch": 4.799841064865402, "grad_norm": 0.109375, "learning_rate": 0.00041601668818913286, "loss": 0.5215, "step": 96640 }, { "epoch": 4.800337737161021, "grad_norm": 0.197265625, "learning_rate": 0.00041597695440548333, "loss": 0.5134, "step": 96650 }, { "epoch": 4.80083440945664, "grad_norm": 0.126953125, "learning_rate": 0.0004159372206218337, "loss": 0.5027, "step": 96660 }, { "epoch": 4.801331081752259, "grad_norm": 0.1376953125, "learning_rate": 0.00041589748683818417, "loss": 0.4889, "step": 96670 }, { "epoch": 4.8018277540478795, "grad_norm": 0.11962890625, "learning_rate": 0.0004158577530545347, "loss": 0.5308, "step": 96680 }, { "epoch": 4.802324426343499, "grad_norm": 0.1201171875, "learning_rate": 0.00041581801927088505, "loss": 0.5025, "step": 96690 }, { "epoch": 4.802821098639118, "grad_norm": 0.1181640625, "learning_rate": 0.0004157782854872355, "loss": 0.4896, "step": 96700 }, { "epoch": 4.803317770934737, "grad_norm": 0.1064453125, "learning_rate": 0.000415738551703586, "loss": 0.4873, "step": 96710 }, { "epoch": 4.8038144432303564, "grad_norm": 0.1474609375, "learning_rate": 0.0004156988179199364, "loss": 0.5143, "step": 96720 }, { "epoch": 4.804311115525976, "grad_norm": 0.10595703125, "learning_rate": 0.0004156590841362869, "loss": 0.5448, "step": 96730 }, { "epoch": 4.804807787821595, "grad_norm": 0.11669921875, "learning_rate": 0.00041561935035263736, "loss": 0.4952, "step": 96740 }, { "epoch": 4.805304460117215, "grad_norm": 0.1318359375, "learning_rate": 0.0004155796165689878, "loss": 0.5376, "step": 96750 }, { "epoch": 4.805801132412834, "grad_norm": 0.1611328125, "learning_rate": 0.00041553988278533825, "loss": 0.5119, "step": 96760 }, { "epoch": 4.8062978047084535, "grad_norm": 0.1728515625, "learning_rate": 0.0004155001490016887, "loss": 0.5141, "step": 96770 }, { "epoch": 4.806794477004073, "grad_norm": 0.0986328125, "learning_rate": 0.0004154604152180392, "loss": 0.5169, "step": 96780 }, { "epoch": 4.807291149299692, "grad_norm": 0.11376953125, "learning_rate": 0.0004154206814343896, "loss": 0.5102, "step": 96790 }, { "epoch": 4.807787821595311, "grad_norm": 0.11376953125, "learning_rate": 0.0004153809476507401, "loss": 0.5279, "step": 96800 }, { "epoch": 4.80828449389093, "grad_norm": 0.1240234375, "learning_rate": 0.00041534121386709055, "loss": 0.5039, "step": 96810 }, { "epoch": 4.8087811661865505, "grad_norm": 0.10205078125, "learning_rate": 0.00041530148008344096, "loss": 0.497, "step": 96820 }, { "epoch": 4.80927783848217, "grad_norm": 0.1201171875, "learning_rate": 0.00041526174629979144, "loss": 0.4898, "step": 96830 }, { "epoch": 4.809774510777789, "grad_norm": 0.11181640625, "learning_rate": 0.0004152220125161419, "loss": 0.5293, "step": 96840 }, { "epoch": 4.810271183073408, "grad_norm": 0.1279296875, "learning_rate": 0.00041518227873249227, "loss": 0.5282, "step": 96850 }, { "epoch": 4.810767855369027, "grad_norm": 0.11572265625, "learning_rate": 0.0004151425449488428, "loss": 0.5407, "step": 96860 }, { "epoch": 4.811264527664647, "grad_norm": 0.10400390625, "learning_rate": 0.00041510281116519327, "loss": 0.5026, "step": 96870 }, { "epoch": 4.811761199960266, "grad_norm": 0.12890625, "learning_rate": 0.00041506307738154363, "loss": 0.4928, "step": 96880 }, { "epoch": 4.812257872255886, "grad_norm": 0.0986328125, "learning_rate": 0.0004150233435978941, "loss": 0.4927, "step": 96890 }, { "epoch": 4.812754544551505, "grad_norm": 0.11572265625, "learning_rate": 0.00041498360981424463, "loss": 0.5254, "step": 96900 }, { "epoch": 4.813251216847124, "grad_norm": 0.1142578125, "learning_rate": 0.000414943876030595, "loss": 0.5341, "step": 96910 }, { "epoch": 4.813747889142744, "grad_norm": 0.115234375, "learning_rate": 0.00041490414224694546, "loss": 0.4761, "step": 96920 }, { "epoch": 4.814244561438363, "grad_norm": 0.0966796875, "learning_rate": 0.00041486440846329593, "loss": 0.4771, "step": 96930 }, { "epoch": 4.814741233733982, "grad_norm": 0.123046875, "learning_rate": 0.0004148246746796464, "loss": 0.5111, "step": 96940 }, { "epoch": 4.815237906029601, "grad_norm": 0.1103515625, "learning_rate": 0.0004147849408959968, "loss": 0.5093, "step": 96950 }, { "epoch": 4.8157345783252214, "grad_norm": 0.109375, "learning_rate": 0.0004147452071123473, "loss": 0.5041, "step": 96960 }, { "epoch": 4.816231250620841, "grad_norm": 0.11328125, "learning_rate": 0.00041470547332869776, "loss": 0.4918, "step": 96970 }, { "epoch": 4.81672792291646, "grad_norm": 0.125, "learning_rate": 0.0004146657395450482, "loss": 0.5107, "step": 96980 }, { "epoch": 4.817224595212079, "grad_norm": 0.1298828125, "learning_rate": 0.00041462600576139865, "loss": 0.501, "step": 96990 }, { "epoch": 4.817721267507698, "grad_norm": 0.1298828125, "learning_rate": 0.0004145862719777491, "loss": 0.5194, "step": 97000 }, { "epoch": 4.818217939803318, "grad_norm": 0.1123046875, "learning_rate": 0.00041454653819409954, "loss": 0.5162, "step": 97010 }, { "epoch": 4.818714612098937, "grad_norm": 0.11474609375, "learning_rate": 0.00041450680441045, "loss": 0.5299, "step": 97020 }, { "epoch": 4.819211284394557, "grad_norm": 0.119140625, "learning_rate": 0.0004144670706268005, "loss": 0.4721, "step": 97030 }, { "epoch": 4.819707956690176, "grad_norm": 0.1494140625, "learning_rate": 0.00041442733684315085, "loss": 0.5487, "step": 97040 }, { "epoch": 4.820204628985795, "grad_norm": 0.1630859375, "learning_rate": 0.00041438760305950137, "loss": 0.5348, "step": 97050 }, { "epoch": 4.820701301281415, "grad_norm": 0.138671875, "learning_rate": 0.00041434786927585184, "loss": 0.5042, "step": 97060 }, { "epoch": 4.821197973577034, "grad_norm": 0.10791015625, "learning_rate": 0.0004143081354922022, "loss": 0.4872, "step": 97070 }, { "epoch": 4.821694645872653, "grad_norm": 0.11767578125, "learning_rate": 0.0004142684017085527, "loss": 0.4811, "step": 97080 }, { "epoch": 4.822191318168272, "grad_norm": 0.10986328125, "learning_rate": 0.0004142286679249032, "loss": 0.4971, "step": 97090 }, { "epoch": 4.822687990463892, "grad_norm": 0.11962890625, "learning_rate": 0.0004141889341412537, "loss": 0.5381, "step": 97100 }, { "epoch": 4.823184662759512, "grad_norm": 0.1259765625, "learning_rate": 0.00041414920035760404, "loss": 0.5092, "step": 97110 }, { "epoch": 4.823681335055131, "grad_norm": 0.1181640625, "learning_rate": 0.0004141094665739545, "loss": 0.5213, "step": 97120 }, { "epoch": 4.82417800735075, "grad_norm": 0.1162109375, "learning_rate": 0.00041406973279030503, "loss": 0.5406, "step": 97130 }, { "epoch": 4.824674679646369, "grad_norm": 0.10400390625, "learning_rate": 0.0004140299990066554, "loss": 0.5084, "step": 97140 }, { "epoch": 4.8251713519419885, "grad_norm": 0.1689453125, "learning_rate": 0.00041399026522300587, "loss": 0.5019, "step": 97150 }, { "epoch": 4.825668024237608, "grad_norm": 0.12109375, "learning_rate": 0.00041395053143935634, "loss": 0.5396, "step": 97160 }, { "epoch": 4.826164696533227, "grad_norm": 0.10546875, "learning_rate": 0.00041391079765570676, "loss": 0.5518, "step": 97170 }, { "epoch": 4.826661368828847, "grad_norm": 0.10986328125, "learning_rate": 0.00041387106387205723, "loss": 0.4836, "step": 97180 }, { "epoch": 4.827158041124466, "grad_norm": 0.142578125, "learning_rate": 0.0004138313300884077, "loss": 0.5422, "step": 97190 }, { "epoch": 4.827654713420086, "grad_norm": 0.1591796875, "learning_rate": 0.0004137915963047581, "loss": 0.4921, "step": 97200 }, { "epoch": 4.828151385715705, "grad_norm": 0.142578125, "learning_rate": 0.0004137518625211086, "loss": 0.5346, "step": 97210 }, { "epoch": 4.828648058011324, "grad_norm": 0.1220703125, "learning_rate": 0.00041371212873745906, "loss": 0.4824, "step": 97220 }, { "epoch": 4.829144730306943, "grad_norm": 0.130859375, "learning_rate": 0.00041367239495380953, "loss": 0.4936, "step": 97230 }, { "epoch": 4.8296414026025625, "grad_norm": 0.119140625, "learning_rate": 0.00041363266117015995, "loss": 0.5038, "step": 97240 }, { "epoch": 4.830138074898182, "grad_norm": 0.1435546875, "learning_rate": 0.0004135929273865104, "loss": 0.5332, "step": 97250 }, { "epoch": 4.830634747193802, "grad_norm": 0.1279296875, "learning_rate": 0.0004135531936028609, "loss": 0.5166, "step": 97260 }, { "epoch": 4.831131419489421, "grad_norm": 0.1298828125, "learning_rate": 0.00041351345981921125, "loss": 0.5166, "step": 97270 }, { "epoch": 4.83162809178504, "grad_norm": 0.126953125, "learning_rate": 0.0004134737260355618, "loss": 0.495, "step": 97280 }, { "epoch": 4.8321247640806595, "grad_norm": 0.1181640625, "learning_rate": 0.00041343399225191225, "loss": 0.5042, "step": 97290 }, { "epoch": 4.832621436376279, "grad_norm": 0.10498046875, "learning_rate": 0.0004133942584682626, "loss": 0.5037, "step": 97300 }, { "epoch": 4.833118108671898, "grad_norm": 0.1337890625, "learning_rate": 0.0004133545246846131, "loss": 0.5008, "step": 97310 }, { "epoch": 4.833614780967517, "grad_norm": 0.10693359375, "learning_rate": 0.0004133147909009636, "loss": 0.5325, "step": 97320 }, { "epoch": 4.834111453263137, "grad_norm": 0.138671875, "learning_rate": 0.000413275057117314, "loss": 0.4981, "step": 97330 }, { "epoch": 4.8346081255587565, "grad_norm": 0.119140625, "learning_rate": 0.00041323532333366445, "loss": 0.5227, "step": 97340 }, { "epoch": 4.835104797854376, "grad_norm": 0.15234375, "learning_rate": 0.0004131955895500149, "loss": 0.5002, "step": 97350 }, { "epoch": 4.835601470149995, "grad_norm": 0.107421875, "learning_rate": 0.00041315585576636533, "loss": 0.5431, "step": 97360 }, { "epoch": 4.836098142445614, "grad_norm": 0.10595703125, "learning_rate": 0.0004131161219827158, "loss": 0.5112, "step": 97370 }, { "epoch": 4.836594814741233, "grad_norm": 0.11181640625, "learning_rate": 0.0004130763881990663, "loss": 0.5155, "step": 97380 }, { "epoch": 4.837091487036853, "grad_norm": 0.109375, "learning_rate": 0.00041303665441541675, "loss": 0.5001, "step": 97390 }, { "epoch": 4.837588159332473, "grad_norm": 0.10693359375, "learning_rate": 0.00041299692063176717, "loss": 0.499, "step": 97400 }, { "epoch": 4.838084831628092, "grad_norm": 0.134765625, "learning_rate": 0.00041295718684811764, "loss": 0.5312, "step": 97410 }, { "epoch": 4.838581503923711, "grad_norm": 0.16015625, "learning_rate": 0.0004129174530644681, "loss": 0.5056, "step": 97420 }, { "epoch": 4.8390781762193305, "grad_norm": 0.1494140625, "learning_rate": 0.0004128777192808185, "loss": 0.4915, "step": 97430 }, { "epoch": 4.83957484851495, "grad_norm": 0.1328125, "learning_rate": 0.000412837985497169, "loss": 0.504, "step": 97440 }, { "epoch": 4.840071520810569, "grad_norm": 0.11474609375, "learning_rate": 0.00041279825171351947, "loss": 0.5052, "step": 97450 }, { "epoch": 4.840568193106188, "grad_norm": 0.09912109375, "learning_rate": 0.0004127585179298699, "loss": 0.5274, "step": 97460 }, { "epoch": 4.841064865401808, "grad_norm": 0.11669921875, "learning_rate": 0.00041271878414622036, "loss": 0.4997, "step": 97470 }, { "epoch": 4.8415615376974275, "grad_norm": 0.11376953125, "learning_rate": 0.00041267905036257083, "loss": 0.4722, "step": 97480 }, { "epoch": 4.842058209993047, "grad_norm": 0.138671875, "learning_rate": 0.0004126393165789212, "loss": 0.5154, "step": 97490 }, { "epoch": 4.842554882288666, "grad_norm": 0.12109375, "learning_rate": 0.00041259958279527166, "loss": 0.4914, "step": 97500 }, { "epoch": 4.843051554584285, "grad_norm": 0.12158203125, "learning_rate": 0.0004125598490116222, "loss": 0.5148, "step": 97510 }, { "epoch": 4.843548226879904, "grad_norm": 0.1708984375, "learning_rate": 0.00041252011522797266, "loss": 0.5141, "step": 97520 }, { "epoch": 4.844044899175524, "grad_norm": 0.130859375, "learning_rate": 0.000412480381444323, "loss": 0.5172, "step": 97530 }, { "epoch": 4.844541571471144, "grad_norm": 0.1298828125, "learning_rate": 0.0004124406476606735, "loss": 0.5127, "step": 97540 }, { "epoch": 4.845038243766763, "grad_norm": 0.11865234375, "learning_rate": 0.000412400913877024, "loss": 0.4933, "step": 97550 }, { "epoch": 4.845534916062382, "grad_norm": 0.10595703125, "learning_rate": 0.0004123611800933744, "loss": 0.503, "step": 97560 }, { "epoch": 4.846031588358001, "grad_norm": 0.10400390625, "learning_rate": 0.00041232144630972485, "loss": 0.5205, "step": 97570 }, { "epoch": 4.846528260653621, "grad_norm": 0.1376953125, "learning_rate": 0.0004122817125260753, "loss": 0.5166, "step": 97580 }, { "epoch": 4.84702493294924, "grad_norm": 0.10888671875, "learning_rate": 0.00041224197874242574, "loss": 0.5364, "step": 97590 }, { "epoch": 4.847521605244859, "grad_norm": 0.1298828125, "learning_rate": 0.0004122022449587762, "loss": 0.5433, "step": 97600 }, { "epoch": 4.848018277540479, "grad_norm": 0.1162109375, "learning_rate": 0.0004121625111751267, "loss": 0.5206, "step": 97610 }, { "epoch": 4.848514949836098, "grad_norm": 0.1103515625, "learning_rate": 0.0004121227773914771, "loss": 0.5149, "step": 97620 }, { "epoch": 4.849011622131718, "grad_norm": 0.11083984375, "learning_rate": 0.00041208304360782757, "loss": 0.5024, "step": 97630 }, { "epoch": 4.849508294427337, "grad_norm": 0.119140625, "learning_rate": 0.00041204330982417804, "loss": 0.539, "step": 97640 }, { "epoch": 4.850004966722956, "grad_norm": 0.11669921875, "learning_rate": 0.00041200357604052846, "loss": 0.5152, "step": 97650 }, { "epoch": 4.850501639018575, "grad_norm": 0.17578125, "learning_rate": 0.00041196384225687893, "loss": 0.5044, "step": 97660 }, { "epoch": 4.850998311314195, "grad_norm": 0.1337890625, "learning_rate": 0.0004119241084732294, "loss": 0.4817, "step": 97670 }, { "epoch": 4.851494983609815, "grad_norm": 0.1240234375, "learning_rate": 0.0004118843746895799, "loss": 0.5401, "step": 97680 }, { "epoch": 4.851991655905434, "grad_norm": 0.10986328125, "learning_rate": 0.0004118446409059303, "loss": 0.5415, "step": 97690 }, { "epoch": 4.852488328201053, "grad_norm": 0.12158203125, "learning_rate": 0.00041180490712228076, "loss": 0.5361, "step": 97700 }, { "epoch": 4.852985000496672, "grad_norm": 0.119140625, "learning_rate": 0.00041176517333863123, "loss": 0.5263, "step": 97710 }, { "epoch": 4.853481672792292, "grad_norm": 0.126953125, "learning_rate": 0.0004117254395549816, "loss": 0.5104, "step": 97720 }, { "epoch": 4.853978345087911, "grad_norm": 0.11572265625, "learning_rate": 0.0004116857057713321, "loss": 0.5278, "step": 97730 }, { "epoch": 4.85447501738353, "grad_norm": 0.11669921875, "learning_rate": 0.0004116459719876826, "loss": 0.5389, "step": 97740 }, { "epoch": 4.85497168967915, "grad_norm": 0.12060546875, "learning_rate": 0.00041160623820403296, "loss": 0.5303, "step": 97750 }, { "epoch": 4.855468361974769, "grad_norm": 0.11962890625, "learning_rate": 0.00041156650442038343, "loss": 0.5347, "step": 97760 }, { "epoch": 4.855965034270389, "grad_norm": 0.1474609375, "learning_rate": 0.0004115267706367339, "loss": 0.5075, "step": 97770 }, { "epoch": 4.856461706566008, "grad_norm": 0.10205078125, "learning_rate": 0.0004114870368530843, "loss": 0.5044, "step": 97780 }, { "epoch": 4.856958378861627, "grad_norm": 0.1064453125, "learning_rate": 0.0004114473030694348, "loss": 0.5152, "step": 97790 }, { "epoch": 4.857455051157246, "grad_norm": 0.1435546875, "learning_rate": 0.00041140756928578526, "loss": 0.4994, "step": 97800 }, { "epoch": 4.8579517234528655, "grad_norm": 0.1513671875, "learning_rate": 0.0004113678355021357, "loss": 0.5332, "step": 97810 }, { "epoch": 4.858448395748486, "grad_norm": 0.1123046875, "learning_rate": 0.00041132810171848615, "loss": 0.5236, "step": 97820 }, { "epoch": 4.858945068044105, "grad_norm": 0.11181640625, "learning_rate": 0.0004112883679348366, "loss": 0.5387, "step": 97830 }, { "epoch": 4.859441740339724, "grad_norm": 0.1318359375, "learning_rate": 0.0004112486341511871, "loss": 0.4905, "step": 97840 }, { "epoch": 4.859938412635343, "grad_norm": 0.126953125, "learning_rate": 0.0004112089003675375, "loss": 0.4935, "step": 97850 }, { "epoch": 4.8604350849309625, "grad_norm": 0.1611328125, "learning_rate": 0.000411169166583888, "loss": 0.5209, "step": 97860 }, { "epoch": 4.860931757226582, "grad_norm": 0.1357421875, "learning_rate": 0.00041112943280023845, "loss": 0.5148, "step": 97870 }, { "epoch": 4.861428429522201, "grad_norm": 0.1298828125, "learning_rate": 0.00041108969901658887, "loss": 0.4997, "step": 97880 }, { "epoch": 4.86192510181782, "grad_norm": 0.1337890625, "learning_rate": 0.00041104996523293934, "loss": 0.5335, "step": 97890 }, { "epoch": 4.8624217741134395, "grad_norm": 0.119140625, "learning_rate": 0.0004110102314492898, "loss": 0.5021, "step": 97900 }, { "epoch": 4.86291844640906, "grad_norm": 0.1123046875, "learning_rate": 0.0004109704976656402, "loss": 0.521, "step": 97910 }, { "epoch": 4.863415118704679, "grad_norm": 0.1220703125, "learning_rate": 0.0004109307638819907, "loss": 0.522, "step": 97920 }, { "epoch": 4.863911791000298, "grad_norm": 0.10986328125, "learning_rate": 0.00041089103009834117, "loss": 0.5152, "step": 97930 }, { "epoch": 4.864408463295917, "grad_norm": 0.150390625, "learning_rate": 0.00041085129631469153, "loss": 0.4814, "step": 97940 }, { "epoch": 4.8649051355915365, "grad_norm": 0.11767578125, "learning_rate": 0.000410811562531042, "loss": 0.5029, "step": 97950 }, { "epoch": 4.865401807887156, "grad_norm": 0.103515625, "learning_rate": 0.00041077182874739253, "loss": 0.4984, "step": 97960 }, { "epoch": 4.865898480182775, "grad_norm": 0.11279296875, "learning_rate": 0.000410732094963743, "loss": 0.5218, "step": 97970 }, { "epoch": 4.866395152478395, "grad_norm": 0.1240234375, "learning_rate": 0.00041069236118009337, "loss": 0.5101, "step": 97980 }, { "epoch": 4.866891824774014, "grad_norm": 0.1396484375, "learning_rate": 0.00041065262739644384, "loss": 0.5146, "step": 97990 }, { "epoch": 4.8673884970696335, "grad_norm": 0.1220703125, "learning_rate": 0.00041061289361279436, "loss": 0.5067, "step": 98000 }, { "epoch": 4.867885169365253, "grad_norm": 0.1337890625, "learning_rate": 0.0004105731598291447, "loss": 0.4766, "step": 98010 }, { "epoch": 4.868381841660872, "grad_norm": 0.10986328125, "learning_rate": 0.0004105334260454952, "loss": 0.4903, "step": 98020 }, { "epoch": 4.868878513956491, "grad_norm": 0.12109375, "learning_rate": 0.00041049369226184567, "loss": 0.5307, "step": 98030 }, { "epoch": 4.86937518625211, "grad_norm": 0.140625, "learning_rate": 0.0004104539584781961, "loss": 0.5246, "step": 98040 }, { "epoch": 4.8698718585477305, "grad_norm": 0.119140625, "learning_rate": 0.00041041422469454656, "loss": 0.5032, "step": 98050 }, { "epoch": 4.87036853084335, "grad_norm": 0.1103515625, "learning_rate": 0.00041037449091089703, "loss": 0.5249, "step": 98060 }, { "epoch": 4.870865203138969, "grad_norm": 0.11328125, "learning_rate": 0.00041033475712724744, "loss": 0.5255, "step": 98070 }, { "epoch": 4.871361875434588, "grad_norm": 0.1083984375, "learning_rate": 0.0004102950233435979, "loss": 0.5082, "step": 98080 }, { "epoch": 4.871858547730207, "grad_norm": 0.2138671875, "learning_rate": 0.0004102552895599484, "loss": 0.5224, "step": 98090 }, { "epoch": 4.872355220025827, "grad_norm": 0.1728515625, "learning_rate": 0.00041021555577629875, "loss": 0.5227, "step": 98100 }, { "epoch": 4.872851892321446, "grad_norm": 0.12451171875, "learning_rate": 0.0004101758219926493, "loss": 0.5188, "step": 98110 }, { "epoch": 4.873348564617066, "grad_norm": 0.126953125, "learning_rate": 0.00041013608820899975, "loss": 0.4935, "step": 98120 }, { "epoch": 4.873845236912685, "grad_norm": 0.11767578125, "learning_rate": 0.0004100963544253502, "loss": 0.5272, "step": 98130 }, { "epoch": 4.8743419092083045, "grad_norm": 0.1171875, "learning_rate": 0.0004100566206417006, "loss": 0.5161, "step": 98140 }, { "epoch": 4.874838581503924, "grad_norm": 0.154296875, "learning_rate": 0.0004100168868580511, "loss": 0.4965, "step": 98150 }, { "epoch": 4.875335253799543, "grad_norm": 0.12158203125, "learning_rate": 0.0004099771530744016, "loss": 0.4862, "step": 98160 }, { "epoch": 4.875831926095162, "grad_norm": 0.11376953125, "learning_rate": 0.00040993741929075194, "loss": 0.5286, "step": 98170 }, { "epoch": 4.876328598390781, "grad_norm": 0.10986328125, "learning_rate": 0.0004098976855071024, "loss": 0.5334, "step": 98180 }, { "epoch": 4.8768252706864015, "grad_norm": 0.107421875, "learning_rate": 0.00040985795172345294, "loss": 0.5077, "step": 98190 }, { "epoch": 4.877321942982021, "grad_norm": 0.126953125, "learning_rate": 0.0004098182179398033, "loss": 0.5194, "step": 98200 }, { "epoch": 4.87781861527764, "grad_norm": 0.12255859375, "learning_rate": 0.00040977848415615377, "loss": 0.5228, "step": 98210 }, { "epoch": 4.878315287573259, "grad_norm": 0.12890625, "learning_rate": 0.00040973875037250424, "loss": 0.5258, "step": 98220 }, { "epoch": 4.878811959868878, "grad_norm": 0.11279296875, "learning_rate": 0.00040969901658885466, "loss": 0.5157, "step": 98230 }, { "epoch": 4.879308632164498, "grad_norm": 0.130859375, "learning_rate": 0.00040965928280520513, "loss": 0.5134, "step": 98240 }, { "epoch": 4.879805304460117, "grad_norm": 0.1650390625, "learning_rate": 0.0004096195490215556, "loss": 0.5122, "step": 98250 }, { "epoch": 4.880301976755737, "grad_norm": 0.12353515625, "learning_rate": 0.000409579815237906, "loss": 0.5279, "step": 98260 }, { "epoch": 4.880798649051356, "grad_norm": 0.142578125, "learning_rate": 0.0004095400814542565, "loss": 0.5175, "step": 98270 }, { "epoch": 4.881295321346975, "grad_norm": 0.12353515625, "learning_rate": 0.00040950034767060696, "loss": 0.5035, "step": 98280 }, { "epoch": 4.881791993642595, "grad_norm": 0.11376953125, "learning_rate": 0.00040946061388695744, "loss": 0.4978, "step": 98290 }, { "epoch": 4.882288665938214, "grad_norm": 0.1083984375, "learning_rate": 0.00040942088010330785, "loss": 0.4894, "step": 98300 }, { "epoch": 4.882785338233833, "grad_norm": 0.10595703125, "learning_rate": 0.0004093811463196583, "loss": 0.5125, "step": 98310 }, { "epoch": 4.883282010529452, "grad_norm": 0.12890625, "learning_rate": 0.0004093414125360088, "loss": 0.5366, "step": 98320 }, { "epoch": 4.883778682825072, "grad_norm": 0.15625, "learning_rate": 0.0004093016787523592, "loss": 0.509, "step": 98330 }, { "epoch": 4.884275355120692, "grad_norm": 0.130859375, "learning_rate": 0.0004092619449687097, "loss": 0.5094, "step": 98340 }, { "epoch": 4.884772027416311, "grad_norm": 0.1181640625, "learning_rate": 0.00040922221118506015, "loss": 0.4979, "step": 98350 }, { "epoch": 4.88526869971193, "grad_norm": 0.1083984375, "learning_rate": 0.0004091824774014105, "loss": 0.4915, "step": 98360 }, { "epoch": 4.885765372007549, "grad_norm": 0.10693359375, "learning_rate": 0.000409142743617761, "loss": 0.5128, "step": 98370 }, { "epoch": 4.886262044303169, "grad_norm": 0.150390625, "learning_rate": 0.0004091030098341115, "loss": 0.5414, "step": 98380 }, { "epoch": 4.886758716598788, "grad_norm": 0.119140625, "learning_rate": 0.0004090632760504619, "loss": 0.4982, "step": 98390 }, { "epoch": 4.887255388894408, "grad_norm": 0.11865234375, "learning_rate": 0.00040902354226681235, "loss": 0.4913, "step": 98400 }, { "epoch": 4.887752061190027, "grad_norm": 0.1630859375, "learning_rate": 0.0004089838084831628, "loss": 0.5041, "step": 98410 }, { "epoch": 4.888248733485646, "grad_norm": 0.140625, "learning_rate": 0.00040894407469951335, "loss": 0.5105, "step": 98420 }, { "epoch": 4.888745405781266, "grad_norm": 0.119140625, "learning_rate": 0.0004089043409158637, "loss": 0.5313, "step": 98430 }, { "epoch": 4.889242078076885, "grad_norm": 0.1455078125, "learning_rate": 0.0004088646071322142, "loss": 0.5135, "step": 98440 }, { "epoch": 4.889738750372504, "grad_norm": 0.12255859375, "learning_rate": 0.00040882487334856465, "loss": 0.5178, "step": 98450 }, { "epoch": 4.890235422668123, "grad_norm": 0.11083984375, "learning_rate": 0.00040878513956491507, "loss": 0.5356, "step": 98460 }, { "epoch": 4.890732094963743, "grad_norm": 0.10498046875, "learning_rate": 0.00040874540578126554, "loss": 0.5044, "step": 98470 }, { "epoch": 4.891228767259363, "grad_norm": 0.1875, "learning_rate": 0.000408705671997616, "loss": 0.495, "step": 98480 }, { "epoch": 4.891725439554982, "grad_norm": 0.1259765625, "learning_rate": 0.00040866593821396643, "loss": 0.5322, "step": 98490 }, { "epoch": 4.892222111850601, "grad_norm": 0.1123046875, "learning_rate": 0.0004086262044303169, "loss": 0.5052, "step": 98500 }, { "epoch": 4.89271878414622, "grad_norm": 0.1328125, "learning_rate": 0.00040858647064666737, "loss": 0.4894, "step": 98510 }, { "epoch": 4.8932154564418395, "grad_norm": 0.16015625, "learning_rate": 0.0004085467368630178, "loss": 0.5148, "step": 98520 }, { "epoch": 4.893712128737459, "grad_norm": 0.1357421875, "learning_rate": 0.00040850700307936826, "loss": 0.5268, "step": 98530 }, { "epoch": 4.894208801033079, "grad_norm": 0.158203125, "learning_rate": 0.00040846726929571873, "loss": 0.4975, "step": 98540 }, { "epoch": 4.894705473328698, "grad_norm": 0.11376953125, "learning_rate": 0.0004084275355120691, "loss": 0.4836, "step": 98550 }, { "epoch": 4.895202145624317, "grad_norm": 0.125, "learning_rate": 0.0004083878017284196, "loss": 0.5274, "step": 98560 }, { "epoch": 4.8956988179199366, "grad_norm": 0.1357421875, "learning_rate": 0.0004083480679447701, "loss": 0.52, "step": 98570 }, { "epoch": 4.896195490215556, "grad_norm": 0.12255859375, "learning_rate": 0.00040830833416112056, "loss": 0.5171, "step": 98580 }, { "epoch": 4.896692162511175, "grad_norm": 0.11865234375, "learning_rate": 0.0004082686003774709, "loss": 0.5075, "step": 98590 }, { "epoch": 4.897188834806794, "grad_norm": 0.123046875, "learning_rate": 0.00040822886659382145, "loss": 0.5407, "step": 98600 }, { "epoch": 4.8976855071024135, "grad_norm": 0.103515625, "learning_rate": 0.0004081891328101719, "loss": 0.5008, "step": 98610 }, { "epoch": 4.898182179398033, "grad_norm": 0.1484375, "learning_rate": 0.0004081493990265223, "loss": 0.5087, "step": 98620 }, { "epoch": 4.898678851693653, "grad_norm": 0.11669921875, "learning_rate": 0.00040810966524287276, "loss": 0.52, "step": 98630 }, { "epoch": 4.899175523989272, "grad_norm": 0.10986328125, "learning_rate": 0.00040806993145922323, "loss": 0.5035, "step": 98640 }, { "epoch": 4.899672196284891, "grad_norm": 0.15625, "learning_rate": 0.00040803019767557365, "loss": 0.5082, "step": 98650 }, { "epoch": 4.9001688685805105, "grad_norm": 0.11474609375, "learning_rate": 0.0004079904638919241, "loss": 0.4911, "step": 98660 }, { "epoch": 4.90066554087613, "grad_norm": 0.1357421875, "learning_rate": 0.0004079507301082746, "loss": 0.5165, "step": 98670 }, { "epoch": 4.901162213171749, "grad_norm": 0.10693359375, "learning_rate": 0.000407910996324625, "loss": 0.5235, "step": 98680 }, { "epoch": 4.901658885467368, "grad_norm": 0.24609375, "learning_rate": 0.0004078712625409755, "loss": 0.5162, "step": 98690 }, { "epoch": 4.902155557762988, "grad_norm": 0.1611328125, "learning_rate": 0.00040783152875732595, "loss": 0.5158, "step": 98700 }, { "epoch": 4.9026522300586075, "grad_norm": 0.1162109375, "learning_rate": 0.00040779179497367636, "loss": 0.5105, "step": 98710 }, { "epoch": 4.903148902354227, "grad_norm": 0.1171875, "learning_rate": 0.00040775206119002684, "loss": 0.4942, "step": 98720 }, { "epoch": 4.903645574649846, "grad_norm": 0.12060546875, "learning_rate": 0.0004077123274063773, "loss": 0.5166, "step": 98730 }, { "epoch": 4.904142246945465, "grad_norm": 0.1318359375, "learning_rate": 0.0004076725936227278, "loss": 0.5246, "step": 98740 }, { "epoch": 4.904638919241084, "grad_norm": 0.11572265625, "learning_rate": 0.0004076328598390782, "loss": 0.5428, "step": 98750 }, { "epoch": 4.905135591536704, "grad_norm": 0.10693359375, "learning_rate": 0.00040759312605542867, "loss": 0.5142, "step": 98760 }, { "epoch": 4.905632263832324, "grad_norm": 0.1328125, "learning_rate": 0.00040755339227177914, "loss": 0.5362, "step": 98770 }, { "epoch": 4.906128936127943, "grad_norm": 0.1083984375, "learning_rate": 0.0004075136584881295, "loss": 0.5124, "step": 98780 }, { "epoch": 4.906625608423562, "grad_norm": 0.10791015625, "learning_rate": 0.00040747392470448003, "loss": 0.5158, "step": 98790 }, { "epoch": 4.907122280719181, "grad_norm": 0.162109375, "learning_rate": 0.0004074341909208305, "loss": 0.4925, "step": 98800 }, { "epoch": 4.907618953014801, "grad_norm": 0.1318359375, "learning_rate": 0.00040739445713718086, "loss": 0.5157, "step": 98810 }, { "epoch": 4.90811562531042, "grad_norm": 0.10498046875, "learning_rate": 0.00040735472335353133, "loss": 0.4888, "step": 98820 }, { "epoch": 4.908612297606039, "grad_norm": 0.1865234375, "learning_rate": 0.00040731498956988186, "loss": 0.5231, "step": 98830 }, { "epoch": 4.909108969901659, "grad_norm": 0.11181640625, "learning_rate": 0.0004072752557862322, "loss": 0.503, "step": 98840 }, { "epoch": 4.9096056421972785, "grad_norm": 0.1064453125, "learning_rate": 0.0004072355220025827, "loss": 0.4851, "step": 98850 }, { "epoch": 4.910102314492898, "grad_norm": 0.1318359375, "learning_rate": 0.00040719578821893316, "loss": 0.5166, "step": 98860 }, { "epoch": 4.910598986788517, "grad_norm": 0.11279296875, "learning_rate": 0.0004071560544352837, "loss": 0.51, "step": 98870 }, { "epoch": 4.911095659084136, "grad_norm": 0.1318359375, "learning_rate": 0.00040711632065163405, "loss": 0.5411, "step": 98880 }, { "epoch": 4.911592331379755, "grad_norm": 0.1220703125, "learning_rate": 0.0004070765868679845, "loss": 0.4917, "step": 98890 }, { "epoch": 4.912089003675375, "grad_norm": 0.107421875, "learning_rate": 0.000407036853084335, "loss": 0.5179, "step": 98900 }, { "epoch": 4.912585675970995, "grad_norm": 0.1103515625, "learning_rate": 0.0004069971193006854, "loss": 0.5128, "step": 98910 }, { "epoch": 4.913082348266614, "grad_norm": 0.1298828125, "learning_rate": 0.0004069573855170359, "loss": 0.508, "step": 98920 }, { "epoch": 4.913579020562233, "grad_norm": 0.1298828125, "learning_rate": 0.00040691765173338636, "loss": 0.5038, "step": 98930 }, { "epoch": 4.914075692857852, "grad_norm": 0.146484375, "learning_rate": 0.00040687791794973677, "loss": 0.5337, "step": 98940 }, { "epoch": 4.914572365153472, "grad_norm": 0.10107421875, "learning_rate": 0.00040683818416608724, "loss": 0.4889, "step": 98950 }, { "epoch": 4.915069037449091, "grad_norm": 0.189453125, "learning_rate": 0.0004067984503824377, "loss": 0.5286, "step": 98960 }, { "epoch": 4.91556570974471, "grad_norm": 0.11669921875, "learning_rate": 0.0004067587165987881, "loss": 0.5046, "step": 98970 }, { "epoch": 4.91606238204033, "grad_norm": 0.134765625, "learning_rate": 0.0004067189828151386, "loss": 0.4965, "step": 98980 }, { "epoch": 4.916559054335949, "grad_norm": 0.107421875, "learning_rate": 0.0004066792490314891, "loss": 0.5457, "step": 98990 }, { "epoch": 4.917055726631569, "grad_norm": 0.11083984375, "learning_rate": 0.00040663951524783944, "loss": 0.5051, "step": 99000 }, { "epoch": 4.917552398927188, "grad_norm": 0.11376953125, "learning_rate": 0.0004065997814641899, "loss": 0.4822, "step": 99010 }, { "epoch": 4.918049071222807, "grad_norm": 0.115234375, "learning_rate": 0.00040656004768054043, "loss": 0.5285, "step": 99020 }, { "epoch": 4.918545743518426, "grad_norm": 0.11669921875, "learning_rate": 0.0004065203138968909, "loss": 0.4931, "step": 99030 }, { "epoch": 4.9190424158140456, "grad_norm": 0.154296875, "learning_rate": 0.00040648058011324127, "loss": 0.5481, "step": 99040 }, { "epoch": 4.919539088109666, "grad_norm": 0.11279296875, "learning_rate": 0.00040644084632959174, "loss": 0.5057, "step": 99050 }, { "epoch": 4.920035760405285, "grad_norm": 0.1279296875, "learning_rate": 0.00040640111254594227, "loss": 0.4832, "step": 99060 }, { "epoch": 4.920532432700904, "grad_norm": 0.11962890625, "learning_rate": 0.00040636137876229263, "loss": 0.5402, "step": 99070 }, { "epoch": 4.921029104996523, "grad_norm": 0.10400390625, "learning_rate": 0.0004063216449786431, "loss": 0.5001, "step": 99080 }, { "epoch": 4.921525777292143, "grad_norm": 0.1728515625, "learning_rate": 0.00040628191119499357, "loss": 0.5159, "step": 99090 }, { "epoch": 4.922022449587762, "grad_norm": 0.12060546875, "learning_rate": 0.000406242177411344, "loss": 0.5343, "step": 99100 }, { "epoch": 4.922519121883381, "grad_norm": 0.11962890625, "learning_rate": 0.00040620244362769446, "loss": 0.5164, "step": 99110 }, { "epoch": 4.923015794179001, "grad_norm": 0.10205078125, "learning_rate": 0.00040616270984404493, "loss": 0.498, "step": 99120 }, { "epoch": 4.92351246647462, "grad_norm": 0.11962890625, "learning_rate": 0.00040612297606039535, "loss": 0.5175, "step": 99130 }, { "epoch": 4.92400913877024, "grad_norm": 0.12890625, "learning_rate": 0.0004060832422767458, "loss": 0.5302, "step": 99140 }, { "epoch": 4.924505811065859, "grad_norm": 0.1357421875, "learning_rate": 0.0004060435084930963, "loss": 0.5007, "step": 99150 }, { "epoch": 4.925002483361478, "grad_norm": 0.10498046875, "learning_rate": 0.0004060037747094467, "loss": 0.5229, "step": 99160 }, { "epoch": 4.925499155657097, "grad_norm": 0.1220703125, "learning_rate": 0.0004059640409257972, "loss": 0.519, "step": 99170 }, { "epoch": 4.9259958279527165, "grad_norm": 0.11181640625, "learning_rate": 0.00040592430714214765, "loss": 0.5088, "step": 99180 }, { "epoch": 4.926492500248337, "grad_norm": 0.11083984375, "learning_rate": 0.0004058845733584981, "loss": 0.5215, "step": 99190 }, { "epoch": 4.926989172543956, "grad_norm": 0.1064453125, "learning_rate": 0.0004058448395748485, "loss": 0.5158, "step": 99200 }, { "epoch": 4.927485844839575, "grad_norm": 0.12060546875, "learning_rate": 0.000405805105791199, "loss": 0.5514, "step": 99210 }, { "epoch": 4.927982517135194, "grad_norm": 0.12060546875, "learning_rate": 0.0004057653720075495, "loss": 0.5251, "step": 99220 }, { "epoch": 4.9284791894308135, "grad_norm": 0.1015625, "learning_rate": 0.00040572563822389985, "loss": 0.4987, "step": 99230 }, { "epoch": 4.928975861726433, "grad_norm": 0.15234375, "learning_rate": 0.0004056859044402503, "loss": 0.495, "step": 99240 }, { "epoch": 4.929472534022052, "grad_norm": 0.1318359375, "learning_rate": 0.00040564617065660084, "loss": 0.5459, "step": 99250 }, { "epoch": 4.929969206317671, "grad_norm": 0.11083984375, "learning_rate": 0.0004056064368729512, "loss": 0.5065, "step": 99260 }, { "epoch": 4.930465878613291, "grad_norm": 0.125, "learning_rate": 0.0004055667030893017, "loss": 0.5229, "step": 99270 }, { "epoch": 4.930962550908911, "grad_norm": 0.1259765625, "learning_rate": 0.00040552696930565215, "loss": 0.4875, "step": 99280 }, { "epoch": 4.93145922320453, "grad_norm": 0.11572265625, "learning_rate": 0.00040548723552200256, "loss": 0.5139, "step": 99290 }, { "epoch": 4.931955895500149, "grad_norm": 0.10986328125, "learning_rate": 0.00040544750173835304, "loss": 0.528, "step": 99300 }, { "epoch": 4.932452567795768, "grad_norm": 0.10693359375, "learning_rate": 0.0004054077679547035, "loss": 0.5157, "step": 99310 }, { "epoch": 4.9329492400913875, "grad_norm": 0.12890625, "learning_rate": 0.000405368034171054, "loss": 0.5011, "step": 99320 }, { "epoch": 4.933445912387007, "grad_norm": 0.12890625, "learning_rate": 0.0004053283003874044, "loss": 0.4929, "step": 99330 }, { "epoch": 4.933942584682626, "grad_norm": 0.12451171875, "learning_rate": 0.00040528856660375487, "loss": 0.4835, "step": 99340 }, { "epoch": 4.934439256978246, "grad_norm": 0.119140625, "learning_rate": 0.00040524883282010534, "loss": 0.5361, "step": 99350 }, { "epoch": 4.934935929273865, "grad_norm": 0.193359375, "learning_rate": 0.00040520909903645576, "loss": 0.5291, "step": 99360 }, { "epoch": 4.9354326015694845, "grad_norm": 0.11669921875, "learning_rate": 0.00040516936525280623, "loss": 0.5118, "step": 99370 }, { "epoch": 4.935929273865104, "grad_norm": 0.1796875, "learning_rate": 0.0004051296314691567, "loss": 0.4971, "step": 99380 }, { "epoch": 4.936425946160723, "grad_norm": 0.12353515625, "learning_rate": 0.0004050898976855071, "loss": 0.4903, "step": 99390 }, { "epoch": 4.936922618456342, "grad_norm": 0.1669921875, "learning_rate": 0.0004050501639018576, "loss": 0.5122, "step": 99400 }, { "epoch": 4.937419290751961, "grad_norm": 0.11083984375, "learning_rate": 0.00040501043011820806, "loss": 0.4923, "step": 99410 }, { "epoch": 4.9379159630475815, "grad_norm": 0.1318359375, "learning_rate": 0.0004049706963345584, "loss": 0.5291, "step": 99420 }, { "epoch": 4.938412635343201, "grad_norm": 0.12060546875, "learning_rate": 0.00040493096255090895, "loss": 0.5176, "step": 99430 }, { "epoch": 4.93890930763882, "grad_norm": 0.12158203125, "learning_rate": 0.0004048912287672594, "loss": 0.5011, "step": 99440 }, { "epoch": 4.939405979934439, "grad_norm": 0.1435546875, "learning_rate": 0.0004048514949836098, "loss": 0.501, "step": 99450 }, { "epoch": 4.939902652230058, "grad_norm": 0.119140625, "learning_rate": 0.00040481176119996025, "loss": 0.5367, "step": 99460 }, { "epoch": 4.940399324525678, "grad_norm": 0.1259765625, "learning_rate": 0.0004047720274163107, "loss": 0.5023, "step": 99470 }, { "epoch": 4.940895996821297, "grad_norm": 0.140625, "learning_rate": 0.00040473229363266125, "loss": 0.5122, "step": 99480 }, { "epoch": 4.941392669116917, "grad_norm": 0.10595703125, "learning_rate": 0.0004046925598490116, "loss": 0.5038, "step": 99490 }, { "epoch": 4.941889341412536, "grad_norm": 0.12060546875, "learning_rate": 0.0004046528260653621, "loss": 0.5243, "step": 99500 }, { "epoch": 4.9423860137081554, "grad_norm": 0.123046875, "learning_rate": 0.00040461309228171256, "loss": 0.5047, "step": 99510 }, { "epoch": 4.942882686003775, "grad_norm": 0.12158203125, "learning_rate": 0.00040457335849806297, "loss": 0.5128, "step": 99520 }, { "epoch": 4.943379358299394, "grad_norm": 0.10791015625, "learning_rate": 0.00040453362471441344, "loss": 0.5099, "step": 99530 }, { "epoch": 4.943876030595013, "grad_norm": 0.12109375, "learning_rate": 0.0004044938909307639, "loss": 0.5317, "step": 99540 }, { "epoch": 4.944372702890632, "grad_norm": 0.1298828125, "learning_rate": 0.00040445415714711433, "loss": 0.5239, "step": 99550 }, { "epoch": 4.9448693751862525, "grad_norm": 0.10791015625, "learning_rate": 0.0004044144233634648, "loss": 0.5218, "step": 99560 }, { "epoch": 4.945366047481872, "grad_norm": 0.12158203125, "learning_rate": 0.0004043746895798153, "loss": 0.5068, "step": 99570 }, { "epoch": 4.945862719777491, "grad_norm": 0.1611328125, "learning_rate": 0.0004043349557961657, "loss": 0.5081, "step": 99580 }, { "epoch": 4.94635939207311, "grad_norm": 0.138671875, "learning_rate": 0.00040429522201251616, "loss": 0.4895, "step": 99590 }, { "epoch": 4.946856064368729, "grad_norm": 0.1357421875, "learning_rate": 0.00040425548822886663, "loss": 0.5348, "step": 99600 }, { "epoch": 4.947352736664349, "grad_norm": 0.1220703125, "learning_rate": 0.000404215754445217, "loss": 0.5263, "step": 99610 }, { "epoch": 4.947849408959968, "grad_norm": 0.13671875, "learning_rate": 0.0004041760206615675, "loss": 0.5113, "step": 99620 }, { "epoch": 4.948346081255588, "grad_norm": 0.1083984375, "learning_rate": 0.000404136286877918, "loss": 0.4971, "step": 99630 }, { "epoch": 4.948842753551207, "grad_norm": 0.11376953125, "learning_rate": 0.00040409655309426847, "loss": 0.5496, "step": 99640 }, { "epoch": 4.949339425846826, "grad_norm": 0.115234375, "learning_rate": 0.00040405681931061883, "loss": 0.4924, "step": 99650 }, { "epoch": 4.949836098142446, "grad_norm": 0.10400390625, "learning_rate": 0.00040401708552696935, "loss": 0.4995, "step": 99660 }, { "epoch": 4.950332770438065, "grad_norm": 0.1044921875, "learning_rate": 0.0004039773517433198, "loss": 0.5248, "step": 99670 }, { "epoch": 4.950829442733684, "grad_norm": 0.107421875, "learning_rate": 0.0004039376179596702, "loss": 0.5132, "step": 99680 }, { "epoch": 4.951326115029303, "grad_norm": 0.11376953125, "learning_rate": 0.00040389788417602066, "loss": 0.5077, "step": 99690 }, { "epoch": 4.951822787324923, "grad_norm": 0.1162109375, "learning_rate": 0.0004038581503923712, "loss": 0.4941, "step": 99700 }, { "epoch": 4.952319459620543, "grad_norm": 0.146484375, "learning_rate": 0.00040381841660872155, "loss": 0.5123, "step": 99710 }, { "epoch": 4.952816131916162, "grad_norm": 0.12109375, "learning_rate": 0.000403778682825072, "loss": 0.4875, "step": 99720 }, { "epoch": 4.953312804211781, "grad_norm": 0.16015625, "learning_rate": 0.0004037389490414225, "loss": 0.5222, "step": 99730 }, { "epoch": 4.9538094765074, "grad_norm": 0.1298828125, "learning_rate": 0.0004036992152577729, "loss": 0.487, "step": 99740 }, { "epoch": 4.95430614880302, "grad_norm": 0.1044921875, "learning_rate": 0.0004036594814741234, "loss": 0.512, "step": 99750 }, { "epoch": 4.954802821098639, "grad_norm": 0.1201171875, "learning_rate": 0.00040361974769047385, "loss": 0.5148, "step": 99760 }, { "epoch": 4.955299493394259, "grad_norm": 0.10791015625, "learning_rate": 0.0004035800139068243, "loss": 0.4891, "step": 99770 }, { "epoch": 4.955796165689878, "grad_norm": 0.109375, "learning_rate": 0.00040354028012317474, "loss": 0.5063, "step": 99780 }, { "epoch": 4.956292837985497, "grad_norm": 0.1455078125, "learning_rate": 0.0004035005463395252, "loss": 0.5276, "step": 99790 }, { "epoch": 4.956789510281117, "grad_norm": 0.11181640625, "learning_rate": 0.0004034608125558757, "loss": 0.5103, "step": 99800 }, { "epoch": 4.957286182576736, "grad_norm": 0.11181640625, "learning_rate": 0.0004034210787722261, "loss": 0.5315, "step": 99810 }, { "epoch": 4.957782854872355, "grad_norm": 0.111328125, "learning_rate": 0.00040338134498857657, "loss": 0.4878, "step": 99820 }, { "epoch": 4.958279527167974, "grad_norm": 0.12109375, "learning_rate": 0.00040334161120492704, "loss": 0.4885, "step": 99830 }, { "epoch": 4.958776199463594, "grad_norm": 0.1240234375, "learning_rate": 0.0004033018774212774, "loss": 0.5389, "step": 99840 }, { "epoch": 4.959272871759214, "grad_norm": 0.11767578125, "learning_rate": 0.00040326214363762793, "loss": 0.5157, "step": 99850 }, { "epoch": 4.959769544054833, "grad_norm": 0.1123046875, "learning_rate": 0.0004032224098539784, "loss": 0.4998, "step": 99860 }, { "epoch": 4.960266216350452, "grad_norm": 0.1142578125, "learning_rate": 0.00040318267607032877, "loss": 0.4896, "step": 99870 }, { "epoch": 4.960762888646071, "grad_norm": 0.09619140625, "learning_rate": 0.00040314294228667924, "loss": 0.5059, "step": 99880 }, { "epoch": 4.9612595609416905, "grad_norm": 0.1328125, "learning_rate": 0.00040310320850302976, "loss": 0.5358, "step": 99890 }, { "epoch": 4.96175623323731, "grad_norm": 0.130859375, "learning_rate": 0.0004030634747193801, "loss": 0.5355, "step": 99900 }, { "epoch": 4.96225290553293, "grad_norm": 0.12890625, "learning_rate": 0.0004030237409357306, "loss": 0.5232, "step": 99910 }, { "epoch": 4.962749577828549, "grad_norm": 0.130859375, "learning_rate": 0.00040298400715208107, "loss": 0.5263, "step": 99920 }, { "epoch": 4.963246250124168, "grad_norm": 0.12158203125, "learning_rate": 0.0004029442733684316, "loss": 0.5206, "step": 99930 }, { "epoch": 4.9637429224197875, "grad_norm": 0.13671875, "learning_rate": 0.00040290453958478196, "loss": 0.546, "step": 99940 }, { "epoch": 4.964239594715407, "grad_norm": 0.1015625, "learning_rate": 0.00040286480580113243, "loss": 0.5026, "step": 99950 }, { "epoch": 4.964736267011026, "grad_norm": 0.1416015625, "learning_rate": 0.0004028250720174829, "loss": 0.5343, "step": 99960 }, { "epoch": 4.965232939306645, "grad_norm": 0.1611328125, "learning_rate": 0.0004027853382338333, "loss": 0.498, "step": 99970 }, { "epoch": 4.9657296116022644, "grad_norm": 0.111328125, "learning_rate": 0.0004027456044501838, "loss": 0.498, "step": 99980 }, { "epoch": 4.966226283897885, "grad_norm": 0.1064453125, "learning_rate": 0.00040270587066653426, "loss": 0.5245, "step": 99990 }, { "epoch": 4.966722956193504, "grad_norm": 0.1279296875, "learning_rate": 0.0004026661368828847, "loss": 0.5091, "step": 100000 }, { "epoch": 4.967219628489123, "grad_norm": 0.1103515625, "learning_rate": 0.00040262640309923515, "loss": 0.4981, "step": 100010 }, { "epoch": 4.967716300784742, "grad_norm": 0.13671875, "learning_rate": 0.0004025866693155856, "loss": 0.5341, "step": 100020 }, { "epoch": 4.9682129730803615, "grad_norm": 0.11767578125, "learning_rate": 0.00040254693553193604, "loss": 0.5405, "step": 100030 }, { "epoch": 4.968709645375981, "grad_norm": 0.140625, "learning_rate": 0.0004025072017482865, "loss": 0.4873, "step": 100040 }, { "epoch": 4.9692063176716, "grad_norm": 0.140625, "learning_rate": 0.000402467467964637, "loss": 0.5229, "step": 100050 }, { "epoch": 4.969702989967219, "grad_norm": 0.1474609375, "learning_rate": 0.00040242773418098745, "loss": 0.5282, "step": 100060 }, { "epoch": 4.970199662262839, "grad_norm": 0.1123046875, "learning_rate": 0.0004023880003973378, "loss": 0.5119, "step": 100070 }, { "epoch": 4.9706963345584585, "grad_norm": 0.1142578125, "learning_rate": 0.00040234826661368834, "loss": 0.5049, "step": 100080 }, { "epoch": 4.971193006854078, "grad_norm": 0.1533203125, "learning_rate": 0.0004023085328300388, "loss": 0.5449, "step": 100090 }, { "epoch": 4.971689679149697, "grad_norm": 0.1533203125, "learning_rate": 0.00040226879904638917, "loss": 0.5154, "step": 100100 }, { "epoch": 4.972186351445316, "grad_norm": 0.10693359375, "learning_rate": 0.00040222906526273964, "loss": 0.5116, "step": 100110 }, { "epoch": 4.972683023740935, "grad_norm": 0.1328125, "learning_rate": 0.00040218933147909017, "loss": 0.5156, "step": 100120 }, { "epoch": 4.973179696036555, "grad_norm": 0.1474609375, "learning_rate": 0.00040214959769544053, "loss": 0.517, "step": 100130 }, { "epoch": 4.973676368332175, "grad_norm": 0.1455078125, "learning_rate": 0.000402109863911791, "loss": 0.4893, "step": 100140 }, { "epoch": 4.974173040627794, "grad_norm": 0.1103515625, "learning_rate": 0.0004020701301281415, "loss": 0.5071, "step": 100150 }, { "epoch": 4.974669712923413, "grad_norm": 0.119140625, "learning_rate": 0.0004020303963444919, "loss": 0.5004, "step": 100160 }, { "epoch": 4.975166385219032, "grad_norm": 0.1298828125, "learning_rate": 0.00040199066256084236, "loss": 0.538, "step": 100170 }, { "epoch": 4.975663057514652, "grad_norm": 0.19140625, "learning_rate": 0.00040195092877719284, "loss": 0.523, "step": 100180 }, { "epoch": 4.976159729810271, "grad_norm": 0.10546875, "learning_rate": 0.00040191119499354325, "loss": 0.5077, "step": 100190 }, { "epoch": 4.97665640210589, "grad_norm": 0.11572265625, "learning_rate": 0.0004018714612098937, "loss": 0.5341, "step": 100200 }, { "epoch": 4.97715307440151, "grad_norm": 0.1474609375, "learning_rate": 0.0004018317274262442, "loss": 0.5223, "step": 100210 }, { "epoch": 4.9776497466971295, "grad_norm": 0.111328125, "learning_rate": 0.00040179199364259467, "loss": 0.4992, "step": 100220 }, { "epoch": 4.978146418992749, "grad_norm": 0.1572265625, "learning_rate": 0.0004017522598589451, "loss": 0.5209, "step": 100230 }, { "epoch": 4.978643091288368, "grad_norm": 0.1220703125, "learning_rate": 0.00040171252607529555, "loss": 0.5051, "step": 100240 }, { "epoch": 4.979139763583987, "grad_norm": 0.12255859375, "learning_rate": 0.000401672792291646, "loss": 0.5268, "step": 100250 }, { "epoch": 4.979636435879606, "grad_norm": 0.11669921875, "learning_rate": 0.00040163305850799644, "loss": 0.5149, "step": 100260 }, { "epoch": 4.980133108175226, "grad_norm": 0.11376953125, "learning_rate": 0.0004015933247243469, "loss": 0.5173, "step": 100270 }, { "epoch": 4.980629780470846, "grad_norm": 0.1669921875, "learning_rate": 0.0004015535909406974, "loss": 0.4917, "step": 100280 }, { "epoch": 4.981126452766465, "grad_norm": 0.17578125, "learning_rate": 0.00040151385715704775, "loss": 0.5374, "step": 100290 }, { "epoch": 4.981623125062084, "grad_norm": 0.1005859375, "learning_rate": 0.0004014741233733983, "loss": 0.5289, "step": 100300 }, { "epoch": 4.982119797357703, "grad_norm": 0.130859375, "learning_rate": 0.00040143438958974875, "loss": 0.5264, "step": 100310 }, { "epoch": 4.982616469653323, "grad_norm": 0.11767578125, "learning_rate": 0.0004013946558060991, "loss": 0.5166, "step": 100320 }, { "epoch": 4.983113141948942, "grad_norm": 0.1708984375, "learning_rate": 0.0004013549220224496, "loss": 0.5035, "step": 100330 }, { "epoch": 4.983609814244561, "grad_norm": 0.111328125, "learning_rate": 0.00040131518823880005, "loss": 0.5512, "step": 100340 }, { "epoch": 4.984106486540181, "grad_norm": 0.130859375, "learning_rate": 0.00040127545445515047, "loss": 0.5229, "step": 100350 }, { "epoch": 4.9846031588358, "grad_norm": 0.123046875, "learning_rate": 0.00040123572067150094, "loss": 0.5159, "step": 100360 }, { "epoch": 4.98509983113142, "grad_norm": 0.12158203125, "learning_rate": 0.0004011959868878514, "loss": 0.5254, "step": 100370 }, { "epoch": 4.985596503427039, "grad_norm": 0.115234375, "learning_rate": 0.0004011562531042019, "loss": 0.5119, "step": 100380 }, { "epoch": 4.986093175722658, "grad_norm": 0.12890625, "learning_rate": 0.0004011165193205523, "loss": 0.5259, "step": 100390 }, { "epoch": 4.986589848018277, "grad_norm": 0.14453125, "learning_rate": 0.00040107678553690277, "loss": 0.5211, "step": 100400 }, { "epoch": 4.9870865203138965, "grad_norm": 0.12353515625, "learning_rate": 0.00040103705175325324, "loss": 0.5141, "step": 100410 }, { "epoch": 4.987583192609517, "grad_norm": 0.12255859375, "learning_rate": 0.00040099731796960366, "loss": 0.5036, "step": 100420 }, { "epoch": 4.988079864905136, "grad_norm": 0.146484375, "learning_rate": 0.00040095758418595413, "loss": 0.5237, "step": 100430 }, { "epoch": 4.988576537200755, "grad_norm": 0.138671875, "learning_rate": 0.0004009178504023046, "loss": 0.4908, "step": 100440 }, { "epoch": 4.989073209496374, "grad_norm": 0.11474609375, "learning_rate": 0.000400878116618655, "loss": 0.5049, "step": 100450 }, { "epoch": 4.989569881791994, "grad_norm": 0.11279296875, "learning_rate": 0.0004008383828350055, "loss": 0.5167, "step": 100460 }, { "epoch": 4.990066554087613, "grad_norm": 0.1220703125, "learning_rate": 0.00040079864905135596, "loss": 0.4888, "step": 100470 }, { "epoch": 4.990563226383232, "grad_norm": 0.1416015625, "learning_rate": 0.0004007589152677063, "loss": 0.5351, "step": 100480 }, { "epoch": 4.991059898678852, "grad_norm": 0.10400390625, "learning_rate": 0.00040071918148405685, "loss": 0.5331, "step": 100490 }, { "epoch": 4.991556570974471, "grad_norm": 0.111328125, "learning_rate": 0.0004006794477004073, "loss": 0.5051, "step": 100500 }, { "epoch": 4.992053243270091, "grad_norm": 0.12158203125, "learning_rate": 0.0004006397139167578, "loss": 0.5323, "step": 100510 }, { "epoch": 4.99254991556571, "grad_norm": 0.1142578125, "learning_rate": 0.00040059998013310816, "loss": 0.5355, "step": 100520 }, { "epoch": 4.993046587861329, "grad_norm": 0.1318359375, "learning_rate": 0.0004005602463494587, "loss": 0.5263, "step": 100530 }, { "epoch": 4.993543260156948, "grad_norm": 0.12451171875, "learning_rate": 0.00040052051256580915, "loss": 0.5215, "step": 100540 }, { "epoch": 4.9940399324525675, "grad_norm": 0.11767578125, "learning_rate": 0.0004004807787821595, "loss": 0.5193, "step": 100550 }, { "epoch": 4.994536604748188, "grad_norm": 0.1533203125, "learning_rate": 0.00040044104499851, "loss": 0.4774, "step": 100560 }, { "epoch": 4.995033277043807, "grad_norm": 0.1318359375, "learning_rate": 0.00040040131121486046, "loss": 0.5122, "step": 100570 }, { "epoch": 4.995529949339426, "grad_norm": 0.11083984375, "learning_rate": 0.0004003615774312109, "loss": 0.5145, "step": 100580 }, { "epoch": 4.996026621635045, "grad_norm": 0.1484375, "learning_rate": 0.00040032184364756135, "loss": 0.5119, "step": 100590 }, { "epoch": 4.9965232939306645, "grad_norm": 0.109375, "learning_rate": 0.0004002821098639118, "loss": 0.5173, "step": 100600 }, { "epoch": 4.997019966226284, "grad_norm": 0.11376953125, "learning_rate": 0.00040024237608026224, "loss": 0.5329, "step": 100610 }, { "epoch": 4.997516638521903, "grad_norm": 0.13671875, "learning_rate": 0.0004002026422966127, "loss": 0.5277, "step": 100620 }, { "epoch": 4.998013310817523, "grad_norm": 0.1806640625, "learning_rate": 0.0004001629085129632, "loss": 0.5278, "step": 100630 }, { "epoch": 4.998509983113142, "grad_norm": 0.138671875, "learning_rate": 0.0004001231747293136, "loss": 0.514, "step": 100640 }, { "epoch": 4.9990066554087615, "grad_norm": 0.11767578125, "learning_rate": 0.00040008344094566407, "loss": 0.51, "step": 100650 }, { "epoch": 4.999503327704381, "grad_norm": 0.1162109375, "learning_rate": 0.00040004370716201454, "loss": 0.5158, "step": 100660 }, { "epoch": 5.0, "grad_norm": 0.107421875, "learning_rate": 0.000400003973378365, "loss": 0.5036, "step": 100670 }, { "epoch": 5.000496672295619, "grad_norm": 0.12890625, "learning_rate": 0.00039996423959471543, "loss": 0.5218, "step": 100680 }, { "epoch": 5.0009933445912385, "grad_norm": 0.1103515625, "learning_rate": 0.0003999245058110659, "loss": 0.5116, "step": 100690 }, { "epoch": 5.001490016886858, "grad_norm": 0.1201171875, "learning_rate": 0.0003998847720274163, "loss": 0.4877, "step": 100700 }, { "epoch": 5.001986689182478, "grad_norm": 0.1318359375, "learning_rate": 0.0003998450382437668, "loss": 0.5158, "step": 100710 }, { "epoch": 5.002483361478097, "grad_norm": 0.15234375, "learning_rate": 0.00039980530446011726, "loss": 0.5278, "step": 100720 }, { "epoch": 5.002980033773716, "grad_norm": 0.11083984375, "learning_rate": 0.0003997655706764677, "loss": 0.5015, "step": 100730 }, { "epoch": 5.0034767060693355, "grad_norm": 0.11669921875, "learning_rate": 0.00039972583689281815, "loss": 0.4608, "step": 100740 }, { "epoch": 5.003973378364955, "grad_norm": 0.1201171875, "learning_rate": 0.00039968610310916856, "loss": 0.5155, "step": 100750 }, { "epoch": 5.004470050660574, "grad_norm": 0.13671875, "learning_rate": 0.00039964636932551904, "loss": 0.5058, "step": 100760 }, { "epoch": 5.004966722956193, "grad_norm": 0.109375, "learning_rate": 0.0003996066355418695, "loss": 0.4725, "step": 100770 }, { "epoch": 5.005463395251813, "grad_norm": 0.123046875, "learning_rate": 0.0003995669017582199, "loss": 0.4929, "step": 100780 }, { "epoch": 5.0059600675474325, "grad_norm": 0.1240234375, "learning_rate": 0.0003995271679745704, "loss": 0.5012, "step": 100790 }, { "epoch": 5.006456739843052, "grad_norm": 0.107421875, "learning_rate": 0.00039948743419092087, "loss": 0.4765, "step": 100800 }, { "epoch": 5.006953412138671, "grad_norm": 0.162109375, "learning_rate": 0.0003994477004072713, "loss": 0.4979, "step": 100810 }, { "epoch": 5.00745008443429, "grad_norm": 0.12060546875, "learning_rate": 0.00039940796662362176, "loss": 0.5149, "step": 100820 }, { "epoch": 5.007946756729909, "grad_norm": 0.12109375, "learning_rate": 0.0003993682328399722, "loss": 0.4895, "step": 100830 }, { "epoch": 5.008443429025529, "grad_norm": 0.11669921875, "learning_rate": 0.00039932849905632264, "loss": 0.5264, "step": 100840 }, { "epoch": 5.008940101321148, "grad_norm": 0.11181640625, "learning_rate": 0.0003992887652726731, "loss": 0.4954, "step": 100850 }, { "epoch": 5.009436773616768, "grad_norm": 0.1689453125, "learning_rate": 0.00039924903148902353, "loss": 0.5156, "step": 100860 }, { "epoch": 5.009933445912387, "grad_norm": 0.10546875, "learning_rate": 0.000399209297705374, "loss": 0.4886, "step": 100870 }, { "epoch": 5.010430118208006, "grad_norm": 0.13671875, "learning_rate": 0.0003991695639217245, "loss": 0.4991, "step": 100880 }, { "epoch": 5.010926790503626, "grad_norm": 0.130859375, "learning_rate": 0.0003991298301380749, "loss": 0.4754, "step": 100890 }, { "epoch": 5.011423462799245, "grad_norm": 0.1162109375, "learning_rate": 0.00039909009635442536, "loss": 0.5036, "step": 100900 }, { "epoch": 5.011920135094864, "grad_norm": 0.1123046875, "learning_rate": 0.00039905036257077583, "loss": 0.5072, "step": 100910 }, { "epoch": 5.012416807390483, "grad_norm": 0.146484375, "learning_rate": 0.0003990106287871263, "loss": 0.5208, "step": 100920 }, { "epoch": 5.0129134796861035, "grad_norm": 0.1259765625, "learning_rate": 0.0003989708950034767, "loss": 0.5063, "step": 100930 }, { "epoch": 5.013410151981723, "grad_norm": 0.12060546875, "learning_rate": 0.00039893116121982714, "loss": 0.5126, "step": 100940 }, { "epoch": 5.013906824277342, "grad_norm": 0.1162109375, "learning_rate": 0.00039889142743617767, "loss": 0.501, "step": 100950 }, { "epoch": 5.014403496572961, "grad_norm": 0.11328125, "learning_rate": 0.0003988516936525281, "loss": 0.5127, "step": 100960 }, { "epoch": 5.01490016886858, "grad_norm": 0.1240234375, "learning_rate": 0.0003988119598688785, "loss": 0.4887, "step": 100970 }, { "epoch": 5.0153968411642, "grad_norm": 0.1201171875, "learning_rate": 0.00039877222608522897, "loss": 0.5193, "step": 100980 }, { "epoch": 5.015893513459819, "grad_norm": 0.11572265625, "learning_rate": 0.00039873249230157944, "loss": 0.5054, "step": 100990 }, { "epoch": 5.016390185755439, "grad_norm": 0.1552734375, "learning_rate": 0.0003986927585179299, "loss": 0.4664, "step": 101000 }, { "epoch": 5.016886858051058, "grad_norm": 0.1767578125, "learning_rate": 0.00039865302473428033, "loss": 0.4959, "step": 101010 }, { "epoch": 5.017383530346677, "grad_norm": 0.115234375, "learning_rate": 0.0003986132909506308, "loss": 0.5126, "step": 101020 }, { "epoch": 5.017880202642297, "grad_norm": 0.11474609375, "learning_rate": 0.0003985735571669813, "loss": 0.4981, "step": 101030 }, { "epoch": 5.018376874937916, "grad_norm": 0.10693359375, "learning_rate": 0.0003985338233833317, "loss": 0.5043, "step": 101040 }, { "epoch": 5.018873547233535, "grad_norm": 0.123046875, "learning_rate": 0.0003984940895996821, "loss": 0.5113, "step": 101050 }, { "epoch": 5.019370219529154, "grad_norm": 0.134765625, "learning_rate": 0.00039845435581603263, "loss": 0.5359, "step": 101060 }, { "epoch": 5.019866891824774, "grad_norm": 0.123046875, "learning_rate": 0.00039841462203238305, "loss": 0.4985, "step": 101070 }, { "epoch": 5.020363564120394, "grad_norm": 0.12158203125, "learning_rate": 0.0003983748882487335, "loss": 0.4846, "step": 101080 }, { "epoch": 5.020860236416013, "grad_norm": 0.14453125, "learning_rate": 0.00039833515446508394, "loss": 0.5227, "step": 101090 }, { "epoch": 5.021356908711632, "grad_norm": 0.11328125, "learning_rate": 0.0003982954206814344, "loss": 0.5161, "step": 101100 }, { "epoch": 5.021853581007251, "grad_norm": 0.12255859375, "learning_rate": 0.0003982556868977849, "loss": 0.4944, "step": 101110 }, { "epoch": 5.0223502533028705, "grad_norm": 0.13671875, "learning_rate": 0.0003982159531141353, "loss": 0.5032, "step": 101120 }, { "epoch": 5.02284692559849, "grad_norm": 0.1435546875, "learning_rate": 0.00039817621933048577, "loss": 0.5054, "step": 101130 }, { "epoch": 5.02334359789411, "grad_norm": 0.134765625, "learning_rate": 0.00039813648554683624, "loss": 0.5094, "step": 101140 }, { "epoch": 5.023840270189729, "grad_norm": 0.12353515625, "learning_rate": 0.00039809675176318666, "loss": 0.4775, "step": 101150 }, { "epoch": 5.024336942485348, "grad_norm": 0.1552734375, "learning_rate": 0.00039805701797953713, "loss": 0.4934, "step": 101160 }, { "epoch": 5.024833614780968, "grad_norm": 0.107421875, "learning_rate": 0.00039801728419588755, "loss": 0.5035, "step": 101170 }, { "epoch": 5.025330287076587, "grad_norm": 0.1220703125, "learning_rate": 0.000397977550412238, "loss": 0.5133, "step": 101180 }, { "epoch": 5.025826959372206, "grad_norm": 0.1142578125, "learning_rate": 0.0003979378166285885, "loss": 0.5045, "step": 101190 }, { "epoch": 5.026323631667825, "grad_norm": 0.154296875, "learning_rate": 0.0003978980828449389, "loss": 0.495, "step": 101200 }, { "epoch": 5.0268203039634445, "grad_norm": 0.125, "learning_rate": 0.0003978583490612894, "loss": 0.4998, "step": 101210 }, { "epoch": 5.027316976259065, "grad_norm": 0.10986328125, "learning_rate": 0.00039781861527763985, "loss": 0.478, "step": 101220 }, { "epoch": 5.027813648554684, "grad_norm": 0.11181640625, "learning_rate": 0.00039777888149399027, "loss": 0.4937, "step": 101230 }, { "epoch": 5.028310320850303, "grad_norm": 0.11962890625, "learning_rate": 0.00039773914771034074, "loss": 0.4847, "step": 101240 }, { "epoch": 5.028806993145922, "grad_norm": 0.1162109375, "learning_rate": 0.0003976994139266912, "loss": 0.5081, "step": 101250 }, { "epoch": 5.0293036654415415, "grad_norm": 0.126953125, "learning_rate": 0.00039765968014304163, "loss": 0.5119, "step": 101260 }, { "epoch": 5.029800337737161, "grad_norm": 0.10498046875, "learning_rate": 0.0003976199463593921, "loss": 0.5127, "step": 101270 }, { "epoch": 5.03029701003278, "grad_norm": 0.1171875, "learning_rate": 0.0003975802125757425, "loss": 0.4919, "step": 101280 }, { "epoch": 5.0307936823284, "grad_norm": 0.1123046875, "learning_rate": 0.000397540478792093, "loss": 0.5063, "step": 101290 }, { "epoch": 5.031290354624019, "grad_norm": 0.1748046875, "learning_rate": 0.00039750074500844346, "loss": 0.4855, "step": 101300 }, { "epoch": 5.0317870269196385, "grad_norm": 0.1201171875, "learning_rate": 0.0003974610112247939, "loss": 0.4819, "step": 101310 }, { "epoch": 5.032283699215258, "grad_norm": 0.1298828125, "learning_rate": 0.00039742127744114435, "loss": 0.4999, "step": 101320 }, { "epoch": 5.032780371510877, "grad_norm": 0.1181640625, "learning_rate": 0.0003973815436574948, "loss": 0.4966, "step": 101330 }, { "epoch": 5.033277043806496, "grad_norm": 0.1259765625, "learning_rate": 0.00039734180987384524, "loss": 0.529, "step": 101340 }, { "epoch": 5.033773716102115, "grad_norm": 0.11328125, "learning_rate": 0.0003973020760901957, "loss": 0.4916, "step": 101350 }, { "epoch": 5.0342703883977356, "grad_norm": 0.111328125, "learning_rate": 0.0003972623423065462, "loss": 0.4746, "step": 101360 }, { "epoch": 5.034767060693355, "grad_norm": 0.1494140625, "learning_rate": 0.00039722260852289665, "loss": 0.5153, "step": 101370 }, { "epoch": 5.035263732988974, "grad_norm": 0.11328125, "learning_rate": 0.00039718287473924707, "loss": 0.4996, "step": 101380 }, { "epoch": 5.035760405284593, "grad_norm": 0.1259765625, "learning_rate": 0.0003971431409555975, "loss": 0.5083, "step": 101390 }, { "epoch": 5.0362570775802125, "grad_norm": 0.138671875, "learning_rate": 0.000397103407171948, "loss": 0.4963, "step": 101400 }, { "epoch": 5.036753749875832, "grad_norm": 0.1240234375, "learning_rate": 0.0003970636733882984, "loss": 0.5013, "step": 101410 }, { "epoch": 5.037250422171451, "grad_norm": 0.11328125, "learning_rate": 0.00039702393960464884, "loss": 0.4922, "step": 101420 }, { "epoch": 5.037747094467071, "grad_norm": 0.099609375, "learning_rate": 0.0003969842058209993, "loss": 0.4955, "step": 101430 }, { "epoch": 5.03824376676269, "grad_norm": 0.115234375, "learning_rate": 0.0003969444720373498, "loss": 0.4976, "step": 101440 }, { "epoch": 5.0387404390583095, "grad_norm": 0.1435546875, "learning_rate": 0.00039690473825370026, "loss": 0.5043, "step": 101450 }, { "epoch": 5.039237111353929, "grad_norm": 0.11279296875, "learning_rate": 0.0003968650044700507, "loss": 0.4751, "step": 101460 }, { "epoch": 5.039733783649548, "grad_norm": 0.115234375, "learning_rate": 0.0003968252706864011, "loss": 0.4963, "step": 101470 }, { "epoch": 5.040230455945167, "grad_norm": 0.1435546875, "learning_rate": 0.0003967855369027516, "loss": 0.5233, "step": 101480 }, { "epoch": 5.040727128240786, "grad_norm": 0.140625, "learning_rate": 0.00039674580311910203, "loss": 0.5207, "step": 101490 }, { "epoch": 5.0412238005364065, "grad_norm": 0.1083984375, "learning_rate": 0.00039670606933545245, "loss": 0.4913, "step": 101500 }, { "epoch": 5.041720472832026, "grad_norm": 0.107421875, "learning_rate": 0.0003966663355518029, "loss": 0.5108, "step": 101510 }, { "epoch": 5.042217145127645, "grad_norm": 0.1337890625, "learning_rate": 0.0003966266017681534, "loss": 0.5051, "step": 101520 }, { "epoch": 5.042713817423264, "grad_norm": 0.1298828125, "learning_rate": 0.00039658686798450387, "loss": 0.5184, "step": 101530 }, { "epoch": 5.043210489718883, "grad_norm": 0.10986328125, "learning_rate": 0.0003965471342008543, "loss": 0.4665, "step": 101540 }, { "epoch": 5.043707162014503, "grad_norm": 0.1435546875, "learning_rate": 0.00039650740041720475, "loss": 0.522, "step": 101550 }, { "epoch": 5.044203834310122, "grad_norm": 0.12255859375, "learning_rate": 0.0003964676666335552, "loss": 0.4723, "step": 101560 }, { "epoch": 5.044700506605741, "grad_norm": 0.1328125, "learning_rate": 0.00039642793284990564, "loss": 0.4845, "step": 101570 }, { "epoch": 5.045197178901361, "grad_norm": 0.158203125, "learning_rate": 0.00039638819906625606, "loss": 0.4704, "step": 101580 }, { "epoch": 5.04569385119698, "grad_norm": 0.1259765625, "learning_rate": 0.0003963484652826066, "loss": 0.5048, "step": 101590 }, { "epoch": 5.0461905234926, "grad_norm": 0.10986328125, "learning_rate": 0.000396308731498957, "loss": 0.5225, "step": 101600 }, { "epoch": 5.046687195788219, "grad_norm": 0.11669921875, "learning_rate": 0.0003962689977153075, "loss": 0.5385, "step": 101610 }, { "epoch": 5.047183868083838, "grad_norm": 0.11767578125, "learning_rate": 0.0003962292639316579, "loss": 0.4829, "step": 101620 }, { "epoch": 5.047680540379457, "grad_norm": 0.111328125, "learning_rate": 0.00039618953014800836, "loss": 0.4772, "step": 101630 }, { "epoch": 5.048177212675077, "grad_norm": 0.1494140625, "learning_rate": 0.00039614979636435883, "loss": 0.5052, "step": 101640 }, { "epoch": 5.048673884970697, "grad_norm": 0.1083984375, "learning_rate": 0.00039611006258070925, "loss": 0.5336, "step": 101650 }, { "epoch": 5.049170557266316, "grad_norm": 0.1240234375, "learning_rate": 0.0003960703287970597, "loss": 0.5237, "step": 101660 }, { "epoch": 5.049667229561935, "grad_norm": 0.1318359375, "learning_rate": 0.0003960305950134102, "loss": 0.5515, "step": 101670 }, { "epoch": 5.050163901857554, "grad_norm": 0.1337890625, "learning_rate": 0.0003959908612297606, "loss": 0.5257, "step": 101680 }, { "epoch": 5.050660574153174, "grad_norm": 0.154296875, "learning_rate": 0.0003959511274461111, "loss": 0.5001, "step": 101690 }, { "epoch": 5.051157246448793, "grad_norm": 0.1318359375, "learning_rate": 0.00039591139366246155, "loss": 0.5143, "step": 101700 }, { "epoch": 5.051653918744412, "grad_norm": 0.11279296875, "learning_rate": 0.00039587165987881197, "loss": 0.5013, "step": 101710 }, { "epoch": 5.052150591040032, "grad_norm": 0.11767578125, "learning_rate": 0.00039583192609516244, "loss": 0.5004, "step": 101720 }, { "epoch": 5.052647263335651, "grad_norm": 0.11669921875, "learning_rate": 0.00039579219231151286, "loss": 0.515, "step": 101730 }, { "epoch": 5.053143935631271, "grad_norm": 0.11376953125, "learning_rate": 0.00039575245852786333, "loss": 0.4932, "step": 101740 }, { "epoch": 5.05364060792689, "grad_norm": 0.10400390625, "learning_rate": 0.0003957127247442138, "loss": 0.5008, "step": 101750 }, { "epoch": 5.054137280222509, "grad_norm": 0.11572265625, "learning_rate": 0.0003956729909605642, "loss": 0.5068, "step": 101760 }, { "epoch": 5.054633952518128, "grad_norm": 0.11083984375, "learning_rate": 0.0003956332571769147, "loss": 0.4884, "step": 101770 }, { "epoch": 5.0551306248137475, "grad_norm": 0.154296875, "learning_rate": 0.00039559352339326516, "loss": 0.5051, "step": 101780 }, { "epoch": 5.055627297109368, "grad_norm": 0.115234375, "learning_rate": 0.0003955537896096156, "loss": 0.4967, "step": 101790 }, { "epoch": 5.056123969404987, "grad_norm": 0.126953125, "learning_rate": 0.00039551405582596605, "loss": 0.5092, "step": 101800 }, { "epoch": 5.056620641700606, "grad_norm": 0.1318359375, "learning_rate": 0.00039547432204231647, "loss": 0.52, "step": 101810 }, { "epoch": 5.057117313996225, "grad_norm": 0.125, "learning_rate": 0.000395434588258667, "loss": 0.5318, "step": 101820 }, { "epoch": 5.0576139862918446, "grad_norm": 0.1689453125, "learning_rate": 0.0003953948544750174, "loss": 0.4582, "step": 101830 }, { "epoch": 5.058110658587464, "grad_norm": 0.130859375, "learning_rate": 0.00039535512069136783, "loss": 0.4796, "step": 101840 }, { "epoch": 5.058607330883083, "grad_norm": 0.12109375, "learning_rate": 0.0003953153869077183, "loss": 0.52, "step": 101850 }, { "epoch": 5.059104003178703, "grad_norm": 0.1904296875, "learning_rate": 0.00039527565312406877, "loss": 0.4915, "step": 101860 }, { "epoch": 5.059600675474322, "grad_norm": 0.21875, "learning_rate": 0.0003952359193404192, "loss": 0.4914, "step": 101870 }, { "epoch": 5.060097347769942, "grad_norm": 0.1279296875, "learning_rate": 0.00039519618555676966, "loss": 0.5179, "step": 101880 }, { "epoch": 5.060594020065561, "grad_norm": 0.11279296875, "learning_rate": 0.00039515645177312013, "loss": 0.5114, "step": 101890 }, { "epoch": 5.06109069236118, "grad_norm": 0.12353515625, "learning_rate": 0.0003951167179894706, "loss": 0.5279, "step": 101900 }, { "epoch": 5.061587364656799, "grad_norm": 0.1279296875, "learning_rate": 0.000395076984205821, "loss": 0.4745, "step": 101910 }, { "epoch": 5.0620840369524185, "grad_norm": 0.1435546875, "learning_rate": 0.00039503725042217144, "loss": 0.4826, "step": 101920 }, { "epoch": 5.062580709248038, "grad_norm": 0.1103515625, "learning_rate": 0.00039499751663852196, "loss": 0.5154, "step": 101930 }, { "epoch": 5.063077381543658, "grad_norm": 0.12060546875, "learning_rate": 0.0003949577828548724, "loss": 0.4944, "step": 101940 }, { "epoch": 5.063574053839277, "grad_norm": 0.13671875, "learning_rate": 0.0003949180490712228, "loss": 0.5743, "step": 101950 }, { "epoch": 5.064070726134896, "grad_norm": 0.11474609375, "learning_rate": 0.00039487831528757327, "loss": 0.5165, "step": 101960 }, { "epoch": 5.0645673984305155, "grad_norm": 0.125, "learning_rate": 0.00039483858150392374, "loss": 0.4993, "step": 101970 }, { "epoch": 5.065064070726135, "grad_norm": 0.11181640625, "learning_rate": 0.0003947988477202742, "loss": 0.4998, "step": 101980 }, { "epoch": 5.065560743021754, "grad_norm": 0.1494140625, "learning_rate": 0.0003947591139366246, "loss": 0.478, "step": 101990 }, { "epoch": 5.066057415317373, "grad_norm": 0.1259765625, "learning_rate": 0.0003947193801529751, "loss": 0.5334, "step": 102000 }, { "epoch": 5.066554087612993, "grad_norm": 0.11572265625, "learning_rate": 0.00039467964636932557, "loss": 0.5197, "step": 102010 }, { "epoch": 5.0670507599086125, "grad_norm": 0.12158203125, "learning_rate": 0.000394639912585676, "loss": 0.5053, "step": 102020 }, { "epoch": 5.067547432204232, "grad_norm": 0.1396484375, "learning_rate": 0.0003946001788020264, "loss": 0.4691, "step": 102030 }, { "epoch": 5.068044104499851, "grad_norm": 0.1103515625, "learning_rate": 0.0003945604450183769, "loss": 0.4815, "step": 102040 }, { "epoch": 5.06854077679547, "grad_norm": 0.107421875, "learning_rate": 0.00039452071123472735, "loss": 0.4819, "step": 102050 }, { "epoch": 5.069037449091089, "grad_norm": 0.11181640625, "learning_rate": 0.0003944809774510778, "loss": 0.4896, "step": 102060 }, { "epoch": 5.069534121386709, "grad_norm": 0.12353515625, "learning_rate": 0.00039444124366742824, "loss": 0.4975, "step": 102070 }, { "epoch": 5.070030793682329, "grad_norm": 0.1083984375, "learning_rate": 0.0003944015098837787, "loss": 0.487, "step": 102080 }, { "epoch": 5.070527465977948, "grad_norm": 0.11572265625, "learning_rate": 0.0003943617761001292, "loss": 0.5121, "step": 102090 }, { "epoch": 5.071024138273567, "grad_norm": 0.123046875, "learning_rate": 0.0003943220423164796, "loss": 0.5056, "step": 102100 }, { "epoch": 5.0715208105691865, "grad_norm": 0.150390625, "learning_rate": 0.00039428230853283, "loss": 0.5018, "step": 102110 }, { "epoch": 5.072017482864806, "grad_norm": 0.130859375, "learning_rate": 0.00039424257474918054, "loss": 0.5073, "step": 102120 }, { "epoch": 5.072514155160425, "grad_norm": 0.12158203125, "learning_rate": 0.00039420284096553095, "loss": 0.5065, "step": 102130 }, { "epoch": 5.073010827456044, "grad_norm": 0.1142578125, "learning_rate": 0.0003941631071818814, "loss": 0.5004, "step": 102140 }, { "epoch": 5.073507499751664, "grad_norm": 0.1201171875, "learning_rate": 0.00039412337339823184, "loss": 0.5063, "step": 102150 }, { "epoch": 5.0740041720472835, "grad_norm": 0.11181640625, "learning_rate": 0.0003940836396145823, "loss": 0.4852, "step": 102160 }, { "epoch": 5.074500844342903, "grad_norm": 0.14453125, "learning_rate": 0.0003940439058309328, "loss": 0.4731, "step": 102170 }, { "epoch": 5.074997516638522, "grad_norm": 0.11181640625, "learning_rate": 0.0003940041720472832, "loss": 0.5157, "step": 102180 }, { "epoch": 5.075494188934141, "grad_norm": 0.11962890625, "learning_rate": 0.0003939644382636337, "loss": 0.5061, "step": 102190 }, { "epoch": 5.07599086122976, "grad_norm": 0.1025390625, "learning_rate": 0.00039392470447998415, "loss": 0.503, "step": 102200 }, { "epoch": 5.07648753352538, "grad_norm": 0.11865234375, "learning_rate": 0.00039388497069633456, "loss": 0.4641, "step": 102210 }, { "epoch": 5.076984205821, "grad_norm": 0.1220703125, "learning_rate": 0.00039384523691268503, "loss": 0.4968, "step": 102220 }, { "epoch": 5.077480878116619, "grad_norm": 0.146484375, "learning_rate": 0.0003938055031290355, "loss": 0.4932, "step": 102230 }, { "epoch": 5.077977550412238, "grad_norm": 0.142578125, "learning_rate": 0.0003937657693453859, "loss": 0.5269, "step": 102240 }, { "epoch": 5.078474222707857, "grad_norm": 0.11376953125, "learning_rate": 0.0003937260355617364, "loss": 0.4944, "step": 102250 }, { "epoch": 5.078970895003477, "grad_norm": 0.1787109375, "learning_rate": 0.0003936863017780868, "loss": 0.5135, "step": 102260 }, { "epoch": 5.079467567299096, "grad_norm": 0.109375, "learning_rate": 0.0003936465679944373, "loss": 0.5092, "step": 102270 }, { "epoch": 5.079964239594715, "grad_norm": 0.1123046875, "learning_rate": 0.00039360683421078775, "loss": 0.4748, "step": 102280 }, { "epoch": 5.080460911890334, "grad_norm": 0.10888671875, "learning_rate": 0.00039356710042713817, "loss": 0.4965, "step": 102290 }, { "epoch": 5.0809575841859544, "grad_norm": 0.115234375, "learning_rate": 0.00039352736664348864, "loss": 0.523, "step": 102300 }, { "epoch": 5.081454256481574, "grad_norm": 0.10498046875, "learning_rate": 0.0003934876328598391, "loss": 0.5026, "step": 102310 }, { "epoch": 5.081950928777193, "grad_norm": 0.1728515625, "learning_rate": 0.00039344789907618953, "loss": 0.5191, "step": 102320 }, { "epoch": 5.082447601072812, "grad_norm": 0.1201171875, "learning_rate": 0.00039340816529254, "loss": 0.5199, "step": 102330 }, { "epoch": 5.082944273368431, "grad_norm": 0.125, "learning_rate": 0.0003933684315088904, "loss": 0.5079, "step": 102340 }, { "epoch": 5.083440945664051, "grad_norm": 0.130859375, "learning_rate": 0.00039332869772524095, "loss": 0.5021, "step": 102350 }, { "epoch": 5.08393761795967, "grad_norm": 0.11376953125, "learning_rate": 0.00039328896394159136, "loss": 0.5217, "step": 102360 }, { "epoch": 5.08443429025529, "grad_norm": 0.11083984375, "learning_rate": 0.0003932492301579418, "loss": 0.4928, "step": 102370 }, { "epoch": 5.084930962550909, "grad_norm": 0.1123046875, "learning_rate": 0.00039320949637429225, "loss": 0.5058, "step": 102380 }, { "epoch": 5.085427634846528, "grad_norm": 0.10791015625, "learning_rate": 0.0003931697625906427, "loss": 0.4907, "step": 102390 }, { "epoch": 5.085924307142148, "grad_norm": 0.10107421875, "learning_rate": 0.00039313002880699314, "loss": 0.5038, "step": 102400 }, { "epoch": 5.086420979437767, "grad_norm": 0.1240234375, "learning_rate": 0.0003930902950233436, "loss": 0.4868, "step": 102410 }, { "epoch": 5.086917651733386, "grad_norm": 0.11669921875, "learning_rate": 0.0003930505612396941, "loss": 0.4926, "step": 102420 }, { "epoch": 5.087414324029005, "grad_norm": 0.162109375, "learning_rate": 0.00039301082745604455, "loss": 0.5046, "step": 102430 }, { "epoch": 5.087910996324625, "grad_norm": 0.109375, "learning_rate": 0.00039297109367239497, "loss": 0.5194, "step": 102440 }, { "epoch": 5.088407668620245, "grad_norm": 0.10595703125, "learning_rate": 0.0003929313598887454, "loss": 0.5021, "step": 102450 }, { "epoch": 5.088904340915864, "grad_norm": 0.111328125, "learning_rate": 0.0003928916261050959, "loss": 0.5071, "step": 102460 }, { "epoch": 5.089401013211483, "grad_norm": 0.1123046875, "learning_rate": 0.00039285189232144633, "loss": 0.5201, "step": 102470 }, { "epoch": 5.089897685507102, "grad_norm": 0.119140625, "learning_rate": 0.00039281215853779675, "loss": 0.4912, "step": 102480 }, { "epoch": 5.0903943578027215, "grad_norm": 0.171875, "learning_rate": 0.0003927724247541472, "loss": 0.5096, "step": 102490 }, { "epoch": 5.090891030098341, "grad_norm": 0.11669921875, "learning_rate": 0.0003927326909704977, "loss": 0.5001, "step": 102500 }, { "epoch": 5.091387702393961, "grad_norm": 0.11767578125, "learning_rate": 0.00039269295718684816, "loss": 0.4984, "step": 102510 }, { "epoch": 5.09188437468958, "grad_norm": 0.109375, "learning_rate": 0.0003926532234031986, "loss": 0.5196, "step": 102520 }, { "epoch": 5.092381046985199, "grad_norm": 0.11181640625, "learning_rate": 0.00039261348961954905, "loss": 0.4902, "step": 102530 }, { "epoch": 5.092877719280819, "grad_norm": 0.1376953125, "learning_rate": 0.0003925737558358995, "loss": 0.4998, "step": 102540 }, { "epoch": 5.093374391576438, "grad_norm": 0.10302734375, "learning_rate": 0.00039253402205224994, "loss": 0.4996, "step": 102550 }, { "epoch": 5.093871063872057, "grad_norm": 0.11083984375, "learning_rate": 0.00039249428826860036, "loss": 0.4727, "step": 102560 }, { "epoch": 5.094367736167676, "grad_norm": 0.111328125, "learning_rate": 0.0003924545544849508, "loss": 0.4791, "step": 102570 }, { "epoch": 5.094864408463296, "grad_norm": 0.1259765625, "learning_rate": 0.0003924148207013013, "loss": 0.5087, "step": 102580 }, { "epoch": 5.095361080758916, "grad_norm": 0.1279296875, "learning_rate": 0.00039237508691765177, "loss": 0.4893, "step": 102590 }, { "epoch": 5.095857753054535, "grad_norm": 0.1318359375, "learning_rate": 0.0003923353531340022, "loss": 0.4865, "step": 102600 }, { "epoch": 5.096354425350154, "grad_norm": 0.10888671875, "learning_rate": 0.00039229561935035266, "loss": 0.5286, "step": 102610 }, { "epoch": 5.096851097645773, "grad_norm": 0.130859375, "learning_rate": 0.00039225588556670313, "loss": 0.491, "step": 102620 }, { "epoch": 5.0973477699413925, "grad_norm": 0.1328125, "learning_rate": 0.00039221615178305355, "loss": 0.4991, "step": 102630 }, { "epoch": 5.097844442237012, "grad_norm": 0.1494140625, "learning_rate": 0.000392176417999404, "loss": 0.4782, "step": 102640 }, { "epoch": 5.098341114532631, "grad_norm": 0.11865234375, "learning_rate": 0.0003921366842157545, "loss": 0.4842, "step": 102650 }, { "epoch": 5.098837786828251, "grad_norm": 0.1337890625, "learning_rate": 0.0003920969504321049, "loss": 0.5013, "step": 102660 }, { "epoch": 5.09933445912387, "grad_norm": 0.119140625, "learning_rate": 0.0003920572166484554, "loss": 0.5183, "step": 102670 }, { "epoch": 5.0998311314194895, "grad_norm": 0.1337890625, "learning_rate": 0.0003920174828648058, "loss": 0.5004, "step": 102680 }, { "epoch": 5.100327803715109, "grad_norm": 0.1630859375, "learning_rate": 0.00039197774908115627, "loss": 0.5079, "step": 102690 }, { "epoch": 5.100824476010728, "grad_norm": 0.126953125, "learning_rate": 0.00039193801529750674, "loss": 0.5287, "step": 102700 }, { "epoch": 5.101321148306347, "grad_norm": 0.171875, "learning_rate": 0.00039189828151385715, "loss": 0.4842, "step": 102710 }, { "epoch": 5.101817820601966, "grad_norm": 0.12255859375, "learning_rate": 0.0003918585477302076, "loss": 0.496, "step": 102720 }, { "epoch": 5.1023144928975865, "grad_norm": 0.10693359375, "learning_rate": 0.0003918188139465581, "loss": 0.4851, "step": 102730 }, { "epoch": 5.102811165193206, "grad_norm": 0.1337890625, "learning_rate": 0.0003917790801629085, "loss": 0.5089, "step": 102740 }, { "epoch": 5.103307837488825, "grad_norm": 0.11962890625, "learning_rate": 0.000391739346379259, "loss": 0.5283, "step": 102750 }, { "epoch": 5.103804509784444, "grad_norm": 0.166015625, "learning_rate": 0.00039169961259560946, "loss": 0.5042, "step": 102760 }, { "epoch": 5.1043011820800634, "grad_norm": 0.1103515625, "learning_rate": 0.0003916598788119599, "loss": 0.5159, "step": 102770 }, { "epoch": 5.104797854375683, "grad_norm": 0.10595703125, "learning_rate": 0.00039162014502831035, "loss": 0.4816, "step": 102780 }, { "epoch": 5.105294526671302, "grad_norm": 0.162109375, "learning_rate": 0.00039158041124466076, "loss": 0.4739, "step": 102790 }, { "epoch": 5.105791198966922, "grad_norm": 0.146484375, "learning_rate": 0.0003915406774610113, "loss": 0.4951, "step": 102800 }, { "epoch": 5.106287871262541, "grad_norm": 0.1083984375, "learning_rate": 0.0003915009436773617, "loss": 0.5007, "step": 102810 }, { "epoch": 5.1067845435581605, "grad_norm": 0.10986328125, "learning_rate": 0.0003914612098937121, "loss": 0.531, "step": 102820 }, { "epoch": 5.10728121585378, "grad_norm": 0.12255859375, "learning_rate": 0.0003914214761100626, "loss": 0.4928, "step": 102830 }, { "epoch": 5.107777888149399, "grad_norm": 0.1259765625, "learning_rate": 0.00039138174232641307, "loss": 0.5146, "step": 102840 }, { "epoch": 5.108274560445018, "grad_norm": 0.11962890625, "learning_rate": 0.0003913420085427635, "loss": 0.5104, "step": 102850 }, { "epoch": 5.108771232740637, "grad_norm": 0.12109375, "learning_rate": 0.00039130227475911395, "loss": 0.5028, "step": 102860 }, { "epoch": 5.1092679050362575, "grad_norm": 0.11083984375, "learning_rate": 0.00039126254097546437, "loss": 0.5051, "step": 102870 }, { "epoch": 5.109764577331877, "grad_norm": 0.11962890625, "learning_rate": 0.0003912228071918149, "loss": 0.4709, "step": 102880 }, { "epoch": 5.110261249627496, "grad_norm": 0.1181640625, "learning_rate": 0.0003911830734081653, "loss": 0.5024, "step": 102890 }, { "epoch": 5.110757921923115, "grad_norm": 0.1484375, "learning_rate": 0.00039114333962451573, "loss": 0.5313, "step": 102900 }, { "epoch": 5.111254594218734, "grad_norm": 0.12451171875, "learning_rate": 0.0003911036058408662, "loss": 0.4929, "step": 102910 }, { "epoch": 5.111751266514354, "grad_norm": 0.1259765625, "learning_rate": 0.0003910638720572167, "loss": 0.515, "step": 102920 }, { "epoch": 5.112247938809973, "grad_norm": 0.11474609375, "learning_rate": 0.0003910241382735671, "loss": 0.5006, "step": 102930 }, { "epoch": 5.112744611105592, "grad_norm": 0.126953125, "learning_rate": 0.00039098440448991756, "loss": 0.5125, "step": 102940 }, { "epoch": 5.113241283401212, "grad_norm": 0.16015625, "learning_rate": 0.00039094467070626803, "loss": 0.5217, "step": 102950 }, { "epoch": 5.113737955696831, "grad_norm": 0.11767578125, "learning_rate": 0.0003909049369226185, "loss": 0.4785, "step": 102960 }, { "epoch": 5.114234627992451, "grad_norm": 0.1279296875, "learning_rate": 0.0003908652031389689, "loss": 0.5008, "step": 102970 }, { "epoch": 5.11473130028807, "grad_norm": 0.1083984375, "learning_rate": 0.00039082546935531934, "loss": 0.4956, "step": 102980 }, { "epoch": 5.115227972583689, "grad_norm": 0.12255859375, "learning_rate": 0.00039078573557166987, "loss": 0.508, "step": 102990 }, { "epoch": 5.115724644879308, "grad_norm": 0.103515625, "learning_rate": 0.0003907460017880203, "loss": 0.4925, "step": 103000 }, { "epoch": 5.116221317174928, "grad_norm": 0.1279296875, "learning_rate": 0.00039070626800437075, "loss": 0.4951, "step": 103010 }, { "epoch": 5.116717989470548, "grad_norm": 0.12890625, "learning_rate": 0.00039066653422072117, "loss": 0.4956, "step": 103020 }, { "epoch": 5.117214661766167, "grad_norm": 0.1298828125, "learning_rate": 0.00039062680043707164, "loss": 0.478, "step": 103030 }, { "epoch": 5.117711334061786, "grad_norm": 0.11328125, "learning_rate": 0.0003905870666534221, "loss": 0.5068, "step": 103040 }, { "epoch": 5.118208006357405, "grad_norm": 0.130859375, "learning_rate": 0.00039054733286977253, "loss": 0.4952, "step": 103050 }, { "epoch": 5.118704678653025, "grad_norm": 0.11669921875, "learning_rate": 0.000390507599086123, "loss": 0.5261, "step": 103060 }, { "epoch": 5.119201350948644, "grad_norm": 0.12158203125, "learning_rate": 0.0003904678653024735, "loss": 0.5127, "step": 103070 }, { "epoch": 5.119698023244263, "grad_norm": 0.12890625, "learning_rate": 0.0003904281315188239, "loss": 0.5266, "step": 103080 }, { "epoch": 5.120194695539883, "grad_norm": 0.11376953125, "learning_rate": 0.00039038839773517436, "loss": 0.5181, "step": 103090 }, { "epoch": 5.120691367835502, "grad_norm": 0.11669921875, "learning_rate": 0.00039034866395152483, "loss": 0.4815, "step": 103100 }, { "epoch": 5.121188040131122, "grad_norm": 0.154296875, "learning_rate": 0.00039030893016787525, "loss": 0.5016, "step": 103110 }, { "epoch": 5.121684712426741, "grad_norm": 0.11767578125, "learning_rate": 0.0003902691963842257, "loss": 0.5309, "step": 103120 }, { "epoch": 5.12218138472236, "grad_norm": 0.1162109375, "learning_rate": 0.00039022946260057614, "loss": 0.4882, "step": 103130 }, { "epoch": 5.122678057017979, "grad_norm": 0.138671875, "learning_rate": 0.0003901897288169266, "loss": 0.4995, "step": 103140 }, { "epoch": 5.1231747293135985, "grad_norm": 0.1171875, "learning_rate": 0.0003901499950332771, "loss": 0.486, "step": 103150 }, { "epoch": 5.123671401609219, "grad_norm": 0.150390625, "learning_rate": 0.0003901102612496275, "loss": 0.5112, "step": 103160 }, { "epoch": 5.124168073904838, "grad_norm": 0.12255859375, "learning_rate": 0.00039007052746597797, "loss": 0.5025, "step": 103170 }, { "epoch": 5.124664746200457, "grad_norm": 0.126953125, "learning_rate": 0.00039003079368232844, "loss": 0.4685, "step": 103180 }, { "epoch": 5.125161418496076, "grad_norm": 0.1318359375, "learning_rate": 0.00038999105989867886, "loss": 0.5198, "step": 103190 }, { "epoch": 5.1256580907916955, "grad_norm": 0.11767578125, "learning_rate": 0.00038995132611502933, "loss": 0.5119, "step": 103200 }, { "epoch": 5.126154763087315, "grad_norm": 0.119140625, "learning_rate": 0.00038991159233137975, "loss": 0.5381, "step": 103210 }, { "epoch": 5.126651435382934, "grad_norm": 0.1279296875, "learning_rate": 0.0003898718585477302, "loss": 0.5304, "step": 103220 }, { "epoch": 5.127148107678554, "grad_norm": 0.12060546875, "learning_rate": 0.0003898321247640807, "loss": 0.4936, "step": 103230 }, { "epoch": 5.127644779974173, "grad_norm": 0.15625, "learning_rate": 0.0003897923909804311, "loss": 0.4661, "step": 103240 }, { "epoch": 5.128141452269793, "grad_norm": 0.111328125, "learning_rate": 0.0003897526571967816, "loss": 0.5276, "step": 103250 }, { "epoch": 5.128638124565412, "grad_norm": 0.111328125, "learning_rate": 0.00038971292341313205, "loss": 0.4815, "step": 103260 }, { "epoch": 5.129134796861031, "grad_norm": 0.12109375, "learning_rate": 0.00038967318962948247, "loss": 0.5073, "step": 103270 }, { "epoch": 5.12963146915665, "grad_norm": 0.134765625, "learning_rate": 0.00038963345584583294, "loss": 0.4962, "step": 103280 }, { "epoch": 5.1301281414522695, "grad_norm": 0.12890625, "learning_rate": 0.0003895937220621834, "loss": 0.5054, "step": 103290 }, { "epoch": 5.13062481374789, "grad_norm": 0.11767578125, "learning_rate": 0.0003895539882785338, "loss": 0.4623, "step": 103300 }, { "epoch": 5.131121486043509, "grad_norm": 0.11279296875, "learning_rate": 0.0003895142544948843, "loss": 0.5243, "step": 103310 }, { "epoch": 5.131618158339128, "grad_norm": 0.12890625, "learning_rate": 0.0003894745207112347, "loss": 0.5063, "step": 103320 }, { "epoch": 5.132114830634747, "grad_norm": 0.1591796875, "learning_rate": 0.00038943478692758524, "loss": 0.512, "step": 103330 }, { "epoch": 5.1326115029303665, "grad_norm": 0.11865234375, "learning_rate": 0.00038939505314393566, "loss": 0.4908, "step": 103340 }, { "epoch": 5.133108175225986, "grad_norm": 0.1298828125, "learning_rate": 0.0003893553193602861, "loss": 0.4813, "step": 103350 }, { "epoch": 5.133604847521605, "grad_norm": 0.11474609375, "learning_rate": 0.00038931558557663655, "loss": 0.5134, "step": 103360 }, { "epoch": 5.134101519817224, "grad_norm": 0.1376953125, "learning_rate": 0.000389275851792987, "loss": 0.4953, "step": 103370 }, { "epoch": 5.134598192112844, "grad_norm": 0.111328125, "learning_rate": 0.00038923611800933743, "loss": 0.479, "step": 103380 }, { "epoch": 5.1350948644084635, "grad_norm": 0.185546875, "learning_rate": 0.0003891963842256879, "loss": 0.4816, "step": 103390 }, { "epoch": 5.135591536704083, "grad_norm": 0.1201171875, "learning_rate": 0.0003891566504420384, "loss": 0.4737, "step": 103400 }, { "epoch": 5.136088208999702, "grad_norm": 0.126953125, "learning_rate": 0.00038911691665838885, "loss": 0.4755, "step": 103410 }, { "epoch": 5.136584881295321, "grad_norm": 0.1396484375, "learning_rate": 0.00038907718287473927, "loss": 0.5256, "step": 103420 }, { "epoch": 5.13708155359094, "grad_norm": 0.1455078125, "learning_rate": 0.0003890374490910897, "loss": 0.5065, "step": 103430 }, { "epoch": 5.13757822588656, "grad_norm": 0.12890625, "learning_rate": 0.00038899771530744015, "loss": 0.5308, "step": 103440 }, { "epoch": 5.13807489818218, "grad_norm": 0.12255859375, "learning_rate": 0.0003889579815237906, "loss": 0.5085, "step": 103450 }, { "epoch": 5.138571570477799, "grad_norm": 0.10693359375, "learning_rate": 0.0003889182477401411, "loss": 0.4964, "step": 103460 }, { "epoch": 5.139068242773418, "grad_norm": 0.1083984375, "learning_rate": 0.0003888785139564915, "loss": 0.4791, "step": 103470 }, { "epoch": 5.1395649150690375, "grad_norm": 0.12109375, "learning_rate": 0.000388838780172842, "loss": 0.5215, "step": 103480 }, { "epoch": 5.140061587364657, "grad_norm": 0.125, "learning_rate": 0.00038879904638919246, "loss": 0.4902, "step": 103490 }, { "epoch": 5.140558259660276, "grad_norm": 0.10986328125, "learning_rate": 0.0003887593126055429, "loss": 0.5147, "step": 103500 }, { "epoch": 5.141054931955895, "grad_norm": 0.1259765625, "learning_rate": 0.0003887195788218933, "loss": 0.468, "step": 103510 }, { "epoch": 5.141551604251515, "grad_norm": 0.1181640625, "learning_rate": 0.0003886798450382438, "loss": 0.4909, "step": 103520 }, { "epoch": 5.1420482765471345, "grad_norm": 0.11083984375, "learning_rate": 0.00038864011125459423, "loss": 0.5078, "step": 103530 }, { "epoch": 5.142544948842754, "grad_norm": 0.1181640625, "learning_rate": 0.0003886003774709447, "loss": 0.511, "step": 103540 }, { "epoch": 5.143041621138373, "grad_norm": 0.1376953125, "learning_rate": 0.0003885606436872951, "loss": 0.5178, "step": 103550 }, { "epoch": 5.143538293433992, "grad_norm": 0.12060546875, "learning_rate": 0.0003885209099036456, "loss": 0.4828, "step": 103560 }, { "epoch": 5.144034965729611, "grad_norm": 0.1318359375, "learning_rate": 0.00038848117611999607, "loss": 0.4752, "step": 103570 }, { "epoch": 5.144531638025231, "grad_norm": 0.125, "learning_rate": 0.0003884414423363465, "loss": 0.4961, "step": 103580 }, { "epoch": 5.145028310320851, "grad_norm": 0.1123046875, "learning_rate": 0.00038840170855269695, "loss": 0.5233, "step": 103590 }, { "epoch": 5.14552498261647, "grad_norm": 0.12353515625, "learning_rate": 0.0003883619747690474, "loss": 0.5459, "step": 103600 }, { "epoch": 5.146021654912089, "grad_norm": 0.115234375, "learning_rate": 0.00038832224098539784, "loss": 0.5349, "step": 103610 }, { "epoch": 5.146518327207708, "grad_norm": 0.115234375, "learning_rate": 0.0003882825072017483, "loss": 0.5141, "step": 103620 }, { "epoch": 5.147014999503328, "grad_norm": 0.1181640625, "learning_rate": 0.0003882427734180988, "loss": 0.5031, "step": 103630 }, { "epoch": 5.147511671798947, "grad_norm": 0.11474609375, "learning_rate": 0.0003882030396344492, "loss": 0.5, "step": 103640 }, { "epoch": 5.148008344094566, "grad_norm": 0.134765625, "learning_rate": 0.0003881633058507997, "loss": 0.4862, "step": 103650 }, { "epoch": 5.148505016390185, "grad_norm": 0.11865234375, "learning_rate": 0.0003881235720671501, "loss": 0.5254, "step": 103660 }, { "epoch": 5.149001688685805, "grad_norm": 0.134765625, "learning_rate": 0.00038808383828350056, "loss": 0.5075, "step": 103670 }, { "epoch": 5.149498360981425, "grad_norm": 0.1796875, "learning_rate": 0.00038804410449985103, "loss": 0.5022, "step": 103680 }, { "epoch": 5.149995033277044, "grad_norm": 0.1806640625, "learning_rate": 0.00038800437071620145, "loss": 0.5217, "step": 103690 }, { "epoch": 5.150491705572663, "grad_norm": 0.1748046875, "learning_rate": 0.0003879646369325519, "loss": 0.4857, "step": 103700 }, { "epoch": 5.150988377868282, "grad_norm": 0.12158203125, "learning_rate": 0.0003879249031489024, "loss": 0.5378, "step": 103710 }, { "epoch": 5.151485050163902, "grad_norm": 0.1787109375, "learning_rate": 0.0003878851693652528, "loss": 0.4913, "step": 103720 }, { "epoch": 5.151981722459521, "grad_norm": 0.11376953125, "learning_rate": 0.0003878454355816033, "loss": 0.4918, "step": 103730 }, { "epoch": 5.152478394755141, "grad_norm": 0.1044921875, "learning_rate": 0.0003878057017979537, "loss": 0.5136, "step": 103740 }, { "epoch": 5.15297506705076, "grad_norm": 0.13671875, "learning_rate": 0.00038776596801430417, "loss": 0.4936, "step": 103750 }, { "epoch": 5.153471739346379, "grad_norm": 0.1220703125, "learning_rate": 0.00038772623423065464, "loss": 0.4903, "step": 103760 }, { "epoch": 5.153968411641999, "grad_norm": 0.11767578125, "learning_rate": 0.00038768650044700506, "loss": 0.5015, "step": 103770 }, { "epoch": 5.154465083937618, "grad_norm": 0.126953125, "learning_rate": 0.00038764676666335553, "loss": 0.486, "step": 103780 }, { "epoch": 5.154961756233237, "grad_norm": 0.1171875, "learning_rate": 0.000387607032879706, "loss": 0.4983, "step": 103790 }, { "epoch": 5.155458428528856, "grad_norm": 0.1279296875, "learning_rate": 0.0003875672990960564, "loss": 0.5041, "step": 103800 }, { "epoch": 5.155955100824476, "grad_norm": 0.11328125, "learning_rate": 0.0003875275653124069, "loss": 0.5199, "step": 103810 }, { "epoch": 5.156451773120096, "grad_norm": 0.1201171875, "learning_rate": 0.00038748783152875736, "loss": 0.4944, "step": 103820 }, { "epoch": 5.156948445415715, "grad_norm": 0.10107421875, "learning_rate": 0.0003874480977451078, "loss": 0.5237, "step": 103830 }, { "epoch": 5.157445117711334, "grad_norm": 0.1455078125, "learning_rate": 0.00038740836396145825, "loss": 0.4921, "step": 103840 }, { "epoch": 5.157941790006953, "grad_norm": 0.150390625, "learning_rate": 0.00038736863017780867, "loss": 0.5085, "step": 103850 }, { "epoch": 5.1584384623025725, "grad_norm": 0.1279296875, "learning_rate": 0.0003873288963941592, "loss": 0.5085, "step": 103860 }, { "epoch": 5.158935134598192, "grad_norm": 0.10498046875, "learning_rate": 0.0003872891626105096, "loss": 0.4656, "step": 103870 }, { "epoch": 5.159431806893812, "grad_norm": 0.10205078125, "learning_rate": 0.00038724942882686, "loss": 0.4783, "step": 103880 }, { "epoch": 5.159928479189431, "grad_norm": 0.11181640625, "learning_rate": 0.0003872096950432105, "loss": 0.5244, "step": 103890 }, { "epoch": 5.16042515148505, "grad_norm": 0.11962890625, "learning_rate": 0.00038716996125956097, "loss": 0.4822, "step": 103900 }, { "epoch": 5.1609218237806695, "grad_norm": 0.1201171875, "learning_rate": 0.00038713022747591144, "loss": 0.4976, "step": 103910 }, { "epoch": 5.161418496076289, "grad_norm": 0.11572265625, "learning_rate": 0.00038709049369226186, "loss": 0.52, "step": 103920 }, { "epoch": 5.161915168371908, "grad_norm": 0.15625, "learning_rate": 0.00038705075990861233, "loss": 0.4899, "step": 103930 }, { "epoch": 5.162411840667527, "grad_norm": 0.12451171875, "learning_rate": 0.0003870110261249628, "loss": 0.509, "step": 103940 }, { "epoch": 5.162908512963147, "grad_norm": 0.11474609375, "learning_rate": 0.0003869712923413132, "loss": 0.5289, "step": 103950 }, { "epoch": 5.163405185258767, "grad_norm": 0.1796875, "learning_rate": 0.00038693155855766363, "loss": 0.516, "step": 103960 }, { "epoch": 5.163901857554386, "grad_norm": 0.1171875, "learning_rate": 0.0003868918247740141, "loss": 0.5035, "step": 103970 }, { "epoch": 5.164398529850005, "grad_norm": 0.12890625, "learning_rate": 0.0003868520909903646, "loss": 0.4768, "step": 103980 }, { "epoch": 5.164895202145624, "grad_norm": 0.115234375, "learning_rate": 0.00038681235720671505, "loss": 0.5205, "step": 103990 }, { "epoch": 5.1653918744412435, "grad_norm": 0.11962890625, "learning_rate": 0.00038677262342306547, "loss": 0.4778, "step": 104000 }, { "epoch": 5.165888546736863, "grad_norm": 0.138671875, "learning_rate": 0.00038673288963941594, "loss": 0.507, "step": 104010 }, { "epoch": 5.166385219032483, "grad_norm": 0.1259765625, "learning_rate": 0.0003866931558557664, "loss": 0.4865, "step": 104020 }, { "epoch": 5.166881891328102, "grad_norm": 0.11767578125, "learning_rate": 0.0003866534220721168, "loss": 0.513, "step": 104030 }, { "epoch": 5.167378563623721, "grad_norm": 0.1044921875, "learning_rate": 0.00038661368828846724, "loss": 0.498, "step": 104040 }, { "epoch": 5.1678752359193405, "grad_norm": 0.1044921875, "learning_rate": 0.00038657395450481777, "loss": 0.4811, "step": 104050 }, { "epoch": 5.16837190821496, "grad_norm": 0.1435546875, "learning_rate": 0.0003865342207211682, "loss": 0.516, "step": 104060 }, { "epoch": 5.168868580510579, "grad_norm": 0.12353515625, "learning_rate": 0.00038649448693751866, "loss": 0.4896, "step": 104070 }, { "epoch": 5.169365252806198, "grad_norm": 0.162109375, "learning_rate": 0.0003864547531538691, "loss": 0.4965, "step": 104080 }, { "epoch": 5.169861925101817, "grad_norm": 0.1328125, "learning_rate": 0.00038641501937021955, "loss": 0.5355, "step": 104090 }, { "epoch": 5.1703585973974375, "grad_norm": 0.138671875, "learning_rate": 0.00038637528558657, "loss": 0.4911, "step": 104100 }, { "epoch": 5.170855269693057, "grad_norm": 0.10693359375, "learning_rate": 0.00038633555180292043, "loss": 0.4893, "step": 104110 }, { "epoch": 5.171351941988676, "grad_norm": 0.177734375, "learning_rate": 0.0003862958180192709, "loss": 0.5215, "step": 104120 }, { "epoch": 5.171848614284295, "grad_norm": 0.1298828125, "learning_rate": 0.0003862560842356214, "loss": 0.4862, "step": 104130 }, { "epoch": 5.172345286579914, "grad_norm": 0.13671875, "learning_rate": 0.0003862163504519718, "loss": 0.5223, "step": 104140 }, { "epoch": 5.172841958875534, "grad_norm": 0.1240234375, "learning_rate": 0.00038617661666832227, "loss": 0.5009, "step": 104150 }, { "epoch": 5.173338631171153, "grad_norm": 0.1318359375, "learning_rate": 0.00038613688288467274, "loss": 0.5038, "step": 104160 }, { "epoch": 5.173835303466773, "grad_norm": 0.11083984375, "learning_rate": 0.00038609714910102315, "loss": 0.503, "step": 104170 }, { "epoch": 5.174331975762392, "grad_norm": 0.1123046875, "learning_rate": 0.0003860574153173736, "loss": 0.5017, "step": 104180 }, { "epoch": 5.1748286480580115, "grad_norm": 0.11669921875, "learning_rate": 0.00038601768153372404, "loss": 0.5268, "step": 104190 }, { "epoch": 5.175325320353631, "grad_norm": 0.171875, "learning_rate": 0.0003859779477500745, "loss": 0.5443, "step": 104200 }, { "epoch": 5.17582199264925, "grad_norm": 0.126953125, "learning_rate": 0.000385938213966425, "loss": 0.4868, "step": 104210 }, { "epoch": 5.176318664944869, "grad_norm": 0.1298828125, "learning_rate": 0.0003858984801827754, "loss": 0.5093, "step": 104220 }, { "epoch": 5.176815337240488, "grad_norm": 0.1376953125, "learning_rate": 0.0003858587463991259, "loss": 0.4801, "step": 104230 }, { "epoch": 5.1773120095361085, "grad_norm": 0.138671875, "learning_rate": 0.00038581901261547635, "loss": 0.5064, "step": 104240 }, { "epoch": 5.177808681831728, "grad_norm": 0.1044921875, "learning_rate": 0.00038577927883182676, "loss": 0.5123, "step": 104250 }, { "epoch": 5.178305354127347, "grad_norm": 0.11767578125, "learning_rate": 0.00038573954504817723, "loss": 0.4905, "step": 104260 }, { "epoch": 5.178802026422966, "grad_norm": 0.11328125, "learning_rate": 0.00038569981126452765, "loss": 0.4858, "step": 104270 }, { "epoch": 5.179298698718585, "grad_norm": 0.11474609375, "learning_rate": 0.0003856600774808782, "loss": 0.4955, "step": 104280 }, { "epoch": 5.179795371014205, "grad_norm": 0.1123046875, "learning_rate": 0.0003856203436972286, "loss": 0.4959, "step": 104290 }, { "epoch": 5.180292043309824, "grad_norm": 0.126953125, "learning_rate": 0.000385580609913579, "loss": 0.4797, "step": 104300 }, { "epoch": 5.180788715605443, "grad_norm": 0.1357421875, "learning_rate": 0.0003855408761299295, "loss": 0.5123, "step": 104310 }, { "epoch": 5.181285387901063, "grad_norm": 0.11962890625, "learning_rate": 0.00038550114234627995, "loss": 0.5286, "step": 104320 }, { "epoch": 5.181782060196682, "grad_norm": 0.1337890625, "learning_rate": 0.00038546140856263037, "loss": 0.5173, "step": 104330 }, { "epoch": 5.182278732492302, "grad_norm": 0.12109375, "learning_rate": 0.00038542167477898084, "loss": 0.5002, "step": 104340 }, { "epoch": 5.182775404787921, "grad_norm": 0.12158203125, "learning_rate": 0.0003853819409953313, "loss": 0.5172, "step": 104350 }, { "epoch": 5.18327207708354, "grad_norm": 0.11328125, "learning_rate": 0.0003853422072116818, "loss": 0.5017, "step": 104360 }, { "epoch": 5.183768749379159, "grad_norm": 0.1279296875, "learning_rate": 0.0003853024734280322, "loss": 0.5134, "step": 104370 }, { "epoch": 5.1842654216747786, "grad_norm": 0.1484375, "learning_rate": 0.0003852627396443826, "loss": 0.5253, "step": 104380 }, { "epoch": 5.184762093970399, "grad_norm": 0.12353515625, "learning_rate": 0.00038522300586073314, "loss": 0.4886, "step": 104390 }, { "epoch": 5.185258766266018, "grad_norm": 0.130859375, "learning_rate": 0.00038518327207708356, "loss": 0.549, "step": 104400 }, { "epoch": 5.185755438561637, "grad_norm": 0.212890625, "learning_rate": 0.000385143538293434, "loss": 0.5098, "step": 104410 }, { "epoch": 5.186252110857256, "grad_norm": 0.130859375, "learning_rate": 0.00038510380450978445, "loss": 0.5156, "step": 104420 }, { "epoch": 5.186748783152876, "grad_norm": 0.12109375, "learning_rate": 0.0003850640707261349, "loss": 0.5012, "step": 104430 }, { "epoch": 5.187245455448495, "grad_norm": 0.115234375, "learning_rate": 0.0003850243369424854, "loss": 0.4997, "step": 104440 }, { "epoch": 5.187742127744114, "grad_norm": 0.15234375, "learning_rate": 0.0003849846031588358, "loss": 0.5118, "step": 104450 }, { "epoch": 5.188238800039734, "grad_norm": 0.11328125, "learning_rate": 0.0003849448693751863, "loss": 0.4779, "step": 104460 }, { "epoch": 5.188735472335353, "grad_norm": 0.1279296875, "learning_rate": 0.00038490513559153675, "loss": 0.5122, "step": 104470 }, { "epoch": 5.189232144630973, "grad_norm": 0.11865234375, "learning_rate": 0.00038486540180788717, "loss": 0.5162, "step": 104480 }, { "epoch": 5.189728816926592, "grad_norm": 0.1337890625, "learning_rate": 0.0003848256680242376, "loss": 0.5082, "step": 104490 }, { "epoch": 5.190225489222211, "grad_norm": 0.12353515625, "learning_rate": 0.0003847859342405881, "loss": 0.5018, "step": 104500 }, { "epoch": 5.19072216151783, "grad_norm": 0.11767578125, "learning_rate": 0.00038474620045693853, "loss": 0.5197, "step": 104510 }, { "epoch": 5.1912188338134495, "grad_norm": 0.11669921875, "learning_rate": 0.000384706466673289, "loss": 0.4853, "step": 104520 }, { "epoch": 5.19171550610907, "grad_norm": 0.1171875, "learning_rate": 0.0003846667328896394, "loss": 0.5236, "step": 104530 }, { "epoch": 5.192212178404689, "grad_norm": 0.126953125, "learning_rate": 0.0003846269991059899, "loss": 0.5041, "step": 104540 }, { "epoch": 5.192708850700308, "grad_norm": 0.1103515625, "learning_rate": 0.00038458726532234036, "loss": 0.5011, "step": 104550 }, { "epoch": 5.193205522995927, "grad_norm": 0.11376953125, "learning_rate": 0.0003845475315386908, "loss": 0.4773, "step": 104560 }, { "epoch": 5.1937021952915465, "grad_norm": 0.1328125, "learning_rate": 0.0003845077977550412, "loss": 0.5185, "step": 104570 }, { "epoch": 5.194198867587166, "grad_norm": 0.1416015625, "learning_rate": 0.0003844680639713917, "loss": 0.5093, "step": 104580 }, { "epoch": 5.194695539882785, "grad_norm": 0.1298828125, "learning_rate": 0.00038442833018774214, "loss": 0.4653, "step": 104590 }, { "epoch": 5.195192212178405, "grad_norm": 0.1259765625, "learning_rate": 0.0003843885964040926, "loss": 0.5008, "step": 104600 }, { "epoch": 5.195688884474024, "grad_norm": 0.1318359375, "learning_rate": 0.000384348862620443, "loss": 0.4891, "step": 104610 }, { "epoch": 5.1961855567696436, "grad_norm": 0.14453125, "learning_rate": 0.0003843091288367935, "loss": 0.5258, "step": 104620 }, { "epoch": 5.196682229065263, "grad_norm": 0.1240234375, "learning_rate": 0.00038426939505314397, "loss": 0.5075, "step": 104630 }, { "epoch": 5.197178901360882, "grad_norm": 0.1396484375, "learning_rate": 0.0003842296612694944, "loss": 0.5251, "step": 104640 }, { "epoch": 5.197675573656501, "grad_norm": 0.10107421875, "learning_rate": 0.00038418992748584486, "loss": 0.5075, "step": 104650 }, { "epoch": 5.1981722459521205, "grad_norm": 0.169921875, "learning_rate": 0.00038415019370219533, "loss": 0.4976, "step": 104660 }, { "epoch": 5.198668918247741, "grad_norm": 0.1689453125, "learning_rate": 0.00038411045991854575, "loss": 0.4958, "step": 104670 }, { "epoch": 5.19916559054336, "grad_norm": 0.1494140625, "learning_rate": 0.0003840707261348962, "loss": 0.5061, "step": 104680 }, { "epoch": 5.199662262838979, "grad_norm": 0.11962890625, "learning_rate": 0.0003840309923512467, "loss": 0.5057, "step": 104690 }, { "epoch": 5.200158935134598, "grad_norm": 0.11328125, "learning_rate": 0.0003839912585675971, "loss": 0.5427, "step": 104700 }, { "epoch": 5.2006556074302175, "grad_norm": 0.10986328125, "learning_rate": 0.0003839515247839476, "loss": 0.4838, "step": 104710 }, { "epoch": 5.201152279725837, "grad_norm": 0.130859375, "learning_rate": 0.000383911791000298, "loss": 0.5102, "step": 104720 }, { "epoch": 5.201648952021456, "grad_norm": 0.11767578125, "learning_rate": 0.0003838720572166485, "loss": 0.4762, "step": 104730 }, { "epoch": 5.202145624317075, "grad_norm": 0.12890625, "learning_rate": 0.00038383232343299894, "loss": 0.5001, "step": 104740 }, { "epoch": 5.202642296612695, "grad_norm": 0.13671875, "learning_rate": 0.00038379258964934935, "loss": 0.5035, "step": 104750 }, { "epoch": 5.2031389689083145, "grad_norm": 0.1533203125, "learning_rate": 0.0003837528558656998, "loss": 0.5256, "step": 104760 }, { "epoch": 5.203635641203934, "grad_norm": 0.1162109375, "learning_rate": 0.0003837131220820503, "loss": 0.4938, "step": 104770 }, { "epoch": 5.204132313499553, "grad_norm": 0.134765625, "learning_rate": 0.0003836733882984007, "loss": 0.5047, "step": 104780 }, { "epoch": 5.204628985795172, "grad_norm": 0.11962890625, "learning_rate": 0.0003836336545147512, "loss": 0.4969, "step": 104790 }, { "epoch": 5.205125658090791, "grad_norm": 0.119140625, "learning_rate": 0.00038359392073110166, "loss": 0.5321, "step": 104800 }, { "epoch": 5.205622330386411, "grad_norm": 0.1533203125, "learning_rate": 0.00038355418694745213, "loss": 0.49, "step": 104810 }, { "epoch": 5.206119002682031, "grad_norm": 0.1220703125, "learning_rate": 0.00038351445316380255, "loss": 0.4994, "step": 104820 }, { "epoch": 5.20661567497765, "grad_norm": 0.14453125, "learning_rate": 0.00038347471938015296, "loss": 0.494, "step": 104830 }, { "epoch": 5.207112347273269, "grad_norm": 0.1123046875, "learning_rate": 0.00038343498559650343, "loss": 0.5085, "step": 104840 }, { "epoch": 5.207609019568888, "grad_norm": 0.10693359375, "learning_rate": 0.0003833952518128539, "loss": 0.4951, "step": 104850 }, { "epoch": 5.208105691864508, "grad_norm": 0.10693359375, "learning_rate": 0.0003833555180292043, "loss": 0.4921, "step": 104860 }, { "epoch": 5.208602364160127, "grad_norm": 0.1220703125, "learning_rate": 0.0003833157842455548, "loss": 0.4962, "step": 104870 }, { "epoch": 5.209099036455746, "grad_norm": 0.1279296875, "learning_rate": 0.00038327605046190526, "loss": 0.4833, "step": 104880 }, { "epoch": 5.209595708751366, "grad_norm": 0.11376953125, "learning_rate": 0.00038323631667825574, "loss": 0.5146, "step": 104890 }, { "epoch": 5.2100923810469855, "grad_norm": 0.1337890625, "learning_rate": 0.00038319658289460615, "loss": 0.5261, "step": 104900 }, { "epoch": 5.210589053342605, "grad_norm": 0.11376953125, "learning_rate": 0.00038315684911095657, "loss": 0.5051, "step": 104910 }, { "epoch": 5.211085725638224, "grad_norm": 0.103515625, "learning_rate": 0.0003831171153273071, "loss": 0.5243, "step": 104920 }, { "epoch": 5.211582397933843, "grad_norm": 0.111328125, "learning_rate": 0.0003830773815436575, "loss": 0.5141, "step": 104930 }, { "epoch": 5.212079070229462, "grad_norm": 0.1142578125, "learning_rate": 0.00038303764776000793, "loss": 0.5209, "step": 104940 }, { "epoch": 5.212575742525082, "grad_norm": 0.1455078125, "learning_rate": 0.0003829979139763584, "loss": 0.5151, "step": 104950 }, { "epoch": 5.213072414820702, "grad_norm": 0.1435546875, "learning_rate": 0.0003829581801927089, "loss": 0.4639, "step": 104960 }, { "epoch": 5.213569087116321, "grad_norm": 0.1083984375, "learning_rate": 0.00038291844640905934, "loss": 0.5022, "step": 104970 }, { "epoch": 5.21406575941194, "grad_norm": 0.1162109375, "learning_rate": 0.00038287871262540976, "loss": 0.5297, "step": 104980 }, { "epoch": 5.214562431707559, "grad_norm": 0.1123046875, "learning_rate": 0.00038283897884176023, "loss": 0.5268, "step": 104990 }, { "epoch": 5.215059104003179, "grad_norm": 0.10791015625, "learning_rate": 0.0003827992450581107, "loss": 0.4914, "step": 105000 }, { "epoch": 5.215555776298798, "grad_norm": 0.10986328125, "learning_rate": 0.0003827595112744611, "loss": 0.4876, "step": 105010 }, { "epoch": 5.216052448594417, "grad_norm": 0.11279296875, "learning_rate": 0.00038271977749081154, "loss": 0.4791, "step": 105020 }, { "epoch": 5.216549120890036, "grad_norm": 0.1181640625, "learning_rate": 0.00038268004370716206, "loss": 0.5153, "step": 105030 }, { "epoch": 5.217045793185656, "grad_norm": 0.1103515625, "learning_rate": 0.0003826403099235125, "loss": 0.4919, "step": 105040 }, { "epoch": 5.217542465481276, "grad_norm": 0.1201171875, "learning_rate": 0.00038260057613986295, "loss": 0.529, "step": 105050 }, { "epoch": 5.218039137776895, "grad_norm": 0.1181640625, "learning_rate": 0.00038256084235621337, "loss": 0.5042, "step": 105060 }, { "epoch": 5.218535810072514, "grad_norm": 0.1357421875, "learning_rate": 0.00038252110857256384, "loss": 0.4991, "step": 105070 }, { "epoch": 5.219032482368133, "grad_norm": 0.1298828125, "learning_rate": 0.0003824813747889143, "loss": 0.5019, "step": 105080 }, { "epoch": 5.219529154663753, "grad_norm": 0.1337890625, "learning_rate": 0.00038244164100526473, "loss": 0.4988, "step": 105090 }, { "epoch": 5.220025826959372, "grad_norm": 0.11181640625, "learning_rate": 0.0003824019072216152, "loss": 0.5088, "step": 105100 }, { "epoch": 5.220522499254992, "grad_norm": 0.1240234375, "learning_rate": 0.00038236217343796567, "loss": 0.5021, "step": 105110 }, { "epoch": 5.221019171550611, "grad_norm": 0.140625, "learning_rate": 0.0003823224396543161, "loss": 0.4859, "step": 105120 }, { "epoch": 5.22151584384623, "grad_norm": 0.1533203125, "learning_rate": 0.00038228270587066656, "loss": 0.5081, "step": 105130 }, { "epoch": 5.22201251614185, "grad_norm": 0.1513671875, "learning_rate": 0.000382242972087017, "loss": 0.5057, "step": 105140 }, { "epoch": 5.222509188437469, "grad_norm": 0.1083984375, "learning_rate": 0.00038220323830336745, "loss": 0.5026, "step": 105150 }, { "epoch": 5.223005860733088, "grad_norm": 0.125, "learning_rate": 0.0003821635045197179, "loss": 0.5161, "step": 105160 }, { "epoch": 5.223502533028707, "grad_norm": 0.1181640625, "learning_rate": 0.00038212377073606834, "loss": 0.5062, "step": 105170 }, { "epoch": 5.223999205324327, "grad_norm": 0.12109375, "learning_rate": 0.0003820840369524188, "loss": 0.5017, "step": 105180 }, { "epoch": 5.224495877619947, "grad_norm": 0.134765625, "learning_rate": 0.0003820443031687693, "loss": 0.5259, "step": 105190 }, { "epoch": 5.224992549915566, "grad_norm": 0.12890625, "learning_rate": 0.0003820045693851197, "loss": 0.5123, "step": 105200 }, { "epoch": 5.225489222211185, "grad_norm": 0.130859375, "learning_rate": 0.00038196483560147017, "loss": 0.533, "step": 105210 }, { "epoch": 5.225985894506804, "grad_norm": 0.1357421875, "learning_rate": 0.00038192510181782064, "loss": 0.4918, "step": 105220 }, { "epoch": 5.2264825668024235, "grad_norm": 0.11181640625, "learning_rate": 0.00038188536803417106, "loss": 0.4995, "step": 105230 }, { "epoch": 5.226979239098043, "grad_norm": 0.115234375, "learning_rate": 0.00038184563425052153, "loss": 0.5495, "step": 105240 }, { "epoch": 5.227475911393663, "grad_norm": 0.12060546875, "learning_rate": 0.00038180590046687195, "loss": 0.4786, "step": 105250 }, { "epoch": 5.227972583689282, "grad_norm": 0.11669921875, "learning_rate": 0.00038176616668322247, "loss": 0.494, "step": 105260 }, { "epoch": 5.228469255984901, "grad_norm": 0.119140625, "learning_rate": 0.0003817264328995729, "loss": 0.4965, "step": 105270 }, { "epoch": 5.2289659282805205, "grad_norm": 0.12890625, "learning_rate": 0.0003816866991159233, "loss": 0.4844, "step": 105280 }, { "epoch": 5.22946260057614, "grad_norm": 0.1220703125, "learning_rate": 0.0003816469653322738, "loss": 0.4911, "step": 105290 }, { "epoch": 5.229959272871759, "grad_norm": 0.1259765625, "learning_rate": 0.00038160723154862425, "loss": 0.5403, "step": 105300 }, { "epoch": 5.230455945167378, "grad_norm": 0.12255859375, "learning_rate": 0.00038156749776497467, "loss": 0.503, "step": 105310 }, { "epoch": 5.230952617462998, "grad_norm": 0.134765625, "learning_rate": 0.00038152776398132514, "loss": 0.5058, "step": 105320 }, { "epoch": 5.231449289758618, "grad_norm": 0.1474609375, "learning_rate": 0.0003814880301976756, "loss": 0.5087, "step": 105330 }, { "epoch": 5.231945962054237, "grad_norm": 0.1533203125, "learning_rate": 0.0003814482964140261, "loss": 0.5184, "step": 105340 }, { "epoch": 5.232442634349856, "grad_norm": 0.1318359375, "learning_rate": 0.0003814085626303765, "loss": 0.4996, "step": 105350 }, { "epoch": 5.232939306645475, "grad_norm": 0.1318359375, "learning_rate": 0.0003813688288467269, "loss": 0.4916, "step": 105360 }, { "epoch": 5.2334359789410945, "grad_norm": 0.1669921875, "learning_rate": 0.00038132909506307744, "loss": 0.5033, "step": 105370 }, { "epoch": 5.233932651236714, "grad_norm": 0.109375, "learning_rate": 0.00038128936127942786, "loss": 0.5365, "step": 105380 }, { "epoch": 5.234429323532334, "grad_norm": 0.11865234375, "learning_rate": 0.0003812496274957783, "loss": 0.4962, "step": 105390 }, { "epoch": 5.234925995827953, "grad_norm": 0.1083984375, "learning_rate": 0.00038120989371212875, "loss": 0.4889, "step": 105400 }, { "epoch": 5.235422668123572, "grad_norm": 0.12060546875, "learning_rate": 0.0003811701599284792, "loss": 0.5129, "step": 105410 }, { "epoch": 5.2359193404191915, "grad_norm": 0.12353515625, "learning_rate": 0.0003811304261448297, "loss": 0.4706, "step": 105420 }, { "epoch": 5.236416012714811, "grad_norm": 0.125, "learning_rate": 0.0003810906923611801, "loss": 0.4972, "step": 105430 }, { "epoch": 5.23691268501043, "grad_norm": 0.1337890625, "learning_rate": 0.0003810509585775305, "loss": 0.5081, "step": 105440 }, { "epoch": 5.237409357306049, "grad_norm": 0.1474609375, "learning_rate": 0.00038101122479388105, "loss": 0.4888, "step": 105450 }, { "epoch": 5.237906029601668, "grad_norm": 0.134765625, "learning_rate": 0.00038097149101023147, "loss": 0.4939, "step": 105460 }, { "epoch": 5.2384027018972885, "grad_norm": 0.1259765625, "learning_rate": 0.0003809317572265819, "loss": 0.5051, "step": 105470 }, { "epoch": 5.238899374192908, "grad_norm": 0.1171875, "learning_rate": 0.00038089202344293235, "loss": 0.5286, "step": 105480 }, { "epoch": 5.239396046488527, "grad_norm": 0.12353515625, "learning_rate": 0.0003808522896592828, "loss": 0.5226, "step": 105490 }, { "epoch": 5.239892718784146, "grad_norm": 0.154296875, "learning_rate": 0.0003808125558756333, "loss": 0.5016, "step": 105500 }, { "epoch": 5.240389391079765, "grad_norm": 0.11328125, "learning_rate": 0.0003807728220919837, "loss": 0.4833, "step": 105510 }, { "epoch": 5.240886063375385, "grad_norm": 0.12060546875, "learning_rate": 0.0003807330883083342, "loss": 0.5191, "step": 105520 }, { "epoch": 5.241382735671004, "grad_norm": 0.1064453125, "learning_rate": 0.00038069335452468466, "loss": 0.4965, "step": 105530 }, { "epoch": 5.241879407966624, "grad_norm": 0.12060546875, "learning_rate": 0.0003806536207410351, "loss": 0.4785, "step": 105540 }, { "epoch": 5.242376080262243, "grad_norm": 0.12890625, "learning_rate": 0.00038061388695738554, "loss": 0.5122, "step": 105550 }, { "epoch": 5.2428727525578624, "grad_norm": 0.1103515625, "learning_rate": 0.000380574153173736, "loss": 0.4763, "step": 105560 }, { "epoch": 5.243369424853482, "grad_norm": 0.11083984375, "learning_rate": 0.00038053441939008643, "loss": 0.5306, "step": 105570 }, { "epoch": 5.243866097149101, "grad_norm": 0.1142578125, "learning_rate": 0.0003804946856064369, "loss": 0.5305, "step": 105580 }, { "epoch": 5.24436276944472, "grad_norm": 0.1455078125, "learning_rate": 0.0003804549518227873, "loss": 0.4957, "step": 105590 }, { "epoch": 5.244859441740339, "grad_norm": 0.12109375, "learning_rate": 0.0003804152180391378, "loss": 0.5046, "step": 105600 }, { "epoch": 5.2453561140359595, "grad_norm": 0.11279296875, "learning_rate": 0.00038037548425548826, "loss": 0.4918, "step": 105610 }, { "epoch": 5.245852786331579, "grad_norm": 0.125, "learning_rate": 0.0003803357504718387, "loss": 0.486, "step": 105620 }, { "epoch": 5.246349458627198, "grad_norm": 0.1259765625, "learning_rate": 0.00038029601668818915, "loss": 0.4918, "step": 105630 }, { "epoch": 5.246846130922817, "grad_norm": 0.109375, "learning_rate": 0.0003802562829045396, "loss": 0.4641, "step": 105640 }, { "epoch": 5.247342803218436, "grad_norm": 0.130859375, "learning_rate": 0.00038021654912089004, "loss": 0.5065, "step": 105650 }, { "epoch": 5.247839475514056, "grad_norm": 0.1298828125, "learning_rate": 0.0003801768153372405, "loss": 0.5032, "step": 105660 }, { "epoch": 5.248336147809675, "grad_norm": 0.10595703125, "learning_rate": 0.00038013708155359093, "loss": 0.4881, "step": 105670 }, { "epoch": 5.248832820105295, "grad_norm": 0.12158203125, "learning_rate": 0.0003800973477699414, "loss": 0.4983, "step": 105680 }, { "epoch": 5.249329492400914, "grad_norm": 0.1630859375, "learning_rate": 0.00038005761398629187, "loss": 0.4834, "step": 105690 }, { "epoch": 5.249826164696533, "grad_norm": 0.11669921875, "learning_rate": 0.0003800178802026423, "loss": 0.4972, "step": 105700 }, { "epoch": 5.250322836992153, "grad_norm": 0.119140625, "learning_rate": 0.00037997814641899276, "loss": 0.5257, "step": 105710 }, { "epoch": 5.250819509287772, "grad_norm": 0.11865234375, "learning_rate": 0.00037993841263534323, "loss": 0.5216, "step": 105720 }, { "epoch": 5.251316181583391, "grad_norm": 0.11669921875, "learning_rate": 0.00037989867885169365, "loss": 0.4775, "step": 105730 }, { "epoch": 5.25181285387901, "grad_norm": 0.1484375, "learning_rate": 0.0003798589450680441, "loss": 0.4706, "step": 105740 }, { "epoch": 5.2523095261746295, "grad_norm": 0.142578125, "learning_rate": 0.0003798192112843946, "loss": 0.5176, "step": 105750 }, { "epoch": 5.25280619847025, "grad_norm": 0.1318359375, "learning_rate": 0.000379779477500745, "loss": 0.5154, "step": 105760 }, { "epoch": 5.253302870765869, "grad_norm": 0.1650390625, "learning_rate": 0.0003797397437170955, "loss": 0.4837, "step": 105770 }, { "epoch": 5.253799543061488, "grad_norm": 0.1357421875, "learning_rate": 0.0003797000099334459, "loss": 0.5053, "step": 105780 }, { "epoch": 5.254296215357107, "grad_norm": 0.2041015625, "learning_rate": 0.0003796602761497964, "loss": 0.5251, "step": 105790 }, { "epoch": 5.254792887652727, "grad_norm": 0.12255859375, "learning_rate": 0.00037962054236614684, "loss": 0.4851, "step": 105800 }, { "epoch": 5.255289559948346, "grad_norm": 0.1513671875, "learning_rate": 0.00037958080858249726, "loss": 0.502, "step": 105810 }, { "epoch": 5.255786232243965, "grad_norm": 0.11328125, "learning_rate": 0.00037954107479884773, "loss": 0.5086, "step": 105820 }, { "epoch": 5.256282904539585, "grad_norm": 0.13671875, "learning_rate": 0.0003795013410151982, "loss": 0.5406, "step": 105830 }, { "epoch": 5.256779576835204, "grad_norm": 0.1279296875, "learning_rate": 0.0003794616072315486, "loss": 0.5044, "step": 105840 }, { "epoch": 5.257276249130824, "grad_norm": 0.123046875, "learning_rate": 0.0003794218734478991, "loss": 0.4961, "step": 105850 }, { "epoch": 5.257772921426443, "grad_norm": 0.12158203125, "learning_rate": 0.00037938213966424956, "loss": 0.5446, "step": 105860 }, { "epoch": 5.258269593722062, "grad_norm": 0.119140625, "learning_rate": 0.00037934240588060003, "loss": 0.4895, "step": 105870 }, { "epoch": 5.258766266017681, "grad_norm": 0.1826171875, "learning_rate": 0.00037930267209695045, "loss": 0.4977, "step": 105880 }, { "epoch": 5.2592629383133005, "grad_norm": 0.1396484375, "learning_rate": 0.00037926293831330087, "loss": 0.5135, "step": 105890 }, { "epoch": 5.259759610608921, "grad_norm": 0.1298828125, "learning_rate": 0.0003792232045296514, "loss": 0.4763, "step": 105900 }, { "epoch": 5.26025628290454, "grad_norm": 0.1376953125, "learning_rate": 0.0003791834707460018, "loss": 0.4835, "step": 105910 }, { "epoch": 5.260752955200159, "grad_norm": 0.1279296875, "learning_rate": 0.0003791437369623522, "loss": 0.4958, "step": 105920 }, { "epoch": 5.261249627495778, "grad_norm": 0.11669921875, "learning_rate": 0.0003791040031787027, "loss": 0.4818, "step": 105930 }, { "epoch": 5.2617462997913975, "grad_norm": 0.11767578125, "learning_rate": 0.00037906426939505317, "loss": 0.5103, "step": 105940 }, { "epoch": 5.262242972087017, "grad_norm": 0.11474609375, "learning_rate": 0.00037902453561140364, "loss": 0.4874, "step": 105950 }, { "epoch": 5.262739644382636, "grad_norm": 0.1572265625, "learning_rate": 0.00037898480182775406, "loss": 0.5046, "step": 105960 }, { "epoch": 5.263236316678256, "grad_norm": 0.11376953125, "learning_rate": 0.0003789450680441045, "loss": 0.4964, "step": 105970 }, { "epoch": 5.263732988973875, "grad_norm": 0.11328125, "learning_rate": 0.000378905334260455, "loss": 0.4827, "step": 105980 }, { "epoch": 5.2642296612694945, "grad_norm": 0.12255859375, "learning_rate": 0.0003788656004768054, "loss": 0.5055, "step": 105990 }, { "epoch": 5.264726333565114, "grad_norm": 0.12109375, "learning_rate": 0.0003788258666931559, "loss": 0.5091, "step": 106000 }, { "epoch": 5.265223005860733, "grad_norm": 0.12255859375, "learning_rate": 0.0003787861329095063, "loss": 0.5217, "step": 106010 }, { "epoch": 5.265719678156352, "grad_norm": 0.1552734375, "learning_rate": 0.0003787463991258568, "loss": 0.4808, "step": 106020 }, { "epoch": 5.2662163504519715, "grad_norm": 0.11767578125, "learning_rate": 0.00037870666534220725, "loss": 0.5099, "step": 106030 }, { "epoch": 5.266713022747592, "grad_norm": 0.1376953125, "learning_rate": 0.00037866693155855767, "loss": 0.5062, "step": 106040 }, { "epoch": 5.267209695043211, "grad_norm": 0.1083984375, "learning_rate": 0.00037862719777490814, "loss": 0.5205, "step": 106050 }, { "epoch": 5.26770636733883, "grad_norm": 0.142578125, "learning_rate": 0.0003785874639912586, "loss": 0.5015, "step": 106060 }, { "epoch": 5.268203039634449, "grad_norm": 0.123046875, "learning_rate": 0.000378547730207609, "loss": 0.5136, "step": 106070 }, { "epoch": 5.2686997119300685, "grad_norm": 0.11279296875, "learning_rate": 0.0003785079964239595, "loss": 0.4776, "step": 106080 }, { "epoch": 5.269196384225688, "grad_norm": 0.1123046875, "learning_rate": 0.00037846826264030997, "loss": 0.5012, "step": 106090 }, { "epoch": 5.269693056521307, "grad_norm": 0.107421875, "learning_rate": 0.0003784285288566604, "loss": 0.5194, "step": 106100 }, { "epoch": 5.270189728816927, "grad_norm": 0.1240234375, "learning_rate": 0.00037838879507301086, "loss": 0.515, "step": 106110 }, { "epoch": 5.270686401112546, "grad_norm": 0.1650390625, "learning_rate": 0.0003783490612893613, "loss": 0.4908, "step": 106120 }, { "epoch": 5.2711830734081655, "grad_norm": 0.1298828125, "learning_rate": 0.00037830932750571174, "loss": 0.4953, "step": 106130 }, { "epoch": 5.271679745703785, "grad_norm": 0.1162109375, "learning_rate": 0.0003782695937220622, "loss": 0.493, "step": 106140 }, { "epoch": 5.272176417999404, "grad_norm": 0.11767578125, "learning_rate": 0.00037822985993841263, "loss": 0.4988, "step": 106150 }, { "epoch": 5.272673090295023, "grad_norm": 0.125, "learning_rate": 0.0003781901261547631, "loss": 0.4874, "step": 106160 }, { "epoch": 5.273169762590642, "grad_norm": 0.1357421875, "learning_rate": 0.0003781503923711136, "loss": 0.539, "step": 106170 }, { "epoch": 5.273666434886262, "grad_norm": 0.140625, "learning_rate": 0.000378110658587464, "loss": 0.512, "step": 106180 }, { "epoch": 5.274163107181882, "grad_norm": 0.130859375, "learning_rate": 0.00037807092480381446, "loss": 0.4819, "step": 106190 }, { "epoch": 5.274659779477501, "grad_norm": 0.10546875, "learning_rate": 0.00037803119102016494, "loss": 0.5073, "step": 106200 }, { "epoch": 5.27515645177312, "grad_norm": 0.177734375, "learning_rate": 0.00037799145723651535, "loss": 0.4994, "step": 106210 }, { "epoch": 5.275653124068739, "grad_norm": 0.1611328125, "learning_rate": 0.0003779517234528658, "loss": 0.5123, "step": 106220 }, { "epoch": 5.276149796364359, "grad_norm": 0.138671875, "learning_rate": 0.00037791198966921624, "loss": 0.4797, "step": 106230 }, { "epoch": 5.276646468659978, "grad_norm": 0.12158203125, "learning_rate": 0.0003778722558855667, "loss": 0.5056, "step": 106240 }, { "epoch": 5.277143140955597, "grad_norm": 0.11083984375, "learning_rate": 0.0003778325221019172, "loss": 0.5202, "step": 106250 }, { "epoch": 5.277639813251217, "grad_norm": 0.11328125, "learning_rate": 0.0003777927883182676, "loss": 0.5255, "step": 106260 }, { "epoch": 5.2781364855468365, "grad_norm": 0.1181640625, "learning_rate": 0.0003777530545346181, "loss": 0.4985, "step": 106270 }, { "epoch": 5.278633157842456, "grad_norm": 0.1142578125, "learning_rate": 0.00037771332075096854, "loss": 0.5092, "step": 106280 }, { "epoch": 5.279129830138075, "grad_norm": 0.1416015625, "learning_rate": 0.00037767358696731896, "loss": 0.5219, "step": 106290 }, { "epoch": 5.279626502433694, "grad_norm": 0.1787109375, "learning_rate": 0.00037763385318366943, "loss": 0.5119, "step": 106300 }, { "epoch": 5.280123174729313, "grad_norm": 0.130859375, "learning_rate": 0.00037759411940001985, "loss": 0.4844, "step": 106310 }, { "epoch": 5.280619847024933, "grad_norm": 0.142578125, "learning_rate": 0.0003775543856163704, "loss": 0.493, "step": 106320 }, { "epoch": 5.281116519320553, "grad_norm": 0.11865234375, "learning_rate": 0.0003775146518327208, "loss": 0.5006, "step": 106330 }, { "epoch": 5.281613191616172, "grad_norm": 0.1298828125, "learning_rate": 0.0003774749180490712, "loss": 0.494, "step": 106340 }, { "epoch": 5.282109863911791, "grad_norm": 0.125, "learning_rate": 0.0003774351842654217, "loss": 0.4872, "step": 106350 }, { "epoch": 5.28260653620741, "grad_norm": 0.1083984375, "learning_rate": 0.00037739545048177215, "loss": 0.4894, "step": 106360 }, { "epoch": 5.28310320850303, "grad_norm": 0.1240234375, "learning_rate": 0.0003773557166981226, "loss": 0.5106, "step": 106370 }, { "epoch": 5.283599880798649, "grad_norm": 0.123046875, "learning_rate": 0.00037731598291447304, "loss": 0.4896, "step": 106380 }, { "epoch": 5.284096553094268, "grad_norm": 0.11572265625, "learning_rate": 0.0003772762491308235, "loss": 0.513, "step": 106390 }, { "epoch": 5.284593225389887, "grad_norm": 0.130859375, "learning_rate": 0.000377236515347174, "loss": 0.5174, "step": 106400 }, { "epoch": 5.285089897685507, "grad_norm": 0.125, "learning_rate": 0.0003771967815635244, "loss": 0.5198, "step": 106410 }, { "epoch": 5.285586569981127, "grad_norm": 0.11474609375, "learning_rate": 0.0003771570477798748, "loss": 0.513, "step": 106420 }, { "epoch": 5.286083242276746, "grad_norm": 0.1103515625, "learning_rate": 0.00037711731399622534, "loss": 0.4695, "step": 106430 }, { "epoch": 5.286579914572365, "grad_norm": 0.126953125, "learning_rate": 0.00037707758021257576, "loss": 0.5157, "step": 106440 }, { "epoch": 5.287076586867984, "grad_norm": 0.11279296875, "learning_rate": 0.00037703784642892623, "loss": 0.4926, "step": 106450 }, { "epoch": 5.2875732591636035, "grad_norm": 0.12060546875, "learning_rate": 0.00037699811264527665, "loss": 0.5179, "step": 106460 }, { "epoch": 5.288069931459223, "grad_norm": 0.12158203125, "learning_rate": 0.0003769583788616271, "loss": 0.5059, "step": 106470 }, { "epoch": 5.288566603754843, "grad_norm": 0.1103515625, "learning_rate": 0.0003769186450779776, "loss": 0.5065, "step": 106480 }, { "epoch": 5.289063276050462, "grad_norm": 0.1142578125, "learning_rate": 0.000376878911294328, "loss": 0.4938, "step": 106490 }, { "epoch": 5.289559948346081, "grad_norm": 0.189453125, "learning_rate": 0.0003768391775106785, "loss": 0.5068, "step": 106500 }, { "epoch": 5.290056620641701, "grad_norm": 0.12353515625, "learning_rate": 0.00037679944372702895, "loss": 0.5231, "step": 106510 }, { "epoch": 5.29055329293732, "grad_norm": 0.11328125, "learning_rate": 0.00037675970994337937, "loss": 0.5003, "step": 106520 }, { "epoch": 5.291049965232939, "grad_norm": 0.130859375, "learning_rate": 0.00037671997615972984, "loss": 0.4978, "step": 106530 }, { "epoch": 5.291546637528558, "grad_norm": 0.1611328125, "learning_rate": 0.00037668024237608026, "loss": 0.4801, "step": 106540 }, { "epoch": 5.292043309824178, "grad_norm": 0.1484375, "learning_rate": 0.00037664050859243073, "loss": 0.5235, "step": 106550 }, { "epoch": 5.292539982119798, "grad_norm": 0.11474609375, "learning_rate": 0.0003766007748087812, "loss": 0.4832, "step": 106560 }, { "epoch": 5.293036654415417, "grad_norm": 0.1220703125, "learning_rate": 0.0003765610410251316, "loss": 0.5165, "step": 106570 }, { "epoch": 5.293533326711036, "grad_norm": 0.12158203125, "learning_rate": 0.0003765213072414821, "loss": 0.5267, "step": 106580 }, { "epoch": 5.294029999006655, "grad_norm": 0.126953125, "learning_rate": 0.00037648157345783256, "loss": 0.5177, "step": 106590 }, { "epoch": 5.2945266713022745, "grad_norm": 0.1201171875, "learning_rate": 0.000376441839674183, "loss": 0.5257, "step": 106600 }, { "epoch": 5.295023343597894, "grad_norm": 0.146484375, "learning_rate": 0.00037640210589053345, "loss": 0.498, "step": 106610 }, { "epoch": 5.295520015893514, "grad_norm": 0.11669921875, "learning_rate": 0.0003763623721068839, "loss": 0.5108, "step": 106620 }, { "epoch": 5.296016688189133, "grad_norm": 0.11572265625, "learning_rate": 0.00037632263832323434, "loss": 0.4712, "step": 106630 }, { "epoch": 5.296513360484752, "grad_norm": 0.1328125, "learning_rate": 0.0003762829045395848, "loss": 0.4676, "step": 106640 }, { "epoch": 5.2970100327803715, "grad_norm": 0.1279296875, "learning_rate": 0.0003762431707559352, "loss": 0.4873, "step": 106650 }, { "epoch": 5.297506705075991, "grad_norm": 0.125, "learning_rate": 0.0003762034369722857, "loss": 0.5055, "step": 106660 }, { "epoch": 5.29800337737161, "grad_norm": 0.134765625, "learning_rate": 0.00037616370318863617, "loss": 0.507, "step": 106670 }, { "epoch": 5.298500049667229, "grad_norm": 0.140625, "learning_rate": 0.0003761239694049866, "loss": 0.5004, "step": 106680 }, { "epoch": 5.298996721962849, "grad_norm": 0.19921875, "learning_rate": 0.00037608423562133706, "loss": 0.4958, "step": 106690 }, { "epoch": 5.2994933942584685, "grad_norm": 0.125, "learning_rate": 0.00037604450183768753, "loss": 0.5015, "step": 106700 }, { "epoch": 5.299990066554088, "grad_norm": 0.1435546875, "learning_rate": 0.00037600476805403795, "loss": 0.5173, "step": 106710 }, { "epoch": 5.300486738849707, "grad_norm": 0.1328125, "learning_rate": 0.0003759650342703884, "loss": 0.5207, "step": 106720 }, { "epoch": 5.300983411145326, "grad_norm": 0.1318359375, "learning_rate": 0.0003759253004867389, "loss": 0.506, "step": 106730 }, { "epoch": 5.3014800834409455, "grad_norm": 0.11767578125, "learning_rate": 0.0003758855667030893, "loss": 0.4992, "step": 106740 }, { "epoch": 5.301976755736565, "grad_norm": 0.11962890625, "learning_rate": 0.0003758458329194398, "loss": 0.4862, "step": 106750 }, { "epoch": 5.302473428032185, "grad_norm": 0.1455078125, "learning_rate": 0.0003758060991357902, "loss": 0.5146, "step": 106760 }, { "epoch": 5.302970100327804, "grad_norm": 0.181640625, "learning_rate": 0.0003757663653521407, "loss": 0.5075, "step": 106770 }, { "epoch": 5.303466772623423, "grad_norm": 0.111328125, "learning_rate": 0.00037572663156849114, "loss": 0.5059, "step": 106780 }, { "epoch": 5.3039634449190425, "grad_norm": 0.1220703125, "learning_rate": 0.00037568689778484155, "loss": 0.5584, "step": 106790 }, { "epoch": 5.304460117214662, "grad_norm": 0.11962890625, "learning_rate": 0.000375647164001192, "loss": 0.527, "step": 106800 }, { "epoch": 5.304956789510281, "grad_norm": 0.1279296875, "learning_rate": 0.0003756074302175425, "loss": 0.5224, "step": 106810 }, { "epoch": 5.3054534618059, "grad_norm": 0.1494140625, "learning_rate": 0.00037556769643389297, "loss": 0.4929, "step": 106820 }, { "epoch": 5.30595013410152, "grad_norm": 0.109375, "learning_rate": 0.0003755279626502434, "loss": 0.5004, "step": 106830 }, { "epoch": 5.3064468063971395, "grad_norm": 0.1259765625, "learning_rate": 0.0003754882288665938, "loss": 0.5135, "step": 106840 }, { "epoch": 5.306943478692759, "grad_norm": 0.1181640625, "learning_rate": 0.00037544849508294433, "loss": 0.523, "step": 106850 }, { "epoch": 5.307440150988378, "grad_norm": 0.13671875, "learning_rate": 0.00037540876129929474, "loss": 0.5097, "step": 106860 }, { "epoch": 5.307936823283997, "grad_norm": 0.1728515625, "learning_rate": 0.00037536902751564516, "loss": 0.4743, "step": 106870 }, { "epoch": 5.308433495579616, "grad_norm": 0.1533203125, "learning_rate": 0.00037532929373199563, "loss": 0.4911, "step": 106880 }, { "epoch": 5.308930167875236, "grad_norm": 0.11865234375, "learning_rate": 0.0003752895599483461, "loss": 0.4959, "step": 106890 }, { "epoch": 5.309426840170855, "grad_norm": 0.10693359375, "learning_rate": 0.0003752498261646966, "loss": 0.4944, "step": 106900 }, { "epoch": 5.309923512466475, "grad_norm": 0.119140625, "learning_rate": 0.000375210092381047, "loss": 0.4845, "step": 106910 }, { "epoch": 5.310420184762094, "grad_norm": 0.1416015625, "learning_rate": 0.00037517035859739746, "loss": 0.5044, "step": 106920 }, { "epoch": 5.310916857057713, "grad_norm": 0.11962890625, "learning_rate": 0.00037513062481374794, "loss": 0.5066, "step": 106930 }, { "epoch": 5.311413529353333, "grad_norm": 0.11962890625, "learning_rate": 0.00037509089103009835, "loss": 0.5175, "step": 106940 }, { "epoch": 5.311910201648952, "grad_norm": 0.185546875, "learning_rate": 0.00037505115724644877, "loss": 0.4814, "step": 106950 }, { "epoch": 5.312406873944571, "grad_norm": 0.1259765625, "learning_rate": 0.0003750114234627993, "loss": 0.5106, "step": 106960 }, { "epoch": 5.31290354624019, "grad_norm": 0.1513671875, "learning_rate": 0.0003749716896791497, "loss": 0.4795, "step": 106970 }, { "epoch": 5.3134002185358105, "grad_norm": 0.1328125, "learning_rate": 0.0003749319558955002, "loss": 0.5057, "step": 106980 }, { "epoch": 5.31389689083143, "grad_norm": 0.126953125, "learning_rate": 0.0003748922221118506, "loss": 0.4873, "step": 106990 }, { "epoch": 5.314393563127049, "grad_norm": 0.1123046875, "learning_rate": 0.00037485248832820107, "loss": 0.5039, "step": 107000 }, { "epoch": 5.314890235422668, "grad_norm": 0.119140625, "learning_rate": 0.00037481275454455154, "loss": 0.5208, "step": 107010 }, { "epoch": 5.315386907718287, "grad_norm": 0.130859375, "learning_rate": 0.00037477302076090196, "loss": 0.4899, "step": 107020 }, { "epoch": 5.315883580013907, "grad_norm": 0.11279296875, "learning_rate": 0.00037473328697725243, "loss": 0.5226, "step": 107030 }, { "epoch": 5.316380252309526, "grad_norm": 0.11279296875, "learning_rate": 0.0003746935531936029, "loss": 0.5062, "step": 107040 }, { "epoch": 5.316876924605146, "grad_norm": 0.11083984375, "learning_rate": 0.0003746538194099533, "loss": 0.5017, "step": 107050 }, { "epoch": 5.317373596900765, "grad_norm": 0.1220703125, "learning_rate": 0.0003746140856263038, "loss": 0.5258, "step": 107060 }, { "epoch": 5.317870269196384, "grad_norm": 0.125, "learning_rate": 0.00037457435184265426, "loss": 0.4831, "step": 107070 }, { "epoch": 5.318366941492004, "grad_norm": 0.10400390625, "learning_rate": 0.0003745346180590047, "loss": 0.4974, "step": 107080 }, { "epoch": 5.318863613787623, "grad_norm": 0.1611328125, "learning_rate": 0.00037449488427535515, "loss": 0.5158, "step": 107090 }, { "epoch": 5.319360286083242, "grad_norm": 0.123046875, "learning_rate": 0.00037445515049170557, "loss": 0.5136, "step": 107100 }, { "epoch": 5.319856958378861, "grad_norm": 0.1513671875, "learning_rate": 0.00037441541670805604, "loss": 0.4901, "step": 107110 }, { "epoch": 5.3203536306744805, "grad_norm": 0.11328125, "learning_rate": 0.0003743756829244065, "loss": 0.4909, "step": 107120 }, { "epoch": 5.320850302970101, "grad_norm": 0.123046875, "learning_rate": 0.00037433594914075693, "loss": 0.5183, "step": 107130 }, { "epoch": 5.32134697526572, "grad_norm": 0.10595703125, "learning_rate": 0.0003742962153571074, "loss": 0.4713, "step": 107140 }, { "epoch": 5.321843647561339, "grad_norm": 0.1396484375, "learning_rate": 0.00037425648157345787, "loss": 0.5129, "step": 107150 }, { "epoch": 5.322340319856958, "grad_norm": 0.1181640625, "learning_rate": 0.0003742167477898083, "loss": 0.4957, "step": 107160 }, { "epoch": 5.3228369921525776, "grad_norm": 0.138671875, "learning_rate": 0.00037417701400615876, "loss": 0.4968, "step": 107170 }, { "epoch": 5.323333664448197, "grad_norm": 0.11669921875, "learning_rate": 0.0003741372802225092, "loss": 0.4901, "step": 107180 }, { "epoch": 5.323830336743816, "grad_norm": 0.1484375, "learning_rate": 0.00037409754643885965, "loss": 0.5394, "step": 107190 }, { "epoch": 5.324327009039436, "grad_norm": 0.12353515625, "learning_rate": 0.0003740578126552101, "loss": 0.5215, "step": 107200 }, { "epoch": 5.324823681335055, "grad_norm": 0.1259765625, "learning_rate": 0.00037401807887156054, "loss": 0.4856, "step": 107210 }, { "epoch": 5.325320353630675, "grad_norm": 0.1357421875, "learning_rate": 0.000373978345087911, "loss": 0.5135, "step": 107220 }, { "epoch": 5.325817025926294, "grad_norm": 0.1279296875, "learning_rate": 0.0003739386113042615, "loss": 0.5232, "step": 107230 }, { "epoch": 5.326313698221913, "grad_norm": 0.12158203125, "learning_rate": 0.0003738988775206119, "loss": 0.5302, "step": 107240 }, { "epoch": 5.326810370517532, "grad_norm": 0.1416015625, "learning_rate": 0.00037385914373696237, "loss": 0.4882, "step": 107250 }, { "epoch": 5.3273070428131515, "grad_norm": 0.16015625, "learning_rate": 0.00037381940995331284, "loss": 0.5092, "step": 107260 }, { "epoch": 5.327803715108772, "grad_norm": 0.1162109375, "learning_rate": 0.0003737796761696633, "loss": 0.533, "step": 107270 }, { "epoch": 5.328300387404391, "grad_norm": 0.1357421875, "learning_rate": 0.00037373994238601373, "loss": 0.5166, "step": 107280 }, { "epoch": 5.32879705970001, "grad_norm": 0.11669921875, "learning_rate": 0.00037370020860236415, "loss": 0.5068, "step": 107290 }, { "epoch": 5.329293731995629, "grad_norm": 0.12109375, "learning_rate": 0.00037366047481871467, "loss": 0.4994, "step": 107300 }, { "epoch": 5.3297904042912485, "grad_norm": 0.15625, "learning_rate": 0.0003736207410350651, "loss": 0.5246, "step": 107310 }, { "epoch": 5.330287076586868, "grad_norm": 0.111328125, "learning_rate": 0.0003735810072514155, "loss": 0.4979, "step": 107320 }, { "epoch": 5.330783748882487, "grad_norm": 0.12451171875, "learning_rate": 0.000373541273467766, "loss": 0.4869, "step": 107330 }, { "epoch": 5.331280421178107, "grad_norm": 0.11962890625, "learning_rate": 0.00037350153968411645, "loss": 0.4768, "step": 107340 }, { "epoch": 5.331777093473726, "grad_norm": 0.1318359375, "learning_rate": 0.0003734618059004669, "loss": 0.478, "step": 107350 }, { "epoch": 5.3322737657693455, "grad_norm": 0.12109375, "learning_rate": 0.00037342207211681734, "loss": 0.5075, "step": 107360 }, { "epoch": 5.332770438064965, "grad_norm": 0.103515625, "learning_rate": 0.00037338233833316775, "loss": 0.4907, "step": 107370 }, { "epoch": 5.333267110360584, "grad_norm": 0.1552734375, "learning_rate": 0.0003733426045495183, "loss": 0.5123, "step": 107380 }, { "epoch": 5.333763782656203, "grad_norm": 0.126953125, "learning_rate": 0.0003733028707658687, "loss": 0.4853, "step": 107390 }, { "epoch": 5.334260454951822, "grad_norm": 0.11767578125, "learning_rate": 0.0003732631369822191, "loss": 0.5294, "step": 107400 }, { "epoch": 5.3347571272474426, "grad_norm": 0.1298828125, "learning_rate": 0.0003732234031985696, "loss": 0.5243, "step": 107410 }, { "epoch": 5.335253799543062, "grad_norm": 0.125, "learning_rate": 0.00037318366941492006, "loss": 0.4995, "step": 107420 }, { "epoch": 5.335750471838681, "grad_norm": 0.1240234375, "learning_rate": 0.00037314393563127053, "loss": 0.4997, "step": 107430 }, { "epoch": 5.3362471441343, "grad_norm": 0.15234375, "learning_rate": 0.00037310420184762094, "loss": 0.4856, "step": 107440 }, { "epoch": 5.3367438164299195, "grad_norm": 0.1181640625, "learning_rate": 0.0003730644680639714, "loss": 0.4911, "step": 107450 }, { "epoch": 5.337240488725539, "grad_norm": 0.171875, "learning_rate": 0.0003730247342803219, "loss": 0.5015, "step": 107460 }, { "epoch": 5.337737161021158, "grad_norm": 0.1259765625, "learning_rate": 0.0003729850004966723, "loss": 0.4819, "step": 107470 }, { "epoch": 5.338233833316778, "grad_norm": 0.162109375, "learning_rate": 0.0003729452667130227, "loss": 0.4923, "step": 107480 }, { "epoch": 5.338730505612397, "grad_norm": 0.1142578125, "learning_rate": 0.00037290553292937325, "loss": 0.4997, "step": 107490 }, { "epoch": 5.3392271779080165, "grad_norm": 0.11376953125, "learning_rate": 0.00037286579914572366, "loss": 0.502, "step": 107500 }, { "epoch": 5.339723850203636, "grad_norm": 0.12255859375, "learning_rate": 0.00037282606536207414, "loss": 0.5228, "step": 107510 }, { "epoch": 5.340220522499255, "grad_norm": 0.138671875, "learning_rate": 0.00037278633157842455, "loss": 0.4805, "step": 107520 }, { "epoch": 5.340717194794874, "grad_norm": 0.1328125, "learning_rate": 0.000372746597794775, "loss": 0.5407, "step": 107530 }, { "epoch": 5.341213867090493, "grad_norm": 0.1591796875, "learning_rate": 0.0003727068640111255, "loss": 0.523, "step": 107540 }, { "epoch": 5.3417105393861135, "grad_norm": 0.1513671875, "learning_rate": 0.0003726671302274759, "loss": 0.4976, "step": 107550 }, { "epoch": 5.342207211681733, "grad_norm": 0.1279296875, "learning_rate": 0.0003726273964438264, "loss": 0.4908, "step": 107560 }, { "epoch": 5.342703883977352, "grad_norm": 0.1494140625, "learning_rate": 0.00037258766266017686, "loss": 0.5216, "step": 107570 }, { "epoch": 5.343200556272971, "grad_norm": 0.1337890625, "learning_rate": 0.00037254792887652727, "loss": 0.4958, "step": 107580 }, { "epoch": 5.34369722856859, "grad_norm": 0.1337890625, "learning_rate": 0.00037250819509287774, "loss": 0.4956, "step": 107590 }, { "epoch": 5.34419390086421, "grad_norm": 0.13671875, "learning_rate": 0.0003724684613092282, "loss": 0.4909, "step": 107600 }, { "epoch": 5.344690573159829, "grad_norm": 0.1328125, "learning_rate": 0.00037242872752557863, "loss": 0.4814, "step": 107610 }, { "epoch": 5.345187245455448, "grad_norm": 0.1298828125, "learning_rate": 0.0003723889937419291, "loss": 0.4906, "step": 107620 }, { "epoch": 5.345683917751068, "grad_norm": 0.12890625, "learning_rate": 0.0003723492599582795, "loss": 0.4998, "step": 107630 }, { "epoch": 5.346180590046687, "grad_norm": 0.1162109375, "learning_rate": 0.00037230952617463, "loss": 0.4901, "step": 107640 }, { "epoch": 5.346677262342307, "grad_norm": 0.1162109375, "learning_rate": 0.00037226979239098046, "loss": 0.4998, "step": 107650 }, { "epoch": 5.347173934637926, "grad_norm": 0.142578125, "learning_rate": 0.0003722300586073309, "loss": 0.4889, "step": 107660 }, { "epoch": 5.347670606933545, "grad_norm": 0.1494140625, "learning_rate": 0.00037219032482368135, "loss": 0.5287, "step": 107670 }, { "epoch": 5.348167279229164, "grad_norm": 0.11572265625, "learning_rate": 0.0003721505910400318, "loss": 0.5008, "step": 107680 }, { "epoch": 5.348663951524784, "grad_norm": 0.1220703125, "learning_rate": 0.00037211085725638224, "loss": 0.4908, "step": 107690 }, { "epoch": 5.349160623820404, "grad_norm": 0.1201171875, "learning_rate": 0.0003720711234727327, "loss": 0.4943, "step": 107700 }, { "epoch": 5.349657296116023, "grad_norm": 0.1435546875, "learning_rate": 0.00037203138968908313, "loss": 0.5044, "step": 107710 }, { "epoch": 5.350153968411642, "grad_norm": 0.12890625, "learning_rate": 0.00037199165590543365, "loss": 0.4952, "step": 107720 }, { "epoch": 5.350650640707261, "grad_norm": 0.12109375, "learning_rate": 0.00037195192212178407, "loss": 0.5112, "step": 107730 }, { "epoch": 5.351147313002881, "grad_norm": 0.1318359375, "learning_rate": 0.0003719121883381345, "loss": 0.4978, "step": 107740 }, { "epoch": 5.3516439852985, "grad_norm": 0.11083984375, "learning_rate": 0.00037187245455448496, "loss": 0.4872, "step": 107750 }, { "epoch": 5.352140657594119, "grad_norm": 0.11474609375, "learning_rate": 0.00037183272077083543, "loss": 0.5244, "step": 107760 }, { "epoch": 5.352637329889738, "grad_norm": 0.126953125, "learning_rate": 0.00037179298698718585, "loss": 0.5032, "step": 107770 }, { "epoch": 5.353134002185358, "grad_norm": 0.11474609375, "learning_rate": 0.0003717532532035363, "loss": 0.5246, "step": 107780 }, { "epoch": 5.353630674480978, "grad_norm": 0.1396484375, "learning_rate": 0.0003717135194198868, "loss": 0.4951, "step": 107790 }, { "epoch": 5.354127346776597, "grad_norm": 0.130859375, "learning_rate": 0.00037167378563623726, "loss": 0.5311, "step": 107800 }, { "epoch": 5.354624019072216, "grad_norm": 0.130859375, "learning_rate": 0.0003716340518525877, "loss": 0.5226, "step": 107810 }, { "epoch": 5.355120691367835, "grad_norm": 0.11279296875, "learning_rate": 0.0003715943180689381, "loss": 0.5087, "step": 107820 }, { "epoch": 5.3556173636634545, "grad_norm": 0.1357421875, "learning_rate": 0.0003715545842852886, "loss": 0.4915, "step": 107830 }, { "epoch": 5.356114035959074, "grad_norm": 0.10595703125, "learning_rate": 0.00037151485050163904, "loss": 0.4851, "step": 107840 }, { "epoch": 5.356610708254694, "grad_norm": 0.10986328125, "learning_rate": 0.00037147511671798946, "loss": 0.4998, "step": 107850 }, { "epoch": 5.357107380550313, "grad_norm": 0.134765625, "learning_rate": 0.00037143538293433993, "loss": 0.5159, "step": 107860 }, { "epoch": 5.357604052845932, "grad_norm": 0.10302734375, "learning_rate": 0.0003713956491506904, "loss": 0.5063, "step": 107870 }, { "epoch": 5.358100725141552, "grad_norm": 0.12353515625, "learning_rate": 0.00037135591536704087, "loss": 0.505, "step": 107880 }, { "epoch": 5.358597397437171, "grad_norm": 0.11376953125, "learning_rate": 0.0003713161815833913, "loss": 0.5133, "step": 107890 }, { "epoch": 5.35909406973279, "grad_norm": 0.12109375, "learning_rate": 0.00037127644779974176, "loss": 0.489, "step": 107900 }, { "epoch": 5.359590742028409, "grad_norm": 0.1162109375, "learning_rate": 0.00037123671401609223, "loss": 0.5074, "step": 107910 }, { "epoch": 5.360087414324029, "grad_norm": 0.1455078125, "learning_rate": 0.00037119698023244265, "loss": 0.4719, "step": 107920 }, { "epoch": 5.360584086619649, "grad_norm": 0.1220703125, "learning_rate": 0.00037115724644879307, "loss": 0.4836, "step": 107930 }, { "epoch": 5.361080758915268, "grad_norm": 0.1474609375, "learning_rate": 0.00037111751266514354, "loss": 0.5127, "step": 107940 }, { "epoch": 5.361577431210887, "grad_norm": 0.12890625, "learning_rate": 0.000371077778881494, "loss": 0.5171, "step": 107950 }, { "epoch": 5.362074103506506, "grad_norm": 0.134765625, "learning_rate": 0.0003710380450978445, "loss": 0.4905, "step": 107960 }, { "epoch": 5.3625707758021255, "grad_norm": 0.1455078125, "learning_rate": 0.0003709983113141949, "loss": 0.4623, "step": 107970 }, { "epoch": 5.363067448097745, "grad_norm": 0.11474609375, "learning_rate": 0.00037095857753054537, "loss": 0.5042, "step": 107980 }, { "epoch": 5.363564120393365, "grad_norm": 0.138671875, "learning_rate": 0.00037091884374689584, "loss": 0.523, "step": 107990 }, { "epoch": 5.364060792688984, "grad_norm": 0.12060546875, "learning_rate": 0.00037087910996324626, "loss": 0.4975, "step": 108000 }, { "epoch": 5.364557464984603, "grad_norm": 0.1201171875, "learning_rate": 0.0003708393761795967, "loss": 0.472, "step": 108010 }, { "epoch": 5.3650541372802225, "grad_norm": 0.1220703125, "learning_rate": 0.0003707996423959472, "loss": 0.5176, "step": 108020 }, { "epoch": 5.365550809575842, "grad_norm": 0.1650390625, "learning_rate": 0.0003707599086122976, "loss": 0.505, "step": 108030 }, { "epoch": 5.366047481871461, "grad_norm": 0.11328125, "learning_rate": 0.0003707201748286481, "loss": 0.522, "step": 108040 }, { "epoch": 5.36654415416708, "grad_norm": 0.1494140625, "learning_rate": 0.0003706804410449985, "loss": 0.503, "step": 108050 }, { "epoch": 5.3670408264627, "grad_norm": 0.16796875, "learning_rate": 0.000370640707261349, "loss": 0.5157, "step": 108060 }, { "epoch": 5.3675374987583195, "grad_norm": 0.1162109375, "learning_rate": 0.00037060097347769945, "loss": 0.497, "step": 108070 }, { "epoch": 5.368034171053939, "grad_norm": 0.1298828125, "learning_rate": 0.00037056123969404986, "loss": 0.4749, "step": 108080 }, { "epoch": 5.368530843349558, "grad_norm": 0.1298828125, "learning_rate": 0.00037052150591040034, "loss": 0.4949, "step": 108090 }, { "epoch": 5.369027515645177, "grad_norm": 0.12109375, "learning_rate": 0.0003704817721267508, "loss": 0.4938, "step": 108100 }, { "epoch": 5.3695241879407964, "grad_norm": 0.12060546875, "learning_rate": 0.0003704420383431012, "loss": 0.48, "step": 108110 }, { "epoch": 5.370020860236416, "grad_norm": 0.126953125, "learning_rate": 0.0003704023045594517, "loss": 0.536, "step": 108120 }, { "epoch": 5.370517532532036, "grad_norm": 0.1025390625, "learning_rate": 0.00037036257077580217, "loss": 0.4826, "step": 108130 }, { "epoch": 5.371014204827655, "grad_norm": 0.11669921875, "learning_rate": 0.0003703228369921526, "loss": 0.4985, "step": 108140 }, { "epoch": 5.371510877123274, "grad_norm": 0.1298828125, "learning_rate": 0.00037028310320850306, "loss": 0.5027, "step": 108150 }, { "epoch": 5.3720075494188935, "grad_norm": 0.15234375, "learning_rate": 0.00037024336942485347, "loss": 0.4979, "step": 108160 }, { "epoch": 5.372504221714513, "grad_norm": 0.1904296875, "learning_rate": 0.000370203635641204, "loss": 0.4905, "step": 108170 }, { "epoch": 5.373000894010132, "grad_norm": 0.138671875, "learning_rate": 0.0003701639018575544, "loss": 0.5008, "step": 108180 }, { "epoch": 5.373497566305751, "grad_norm": 0.1083984375, "learning_rate": 0.00037012416807390483, "loss": 0.4952, "step": 108190 }, { "epoch": 5.373994238601371, "grad_norm": 0.11669921875, "learning_rate": 0.0003700844342902553, "loss": 0.5018, "step": 108200 }, { "epoch": 5.3744909108969905, "grad_norm": 0.115234375, "learning_rate": 0.0003700447005066058, "loss": 0.486, "step": 108210 }, { "epoch": 5.37498758319261, "grad_norm": 0.1337890625, "learning_rate": 0.0003700049667229562, "loss": 0.5243, "step": 108220 }, { "epoch": 5.375484255488229, "grad_norm": 0.12451171875, "learning_rate": 0.00036996523293930666, "loss": 0.4858, "step": 108230 }, { "epoch": 5.375980927783848, "grad_norm": 0.1171875, "learning_rate": 0.0003699254991556571, "loss": 0.4943, "step": 108240 }, { "epoch": 5.376477600079467, "grad_norm": 0.14453125, "learning_rate": 0.0003698857653720076, "loss": 0.5185, "step": 108250 }, { "epoch": 5.376974272375087, "grad_norm": 0.11572265625, "learning_rate": 0.000369846031588358, "loss": 0.4881, "step": 108260 }, { "epoch": 5.377470944670706, "grad_norm": 0.1240234375, "learning_rate": 0.00036980629780470844, "loss": 0.501, "step": 108270 }, { "epoch": 5.377967616966326, "grad_norm": 0.1259765625, "learning_rate": 0.0003697665640210589, "loss": 0.5114, "step": 108280 }, { "epoch": 5.378464289261945, "grad_norm": 0.1328125, "learning_rate": 0.0003697268302374094, "loss": 0.5176, "step": 108290 }, { "epoch": 5.378960961557564, "grad_norm": 0.1201171875, "learning_rate": 0.0003696870964537598, "loss": 0.516, "step": 108300 }, { "epoch": 5.379457633853184, "grad_norm": 0.1455078125, "learning_rate": 0.00036964736267011027, "loss": 0.5143, "step": 108310 }, { "epoch": 5.379954306148803, "grad_norm": 0.138671875, "learning_rate": 0.00036960762888646074, "loss": 0.5209, "step": 108320 }, { "epoch": 5.380450978444422, "grad_norm": 0.1220703125, "learning_rate": 0.0003695678951028112, "loss": 0.5073, "step": 108330 }, { "epoch": 5.380947650740041, "grad_norm": 0.11376953125, "learning_rate": 0.00036952816131916163, "loss": 0.4927, "step": 108340 }, { "epoch": 5.3814443230356614, "grad_norm": 0.1357421875, "learning_rate": 0.00036948842753551205, "loss": 0.4988, "step": 108350 }, { "epoch": 5.381940995331281, "grad_norm": 0.11865234375, "learning_rate": 0.0003694486937518626, "loss": 0.5168, "step": 108360 }, { "epoch": 5.3824376676269, "grad_norm": 0.12109375, "learning_rate": 0.000369408959968213, "loss": 0.4888, "step": 108370 }, { "epoch": 5.382934339922519, "grad_norm": 0.1806640625, "learning_rate": 0.0003693692261845634, "loss": 0.5031, "step": 108380 }, { "epoch": 5.383431012218138, "grad_norm": 0.10546875, "learning_rate": 0.0003693294924009139, "loss": 0.4912, "step": 108390 }, { "epoch": 5.383927684513758, "grad_norm": 0.11376953125, "learning_rate": 0.00036928975861726435, "loss": 0.5249, "step": 108400 }, { "epoch": 5.384424356809377, "grad_norm": 0.126953125, "learning_rate": 0.0003692500248336148, "loss": 0.4943, "step": 108410 }, { "epoch": 5.384921029104997, "grad_norm": 0.162109375, "learning_rate": 0.00036921029104996524, "loss": 0.4961, "step": 108420 }, { "epoch": 5.385417701400616, "grad_norm": 0.1552734375, "learning_rate": 0.0003691705572663157, "loss": 0.521, "step": 108430 }, { "epoch": 5.385914373696235, "grad_norm": 0.11669921875, "learning_rate": 0.0003691308234826662, "loss": 0.5057, "step": 108440 }, { "epoch": 5.386411045991855, "grad_norm": 0.12158203125, "learning_rate": 0.0003690910896990166, "loss": 0.4832, "step": 108450 }, { "epoch": 5.386907718287474, "grad_norm": 0.140625, "learning_rate": 0.000369051355915367, "loss": 0.4958, "step": 108460 }, { "epoch": 5.387404390583093, "grad_norm": 0.130859375, "learning_rate": 0.00036901162213171754, "loss": 0.5162, "step": 108470 }, { "epoch": 5.387901062878712, "grad_norm": 0.138671875, "learning_rate": 0.00036897188834806796, "loss": 0.4704, "step": 108480 }, { "epoch": 5.3883977351743315, "grad_norm": 0.1416015625, "learning_rate": 0.00036893215456441843, "loss": 0.4988, "step": 108490 }, { "epoch": 5.388894407469952, "grad_norm": 0.15625, "learning_rate": 0.00036889242078076885, "loss": 0.4893, "step": 108500 }, { "epoch": 5.389391079765571, "grad_norm": 0.1328125, "learning_rate": 0.0003688526869971193, "loss": 0.509, "step": 108510 }, { "epoch": 5.38988775206119, "grad_norm": 0.1376953125, "learning_rate": 0.0003688129532134698, "loss": 0.5246, "step": 108520 }, { "epoch": 5.390384424356809, "grad_norm": 0.125, "learning_rate": 0.0003687732194298202, "loss": 0.5198, "step": 108530 }, { "epoch": 5.3908810966524285, "grad_norm": 0.1123046875, "learning_rate": 0.0003687334856461707, "loss": 0.4906, "step": 108540 }, { "epoch": 5.391377768948048, "grad_norm": 0.10791015625, "learning_rate": 0.00036869375186252115, "loss": 0.494, "step": 108550 }, { "epoch": 5.391874441243667, "grad_norm": 0.16015625, "learning_rate": 0.00036865401807887157, "loss": 0.5274, "step": 108560 }, { "epoch": 5.392371113539287, "grad_norm": 0.115234375, "learning_rate": 0.00036861428429522204, "loss": 0.5298, "step": 108570 }, { "epoch": 5.392867785834906, "grad_norm": 0.11328125, "learning_rate": 0.00036857455051157246, "loss": 0.4937, "step": 108580 }, { "epoch": 5.393364458130526, "grad_norm": 0.1318359375, "learning_rate": 0.00036853481672792293, "loss": 0.5091, "step": 108590 }, { "epoch": 5.393861130426145, "grad_norm": 0.1337890625, "learning_rate": 0.0003684950829442734, "loss": 0.5224, "step": 108600 }, { "epoch": 5.394357802721764, "grad_norm": 0.12060546875, "learning_rate": 0.0003684553491606238, "loss": 0.5062, "step": 108610 }, { "epoch": 5.394854475017383, "grad_norm": 0.138671875, "learning_rate": 0.0003684156153769743, "loss": 0.4836, "step": 108620 }, { "epoch": 5.3953511473130025, "grad_norm": 0.12109375, "learning_rate": 0.00036837588159332476, "loss": 0.5242, "step": 108630 }, { "epoch": 5.395847819608623, "grad_norm": 0.1318359375, "learning_rate": 0.0003683361478096752, "loss": 0.5123, "step": 108640 }, { "epoch": 5.396344491904242, "grad_norm": 0.1279296875, "learning_rate": 0.00036829641402602565, "loss": 0.4747, "step": 108650 }, { "epoch": 5.396841164199861, "grad_norm": 0.13671875, "learning_rate": 0.0003682566802423761, "loss": 0.4758, "step": 108660 }, { "epoch": 5.39733783649548, "grad_norm": 0.16796875, "learning_rate": 0.00036821694645872654, "loss": 0.5006, "step": 108670 }, { "epoch": 5.3978345087910995, "grad_norm": 0.1298828125, "learning_rate": 0.000368177212675077, "loss": 0.4897, "step": 108680 }, { "epoch": 5.398331181086719, "grad_norm": 0.119140625, "learning_rate": 0.0003681374788914274, "loss": 0.5123, "step": 108690 }, { "epoch": 5.398827853382338, "grad_norm": 0.1240234375, "learning_rate": 0.00036809774510777795, "loss": 0.4788, "step": 108700 }, { "epoch": 5.399324525677958, "grad_norm": 0.1337890625, "learning_rate": 0.00036805801132412837, "loss": 0.4871, "step": 108710 }, { "epoch": 5.399821197973577, "grad_norm": 0.12353515625, "learning_rate": 0.0003680182775404788, "loss": 0.4988, "step": 108720 }, { "epoch": 5.4003178702691965, "grad_norm": 0.1123046875, "learning_rate": 0.00036797854375682926, "loss": 0.49, "step": 108730 }, { "epoch": 5.400814542564816, "grad_norm": 0.130859375, "learning_rate": 0.00036793880997317973, "loss": 0.5178, "step": 108740 }, { "epoch": 5.401311214860435, "grad_norm": 0.14453125, "learning_rate": 0.00036789907618953014, "loss": 0.5088, "step": 108750 }, { "epoch": 5.401807887156054, "grad_norm": 0.11767578125, "learning_rate": 0.0003678593424058806, "loss": 0.502, "step": 108760 }, { "epoch": 5.402304559451673, "grad_norm": 0.1513671875, "learning_rate": 0.00036781960862223103, "loss": 0.4923, "step": 108770 }, { "epoch": 5.4028012317472935, "grad_norm": 0.1328125, "learning_rate": 0.00036777987483858156, "loss": 0.4984, "step": 108780 }, { "epoch": 5.403297904042913, "grad_norm": 0.11572265625, "learning_rate": 0.000367740141054932, "loss": 0.5066, "step": 108790 }, { "epoch": 5.403794576338532, "grad_norm": 0.1513671875, "learning_rate": 0.0003677004072712824, "loss": 0.5195, "step": 108800 }, { "epoch": 5.404291248634151, "grad_norm": 0.1376953125, "learning_rate": 0.00036766067348763286, "loss": 0.5054, "step": 108810 }, { "epoch": 5.4047879209297705, "grad_norm": 0.1171875, "learning_rate": 0.00036762093970398334, "loss": 0.5097, "step": 108820 }, { "epoch": 5.40528459322539, "grad_norm": 0.1142578125, "learning_rate": 0.00036758120592033375, "loss": 0.504, "step": 108830 }, { "epoch": 5.405781265521009, "grad_norm": 0.12353515625, "learning_rate": 0.0003675414721366842, "loss": 0.5155, "step": 108840 }, { "epoch": 5.406277937816629, "grad_norm": 0.1279296875, "learning_rate": 0.0003675017383530347, "loss": 0.4882, "step": 108850 }, { "epoch": 5.406774610112248, "grad_norm": 0.150390625, "learning_rate": 0.00036746200456938517, "loss": 0.5012, "step": 108860 }, { "epoch": 5.4072712824078675, "grad_norm": 0.10498046875, "learning_rate": 0.0003674222707857356, "loss": 0.5125, "step": 108870 }, { "epoch": 5.407767954703487, "grad_norm": 0.12109375, "learning_rate": 0.000367382537002086, "loss": 0.5049, "step": 108880 }, { "epoch": 5.408264626999106, "grad_norm": 0.11669921875, "learning_rate": 0.0003673428032184365, "loss": 0.5241, "step": 108890 }, { "epoch": 5.408761299294725, "grad_norm": 0.115234375, "learning_rate": 0.00036730306943478694, "loss": 0.5068, "step": 108900 }, { "epoch": 5.409257971590344, "grad_norm": 0.126953125, "learning_rate": 0.0003672633356511374, "loss": 0.496, "step": 108910 }, { "epoch": 5.4097546438859645, "grad_norm": 0.1572265625, "learning_rate": 0.00036722360186748783, "loss": 0.5095, "step": 108920 }, { "epoch": 5.410251316181584, "grad_norm": 0.169921875, "learning_rate": 0.0003671838680838383, "loss": 0.503, "step": 108930 }, { "epoch": 5.410747988477203, "grad_norm": 0.1259765625, "learning_rate": 0.0003671441343001888, "loss": 0.4936, "step": 108940 }, { "epoch": 5.411244660772822, "grad_norm": 0.166015625, "learning_rate": 0.0003671044005165392, "loss": 0.4863, "step": 108950 }, { "epoch": 5.411741333068441, "grad_norm": 0.1455078125, "learning_rate": 0.00036706466673288966, "loss": 0.5501, "step": 108960 }, { "epoch": 5.412238005364061, "grad_norm": 0.111328125, "learning_rate": 0.00036702493294924013, "loss": 0.4985, "step": 108970 }, { "epoch": 5.41273467765968, "grad_norm": 0.1201171875, "learning_rate": 0.00036698519916559055, "loss": 0.5307, "step": 108980 }, { "epoch": 5.413231349955299, "grad_norm": 0.138671875, "learning_rate": 0.000366945465381941, "loss": 0.5281, "step": 108990 }, { "epoch": 5.413728022250919, "grad_norm": 0.134765625, "learning_rate": 0.0003669057315982915, "loss": 0.5336, "step": 109000 }, { "epoch": 5.414224694546538, "grad_norm": 0.150390625, "learning_rate": 0.0003668659978146419, "loss": 0.4793, "step": 109010 }, { "epoch": 5.414721366842158, "grad_norm": 0.1650390625, "learning_rate": 0.0003668262640309924, "loss": 0.5175, "step": 109020 }, { "epoch": 5.415218039137777, "grad_norm": 0.1357421875, "learning_rate": 0.0003667865302473428, "loss": 0.4748, "step": 109030 }, { "epoch": 5.415714711433396, "grad_norm": 0.12890625, "learning_rate": 0.00036674679646369327, "loss": 0.5304, "step": 109040 }, { "epoch": 5.416211383729015, "grad_norm": 0.1484375, "learning_rate": 0.00036670706268004374, "loss": 0.5149, "step": 109050 }, { "epoch": 5.416708056024635, "grad_norm": 0.125, "learning_rate": 0.00036666732889639416, "loss": 0.4983, "step": 109060 }, { "epoch": 5.417204728320255, "grad_norm": 0.1259765625, "learning_rate": 0.00036662759511274463, "loss": 0.5059, "step": 109070 }, { "epoch": 5.417701400615874, "grad_norm": 0.11376953125, "learning_rate": 0.0003665878613290951, "loss": 0.4924, "step": 109080 }, { "epoch": 5.418198072911493, "grad_norm": 0.11279296875, "learning_rate": 0.0003665481275454455, "loss": 0.4965, "step": 109090 }, { "epoch": 5.418694745207112, "grad_norm": 0.1201171875, "learning_rate": 0.000366508393761796, "loss": 0.4815, "step": 109100 }, { "epoch": 5.419191417502732, "grad_norm": 0.1240234375, "learning_rate": 0.0003664686599781464, "loss": 0.5138, "step": 109110 }, { "epoch": 5.419688089798351, "grad_norm": 0.1328125, "learning_rate": 0.0003664289261944969, "loss": 0.4985, "step": 109120 }, { "epoch": 5.42018476209397, "grad_norm": 0.12890625, "learning_rate": 0.00036638919241084735, "loss": 0.5111, "step": 109130 }, { "epoch": 5.42068143438959, "grad_norm": 0.11328125, "learning_rate": 0.00036634945862719777, "loss": 0.5004, "step": 109140 }, { "epoch": 5.421178106685209, "grad_norm": 0.12109375, "learning_rate": 0.00036630972484354824, "loss": 0.4852, "step": 109150 }, { "epoch": 5.421674778980829, "grad_norm": 0.1328125, "learning_rate": 0.0003662699910598987, "loss": 0.5202, "step": 109160 }, { "epoch": 5.422171451276448, "grad_norm": 0.1162109375, "learning_rate": 0.00036623025727624913, "loss": 0.4982, "step": 109170 }, { "epoch": 5.422668123572067, "grad_norm": 0.1240234375, "learning_rate": 0.0003661905234925996, "loss": 0.4969, "step": 109180 }, { "epoch": 5.423164795867686, "grad_norm": 0.11865234375, "learning_rate": 0.00036615078970895007, "loss": 0.5026, "step": 109190 }, { "epoch": 5.4236614681633055, "grad_norm": 0.11474609375, "learning_rate": 0.0003661110559253005, "loss": 0.5074, "step": 109200 }, { "epoch": 5.424158140458925, "grad_norm": 0.1259765625, "learning_rate": 0.00036607132214165096, "loss": 0.4977, "step": 109210 }, { "epoch": 5.424654812754545, "grad_norm": 0.12451171875, "learning_rate": 0.0003660315883580014, "loss": 0.5052, "step": 109220 }, { "epoch": 5.425151485050164, "grad_norm": 0.12353515625, "learning_rate": 0.0003659918545743519, "loss": 0.5185, "step": 109230 }, { "epoch": 5.425648157345783, "grad_norm": 0.1171875, "learning_rate": 0.0003659521207907023, "loss": 0.5034, "step": 109240 }, { "epoch": 5.4261448296414025, "grad_norm": 0.14453125, "learning_rate": 0.00036591238700705274, "loss": 0.5235, "step": 109250 }, { "epoch": 5.426641501937022, "grad_norm": 0.1259765625, "learning_rate": 0.0003658726532234032, "loss": 0.5166, "step": 109260 }, { "epoch": 5.427138174232641, "grad_norm": 0.138671875, "learning_rate": 0.0003658329194397537, "loss": 0.4626, "step": 109270 }, { "epoch": 5.42763484652826, "grad_norm": 0.1259765625, "learning_rate": 0.0003657931856561041, "loss": 0.5009, "step": 109280 }, { "epoch": 5.42813151882388, "grad_norm": 0.12890625, "learning_rate": 0.00036575345187245457, "loss": 0.4857, "step": 109290 }, { "epoch": 5.4286281911195, "grad_norm": 0.11328125, "learning_rate": 0.00036571371808880504, "loss": 0.5296, "step": 109300 }, { "epoch": 5.429124863415119, "grad_norm": 0.1357421875, "learning_rate": 0.0003656739843051555, "loss": 0.5115, "step": 109310 }, { "epoch": 5.429621535710738, "grad_norm": 0.126953125, "learning_rate": 0.00036563425052150593, "loss": 0.506, "step": 109320 }, { "epoch": 5.430118208006357, "grad_norm": 0.11962890625, "learning_rate": 0.00036559451673785634, "loss": 0.4808, "step": 109330 }, { "epoch": 5.4306148803019765, "grad_norm": 0.12158203125, "learning_rate": 0.0003655547829542068, "loss": 0.513, "step": 109340 }, { "epoch": 5.431111552597596, "grad_norm": 0.11474609375, "learning_rate": 0.0003655150491705573, "loss": 0.4674, "step": 109350 }, { "epoch": 5.431608224893216, "grad_norm": 0.12890625, "learning_rate": 0.00036547531538690776, "loss": 0.5004, "step": 109360 }, { "epoch": 5.432104897188835, "grad_norm": 0.1357421875, "learning_rate": 0.0003654355816032582, "loss": 0.5174, "step": 109370 }, { "epoch": 5.432601569484454, "grad_norm": 0.11376953125, "learning_rate": 0.00036539584781960865, "loss": 0.5371, "step": 109380 }, { "epoch": 5.4330982417800735, "grad_norm": 0.12890625, "learning_rate": 0.0003653561140359591, "loss": 0.5203, "step": 109390 }, { "epoch": 5.433594914075693, "grad_norm": 0.1142578125, "learning_rate": 0.00036531638025230954, "loss": 0.4986, "step": 109400 }, { "epoch": 5.434091586371312, "grad_norm": 0.11767578125, "learning_rate": 0.00036527664646865995, "loss": 0.4866, "step": 109410 }, { "epoch": 5.434588258666931, "grad_norm": 0.1298828125, "learning_rate": 0.0003652369126850105, "loss": 0.5068, "step": 109420 }, { "epoch": 5.435084930962551, "grad_norm": 0.1357421875, "learning_rate": 0.0003651971789013609, "loss": 0.5019, "step": 109430 }, { "epoch": 5.4355816032581705, "grad_norm": 0.134765625, "learning_rate": 0.00036515744511771137, "loss": 0.4791, "step": 109440 }, { "epoch": 5.43607827555379, "grad_norm": 0.1435546875, "learning_rate": 0.0003651177113340618, "loss": 0.5114, "step": 109450 }, { "epoch": 5.436574947849409, "grad_norm": 0.115234375, "learning_rate": 0.00036507797755041226, "loss": 0.491, "step": 109460 }, { "epoch": 5.437071620145028, "grad_norm": 0.1259765625, "learning_rate": 0.0003650382437667627, "loss": 0.504, "step": 109470 }, { "epoch": 5.437568292440647, "grad_norm": 0.125, "learning_rate": 0.00036499850998311314, "loss": 0.5213, "step": 109480 }, { "epoch": 5.438064964736267, "grad_norm": 0.12109375, "learning_rate": 0.0003649587761994636, "loss": 0.5141, "step": 109490 }, { "epoch": 5.438561637031887, "grad_norm": 0.109375, "learning_rate": 0.0003649190424158141, "loss": 0.5127, "step": 109500 }, { "epoch": 5.439058309327506, "grad_norm": 0.1796875, "learning_rate": 0.0003648793086321645, "loss": 0.49, "step": 109510 }, { "epoch": 5.439554981623125, "grad_norm": 0.130859375, "learning_rate": 0.000364839574848515, "loss": 0.5063, "step": 109520 }, { "epoch": 5.4400516539187445, "grad_norm": 0.1142578125, "learning_rate": 0.00036479984106486545, "loss": 0.4929, "step": 109530 }, { "epoch": 5.440548326214364, "grad_norm": 0.130859375, "learning_rate": 0.00036476010728121586, "loss": 0.5066, "step": 109540 }, { "epoch": 5.441044998509983, "grad_norm": 0.1259765625, "learning_rate": 0.00036472037349756633, "loss": 0.5232, "step": 109550 }, { "epoch": 5.441541670805602, "grad_norm": 0.15234375, "learning_rate": 0.00036468063971391675, "loss": 0.5274, "step": 109560 }, { "epoch": 5.442038343101222, "grad_norm": 0.11572265625, "learning_rate": 0.0003646409059302672, "loss": 0.491, "step": 109570 }, { "epoch": 5.4425350153968415, "grad_norm": 0.1611328125, "learning_rate": 0.0003646011721466177, "loss": 0.4898, "step": 109580 }, { "epoch": 5.443031687692461, "grad_norm": 0.1201171875, "learning_rate": 0.0003645614383629681, "loss": 0.5091, "step": 109590 }, { "epoch": 5.44352835998808, "grad_norm": 0.1259765625, "learning_rate": 0.0003645217045793186, "loss": 0.5018, "step": 109600 }, { "epoch": 5.444025032283699, "grad_norm": 0.1591796875, "learning_rate": 0.00036448197079566905, "loss": 0.5004, "step": 109610 }, { "epoch": 5.444521704579318, "grad_norm": 0.14453125, "learning_rate": 0.00036444223701201947, "loss": 0.4917, "step": 109620 }, { "epoch": 5.445018376874938, "grad_norm": 0.1279296875, "learning_rate": 0.00036440250322836994, "loss": 0.5041, "step": 109630 }, { "epoch": 5.445515049170558, "grad_norm": 0.12255859375, "learning_rate": 0.00036436276944472036, "loss": 0.5227, "step": 109640 }, { "epoch": 5.446011721466177, "grad_norm": 0.115234375, "learning_rate": 0.00036432303566107083, "loss": 0.509, "step": 109650 }, { "epoch": 5.446508393761796, "grad_norm": 0.12353515625, "learning_rate": 0.0003642833018774213, "loss": 0.4936, "step": 109660 }, { "epoch": 5.447005066057415, "grad_norm": 0.1162109375, "learning_rate": 0.0003642435680937717, "loss": 0.4939, "step": 109670 }, { "epoch": 5.447501738353035, "grad_norm": 0.12890625, "learning_rate": 0.0003642038343101222, "loss": 0.4946, "step": 109680 }, { "epoch": 5.447998410648654, "grad_norm": 0.1162109375, "learning_rate": 0.00036416410052647266, "loss": 0.5041, "step": 109690 }, { "epoch": 5.448495082944273, "grad_norm": 0.119140625, "learning_rate": 0.0003641243667428231, "loss": 0.4819, "step": 109700 }, { "epoch": 5.448991755239892, "grad_norm": 0.14453125, "learning_rate": 0.00036408463295917355, "loss": 0.5064, "step": 109710 }, { "epoch": 5.449488427535512, "grad_norm": 0.1630859375, "learning_rate": 0.000364044899175524, "loss": 0.5081, "step": 109720 }, { "epoch": 5.449985099831132, "grad_norm": 0.11669921875, "learning_rate": 0.0003640051653918745, "loss": 0.503, "step": 109730 }, { "epoch": 5.450481772126751, "grad_norm": 0.12158203125, "learning_rate": 0.0003639654316082249, "loss": 0.52, "step": 109740 }, { "epoch": 5.45097844442237, "grad_norm": 0.11669921875, "learning_rate": 0.00036392569782457533, "loss": 0.4944, "step": 109750 }, { "epoch": 5.451475116717989, "grad_norm": 0.16796875, "learning_rate": 0.00036388596404092585, "loss": 0.5124, "step": 109760 }, { "epoch": 5.451971789013609, "grad_norm": 0.119140625, "learning_rate": 0.00036384623025727627, "loss": 0.5257, "step": 109770 }, { "epoch": 5.452468461309228, "grad_norm": 0.11767578125, "learning_rate": 0.0003638064964736267, "loss": 0.5101, "step": 109780 }, { "epoch": 5.452965133604848, "grad_norm": 0.126953125, "learning_rate": 0.00036376676268997716, "loss": 0.4994, "step": 109790 }, { "epoch": 5.453461805900467, "grad_norm": 0.1181640625, "learning_rate": 0.00036372702890632763, "loss": 0.5259, "step": 109800 }, { "epoch": 5.453958478196086, "grad_norm": 0.111328125, "learning_rate": 0.0003636872951226781, "loss": 0.4796, "step": 109810 }, { "epoch": 5.454455150491706, "grad_norm": 0.11279296875, "learning_rate": 0.0003636475613390285, "loss": 0.477, "step": 109820 }, { "epoch": 5.454951822787325, "grad_norm": 0.12158203125, "learning_rate": 0.000363607827555379, "loss": 0.493, "step": 109830 }, { "epoch": 5.455448495082944, "grad_norm": 0.123046875, "learning_rate": 0.00036356809377172946, "loss": 0.516, "step": 109840 }, { "epoch": 5.455945167378563, "grad_norm": 0.150390625, "learning_rate": 0.0003635283599880799, "loss": 0.4964, "step": 109850 }, { "epoch": 5.4564418396741825, "grad_norm": 0.11962890625, "learning_rate": 0.0003634886262044303, "loss": 0.5178, "step": 109860 }, { "epoch": 5.456938511969803, "grad_norm": 0.1337890625, "learning_rate": 0.0003634488924207808, "loss": 0.5005, "step": 109870 }, { "epoch": 5.457435184265422, "grad_norm": 0.126953125, "learning_rate": 0.00036340915863713124, "loss": 0.4756, "step": 109880 }, { "epoch": 5.457931856561041, "grad_norm": 0.1611328125, "learning_rate": 0.0003633694248534817, "loss": 0.5116, "step": 109890 }, { "epoch": 5.45842852885666, "grad_norm": 0.12353515625, "learning_rate": 0.00036332969106983213, "loss": 0.511, "step": 109900 }, { "epoch": 5.4589252011522795, "grad_norm": 0.12255859375, "learning_rate": 0.0003632899572861826, "loss": 0.4732, "step": 109910 }, { "epoch": 5.459421873447899, "grad_norm": 0.115234375, "learning_rate": 0.00036325022350253307, "loss": 0.487, "step": 109920 }, { "epoch": 5.459918545743518, "grad_norm": 0.1337890625, "learning_rate": 0.0003632104897188835, "loss": 0.5186, "step": 109930 }, { "epoch": 5.460415218039138, "grad_norm": 0.12451171875, "learning_rate": 0.0003631707559352339, "loss": 0.5075, "step": 109940 }, { "epoch": 5.460911890334757, "grad_norm": 0.138671875, "learning_rate": 0.00036313102215158443, "loss": 0.5035, "step": 109950 }, { "epoch": 5.4614085626303766, "grad_norm": 0.1025390625, "learning_rate": 0.00036309128836793485, "loss": 0.4867, "step": 109960 }, { "epoch": 5.461905234925996, "grad_norm": 0.1181640625, "learning_rate": 0.0003630515545842853, "loss": 0.4873, "step": 109970 }, { "epoch": 5.462401907221615, "grad_norm": 0.2021484375, "learning_rate": 0.00036301182080063574, "loss": 0.5273, "step": 109980 }, { "epoch": 5.462898579517234, "grad_norm": 0.12451171875, "learning_rate": 0.0003629720870169862, "loss": 0.4841, "step": 109990 }, { "epoch": 5.4633952518128535, "grad_norm": 0.1298828125, "learning_rate": 0.0003629323532333367, "loss": 0.4807, "step": 110000 }, { "epoch": 5.463891924108474, "grad_norm": 0.12109375, "learning_rate": 0.0003628926194496871, "loss": 0.5149, "step": 110010 }, { "epoch": 5.464388596404093, "grad_norm": 0.1435546875, "learning_rate": 0.00036285288566603757, "loss": 0.5286, "step": 110020 }, { "epoch": 5.464885268699712, "grad_norm": 0.1279296875, "learning_rate": 0.00036281315188238804, "loss": 0.5148, "step": 110030 }, { "epoch": 5.465381940995331, "grad_norm": 0.119140625, "learning_rate": 0.00036277341809873846, "loss": 0.5212, "step": 110040 }, { "epoch": 5.4658786132909505, "grad_norm": 0.1875, "learning_rate": 0.0003627336843150889, "loss": 0.5017, "step": 110050 }, { "epoch": 5.46637528558657, "grad_norm": 0.1904296875, "learning_rate": 0.0003626939505314394, "loss": 0.5164, "step": 110060 }, { "epoch": 5.466871957882189, "grad_norm": 0.1181640625, "learning_rate": 0.0003626542167477898, "loss": 0.5149, "step": 110070 }, { "epoch": 5.467368630177809, "grad_norm": 0.11474609375, "learning_rate": 0.0003626144829641403, "loss": 0.4969, "step": 110080 }, { "epoch": 5.467865302473428, "grad_norm": 0.10400390625, "learning_rate": 0.0003625747491804907, "loss": 0.5312, "step": 110090 }, { "epoch": 5.4683619747690475, "grad_norm": 0.1376953125, "learning_rate": 0.0003625350153968412, "loss": 0.4916, "step": 110100 }, { "epoch": 5.468858647064667, "grad_norm": 0.11865234375, "learning_rate": 0.00036249528161319165, "loss": 0.4932, "step": 110110 }, { "epoch": 5.469355319360286, "grad_norm": 0.12060546875, "learning_rate": 0.00036245554782954206, "loss": 0.5117, "step": 110120 }, { "epoch": 5.469851991655905, "grad_norm": 0.1123046875, "learning_rate": 0.00036241581404589254, "loss": 0.5093, "step": 110130 }, { "epoch": 5.470348663951524, "grad_norm": 0.11767578125, "learning_rate": 0.000362376080262243, "loss": 0.5008, "step": 110140 }, { "epoch": 5.4708453362471445, "grad_norm": 0.1181640625, "learning_rate": 0.0003623363464785934, "loss": 0.4835, "step": 110150 }, { "epoch": 5.471342008542764, "grad_norm": 0.12451171875, "learning_rate": 0.0003622966126949439, "loss": 0.5138, "step": 110160 }, { "epoch": 5.471838680838383, "grad_norm": 0.1142578125, "learning_rate": 0.00036225687891129437, "loss": 0.4963, "step": 110170 }, { "epoch": 5.472335353134002, "grad_norm": 0.140625, "learning_rate": 0.00036221714512764484, "loss": 0.5067, "step": 110180 }, { "epoch": 5.472832025429621, "grad_norm": 0.1689453125, "learning_rate": 0.00036217741134399525, "loss": 0.4944, "step": 110190 }, { "epoch": 5.473328697725241, "grad_norm": 0.1630859375, "learning_rate": 0.00036213767756034567, "loss": 0.5065, "step": 110200 }, { "epoch": 5.47382537002086, "grad_norm": 0.1337890625, "learning_rate": 0.00036209794377669614, "loss": 0.5064, "step": 110210 }, { "epoch": 5.47432204231648, "grad_norm": 0.11376953125, "learning_rate": 0.0003620582099930466, "loss": 0.4773, "step": 110220 }, { "epoch": 5.474818714612099, "grad_norm": 0.11962890625, "learning_rate": 0.00036201847620939703, "loss": 0.4551, "step": 110230 }, { "epoch": 5.4753153869077185, "grad_norm": 0.115234375, "learning_rate": 0.0003619787424257475, "loss": 0.4933, "step": 110240 }, { "epoch": 5.475812059203338, "grad_norm": 0.134765625, "learning_rate": 0.000361939008642098, "loss": 0.4873, "step": 110250 }, { "epoch": 5.476308731498957, "grad_norm": 0.12109375, "learning_rate": 0.00036189927485844845, "loss": 0.4914, "step": 110260 }, { "epoch": 5.476805403794576, "grad_norm": 0.130859375, "learning_rate": 0.00036185954107479886, "loss": 0.518, "step": 110270 }, { "epoch": 5.477302076090195, "grad_norm": 0.1455078125, "learning_rate": 0.0003618198072911493, "loss": 0.4907, "step": 110280 }, { "epoch": 5.4777987483858155, "grad_norm": 0.1259765625, "learning_rate": 0.0003617800735074998, "loss": 0.4896, "step": 110290 }, { "epoch": 5.478295420681435, "grad_norm": 0.138671875, "learning_rate": 0.0003617403397238502, "loss": 0.4898, "step": 110300 }, { "epoch": 5.478792092977054, "grad_norm": 0.1298828125, "learning_rate": 0.00036170060594020064, "loss": 0.5006, "step": 110310 }, { "epoch": 5.479288765272673, "grad_norm": 0.119140625, "learning_rate": 0.0003616608721565511, "loss": 0.5047, "step": 110320 }, { "epoch": 5.479785437568292, "grad_norm": 0.11767578125, "learning_rate": 0.0003616211383729016, "loss": 0.5147, "step": 110330 }, { "epoch": 5.480282109863912, "grad_norm": 0.1435546875, "learning_rate": 0.00036158140458925205, "loss": 0.5337, "step": 110340 }, { "epoch": 5.480778782159531, "grad_norm": 0.11572265625, "learning_rate": 0.00036154167080560247, "loss": 0.5247, "step": 110350 }, { "epoch": 5.48127545445515, "grad_norm": 0.11767578125, "learning_rate": 0.00036150193702195294, "loss": 0.5077, "step": 110360 }, { "epoch": 5.48177212675077, "grad_norm": 0.119140625, "learning_rate": 0.0003614622032383034, "loss": 0.4701, "step": 110370 }, { "epoch": 5.482268799046389, "grad_norm": 0.1201171875, "learning_rate": 0.00036142246945465383, "loss": 0.5172, "step": 110380 }, { "epoch": 5.482765471342009, "grad_norm": 0.111328125, "learning_rate": 0.00036138273567100425, "loss": 0.5148, "step": 110390 }, { "epoch": 5.483262143637628, "grad_norm": 0.123046875, "learning_rate": 0.0003613430018873548, "loss": 0.5129, "step": 110400 }, { "epoch": 5.483758815933247, "grad_norm": 0.11962890625, "learning_rate": 0.0003613032681037052, "loss": 0.4989, "step": 110410 }, { "epoch": 5.484255488228866, "grad_norm": 0.16015625, "learning_rate": 0.00036126353432005566, "loss": 0.4895, "step": 110420 }, { "epoch": 5.4847521605244856, "grad_norm": 0.1171875, "learning_rate": 0.0003612238005364061, "loss": 0.4899, "step": 110430 }, { "epoch": 5.485248832820106, "grad_norm": 0.1484375, "learning_rate": 0.00036118406675275655, "loss": 0.525, "step": 110440 }, { "epoch": 5.485745505115725, "grad_norm": 0.1142578125, "learning_rate": 0.000361144332969107, "loss": 0.4797, "step": 110450 }, { "epoch": 5.486242177411344, "grad_norm": 0.154296875, "learning_rate": 0.00036110459918545744, "loss": 0.526, "step": 110460 }, { "epoch": 5.486738849706963, "grad_norm": 0.1298828125, "learning_rate": 0.00036106486540180786, "loss": 0.5061, "step": 110470 }, { "epoch": 5.487235522002583, "grad_norm": 0.1572265625, "learning_rate": 0.0003610251316181584, "loss": 0.4925, "step": 110480 }, { "epoch": 5.487732194298202, "grad_norm": 0.12109375, "learning_rate": 0.0003609853978345088, "loss": 0.4995, "step": 110490 }, { "epoch": 5.488228866593821, "grad_norm": 0.115234375, "learning_rate": 0.00036094566405085927, "loss": 0.495, "step": 110500 }, { "epoch": 5.488725538889441, "grad_norm": 0.1376953125, "learning_rate": 0.0003609059302672097, "loss": 0.4999, "step": 110510 }, { "epoch": 5.48922221118506, "grad_norm": 0.1083984375, "learning_rate": 0.00036086619648356016, "loss": 0.4985, "step": 110520 }, { "epoch": 5.48971888348068, "grad_norm": 0.13671875, "learning_rate": 0.00036082646269991063, "loss": 0.4989, "step": 110530 }, { "epoch": 5.490215555776299, "grad_norm": 0.1572265625, "learning_rate": 0.00036078672891626105, "loss": 0.5001, "step": 110540 }, { "epoch": 5.490712228071918, "grad_norm": 0.1259765625, "learning_rate": 0.0003607469951326115, "loss": 0.4665, "step": 110550 }, { "epoch": 5.491208900367537, "grad_norm": 0.345703125, "learning_rate": 0.000360707261348962, "loss": 0.5204, "step": 110560 }, { "epoch": 5.4917055726631565, "grad_norm": 0.18359375, "learning_rate": 0.0003606675275653124, "loss": 0.4917, "step": 110570 }, { "epoch": 5.492202244958776, "grad_norm": 0.12890625, "learning_rate": 0.0003606277937816629, "loss": 0.4965, "step": 110580 }, { "epoch": 5.492698917254396, "grad_norm": 0.115234375, "learning_rate": 0.00036058805999801335, "loss": 0.5131, "step": 110590 }, { "epoch": 5.493195589550015, "grad_norm": 0.1103515625, "learning_rate": 0.00036054832621436377, "loss": 0.4856, "step": 110600 }, { "epoch": 5.493692261845634, "grad_norm": 0.1611328125, "learning_rate": 0.00036050859243071424, "loss": 0.4854, "step": 110610 }, { "epoch": 5.4941889341412535, "grad_norm": 0.12109375, "learning_rate": 0.00036046885864706466, "loss": 0.4969, "step": 110620 }, { "epoch": 5.494685606436873, "grad_norm": 0.12353515625, "learning_rate": 0.0003604291248634152, "loss": 0.5093, "step": 110630 }, { "epoch": 5.495182278732492, "grad_norm": 0.1474609375, "learning_rate": 0.0003603893910797656, "loss": 0.5054, "step": 110640 }, { "epoch": 5.495678951028111, "grad_norm": 0.11669921875, "learning_rate": 0.000360349657296116, "loss": 0.4877, "step": 110650 }, { "epoch": 5.496175623323731, "grad_norm": 0.1259765625, "learning_rate": 0.0003603099235124665, "loss": 0.5036, "step": 110660 }, { "epoch": 5.496672295619351, "grad_norm": 0.1435546875, "learning_rate": 0.00036027018972881696, "loss": 0.5093, "step": 110670 }, { "epoch": 5.49716896791497, "grad_norm": 0.1435546875, "learning_rate": 0.0003602304559451674, "loss": 0.4902, "step": 110680 }, { "epoch": 5.497665640210589, "grad_norm": 0.130859375, "learning_rate": 0.00036019072216151785, "loss": 0.4923, "step": 110690 }, { "epoch": 5.498162312506208, "grad_norm": 0.11962890625, "learning_rate": 0.0003601509883778683, "loss": 0.5205, "step": 110700 }, { "epoch": 5.4986589848018275, "grad_norm": 0.1396484375, "learning_rate": 0.0003601112545942188, "loss": 0.5088, "step": 110710 }, { "epoch": 5.499155657097447, "grad_norm": 0.125, "learning_rate": 0.0003600715208105692, "loss": 0.5202, "step": 110720 }, { "epoch": 5.499652329393067, "grad_norm": 0.1259765625, "learning_rate": 0.0003600317870269196, "loss": 0.5072, "step": 110730 }, { "epoch": 5.500149001688686, "grad_norm": 0.1171875, "learning_rate": 0.0003599920532432701, "loss": 0.5187, "step": 110740 }, { "epoch": 5.500645673984305, "grad_norm": 0.12060546875, "learning_rate": 0.00035995231945962057, "loss": 0.4875, "step": 110750 }, { "epoch": 5.5011423462799245, "grad_norm": 0.12109375, "learning_rate": 0.000359912585675971, "loss": 0.4937, "step": 110760 }, { "epoch": 5.501639018575544, "grad_norm": 0.1259765625, "learning_rate": 0.00035987285189232146, "loss": 0.5154, "step": 110770 }, { "epoch": 5.502135690871163, "grad_norm": 0.126953125, "learning_rate": 0.0003598331181086719, "loss": 0.521, "step": 110780 }, { "epoch": 5.502632363166782, "grad_norm": 0.1513671875, "learning_rate": 0.0003597933843250224, "loss": 0.5052, "step": 110790 }, { "epoch": 5.503129035462402, "grad_norm": 0.1259765625, "learning_rate": 0.0003597536505413728, "loss": 0.4976, "step": 110800 }, { "epoch": 5.5036257077580215, "grad_norm": 0.11181640625, "learning_rate": 0.00035971391675772323, "loss": 0.4796, "step": 110810 }, { "epoch": 5.504122380053641, "grad_norm": 0.126953125, "learning_rate": 0.00035967418297407376, "loss": 0.491, "step": 110820 }, { "epoch": 5.50461905234926, "grad_norm": 0.12890625, "learning_rate": 0.0003596344491904242, "loss": 0.5089, "step": 110830 }, { "epoch": 5.505115724644879, "grad_norm": 0.115234375, "learning_rate": 0.0003595947154067746, "loss": 0.5173, "step": 110840 }, { "epoch": 5.505612396940498, "grad_norm": 0.1064453125, "learning_rate": 0.00035955498162312506, "loss": 0.4792, "step": 110850 }, { "epoch": 5.506109069236118, "grad_norm": 0.14453125, "learning_rate": 0.00035951524783947553, "loss": 0.5183, "step": 110860 }, { "epoch": 5.506605741531738, "grad_norm": 0.17578125, "learning_rate": 0.000359475514055826, "loss": 0.4963, "step": 110870 }, { "epoch": 5.507102413827357, "grad_norm": 0.1650390625, "learning_rate": 0.0003594357802721764, "loss": 0.5182, "step": 110880 }, { "epoch": 5.507599086122976, "grad_norm": 0.1611328125, "learning_rate": 0.0003593960464885269, "loss": 0.5177, "step": 110890 }, { "epoch": 5.5080957584185954, "grad_norm": 0.115234375, "learning_rate": 0.00035935631270487737, "loss": 0.5087, "step": 110900 }, { "epoch": 5.508592430714215, "grad_norm": 0.14453125, "learning_rate": 0.0003593165789212278, "loss": 0.5081, "step": 110910 }, { "epoch": 5.509089103009834, "grad_norm": 0.125, "learning_rate": 0.0003592768451375782, "loss": 0.5092, "step": 110920 }, { "epoch": 5.509585775305453, "grad_norm": 0.1181640625, "learning_rate": 0.0003592371113539287, "loss": 0.501, "step": 110930 }, { "epoch": 5.510082447601073, "grad_norm": 0.1318359375, "learning_rate": 0.00035919737757027914, "loss": 0.5157, "step": 110940 }, { "epoch": 5.5105791198966925, "grad_norm": 0.109375, "learning_rate": 0.0003591576437866296, "loss": 0.4858, "step": 110950 }, { "epoch": 5.511075792192312, "grad_norm": 0.1494140625, "learning_rate": 0.00035911791000298003, "loss": 0.5092, "step": 110960 }, { "epoch": 5.511572464487931, "grad_norm": 0.15234375, "learning_rate": 0.0003590781762193305, "loss": 0.5241, "step": 110970 }, { "epoch": 5.51206913678355, "grad_norm": 0.169921875, "learning_rate": 0.000359038442435681, "loss": 0.4872, "step": 110980 }, { "epoch": 5.512565809079169, "grad_norm": 0.1298828125, "learning_rate": 0.0003589987086520314, "loss": 0.5347, "step": 110990 }, { "epoch": 5.513062481374789, "grad_norm": 0.154296875, "learning_rate": 0.00035895897486838186, "loss": 0.535, "step": 111000 }, { "epoch": 5.513559153670409, "grad_norm": 0.11083984375, "learning_rate": 0.00035891924108473233, "loss": 0.4963, "step": 111010 }, { "epoch": 5.514055825966028, "grad_norm": 0.134765625, "learning_rate": 0.00035887950730108275, "loss": 0.4886, "step": 111020 }, { "epoch": 5.514552498261647, "grad_norm": 0.140625, "learning_rate": 0.0003588397735174332, "loss": 0.5294, "step": 111030 }, { "epoch": 5.515049170557266, "grad_norm": 0.1630859375, "learning_rate": 0.00035880003973378364, "loss": 0.5258, "step": 111040 }, { "epoch": 5.515545842852886, "grad_norm": 0.125, "learning_rate": 0.0003587603059501341, "loss": 0.513, "step": 111050 }, { "epoch": 5.516042515148505, "grad_norm": 0.1416015625, "learning_rate": 0.0003587205721664846, "loss": 0.4839, "step": 111060 }, { "epoch": 5.516539187444124, "grad_norm": 0.1328125, "learning_rate": 0.000358680838382835, "loss": 0.49, "step": 111070 }, { "epoch": 5.517035859739744, "grad_norm": 0.1220703125, "learning_rate": 0.00035864110459918547, "loss": 0.545, "step": 111080 }, { "epoch": 5.517532532035363, "grad_norm": 0.173828125, "learning_rate": 0.00035860137081553594, "loss": 0.478, "step": 111090 }, { "epoch": 5.518029204330983, "grad_norm": 0.1201171875, "learning_rate": 0.00035856163703188636, "loss": 0.4944, "step": 111100 }, { "epoch": 5.518525876626602, "grad_norm": 0.11328125, "learning_rate": 0.00035852190324823683, "loss": 0.5055, "step": 111110 }, { "epoch": 5.519022548922221, "grad_norm": 0.1787109375, "learning_rate": 0.0003584821694645873, "loss": 0.4963, "step": 111120 }, { "epoch": 5.51951922121784, "grad_norm": 0.11376953125, "learning_rate": 0.0003584424356809377, "loss": 0.522, "step": 111130 }, { "epoch": 5.52001589351346, "grad_norm": 0.140625, "learning_rate": 0.0003584027018972882, "loss": 0.5008, "step": 111140 }, { "epoch": 5.520512565809079, "grad_norm": 0.1630859375, "learning_rate": 0.0003583629681136386, "loss": 0.5025, "step": 111150 }, { "epoch": 5.521009238104699, "grad_norm": 0.11962890625, "learning_rate": 0.00035832323432998913, "loss": 0.5103, "step": 111160 }, { "epoch": 5.521505910400318, "grad_norm": 0.1279296875, "learning_rate": 0.00035828350054633955, "loss": 0.5237, "step": 111170 }, { "epoch": 5.522002582695937, "grad_norm": 0.1396484375, "learning_rate": 0.00035824376676268997, "loss": 0.5219, "step": 111180 }, { "epoch": 5.522499254991557, "grad_norm": 0.1767578125, "learning_rate": 0.00035820403297904044, "loss": 0.5106, "step": 111190 }, { "epoch": 5.522995927287176, "grad_norm": 0.1884765625, "learning_rate": 0.0003581642991953909, "loss": 0.5001, "step": 111200 }, { "epoch": 5.523492599582795, "grad_norm": 0.11328125, "learning_rate": 0.00035812456541174133, "loss": 0.5162, "step": 111210 }, { "epoch": 5.523989271878414, "grad_norm": 0.1455078125, "learning_rate": 0.0003580848316280918, "loss": 0.5089, "step": 111220 }, { "epoch": 5.5244859441740335, "grad_norm": 0.115234375, "learning_rate": 0.00035804509784444227, "loss": 0.5231, "step": 111230 }, { "epoch": 5.524982616469654, "grad_norm": 0.12109375, "learning_rate": 0.00035800536406079274, "loss": 0.4949, "step": 111240 }, { "epoch": 5.525479288765273, "grad_norm": 0.1435546875, "learning_rate": 0.00035796563027714316, "loss": 0.5107, "step": 111250 }, { "epoch": 5.525975961060892, "grad_norm": 0.1103515625, "learning_rate": 0.0003579258964934936, "loss": 0.5279, "step": 111260 }, { "epoch": 5.526472633356511, "grad_norm": 0.1376953125, "learning_rate": 0.0003578861627098441, "loss": 0.5147, "step": 111270 }, { "epoch": 5.5269693056521305, "grad_norm": 0.11474609375, "learning_rate": 0.0003578464289261945, "loss": 0.4759, "step": 111280 }, { "epoch": 5.52746597794775, "grad_norm": 0.1279296875, "learning_rate": 0.00035780669514254494, "loss": 0.4928, "step": 111290 }, { "epoch": 5.527962650243369, "grad_norm": 0.138671875, "learning_rate": 0.0003577669613588954, "loss": 0.4903, "step": 111300 }, { "epoch": 5.528459322538989, "grad_norm": 0.1455078125, "learning_rate": 0.0003577272275752459, "loss": 0.5192, "step": 111310 }, { "epoch": 5.528955994834608, "grad_norm": 0.140625, "learning_rate": 0.00035768749379159635, "loss": 0.5061, "step": 111320 }, { "epoch": 5.5294526671302275, "grad_norm": 0.12353515625, "learning_rate": 0.00035764776000794677, "loss": 0.4941, "step": 111330 }, { "epoch": 5.529949339425847, "grad_norm": 0.1484375, "learning_rate": 0.0003576080262242972, "loss": 0.5264, "step": 111340 }, { "epoch": 5.530446011721466, "grad_norm": 0.12890625, "learning_rate": 0.0003575682924406477, "loss": 0.4951, "step": 111350 }, { "epoch": 5.530942684017085, "grad_norm": 0.138671875, "learning_rate": 0.0003575285586569981, "loss": 0.5178, "step": 111360 }, { "epoch": 5.5314393563127044, "grad_norm": 0.1201171875, "learning_rate": 0.00035748882487334854, "loss": 0.5282, "step": 111370 }, { "epoch": 5.531936028608325, "grad_norm": 0.10888671875, "learning_rate": 0.000357449091089699, "loss": 0.5222, "step": 111380 }, { "epoch": 5.532432700903944, "grad_norm": 0.1171875, "learning_rate": 0.0003574093573060495, "loss": 0.5135, "step": 111390 }, { "epoch": 5.532929373199563, "grad_norm": 0.12255859375, "learning_rate": 0.00035736962352239996, "loss": 0.4777, "step": 111400 }, { "epoch": 5.533426045495182, "grad_norm": 0.12158203125, "learning_rate": 0.0003573298897387504, "loss": 0.4899, "step": 111410 }, { "epoch": 5.5339227177908015, "grad_norm": 0.134765625, "learning_rate": 0.00035729015595510085, "loss": 0.524, "step": 111420 }, { "epoch": 5.534419390086421, "grad_norm": 0.1103515625, "learning_rate": 0.0003572504221714513, "loss": 0.4869, "step": 111430 }, { "epoch": 5.53491606238204, "grad_norm": 0.12451171875, "learning_rate": 0.00035721068838780173, "loss": 0.4676, "step": 111440 }, { "epoch": 5.53541273467766, "grad_norm": 0.11572265625, "learning_rate": 0.0003571709546041522, "loss": 0.4894, "step": 111450 }, { "epoch": 5.535909406973279, "grad_norm": 0.12353515625, "learning_rate": 0.0003571312208205027, "loss": 0.4867, "step": 111460 }, { "epoch": 5.5364060792688985, "grad_norm": 0.1298828125, "learning_rate": 0.0003570914870368531, "loss": 0.5043, "step": 111470 }, { "epoch": 5.536902751564518, "grad_norm": 0.11669921875, "learning_rate": 0.00035705175325320357, "loss": 0.5002, "step": 111480 }, { "epoch": 5.537399423860137, "grad_norm": 0.11279296875, "learning_rate": 0.000357012019469554, "loss": 0.5109, "step": 111490 }, { "epoch": 5.537896096155756, "grad_norm": 0.1279296875, "learning_rate": 0.00035697228568590445, "loss": 0.4679, "step": 111500 }, { "epoch": 5.538392768451375, "grad_norm": 0.1259765625, "learning_rate": 0.0003569325519022549, "loss": 0.4981, "step": 111510 }, { "epoch": 5.5388894407469955, "grad_norm": 0.11083984375, "learning_rate": 0.00035689281811860534, "loss": 0.4789, "step": 111520 }, { "epoch": 5.539386113042615, "grad_norm": 0.1396484375, "learning_rate": 0.0003568530843349558, "loss": 0.508, "step": 111530 }, { "epoch": 5.539882785338234, "grad_norm": 0.12109375, "learning_rate": 0.0003568133505513063, "loss": 0.5006, "step": 111540 }, { "epoch": 5.540379457633853, "grad_norm": 0.11962890625, "learning_rate": 0.0003567736167676567, "loss": 0.4994, "step": 111550 }, { "epoch": 5.540876129929472, "grad_norm": 0.125, "learning_rate": 0.0003567338829840072, "loss": 0.5043, "step": 111560 }, { "epoch": 5.541372802225092, "grad_norm": 0.142578125, "learning_rate": 0.00035669414920035765, "loss": 0.5191, "step": 111570 }, { "epoch": 5.541869474520711, "grad_norm": 0.1357421875, "learning_rate": 0.00035665441541670806, "loss": 0.5158, "step": 111580 }, { "epoch": 5.542366146816331, "grad_norm": 0.1298828125, "learning_rate": 0.00035661468163305853, "loss": 0.5008, "step": 111590 }, { "epoch": 5.54286281911195, "grad_norm": 0.1318359375, "learning_rate": 0.00035657494784940895, "loss": 0.4865, "step": 111600 }, { "epoch": 5.5433594914075695, "grad_norm": 0.12255859375, "learning_rate": 0.0003565352140657594, "loss": 0.4886, "step": 111610 }, { "epoch": 5.543856163703189, "grad_norm": 0.1083984375, "learning_rate": 0.0003564954802821099, "loss": 0.512, "step": 111620 }, { "epoch": 5.544352835998808, "grad_norm": 0.1357421875, "learning_rate": 0.0003564557464984603, "loss": 0.4671, "step": 111630 }, { "epoch": 5.544849508294427, "grad_norm": 0.15625, "learning_rate": 0.0003564160127148108, "loss": 0.527, "step": 111640 }, { "epoch": 5.545346180590046, "grad_norm": 0.12353515625, "learning_rate": 0.00035637627893116125, "loss": 0.506, "step": 111650 }, { "epoch": 5.5458428528856665, "grad_norm": 0.1240234375, "learning_rate": 0.00035633654514751167, "loss": 0.5005, "step": 111660 }, { "epoch": 5.546339525181286, "grad_norm": 0.13671875, "learning_rate": 0.00035629681136386214, "loss": 0.5076, "step": 111670 }, { "epoch": 5.546836197476905, "grad_norm": 0.1171875, "learning_rate": 0.00035625707758021256, "loss": 0.496, "step": 111680 }, { "epoch": 5.547332869772524, "grad_norm": 0.134765625, "learning_rate": 0.0003562173437965631, "loss": 0.4994, "step": 111690 }, { "epoch": 5.547829542068143, "grad_norm": 0.140625, "learning_rate": 0.0003561776100129135, "loss": 0.4964, "step": 111700 }, { "epoch": 5.548326214363763, "grad_norm": 0.1279296875, "learning_rate": 0.0003561378762292639, "loss": 0.5088, "step": 111710 }, { "epoch": 5.548822886659382, "grad_norm": 0.134765625, "learning_rate": 0.0003560981424456144, "loss": 0.4856, "step": 111720 }, { "epoch": 5.549319558955002, "grad_norm": 0.111328125, "learning_rate": 0.00035605840866196486, "loss": 0.5071, "step": 111730 }, { "epoch": 5.549816231250621, "grad_norm": 0.1357421875, "learning_rate": 0.0003560186748783153, "loss": 0.5245, "step": 111740 }, { "epoch": 5.55031290354624, "grad_norm": 0.11865234375, "learning_rate": 0.00035597894109466575, "loss": 0.4938, "step": 111750 }, { "epoch": 5.55080957584186, "grad_norm": 0.1201171875, "learning_rate": 0.0003559392073110162, "loss": 0.4827, "step": 111760 }, { "epoch": 5.551306248137479, "grad_norm": 0.181640625, "learning_rate": 0.0003558994735273667, "loss": 0.5306, "step": 111770 }, { "epoch": 5.551802920433098, "grad_norm": 0.11572265625, "learning_rate": 0.0003558597397437171, "loss": 0.5068, "step": 111780 }, { "epoch": 5.552299592728717, "grad_norm": 0.162109375, "learning_rate": 0.00035582000596006753, "loss": 0.5106, "step": 111790 }, { "epoch": 5.552796265024337, "grad_norm": 0.189453125, "learning_rate": 0.00035578027217641805, "loss": 0.5352, "step": 111800 }, { "epoch": 5.553292937319957, "grad_norm": 0.11572265625, "learning_rate": 0.00035574053839276847, "loss": 0.4932, "step": 111810 }, { "epoch": 5.553789609615576, "grad_norm": 0.1123046875, "learning_rate": 0.0003557008046091189, "loss": 0.5136, "step": 111820 }, { "epoch": 5.554286281911195, "grad_norm": 0.126953125, "learning_rate": 0.00035566107082546936, "loss": 0.5193, "step": 111830 }, { "epoch": 5.554782954206814, "grad_norm": 0.1171875, "learning_rate": 0.00035562133704181983, "loss": 0.4814, "step": 111840 }, { "epoch": 5.555279626502434, "grad_norm": 0.126953125, "learning_rate": 0.0003555816032581703, "loss": 0.5252, "step": 111850 }, { "epoch": 5.555776298798053, "grad_norm": 0.11767578125, "learning_rate": 0.0003555418694745207, "loss": 0.5123, "step": 111860 }, { "epoch": 5.556272971093672, "grad_norm": 0.123046875, "learning_rate": 0.0003555021356908712, "loss": 0.4962, "step": 111870 }, { "epoch": 5.556769643389291, "grad_norm": 0.1572265625, "learning_rate": 0.00035546240190722166, "loss": 0.4998, "step": 111880 }, { "epoch": 5.557266315684911, "grad_norm": 0.169921875, "learning_rate": 0.0003554226681235721, "loss": 0.5142, "step": 111890 }, { "epoch": 5.557762987980531, "grad_norm": 0.1181640625, "learning_rate": 0.00035538293433992255, "loss": 0.4966, "step": 111900 }, { "epoch": 5.55825966027615, "grad_norm": 0.10693359375, "learning_rate": 0.00035534320055627297, "loss": 0.5049, "step": 111910 }, { "epoch": 5.558756332571769, "grad_norm": 0.126953125, "learning_rate": 0.00035530346677262344, "loss": 0.5032, "step": 111920 }, { "epoch": 5.559253004867388, "grad_norm": 0.123046875, "learning_rate": 0.0003552637329889739, "loss": 0.5364, "step": 111930 }, { "epoch": 5.5597496771630075, "grad_norm": 0.1337890625, "learning_rate": 0.0003552239992053243, "loss": 0.5056, "step": 111940 }, { "epoch": 5.560246349458627, "grad_norm": 0.1396484375, "learning_rate": 0.0003551842654216748, "loss": 0.4908, "step": 111950 }, { "epoch": 5.560743021754247, "grad_norm": 0.1298828125, "learning_rate": 0.00035514453163802527, "loss": 0.4613, "step": 111960 }, { "epoch": 5.561239694049866, "grad_norm": 0.115234375, "learning_rate": 0.0003551047978543757, "loss": 0.5262, "step": 111970 }, { "epoch": 5.561736366345485, "grad_norm": 0.1328125, "learning_rate": 0.00035506506407072616, "loss": 0.4986, "step": 111980 }, { "epoch": 5.5622330386411045, "grad_norm": 0.1220703125, "learning_rate": 0.00035502533028707663, "loss": 0.5007, "step": 111990 }, { "epoch": 5.562729710936724, "grad_norm": 0.11865234375, "learning_rate": 0.00035498559650342705, "loss": 0.5097, "step": 112000 }, { "epoch": 5.563226383232343, "grad_norm": 0.10693359375, "learning_rate": 0.0003549458627197775, "loss": 0.5015, "step": 112010 }, { "epoch": 5.563723055527962, "grad_norm": 0.1533203125, "learning_rate": 0.00035490612893612794, "loss": 0.5014, "step": 112020 }, { "epoch": 5.564219727823582, "grad_norm": 0.15234375, "learning_rate": 0.0003548663951524784, "loss": 0.4997, "step": 112030 }, { "epoch": 5.5647164001192015, "grad_norm": 0.138671875, "learning_rate": 0.0003548266613688289, "loss": 0.5313, "step": 112040 }, { "epoch": 5.565213072414821, "grad_norm": 0.130859375, "learning_rate": 0.0003547869275851793, "loss": 0.5256, "step": 112050 }, { "epoch": 5.56570974471044, "grad_norm": 0.12060546875, "learning_rate": 0.00035474719380152977, "loss": 0.4737, "step": 112060 }, { "epoch": 5.566206417006059, "grad_norm": 0.12158203125, "learning_rate": 0.00035470746001788024, "loss": 0.4921, "step": 112070 }, { "epoch": 5.5667030893016785, "grad_norm": 0.10791015625, "learning_rate": 0.00035466772623423065, "loss": 0.5041, "step": 112080 }, { "epoch": 5.567199761597298, "grad_norm": 0.1650390625, "learning_rate": 0.0003546279924505811, "loss": 0.5132, "step": 112090 }, { "epoch": 5.567696433892918, "grad_norm": 0.12451171875, "learning_rate": 0.0003545882586669316, "loss": 0.511, "step": 112100 }, { "epoch": 5.568193106188537, "grad_norm": 0.126953125, "learning_rate": 0.000354548524883282, "loss": 0.5041, "step": 112110 }, { "epoch": 5.568689778484156, "grad_norm": 0.1337890625, "learning_rate": 0.0003545087910996325, "loss": 0.4775, "step": 112120 }, { "epoch": 5.5691864507797755, "grad_norm": 0.126953125, "learning_rate": 0.0003544690573159829, "loss": 0.5107, "step": 112130 }, { "epoch": 5.569683123075395, "grad_norm": 0.1298828125, "learning_rate": 0.0003544293235323334, "loss": 0.4981, "step": 112140 }, { "epoch": 5.570179795371014, "grad_norm": 0.12060546875, "learning_rate": 0.00035438958974868385, "loss": 0.5178, "step": 112150 }, { "epoch": 5.570676467666633, "grad_norm": 0.142578125, "learning_rate": 0.00035434985596503426, "loss": 0.515, "step": 112160 }, { "epoch": 5.571173139962253, "grad_norm": 0.146484375, "learning_rate": 0.00035431012218138473, "loss": 0.5216, "step": 112170 }, { "epoch": 5.5716698122578725, "grad_norm": 0.10791015625, "learning_rate": 0.0003542703883977352, "loss": 0.4892, "step": 112180 }, { "epoch": 5.572166484553492, "grad_norm": 0.1201171875, "learning_rate": 0.0003542306546140856, "loss": 0.4874, "step": 112190 }, { "epoch": 5.572663156849111, "grad_norm": 0.12109375, "learning_rate": 0.0003541909208304361, "loss": 0.4912, "step": 112200 }, { "epoch": 5.57315982914473, "grad_norm": 0.11328125, "learning_rate": 0.0003541511870467865, "loss": 0.5085, "step": 112210 }, { "epoch": 5.573656501440349, "grad_norm": 0.17578125, "learning_rate": 0.00035411145326313704, "loss": 0.5043, "step": 112220 }, { "epoch": 5.574153173735969, "grad_norm": 0.1748046875, "learning_rate": 0.00035407171947948745, "loss": 0.5144, "step": 112230 }, { "epoch": 5.574649846031589, "grad_norm": 0.1103515625, "learning_rate": 0.00035403198569583787, "loss": 0.5169, "step": 112240 }, { "epoch": 5.575146518327208, "grad_norm": 0.12060546875, "learning_rate": 0.00035399225191218834, "loss": 0.4937, "step": 112250 }, { "epoch": 5.575643190622827, "grad_norm": 0.15234375, "learning_rate": 0.0003539525181285388, "loss": 0.5045, "step": 112260 }, { "epoch": 5.576139862918446, "grad_norm": 0.126953125, "learning_rate": 0.0003539127843448893, "loss": 0.5102, "step": 112270 }, { "epoch": 5.576636535214066, "grad_norm": 0.1416015625, "learning_rate": 0.0003538730505612397, "loss": 0.5018, "step": 112280 }, { "epoch": 5.577133207509685, "grad_norm": 0.166015625, "learning_rate": 0.0003538333167775902, "loss": 0.4976, "step": 112290 }, { "epoch": 5.577629879805304, "grad_norm": 0.123046875, "learning_rate": 0.00035379358299394065, "loss": 0.5036, "step": 112300 }, { "epoch": 5.578126552100924, "grad_norm": 0.1435546875, "learning_rate": 0.00035375384921029106, "loss": 0.4883, "step": 112310 }, { "epoch": 5.5786232243965435, "grad_norm": 0.1142578125, "learning_rate": 0.0003537141154266415, "loss": 0.4795, "step": 112320 }, { "epoch": 5.579119896692163, "grad_norm": 0.1259765625, "learning_rate": 0.000353674381642992, "loss": 0.5187, "step": 112330 }, { "epoch": 5.579616568987782, "grad_norm": 0.11572265625, "learning_rate": 0.0003536346478593424, "loss": 0.4851, "step": 112340 }, { "epoch": 5.580113241283401, "grad_norm": 0.1220703125, "learning_rate": 0.0003535949140756929, "loss": 0.5324, "step": 112350 }, { "epoch": 5.58060991357902, "grad_norm": 0.1484375, "learning_rate": 0.0003535551802920433, "loss": 0.5057, "step": 112360 }, { "epoch": 5.58110658587464, "grad_norm": 0.1572265625, "learning_rate": 0.0003535154465083938, "loss": 0.5042, "step": 112370 }, { "epoch": 5.58160325817026, "grad_norm": 0.12109375, "learning_rate": 0.00035347571272474425, "loss": 0.4992, "step": 112380 }, { "epoch": 5.582099930465879, "grad_norm": 0.1533203125, "learning_rate": 0.00035343597894109467, "loss": 0.4987, "step": 112390 }, { "epoch": 5.582596602761498, "grad_norm": 0.1142578125, "learning_rate": 0.00035339624515744514, "loss": 0.4869, "step": 112400 }, { "epoch": 5.583093275057117, "grad_norm": 0.1298828125, "learning_rate": 0.0003533565113737956, "loss": 0.5111, "step": 112410 }, { "epoch": 5.583589947352737, "grad_norm": 0.11572265625, "learning_rate": 0.00035331677759014603, "loss": 0.4857, "step": 112420 }, { "epoch": 5.584086619648356, "grad_norm": 0.1171875, "learning_rate": 0.0003532770438064965, "loss": 0.5344, "step": 112430 }, { "epoch": 5.584583291943975, "grad_norm": 0.12451171875, "learning_rate": 0.0003532373100228469, "loss": 0.5189, "step": 112440 }, { "epoch": 5.585079964239595, "grad_norm": 0.1318359375, "learning_rate": 0.0003531975762391974, "loss": 0.4859, "step": 112450 }, { "epoch": 5.585576636535214, "grad_norm": 0.140625, "learning_rate": 0.00035315784245554786, "loss": 0.511, "step": 112460 }, { "epoch": 5.586073308830834, "grad_norm": 0.126953125, "learning_rate": 0.0003531181086718983, "loss": 0.4974, "step": 112470 }, { "epoch": 5.586569981126453, "grad_norm": 0.14453125, "learning_rate": 0.00035307837488824875, "loss": 0.4927, "step": 112480 }, { "epoch": 5.587066653422072, "grad_norm": 0.154296875, "learning_rate": 0.0003530386411045992, "loss": 0.4816, "step": 112490 }, { "epoch": 5.587563325717691, "grad_norm": 0.12255859375, "learning_rate": 0.00035299890732094964, "loss": 0.466, "step": 112500 }, { "epoch": 5.5880599980133105, "grad_norm": 0.12255859375, "learning_rate": 0.0003529591735373001, "loss": 0.5036, "step": 112510 }, { "epoch": 5.588556670308931, "grad_norm": 0.1162109375, "learning_rate": 0.0003529194397536506, "loss": 0.5224, "step": 112520 }, { "epoch": 5.58905334260455, "grad_norm": 0.197265625, "learning_rate": 0.000352879705970001, "loss": 0.4723, "step": 112530 }, { "epoch": 5.589550014900169, "grad_norm": 0.10205078125, "learning_rate": 0.00035283997218635147, "loss": 0.5103, "step": 112540 }, { "epoch": 5.590046687195788, "grad_norm": 0.11181640625, "learning_rate": 0.0003528002384027019, "loss": 0.502, "step": 112550 }, { "epoch": 5.590543359491408, "grad_norm": 0.1455078125, "learning_rate": 0.00035276050461905236, "loss": 0.5116, "step": 112560 }, { "epoch": 5.591040031787027, "grad_norm": 0.11962890625, "learning_rate": 0.00035272077083540283, "loss": 0.5403, "step": 112570 }, { "epoch": 5.591536704082646, "grad_norm": 0.1416015625, "learning_rate": 0.00035268103705175325, "loss": 0.5146, "step": 112580 }, { "epoch": 5.592033376378265, "grad_norm": 0.12060546875, "learning_rate": 0.0003526413032681037, "loss": 0.5076, "step": 112590 }, { "epoch": 5.5925300486738845, "grad_norm": 0.123046875, "learning_rate": 0.0003526015694844542, "loss": 0.5097, "step": 112600 }, { "epoch": 5.593026720969505, "grad_norm": 0.123046875, "learning_rate": 0.0003525618357008046, "loss": 0.4999, "step": 112610 }, { "epoch": 5.593523393265124, "grad_norm": 0.134765625, "learning_rate": 0.0003525221019171551, "loss": 0.4954, "step": 112620 }, { "epoch": 5.594020065560743, "grad_norm": 0.1513671875, "learning_rate": 0.00035248236813350555, "loss": 0.5037, "step": 112630 }, { "epoch": 5.594516737856362, "grad_norm": 0.11669921875, "learning_rate": 0.00035244263434985597, "loss": 0.5443, "step": 112640 }, { "epoch": 5.5950134101519815, "grad_norm": 0.1455078125, "learning_rate": 0.00035240290056620644, "loss": 0.4872, "step": 112650 }, { "epoch": 5.595510082447601, "grad_norm": 0.134765625, "learning_rate": 0.00035236316678255686, "loss": 0.5063, "step": 112660 }, { "epoch": 5.59600675474322, "grad_norm": 0.1181640625, "learning_rate": 0.0003523234329989074, "loss": 0.518, "step": 112670 }, { "epoch": 5.59650342703884, "grad_norm": 0.1298828125, "learning_rate": 0.0003522836992152578, "loss": 0.5125, "step": 112680 }, { "epoch": 5.597000099334459, "grad_norm": 0.158203125, "learning_rate": 0.0003522439654316082, "loss": 0.5128, "step": 112690 }, { "epoch": 5.5974967716300785, "grad_norm": 0.10986328125, "learning_rate": 0.0003522042316479587, "loss": 0.5089, "step": 112700 }, { "epoch": 5.597993443925698, "grad_norm": 0.126953125, "learning_rate": 0.00035216449786430916, "loss": 0.4967, "step": 112710 }, { "epoch": 5.598490116221317, "grad_norm": 0.1240234375, "learning_rate": 0.00035212476408065963, "loss": 0.5239, "step": 112720 }, { "epoch": 5.598986788516936, "grad_norm": 0.109375, "learning_rate": 0.00035208503029701005, "loss": 0.512, "step": 112730 }, { "epoch": 5.599483460812555, "grad_norm": 0.1337890625, "learning_rate": 0.00035204529651336046, "loss": 0.4772, "step": 112740 }, { "epoch": 5.5999801331081756, "grad_norm": 0.1357421875, "learning_rate": 0.000352005562729711, "loss": 0.5299, "step": 112750 }, { "epoch": 5.600476805403795, "grad_norm": 0.12353515625, "learning_rate": 0.0003519658289460614, "loss": 0.4781, "step": 112760 }, { "epoch": 5.600973477699414, "grad_norm": 0.11865234375, "learning_rate": 0.0003519260951624118, "loss": 0.5015, "step": 112770 }, { "epoch": 5.601470149995033, "grad_norm": 0.16015625, "learning_rate": 0.0003518863613787623, "loss": 0.4884, "step": 112780 }, { "epoch": 5.6019668222906525, "grad_norm": 0.123046875, "learning_rate": 0.00035184662759511277, "loss": 0.4633, "step": 112790 }, { "epoch": 5.602463494586272, "grad_norm": 0.140625, "learning_rate": 0.00035180689381146324, "loss": 0.4953, "step": 112800 }, { "epoch": 5.602960166881891, "grad_norm": 0.1337890625, "learning_rate": 0.00035176716002781365, "loss": 0.5085, "step": 112810 }, { "epoch": 5.603456839177511, "grad_norm": 0.2236328125, "learning_rate": 0.0003517274262441641, "loss": 0.4908, "step": 112820 }, { "epoch": 5.60395351147313, "grad_norm": 0.130859375, "learning_rate": 0.0003516876924605146, "loss": 0.4784, "step": 112830 }, { "epoch": 5.6044501837687495, "grad_norm": 0.12353515625, "learning_rate": 0.000351647958676865, "loss": 0.5038, "step": 112840 }, { "epoch": 5.604946856064369, "grad_norm": 0.1318359375, "learning_rate": 0.00035160822489321543, "loss": 0.5014, "step": 112850 }, { "epoch": 5.605443528359988, "grad_norm": 0.10986328125, "learning_rate": 0.00035156849110956596, "loss": 0.5004, "step": 112860 }, { "epoch": 5.605940200655607, "grad_norm": 0.119140625, "learning_rate": 0.0003515287573259164, "loss": 0.4771, "step": 112870 }, { "epoch": 5.606436872951226, "grad_norm": 0.1220703125, "learning_rate": 0.00035148902354226685, "loss": 0.5097, "step": 112880 }, { "epoch": 5.6069335452468465, "grad_norm": 0.162109375, "learning_rate": 0.00035144928975861726, "loss": 0.4958, "step": 112890 }, { "epoch": 5.607430217542466, "grad_norm": 0.119140625, "learning_rate": 0.00035140955597496773, "loss": 0.4939, "step": 112900 }, { "epoch": 5.607926889838085, "grad_norm": 0.12890625, "learning_rate": 0.0003513698221913182, "loss": 0.5129, "step": 112910 }, { "epoch": 5.608423562133704, "grad_norm": 0.11572265625, "learning_rate": 0.0003513300884076686, "loss": 0.4886, "step": 112920 }, { "epoch": 5.608920234429323, "grad_norm": 0.10546875, "learning_rate": 0.0003512903546240191, "loss": 0.5186, "step": 112930 }, { "epoch": 5.609416906724943, "grad_norm": 0.11669921875, "learning_rate": 0.00035125062084036957, "loss": 0.4782, "step": 112940 }, { "epoch": 5.609913579020562, "grad_norm": 0.1328125, "learning_rate": 0.00035121088705672, "loss": 0.518, "step": 112950 }, { "epoch": 5.610410251316182, "grad_norm": 0.1337890625, "learning_rate": 0.00035117115327307045, "loss": 0.505, "step": 112960 }, { "epoch": 5.610906923611801, "grad_norm": 0.1416015625, "learning_rate": 0.0003511314194894209, "loss": 0.51, "step": 112970 }, { "epoch": 5.61140359590742, "grad_norm": 0.12451171875, "learning_rate": 0.00035109168570577134, "loss": 0.5075, "step": 112980 }, { "epoch": 5.61190026820304, "grad_norm": 0.1357421875, "learning_rate": 0.0003510519519221218, "loss": 0.5147, "step": 112990 }, { "epoch": 5.612396940498659, "grad_norm": 0.134765625, "learning_rate": 0.00035101221813847223, "loss": 0.4915, "step": 113000 }, { "epoch": 5.612893612794278, "grad_norm": 0.123046875, "learning_rate": 0.0003509724843548227, "loss": 0.5033, "step": 113010 }, { "epoch": 5.613390285089897, "grad_norm": 0.12255859375, "learning_rate": 0.0003509327505711732, "loss": 0.5055, "step": 113020 }, { "epoch": 5.6138869573855175, "grad_norm": 0.10986328125, "learning_rate": 0.0003508930167875236, "loss": 0.496, "step": 113030 }, { "epoch": 5.614383629681137, "grad_norm": 0.15625, "learning_rate": 0.00035085328300387406, "loss": 0.5035, "step": 113040 }, { "epoch": 5.614880301976756, "grad_norm": 0.11962890625, "learning_rate": 0.00035081354922022453, "loss": 0.4637, "step": 113050 }, { "epoch": 5.615376974272375, "grad_norm": 0.10986328125, "learning_rate": 0.00035077381543657495, "loss": 0.5175, "step": 113060 }, { "epoch": 5.615873646567994, "grad_norm": 0.11328125, "learning_rate": 0.0003507340816529254, "loss": 0.5448, "step": 113070 }, { "epoch": 5.616370318863614, "grad_norm": 0.16796875, "learning_rate": 0.00035069434786927584, "loss": 0.5084, "step": 113080 }, { "epoch": 5.616866991159233, "grad_norm": 0.1103515625, "learning_rate": 0.0003506546140856263, "loss": 0.4975, "step": 113090 }, { "epoch": 5.617363663454853, "grad_norm": 0.1533203125, "learning_rate": 0.0003506148803019768, "loss": 0.5024, "step": 113100 }, { "epoch": 5.617860335750472, "grad_norm": 0.1220703125, "learning_rate": 0.0003505751465183272, "loss": 0.5177, "step": 113110 }, { "epoch": 5.618357008046091, "grad_norm": 0.11767578125, "learning_rate": 0.00035053541273467767, "loss": 0.4979, "step": 113120 }, { "epoch": 5.618853680341711, "grad_norm": 0.1259765625, "learning_rate": 0.00035049567895102814, "loss": 0.5113, "step": 113130 }, { "epoch": 5.61935035263733, "grad_norm": 0.12255859375, "learning_rate": 0.00035045594516737856, "loss": 0.5245, "step": 113140 }, { "epoch": 5.619847024932949, "grad_norm": 0.14453125, "learning_rate": 0.00035041621138372903, "loss": 0.535, "step": 113150 }, { "epoch": 5.620343697228568, "grad_norm": 0.1064453125, "learning_rate": 0.0003503764776000795, "loss": 0.5191, "step": 113160 }, { "epoch": 5.620840369524188, "grad_norm": 0.1162109375, "learning_rate": 0.00035033674381642997, "loss": 0.501, "step": 113170 }, { "epoch": 5.621337041819808, "grad_norm": 0.1171875, "learning_rate": 0.0003502970100327804, "loss": 0.509, "step": 113180 }, { "epoch": 5.621833714115427, "grad_norm": 0.142578125, "learning_rate": 0.0003502572762491308, "loss": 0.507, "step": 113190 }, { "epoch": 5.622330386411046, "grad_norm": 0.1279296875, "learning_rate": 0.00035021754246548133, "loss": 0.5035, "step": 113200 }, { "epoch": 5.622827058706665, "grad_norm": 0.123046875, "learning_rate": 0.00035017780868183175, "loss": 0.5092, "step": 113210 }, { "epoch": 5.6233237310022846, "grad_norm": 0.11572265625, "learning_rate": 0.00035013807489818217, "loss": 0.4871, "step": 113220 }, { "epoch": 5.623820403297904, "grad_norm": 0.1259765625, "learning_rate": 0.00035009834111453264, "loss": 0.4793, "step": 113230 }, { "epoch": 5.624317075593523, "grad_norm": 0.12255859375, "learning_rate": 0.0003500586073308831, "loss": 0.4974, "step": 113240 }, { "epoch": 5.624813747889143, "grad_norm": 0.12060546875, "learning_rate": 0.0003500188735472336, "loss": 0.5088, "step": 113250 }, { "epoch": 5.625310420184762, "grad_norm": 0.150390625, "learning_rate": 0.000349979139763584, "loss": 0.4957, "step": 113260 }, { "epoch": 5.625807092480382, "grad_norm": 0.12109375, "learning_rate": 0.00034993940597993447, "loss": 0.4833, "step": 113270 }, { "epoch": 5.626303764776001, "grad_norm": 0.1689453125, "learning_rate": 0.00034989967219628494, "loss": 0.4983, "step": 113280 }, { "epoch": 5.62680043707162, "grad_norm": 0.1171875, "learning_rate": 0.00034985993841263536, "loss": 0.4798, "step": 113290 }, { "epoch": 5.627297109367239, "grad_norm": 0.1640625, "learning_rate": 0.0003498202046289858, "loss": 0.5027, "step": 113300 }, { "epoch": 5.6277937816628585, "grad_norm": 0.1279296875, "learning_rate": 0.00034978047084533625, "loss": 0.4887, "step": 113310 }, { "epoch": 5.628290453958478, "grad_norm": 0.12109375, "learning_rate": 0.0003497407370616867, "loss": 0.4926, "step": 113320 }, { "epoch": 5.628787126254098, "grad_norm": 0.1298828125, "learning_rate": 0.0003497010032780372, "loss": 0.5032, "step": 113330 }, { "epoch": 5.629283798549717, "grad_norm": 0.1318359375, "learning_rate": 0.0003496612694943876, "loss": 0.4918, "step": 113340 }, { "epoch": 5.629780470845336, "grad_norm": 0.103515625, "learning_rate": 0.0003496215357107381, "loss": 0.5331, "step": 113350 }, { "epoch": 5.6302771431409555, "grad_norm": 0.11083984375, "learning_rate": 0.00034958180192708855, "loss": 0.5183, "step": 113360 }, { "epoch": 5.630773815436575, "grad_norm": 0.1318359375, "learning_rate": 0.00034954206814343897, "loss": 0.5034, "step": 113370 }, { "epoch": 5.631270487732194, "grad_norm": 0.1318359375, "learning_rate": 0.0003495023343597894, "loss": 0.503, "step": 113380 }, { "epoch": 5.631767160027813, "grad_norm": 0.1533203125, "learning_rate": 0.0003494626005761399, "loss": 0.5195, "step": 113390 }, { "epoch": 5.632263832323433, "grad_norm": 0.1240234375, "learning_rate": 0.0003494228667924903, "loss": 0.4737, "step": 113400 }, { "epoch": 5.6327605046190525, "grad_norm": 0.1337890625, "learning_rate": 0.0003493831330088408, "loss": 0.5063, "step": 113410 }, { "epoch": 5.633257176914672, "grad_norm": 0.1875, "learning_rate": 0.0003493433992251912, "loss": 0.5096, "step": 113420 }, { "epoch": 5.633753849210291, "grad_norm": 0.134765625, "learning_rate": 0.0003493036654415417, "loss": 0.4873, "step": 113430 }, { "epoch": 5.63425052150591, "grad_norm": 0.150390625, "learning_rate": 0.00034926393165789216, "loss": 0.4895, "step": 113440 }, { "epoch": 5.634747193801529, "grad_norm": 0.123046875, "learning_rate": 0.0003492241978742426, "loss": 0.5058, "step": 113450 }, { "epoch": 5.635243866097149, "grad_norm": 0.11181640625, "learning_rate": 0.00034918446409059305, "loss": 0.4999, "step": 113460 }, { "epoch": 5.635740538392769, "grad_norm": 0.1708984375, "learning_rate": 0.0003491447303069435, "loss": 0.5198, "step": 113470 }, { "epoch": 5.636237210688388, "grad_norm": 0.1162109375, "learning_rate": 0.00034910499652329393, "loss": 0.5367, "step": 113480 }, { "epoch": 5.636733882984007, "grad_norm": 0.1357421875, "learning_rate": 0.0003490652627396444, "loss": 0.5206, "step": 113490 }, { "epoch": 5.6372305552796265, "grad_norm": 0.1220703125, "learning_rate": 0.0003490255289559949, "loss": 0.4997, "step": 113500 }, { "epoch": 5.637727227575246, "grad_norm": 0.1201171875, "learning_rate": 0.0003489857951723453, "loss": 0.4914, "step": 113510 }, { "epoch": 5.638223899870865, "grad_norm": 0.130859375, "learning_rate": 0.00034894606138869577, "loss": 0.5156, "step": 113520 }, { "epoch": 5.638720572166484, "grad_norm": 0.130859375, "learning_rate": 0.0003489063276050462, "loss": 0.5263, "step": 113530 }, { "epoch": 5.639217244462104, "grad_norm": 0.12890625, "learning_rate": 0.0003488665938213967, "loss": 0.5106, "step": 113540 }, { "epoch": 5.6397139167577235, "grad_norm": 0.154296875, "learning_rate": 0.0003488268600377471, "loss": 0.4905, "step": 113550 }, { "epoch": 5.640210589053343, "grad_norm": 0.12451171875, "learning_rate": 0.00034878712625409754, "loss": 0.4771, "step": 113560 }, { "epoch": 5.640707261348962, "grad_norm": 0.1376953125, "learning_rate": 0.000348747392470448, "loss": 0.4879, "step": 113570 }, { "epoch": 5.641203933644581, "grad_norm": 0.126953125, "learning_rate": 0.0003487076586867985, "loss": 0.5118, "step": 113580 }, { "epoch": 5.6417006059402, "grad_norm": 0.134765625, "learning_rate": 0.0003486679249031489, "loss": 0.5268, "step": 113590 }, { "epoch": 5.64219727823582, "grad_norm": 0.11474609375, "learning_rate": 0.0003486281911194994, "loss": 0.5118, "step": 113600 }, { "epoch": 5.64269395053144, "grad_norm": 0.1435546875, "learning_rate": 0.0003485884573358498, "loss": 0.501, "step": 113610 }, { "epoch": 5.643190622827059, "grad_norm": 0.11962890625, "learning_rate": 0.0003485487235522003, "loss": 0.528, "step": 113620 }, { "epoch": 5.643687295122678, "grad_norm": 0.13671875, "learning_rate": 0.00034850898976855073, "loss": 0.5168, "step": 113630 }, { "epoch": 5.644183967418297, "grad_norm": 0.12255859375, "learning_rate": 0.00034846925598490115, "loss": 0.5091, "step": 113640 }, { "epoch": 5.644680639713917, "grad_norm": 0.150390625, "learning_rate": 0.0003484295222012516, "loss": 0.5126, "step": 113650 }, { "epoch": 5.645177312009536, "grad_norm": 0.10986328125, "learning_rate": 0.0003483897884176021, "loss": 0.5085, "step": 113660 }, { "epoch": 5.645673984305155, "grad_norm": 0.15625, "learning_rate": 0.0003483500546339525, "loss": 0.5356, "step": 113670 }, { "epoch": 5.646170656600775, "grad_norm": 0.125, "learning_rate": 0.000348310320850303, "loss": 0.5088, "step": 113680 }, { "epoch": 5.6466673288963944, "grad_norm": 0.134765625, "learning_rate": 0.00034827058706665345, "loss": 0.5075, "step": 113690 }, { "epoch": 5.647164001192014, "grad_norm": 0.1083984375, "learning_rate": 0.0003482308532830039, "loss": 0.5144, "step": 113700 }, { "epoch": 5.647660673487633, "grad_norm": 0.12255859375, "learning_rate": 0.00034819111949935434, "loss": 0.4816, "step": 113710 }, { "epoch": 5.648157345783252, "grad_norm": 0.11376953125, "learning_rate": 0.00034815138571570476, "loss": 0.523, "step": 113720 }, { "epoch": 5.648654018078871, "grad_norm": 0.11376953125, "learning_rate": 0.0003481116519320553, "loss": 0.5075, "step": 113730 }, { "epoch": 5.649150690374491, "grad_norm": 0.12451171875, "learning_rate": 0.0003480719181484057, "loss": 0.5195, "step": 113740 }, { "epoch": 5.649647362670111, "grad_norm": 0.1337890625, "learning_rate": 0.0003480321843647561, "loss": 0.5362, "step": 113750 }, { "epoch": 5.65014403496573, "grad_norm": 0.1572265625, "learning_rate": 0.0003479924505811066, "loss": 0.515, "step": 113760 }, { "epoch": 5.650640707261349, "grad_norm": 0.1142578125, "learning_rate": 0.00034795271679745706, "loss": 0.491, "step": 113770 }, { "epoch": 5.651137379556968, "grad_norm": 0.1240234375, "learning_rate": 0.00034791298301380753, "loss": 0.4865, "step": 113780 }, { "epoch": 5.651634051852588, "grad_norm": 0.11962890625, "learning_rate": 0.00034787324923015795, "loss": 0.5131, "step": 113790 }, { "epoch": 5.652130724148207, "grad_norm": 0.1416015625, "learning_rate": 0.0003478335154465084, "loss": 0.5108, "step": 113800 }, { "epoch": 5.652627396443826, "grad_norm": 0.1240234375, "learning_rate": 0.0003477937816628589, "loss": 0.5055, "step": 113810 }, { "epoch": 5.653124068739446, "grad_norm": 0.1142578125, "learning_rate": 0.0003477540478792093, "loss": 0.4903, "step": 113820 }, { "epoch": 5.653620741035065, "grad_norm": 0.11279296875, "learning_rate": 0.0003477143140955597, "loss": 0.5069, "step": 113830 }, { "epoch": 5.654117413330685, "grad_norm": 0.11767578125, "learning_rate": 0.0003476745803119102, "loss": 0.4916, "step": 113840 }, { "epoch": 5.654614085626304, "grad_norm": 0.138671875, "learning_rate": 0.00034763484652826067, "loss": 0.5005, "step": 113850 }, { "epoch": 5.655110757921923, "grad_norm": 0.13671875, "learning_rate": 0.00034759511274461114, "loss": 0.4995, "step": 113860 }, { "epoch": 5.655607430217542, "grad_norm": 0.1513671875, "learning_rate": 0.00034755537896096156, "loss": 0.5523, "step": 113870 }, { "epoch": 5.6561041025131615, "grad_norm": 0.12890625, "learning_rate": 0.00034751564517731203, "loss": 0.522, "step": 113880 }, { "epoch": 5.656600774808782, "grad_norm": 0.126953125, "learning_rate": 0.0003474759113936625, "loss": 0.4748, "step": 113890 }, { "epoch": 5.657097447104401, "grad_norm": 0.12109375, "learning_rate": 0.0003474361776100129, "loss": 0.5299, "step": 113900 }, { "epoch": 5.65759411940002, "grad_norm": 0.11767578125, "learning_rate": 0.00034739644382636334, "loss": 0.5318, "step": 113910 }, { "epoch": 5.658090791695639, "grad_norm": 0.130859375, "learning_rate": 0.00034735671004271386, "loss": 0.524, "step": 113920 }, { "epoch": 5.658587463991259, "grad_norm": 0.15625, "learning_rate": 0.0003473169762590643, "loss": 0.494, "step": 113930 }, { "epoch": 5.659084136286878, "grad_norm": 0.1640625, "learning_rate": 0.00034727724247541475, "loss": 0.4914, "step": 113940 }, { "epoch": 5.659580808582497, "grad_norm": 0.115234375, "learning_rate": 0.00034723750869176517, "loss": 0.4848, "step": 113950 }, { "epoch": 5.660077480878116, "grad_norm": 0.123046875, "learning_rate": 0.00034719777490811564, "loss": 0.4869, "step": 113960 }, { "epoch": 5.660574153173736, "grad_norm": 0.11669921875, "learning_rate": 0.0003471580411244661, "loss": 0.4768, "step": 113970 }, { "epoch": 5.661070825469356, "grad_norm": 0.203125, "learning_rate": 0.0003471183073408165, "loss": 0.491, "step": 113980 }, { "epoch": 5.661567497764975, "grad_norm": 0.1162109375, "learning_rate": 0.000347078573557167, "loss": 0.4945, "step": 113990 }, { "epoch": 5.662064170060594, "grad_norm": 0.11279296875, "learning_rate": 0.00034703883977351747, "loss": 0.5128, "step": 114000 }, { "epoch": 5.662560842356213, "grad_norm": 0.1337890625, "learning_rate": 0.0003469991059898679, "loss": 0.4963, "step": 114010 }, { "epoch": 5.6630575146518325, "grad_norm": 0.12890625, "learning_rate": 0.00034695937220621836, "loss": 0.5241, "step": 114020 }, { "epoch": 5.663554186947452, "grad_norm": 0.115234375, "learning_rate": 0.00034691963842256883, "loss": 0.5257, "step": 114030 }, { "epoch": 5.664050859243071, "grad_norm": 0.125, "learning_rate": 0.00034687990463891925, "loss": 0.5243, "step": 114040 }, { "epoch": 5.664547531538691, "grad_norm": 0.1181640625, "learning_rate": 0.0003468401708552697, "loss": 0.4992, "step": 114050 }, { "epoch": 5.66504420383431, "grad_norm": 0.13671875, "learning_rate": 0.00034680043707162013, "loss": 0.5164, "step": 114060 }, { "epoch": 5.6655408761299295, "grad_norm": 0.15625, "learning_rate": 0.00034676070328797066, "loss": 0.5001, "step": 114070 }, { "epoch": 5.666037548425549, "grad_norm": 0.1435546875, "learning_rate": 0.0003467209695043211, "loss": 0.5042, "step": 114080 }, { "epoch": 5.666534220721168, "grad_norm": 0.1142578125, "learning_rate": 0.0003466812357206715, "loss": 0.5113, "step": 114090 }, { "epoch": 5.667030893016787, "grad_norm": 0.12109375, "learning_rate": 0.00034664150193702197, "loss": 0.5155, "step": 114100 }, { "epoch": 5.667527565312406, "grad_norm": 0.119140625, "learning_rate": 0.00034660176815337244, "loss": 0.5023, "step": 114110 }, { "epoch": 5.6680242376080265, "grad_norm": 0.142578125, "learning_rate": 0.00034656203436972285, "loss": 0.4907, "step": 114120 }, { "epoch": 5.668520909903646, "grad_norm": 0.1318359375, "learning_rate": 0.0003465223005860733, "loss": 0.481, "step": 114130 }, { "epoch": 5.669017582199265, "grad_norm": 0.126953125, "learning_rate": 0.00034648256680242374, "loss": 0.5019, "step": 114140 }, { "epoch": 5.669514254494884, "grad_norm": 0.115234375, "learning_rate": 0.00034644283301877427, "loss": 0.4908, "step": 114150 }, { "epoch": 5.6700109267905034, "grad_norm": 0.138671875, "learning_rate": 0.0003464030992351247, "loss": 0.5105, "step": 114160 }, { "epoch": 5.670507599086123, "grad_norm": 0.171875, "learning_rate": 0.0003463633654514751, "loss": 0.5104, "step": 114170 }, { "epoch": 5.671004271381742, "grad_norm": 0.134765625, "learning_rate": 0.0003463236316678256, "loss": 0.5042, "step": 114180 }, { "epoch": 5.671500943677362, "grad_norm": 0.166015625, "learning_rate": 0.00034628389788417605, "loss": 0.5159, "step": 114190 }, { "epoch": 5.671997615972981, "grad_norm": 0.1357421875, "learning_rate": 0.00034624416410052646, "loss": 0.5134, "step": 114200 }, { "epoch": 5.6724942882686005, "grad_norm": 0.1240234375, "learning_rate": 0.00034620443031687693, "loss": 0.5087, "step": 114210 }, { "epoch": 5.67299096056422, "grad_norm": 0.12890625, "learning_rate": 0.0003461646965332274, "loss": 0.5168, "step": 114220 }, { "epoch": 5.673487632859839, "grad_norm": 0.12890625, "learning_rate": 0.0003461249627495779, "loss": 0.5074, "step": 114230 }, { "epoch": 5.673984305155458, "grad_norm": 0.1220703125, "learning_rate": 0.0003460852289659283, "loss": 0.5065, "step": 114240 }, { "epoch": 5.674480977451077, "grad_norm": 0.12060546875, "learning_rate": 0.0003460454951822787, "loss": 0.5326, "step": 114250 }, { "epoch": 5.6749776497466975, "grad_norm": 0.126953125, "learning_rate": 0.00034600576139862924, "loss": 0.4766, "step": 114260 }, { "epoch": 5.675474322042317, "grad_norm": 0.1298828125, "learning_rate": 0.00034596602761497965, "loss": 0.5074, "step": 114270 }, { "epoch": 5.675970994337936, "grad_norm": 0.126953125, "learning_rate": 0.00034592629383133007, "loss": 0.497, "step": 114280 }, { "epoch": 5.676467666633555, "grad_norm": 0.1123046875, "learning_rate": 0.00034588656004768054, "loss": 0.5005, "step": 114290 }, { "epoch": 5.676964338929174, "grad_norm": 0.138671875, "learning_rate": 0.000345846826264031, "loss": 0.4988, "step": 114300 }, { "epoch": 5.677461011224794, "grad_norm": 0.1318359375, "learning_rate": 0.0003458070924803815, "loss": 0.5226, "step": 114310 }, { "epoch": 5.677957683520413, "grad_norm": 0.11865234375, "learning_rate": 0.0003457673586967319, "loss": 0.5349, "step": 114320 }, { "epoch": 5.678454355816033, "grad_norm": 0.140625, "learning_rate": 0.0003457276249130824, "loss": 0.4942, "step": 114330 }, { "epoch": 5.678951028111652, "grad_norm": 0.142578125, "learning_rate": 0.00034568789112943284, "loss": 0.4801, "step": 114340 }, { "epoch": 5.679447700407271, "grad_norm": 0.130859375, "learning_rate": 0.00034564815734578326, "loss": 0.5183, "step": 114350 }, { "epoch": 5.679944372702891, "grad_norm": 0.1708984375, "learning_rate": 0.0003456084235621337, "loss": 0.4792, "step": 114360 }, { "epoch": 5.68044104499851, "grad_norm": 0.11328125, "learning_rate": 0.0003455686897784842, "loss": 0.5151, "step": 114370 }, { "epoch": 5.680937717294129, "grad_norm": 0.1806640625, "learning_rate": 0.0003455289559948346, "loss": 0.5321, "step": 114380 }, { "epoch": 5.681434389589748, "grad_norm": 0.18359375, "learning_rate": 0.0003454892222111851, "loss": 0.4892, "step": 114390 }, { "epoch": 5.6819310618853685, "grad_norm": 0.12158203125, "learning_rate": 0.0003454494884275355, "loss": 0.4954, "step": 114400 }, { "epoch": 5.682427734180988, "grad_norm": 0.1396484375, "learning_rate": 0.000345409754643886, "loss": 0.4942, "step": 114410 }, { "epoch": 5.682924406476607, "grad_norm": 0.14453125, "learning_rate": 0.00034537002086023645, "loss": 0.5226, "step": 114420 }, { "epoch": 5.683421078772226, "grad_norm": 0.123046875, "learning_rate": 0.00034533028707658687, "loss": 0.5203, "step": 114430 }, { "epoch": 5.683917751067845, "grad_norm": 0.1376953125, "learning_rate": 0.00034529055329293734, "loss": 0.5137, "step": 114440 }, { "epoch": 5.684414423363465, "grad_norm": 0.12060546875, "learning_rate": 0.0003452508195092878, "loss": 0.5044, "step": 114450 }, { "epoch": 5.684911095659084, "grad_norm": 0.1259765625, "learning_rate": 0.00034521108572563823, "loss": 0.5375, "step": 114460 }, { "epoch": 5.685407767954704, "grad_norm": 0.1455078125, "learning_rate": 0.0003451713519419887, "loss": 0.4996, "step": 114470 }, { "epoch": 5.685904440250323, "grad_norm": 0.11865234375, "learning_rate": 0.0003451316181583391, "loss": 0.4906, "step": 114480 }, { "epoch": 5.686401112545942, "grad_norm": 0.12890625, "learning_rate": 0.0003450918843746896, "loss": 0.5102, "step": 114490 }, { "epoch": 5.686897784841562, "grad_norm": 0.1201171875, "learning_rate": 0.00034505215059104006, "loss": 0.5323, "step": 114500 }, { "epoch": 5.687394457137181, "grad_norm": 0.1669921875, "learning_rate": 0.0003450124168073905, "loss": 0.5173, "step": 114510 }, { "epoch": 5.6878911294328, "grad_norm": 0.138671875, "learning_rate": 0.00034497268302374095, "loss": 0.5176, "step": 114520 }, { "epoch": 5.688387801728419, "grad_norm": 0.1376953125, "learning_rate": 0.0003449329492400914, "loss": 0.5059, "step": 114530 }, { "epoch": 5.688884474024039, "grad_norm": 0.1689453125, "learning_rate": 0.00034489321545644184, "loss": 0.5014, "step": 114540 }, { "epoch": 5.689381146319659, "grad_norm": 0.13671875, "learning_rate": 0.0003448534816727923, "loss": 0.51, "step": 114550 }, { "epoch": 5.689877818615278, "grad_norm": 0.1142578125, "learning_rate": 0.0003448137478891428, "loss": 0.5091, "step": 114560 }, { "epoch": 5.690374490910897, "grad_norm": 0.12890625, "learning_rate": 0.0003447740141054932, "loss": 0.5274, "step": 114570 }, { "epoch": 5.690871163206516, "grad_norm": 0.134765625, "learning_rate": 0.00034473428032184367, "loss": 0.5088, "step": 114580 }, { "epoch": 5.6913678355021355, "grad_norm": 0.134765625, "learning_rate": 0.0003446945465381941, "loss": 0.4935, "step": 114590 }, { "epoch": 5.691864507797755, "grad_norm": 0.11572265625, "learning_rate": 0.0003446548127545446, "loss": 0.527, "step": 114600 }, { "epoch": 5.692361180093375, "grad_norm": 0.12451171875, "learning_rate": 0.00034461507897089503, "loss": 0.5384, "step": 114610 }, { "epoch": 5.692857852388994, "grad_norm": 0.1279296875, "learning_rate": 0.00034457534518724545, "loss": 0.5439, "step": 114620 }, { "epoch": 5.693354524684613, "grad_norm": 0.140625, "learning_rate": 0.0003445356114035959, "loss": 0.5353, "step": 114630 }, { "epoch": 5.693851196980233, "grad_norm": 0.14453125, "learning_rate": 0.0003444958776199464, "loss": 0.4961, "step": 114640 }, { "epoch": 5.694347869275852, "grad_norm": 0.1376953125, "learning_rate": 0.0003444561438362968, "loss": 0.5086, "step": 114650 }, { "epoch": 5.694844541571471, "grad_norm": 0.1875, "learning_rate": 0.0003444164100526473, "loss": 0.5043, "step": 114660 }, { "epoch": 5.69534121386709, "grad_norm": 0.10693359375, "learning_rate": 0.00034437667626899775, "loss": 0.4958, "step": 114670 }, { "epoch": 5.6958378861627095, "grad_norm": 0.1396484375, "learning_rate": 0.0003443369424853482, "loss": 0.4841, "step": 114680 }, { "epoch": 5.696334558458329, "grad_norm": 0.12353515625, "learning_rate": 0.00034429720870169864, "loss": 0.4919, "step": 114690 }, { "epoch": 5.696831230753949, "grad_norm": 0.1298828125, "learning_rate": 0.00034425747491804905, "loss": 0.5071, "step": 114700 }, { "epoch": 5.697327903049568, "grad_norm": 0.1142578125, "learning_rate": 0.0003442177411343995, "loss": 0.4982, "step": 114710 }, { "epoch": 5.697824575345187, "grad_norm": 0.1259765625, "learning_rate": 0.00034417800735075, "loss": 0.533, "step": 114720 }, { "epoch": 5.6983212476408065, "grad_norm": 0.11474609375, "learning_rate": 0.0003441382735671004, "loss": 0.4986, "step": 114730 }, { "epoch": 5.698817919936426, "grad_norm": 0.12890625, "learning_rate": 0.0003440985397834509, "loss": 0.5243, "step": 114740 }, { "epoch": 5.699314592232045, "grad_norm": 0.1279296875, "learning_rate": 0.00034405880599980136, "loss": 0.503, "step": 114750 }, { "epoch": 5.699811264527664, "grad_norm": 0.1357421875, "learning_rate": 0.00034401907221615183, "loss": 0.494, "step": 114760 }, { "epoch": 5.700307936823284, "grad_norm": 0.12890625, "learning_rate": 0.00034397933843250225, "loss": 0.5262, "step": 114770 }, { "epoch": 5.7008046091189035, "grad_norm": 0.1328125, "learning_rate": 0.00034393960464885266, "loss": 0.5391, "step": 114780 }, { "epoch": 5.701301281414523, "grad_norm": 0.1328125, "learning_rate": 0.0003438998708652032, "loss": 0.5111, "step": 114790 }, { "epoch": 5.701797953710142, "grad_norm": 0.12353515625, "learning_rate": 0.0003438601370815536, "loss": 0.4906, "step": 114800 }, { "epoch": 5.702294626005761, "grad_norm": 0.12109375, "learning_rate": 0.0003438204032979041, "loss": 0.4968, "step": 114810 }, { "epoch": 5.70279129830138, "grad_norm": 0.12353515625, "learning_rate": 0.0003437806695142545, "loss": 0.4965, "step": 114820 }, { "epoch": 5.703287970597, "grad_norm": 0.1328125, "learning_rate": 0.00034374093573060497, "loss": 0.5153, "step": 114830 }, { "epoch": 5.70378464289262, "grad_norm": 0.12060546875, "learning_rate": 0.00034370120194695544, "loss": 0.5069, "step": 114840 }, { "epoch": 5.704281315188239, "grad_norm": 0.1259765625, "learning_rate": 0.00034366146816330585, "loss": 0.4992, "step": 114850 }, { "epoch": 5.704777987483858, "grad_norm": 0.12451171875, "learning_rate": 0.0003436217343796563, "loss": 0.4968, "step": 114860 }, { "epoch": 5.7052746597794775, "grad_norm": 0.1328125, "learning_rate": 0.0003435820005960068, "loss": 0.482, "step": 114870 }, { "epoch": 5.705771332075097, "grad_norm": 0.115234375, "learning_rate": 0.0003435422668123572, "loss": 0.4977, "step": 114880 }, { "epoch": 5.706268004370716, "grad_norm": 0.1328125, "learning_rate": 0.0003435025330287077, "loss": 0.5002, "step": 114890 }, { "epoch": 5.706764676666335, "grad_norm": 0.1572265625, "learning_rate": 0.00034346279924505816, "loss": 0.5028, "step": 114900 }, { "epoch": 5.707261348961955, "grad_norm": 0.1279296875, "learning_rate": 0.0003434230654614086, "loss": 0.522, "step": 114910 }, { "epoch": 5.7077580212575745, "grad_norm": 0.1728515625, "learning_rate": 0.00034338333167775904, "loss": 0.4802, "step": 114920 }, { "epoch": 5.708254693553194, "grad_norm": 0.158203125, "learning_rate": 0.00034334359789410946, "loss": 0.4978, "step": 114930 }, { "epoch": 5.708751365848813, "grad_norm": 0.1416015625, "learning_rate": 0.00034330386411045993, "loss": 0.5344, "step": 114940 }, { "epoch": 5.709248038144432, "grad_norm": 0.103515625, "learning_rate": 0.0003432641303268104, "loss": 0.5075, "step": 114950 }, { "epoch": 5.709744710440051, "grad_norm": 0.1357421875, "learning_rate": 0.0003432243965431608, "loss": 0.4632, "step": 114960 }, { "epoch": 5.710241382735671, "grad_norm": 0.11767578125, "learning_rate": 0.0003431846627595113, "loss": 0.495, "step": 114970 }, { "epoch": 5.710738055031291, "grad_norm": 0.140625, "learning_rate": 0.00034314492897586176, "loss": 0.5062, "step": 114980 }, { "epoch": 5.71123472732691, "grad_norm": 0.1279296875, "learning_rate": 0.0003431051951922122, "loss": 0.5016, "step": 114990 }, { "epoch": 5.711731399622529, "grad_norm": 0.11376953125, "learning_rate": 0.00034306546140856265, "loss": 0.4985, "step": 115000 }, { "epoch": 5.712228071918148, "grad_norm": 0.126953125, "learning_rate": 0.00034302572762491307, "loss": 0.5135, "step": 115010 }, { "epoch": 5.712724744213768, "grad_norm": 0.12890625, "learning_rate": 0.00034298599384126354, "loss": 0.5065, "step": 115020 }, { "epoch": 5.713221416509387, "grad_norm": 0.1611328125, "learning_rate": 0.000342946260057614, "loss": 0.4908, "step": 115030 }, { "epoch": 5.713718088805006, "grad_norm": 0.11962890625, "learning_rate": 0.00034290652627396443, "loss": 0.5072, "step": 115040 }, { "epoch": 5.714214761100626, "grad_norm": 0.1259765625, "learning_rate": 0.0003428667924903149, "loss": 0.4969, "step": 115050 }, { "epoch": 5.714711433396245, "grad_norm": 0.1318359375, "learning_rate": 0.00034282705870666537, "loss": 0.4945, "step": 115060 }, { "epoch": 5.715208105691865, "grad_norm": 0.11962890625, "learning_rate": 0.0003427873249230158, "loss": 0.502, "step": 115070 }, { "epoch": 5.715704777987484, "grad_norm": 0.1337890625, "learning_rate": 0.00034274759113936626, "loss": 0.5, "step": 115080 }, { "epoch": 5.716201450283103, "grad_norm": 0.142578125, "learning_rate": 0.00034270785735571673, "loss": 0.49, "step": 115090 }, { "epoch": 5.716698122578722, "grad_norm": 0.11279296875, "learning_rate": 0.00034266812357206715, "loss": 0.4713, "step": 115100 }, { "epoch": 5.717194794874342, "grad_norm": 0.1494140625, "learning_rate": 0.0003426283897884176, "loss": 0.4878, "step": 115110 }, { "epoch": 5.717691467169962, "grad_norm": 0.12353515625, "learning_rate": 0.00034258865600476804, "loss": 0.4958, "step": 115120 }, { "epoch": 5.718188139465581, "grad_norm": 0.11328125, "learning_rate": 0.00034254892222111856, "loss": 0.4822, "step": 115130 }, { "epoch": 5.7186848117612, "grad_norm": 0.1552734375, "learning_rate": 0.000342509188437469, "loss": 0.5026, "step": 115140 }, { "epoch": 5.719181484056819, "grad_norm": 0.125, "learning_rate": 0.0003424694546538194, "loss": 0.5005, "step": 115150 }, { "epoch": 5.719678156352439, "grad_norm": 0.166015625, "learning_rate": 0.00034242972087016987, "loss": 0.5175, "step": 115160 }, { "epoch": 5.720174828648058, "grad_norm": 0.1484375, "learning_rate": 0.00034238998708652034, "loss": 0.4952, "step": 115170 }, { "epoch": 5.720671500943677, "grad_norm": 0.1357421875, "learning_rate": 0.00034235025330287076, "loss": 0.4982, "step": 115180 }, { "epoch": 5.721168173239297, "grad_norm": 0.10888671875, "learning_rate": 0.00034231051951922123, "loss": 0.4914, "step": 115190 }, { "epoch": 5.721664845534916, "grad_norm": 0.11474609375, "learning_rate": 0.0003422707857355717, "loss": 0.5042, "step": 115200 }, { "epoch": 5.722161517830536, "grad_norm": 0.1171875, "learning_rate": 0.00034223105195192217, "loss": 0.503, "step": 115210 }, { "epoch": 5.722658190126155, "grad_norm": 0.1455078125, "learning_rate": 0.0003421913181682726, "loss": 0.4848, "step": 115220 }, { "epoch": 5.723154862421774, "grad_norm": 0.16796875, "learning_rate": 0.000342151584384623, "loss": 0.4676, "step": 115230 }, { "epoch": 5.723651534717393, "grad_norm": 0.11083984375, "learning_rate": 0.00034211185060097353, "loss": 0.4628, "step": 115240 }, { "epoch": 5.7241482070130125, "grad_norm": 0.115234375, "learning_rate": 0.00034207211681732395, "loss": 0.5114, "step": 115250 }, { "epoch": 5.724644879308633, "grad_norm": 0.1357421875, "learning_rate": 0.0003420323830336744, "loss": 0.5178, "step": 115260 }, { "epoch": 5.725141551604252, "grad_norm": 0.138671875, "learning_rate": 0.00034199264925002484, "loss": 0.49, "step": 115270 }, { "epoch": 5.725638223899871, "grad_norm": 0.11328125, "learning_rate": 0.0003419529154663753, "loss": 0.4979, "step": 115280 }, { "epoch": 5.72613489619549, "grad_norm": 0.11376953125, "learning_rate": 0.0003419131816827258, "loss": 0.52, "step": 115290 }, { "epoch": 5.7266315684911095, "grad_norm": 0.142578125, "learning_rate": 0.0003418734478990762, "loss": 0.4943, "step": 115300 }, { "epoch": 5.727128240786729, "grad_norm": 0.134765625, "learning_rate": 0.0003418337141154266, "loss": 0.5051, "step": 115310 }, { "epoch": 5.727624913082348, "grad_norm": 0.12890625, "learning_rate": 0.00034179398033177714, "loss": 0.4983, "step": 115320 }, { "epoch": 5.728121585377967, "grad_norm": 0.123046875, "learning_rate": 0.00034175424654812756, "loss": 0.5153, "step": 115330 }, { "epoch": 5.728618257673587, "grad_norm": 0.119140625, "learning_rate": 0.00034171451276447803, "loss": 0.4977, "step": 115340 }, { "epoch": 5.729114929969207, "grad_norm": 0.1298828125, "learning_rate": 0.00034167477898082845, "loss": 0.5151, "step": 115350 }, { "epoch": 5.729611602264826, "grad_norm": 0.1416015625, "learning_rate": 0.0003416350451971789, "loss": 0.4898, "step": 115360 }, { "epoch": 5.730108274560445, "grad_norm": 0.1259765625, "learning_rate": 0.0003415953114135294, "loss": 0.4805, "step": 115370 }, { "epoch": 5.730604946856064, "grad_norm": 0.1171875, "learning_rate": 0.0003415555776298798, "loss": 0.4821, "step": 115380 }, { "epoch": 5.7311016191516835, "grad_norm": 0.1220703125, "learning_rate": 0.0003415158438462303, "loss": 0.5175, "step": 115390 }, { "epoch": 5.731598291447303, "grad_norm": 0.12060546875, "learning_rate": 0.00034147611006258075, "loss": 0.5031, "step": 115400 }, { "epoch": 5.732094963742922, "grad_norm": 0.119140625, "learning_rate": 0.00034143637627893117, "loss": 0.5136, "step": 115410 }, { "epoch": 5.732591636038542, "grad_norm": 0.12255859375, "learning_rate": 0.00034139664249528164, "loss": 0.5181, "step": 115420 }, { "epoch": 5.733088308334161, "grad_norm": 0.1162109375, "learning_rate": 0.0003413569087116321, "loss": 0.4761, "step": 115430 }, { "epoch": 5.7335849806297805, "grad_norm": 0.1611328125, "learning_rate": 0.0003413171749279825, "loss": 0.513, "step": 115440 }, { "epoch": 5.7340816529254, "grad_norm": 0.12890625, "learning_rate": 0.000341277441144333, "loss": 0.4977, "step": 115450 }, { "epoch": 5.734578325221019, "grad_norm": 0.1328125, "learning_rate": 0.0003412377073606834, "loss": 0.4923, "step": 115460 }, { "epoch": 5.735074997516638, "grad_norm": 0.1328125, "learning_rate": 0.0003411979735770339, "loss": 0.4929, "step": 115470 }, { "epoch": 5.735571669812257, "grad_norm": 0.11669921875, "learning_rate": 0.00034115823979338436, "loss": 0.5056, "step": 115480 }, { "epoch": 5.7360683421078775, "grad_norm": 0.1171875, "learning_rate": 0.0003411185060097348, "loss": 0.487, "step": 115490 }, { "epoch": 5.736565014403497, "grad_norm": 0.1630859375, "learning_rate": 0.00034107877222608524, "loss": 0.483, "step": 115500 }, { "epoch": 5.737061686699116, "grad_norm": 0.12109375, "learning_rate": 0.0003410390384424357, "loss": 0.5022, "step": 115510 }, { "epoch": 5.737558358994735, "grad_norm": 0.1240234375, "learning_rate": 0.00034099930465878613, "loss": 0.4968, "step": 115520 }, { "epoch": 5.738055031290354, "grad_norm": 0.1298828125, "learning_rate": 0.0003409595708751366, "loss": 0.5115, "step": 115530 }, { "epoch": 5.738551703585974, "grad_norm": 0.12255859375, "learning_rate": 0.000340919837091487, "loss": 0.4868, "step": 115540 }, { "epoch": 5.739048375881593, "grad_norm": 0.11962890625, "learning_rate": 0.0003408801033078375, "loss": 0.5299, "step": 115550 }, { "epoch": 5.739545048177213, "grad_norm": 0.1318359375, "learning_rate": 0.00034084036952418796, "loss": 0.4985, "step": 115560 }, { "epoch": 5.740041720472832, "grad_norm": 0.12060546875, "learning_rate": 0.0003408006357405384, "loss": 0.4886, "step": 115570 }, { "epoch": 5.7405383927684515, "grad_norm": 0.1171875, "learning_rate": 0.00034076090195688885, "loss": 0.4821, "step": 115580 }, { "epoch": 5.741035065064071, "grad_norm": 0.1376953125, "learning_rate": 0.0003407211681732393, "loss": 0.5072, "step": 115590 }, { "epoch": 5.74153173735969, "grad_norm": 0.1279296875, "learning_rate": 0.00034068143438958974, "loss": 0.5099, "step": 115600 }, { "epoch": 5.742028409655309, "grad_norm": 0.140625, "learning_rate": 0.0003406417006059402, "loss": 0.5313, "step": 115610 }, { "epoch": 5.742525081950928, "grad_norm": 0.12890625, "learning_rate": 0.0003406019668222907, "loss": 0.5168, "step": 115620 }, { "epoch": 5.7430217542465485, "grad_norm": 0.1494140625, "learning_rate": 0.00034056223303864116, "loss": 0.4783, "step": 115630 }, { "epoch": 5.743518426542168, "grad_norm": 0.138671875, "learning_rate": 0.00034052249925499157, "loss": 0.5161, "step": 115640 }, { "epoch": 5.744015098837787, "grad_norm": 0.1298828125, "learning_rate": 0.000340482765471342, "loss": 0.4956, "step": 115650 }, { "epoch": 5.744511771133406, "grad_norm": 0.134765625, "learning_rate": 0.0003404430316876925, "loss": 0.4915, "step": 115660 }, { "epoch": 5.745008443429025, "grad_norm": 0.115234375, "learning_rate": 0.00034040329790404293, "loss": 0.5149, "step": 115670 }, { "epoch": 5.745505115724645, "grad_norm": 0.12353515625, "learning_rate": 0.00034036356412039335, "loss": 0.4939, "step": 115680 }, { "epoch": 5.746001788020264, "grad_norm": 0.1259765625, "learning_rate": 0.0003403238303367438, "loss": 0.4763, "step": 115690 }, { "epoch": 5.746498460315884, "grad_norm": 0.1318359375, "learning_rate": 0.0003402840965530943, "loss": 0.5225, "step": 115700 }, { "epoch": 5.746995132611503, "grad_norm": 0.1787109375, "learning_rate": 0.00034024436276944476, "loss": 0.5139, "step": 115710 }, { "epoch": 5.747491804907122, "grad_norm": 0.1298828125, "learning_rate": 0.0003402046289857952, "loss": 0.5062, "step": 115720 }, { "epoch": 5.747988477202742, "grad_norm": 0.1533203125, "learning_rate": 0.00034016489520214565, "loss": 0.5167, "step": 115730 }, { "epoch": 5.748485149498361, "grad_norm": 0.1396484375, "learning_rate": 0.0003401251614184961, "loss": 0.5147, "step": 115740 }, { "epoch": 5.74898182179398, "grad_norm": 0.11767578125, "learning_rate": 0.00034008542763484654, "loss": 0.5476, "step": 115750 }, { "epoch": 5.749478494089599, "grad_norm": 0.1279296875, "learning_rate": 0.00034004569385119696, "loss": 0.5189, "step": 115760 }, { "epoch": 5.749975166385219, "grad_norm": 0.1298828125, "learning_rate": 0.0003400059600675475, "loss": 0.4933, "step": 115770 }, { "epoch": 5.750471838680839, "grad_norm": 0.119140625, "learning_rate": 0.0003399662262838979, "loss": 0.5053, "step": 115780 }, { "epoch": 5.750968510976458, "grad_norm": 0.1318359375, "learning_rate": 0.00033992649250024837, "loss": 0.5216, "step": 115790 }, { "epoch": 5.751465183272077, "grad_norm": 0.12353515625, "learning_rate": 0.0003398867587165988, "loss": 0.4888, "step": 115800 }, { "epoch": 5.751961855567696, "grad_norm": 0.1318359375, "learning_rate": 0.00033984702493294926, "loss": 0.5226, "step": 115810 }, { "epoch": 5.752458527863316, "grad_norm": 0.1240234375, "learning_rate": 0.00033980729114929973, "loss": 0.5049, "step": 115820 }, { "epoch": 5.752955200158935, "grad_norm": 0.1396484375, "learning_rate": 0.00033976755736565015, "loss": 0.5168, "step": 115830 }, { "epoch": 5.753451872454555, "grad_norm": 0.134765625, "learning_rate": 0.00033972782358200057, "loss": 0.4807, "step": 115840 }, { "epoch": 5.753948544750174, "grad_norm": 0.1162109375, "learning_rate": 0.0003396880897983511, "loss": 0.492, "step": 115850 }, { "epoch": 5.754445217045793, "grad_norm": 0.1279296875, "learning_rate": 0.0003396483560147015, "loss": 0.4788, "step": 115860 }, { "epoch": 5.754941889341413, "grad_norm": 0.12451171875, "learning_rate": 0.000339608622231052, "loss": 0.507, "step": 115870 }, { "epoch": 5.755438561637032, "grad_norm": 0.1396484375, "learning_rate": 0.0003395688884474024, "loss": 0.5107, "step": 115880 }, { "epoch": 5.755935233932651, "grad_norm": 0.142578125, "learning_rate": 0.00033952915466375287, "loss": 0.4984, "step": 115890 }, { "epoch": 5.75643190622827, "grad_norm": 0.12109375, "learning_rate": 0.00033948942088010334, "loss": 0.5008, "step": 115900 }, { "epoch": 5.75692857852389, "grad_norm": 0.126953125, "learning_rate": 0.00033944968709645376, "loss": 0.508, "step": 115910 }, { "epoch": 5.75742525081951, "grad_norm": 0.181640625, "learning_rate": 0.00033940995331280423, "loss": 0.5232, "step": 115920 }, { "epoch": 5.757921923115129, "grad_norm": 0.126953125, "learning_rate": 0.0003393702195291547, "loss": 0.4951, "step": 115930 }, { "epoch": 5.758418595410748, "grad_norm": 0.11572265625, "learning_rate": 0.0003393304857455051, "loss": 0.5019, "step": 115940 }, { "epoch": 5.758915267706367, "grad_norm": 0.115234375, "learning_rate": 0.0003392907519618556, "loss": 0.5148, "step": 115950 }, { "epoch": 5.7594119400019865, "grad_norm": 0.134765625, "learning_rate": 0.00033925101817820606, "loss": 0.5035, "step": 115960 }, { "epoch": 5.759908612297606, "grad_norm": 0.126953125, "learning_rate": 0.0003392112843945565, "loss": 0.4882, "step": 115970 }, { "epoch": 5.760405284593226, "grad_norm": 0.150390625, "learning_rate": 0.00033917155061090695, "loss": 0.4819, "step": 115980 }, { "epoch": 5.760901956888845, "grad_norm": 0.126953125, "learning_rate": 0.00033913181682725737, "loss": 0.5107, "step": 115990 }, { "epoch": 5.761398629184464, "grad_norm": 0.1259765625, "learning_rate": 0.00033909208304360784, "loss": 0.4972, "step": 116000 }, { "epoch": 5.7618953014800836, "grad_norm": 0.11572265625, "learning_rate": 0.0003390523492599583, "loss": 0.5162, "step": 116010 }, { "epoch": 5.762391973775703, "grad_norm": 0.12255859375, "learning_rate": 0.0003390126154763087, "loss": 0.5025, "step": 116020 }, { "epoch": 5.762888646071322, "grad_norm": 0.1318359375, "learning_rate": 0.0003389728816926592, "loss": 0.5068, "step": 116030 }, { "epoch": 5.763385318366941, "grad_norm": 0.11572265625, "learning_rate": 0.00033893314790900967, "loss": 0.4853, "step": 116040 }, { "epoch": 5.7638819906625605, "grad_norm": 0.11572265625, "learning_rate": 0.0003388934141253601, "loss": 0.5271, "step": 116050 }, { "epoch": 5.764378662958181, "grad_norm": 0.11376953125, "learning_rate": 0.00033885368034171056, "loss": 0.4749, "step": 116060 }, { "epoch": 5.7648753352538, "grad_norm": 0.10888671875, "learning_rate": 0.00033881394655806103, "loss": 0.486, "step": 116070 }, { "epoch": 5.765372007549419, "grad_norm": 0.1455078125, "learning_rate": 0.0003387742127744115, "loss": 0.4745, "step": 116080 }, { "epoch": 5.765868679845038, "grad_norm": 0.11328125, "learning_rate": 0.0003387344789907619, "loss": 0.4979, "step": 116090 }, { "epoch": 5.7663653521406575, "grad_norm": 0.109375, "learning_rate": 0.00033869474520711233, "loss": 0.4611, "step": 116100 }, { "epoch": 5.766862024436277, "grad_norm": 0.11669921875, "learning_rate": 0.0003386550114234628, "loss": 0.5248, "step": 116110 }, { "epoch": 5.767358696731896, "grad_norm": 0.1318359375, "learning_rate": 0.0003386152776398133, "loss": 0.5053, "step": 116120 }, { "epoch": 5.767855369027515, "grad_norm": 0.11376953125, "learning_rate": 0.0003385755438561637, "loss": 0.5074, "step": 116130 }, { "epoch": 5.768352041323135, "grad_norm": 0.11376953125, "learning_rate": 0.00033853581007251416, "loss": 0.5085, "step": 116140 }, { "epoch": 5.7688487136187545, "grad_norm": 0.1328125, "learning_rate": 0.00033849607628886464, "loss": 0.5461, "step": 116150 }, { "epoch": 5.769345385914374, "grad_norm": 0.126953125, "learning_rate": 0.0003384563425052151, "loss": 0.5136, "step": 116160 }, { "epoch": 5.769842058209993, "grad_norm": 0.13671875, "learning_rate": 0.0003384166087215655, "loss": 0.5117, "step": 116170 }, { "epoch": 5.770338730505612, "grad_norm": 0.11962890625, "learning_rate": 0.00033837687493791594, "loss": 0.5246, "step": 116180 }, { "epoch": 5.770835402801231, "grad_norm": 0.11474609375, "learning_rate": 0.00033833714115426647, "loss": 0.4963, "step": 116190 }, { "epoch": 5.771332075096851, "grad_norm": 0.1171875, "learning_rate": 0.0003382974073706169, "loss": 0.5181, "step": 116200 }, { "epoch": 5.771828747392471, "grad_norm": 0.10888671875, "learning_rate": 0.0003382576735869673, "loss": 0.4944, "step": 116210 }, { "epoch": 5.77232541968809, "grad_norm": 0.1416015625, "learning_rate": 0.0003382179398033178, "loss": 0.525, "step": 116220 }, { "epoch": 5.772822091983709, "grad_norm": 0.1171875, "learning_rate": 0.00033817820601966824, "loss": 0.4985, "step": 116230 }, { "epoch": 5.773318764279328, "grad_norm": 0.11962890625, "learning_rate": 0.0003381384722360187, "loss": 0.4936, "step": 116240 }, { "epoch": 5.773815436574948, "grad_norm": 0.154296875, "learning_rate": 0.00033809873845236913, "loss": 0.498, "step": 116250 }, { "epoch": 5.774312108870567, "grad_norm": 0.14453125, "learning_rate": 0.0003380590046687196, "loss": 0.5084, "step": 116260 }, { "epoch": 5.774808781166186, "grad_norm": 0.1220703125, "learning_rate": 0.0003380192708850701, "loss": 0.4746, "step": 116270 }, { "epoch": 5.775305453461806, "grad_norm": 0.12060546875, "learning_rate": 0.0003379795371014205, "loss": 0.5184, "step": 116280 }, { "epoch": 5.7758021257574255, "grad_norm": 0.146484375, "learning_rate": 0.0003379398033177709, "loss": 0.5068, "step": 116290 }, { "epoch": 5.776298798053045, "grad_norm": 0.1142578125, "learning_rate": 0.00033790006953412144, "loss": 0.5128, "step": 116300 }, { "epoch": 5.776795470348664, "grad_norm": 0.1494140625, "learning_rate": 0.00033786033575047185, "loss": 0.4971, "step": 116310 }, { "epoch": 5.777292142644283, "grad_norm": 0.12451171875, "learning_rate": 0.0003378206019668223, "loss": 0.5134, "step": 116320 }, { "epoch": 5.777788814939902, "grad_norm": 0.12353515625, "learning_rate": 0.00033778086818317274, "loss": 0.4887, "step": 116330 }, { "epoch": 5.778285487235522, "grad_norm": 0.12890625, "learning_rate": 0.0003377411343995232, "loss": 0.4808, "step": 116340 }, { "epoch": 5.778782159531142, "grad_norm": 0.1298828125, "learning_rate": 0.0003377014006158737, "loss": 0.5144, "step": 116350 }, { "epoch": 5.779278831826761, "grad_norm": 0.1201171875, "learning_rate": 0.0003376616668322241, "loss": 0.4807, "step": 116360 }, { "epoch": 5.77977550412238, "grad_norm": 0.142578125, "learning_rate": 0.00033762193304857457, "loss": 0.4983, "step": 116370 }, { "epoch": 5.780272176417999, "grad_norm": 0.130859375, "learning_rate": 0.00033758219926492504, "loss": 0.4903, "step": 116380 }, { "epoch": 5.780768848713619, "grad_norm": 0.1435546875, "learning_rate": 0.00033754246548127546, "loss": 0.5133, "step": 116390 }, { "epoch": 5.781265521009238, "grad_norm": 0.119140625, "learning_rate": 0.00033750273169762593, "loss": 0.5031, "step": 116400 }, { "epoch": 5.781762193304857, "grad_norm": 0.1328125, "learning_rate": 0.00033746299791397635, "loss": 0.4948, "step": 116410 }, { "epoch": 5.782258865600477, "grad_norm": 0.1259765625, "learning_rate": 0.0003374232641303268, "loss": 0.4974, "step": 116420 }, { "epoch": 5.782755537896096, "grad_norm": 0.130859375, "learning_rate": 0.0003373835303466773, "loss": 0.5041, "step": 116430 }, { "epoch": 5.783252210191716, "grad_norm": 0.1494140625, "learning_rate": 0.0003373437965630277, "loss": 0.5212, "step": 116440 }, { "epoch": 5.783748882487335, "grad_norm": 0.125, "learning_rate": 0.0003373040627793782, "loss": 0.4982, "step": 116450 }, { "epoch": 5.784245554782954, "grad_norm": 0.166015625, "learning_rate": 0.00033726432899572865, "loss": 0.5074, "step": 116460 }, { "epoch": 5.784742227078573, "grad_norm": 0.140625, "learning_rate": 0.00033722459521207907, "loss": 0.5313, "step": 116470 }, { "epoch": 5.785238899374193, "grad_norm": 0.123046875, "learning_rate": 0.00033718486142842954, "loss": 0.521, "step": 116480 }, { "epoch": 5.785735571669813, "grad_norm": 0.1640625, "learning_rate": 0.00033714512764478, "loss": 0.5085, "step": 116490 }, { "epoch": 5.786232243965432, "grad_norm": 0.1328125, "learning_rate": 0.00033710539386113043, "loss": 0.5141, "step": 116500 }, { "epoch": 5.786728916261051, "grad_norm": 0.12890625, "learning_rate": 0.0003370656600774809, "loss": 0.5, "step": 116510 }, { "epoch": 5.78722558855667, "grad_norm": 0.1259765625, "learning_rate": 0.0003370259262938313, "loss": 0.5004, "step": 116520 }, { "epoch": 5.78772226085229, "grad_norm": 0.146484375, "learning_rate": 0.00033698619251018184, "loss": 0.4756, "step": 116530 }, { "epoch": 5.788218933147909, "grad_norm": 0.1259765625, "learning_rate": 0.00033694645872653226, "loss": 0.4956, "step": 116540 }, { "epoch": 5.788715605443528, "grad_norm": 0.1484375, "learning_rate": 0.0003369067249428827, "loss": 0.4909, "step": 116550 }, { "epoch": 5.789212277739148, "grad_norm": 0.115234375, "learning_rate": 0.00033686699115923315, "loss": 0.5151, "step": 116560 }, { "epoch": 5.789708950034767, "grad_norm": 0.1298828125, "learning_rate": 0.0003368272573755836, "loss": 0.5047, "step": 116570 }, { "epoch": 5.790205622330387, "grad_norm": 0.1376953125, "learning_rate": 0.00033678752359193404, "loss": 0.5064, "step": 116580 }, { "epoch": 5.790702294626006, "grad_norm": 0.1318359375, "learning_rate": 0.0003367477898082845, "loss": 0.5127, "step": 116590 }, { "epoch": 5.791198966921625, "grad_norm": 0.146484375, "learning_rate": 0.000336708056024635, "loss": 0.5037, "step": 116600 }, { "epoch": 5.791695639217244, "grad_norm": 0.1591796875, "learning_rate": 0.00033666832224098545, "loss": 0.5015, "step": 116610 }, { "epoch": 5.7921923115128635, "grad_norm": 0.126953125, "learning_rate": 0.00033662858845733587, "loss": 0.524, "step": 116620 }, { "epoch": 5.792688983808484, "grad_norm": 0.1376953125, "learning_rate": 0.0003365888546736863, "loss": 0.4881, "step": 116630 }, { "epoch": 5.793185656104103, "grad_norm": 0.125, "learning_rate": 0.0003365491208900368, "loss": 0.5178, "step": 116640 }, { "epoch": 5.793682328399722, "grad_norm": 0.1171875, "learning_rate": 0.00033650938710638723, "loss": 0.4994, "step": 116650 }, { "epoch": 5.794179000695341, "grad_norm": 0.1162109375, "learning_rate": 0.00033646965332273765, "loss": 0.4946, "step": 116660 }, { "epoch": 5.7946756729909605, "grad_norm": 0.134765625, "learning_rate": 0.0003364299195390881, "loss": 0.4832, "step": 116670 }, { "epoch": 5.79517234528658, "grad_norm": 0.12109375, "learning_rate": 0.0003363901857554386, "loss": 0.5086, "step": 116680 }, { "epoch": 5.795669017582199, "grad_norm": 0.12890625, "learning_rate": 0.00033635045197178906, "loss": 0.5078, "step": 116690 }, { "epoch": 5.796165689877819, "grad_norm": 0.11474609375, "learning_rate": 0.0003363107181881395, "loss": 0.5327, "step": 116700 }, { "epoch": 5.796662362173438, "grad_norm": 0.1142578125, "learning_rate": 0.0003362709844044899, "loss": 0.4991, "step": 116710 }, { "epoch": 5.797159034469058, "grad_norm": 0.1845703125, "learning_rate": 0.0003362312506208404, "loss": 0.4685, "step": 116720 }, { "epoch": 5.797655706764677, "grad_norm": 0.12109375, "learning_rate": 0.00033619151683719084, "loss": 0.5092, "step": 116730 }, { "epoch": 5.798152379060296, "grad_norm": 0.1279296875, "learning_rate": 0.00033615178305354125, "loss": 0.4894, "step": 116740 }, { "epoch": 5.798649051355915, "grad_norm": 0.107421875, "learning_rate": 0.0003361120492698917, "loss": 0.4927, "step": 116750 }, { "epoch": 5.7991457236515345, "grad_norm": 0.1318359375, "learning_rate": 0.0003360723154862422, "loss": 0.5244, "step": 116760 }, { "epoch": 5.799642395947154, "grad_norm": 0.11572265625, "learning_rate": 0.00033603258170259267, "loss": 0.5282, "step": 116770 }, { "epoch": 5.800139068242773, "grad_norm": 0.1396484375, "learning_rate": 0.0003359928479189431, "loss": 0.5163, "step": 116780 }, { "epoch": 5.800635740538393, "grad_norm": 0.138671875, "learning_rate": 0.00033595311413529356, "loss": 0.5125, "step": 116790 }, { "epoch": 5.801132412834012, "grad_norm": 0.1435546875, "learning_rate": 0.00033591338035164403, "loss": 0.5156, "step": 116800 }, { "epoch": 5.8016290851296315, "grad_norm": 0.17578125, "learning_rate": 0.00033587364656799444, "loss": 0.4645, "step": 116810 }, { "epoch": 5.802125757425251, "grad_norm": 0.1357421875, "learning_rate": 0.00033583391278434486, "loss": 0.4997, "step": 116820 }, { "epoch": 5.80262242972087, "grad_norm": 0.1298828125, "learning_rate": 0.0003357941790006954, "loss": 0.5129, "step": 116830 }, { "epoch": 5.803119102016489, "grad_norm": 0.11328125, "learning_rate": 0.0003357544452170458, "loss": 0.4992, "step": 116840 }, { "epoch": 5.803615774312108, "grad_norm": 0.150390625, "learning_rate": 0.0003357147114333963, "loss": 0.5252, "step": 116850 }, { "epoch": 5.8041124466077285, "grad_norm": 0.12353515625, "learning_rate": 0.0003356749776497467, "loss": 0.5282, "step": 116860 }, { "epoch": 5.804609118903348, "grad_norm": 0.12109375, "learning_rate": 0.00033563524386609716, "loss": 0.4747, "step": 116870 }, { "epoch": 5.805105791198967, "grad_norm": 0.130859375, "learning_rate": 0.00033559551008244764, "loss": 0.4811, "step": 116880 }, { "epoch": 5.805602463494586, "grad_norm": 0.1513671875, "learning_rate": 0.00033555577629879805, "loss": 0.4758, "step": 116890 }, { "epoch": 5.806099135790205, "grad_norm": 0.142578125, "learning_rate": 0.0003355160425151485, "loss": 0.5076, "step": 116900 }, { "epoch": 5.806595808085825, "grad_norm": 0.1279296875, "learning_rate": 0.000335476308731499, "loss": 0.4745, "step": 116910 }, { "epoch": 5.807092480381444, "grad_norm": 0.1162109375, "learning_rate": 0.0003354365749478494, "loss": 0.5009, "step": 116920 }, { "epoch": 5.807589152677064, "grad_norm": 0.1318359375, "learning_rate": 0.0003353968411641999, "loss": 0.5389, "step": 116930 }, { "epoch": 5.808085824972683, "grad_norm": 0.12353515625, "learning_rate": 0.00033535710738055036, "loss": 0.5034, "step": 116940 }, { "epoch": 5.8085824972683024, "grad_norm": 0.1337890625, "learning_rate": 0.00033531737359690077, "loss": 0.5461, "step": 116950 }, { "epoch": 5.809079169563922, "grad_norm": 0.125, "learning_rate": 0.00033527763981325124, "loss": 0.4941, "step": 116960 }, { "epoch": 5.809575841859541, "grad_norm": 0.1083984375, "learning_rate": 0.00033523790602960166, "loss": 0.5037, "step": 116970 }, { "epoch": 5.81007251415516, "grad_norm": 0.1142578125, "learning_rate": 0.00033519817224595213, "loss": 0.5012, "step": 116980 }, { "epoch": 5.810569186450779, "grad_norm": 0.11962890625, "learning_rate": 0.0003351584384623026, "loss": 0.4791, "step": 116990 }, { "epoch": 5.8110658587463995, "grad_norm": 0.1259765625, "learning_rate": 0.000335118704678653, "loss": 0.49, "step": 117000 }, { "epoch": 5.811562531042019, "grad_norm": 0.1435546875, "learning_rate": 0.0003350789708950035, "loss": 0.4665, "step": 117010 }, { "epoch": 5.812059203337638, "grad_norm": 0.1201171875, "learning_rate": 0.00033503923711135396, "loss": 0.5215, "step": 117020 }, { "epoch": 5.812555875633257, "grad_norm": 0.1533203125, "learning_rate": 0.0003349995033277044, "loss": 0.5096, "step": 117030 }, { "epoch": 5.813052547928876, "grad_norm": 0.126953125, "learning_rate": 0.00033495976954405485, "loss": 0.4757, "step": 117040 }, { "epoch": 5.813549220224496, "grad_norm": 0.1103515625, "learning_rate": 0.00033492003576040527, "loss": 0.4872, "step": 117050 }, { "epoch": 5.814045892520115, "grad_norm": 0.1171875, "learning_rate": 0.0003348803019767558, "loss": 0.4954, "step": 117060 }, { "epoch": 5.814542564815735, "grad_norm": 0.1455078125, "learning_rate": 0.0003348405681931062, "loss": 0.5031, "step": 117070 }, { "epoch": 5.815039237111354, "grad_norm": 0.12158203125, "learning_rate": 0.00033480083440945663, "loss": 0.5036, "step": 117080 }, { "epoch": 5.815535909406973, "grad_norm": 0.1201171875, "learning_rate": 0.0003347611006258071, "loss": 0.4804, "step": 117090 }, { "epoch": 5.816032581702593, "grad_norm": 0.1103515625, "learning_rate": 0.00033472136684215757, "loss": 0.5143, "step": 117100 }, { "epoch": 5.816529253998212, "grad_norm": 0.1357421875, "learning_rate": 0.000334681633058508, "loss": 0.5067, "step": 117110 }, { "epoch": 5.817025926293831, "grad_norm": 0.119140625, "learning_rate": 0.00033464189927485846, "loss": 0.5047, "step": 117120 }, { "epoch": 5.81752259858945, "grad_norm": 0.1416015625, "learning_rate": 0.00033460216549120893, "loss": 0.5313, "step": 117130 }, { "epoch": 5.81801927088507, "grad_norm": 0.1318359375, "learning_rate": 0.0003345624317075594, "loss": 0.5599, "step": 117140 }, { "epoch": 5.81851594318069, "grad_norm": 0.150390625, "learning_rate": 0.0003345226979239098, "loss": 0.4944, "step": 117150 }, { "epoch": 5.819012615476309, "grad_norm": 0.119140625, "learning_rate": 0.00033448296414026024, "loss": 0.5168, "step": 117160 }, { "epoch": 5.819509287771928, "grad_norm": 0.1162109375, "learning_rate": 0.00033444323035661076, "loss": 0.4926, "step": 117170 }, { "epoch": 5.820005960067547, "grad_norm": 0.111328125, "learning_rate": 0.0003344034965729612, "loss": 0.4978, "step": 117180 }, { "epoch": 5.820502632363167, "grad_norm": 0.1767578125, "learning_rate": 0.0003343637627893116, "loss": 0.5229, "step": 117190 }, { "epoch": 5.820999304658786, "grad_norm": 0.203125, "learning_rate": 0.00033432402900566207, "loss": 0.5083, "step": 117200 }, { "epoch": 5.821495976954406, "grad_norm": 0.126953125, "learning_rate": 0.00033428429522201254, "loss": 0.5103, "step": 117210 }, { "epoch": 5.821992649250025, "grad_norm": 0.119140625, "learning_rate": 0.000334244561438363, "loss": 0.508, "step": 117220 }, { "epoch": 5.822489321545644, "grad_norm": 0.111328125, "learning_rate": 0.00033420482765471343, "loss": 0.4983, "step": 117230 }, { "epoch": 5.822985993841264, "grad_norm": 0.115234375, "learning_rate": 0.00033416509387106385, "loss": 0.5179, "step": 117240 }, { "epoch": 5.823482666136883, "grad_norm": 0.13671875, "learning_rate": 0.00033412536008741437, "loss": 0.5201, "step": 117250 }, { "epoch": 5.823979338432502, "grad_norm": 0.11767578125, "learning_rate": 0.0003340856263037648, "loss": 0.5031, "step": 117260 }, { "epoch": 5.824476010728121, "grad_norm": 0.140625, "learning_rate": 0.0003340458925201152, "loss": 0.5299, "step": 117270 }, { "epoch": 5.824972683023741, "grad_norm": 0.11962890625, "learning_rate": 0.0003340061587364657, "loss": 0.5158, "step": 117280 }, { "epoch": 5.825469355319361, "grad_norm": 0.109375, "learning_rate": 0.00033396642495281615, "loss": 0.4898, "step": 117290 }, { "epoch": 5.82596602761498, "grad_norm": 0.12255859375, "learning_rate": 0.0003339266911691666, "loss": 0.5246, "step": 117300 }, { "epoch": 5.826462699910599, "grad_norm": 0.1083984375, "learning_rate": 0.00033388695738551704, "loss": 0.5086, "step": 117310 }, { "epoch": 5.826959372206218, "grad_norm": 0.130859375, "learning_rate": 0.0003338472236018675, "loss": 0.4911, "step": 117320 }, { "epoch": 5.8274560445018375, "grad_norm": 0.1572265625, "learning_rate": 0.000333807489818218, "loss": 0.5211, "step": 117330 }, { "epoch": 5.827952716797457, "grad_norm": 0.12060546875, "learning_rate": 0.0003337677560345684, "loss": 0.514, "step": 117340 }, { "epoch": 5.828449389093077, "grad_norm": 0.1298828125, "learning_rate": 0.00033372802225091887, "loss": 0.49, "step": 117350 }, { "epoch": 5.828946061388696, "grad_norm": 0.1162109375, "learning_rate": 0.00033368828846726934, "loss": 0.511, "step": 117360 }, { "epoch": 5.829442733684315, "grad_norm": 0.111328125, "learning_rate": 0.00033364855468361976, "loss": 0.5215, "step": 117370 }, { "epoch": 5.8299394059799345, "grad_norm": 0.146484375, "learning_rate": 0.00033360882089997023, "loss": 0.4963, "step": 117380 }, { "epoch": 5.830436078275554, "grad_norm": 0.1298828125, "learning_rate": 0.00033356908711632064, "loss": 0.5116, "step": 117390 }, { "epoch": 5.830932750571173, "grad_norm": 0.1435546875, "learning_rate": 0.0003335293533326711, "loss": 0.4927, "step": 117400 }, { "epoch": 5.831429422866792, "grad_norm": 0.1259765625, "learning_rate": 0.0003334896195490216, "loss": 0.4983, "step": 117410 }, { "epoch": 5.831926095162412, "grad_norm": 0.1259765625, "learning_rate": 0.000333449885765372, "loss": 0.4852, "step": 117420 }, { "epoch": 5.832422767458032, "grad_norm": 0.1572265625, "learning_rate": 0.0003334101519817225, "loss": 0.5369, "step": 117430 }, { "epoch": 5.832919439753651, "grad_norm": 0.1767578125, "learning_rate": 0.00033337041819807295, "loss": 0.5075, "step": 117440 }, { "epoch": 5.83341611204927, "grad_norm": 0.12890625, "learning_rate": 0.00033333068441442336, "loss": 0.5141, "step": 117450 }, { "epoch": 5.833912784344889, "grad_norm": 0.140625, "learning_rate": 0.00033329095063077384, "loss": 0.5138, "step": 117460 }, { "epoch": 5.8344094566405085, "grad_norm": 0.1376953125, "learning_rate": 0.0003332512168471243, "loss": 0.5337, "step": 117470 }, { "epoch": 5.834906128936128, "grad_norm": 0.111328125, "learning_rate": 0.0003332114830634747, "loss": 0.5158, "step": 117480 }, { "epoch": 5.835402801231747, "grad_norm": 0.1337890625, "learning_rate": 0.0003331717492798252, "loss": 0.497, "step": 117490 }, { "epoch": 5.835899473527366, "grad_norm": 0.11669921875, "learning_rate": 0.0003331320154961756, "loss": 0.4804, "step": 117500 }, { "epoch": 5.836396145822986, "grad_norm": 0.140625, "learning_rate": 0.0003330922817125261, "loss": 0.5316, "step": 117510 }, { "epoch": 5.8368928181186055, "grad_norm": 0.1357421875, "learning_rate": 0.00033305254792887656, "loss": 0.5328, "step": 117520 }, { "epoch": 5.837389490414225, "grad_norm": 0.12353515625, "learning_rate": 0.00033301281414522697, "loss": 0.4812, "step": 117530 }, { "epoch": 5.837886162709844, "grad_norm": 0.126953125, "learning_rate": 0.00033297308036157744, "loss": 0.4885, "step": 117540 }, { "epoch": 5.838382835005463, "grad_norm": 0.142578125, "learning_rate": 0.0003329333465779279, "loss": 0.4769, "step": 117550 }, { "epoch": 5.838879507301082, "grad_norm": 0.1328125, "learning_rate": 0.00033289361279427833, "loss": 0.4961, "step": 117560 }, { "epoch": 5.839376179596702, "grad_norm": 0.12890625, "learning_rate": 0.0003328538790106288, "loss": 0.4942, "step": 117570 }, { "epoch": 5.839872851892322, "grad_norm": 0.1337890625, "learning_rate": 0.0003328141452269792, "loss": 0.5328, "step": 117580 }, { "epoch": 5.840369524187941, "grad_norm": 0.11279296875, "learning_rate": 0.00033277441144332975, "loss": 0.4926, "step": 117590 }, { "epoch": 5.84086619648356, "grad_norm": 0.12255859375, "learning_rate": 0.00033273467765968016, "loss": 0.5053, "step": 117600 }, { "epoch": 5.841362868779179, "grad_norm": 0.12255859375, "learning_rate": 0.0003326949438760306, "loss": 0.498, "step": 117610 }, { "epoch": 5.841859541074799, "grad_norm": 0.11474609375, "learning_rate": 0.00033265521009238105, "loss": 0.4987, "step": 117620 }, { "epoch": 5.842356213370418, "grad_norm": 0.1298828125, "learning_rate": 0.0003326154763087315, "loss": 0.4759, "step": 117630 }, { "epoch": 5.842852885666037, "grad_norm": 0.1181640625, "learning_rate": 0.00033257574252508194, "loss": 0.5073, "step": 117640 }, { "epoch": 5.843349557961657, "grad_norm": 0.1396484375, "learning_rate": 0.0003325360087414324, "loss": 0.4777, "step": 117650 }, { "epoch": 5.8438462302572765, "grad_norm": 0.126953125, "learning_rate": 0.0003324962749577829, "loss": 0.5161, "step": 117660 }, { "epoch": 5.844342902552896, "grad_norm": 0.1240234375, "learning_rate": 0.00033245654117413335, "loss": 0.4854, "step": 117670 }, { "epoch": 5.844839574848515, "grad_norm": 0.11572265625, "learning_rate": 0.00033241680739048377, "loss": 0.517, "step": 117680 }, { "epoch": 5.845336247144134, "grad_norm": 0.1259765625, "learning_rate": 0.0003323770736068342, "loss": 0.5202, "step": 117690 }, { "epoch": 5.845832919439753, "grad_norm": 0.1259765625, "learning_rate": 0.0003323373398231847, "loss": 0.4899, "step": 117700 }, { "epoch": 5.846329591735373, "grad_norm": 0.123046875, "learning_rate": 0.00033229760603953513, "loss": 0.4762, "step": 117710 }, { "epoch": 5.846826264030993, "grad_norm": 0.203125, "learning_rate": 0.00033225787225588555, "loss": 0.4953, "step": 117720 }, { "epoch": 5.847322936326612, "grad_norm": 0.1328125, "learning_rate": 0.000332218138472236, "loss": 0.4721, "step": 117730 }, { "epoch": 5.847819608622231, "grad_norm": 0.1435546875, "learning_rate": 0.0003321784046885865, "loss": 0.503, "step": 117740 }, { "epoch": 5.84831628091785, "grad_norm": 0.126953125, "learning_rate": 0.00033213867090493696, "loss": 0.4832, "step": 117750 }, { "epoch": 5.84881295321347, "grad_norm": 0.11865234375, "learning_rate": 0.0003320989371212874, "loss": 0.4994, "step": 117760 }, { "epoch": 5.849309625509089, "grad_norm": 0.119140625, "learning_rate": 0.00033205920333763785, "loss": 0.4854, "step": 117770 }, { "epoch": 5.849806297804708, "grad_norm": 0.12255859375, "learning_rate": 0.0003320194695539883, "loss": 0.4945, "step": 117780 }, { "epoch": 5.850302970100328, "grad_norm": 0.11962890625, "learning_rate": 0.00033197973577033874, "loss": 0.49, "step": 117790 }, { "epoch": 5.850799642395947, "grad_norm": 0.140625, "learning_rate": 0.0003319400019866892, "loss": 0.4972, "step": 117800 }, { "epoch": 5.851296314691567, "grad_norm": 0.1376953125, "learning_rate": 0.00033190026820303963, "loss": 0.4971, "step": 117810 }, { "epoch": 5.851792986987186, "grad_norm": 0.126953125, "learning_rate": 0.0003318605344193901, "loss": 0.5217, "step": 117820 }, { "epoch": 5.852289659282805, "grad_norm": 0.11962890625, "learning_rate": 0.00033182080063574057, "loss": 0.5168, "step": 117830 }, { "epoch": 5.852786331578424, "grad_norm": 0.1533203125, "learning_rate": 0.000331781066852091, "loss": 0.4887, "step": 117840 }, { "epoch": 5.8532830038740435, "grad_norm": 0.14453125, "learning_rate": 0.00033174133306844146, "loss": 0.4975, "step": 117850 }, { "epoch": 5.853779676169664, "grad_norm": 0.12890625, "learning_rate": 0.00033170159928479193, "loss": 0.5065, "step": 117860 }, { "epoch": 5.854276348465283, "grad_norm": 0.10888671875, "learning_rate": 0.00033166186550114235, "loss": 0.4773, "step": 117870 }, { "epoch": 5.854773020760902, "grad_norm": 0.201171875, "learning_rate": 0.0003316221317174928, "loss": 0.507, "step": 117880 }, { "epoch": 5.855269693056521, "grad_norm": 0.1416015625, "learning_rate": 0.0003315823979338433, "loss": 0.4938, "step": 117890 }, { "epoch": 5.855766365352141, "grad_norm": 0.1708984375, "learning_rate": 0.0003315426641501937, "loss": 0.5063, "step": 117900 }, { "epoch": 5.85626303764776, "grad_norm": 0.1103515625, "learning_rate": 0.0003315029303665442, "loss": 0.5022, "step": 117910 }, { "epoch": 5.856759709943379, "grad_norm": 0.140625, "learning_rate": 0.0003314631965828946, "loss": 0.5145, "step": 117920 }, { "epoch": 5.857256382238999, "grad_norm": 0.1572265625, "learning_rate": 0.00033142346279924507, "loss": 0.5042, "step": 117930 }, { "epoch": 5.857753054534618, "grad_norm": 0.1357421875, "learning_rate": 0.00033138372901559554, "loss": 0.4761, "step": 117940 }, { "epoch": 5.858249726830238, "grad_norm": 0.1240234375, "learning_rate": 0.00033134399523194596, "loss": 0.4655, "step": 117950 }, { "epoch": 5.858746399125857, "grad_norm": 0.1591796875, "learning_rate": 0.00033130426144829643, "loss": 0.5248, "step": 117960 }, { "epoch": 5.859243071421476, "grad_norm": 0.1357421875, "learning_rate": 0.0003312645276646469, "loss": 0.5076, "step": 117970 }, { "epoch": 5.859739743717095, "grad_norm": 0.1572265625, "learning_rate": 0.0003312247938809973, "loss": 0.5143, "step": 117980 }, { "epoch": 5.8602364160127145, "grad_norm": 0.13671875, "learning_rate": 0.0003311850600973478, "loss": 0.5171, "step": 117990 }, { "epoch": 5.860733088308335, "grad_norm": 0.138671875, "learning_rate": 0.00033114532631369826, "loss": 0.4982, "step": 118000 }, { "epoch": 5.861229760603954, "grad_norm": 0.1416015625, "learning_rate": 0.0003311055925300487, "loss": 0.4797, "step": 118010 }, { "epoch": 5.861726432899573, "grad_norm": 0.12890625, "learning_rate": 0.00033106585874639915, "loss": 0.4773, "step": 118020 }, { "epoch": 5.862223105195192, "grad_norm": 0.11279296875, "learning_rate": 0.00033102612496274956, "loss": 0.5194, "step": 118030 }, { "epoch": 5.8627197774908115, "grad_norm": 0.1162109375, "learning_rate": 0.0003309863911791001, "loss": 0.5051, "step": 118040 }, { "epoch": 5.863216449786431, "grad_norm": 0.14453125, "learning_rate": 0.0003309466573954505, "loss": 0.4984, "step": 118050 }, { "epoch": 5.86371312208205, "grad_norm": 0.134765625, "learning_rate": 0.0003309069236118009, "loss": 0.4948, "step": 118060 }, { "epoch": 5.86420979437767, "grad_norm": 0.12060546875, "learning_rate": 0.0003308671898281514, "loss": 0.486, "step": 118070 }, { "epoch": 5.864706466673289, "grad_norm": 0.10986328125, "learning_rate": 0.00033082745604450187, "loss": 0.5001, "step": 118080 }, { "epoch": 5.8652031389689085, "grad_norm": 0.11962890625, "learning_rate": 0.0003307877222608523, "loss": 0.4748, "step": 118090 }, { "epoch": 5.865699811264528, "grad_norm": 0.126953125, "learning_rate": 0.00033074798847720276, "loss": 0.5161, "step": 118100 }, { "epoch": 5.866196483560147, "grad_norm": 0.11865234375, "learning_rate": 0.0003307082546935532, "loss": 0.521, "step": 118110 }, { "epoch": 5.866693155855766, "grad_norm": 0.140625, "learning_rate": 0.0003306685209099037, "loss": 0.5077, "step": 118120 }, { "epoch": 5.8671898281513855, "grad_norm": 0.1396484375, "learning_rate": 0.0003306287871262541, "loss": 0.5289, "step": 118130 }, { "epoch": 5.867686500447005, "grad_norm": 0.11376953125, "learning_rate": 0.00033058905334260453, "loss": 0.5263, "step": 118140 }, { "epoch": 5.868183172742625, "grad_norm": 0.1591796875, "learning_rate": 0.000330549319558955, "loss": 0.5321, "step": 118150 }, { "epoch": 5.868679845038244, "grad_norm": 0.171875, "learning_rate": 0.0003305095857753055, "loss": 0.501, "step": 118160 }, { "epoch": 5.869176517333863, "grad_norm": 0.1279296875, "learning_rate": 0.00033046985199165595, "loss": 0.5024, "step": 118170 }, { "epoch": 5.8696731896294825, "grad_norm": 0.134765625, "learning_rate": 0.00033043011820800636, "loss": 0.5344, "step": 118180 }, { "epoch": 5.870169861925102, "grad_norm": 0.11376953125, "learning_rate": 0.00033039038442435684, "loss": 0.5226, "step": 118190 }, { "epoch": 5.870666534220721, "grad_norm": 0.1337890625, "learning_rate": 0.0003303506506407073, "loss": 0.5285, "step": 118200 }, { "epoch": 5.87116320651634, "grad_norm": 0.1474609375, "learning_rate": 0.0003303109168570577, "loss": 0.4912, "step": 118210 }, { "epoch": 5.871659878811959, "grad_norm": 0.130859375, "learning_rate": 0.00033027118307340814, "loss": 0.4776, "step": 118220 }, { "epoch": 5.8721565511075795, "grad_norm": 0.11279296875, "learning_rate": 0.00033023144928975867, "loss": 0.4904, "step": 118230 }, { "epoch": 5.872653223403199, "grad_norm": 0.1259765625, "learning_rate": 0.0003301917155061091, "loss": 0.5071, "step": 118240 }, { "epoch": 5.873149895698818, "grad_norm": 0.12353515625, "learning_rate": 0.00033015198172245956, "loss": 0.4906, "step": 118250 }, { "epoch": 5.873646567994437, "grad_norm": 0.119140625, "learning_rate": 0.00033011224793880997, "loss": 0.495, "step": 118260 }, { "epoch": 5.874143240290056, "grad_norm": 0.134765625, "learning_rate": 0.00033007251415516044, "loss": 0.4985, "step": 118270 }, { "epoch": 5.874639912585676, "grad_norm": 0.1640625, "learning_rate": 0.0003300327803715109, "loss": 0.4986, "step": 118280 }, { "epoch": 5.875136584881295, "grad_norm": 0.1533203125, "learning_rate": 0.00032999304658786133, "loss": 0.4812, "step": 118290 }, { "epoch": 5.875633257176915, "grad_norm": 0.1259765625, "learning_rate": 0.0003299533128042118, "loss": 0.4896, "step": 118300 }, { "epoch": 5.876129929472534, "grad_norm": 0.1728515625, "learning_rate": 0.0003299135790205623, "loss": 0.4845, "step": 118310 }, { "epoch": 5.876626601768153, "grad_norm": 0.11767578125, "learning_rate": 0.0003298738452369127, "loss": 0.5161, "step": 118320 }, { "epoch": 5.877123274063773, "grad_norm": 0.1328125, "learning_rate": 0.00032983411145326316, "loss": 0.5102, "step": 118330 }, { "epoch": 5.877619946359392, "grad_norm": 0.1279296875, "learning_rate": 0.00032979437766961363, "loss": 0.4987, "step": 118340 }, { "epoch": 5.878116618655011, "grad_norm": 0.1259765625, "learning_rate": 0.00032975464388596405, "loss": 0.5095, "step": 118350 }, { "epoch": 5.87861329095063, "grad_norm": 0.11865234375, "learning_rate": 0.0003297149101023145, "loss": 0.5482, "step": 118360 }, { "epoch": 5.8791099632462505, "grad_norm": 0.1357421875, "learning_rate": 0.00032967517631866494, "loss": 0.5039, "step": 118370 }, { "epoch": 5.87960663554187, "grad_norm": 0.138671875, "learning_rate": 0.0003296354425350154, "loss": 0.5345, "step": 118380 }, { "epoch": 5.880103307837489, "grad_norm": 0.130859375, "learning_rate": 0.0003295957087513659, "loss": 0.4986, "step": 118390 }, { "epoch": 5.880599980133108, "grad_norm": 0.162109375, "learning_rate": 0.0003295559749677163, "loss": 0.5201, "step": 118400 }, { "epoch": 5.881096652428727, "grad_norm": 0.134765625, "learning_rate": 0.00032951624118406677, "loss": 0.4885, "step": 118410 }, { "epoch": 5.881593324724347, "grad_norm": 0.1435546875, "learning_rate": 0.00032947650740041724, "loss": 0.505, "step": 118420 }, { "epoch": 5.882089997019966, "grad_norm": 0.12890625, "learning_rate": 0.00032943677361676766, "loss": 0.4985, "step": 118430 }, { "epoch": 5.882586669315586, "grad_norm": 0.1982421875, "learning_rate": 0.00032939703983311813, "loss": 0.5121, "step": 118440 }, { "epoch": 5.883083341611205, "grad_norm": 0.1318359375, "learning_rate": 0.00032935730604946855, "loss": 0.5012, "step": 118450 }, { "epoch": 5.883580013906824, "grad_norm": 0.1416015625, "learning_rate": 0.000329317572265819, "loss": 0.519, "step": 118460 }, { "epoch": 5.884076686202444, "grad_norm": 0.123046875, "learning_rate": 0.0003292778384821695, "loss": 0.5172, "step": 118470 }, { "epoch": 5.884573358498063, "grad_norm": 0.1279296875, "learning_rate": 0.0003292381046985199, "loss": 0.4944, "step": 118480 }, { "epoch": 5.885070030793682, "grad_norm": 0.171875, "learning_rate": 0.0003291983709148704, "loss": 0.5043, "step": 118490 }, { "epoch": 5.885566703089301, "grad_norm": 0.126953125, "learning_rate": 0.00032915863713122085, "loss": 0.4903, "step": 118500 }, { "epoch": 5.886063375384921, "grad_norm": 0.1318359375, "learning_rate": 0.00032911890334757127, "loss": 0.5015, "step": 118510 }, { "epoch": 5.886560047680541, "grad_norm": 0.119140625, "learning_rate": 0.00032907916956392174, "loss": 0.4816, "step": 118520 }, { "epoch": 5.88705671997616, "grad_norm": 0.11865234375, "learning_rate": 0.0003290394357802722, "loss": 0.5231, "step": 118530 }, { "epoch": 5.887553392271779, "grad_norm": 0.1279296875, "learning_rate": 0.00032899970199662263, "loss": 0.4856, "step": 118540 }, { "epoch": 5.888050064567398, "grad_norm": 0.1435546875, "learning_rate": 0.0003289599682129731, "loss": 0.5284, "step": 118550 }, { "epoch": 5.8885467368630175, "grad_norm": 0.119140625, "learning_rate": 0.0003289202344293235, "loss": 0.4831, "step": 118560 }, { "epoch": 5.889043409158637, "grad_norm": 0.12060546875, "learning_rate": 0.00032888050064567404, "loss": 0.5038, "step": 118570 }, { "epoch": 5.889540081454257, "grad_norm": 0.12890625, "learning_rate": 0.00032884076686202446, "loss": 0.5066, "step": 118580 }, { "epoch": 5.890036753749876, "grad_norm": 0.134765625, "learning_rate": 0.0003288010330783749, "loss": 0.5145, "step": 118590 }, { "epoch": 5.890533426045495, "grad_norm": 0.1484375, "learning_rate": 0.00032876129929472535, "loss": 0.5495, "step": 118600 }, { "epoch": 5.891030098341115, "grad_norm": 0.1171875, "learning_rate": 0.0003287215655110758, "loss": 0.4683, "step": 118610 }, { "epoch": 5.891526770636734, "grad_norm": 0.11962890625, "learning_rate": 0.0003286818317274263, "loss": 0.4913, "step": 118620 }, { "epoch": 5.892023442932353, "grad_norm": 0.13671875, "learning_rate": 0.0003286420979437767, "loss": 0.5002, "step": 118630 }, { "epoch": 5.892520115227972, "grad_norm": 0.13671875, "learning_rate": 0.0003286023641601272, "loss": 0.4984, "step": 118640 }, { "epoch": 5.893016787523592, "grad_norm": 0.1298828125, "learning_rate": 0.00032856263037647765, "loss": 0.5122, "step": 118650 }, { "epoch": 5.893513459819212, "grad_norm": 0.126953125, "learning_rate": 0.00032852289659282807, "loss": 0.532, "step": 118660 }, { "epoch": 5.894010132114831, "grad_norm": 0.1171875, "learning_rate": 0.0003284831628091785, "loss": 0.5041, "step": 118670 }, { "epoch": 5.89450680441045, "grad_norm": 0.140625, "learning_rate": 0.00032844342902552896, "loss": 0.5026, "step": 118680 }, { "epoch": 5.895003476706069, "grad_norm": 0.125, "learning_rate": 0.00032840369524187943, "loss": 0.4592, "step": 118690 }, { "epoch": 5.8955001490016885, "grad_norm": 0.1328125, "learning_rate": 0.0003283639614582299, "loss": 0.5028, "step": 118700 }, { "epoch": 5.895996821297308, "grad_norm": 0.12353515625, "learning_rate": 0.0003283242276745803, "loss": 0.4951, "step": 118710 }, { "epoch": 5.896493493592928, "grad_norm": 0.126953125, "learning_rate": 0.0003282844938909308, "loss": 0.5101, "step": 118720 }, { "epoch": 5.896990165888547, "grad_norm": 0.115234375, "learning_rate": 0.00032824476010728126, "loss": 0.4962, "step": 118730 }, { "epoch": 5.897486838184166, "grad_norm": 0.123046875, "learning_rate": 0.0003282050263236317, "loss": 0.4997, "step": 118740 }, { "epoch": 5.8979835104797855, "grad_norm": 0.1298828125, "learning_rate": 0.0003281652925399821, "loss": 0.5008, "step": 118750 }, { "epoch": 5.898480182775405, "grad_norm": 0.12158203125, "learning_rate": 0.0003281255587563326, "loss": 0.5007, "step": 118760 }, { "epoch": 5.898976855071024, "grad_norm": 0.123046875, "learning_rate": 0.00032808582497268304, "loss": 0.4933, "step": 118770 }, { "epoch": 5.899473527366643, "grad_norm": 0.1279296875, "learning_rate": 0.0003280460911890335, "loss": 0.508, "step": 118780 }, { "epoch": 5.899970199662263, "grad_norm": 0.13671875, "learning_rate": 0.0003280063574053839, "loss": 0.4906, "step": 118790 }, { "epoch": 5.9004668719578826, "grad_norm": 0.1494140625, "learning_rate": 0.0003279666236217344, "loss": 0.5132, "step": 118800 }, { "epoch": 5.900963544253502, "grad_norm": 0.1357421875, "learning_rate": 0.00032792688983808487, "loss": 0.5058, "step": 118810 }, { "epoch": 5.901460216549121, "grad_norm": 0.11669921875, "learning_rate": 0.0003278871560544353, "loss": 0.5065, "step": 118820 }, { "epoch": 5.90195688884474, "grad_norm": 0.1259765625, "learning_rate": 0.00032784742227078576, "loss": 0.5009, "step": 118830 }, { "epoch": 5.9024535611403595, "grad_norm": 0.1279296875, "learning_rate": 0.0003278076884871362, "loss": 0.5332, "step": 118840 }, { "epoch": 5.902950233435979, "grad_norm": 0.1298828125, "learning_rate": 0.00032776795470348664, "loss": 0.493, "step": 118850 }, { "epoch": 5.903446905731598, "grad_norm": 0.1513671875, "learning_rate": 0.0003277282209198371, "loss": 0.4811, "step": 118860 }, { "epoch": 5.903943578027217, "grad_norm": 0.11767578125, "learning_rate": 0.0003276884871361876, "loss": 0.4869, "step": 118870 }, { "epoch": 5.904440250322837, "grad_norm": 0.1279296875, "learning_rate": 0.000327648753352538, "loss": 0.5196, "step": 118880 }, { "epoch": 5.9049369226184565, "grad_norm": 0.126953125, "learning_rate": 0.0003276090195688885, "loss": 0.4773, "step": 118890 }, { "epoch": 5.905433594914076, "grad_norm": 0.1376953125, "learning_rate": 0.0003275692857852389, "loss": 0.5117, "step": 118900 }, { "epoch": 5.905930267209695, "grad_norm": 0.13671875, "learning_rate": 0.00032752955200158936, "loss": 0.5024, "step": 118910 }, { "epoch": 5.906426939505314, "grad_norm": 0.1474609375, "learning_rate": 0.00032748981821793983, "loss": 0.4918, "step": 118920 }, { "epoch": 5.906923611800933, "grad_norm": 0.11865234375, "learning_rate": 0.00032745008443429025, "loss": 0.503, "step": 118930 }, { "epoch": 5.907420284096553, "grad_norm": 0.11669921875, "learning_rate": 0.0003274103506506407, "loss": 0.5204, "step": 118940 }, { "epoch": 5.907916956392173, "grad_norm": 0.12890625, "learning_rate": 0.0003273706168669912, "loss": 0.4752, "step": 118950 }, { "epoch": 5.908413628687792, "grad_norm": 0.1318359375, "learning_rate": 0.0003273308830833416, "loss": 0.5238, "step": 118960 }, { "epoch": 5.908910300983411, "grad_norm": 0.126953125, "learning_rate": 0.0003272911492996921, "loss": 0.4999, "step": 118970 }, { "epoch": 5.90940697327903, "grad_norm": 0.109375, "learning_rate": 0.0003272514155160425, "loss": 0.5208, "step": 118980 }, { "epoch": 5.90990364557465, "grad_norm": 0.1240234375, "learning_rate": 0.00032721168173239297, "loss": 0.5137, "step": 118990 }, { "epoch": 5.910400317870269, "grad_norm": 0.12158203125, "learning_rate": 0.00032717194794874344, "loss": 0.4515, "step": 119000 }, { "epoch": 5.910896990165888, "grad_norm": 0.146484375, "learning_rate": 0.00032713221416509386, "loss": 0.5002, "step": 119010 }, { "epoch": 5.911393662461508, "grad_norm": 0.185546875, "learning_rate": 0.00032709248038144433, "loss": 0.5258, "step": 119020 }, { "epoch": 5.911890334757127, "grad_norm": 0.1943359375, "learning_rate": 0.0003270527465977948, "loss": 0.4869, "step": 119030 }, { "epoch": 5.912387007052747, "grad_norm": 0.248046875, "learning_rate": 0.0003270130128141452, "loss": 0.513, "step": 119040 }, { "epoch": 5.912883679348366, "grad_norm": 0.12060546875, "learning_rate": 0.0003269732790304957, "loss": 0.4948, "step": 119050 }, { "epoch": 5.913380351643985, "grad_norm": 0.1259765625, "learning_rate": 0.00032693354524684616, "loss": 0.5159, "step": 119060 }, { "epoch": 5.913877023939604, "grad_norm": 0.123046875, "learning_rate": 0.00032689381146319663, "loss": 0.4906, "step": 119070 }, { "epoch": 5.914373696235224, "grad_norm": 0.146484375, "learning_rate": 0.00032685407767954705, "loss": 0.4982, "step": 119080 }, { "epoch": 5.914870368530844, "grad_norm": 0.1572265625, "learning_rate": 0.00032681434389589747, "loss": 0.5223, "step": 119090 }, { "epoch": 5.915367040826463, "grad_norm": 0.12890625, "learning_rate": 0.000326774610112248, "loss": 0.5294, "step": 119100 }, { "epoch": 5.915863713122082, "grad_norm": 0.126953125, "learning_rate": 0.0003267348763285984, "loss": 0.5004, "step": 119110 }, { "epoch": 5.916360385417701, "grad_norm": 0.12158203125, "learning_rate": 0.00032669514254494883, "loss": 0.5063, "step": 119120 }, { "epoch": 5.916857057713321, "grad_norm": 0.1474609375, "learning_rate": 0.0003266554087612993, "loss": 0.5225, "step": 119130 }, { "epoch": 5.91735373000894, "grad_norm": 0.12158203125, "learning_rate": 0.00032661567497764977, "loss": 0.5236, "step": 119140 }, { "epoch": 5.917850402304559, "grad_norm": 0.1259765625, "learning_rate": 0.00032657594119400024, "loss": 0.5233, "step": 119150 }, { "epoch": 5.918347074600179, "grad_norm": 0.1279296875, "learning_rate": 0.00032653620741035066, "loss": 0.5082, "step": 119160 }, { "epoch": 5.918843746895798, "grad_norm": 0.1201171875, "learning_rate": 0.00032649647362670113, "loss": 0.5145, "step": 119170 }, { "epoch": 5.919340419191418, "grad_norm": 0.142578125, "learning_rate": 0.0003264567398430516, "loss": 0.4983, "step": 119180 }, { "epoch": 5.919837091487037, "grad_norm": 0.1337890625, "learning_rate": 0.000326417006059402, "loss": 0.5243, "step": 119190 }, { "epoch": 5.920333763782656, "grad_norm": 0.1298828125, "learning_rate": 0.00032637727227575244, "loss": 0.5181, "step": 119200 }, { "epoch": 5.920830436078275, "grad_norm": 0.1474609375, "learning_rate": 0.0003263375384921029, "loss": 0.5084, "step": 119210 }, { "epoch": 5.9213271083738945, "grad_norm": 0.16015625, "learning_rate": 0.0003262978047084534, "loss": 0.4905, "step": 119220 }, { "epoch": 5.921823780669515, "grad_norm": 0.1240234375, "learning_rate": 0.00032625807092480385, "loss": 0.5001, "step": 119230 }, { "epoch": 5.922320452965134, "grad_norm": 0.1259765625, "learning_rate": 0.00032621833714115427, "loss": 0.5056, "step": 119240 }, { "epoch": 5.922817125260753, "grad_norm": 0.1376953125, "learning_rate": 0.00032617860335750474, "loss": 0.4853, "step": 119250 }, { "epoch": 5.923313797556372, "grad_norm": 0.19140625, "learning_rate": 0.0003261388695738552, "loss": 0.4933, "step": 119260 }, { "epoch": 5.9238104698519916, "grad_norm": 0.130859375, "learning_rate": 0.00032609913579020563, "loss": 0.5344, "step": 119270 }, { "epoch": 5.924307142147611, "grad_norm": 0.1181640625, "learning_rate": 0.00032605940200655604, "loss": 0.5063, "step": 119280 }, { "epoch": 5.92480381444323, "grad_norm": 0.13671875, "learning_rate": 0.00032601966822290657, "loss": 0.5223, "step": 119290 }, { "epoch": 5.92530048673885, "grad_norm": 0.140625, "learning_rate": 0.000325979934439257, "loss": 0.5342, "step": 119300 }, { "epoch": 5.925797159034469, "grad_norm": 0.1396484375, "learning_rate": 0.00032594020065560746, "loss": 0.5046, "step": 119310 }, { "epoch": 5.926293831330089, "grad_norm": 0.1259765625, "learning_rate": 0.0003259004668719579, "loss": 0.5225, "step": 119320 }, { "epoch": 5.926790503625708, "grad_norm": 0.126953125, "learning_rate": 0.00032586073308830835, "loss": 0.4972, "step": 119330 }, { "epoch": 5.927287175921327, "grad_norm": 0.1171875, "learning_rate": 0.0003258209993046588, "loss": 0.5219, "step": 119340 }, { "epoch": 5.927783848216946, "grad_norm": 0.11474609375, "learning_rate": 0.00032578126552100924, "loss": 0.4868, "step": 119350 }, { "epoch": 5.9282805205125655, "grad_norm": 0.1328125, "learning_rate": 0.0003257415317373597, "loss": 0.5057, "step": 119360 }, { "epoch": 5.928777192808186, "grad_norm": 0.12353515625, "learning_rate": 0.0003257017979537102, "loss": 0.4783, "step": 119370 }, { "epoch": 5.929273865103805, "grad_norm": 0.1630859375, "learning_rate": 0.0003256620641700606, "loss": 0.5347, "step": 119380 }, { "epoch": 5.929770537399424, "grad_norm": 0.11865234375, "learning_rate": 0.00032562233038641107, "loss": 0.4995, "step": 119390 }, { "epoch": 5.930267209695043, "grad_norm": 0.1494140625, "learning_rate": 0.00032558259660276154, "loss": 0.5117, "step": 119400 }, { "epoch": 5.9307638819906625, "grad_norm": 0.11328125, "learning_rate": 0.00032554286281911196, "loss": 0.4947, "step": 119410 }, { "epoch": 5.931260554286282, "grad_norm": 0.1240234375, "learning_rate": 0.0003255031290354624, "loss": 0.5476, "step": 119420 }, { "epoch": 5.931757226581901, "grad_norm": 0.1357421875, "learning_rate": 0.00032546339525181284, "loss": 0.5358, "step": 119430 }, { "epoch": 5.932253898877521, "grad_norm": 0.12890625, "learning_rate": 0.00032542366146816337, "loss": 0.4916, "step": 119440 }, { "epoch": 5.93275057117314, "grad_norm": 0.11181640625, "learning_rate": 0.0003253839276845138, "loss": 0.5134, "step": 119450 }, { "epoch": 5.9332472434687595, "grad_norm": 0.1337890625, "learning_rate": 0.0003253441939008642, "loss": 0.5055, "step": 119460 }, { "epoch": 5.933743915764379, "grad_norm": 0.1171875, "learning_rate": 0.0003253044601172147, "loss": 0.5068, "step": 119470 }, { "epoch": 5.934240588059998, "grad_norm": 0.12060546875, "learning_rate": 0.00032526472633356515, "loss": 0.5089, "step": 119480 }, { "epoch": 5.934737260355617, "grad_norm": 0.12890625, "learning_rate": 0.00032522499254991556, "loss": 0.539, "step": 119490 }, { "epoch": 5.935233932651236, "grad_norm": 0.1357421875, "learning_rate": 0.00032518525876626604, "loss": 0.5004, "step": 119500 }, { "epoch": 5.935730604946857, "grad_norm": 0.142578125, "learning_rate": 0.00032514552498261645, "loss": 0.5103, "step": 119510 }, { "epoch": 5.936227277242476, "grad_norm": 0.138671875, "learning_rate": 0.000325105791198967, "loss": 0.4969, "step": 119520 }, { "epoch": 5.936723949538095, "grad_norm": 0.150390625, "learning_rate": 0.0003250660574153174, "loss": 0.5127, "step": 119530 }, { "epoch": 5.937220621833714, "grad_norm": 0.1533203125, "learning_rate": 0.0003250263236316678, "loss": 0.5122, "step": 119540 }, { "epoch": 5.9377172941293335, "grad_norm": 0.130859375, "learning_rate": 0.0003249865898480183, "loss": 0.5198, "step": 119550 }, { "epoch": 5.938213966424953, "grad_norm": 0.12890625, "learning_rate": 0.00032494685606436875, "loss": 0.5059, "step": 119560 }, { "epoch": 5.938710638720572, "grad_norm": 0.12353515625, "learning_rate": 0.00032490712228071917, "loss": 0.5045, "step": 119570 }, { "epoch": 5.939207311016191, "grad_norm": 0.146484375, "learning_rate": 0.00032486738849706964, "loss": 0.498, "step": 119580 }, { "epoch": 5.93970398331181, "grad_norm": 0.126953125, "learning_rate": 0.0003248276547134201, "loss": 0.49, "step": 119590 }, { "epoch": 5.9402006556074305, "grad_norm": 0.11669921875, "learning_rate": 0.0003247879209297706, "loss": 0.5009, "step": 119600 }, { "epoch": 5.94069732790305, "grad_norm": 0.12255859375, "learning_rate": 0.000324748187146121, "loss": 0.5049, "step": 119610 }, { "epoch": 5.941194000198669, "grad_norm": 0.142578125, "learning_rate": 0.0003247084533624714, "loss": 0.4934, "step": 119620 }, { "epoch": 5.941690672494288, "grad_norm": 0.130859375, "learning_rate": 0.00032466871957882195, "loss": 0.5154, "step": 119630 }, { "epoch": 5.942187344789907, "grad_norm": 0.1328125, "learning_rate": 0.00032462898579517236, "loss": 0.5056, "step": 119640 }, { "epoch": 5.942684017085527, "grad_norm": 0.11865234375, "learning_rate": 0.0003245892520115228, "loss": 0.4994, "step": 119650 }, { "epoch": 5.943180689381146, "grad_norm": 0.12890625, "learning_rate": 0.00032454951822787325, "loss": 0.5202, "step": 119660 }, { "epoch": 5.943677361676766, "grad_norm": 0.115234375, "learning_rate": 0.0003245097844442237, "loss": 0.4709, "step": 119670 }, { "epoch": 5.944174033972385, "grad_norm": 0.134765625, "learning_rate": 0.0003244700506605742, "loss": 0.4979, "step": 119680 }, { "epoch": 5.944670706268004, "grad_norm": 0.1181640625, "learning_rate": 0.0003244303168769246, "loss": 0.4862, "step": 119690 }, { "epoch": 5.945167378563624, "grad_norm": 0.125, "learning_rate": 0.0003243905830932751, "loss": 0.4885, "step": 119700 }, { "epoch": 5.945664050859243, "grad_norm": 0.140625, "learning_rate": 0.00032435084930962555, "loss": 0.5369, "step": 119710 }, { "epoch": 5.946160723154862, "grad_norm": 0.12353515625, "learning_rate": 0.00032431111552597597, "loss": 0.5043, "step": 119720 }, { "epoch": 5.946657395450481, "grad_norm": 0.1416015625, "learning_rate": 0.0003242713817423264, "loss": 0.5042, "step": 119730 }, { "epoch": 5.9471540677461014, "grad_norm": 0.12109375, "learning_rate": 0.0003242316479586769, "loss": 0.4988, "step": 119740 }, { "epoch": 5.947650740041721, "grad_norm": 0.150390625, "learning_rate": 0.00032419191417502733, "loss": 0.4771, "step": 119750 }, { "epoch": 5.94814741233734, "grad_norm": 0.1357421875, "learning_rate": 0.0003241521803913778, "loss": 0.4992, "step": 119760 }, { "epoch": 5.948644084632959, "grad_norm": 0.1484375, "learning_rate": 0.0003241124466077282, "loss": 0.4915, "step": 119770 }, { "epoch": 5.949140756928578, "grad_norm": 0.1259765625, "learning_rate": 0.0003240727128240787, "loss": 0.4938, "step": 119780 }, { "epoch": 5.949637429224198, "grad_norm": 0.1318359375, "learning_rate": 0.00032403297904042916, "loss": 0.5259, "step": 119790 }, { "epoch": 5.950134101519817, "grad_norm": 0.1396484375, "learning_rate": 0.0003239932452567796, "loss": 0.5263, "step": 119800 }, { "epoch": 5.950630773815437, "grad_norm": 0.1201171875, "learning_rate": 0.00032395351147313, "loss": 0.5156, "step": 119810 }, { "epoch": 5.951127446111056, "grad_norm": 0.1337890625, "learning_rate": 0.0003239137776894805, "loss": 0.4934, "step": 119820 }, { "epoch": 5.951624118406675, "grad_norm": 0.12158203125, "learning_rate": 0.00032387404390583094, "loss": 0.4919, "step": 119830 }, { "epoch": 5.952120790702295, "grad_norm": 0.154296875, "learning_rate": 0.0003238343101221814, "loss": 0.482, "step": 119840 }, { "epoch": 5.952617462997914, "grad_norm": 0.130859375, "learning_rate": 0.00032379457633853183, "loss": 0.4674, "step": 119850 }, { "epoch": 5.953114135293533, "grad_norm": 0.126953125, "learning_rate": 0.0003237548425548823, "loss": 0.5062, "step": 119860 }, { "epoch": 5.953610807589152, "grad_norm": 0.1337890625, "learning_rate": 0.00032371510877123277, "loss": 0.492, "step": 119870 }, { "epoch": 5.954107479884772, "grad_norm": 0.126953125, "learning_rate": 0.0003236753749875832, "loss": 0.5158, "step": 119880 }, { "epoch": 5.954604152180392, "grad_norm": 0.11767578125, "learning_rate": 0.00032363564120393366, "loss": 0.51, "step": 119890 }, { "epoch": 5.955100824476011, "grad_norm": 0.125, "learning_rate": 0.00032359590742028413, "loss": 0.5204, "step": 119900 }, { "epoch": 5.95559749677163, "grad_norm": 0.1513671875, "learning_rate": 0.00032355617363663455, "loss": 0.4954, "step": 119910 }, { "epoch": 5.956094169067249, "grad_norm": 0.119140625, "learning_rate": 0.000323516439852985, "loss": 0.4786, "step": 119920 }, { "epoch": 5.9565908413628685, "grad_norm": 0.1572265625, "learning_rate": 0.0003234767060693355, "loss": 0.4845, "step": 119930 }, { "epoch": 5.957087513658488, "grad_norm": 0.10888671875, "learning_rate": 0.0003234369722856859, "loss": 0.5167, "step": 119940 }, { "epoch": 5.957584185954108, "grad_norm": 0.11572265625, "learning_rate": 0.0003233972385020364, "loss": 0.5018, "step": 119950 }, { "epoch": 5.958080858249727, "grad_norm": 0.19921875, "learning_rate": 0.0003233575047183868, "loss": 0.4911, "step": 119960 }, { "epoch": 5.958577530545346, "grad_norm": 0.11181640625, "learning_rate": 0.0003233177709347373, "loss": 0.5116, "step": 119970 }, { "epoch": 5.959074202840966, "grad_norm": 0.1357421875, "learning_rate": 0.00032327803715108774, "loss": 0.4855, "step": 119980 }, { "epoch": 5.959570875136585, "grad_norm": 0.1201171875, "learning_rate": 0.00032323830336743816, "loss": 0.5309, "step": 119990 }, { "epoch": 5.960067547432204, "grad_norm": 0.1162109375, "learning_rate": 0.0003231985695837886, "loss": 0.4953, "step": 120000 }, { "epoch": 5.960564219727823, "grad_norm": 0.1298828125, "learning_rate": 0.0003231588358001391, "loss": 0.5211, "step": 120010 }, { "epoch": 5.961060892023443, "grad_norm": 0.15234375, "learning_rate": 0.0003231191020164895, "loss": 0.5121, "step": 120020 }, { "epoch": 5.961557564319063, "grad_norm": 0.1435546875, "learning_rate": 0.00032307936823284, "loss": 0.4955, "step": 120030 }, { "epoch": 5.962054236614682, "grad_norm": 0.162109375, "learning_rate": 0.00032303963444919046, "loss": 0.5018, "step": 120040 }, { "epoch": 5.962550908910301, "grad_norm": 0.166015625, "learning_rate": 0.00032299990066554093, "loss": 0.5069, "step": 120050 }, { "epoch": 5.96304758120592, "grad_norm": 0.12158203125, "learning_rate": 0.00032296016688189135, "loss": 0.4962, "step": 120060 }, { "epoch": 5.9635442535015395, "grad_norm": 0.1357421875, "learning_rate": 0.00032292043309824176, "loss": 0.4756, "step": 120070 }, { "epoch": 5.964040925797159, "grad_norm": 0.1357421875, "learning_rate": 0.00032288069931459224, "loss": 0.4822, "step": 120080 }, { "epoch": 5.964537598092779, "grad_norm": 0.13671875, "learning_rate": 0.0003228409655309427, "loss": 0.5071, "step": 120090 }, { "epoch": 5.965034270388398, "grad_norm": 0.2109375, "learning_rate": 0.0003228012317472931, "loss": 0.4711, "step": 120100 }, { "epoch": 5.965530942684017, "grad_norm": 0.1337890625, "learning_rate": 0.0003227614979636436, "loss": 0.4887, "step": 120110 }, { "epoch": 5.9660276149796365, "grad_norm": 0.1337890625, "learning_rate": 0.00032272176417999407, "loss": 0.5109, "step": 120120 }, { "epoch": 5.966524287275256, "grad_norm": 0.1298828125, "learning_rate": 0.00032268203039634454, "loss": 0.5055, "step": 120130 }, { "epoch": 5.967020959570875, "grad_norm": 0.1376953125, "learning_rate": 0.00032264229661269495, "loss": 0.5203, "step": 120140 }, { "epoch": 5.967517631866494, "grad_norm": 0.11865234375, "learning_rate": 0.00032260256282904537, "loss": 0.5225, "step": 120150 }, { "epoch": 5.968014304162114, "grad_norm": 0.158203125, "learning_rate": 0.0003225628290453959, "loss": 0.5164, "step": 120160 }, { "epoch": 5.9685109764577335, "grad_norm": 0.134765625, "learning_rate": 0.0003225230952617463, "loss": 0.4974, "step": 120170 }, { "epoch": 5.969007648753353, "grad_norm": 0.1552734375, "learning_rate": 0.00032248336147809673, "loss": 0.5121, "step": 120180 }, { "epoch": 5.969504321048972, "grad_norm": 0.11572265625, "learning_rate": 0.0003224436276944472, "loss": 0.5109, "step": 120190 }, { "epoch": 5.970000993344591, "grad_norm": 0.1337890625, "learning_rate": 0.0003224038939107977, "loss": 0.5188, "step": 120200 }, { "epoch": 5.9704976656402104, "grad_norm": 0.15234375, "learning_rate": 0.00032236416012714815, "loss": 0.4893, "step": 120210 }, { "epoch": 5.97099433793583, "grad_norm": 0.15234375, "learning_rate": 0.00032232442634349856, "loss": 0.5092, "step": 120220 }, { "epoch": 5.971491010231449, "grad_norm": 0.1708984375, "learning_rate": 0.00032228469255984903, "loss": 0.496, "step": 120230 }, { "epoch": 5.971987682527069, "grad_norm": 0.119140625, "learning_rate": 0.0003222449587761995, "loss": 0.5161, "step": 120240 }, { "epoch": 5.972484354822688, "grad_norm": 0.14453125, "learning_rate": 0.0003222052249925499, "loss": 0.531, "step": 120250 }, { "epoch": 5.9729810271183075, "grad_norm": 0.1943359375, "learning_rate": 0.0003221654912089004, "loss": 0.4973, "step": 120260 }, { "epoch": 5.973477699413927, "grad_norm": 0.11572265625, "learning_rate": 0.00032212575742525087, "loss": 0.5332, "step": 120270 }, { "epoch": 5.973974371709546, "grad_norm": 0.123046875, "learning_rate": 0.0003220860236416013, "loss": 0.5034, "step": 120280 }, { "epoch": 5.974471044005165, "grad_norm": 0.11962890625, "learning_rate": 0.00032204628985795175, "loss": 0.5177, "step": 120290 }, { "epoch": 5.974967716300784, "grad_norm": 0.1259765625, "learning_rate": 0.00032200655607430217, "loss": 0.4775, "step": 120300 }, { "epoch": 5.975464388596404, "grad_norm": 0.1337890625, "learning_rate": 0.00032196682229065264, "loss": 0.4971, "step": 120310 }, { "epoch": 5.975961060892024, "grad_norm": 0.1357421875, "learning_rate": 0.0003219270885070031, "loss": 0.5205, "step": 120320 }, { "epoch": 5.976457733187643, "grad_norm": 0.1337890625, "learning_rate": 0.00032188735472335353, "loss": 0.507, "step": 120330 }, { "epoch": 5.976954405483262, "grad_norm": 0.1337890625, "learning_rate": 0.000321847620939704, "loss": 0.5493, "step": 120340 }, { "epoch": 5.977451077778881, "grad_norm": 0.12109375, "learning_rate": 0.0003218078871560545, "loss": 0.4735, "step": 120350 }, { "epoch": 5.977947750074501, "grad_norm": 0.13671875, "learning_rate": 0.0003217681533724049, "loss": 0.5099, "step": 120360 }, { "epoch": 5.97844442237012, "grad_norm": 0.1474609375, "learning_rate": 0.00032172841958875536, "loss": 0.486, "step": 120370 }, { "epoch": 5.978941094665739, "grad_norm": 0.1376953125, "learning_rate": 0.0003216886858051058, "loss": 0.5223, "step": 120380 }, { "epoch": 5.979437766961359, "grad_norm": 0.1396484375, "learning_rate": 0.00032164895202145625, "loss": 0.489, "step": 120390 }, { "epoch": 5.979934439256978, "grad_norm": 0.1357421875, "learning_rate": 0.0003216092182378067, "loss": 0.4881, "step": 120400 }, { "epoch": 5.980431111552598, "grad_norm": 0.1337890625, "learning_rate": 0.00032156948445415714, "loss": 0.4873, "step": 120410 }, { "epoch": 5.980927783848217, "grad_norm": 0.12255859375, "learning_rate": 0.0003215297506705076, "loss": 0.4935, "step": 120420 }, { "epoch": 5.981424456143836, "grad_norm": 0.1376953125, "learning_rate": 0.0003214900168868581, "loss": 0.5151, "step": 120430 }, { "epoch": 5.981921128439455, "grad_norm": 0.12255859375, "learning_rate": 0.0003214502831032085, "loss": 0.4887, "step": 120440 }, { "epoch": 5.982417800735075, "grad_norm": 0.1328125, "learning_rate": 0.00032141054931955897, "loss": 0.4957, "step": 120450 }, { "epoch": 5.982914473030695, "grad_norm": 0.1240234375, "learning_rate": 0.00032137081553590944, "loss": 0.5141, "step": 120460 }, { "epoch": 5.983411145326314, "grad_norm": 0.1279296875, "learning_rate": 0.00032133108175225986, "loss": 0.4862, "step": 120470 }, { "epoch": 5.983907817621933, "grad_norm": 0.1357421875, "learning_rate": 0.00032129134796861033, "loss": 0.5018, "step": 120480 }, { "epoch": 5.984404489917552, "grad_norm": 0.1552734375, "learning_rate": 0.00032125161418496075, "loss": 0.5064, "step": 120490 }, { "epoch": 5.984901162213172, "grad_norm": 0.126953125, "learning_rate": 0.0003212118804013113, "loss": 0.4906, "step": 120500 }, { "epoch": 5.985397834508791, "grad_norm": 0.123046875, "learning_rate": 0.0003211721466176617, "loss": 0.4987, "step": 120510 }, { "epoch": 5.98589450680441, "grad_norm": 0.1337890625, "learning_rate": 0.0003211324128340121, "loss": 0.4978, "step": 120520 }, { "epoch": 5.98639117910003, "grad_norm": 0.11962890625, "learning_rate": 0.0003210926790503626, "loss": 0.4796, "step": 120530 }, { "epoch": 5.986887851395649, "grad_norm": 0.12451171875, "learning_rate": 0.00032105294526671305, "loss": 0.5211, "step": 120540 }, { "epoch": 5.987384523691269, "grad_norm": 0.115234375, "learning_rate": 0.00032101321148306347, "loss": 0.5118, "step": 120550 }, { "epoch": 5.987881195986888, "grad_norm": 0.1376953125, "learning_rate": 0.00032097347769941394, "loss": 0.5214, "step": 120560 }, { "epoch": 5.988377868282507, "grad_norm": 0.1376953125, "learning_rate": 0.0003209337439157644, "loss": 0.5044, "step": 120570 }, { "epoch": 5.988874540578126, "grad_norm": 0.1318359375, "learning_rate": 0.0003208940101321149, "loss": 0.4716, "step": 120580 }, { "epoch": 5.9893712128737455, "grad_norm": 0.123046875, "learning_rate": 0.0003208542763484653, "loss": 0.5025, "step": 120590 }, { "epoch": 5.989867885169366, "grad_norm": 0.1298828125, "learning_rate": 0.0003208145425648157, "loss": 0.4836, "step": 120600 }, { "epoch": 5.990364557464985, "grad_norm": 0.1259765625, "learning_rate": 0.0003207748087811662, "loss": 0.5115, "step": 120610 }, { "epoch": 5.990861229760604, "grad_norm": 0.11962890625, "learning_rate": 0.00032073507499751666, "loss": 0.4903, "step": 120620 }, { "epoch": 5.991357902056223, "grad_norm": 0.13671875, "learning_rate": 0.0003206953412138671, "loss": 0.4831, "step": 120630 }, { "epoch": 5.9918545743518425, "grad_norm": 0.1396484375, "learning_rate": 0.00032065560743021755, "loss": 0.4961, "step": 120640 }, { "epoch": 5.992351246647462, "grad_norm": 0.1181640625, "learning_rate": 0.000320615873646568, "loss": 0.4973, "step": 120650 }, { "epoch": 5.992847918943081, "grad_norm": 0.1181640625, "learning_rate": 0.0003205761398629185, "loss": 0.4923, "step": 120660 }, { "epoch": 5.993344591238701, "grad_norm": 0.1416015625, "learning_rate": 0.0003205364060792689, "loss": 0.5084, "step": 120670 }, { "epoch": 5.99384126353432, "grad_norm": 0.11474609375, "learning_rate": 0.0003204966722956193, "loss": 0.5122, "step": 120680 }, { "epoch": 5.99433793582994, "grad_norm": 0.12255859375, "learning_rate": 0.00032045693851196985, "loss": 0.4817, "step": 120690 }, { "epoch": 5.994834608125559, "grad_norm": 0.1279296875, "learning_rate": 0.00032041720472832027, "loss": 0.5426, "step": 120700 }, { "epoch": 5.995331280421178, "grad_norm": 0.1435546875, "learning_rate": 0.00032037747094467074, "loss": 0.514, "step": 120710 }, { "epoch": 5.995827952716797, "grad_norm": 0.1806640625, "learning_rate": 0.00032033773716102116, "loss": 0.4534, "step": 120720 }, { "epoch": 5.9963246250124165, "grad_norm": 0.1337890625, "learning_rate": 0.0003202980033773716, "loss": 0.4894, "step": 120730 }, { "epoch": 5.996821297308037, "grad_norm": 0.1474609375, "learning_rate": 0.0003202582695937221, "loss": 0.495, "step": 120740 }, { "epoch": 5.997317969603656, "grad_norm": 0.126953125, "learning_rate": 0.0003202185358100725, "loss": 0.481, "step": 120750 }, { "epoch": 5.997814641899275, "grad_norm": 0.119140625, "learning_rate": 0.000320178802026423, "loss": 0.4966, "step": 120760 }, { "epoch": 5.998311314194894, "grad_norm": 0.12109375, "learning_rate": 0.00032013906824277346, "loss": 0.5028, "step": 120770 }, { "epoch": 5.9988079864905135, "grad_norm": 0.125, "learning_rate": 0.0003200993344591239, "loss": 0.4975, "step": 120780 }, { "epoch": 5.999304658786133, "grad_norm": 0.11669921875, "learning_rate": 0.00032005960067547435, "loss": 0.4972, "step": 120790 }, { "epoch": 5.999801331081752, "grad_norm": 0.12890625, "learning_rate": 0.0003200198668918248, "loss": 0.5059, "step": 120800 }, { "epoch": 6.000298003377371, "grad_norm": 0.140625, "learning_rate": 0.00031998013310817523, "loss": 0.4947, "step": 120810 }, { "epoch": 6.000794675672991, "grad_norm": 0.1318359375, "learning_rate": 0.0003199403993245257, "loss": 0.4986, "step": 120820 }, { "epoch": 6.0012913479686105, "grad_norm": 0.1474609375, "learning_rate": 0.0003199006655408761, "loss": 0.4952, "step": 120830 }, { "epoch": 6.00178802026423, "grad_norm": 0.111328125, "learning_rate": 0.0003198609317572266, "loss": 0.4735, "step": 120840 }, { "epoch": 6.002284692559849, "grad_norm": 0.140625, "learning_rate": 0.00031982119797357707, "loss": 0.5047, "step": 120850 }, { "epoch": 6.002781364855468, "grad_norm": 0.2119140625, "learning_rate": 0.0003197814641899275, "loss": 0.5082, "step": 120860 }, { "epoch": 6.003278037151087, "grad_norm": 0.11865234375, "learning_rate": 0.00031974173040627795, "loss": 0.5107, "step": 120870 }, { "epoch": 6.003774709446707, "grad_norm": 0.11962890625, "learning_rate": 0.0003197019966226284, "loss": 0.488, "step": 120880 }, { "epoch": 6.004271381742327, "grad_norm": 0.12890625, "learning_rate": 0.00031966226283897884, "loss": 0.4976, "step": 120890 }, { "epoch": 6.004768054037946, "grad_norm": 0.11572265625, "learning_rate": 0.0003196225290553293, "loss": 0.4974, "step": 120900 }, { "epoch": 6.005264726333565, "grad_norm": 0.134765625, "learning_rate": 0.00031958279527167973, "loss": 0.4583, "step": 120910 }, { "epoch": 6.0057613986291845, "grad_norm": 0.1298828125, "learning_rate": 0.0003195430614880302, "loss": 0.4876, "step": 120920 }, { "epoch": 6.006258070924804, "grad_norm": 0.13671875, "learning_rate": 0.0003195033277043807, "loss": 0.4885, "step": 120930 }, { "epoch": 6.006754743220423, "grad_norm": 0.1376953125, "learning_rate": 0.0003194635939207311, "loss": 0.5095, "step": 120940 }, { "epoch": 6.007251415516042, "grad_norm": 0.1611328125, "learning_rate": 0.00031942386013708156, "loss": 0.4792, "step": 120950 }, { "epoch": 6.007748087811662, "grad_norm": 0.19921875, "learning_rate": 0.00031938412635343203, "loss": 0.4897, "step": 120960 }, { "epoch": 6.0082447601072815, "grad_norm": 0.12890625, "learning_rate": 0.00031934439256978245, "loss": 0.4957, "step": 120970 }, { "epoch": 6.008741432402901, "grad_norm": 0.146484375, "learning_rate": 0.0003193046587861329, "loss": 0.4716, "step": 120980 }, { "epoch": 6.00923810469852, "grad_norm": 0.12109375, "learning_rate": 0.0003192649250024834, "loss": 0.4969, "step": 120990 }, { "epoch": 6.009734776994139, "grad_norm": 0.115234375, "learning_rate": 0.0003192251912188338, "loss": 0.4757, "step": 121000 }, { "epoch": 6.010231449289758, "grad_norm": 0.12890625, "learning_rate": 0.0003191854574351843, "loss": 0.4886, "step": 121010 }, { "epoch": 6.010728121585378, "grad_norm": 0.125, "learning_rate": 0.0003191457236515347, "loss": 0.4558, "step": 121020 }, { "epoch": 6.011224793880998, "grad_norm": 0.12060546875, "learning_rate": 0.0003191059898678852, "loss": 0.476, "step": 121030 }, { "epoch": 6.011721466176617, "grad_norm": 0.162109375, "learning_rate": 0.00031906625608423564, "loss": 0.5165, "step": 121040 }, { "epoch": 6.012218138472236, "grad_norm": 0.1328125, "learning_rate": 0.00031902652230058606, "loss": 0.493, "step": 121050 }, { "epoch": 6.012714810767855, "grad_norm": 0.123046875, "learning_rate": 0.00031898678851693653, "loss": 0.4721, "step": 121060 }, { "epoch": 6.013211483063475, "grad_norm": 0.1279296875, "learning_rate": 0.000318947054733287, "loss": 0.5085, "step": 121070 }, { "epoch": 6.013708155359094, "grad_norm": 0.115234375, "learning_rate": 0.0003189073209496374, "loss": 0.4958, "step": 121080 }, { "epoch": 6.014204827654713, "grad_norm": 0.12451171875, "learning_rate": 0.0003188675871659879, "loss": 0.4813, "step": 121090 }, { "epoch": 6.014701499950333, "grad_norm": 0.11962890625, "learning_rate": 0.00031882785338233836, "loss": 0.4742, "step": 121100 }, { "epoch": 6.015198172245952, "grad_norm": 0.130859375, "learning_rate": 0.00031878811959868883, "loss": 0.484, "step": 121110 }, { "epoch": 6.015694844541572, "grad_norm": 0.12255859375, "learning_rate": 0.00031874838581503925, "loss": 0.4848, "step": 121120 }, { "epoch": 6.016191516837191, "grad_norm": 0.1513671875, "learning_rate": 0.00031870865203138967, "loss": 0.4943, "step": 121130 }, { "epoch": 6.01668818913281, "grad_norm": 0.11767578125, "learning_rate": 0.0003186689182477402, "loss": 0.4791, "step": 121140 }, { "epoch": 6.017184861428429, "grad_norm": 0.126953125, "learning_rate": 0.0003186291844640906, "loss": 0.4831, "step": 121150 }, { "epoch": 6.017681533724049, "grad_norm": 0.1494140625, "learning_rate": 0.0003185894506804411, "loss": 0.49, "step": 121160 }, { "epoch": 6.018178206019668, "grad_norm": 0.1572265625, "learning_rate": 0.0003185497168967915, "loss": 0.5079, "step": 121170 }, { "epoch": 6.018674878315288, "grad_norm": 0.12255859375, "learning_rate": 0.00031850998311314197, "loss": 0.4653, "step": 121180 }, { "epoch": 6.019171550610907, "grad_norm": 0.12890625, "learning_rate": 0.00031847024932949244, "loss": 0.479, "step": 121190 }, { "epoch": 6.019668222906526, "grad_norm": 0.130859375, "learning_rate": 0.00031843051554584286, "loss": 0.5137, "step": 121200 }, { "epoch": 6.020164895202146, "grad_norm": 0.1357421875, "learning_rate": 0.0003183907817621933, "loss": 0.4862, "step": 121210 }, { "epoch": 6.020661567497765, "grad_norm": 0.142578125, "learning_rate": 0.0003183510479785438, "loss": 0.5049, "step": 121220 }, { "epoch": 6.021158239793384, "grad_norm": 0.1259765625, "learning_rate": 0.0003183113141948942, "loss": 0.4808, "step": 121230 }, { "epoch": 6.021654912089003, "grad_norm": 0.126953125, "learning_rate": 0.0003182715804112447, "loss": 0.5062, "step": 121240 }, { "epoch": 6.022151584384623, "grad_norm": 0.1533203125, "learning_rate": 0.0003182318466275951, "loss": 0.4616, "step": 121250 }, { "epoch": 6.022648256680243, "grad_norm": 0.12353515625, "learning_rate": 0.0003181921128439456, "loss": 0.4979, "step": 121260 }, { "epoch": 6.023144928975862, "grad_norm": 0.1337890625, "learning_rate": 0.00031815237906029605, "loss": 0.479, "step": 121270 }, { "epoch": 6.023641601271481, "grad_norm": 0.1337890625, "learning_rate": 0.00031811264527664647, "loss": 0.5044, "step": 121280 }, { "epoch": 6.0241382735671, "grad_norm": 0.125, "learning_rate": 0.00031807291149299694, "loss": 0.4953, "step": 121290 }, { "epoch": 6.0246349458627195, "grad_norm": 0.12060546875, "learning_rate": 0.0003180331777093474, "loss": 0.477, "step": 121300 }, { "epoch": 6.025131618158339, "grad_norm": 0.1455078125, "learning_rate": 0.0003179934439256978, "loss": 0.4868, "step": 121310 }, { "epoch": 6.025628290453959, "grad_norm": 0.1416015625, "learning_rate": 0.0003179537101420483, "loss": 0.4859, "step": 121320 }, { "epoch": 6.026124962749578, "grad_norm": 0.12890625, "learning_rate": 0.00031791397635839877, "loss": 0.4923, "step": 121330 }, { "epoch": 6.026621635045197, "grad_norm": 0.12255859375, "learning_rate": 0.0003178742425747492, "loss": 0.5309, "step": 121340 }, { "epoch": 6.0271183073408165, "grad_norm": 0.1357421875, "learning_rate": 0.00031783450879109966, "loss": 0.4882, "step": 121350 }, { "epoch": 6.027614979636436, "grad_norm": 0.1259765625, "learning_rate": 0.0003177947750074501, "loss": 0.5061, "step": 121360 }, { "epoch": 6.028111651932055, "grad_norm": 0.142578125, "learning_rate": 0.00031775504122380055, "loss": 0.4982, "step": 121370 }, { "epoch": 6.028608324227674, "grad_norm": 0.115234375, "learning_rate": 0.000317715307440151, "loss": 0.4912, "step": 121380 }, { "epoch": 6.029104996523294, "grad_norm": 0.1357421875, "learning_rate": 0.00031767557365650143, "loss": 0.4962, "step": 121390 }, { "epoch": 6.029601668818914, "grad_norm": 0.1748046875, "learning_rate": 0.0003176358398728519, "loss": 0.5142, "step": 121400 }, { "epoch": 6.030098341114533, "grad_norm": 0.1220703125, "learning_rate": 0.0003175961060892024, "loss": 0.5126, "step": 121410 }, { "epoch": 6.030595013410152, "grad_norm": 0.1572265625, "learning_rate": 0.0003175563723055528, "loss": 0.4588, "step": 121420 }, { "epoch": 6.031091685705771, "grad_norm": 0.166015625, "learning_rate": 0.00031751663852190327, "loss": 0.4973, "step": 121430 }, { "epoch": 6.0315883580013905, "grad_norm": 0.12158203125, "learning_rate": 0.00031747690473825374, "loss": 0.504, "step": 121440 }, { "epoch": 6.03208503029701, "grad_norm": 0.11669921875, "learning_rate": 0.00031743717095460415, "loss": 0.5137, "step": 121450 }, { "epoch": 6.03258170259263, "grad_norm": 0.1142578125, "learning_rate": 0.0003173974371709546, "loss": 0.4801, "step": 121460 }, { "epoch": 6.033078374888249, "grad_norm": 0.1259765625, "learning_rate": 0.00031735770338730504, "loss": 0.4453, "step": 121470 }, { "epoch": 6.033575047183868, "grad_norm": 0.1337890625, "learning_rate": 0.0003173179696036555, "loss": 0.5028, "step": 121480 }, { "epoch": 6.0340717194794875, "grad_norm": 0.12890625, "learning_rate": 0.000317278235820006, "loss": 0.5093, "step": 121490 }, { "epoch": 6.034568391775107, "grad_norm": 0.11669921875, "learning_rate": 0.0003172385020363564, "loss": 0.4924, "step": 121500 }, { "epoch": 6.035065064070726, "grad_norm": 0.140625, "learning_rate": 0.0003171987682527069, "loss": 0.5078, "step": 121510 }, { "epoch": 6.035561736366345, "grad_norm": 0.1240234375, "learning_rate": 0.00031715903446905735, "loss": 0.4967, "step": 121520 }, { "epoch": 6.036058408661964, "grad_norm": 0.1396484375, "learning_rate": 0.0003171193006854078, "loss": 0.4631, "step": 121530 }, { "epoch": 6.0365550809575845, "grad_norm": 0.1083984375, "learning_rate": 0.00031707956690175823, "loss": 0.5028, "step": 121540 }, { "epoch": 6.037051753253204, "grad_norm": 0.12060546875, "learning_rate": 0.00031703983311810865, "loss": 0.4688, "step": 121550 }, { "epoch": 6.037548425548823, "grad_norm": 0.1181640625, "learning_rate": 0.0003170000993344592, "loss": 0.4739, "step": 121560 }, { "epoch": 6.038045097844442, "grad_norm": 0.1376953125, "learning_rate": 0.0003169603655508096, "loss": 0.5197, "step": 121570 }, { "epoch": 6.038541770140061, "grad_norm": 0.17578125, "learning_rate": 0.00031692063176716, "loss": 0.5127, "step": 121580 }, { "epoch": 6.039038442435681, "grad_norm": 0.138671875, "learning_rate": 0.0003168808979835105, "loss": 0.4882, "step": 121590 }, { "epoch": 6.0395351147313, "grad_norm": 0.1298828125, "learning_rate": 0.00031684116419986095, "loss": 0.4512, "step": 121600 }, { "epoch": 6.04003178702692, "grad_norm": 0.138671875, "learning_rate": 0.0003168014304162114, "loss": 0.508, "step": 121610 }, { "epoch": 6.040528459322539, "grad_norm": 0.123046875, "learning_rate": 0.00031676169663256184, "loss": 0.4672, "step": 121620 }, { "epoch": 6.0410251316181585, "grad_norm": 0.154296875, "learning_rate": 0.0003167219628489123, "loss": 0.5086, "step": 121630 }, { "epoch": 6.041521803913778, "grad_norm": 0.1240234375, "learning_rate": 0.0003166822290652628, "loss": 0.4855, "step": 121640 }, { "epoch": 6.042018476209397, "grad_norm": 0.1416015625, "learning_rate": 0.0003166424952816132, "loss": 0.4929, "step": 121650 }, { "epoch": 6.042515148505016, "grad_norm": 0.16015625, "learning_rate": 0.0003166027614979636, "loss": 0.5249, "step": 121660 }, { "epoch": 6.043011820800635, "grad_norm": 0.142578125, "learning_rate": 0.00031656302771431415, "loss": 0.5053, "step": 121670 }, { "epoch": 6.0435084930962555, "grad_norm": 0.1337890625, "learning_rate": 0.00031652329393066456, "loss": 0.4981, "step": 121680 }, { "epoch": 6.044005165391875, "grad_norm": 0.1474609375, "learning_rate": 0.00031648356014701503, "loss": 0.5165, "step": 121690 }, { "epoch": 6.044501837687494, "grad_norm": 0.150390625, "learning_rate": 0.00031644382636336545, "loss": 0.4961, "step": 121700 }, { "epoch": 6.044998509983113, "grad_norm": 0.10888671875, "learning_rate": 0.0003164040925797159, "loss": 0.4909, "step": 121710 }, { "epoch": 6.045495182278732, "grad_norm": 0.11572265625, "learning_rate": 0.0003163643587960664, "loss": 0.4829, "step": 121720 }, { "epoch": 6.045991854574352, "grad_norm": 0.158203125, "learning_rate": 0.0003163246250124168, "loss": 0.4891, "step": 121730 }, { "epoch": 6.046488526869971, "grad_norm": 0.12353515625, "learning_rate": 0.0003162848912287673, "loss": 0.4948, "step": 121740 }, { "epoch": 6.046985199165591, "grad_norm": 0.138671875, "learning_rate": 0.00031624515744511775, "loss": 0.5221, "step": 121750 }, { "epoch": 6.04748187146121, "grad_norm": 0.1474609375, "learning_rate": 0.00031620542366146817, "loss": 0.4994, "step": 121760 }, { "epoch": 6.047978543756829, "grad_norm": 0.130859375, "learning_rate": 0.00031616568987781864, "loss": 0.5257, "step": 121770 }, { "epoch": 6.048475216052449, "grad_norm": 0.11865234375, "learning_rate": 0.00031612595609416906, "loss": 0.4944, "step": 121780 }, { "epoch": 6.048971888348068, "grad_norm": 0.1279296875, "learning_rate": 0.00031608622231051953, "loss": 0.5183, "step": 121790 }, { "epoch": 6.049468560643687, "grad_norm": 0.11376953125, "learning_rate": 0.00031604648852687, "loss": 0.4723, "step": 121800 }, { "epoch": 6.049965232939306, "grad_norm": 0.15625, "learning_rate": 0.0003160067547432204, "loss": 0.4915, "step": 121810 }, { "epoch": 6.0504619052349256, "grad_norm": 0.130859375, "learning_rate": 0.0003159670209595709, "loss": 0.4774, "step": 121820 }, { "epoch": 6.050958577530546, "grad_norm": 0.1357421875, "learning_rate": 0.00031592728717592136, "loss": 0.5147, "step": 121830 }, { "epoch": 6.051455249826165, "grad_norm": 0.13671875, "learning_rate": 0.0003158875533922718, "loss": 0.4558, "step": 121840 }, { "epoch": 6.051951922121784, "grad_norm": 0.1259765625, "learning_rate": 0.00031584781960862225, "loss": 0.456, "step": 121850 }, { "epoch": 6.052448594417403, "grad_norm": 0.1318359375, "learning_rate": 0.0003158080858249727, "loss": 0.4759, "step": 121860 }, { "epoch": 6.052945266713023, "grad_norm": 0.146484375, "learning_rate": 0.00031576835204132314, "loss": 0.4621, "step": 121870 }, { "epoch": 6.053441939008642, "grad_norm": 0.11962890625, "learning_rate": 0.0003157286182576736, "loss": 0.5008, "step": 121880 }, { "epoch": 6.053938611304261, "grad_norm": 0.146484375, "learning_rate": 0.000315688884474024, "loss": 0.5374, "step": 121890 }, { "epoch": 6.054435283599881, "grad_norm": 0.1357421875, "learning_rate": 0.0003156491506903745, "loss": 0.4992, "step": 121900 }, { "epoch": 6.0549319558955, "grad_norm": 0.12890625, "learning_rate": 0.00031560941690672497, "loss": 0.5023, "step": 121910 }, { "epoch": 6.05542862819112, "grad_norm": 0.1279296875, "learning_rate": 0.0003155696831230754, "loss": 0.5027, "step": 121920 }, { "epoch": 6.055925300486739, "grad_norm": 0.1328125, "learning_rate": 0.00031552994933942586, "loss": 0.4602, "step": 121930 }, { "epoch": 6.056421972782358, "grad_norm": 0.126953125, "learning_rate": 0.00031549021555577633, "loss": 0.4808, "step": 121940 }, { "epoch": 6.056918645077977, "grad_norm": 0.12109375, "learning_rate": 0.00031545048177212675, "loss": 0.4805, "step": 121950 }, { "epoch": 6.0574153173735965, "grad_norm": 0.11962890625, "learning_rate": 0.0003154107479884772, "loss": 0.5113, "step": 121960 }, { "epoch": 6.057911989669217, "grad_norm": 0.1220703125, "learning_rate": 0.0003153710142048277, "loss": 0.4748, "step": 121970 }, { "epoch": 6.058408661964836, "grad_norm": 0.12158203125, "learning_rate": 0.00031533128042117816, "loss": 0.4888, "step": 121980 }, { "epoch": 6.058905334260455, "grad_norm": 0.12451171875, "learning_rate": 0.0003152915466375286, "loss": 0.4952, "step": 121990 }, { "epoch": 6.059402006556074, "grad_norm": 0.1875, "learning_rate": 0.000315251812853879, "loss": 0.4945, "step": 122000 }, { "epoch": 6.0598986788516935, "grad_norm": 0.1220703125, "learning_rate": 0.00031521207907022947, "loss": 0.4795, "step": 122010 }, { "epoch": 6.060395351147313, "grad_norm": 0.142578125, "learning_rate": 0.00031517234528657994, "loss": 0.4664, "step": 122020 }, { "epoch": 6.060892023442932, "grad_norm": 0.12158203125, "learning_rate": 0.00031513261150293035, "loss": 0.4937, "step": 122030 }, { "epoch": 6.061388695738552, "grad_norm": 0.1630859375, "learning_rate": 0.0003150928777192808, "loss": 0.5135, "step": 122040 }, { "epoch": 6.061885368034171, "grad_norm": 0.1279296875, "learning_rate": 0.0003150531439356313, "loss": 0.4935, "step": 122050 }, { "epoch": 6.0623820403297906, "grad_norm": 0.1630859375, "learning_rate": 0.00031501341015198177, "loss": 0.4765, "step": 122060 }, { "epoch": 6.06287871262541, "grad_norm": 0.1123046875, "learning_rate": 0.0003149736763683322, "loss": 0.4895, "step": 122070 }, { "epoch": 6.063375384921029, "grad_norm": 0.1455078125, "learning_rate": 0.0003149339425846826, "loss": 0.4835, "step": 122080 }, { "epoch": 6.063872057216648, "grad_norm": 0.15625, "learning_rate": 0.00031489420880103313, "loss": 0.4845, "step": 122090 }, { "epoch": 6.0643687295122675, "grad_norm": 0.11474609375, "learning_rate": 0.00031485447501738355, "loss": 0.4877, "step": 122100 }, { "epoch": 6.064865401807888, "grad_norm": 0.1220703125, "learning_rate": 0.00031481474123373396, "loss": 0.5008, "step": 122110 }, { "epoch": 6.065362074103507, "grad_norm": 0.1298828125, "learning_rate": 0.00031477500745008443, "loss": 0.5, "step": 122120 }, { "epoch": 6.065858746399126, "grad_norm": 0.134765625, "learning_rate": 0.0003147352736664349, "loss": 0.49, "step": 122130 }, { "epoch": 6.066355418694745, "grad_norm": 0.11962890625, "learning_rate": 0.0003146955398827854, "loss": 0.4981, "step": 122140 }, { "epoch": 6.0668520909903645, "grad_norm": 0.12890625, "learning_rate": 0.0003146558060991358, "loss": 0.4632, "step": 122150 }, { "epoch": 6.067348763285984, "grad_norm": 0.11474609375, "learning_rate": 0.00031461607231548627, "loss": 0.4868, "step": 122160 }, { "epoch": 6.067845435581603, "grad_norm": 0.126953125, "learning_rate": 0.00031457633853183674, "loss": 0.4793, "step": 122170 }, { "epoch": 6.068342107877223, "grad_norm": 0.1767578125, "learning_rate": 0.00031453660474818715, "loss": 0.5122, "step": 122180 }, { "epoch": 6.068838780172842, "grad_norm": 0.1328125, "learning_rate": 0.00031449687096453757, "loss": 0.4774, "step": 122190 }, { "epoch": 6.0693354524684615, "grad_norm": 0.134765625, "learning_rate": 0.0003144571371808881, "loss": 0.4966, "step": 122200 }, { "epoch": 6.069832124764081, "grad_norm": 0.1416015625, "learning_rate": 0.0003144174033972385, "loss": 0.4972, "step": 122210 }, { "epoch": 6.0703287970597, "grad_norm": 0.16015625, "learning_rate": 0.000314377669613589, "loss": 0.5048, "step": 122220 }, { "epoch": 6.070825469355319, "grad_norm": 0.123046875, "learning_rate": 0.0003143379358299394, "loss": 0.4965, "step": 122230 }, { "epoch": 6.071322141650938, "grad_norm": 0.1240234375, "learning_rate": 0.0003142982020462899, "loss": 0.489, "step": 122240 }, { "epoch": 6.071818813946558, "grad_norm": 0.1416015625, "learning_rate": 0.00031425846826264035, "loss": 0.4829, "step": 122250 }, { "epoch": 6.072315486242178, "grad_norm": 0.11767578125, "learning_rate": 0.00031421873447899076, "loss": 0.4837, "step": 122260 }, { "epoch": 6.072812158537797, "grad_norm": 0.162109375, "learning_rate": 0.00031417900069534123, "loss": 0.4843, "step": 122270 }, { "epoch": 6.073308830833416, "grad_norm": 0.138671875, "learning_rate": 0.0003141392669116917, "loss": 0.4852, "step": 122280 }, { "epoch": 6.073805503129035, "grad_norm": 0.11767578125, "learning_rate": 0.0003140995331280421, "loss": 0.502, "step": 122290 }, { "epoch": 6.074302175424655, "grad_norm": 0.12158203125, "learning_rate": 0.0003140597993443926, "loss": 0.4878, "step": 122300 }, { "epoch": 6.074798847720274, "grad_norm": 0.138671875, "learning_rate": 0.000314020065560743, "loss": 0.4589, "step": 122310 }, { "epoch": 6.075295520015893, "grad_norm": 0.1552734375, "learning_rate": 0.0003139803317770935, "loss": 0.4846, "step": 122320 }, { "epoch": 6.075792192311513, "grad_norm": 0.142578125, "learning_rate": 0.00031394059799344395, "loss": 0.4871, "step": 122330 }, { "epoch": 6.0762888646071325, "grad_norm": 0.1279296875, "learning_rate": 0.00031390086420979437, "loss": 0.4678, "step": 122340 }, { "epoch": 6.076785536902752, "grad_norm": 0.134765625, "learning_rate": 0.00031386113042614484, "loss": 0.4907, "step": 122350 }, { "epoch": 6.077282209198371, "grad_norm": 0.1318359375, "learning_rate": 0.0003138213966424953, "loss": 0.4916, "step": 122360 }, { "epoch": 6.07777888149399, "grad_norm": 0.14453125, "learning_rate": 0.00031378166285884573, "loss": 0.482, "step": 122370 }, { "epoch": 6.078275553789609, "grad_norm": 0.134765625, "learning_rate": 0.0003137419290751962, "loss": 0.518, "step": 122380 }, { "epoch": 6.078772226085229, "grad_norm": 0.11767578125, "learning_rate": 0.0003137021952915467, "loss": 0.5041, "step": 122390 }, { "epoch": 6.079268898380849, "grad_norm": 0.1259765625, "learning_rate": 0.0003136624615078971, "loss": 0.4746, "step": 122400 }, { "epoch": 6.079765570676468, "grad_norm": 0.13671875, "learning_rate": 0.00031362272772424756, "loss": 0.5145, "step": 122410 }, { "epoch": 6.080262242972087, "grad_norm": 0.11767578125, "learning_rate": 0.000313582993940598, "loss": 0.4585, "step": 122420 }, { "epoch": 6.080758915267706, "grad_norm": 0.125, "learning_rate": 0.0003135432601569485, "loss": 0.4651, "step": 122430 }, { "epoch": 6.081255587563326, "grad_norm": 0.130859375, "learning_rate": 0.0003135035263732989, "loss": 0.4972, "step": 122440 }, { "epoch": 6.081752259858945, "grad_norm": 0.12890625, "learning_rate": 0.00031346379258964934, "loss": 0.4813, "step": 122450 }, { "epoch": 6.082248932154564, "grad_norm": 0.12353515625, "learning_rate": 0.0003134240588059998, "loss": 0.5177, "step": 122460 }, { "epoch": 6.082745604450184, "grad_norm": 0.12158203125, "learning_rate": 0.0003133843250223503, "loss": 0.4936, "step": 122470 }, { "epoch": 6.083242276745803, "grad_norm": 0.1220703125, "learning_rate": 0.0003133445912387007, "loss": 0.4782, "step": 122480 }, { "epoch": 6.083738949041423, "grad_norm": 0.13671875, "learning_rate": 0.00031330485745505117, "loss": 0.4816, "step": 122490 }, { "epoch": 6.084235621337042, "grad_norm": 0.1298828125, "learning_rate": 0.00031326512367140164, "loss": 0.4851, "step": 122500 }, { "epoch": 6.084732293632661, "grad_norm": 0.12890625, "learning_rate": 0.0003132253898877521, "loss": 0.5047, "step": 122510 }, { "epoch": 6.08522896592828, "grad_norm": 0.1748046875, "learning_rate": 0.00031318565610410253, "loss": 0.4938, "step": 122520 }, { "epoch": 6.0857256382239, "grad_norm": 0.1279296875, "learning_rate": 0.00031314592232045295, "loss": 0.4875, "step": 122530 }, { "epoch": 6.086222310519519, "grad_norm": 0.1201171875, "learning_rate": 0.00031310618853680347, "loss": 0.5155, "step": 122540 }, { "epoch": 6.086718982815139, "grad_norm": 0.1298828125, "learning_rate": 0.0003130664547531539, "loss": 0.4958, "step": 122550 }, { "epoch": 6.087215655110758, "grad_norm": 0.12890625, "learning_rate": 0.0003130267209695043, "loss": 0.4881, "step": 122560 }, { "epoch": 6.087712327406377, "grad_norm": 0.162109375, "learning_rate": 0.0003129869871858548, "loss": 0.4943, "step": 122570 }, { "epoch": 6.088208999701997, "grad_norm": 0.1328125, "learning_rate": 0.00031294725340220525, "loss": 0.5165, "step": 122580 }, { "epoch": 6.088705671997616, "grad_norm": 0.1396484375, "learning_rate": 0.0003129075196185557, "loss": 0.5117, "step": 122590 }, { "epoch": 6.089202344293235, "grad_norm": 0.1298828125, "learning_rate": 0.00031286778583490614, "loss": 0.5006, "step": 122600 }, { "epoch": 6.089699016588854, "grad_norm": 0.1328125, "learning_rate": 0.00031282805205125656, "loss": 0.4859, "step": 122610 }, { "epoch": 6.090195688884474, "grad_norm": 0.1767578125, "learning_rate": 0.0003127883182676071, "loss": 0.522, "step": 122620 }, { "epoch": 6.090692361180094, "grad_norm": 0.1279296875, "learning_rate": 0.0003127485844839575, "loss": 0.5042, "step": 122630 }, { "epoch": 6.091189033475713, "grad_norm": 0.1181640625, "learning_rate": 0.0003127088507003079, "loss": 0.4801, "step": 122640 }, { "epoch": 6.091685705771332, "grad_norm": 0.1318359375, "learning_rate": 0.0003126691169166584, "loss": 0.4733, "step": 122650 }, { "epoch": 6.092182378066951, "grad_norm": 0.16015625, "learning_rate": 0.00031262938313300886, "loss": 0.497, "step": 122660 }, { "epoch": 6.0926790503625705, "grad_norm": 0.1416015625, "learning_rate": 0.00031258964934935933, "loss": 0.4845, "step": 122670 }, { "epoch": 6.09317572265819, "grad_norm": 0.1259765625, "learning_rate": 0.00031254991556570975, "loss": 0.4707, "step": 122680 }, { "epoch": 6.09367239495381, "grad_norm": 0.12255859375, "learning_rate": 0.0003125101817820602, "loss": 0.4533, "step": 122690 }, { "epoch": 6.094169067249429, "grad_norm": 0.1357421875, "learning_rate": 0.0003124704479984107, "loss": 0.5105, "step": 122700 }, { "epoch": 6.094665739545048, "grad_norm": 0.1025390625, "learning_rate": 0.0003124307142147611, "loss": 0.4662, "step": 122710 }, { "epoch": 6.0951624118406675, "grad_norm": 0.1259765625, "learning_rate": 0.0003123909804311115, "loss": 0.474, "step": 122720 }, { "epoch": 6.095659084136287, "grad_norm": 0.15234375, "learning_rate": 0.00031235124664746205, "loss": 0.4727, "step": 122730 }, { "epoch": 6.096155756431906, "grad_norm": 0.1435546875, "learning_rate": 0.00031231151286381247, "loss": 0.4855, "step": 122740 }, { "epoch": 6.096652428727525, "grad_norm": 0.125, "learning_rate": 0.00031227177908016294, "loss": 0.4932, "step": 122750 }, { "epoch": 6.097149101023145, "grad_norm": 0.125, "learning_rate": 0.00031223204529651335, "loss": 0.4787, "step": 122760 }, { "epoch": 6.097645773318765, "grad_norm": 0.1328125, "learning_rate": 0.0003121923115128638, "loss": 0.4811, "step": 122770 }, { "epoch": 6.098142445614384, "grad_norm": 0.119140625, "learning_rate": 0.0003121525777292143, "loss": 0.5072, "step": 122780 }, { "epoch": 6.098639117910003, "grad_norm": 0.1279296875, "learning_rate": 0.0003121128439455647, "loss": 0.4753, "step": 122790 }, { "epoch": 6.099135790205622, "grad_norm": 0.1865234375, "learning_rate": 0.0003120731101619152, "loss": 0.4942, "step": 122800 }, { "epoch": 6.0996324625012415, "grad_norm": 0.134765625, "learning_rate": 0.00031203337637826566, "loss": 0.5047, "step": 122810 }, { "epoch": 6.100129134796861, "grad_norm": 0.12890625, "learning_rate": 0.0003119936425946161, "loss": 0.4998, "step": 122820 }, { "epoch": 6.100625807092481, "grad_norm": 0.12353515625, "learning_rate": 0.00031195390881096655, "loss": 0.5084, "step": 122830 }, { "epoch": 6.1011224793881, "grad_norm": 0.12890625, "learning_rate": 0.000311914175027317, "loss": 0.4893, "step": 122840 }, { "epoch": 6.101619151683719, "grad_norm": 0.12353515625, "learning_rate": 0.00031187444124366743, "loss": 0.4915, "step": 122850 }, { "epoch": 6.1021158239793385, "grad_norm": 0.11767578125, "learning_rate": 0.0003118347074600179, "loss": 0.4839, "step": 122860 }, { "epoch": 6.102612496274958, "grad_norm": 0.1416015625, "learning_rate": 0.0003117949736763683, "loss": 0.4837, "step": 122870 }, { "epoch": 6.103109168570577, "grad_norm": 0.130859375, "learning_rate": 0.0003117552398927188, "loss": 0.5083, "step": 122880 }, { "epoch": 6.103605840866196, "grad_norm": 0.1259765625, "learning_rate": 0.00031171550610906927, "loss": 0.477, "step": 122890 }, { "epoch": 6.104102513161815, "grad_norm": 0.1279296875, "learning_rate": 0.0003116757723254197, "loss": 0.4669, "step": 122900 }, { "epoch": 6.1045991854574355, "grad_norm": 0.158203125, "learning_rate": 0.00031163603854177015, "loss": 0.5053, "step": 122910 }, { "epoch": 6.105095857753055, "grad_norm": 0.12890625, "learning_rate": 0.0003115963047581206, "loss": 0.4852, "step": 122920 }, { "epoch": 6.105592530048674, "grad_norm": 0.2294921875, "learning_rate": 0.00031155657097447104, "loss": 0.4933, "step": 122930 }, { "epoch": 6.106089202344293, "grad_norm": 0.15625, "learning_rate": 0.0003115168371908215, "loss": 0.4907, "step": 122940 }, { "epoch": 6.106585874639912, "grad_norm": 0.1396484375, "learning_rate": 0.00031147710340717193, "loss": 0.473, "step": 122950 }, { "epoch": 6.107082546935532, "grad_norm": 0.1259765625, "learning_rate": 0.00031143736962352246, "loss": 0.4875, "step": 122960 }, { "epoch": 6.107579219231151, "grad_norm": 0.1357421875, "learning_rate": 0.0003113976358398729, "loss": 0.4991, "step": 122970 }, { "epoch": 6.108075891526771, "grad_norm": 0.1533203125, "learning_rate": 0.0003113579020562233, "loss": 0.5012, "step": 122980 }, { "epoch": 6.10857256382239, "grad_norm": 0.12158203125, "learning_rate": 0.00031131816827257376, "loss": 0.4928, "step": 122990 }, { "epoch": 6.1090692361180094, "grad_norm": 0.203125, "learning_rate": 0.00031127843448892423, "loss": 0.5107, "step": 123000 }, { "epoch": 6.109565908413629, "grad_norm": 0.126953125, "learning_rate": 0.00031123870070527465, "loss": 0.5096, "step": 123010 }, { "epoch": 6.110062580709248, "grad_norm": 0.1220703125, "learning_rate": 0.0003111989669216251, "loss": 0.5067, "step": 123020 }, { "epoch": 6.110559253004867, "grad_norm": 0.11328125, "learning_rate": 0.0003111592331379756, "loss": 0.4906, "step": 123030 }, { "epoch": 6.111055925300486, "grad_norm": 0.1796875, "learning_rate": 0.00031111949935432606, "loss": 0.5247, "step": 123040 }, { "epoch": 6.1115525975961065, "grad_norm": 0.11376953125, "learning_rate": 0.0003110797655706765, "loss": 0.4742, "step": 123050 }, { "epoch": 6.112049269891726, "grad_norm": 0.12890625, "learning_rate": 0.0003110400317870269, "loss": 0.5073, "step": 123060 }, { "epoch": 6.112545942187345, "grad_norm": 0.1337890625, "learning_rate": 0.0003110002980033774, "loss": 0.5044, "step": 123070 }, { "epoch": 6.113042614482964, "grad_norm": 0.1396484375, "learning_rate": 0.00031096056421972784, "loss": 0.4577, "step": 123080 }, { "epoch": 6.113539286778583, "grad_norm": 0.1357421875, "learning_rate": 0.00031092083043607826, "loss": 0.5072, "step": 123090 }, { "epoch": 6.114035959074203, "grad_norm": 0.2177734375, "learning_rate": 0.00031088109665242873, "loss": 0.4861, "step": 123100 }, { "epoch": 6.114532631369822, "grad_norm": 0.203125, "learning_rate": 0.0003108413628687792, "loss": 0.4976, "step": 123110 }, { "epoch": 6.115029303665442, "grad_norm": 0.1123046875, "learning_rate": 0.00031080162908512967, "loss": 0.5007, "step": 123120 }, { "epoch": 6.115525975961061, "grad_norm": 0.134765625, "learning_rate": 0.0003107618953014801, "loss": 0.508, "step": 123130 }, { "epoch": 6.11602264825668, "grad_norm": 0.1318359375, "learning_rate": 0.00031072216151783056, "loss": 0.4942, "step": 123140 }, { "epoch": 6.1165193205523, "grad_norm": 0.1376953125, "learning_rate": 0.00031068242773418103, "loss": 0.4963, "step": 123150 }, { "epoch": 6.117015992847919, "grad_norm": 0.1552734375, "learning_rate": 0.00031064269395053145, "loss": 0.5308, "step": 123160 }, { "epoch": 6.117512665143538, "grad_norm": 0.138671875, "learning_rate": 0.00031060296016688187, "loss": 0.513, "step": 123170 }, { "epoch": 6.118009337439157, "grad_norm": 0.1767578125, "learning_rate": 0.00031056322638323234, "loss": 0.5011, "step": 123180 }, { "epoch": 6.118506009734777, "grad_norm": 0.1416015625, "learning_rate": 0.0003105234925995828, "loss": 0.4966, "step": 123190 }, { "epoch": 6.119002682030397, "grad_norm": 0.1328125, "learning_rate": 0.0003104837588159333, "loss": 0.5091, "step": 123200 }, { "epoch": 6.119499354326016, "grad_norm": 0.138671875, "learning_rate": 0.0003104440250322837, "loss": 0.5286, "step": 123210 }, { "epoch": 6.119996026621635, "grad_norm": 0.154296875, "learning_rate": 0.00031040429124863417, "loss": 0.5122, "step": 123220 }, { "epoch": 6.120492698917254, "grad_norm": 0.125, "learning_rate": 0.00031036455746498464, "loss": 0.4795, "step": 123230 }, { "epoch": 6.120989371212874, "grad_norm": 0.1455078125, "learning_rate": 0.00031032482368133506, "loss": 0.4663, "step": 123240 }, { "epoch": 6.121486043508493, "grad_norm": 0.130859375, "learning_rate": 0.00031028508989768553, "loss": 0.5169, "step": 123250 }, { "epoch": 6.121982715804112, "grad_norm": 0.130859375, "learning_rate": 0.000310245356114036, "loss": 0.5197, "step": 123260 }, { "epoch": 6.122479388099732, "grad_norm": 0.115234375, "learning_rate": 0.0003102056223303864, "loss": 0.4805, "step": 123270 }, { "epoch": 6.122976060395351, "grad_norm": 0.119140625, "learning_rate": 0.0003101658885467369, "loss": 0.478, "step": 123280 }, { "epoch": 6.123472732690971, "grad_norm": 0.11962890625, "learning_rate": 0.0003101261547630873, "loss": 0.507, "step": 123290 }, { "epoch": 6.12396940498659, "grad_norm": 0.1220703125, "learning_rate": 0.0003100864209794378, "loss": 0.4978, "step": 123300 }, { "epoch": 6.124466077282209, "grad_norm": 0.13671875, "learning_rate": 0.00031004668719578825, "loss": 0.4718, "step": 123310 }, { "epoch": 6.124962749577828, "grad_norm": 0.13671875, "learning_rate": 0.00031000695341213867, "loss": 0.4664, "step": 123320 }, { "epoch": 6.1254594218734475, "grad_norm": 0.1279296875, "learning_rate": 0.00030996721962848914, "loss": 0.4814, "step": 123330 }, { "epoch": 6.125956094169068, "grad_norm": 0.12890625, "learning_rate": 0.0003099274858448396, "loss": 0.5186, "step": 123340 }, { "epoch": 6.126452766464687, "grad_norm": 0.1650390625, "learning_rate": 0.00030988775206119, "loss": 0.4886, "step": 123350 }, { "epoch": 6.126949438760306, "grad_norm": 0.11328125, "learning_rate": 0.0003098480182775405, "loss": 0.514, "step": 123360 }, { "epoch": 6.127446111055925, "grad_norm": 0.1484375, "learning_rate": 0.00030980828449389097, "loss": 0.4787, "step": 123370 }, { "epoch": 6.1279427833515445, "grad_norm": 0.1279296875, "learning_rate": 0.0003097685507102414, "loss": 0.4772, "step": 123380 }, { "epoch": 6.128439455647164, "grad_norm": 0.1298828125, "learning_rate": 0.00030972881692659186, "loss": 0.4977, "step": 123390 }, { "epoch": 6.128936127942783, "grad_norm": 0.1396484375, "learning_rate": 0.0003096890831429423, "loss": 0.4828, "step": 123400 }, { "epoch": 6.129432800238403, "grad_norm": 0.12353515625, "learning_rate": 0.0003096493493592928, "loss": 0.5129, "step": 123410 }, { "epoch": 6.129929472534022, "grad_norm": 0.1259765625, "learning_rate": 0.0003096096155756432, "loss": 0.4896, "step": 123420 }, { "epoch": 6.1304261448296415, "grad_norm": 0.12890625, "learning_rate": 0.00030956988179199363, "loss": 0.5021, "step": 123430 }, { "epoch": 6.130922817125261, "grad_norm": 0.1416015625, "learning_rate": 0.0003095301480083441, "loss": 0.5188, "step": 123440 }, { "epoch": 6.13141948942088, "grad_norm": 0.1376953125, "learning_rate": 0.0003094904142246946, "loss": 0.4963, "step": 123450 }, { "epoch": 6.131916161716499, "grad_norm": 0.1279296875, "learning_rate": 0.000309450680441045, "loss": 0.4983, "step": 123460 }, { "epoch": 6.1324128340121185, "grad_norm": 0.12060546875, "learning_rate": 0.00030941094665739547, "loss": 0.4857, "step": 123470 }, { "epoch": 6.132909506307739, "grad_norm": 0.138671875, "learning_rate": 0.0003093712128737459, "loss": 0.4902, "step": 123480 }, { "epoch": 6.133406178603358, "grad_norm": 0.1181640625, "learning_rate": 0.0003093314790900964, "loss": 0.4763, "step": 123490 }, { "epoch": 6.133902850898977, "grad_norm": 0.12158203125, "learning_rate": 0.0003092917453064468, "loss": 0.4937, "step": 123500 }, { "epoch": 6.134399523194596, "grad_norm": 0.142578125, "learning_rate": 0.00030925201152279724, "loss": 0.4926, "step": 123510 }, { "epoch": 6.1348961954902155, "grad_norm": 0.11962890625, "learning_rate": 0.0003092122777391477, "loss": 0.5156, "step": 123520 }, { "epoch": 6.135392867785835, "grad_norm": 0.115234375, "learning_rate": 0.0003091725439554982, "loss": 0.4528, "step": 123530 }, { "epoch": 6.135889540081454, "grad_norm": 0.12060546875, "learning_rate": 0.0003091328101718486, "loss": 0.4661, "step": 123540 }, { "epoch": 6.136386212377074, "grad_norm": 0.1396484375, "learning_rate": 0.0003090930763881991, "loss": 0.4997, "step": 123550 }, { "epoch": 6.136882884672693, "grad_norm": 0.125, "learning_rate": 0.00030905334260454954, "loss": 0.5024, "step": 123560 }, { "epoch": 6.1373795569683125, "grad_norm": 0.1474609375, "learning_rate": 0.0003090136088209, "loss": 0.5092, "step": 123570 }, { "epoch": 6.137876229263932, "grad_norm": 0.1259765625, "learning_rate": 0.00030897387503725043, "loss": 0.462, "step": 123580 }, { "epoch": 6.138372901559551, "grad_norm": 0.11962890625, "learning_rate": 0.00030893414125360085, "loss": 0.5407, "step": 123590 }, { "epoch": 6.13886957385517, "grad_norm": 0.15234375, "learning_rate": 0.0003088944074699514, "loss": 0.4835, "step": 123600 }, { "epoch": 6.139366246150789, "grad_norm": 0.16015625, "learning_rate": 0.0003088546736863018, "loss": 0.5255, "step": 123610 }, { "epoch": 6.139862918446409, "grad_norm": 0.12451171875, "learning_rate": 0.0003088149399026522, "loss": 0.4959, "step": 123620 }, { "epoch": 6.140359590742029, "grad_norm": 0.126953125, "learning_rate": 0.0003087752061190027, "loss": 0.4974, "step": 123630 }, { "epoch": 6.140856263037648, "grad_norm": 0.158203125, "learning_rate": 0.00030873547233535315, "loss": 0.4779, "step": 123640 }, { "epoch": 6.141352935333267, "grad_norm": 0.1591796875, "learning_rate": 0.0003086957385517036, "loss": 0.51, "step": 123650 }, { "epoch": 6.141849607628886, "grad_norm": 0.150390625, "learning_rate": 0.00030865600476805404, "loss": 0.4807, "step": 123660 }, { "epoch": 6.142346279924506, "grad_norm": 0.130859375, "learning_rate": 0.0003086162709844045, "loss": 0.482, "step": 123670 }, { "epoch": 6.142842952220125, "grad_norm": 0.1318359375, "learning_rate": 0.000308576537200755, "loss": 0.4833, "step": 123680 }, { "epoch": 6.143339624515744, "grad_norm": 0.12060546875, "learning_rate": 0.0003085368034171054, "loss": 0.4663, "step": 123690 }, { "epoch": 6.143836296811364, "grad_norm": 0.14453125, "learning_rate": 0.00030849706963345587, "loss": 0.5121, "step": 123700 }, { "epoch": 6.1443329691069835, "grad_norm": 0.15234375, "learning_rate": 0.0003084573358498063, "loss": 0.5233, "step": 123710 }, { "epoch": 6.144829641402603, "grad_norm": 0.1298828125, "learning_rate": 0.00030841760206615676, "loss": 0.4788, "step": 123720 }, { "epoch": 6.145326313698222, "grad_norm": 0.1220703125, "learning_rate": 0.00030837786828250723, "loss": 0.5001, "step": 123730 }, { "epoch": 6.145822985993841, "grad_norm": 0.1259765625, "learning_rate": 0.00030833813449885765, "loss": 0.4717, "step": 123740 }, { "epoch": 6.14631965828946, "grad_norm": 0.1337890625, "learning_rate": 0.0003082984007152081, "loss": 0.4938, "step": 123750 }, { "epoch": 6.14681633058508, "grad_norm": 0.1416015625, "learning_rate": 0.0003082586669315586, "loss": 0.4876, "step": 123760 }, { "epoch": 6.1473130028807, "grad_norm": 0.1318359375, "learning_rate": 0.000308218933147909, "loss": 0.5093, "step": 123770 }, { "epoch": 6.147809675176319, "grad_norm": 0.11572265625, "learning_rate": 0.0003081791993642595, "loss": 0.4692, "step": 123780 }, { "epoch": 6.148306347471938, "grad_norm": 0.1474609375, "learning_rate": 0.00030813946558060995, "loss": 0.5277, "step": 123790 }, { "epoch": 6.148803019767557, "grad_norm": 0.126953125, "learning_rate": 0.00030809973179696037, "loss": 0.4778, "step": 123800 }, { "epoch": 6.149299692063177, "grad_norm": 0.1611328125, "learning_rate": 0.00030805999801331084, "loss": 0.4829, "step": 123810 }, { "epoch": 6.149796364358796, "grad_norm": 0.126953125, "learning_rate": 0.00030802026422966126, "loss": 0.5135, "step": 123820 }, { "epoch": 6.150293036654415, "grad_norm": 0.12353515625, "learning_rate": 0.00030798053044601173, "loss": 0.4854, "step": 123830 }, { "epoch": 6.150789708950035, "grad_norm": 0.1298828125, "learning_rate": 0.0003079407966623622, "loss": 0.4766, "step": 123840 }, { "epoch": 6.151286381245654, "grad_norm": 0.1298828125, "learning_rate": 0.0003079010628787126, "loss": 0.4788, "step": 123850 }, { "epoch": 6.151783053541274, "grad_norm": 0.12890625, "learning_rate": 0.0003078613290950631, "loss": 0.4611, "step": 123860 }, { "epoch": 6.152279725836893, "grad_norm": 0.140625, "learning_rate": 0.00030782159531141356, "loss": 0.4839, "step": 123870 }, { "epoch": 6.152776398132512, "grad_norm": 0.126953125, "learning_rate": 0.000307781861527764, "loss": 0.4894, "step": 123880 }, { "epoch": 6.153273070428131, "grad_norm": 0.1279296875, "learning_rate": 0.00030774212774411445, "loss": 0.5046, "step": 123890 }, { "epoch": 6.1537697427237505, "grad_norm": 0.1357421875, "learning_rate": 0.0003077023939604649, "loss": 0.4849, "step": 123900 }, { "epoch": 6.15426641501937, "grad_norm": 0.1708984375, "learning_rate": 0.00030766266017681534, "loss": 0.4949, "step": 123910 }, { "epoch": 6.15476308731499, "grad_norm": 0.126953125, "learning_rate": 0.0003076229263931658, "loss": 0.4976, "step": 123920 }, { "epoch": 6.155259759610609, "grad_norm": 0.162109375, "learning_rate": 0.0003075831926095162, "loss": 0.4847, "step": 123930 }, { "epoch": 6.155756431906228, "grad_norm": 0.1162109375, "learning_rate": 0.00030754345882586675, "loss": 0.479, "step": 123940 }, { "epoch": 6.156253104201848, "grad_norm": 0.12890625, "learning_rate": 0.00030750372504221717, "loss": 0.5073, "step": 123950 }, { "epoch": 6.156749776497467, "grad_norm": 0.1318359375, "learning_rate": 0.0003074639912585676, "loss": 0.5041, "step": 123960 }, { "epoch": 6.157246448793086, "grad_norm": 0.150390625, "learning_rate": 0.00030742425747491806, "loss": 0.4983, "step": 123970 }, { "epoch": 6.157743121088705, "grad_norm": 0.1435546875, "learning_rate": 0.00030738452369126853, "loss": 0.4878, "step": 123980 }, { "epoch": 6.158239793384325, "grad_norm": 0.130859375, "learning_rate": 0.00030734478990761895, "loss": 0.4951, "step": 123990 }, { "epoch": 6.158736465679945, "grad_norm": 0.1435546875, "learning_rate": 0.0003073050561239694, "loss": 0.515, "step": 124000 }, { "epoch": 6.159233137975564, "grad_norm": 0.1513671875, "learning_rate": 0.00030726532234031983, "loss": 0.4662, "step": 124010 }, { "epoch": 6.159729810271183, "grad_norm": 0.13671875, "learning_rate": 0.00030722558855667036, "loss": 0.4846, "step": 124020 }, { "epoch": 6.160226482566802, "grad_norm": 0.2216796875, "learning_rate": 0.0003071858547730208, "loss": 0.4826, "step": 124030 }, { "epoch": 6.1607231548624215, "grad_norm": 0.1357421875, "learning_rate": 0.0003071461209893712, "loss": 0.5076, "step": 124040 }, { "epoch": 6.161219827158041, "grad_norm": 0.1328125, "learning_rate": 0.00030710638720572167, "loss": 0.4848, "step": 124050 }, { "epoch": 6.161716499453661, "grad_norm": 0.16796875, "learning_rate": 0.00030706665342207214, "loss": 0.4963, "step": 124060 }, { "epoch": 6.16221317174928, "grad_norm": 0.134765625, "learning_rate": 0.0003070269196384226, "loss": 0.4908, "step": 124070 }, { "epoch": 6.162709844044899, "grad_norm": 0.142578125, "learning_rate": 0.000306987185854773, "loss": 0.5055, "step": 124080 }, { "epoch": 6.1632065163405185, "grad_norm": 0.12451171875, "learning_rate": 0.0003069474520711235, "loss": 0.5082, "step": 124090 }, { "epoch": 6.163703188636138, "grad_norm": 0.1513671875, "learning_rate": 0.00030690771828747397, "loss": 0.5017, "step": 124100 }, { "epoch": 6.164199860931757, "grad_norm": 0.1376953125, "learning_rate": 0.0003068679845038244, "loss": 0.5244, "step": 124110 }, { "epoch": 6.164696533227376, "grad_norm": 0.123046875, "learning_rate": 0.0003068282507201748, "loss": 0.4966, "step": 124120 }, { "epoch": 6.165193205522996, "grad_norm": 0.11767578125, "learning_rate": 0.00030678851693652533, "loss": 0.4702, "step": 124130 }, { "epoch": 6.1656898778186155, "grad_norm": 0.1826171875, "learning_rate": 0.00030674878315287575, "loss": 0.4807, "step": 124140 }, { "epoch": 6.166186550114235, "grad_norm": 0.1318359375, "learning_rate": 0.0003067090493692262, "loss": 0.4972, "step": 124150 }, { "epoch": 6.166683222409854, "grad_norm": 0.1298828125, "learning_rate": 0.00030666931558557663, "loss": 0.4697, "step": 124160 }, { "epoch": 6.167179894705473, "grad_norm": 0.1259765625, "learning_rate": 0.0003066295818019271, "loss": 0.4771, "step": 124170 }, { "epoch": 6.1676765670010925, "grad_norm": 0.1552734375, "learning_rate": 0.0003065898480182776, "loss": 0.4711, "step": 124180 }, { "epoch": 6.168173239296712, "grad_norm": 0.126953125, "learning_rate": 0.000306550114234628, "loss": 0.4897, "step": 124190 }, { "epoch": 6.168669911592332, "grad_norm": 0.1376953125, "learning_rate": 0.00030651038045097846, "loss": 0.4892, "step": 124200 }, { "epoch": 6.169166583887951, "grad_norm": 0.126953125, "learning_rate": 0.00030647064666732894, "loss": 0.5014, "step": 124210 }, { "epoch": 6.16966325618357, "grad_norm": 0.150390625, "learning_rate": 0.00030643091288367935, "loss": 0.5003, "step": 124220 }, { "epoch": 6.1701599284791895, "grad_norm": 0.2001953125, "learning_rate": 0.0003063911791000298, "loss": 0.4752, "step": 124230 }, { "epoch": 6.170656600774809, "grad_norm": 0.1416015625, "learning_rate": 0.0003063514453163803, "loss": 0.5083, "step": 124240 }, { "epoch": 6.171153273070428, "grad_norm": 0.1240234375, "learning_rate": 0.0003063117115327307, "loss": 0.487, "step": 124250 }, { "epoch": 6.171649945366047, "grad_norm": 0.12255859375, "learning_rate": 0.0003062719777490812, "loss": 0.4781, "step": 124260 }, { "epoch": 6.172146617661667, "grad_norm": 0.1337890625, "learning_rate": 0.0003062322439654316, "loss": 0.5144, "step": 124270 }, { "epoch": 6.1726432899572865, "grad_norm": 0.146484375, "learning_rate": 0.0003061925101817821, "loss": 0.4547, "step": 124280 }, { "epoch": 6.173139962252906, "grad_norm": 0.1259765625, "learning_rate": 0.00030615277639813254, "loss": 0.5286, "step": 124290 }, { "epoch": 6.173636634548525, "grad_norm": 0.14453125, "learning_rate": 0.00030611304261448296, "loss": 0.4818, "step": 124300 }, { "epoch": 6.174133306844144, "grad_norm": 0.1474609375, "learning_rate": 0.00030607330883083343, "loss": 0.4961, "step": 124310 }, { "epoch": 6.174629979139763, "grad_norm": 0.12353515625, "learning_rate": 0.0003060335750471839, "loss": 0.4753, "step": 124320 }, { "epoch": 6.175126651435383, "grad_norm": 0.130859375, "learning_rate": 0.0003059938412635343, "loss": 0.4897, "step": 124330 }, { "epoch": 6.175623323731002, "grad_norm": 0.1318359375, "learning_rate": 0.0003059541074798848, "loss": 0.4628, "step": 124340 }, { "epoch": 6.176119996026622, "grad_norm": 0.1376953125, "learning_rate": 0.0003059143736962352, "loss": 0.5122, "step": 124350 }, { "epoch": 6.176616668322241, "grad_norm": 0.11962890625, "learning_rate": 0.0003058746399125857, "loss": 0.4718, "step": 124360 }, { "epoch": 6.17711334061786, "grad_norm": 0.130859375, "learning_rate": 0.00030583490612893615, "loss": 0.495, "step": 124370 }, { "epoch": 6.17761001291348, "grad_norm": 0.11572265625, "learning_rate": 0.00030579517234528657, "loss": 0.4725, "step": 124380 }, { "epoch": 6.178106685209099, "grad_norm": 0.16015625, "learning_rate": 0.00030575543856163704, "loss": 0.4962, "step": 124390 }, { "epoch": 6.178603357504718, "grad_norm": 0.1298828125, "learning_rate": 0.0003057157047779875, "loss": 0.4841, "step": 124400 }, { "epoch": 6.179100029800337, "grad_norm": 0.11962890625, "learning_rate": 0.00030567597099433793, "loss": 0.4938, "step": 124410 }, { "epoch": 6.1795967020959575, "grad_norm": 0.12255859375, "learning_rate": 0.0003056362372106884, "loss": 0.4639, "step": 124420 }, { "epoch": 6.180093374391577, "grad_norm": 0.1875, "learning_rate": 0.00030559650342703887, "loss": 0.4921, "step": 124430 }, { "epoch": 6.180590046687196, "grad_norm": 0.126953125, "learning_rate": 0.0003055567696433893, "loss": 0.4631, "step": 124440 }, { "epoch": 6.181086718982815, "grad_norm": 0.12353515625, "learning_rate": 0.00030551703585973976, "loss": 0.4965, "step": 124450 }, { "epoch": 6.181583391278434, "grad_norm": 0.1376953125, "learning_rate": 0.0003054773020760902, "loss": 0.4815, "step": 124460 }, { "epoch": 6.182080063574054, "grad_norm": 0.125, "learning_rate": 0.0003054375682924407, "loss": 0.4847, "step": 124470 }, { "epoch": 6.182576735869673, "grad_norm": 0.1298828125, "learning_rate": 0.0003053978345087911, "loss": 0.4787, "step": 124480 }, { "epoch": 6.183073408165293, "grad_norm": 0.126953125, "learning_rate": 0.00030535810072514154, "loss": 0.4774, "step": 124490 }, { "epoch": 6.183570080460912, "grad_norm": 0.138671875, "learning_rate": 0.000305318366941492, "loss": 0.4949, "step": 124500 }, { "epoch": 6.184066752756531, "grad_norm": 0.1328125, "learning_rate": 0.0003052786331578425, "loss": 0.4976, "step": 124510 }, { "epoch": 6.184563425052151, "grad_norm": 0.158203125, "learning_rate": 0.00030523889937419295, "loss": 0.4635, "step": 124520 }, { "epoch": 6.18506009734777, "grad_norm": 0.12158203125, "learning_rate": 0.00030519916559054337, "loss": 0.4866, "step": 124530 }, { "epoch": 6.185556769643389, "grad_norm": 0.140625, "learning_rate": 0.00030515943180689384, "loss": 0.4859, "step": 124540 }, { "epoch": 6.186053441939008, "grad_norm": 0.1533203125, "learning_rate": 0.0003051196980232443, "loss": 0.4943, "step": 124550 }, { "epoch": 6.186550114234628, "grad_norm": 0.1552734375, "learning_rate": 0.00030507996423959473, "loss": 0.4914, "step": 124560 }, { "epoch": 6.187046786530248, "grad_norm": 0.109375, "learning_rate": 0.00030504023045594515, "loss": 0.5256, "step": 124570 }, { "epoch": 6.187543458825867, "grad_norm": 0.12255859375, "learning_rate": 0.0003050004966722956, "loss": 0.5115, "step": 124580 }, { "epoch": 6.188040131121486, "grad_norm": 0.1337890625, "learning_rate": 0.0003049607628886461, "loss": 0.5121, "step": 124590 }, { "epoch": 6.188536803417105, "grad_norm": 0.1416015625, "learning_rate": 0.00030492102910499656, "loss": 0.4957, "step": 124600 }, { "epoch": 6.1890334757127246, "grad_norm": 0.1416015625, "learning_rate": 0.000304881295321347, "loss": 0.5048, "step": 124610 }, { "epoch": 6.189530148008344, "grad_norm": 0.1572265625, "learning_rate": 0.00030484156153769745, "loss": 0.4986, "step": 124620 }, { "epoch": 6.190026820303963, "grad_norm": 0.140625, "learning_rate": 0.0003048018277540479, "loss": 0.5007, "step": 124630 }, { "epoch": 6.190523492599583, "grad_norm": 0.12060546875, "learning_rate": 0.00030476209397039834, "loss": 0.4803, "step": 124640 }, { "epoch": 6.191020164895202, "grad_norm": 0.1328125, "learning_rate": 0.00030472236018674875, "loss": 0.5185, "step": 124650 }, { "epoch": 6.191516837190822, "grad_norm": 0.11572265625, "learning_rate": 0.0003046826264030993, "loss": 0.4786, "step": 124660 }, { "epoch": 6.192013509486441, "grad_norm": 0.150390625, "learning_rate": 0.0003046428926194497, "loss": 0.513, "step": 124670 }, { "epoch": 6.19251018178206, "grad_norm": 0.1552734375, "learning_rate": 0.00030460315883580017, "loss": 0.4815, "step": 124680 }, { "epoch": 6.193006854077679, "grad_norm": 0.2041015625, "learning_rate": 0.0003045634250521506, "loss": 0.5076, "step": 124690 }, { "epoch": 6.1935035263732985, "grad_norm": 0.12451171875, "learning_rate": 0.00030452369126850106, "loss": 0.4995, "step": 124700 }, { "epoch": 6.194000198668919, "grad_norm": 0.1533203125, "learning_rate": 0.00030448395748485153, "loss": 0.4794, "step": 124710 }, { "epoch": 6.194496870964538, "grad_norm": 0.126953125, "learning_rate": 0.00030444422370120195, "loss": 0.5052, "step": 124720 }, { "epoch": 6.194993543260157, "grad_norm": 0.150390625, "learning_rate": 0.0003044044899175524, "loss": 0.4816, "step": 124730 }, { "epoch": 6.195490215555776, "grad_norm": 0.1376953125, "learning_rate": 0.0003043647561339029, "loss": 0.4861, "step": 124740 }, { "epoch": 6.1959868878513955, "grad_norm": 0.123046875, "learning_rate": 0.0003043250223502533, "loss": 0.4902, "step": 124750 }, { "epoch": 6.196483560147015, "grad_norm": 0.11572265625, "learning_rate": 0.0003042852885666038, "loss": 0.4454, "step": 124760 }, { "epoch": 6.196980232442634, "grad_norm": 0.126953125, "learning_rate": 0.00030424555478295425, "loss": 0.5016, "step": 124770 }, { "epoch": 6.197476904738254, "grad_norm": 0.12451171875, "learning_rate": 0.00030420582099930467, "loss": 0.5039, "step": 124780 }, { "epoch": 6.197973577033873, "grad_norm": 0.1552734375, "learning_rate": 0.00030416608721565514, "loss": 0.5065, "step": 124790 }, { "epoch": 6.1984702493294925, "grad_norm": 0.11865234375, "learning_rate": 0.00030412635343200555, "loss": 0.4974, "step": 124800 }, { "epoch": 6.198966921625112, "grad_norm": 0.1572265625, "learning_rate": 0.000304086619648356, "loss": 0.4879, "step": 124810 }, { "epoch": 6.199463593920731, "grad_norm": 0.1337890625, "learning_rate": 0.0003040468858647065, "loss": 0.5344, "step": 124820 }, { "epoch": 6.19996026621635, "grad_norm": 0.1328125, "learning_rate": 0.0003040071520810569, "loss": 0.4836, "step": 124830 }, { "epoch": 6.200456938511969, "grad_norm": 0.125, "learning_rate": 0.0003039674182974074, "loss": 0.4812, "step": 124840 }, { "epoch": 6.2009536108075896, "grad_norm": 0.15234375, "learning_rate": 0.00030392768451375786, "loss": 0.4752, "step": 124850 }, { "epoch": 6.201450283103209, "grad_norm": 0.126953125, "learning_rate": 0.0003038879507301083, "loss": 0.4966, "step": 124860 }, { "epoch": 6.201946955398828, "grad_norm": 0.1376953125, "learning_rate": 0.00030384821694645874, "loss": 0.5055, "step": 124870 }, { "epoch": 6.202443627694447, "grad_norm": 0.11767578125, "learning_rate": 0.00030380848316280916, "loss": 0.491, "step": 124880 }, { "epoch": 6.2029402999900665, "grad_norm": 0.12109375, "learning_rate": 0.00030376874937915963, "loss": 0.4983, "step": 124890 }, { "epoch": 6.203436972285686, "grad_norm": 0.1484375, "learning_rate": 0.0003037290155955101, "loss": 0.4944, "step": 124900 }, { "epoch": 6.203933644581305, "grad_norm": 0.1396484375, "learning_rate": 0.0003036892818118605, "loss": 0.4925, "step": 124910 }, { "epoch": 6.204430316876925, "grad_norm": 0.150390625, "learning_rate": 0.000303649548028211, "loss": 0.49, "step": 124920 }, { "epoch": 6.204926989172544, "grad_norm": 0.1484375, "learning_rate": 0.00030360981424456146, "loss": 0.4816, "step": 124930 }, { "epoch": 6.2054236614681635, "grad_norm": 0.1337890625, "learning_rate": 0.0003035700804609119, "loss": 0.4919, "step": 124940 }, { "epoch": 6.205920333763783, "grad_norm": 0.125, "learning_rate": 0.00030353034667726235, "loss": 0.4759, "step": 124950 }, { "epoch": 6.206417006059402, "grad_norm": 0.2236328125, "learning_rate": 0.0003034906128936128, "loss": 0.5108, "step": 124960 }, { "epoch": 6.206913678355021, "grad_norm": 0.1416015625, "learning_rate": 0.0003034508791099633, "loss": 0.4897, "step": 124970 }, { "epoch": 6.20741035065064, "grad_norm": 0.1611328125, "learning_rate": 0.0003034111453263137, "loss": 0.4982, "step": 124980 }, { "epoch": 6.2079070229462605, "grad_norm": 0.1552734375, "learning_rate": 0.00030337141154266413, "loss": 0.5029, "step": 124990 }, { "epoch": 6.20840369524188, "grad_norm": 0.11767578125, "learning_rate": 0.00030333167775901466, "loss": 0.4894, "step": 125000 }, { "epoch": 6.208900367537499, "grad_norm": 0.181640625, "learning_rate": 0.00030329194397536507, "loss": 0.4799, "step": 125010 }, { "epoch": 6.209397039833118, "grad_norm": 0.142578125, "learning_rate": 0.0003032522101917155, "loss": 0.5078, "step": 125020 }, { "epoch": 6.209893712128737, "grad_norm": 0.1181640625, "learning_rate": 0.00030321247640806596, "loss": 0.5166, "step": 125030 }, { "epoch": 6.210390384424357, "grad_norm": 0.1376953125, "learning_rate": 0.00030317274262441643, "loss": 0.4705, "step": 125040 }, { "epoch": 6.210887056719976, "grad_norm": 0.125, "learning_rate": 0.0003031330088407669, "loss": 0.4962, "step": 125050 }, { "epoch": 6.211383729015595, "grad_norm": 0.1494140625, "learning_rate": 0.0003030932750571173, "loss": 0.4978, "step": 125060 }, { "epoch": 6.211880401311215, "grad_norm": 0.1279296875, "learning_rate": 0.0003030535412734678, "loss": 0.4915, "step": 125070 }, { "epoch": 6.212377073606834, "grad_norm": 0.12353515625, "learning_rate": 0.00030301380748981826, "loss": 0.463, "step": 125080 }, { "epoch": 6.212873745902454, "grad_norm": 0.138671875, "learning_rate": 0.0003029740737061687, "loss": 0.4876, "step": 125090 }, { "epoch": 6.213370418198073, "grad_norm": 0.150390625, "learning_rate": 0.0003029343399225191, "loss": 0.5183, "step": 125100 }, { "epoch": 6.213867090493692, "grad_norm": 0.1513671875, "learning_rate": 0.0003028946061388696, "loss": 0.5035, "step": 125110 }, { "epoch": 6.214363762789311, "grad_norm": 0.11865234375, "learning_rate": 0.00030285487235522004, "loss": 0.5088, "step": 125120 }, { "epoch": 6.214860435084931, "grad_norm": 0.1328125, "learning_rate": 0.0003028151385715705, "loss": 0.4658, "step": 125130 }, { "epoch": 6.215357107380551, "grad_norm": 0.1357421875, "learning_rate": 0.00030277540478792093, "loss": 0.4993, "step": 125140 }, { "epoch": 6.21585377967617, "grad_norm": 0.13671875, "learning_rate": 0.0003027356710042714, "loss": 0.5285, "step": 125150 }, { "epoch": 6.216350451971789, "grad_norm": 0.1669921875, "learning_rate": 0.00030269593722062187, "loss": 0.4791, "step": 125160 }, { "epoch": 6.216847124267408, "grad_norm": 0.1669921875, "learning_rate": 0.0003026562034369723, "loss": 0.5056, "step": 125170 }, { "epoch": 6.217343796563028, "grad_norm": 0.119140625, "learning_rate": 0.0003026164696533227, "loss": 0.5113, "step": 125180 }, { "epoch": 6.217840468858647, "grad_norm": 0.1376953125, "learning_rate": 0.00030257673586967323, "loss": 0.4772, "step": 125190 }, { "epoch": 6.218337141154266, "grad_norm": 0.1552734375, "learning_rate": 0.00030253700208602365, "loss": 0.4725, "step": 125200 }, { "epoch": 6.218833813449886, "grad_norm": 0.123046875, "learning_rate": 0.0003024972683023741, "loss": 0.4514, "step": 125210 }, { "epoch": 6.219330485745505, "grad_norm": 0.1611328125, "learning_rate": 0.00030245753451872454, "loss": 0.4987, "step": 125220 }, { "epoch": 6.219827158041125, "grad_norm": 0.12158203125, "learning_rate": 0.000302417800735075, "loss": 0.5278, "step": 125230 }, { "epoch": 6.220323830336744, "grad_norm": 0.142578125, "learning_rate": 0.0003023780669514255, "loss": 0.4924, "step": 125240 }, { "epoch": 6.220820502632363, "grad_norm": 0.14453125, "learning_rate": 0.0003023383331677759, "loss": 0.4994, "step": 125250 }, { "epoch": 6.221317174927982, "grad_norm": 0.134765625, "learning_rate": 0.00030229859938412637, "loss": 0.5085, "step": 125260 }, { "epoch": 6.2218138472236015, "grad_norm": 0.1376953125, "learning_rate": 0.00030225886560047684, "loss": 0.5046, "step": 125270 }, { "epoch": 6.222310519519221, "grad_norm": 0.1337890625, "learning_rate": 0.00030221913181682726, "loss": 0.5063, "step": 125280 }, { "epoch": 6.222807191814841, "grad_norm": 0.1552734375, "learning_rate": 0.00030217939803317773, "loss": 0.5053, "step": 125290 }, { "epoch": 6.22330386411046, "grad_norm": 0.1279296875, "learning_rate": 0.0003021396642495282, "loss": 0.4956, "step": 125300 }, { "epoch": 6.223800536406079, "grad_norm": 0.1865234375, "learning_rate": 0.0003020999304658786, "loss": 0.523, "step": 125310 }, { "epoch": 6.224297208701699, "grad_norm": 0.1298828125, "learning_rate": 0.0003020601966822291, "loss": 0.4867, "step": 125320 }, { "epoch": 6.224793880997318, "grad_norm": 0.12158203125, "learning_rate": 0.0003020204628985795, "loss": 0.4856, "step": 125330 }, { "epoch": 6.225290553292937, "grad_norm": 0.1259765625, "learning_rate": 0.00030198072911493003, "loss": 0.4829, "step": 125340 }, { "epoch": 6.225787225588556, "grad_norm": 0.1298828125, "learning_rate": 0.00030194099533128045, "loss": 0.5264, "step": 125350 }, { "epoch": 6.226283897884176, "grad_norm": 0.1279296875, "learning_rate": 0.00030190126154763087, "loss": 0.4902, "step": 125360 }, { "epoch": 6.226780570179796, "grad_norm": 0.154296875, "learning_rate": 0.00030186152776398134, "loss": 0.5243, "step": 125370 }, { "epoch": 6.227277242475415, "grad_norm": 0.1279296875, "learning_rate": 0.0003018217939803318, "loss": 0.4956, "step": 125380 }, { "epoch": 6.227773914771034, "grad_norm": 0.1298828125, "learning_rate": 0.0003017820601966822, "loss": 0.5035, "step": 125390 }, { "epoch": 6.228270587066653, "grad_norm": 0.1220703125, "learning_rate": 0.0003017423264130327, "loss": 0.5026, "step": 125400 }, { "epoch": 6.2287672593622725, "grad_norm": 0.140625, "learning_rate": 0.0003017025926293831, "loss": 0.4802, "step": 125410 }, { "epoch": 6.229263931657892, "grad_norm": 0.12890625, "learning_rate": 0.00030166285884573364, "loss": 0.5156, "step": 125420 }, { "epoch": 6.229760603953512, "grad_norm": 0.125, "learning_rate": 0.00030162312506208406, "loss": 0.4948, "step": 125430 }, { "epoch": 6.230257276249131, "grad_norm": 0.1337890625, "learning_rate": 0.0003015833912784345, "loss": 0.4874, "step": 125440 }, { "epoch": 6.23075394854475, "grad_norm": 0.1279296875, "learning_rate": 0.00030154365749478494, "loss": 0.4725, "step": 125450 }, { "epoch": 6.2312506208403695, "grad_norm": 0.140625, "learning_rate": 0.0003015039237111354, "loss": 0.4897, "step": 125460 }, { "epoch": 6.231747293135989, "grad_norm": 0.12890625, "learning_rate": 0.00030146418992748583, "loss": 0.5026, "step": 125470 }, { "epoch": 6.232243965431608, "grad_norm": 0.11669921875, "learning_rate": 0.0003014244561438363, "loss": 0.4734, "step": 125480 }, { "epoch": 6.232740637727227, "grad_norm": 0.142578125, "learning_rate": 0.0003013847223601868, "loss": 0.4932, "step": 125490 }, { "epoch": 6.233237310022847, "grad_norm": 0.14453125, "learning_rate": 0.00030134498857653725, "loss": 0.4852, "step": 125500 }, { "epoch": 6.2337339823184665, "grad_norm": 0.2236328125, "learning_rate": 0.00030130525479288766, "loss": 0.4757, "step": 125510 }, { "epoch": 6.234230654614086, "grad_norm": 0.1396484375, "learning_rate": 0.0003012655210092381, "loss": 0.4989, "step": 125520 }, { "epoch": 6.234727326909705, "grad_norm": 0.1279296875, "learning_rate": 0.0003012257872255886, "loss": 0.4904, "step": 125530 }, { "epoch": 6.235223999205324, "grad_norm": 0.1962890625, "learning_rate": 0.000301186053441939, "loss": 0.5209, "step": 125540 }, { "epoch": 6.2357206715009434, "grad_norm": 0.1259765625, "learning_rate": 0.00030114631965828944, "loss": 0.4679, "step": 125550 }, { "epoch": 6.236217343796563, "grad_norm": 0.1376953125, "learning_rate": 0.0003011065858746399, "loss": 0.5565, "step": 125560 }, { "epoch": 6.236714016092183, "grad_norm": 0.12255859375, "learning_rate": 0.0003010668520909904, "loss": 0.4529, "step": 125570 }, { "epoch": 6.237210688387802, "grad_norm": 0.13671875, "learning_rate": 0.00030102711830734086, "loss": 0.4735, "step": 125580 }, { "epoch": 6.237707360683421, "grad_norm": 0.1328125, "learning_rate": 0.00030098738452369127, "loss": 0.4933, "step": 125590 }, { "epoch": 6.2382040329790405, "grad_norm": 0.1494140625, "learning_rate": 0.00030094765074004174, "loss": 0.4917, "step": 125600 }, { "epoch": 6.23870070527466, "grad_norm": 0.12451171875, "learning_rate": 0.0003009079169563922, "loss": 0.4819, "step": 125610 }, { "epoch": 6.239197377570279, "grad_norm": 0.150390625, "learning_rate": 0.00030086818317274263, "loss": 0.4896, "step": 125620 }, { "epoch": 6.239694049865898, "grad_norm": 0.177734375, "learning_rate": 0.00030082844938909305, "loss": 0.5149, "step": 125630 }, { "epoch": 6.240190722161518, "grad_norm": 0.1298828125, "learning_rate": 0.0003007887156054436, "loss": 0.5049, "step": 125640 }, { "epoch": 6.2406873944571375, "grad_norm": 0.12109375, "learning_rate": 0.000300748981821794, "loss": 0.4964, "step": 125650 }, { "epoch": 6.241184066752757, "grad_norm": 0.12158203125, "learning_rate": 0.00030070924803814446, "loss": 0.5195, "step": 125660 }, { "epoch": 6.241680739048376, "grad_norm": 0.1357421875, "learning_rate": 0.0003006695142544949, "loss": 0.4992, "step": 125670 }, { "epoch": 6.242177411343995, "grad_norm": 0.12451171875, "learning_rate": 0.00030062978047084535, "loss": 0.4925, "step": 125680 }, { "epoch": 6.242674083639614, "grad_norm": 0.1337890625, "learning_rate": 0.0003005900466871958, "loss": 0.4941, "step": 125690 }, { "epoch": 6.243170755935234, "grad_norm": 0.150390625, "learning_rate": 0.00030055031290354624, "loss": 0.5038, "step": 125700 }, { "epoch": 6.243667428230853, "grad_norm": 0.1494140625, "learning_rate": 0.00030051057911989666, "loss": 0.4964, "step": 125710 }, { "epoch": 6.244164100526473, "grad_norm": 0.126953125, "learning_rate": 0.0003004708453362472, "loss": 0.4944, "step": 125720 }, { "epoch": 6.244660772822092, "grad_norm": 0.1708984375, "learning_rate": 0.0003004311115525976, "loss": 0.455, "step": 125730 }, { "epoch": 6.245157445117711, "grad_norm": 0.1376953125, "learning_rate": 0.00030039137776894807, "loss": 0.502, "step": 125740 }, { "epoch": 6.245654117413331, "grad_norm": 0.1328125, "learning_rate": 0.0003003516439852985, "loss": 0.4861, "step": 125750 }, { "epoch": 6.24615078970895, "grad_norm": 0.1298828125, "learning_rate": 0.00030031191020164896, "loss": 0.5074, "step": 125760 }, { "epoch": 6.246647462004569, "grad_norm": 0.13671875, "learning_rate": 0.00030027217641799943, "loss": 0.5044, "step": 125770 }, { "epoch": 6.247144134300188, "grad_norm": 0.1201171875, "learning_rate": 0.00030023244263434985, "loss": 0.504, "step": 125780 }, { "epoch": 6.2476408065958084, "grad_norm": 0.173828125, "learning_rate": 0.0003001927088507003, "loss": 0.5104, "step": 125790 }, { "epoch": 6.248137478891428, "grad_norm": 0.150390625, "learning_rate": 0.0003001529750670508, "loss": 0.4601, "step": 125800 }, { "epoch": 6.248634151187047, "grad_norm": 0.125, "learning_rate": 0.0003001132412834012, "loss": 0.5146, "step": 125810 }, { "epoch": 6.249130823482666, "grad_norm": 0.1279296875, "learning_rate": 0.0003000735074997517, "loss": 0.4963, "step": 125820 }, { "epoch": 6.249627495778285, "grad_norm": 0.142578125, "learning_rate": 0.00030003377371610215, "loss": 0.4802, "step": 125830 }, { "epoch": 6.250124168073905, "grad_norm": 0.1279296875, "learning_rate": 0.00029999403993245257, "loss": 0.489, "step": 125840 }, { "epoch": 6.250620840369524, "grad_norm": 0.12451171875, "learning_rate": 0.00029995430614880304, "loss": 0.4896, "step": 125850 }, { "epoch": 6.251117512665144, "grad_norm": 0.1630859375, "learning_rate": 0.00029991457236515346, "loss": 0.4813, "step": 125860 }, { "epoch": 6.251614184960763, "grad_norm": 0.138671875, "learning_rate": 0.000299874838581504, "loss": 0.5038, "step": 125870 }, { "epoch": 6.252110857256382, "grad_norm": 0.1337890625, "learning_rate": 0.0002998351047978544, "loss": 0.5025, "step": 125880 }, { "epoch": 6.252607529552002, "grad_norm": 0.1279296875, "learning_rate": 0.0002997953710142048, "loss": 0.4924, "step": 125890 }, { "epoch": 6.253104201847621, "grad_norm": 0.12890625, "learning_rate": 0.0002997556372305553, "loss": 0.5108, "step": 125900 }, { "epoch": 6.25360087414324, "grad_norm": 0.1689453125, "learning_rate": 0.00029971590344690576, "loss": 0.5069, "step": 125910 }, { "epoch": 6.254097546438859, "grad_norm": 0.193359375, "learning_rate": 0.0002996761696632562, "loss": 0.4908, "step": 125920 }, { "epoch": 6.254594218734479, "grad_norm": 0.1279296875, "learning_rate": 0.00029963643587960665, "loss": 0.5153, "step": 125930 }, { "epoch": 6.255090891030099, "grad_norm": 0.12890625, "learning_rate": 0.0002995967020959571, "loss": 0.4955, "step": 125940 }, { "epoch": 6.255587563325718, "grad_norm": 0.154296875, "learning_rate": 0.0002995569683123076, "loss": 0.5118, "step": 125950 }, { "epoch": 6.256084235621337, "grad_norm": 0.1298828125, "learning_rate": 0.000299517234528658, "loss": 0.5032, "step": 125960 }, { "epoch": 6.256580907916956, "grad_norm": 0.154296875, "learning_rate": 0.0002994775007450084, "loss": 0.5198, "step": 125970 }, { "epoch": 6.2570775802125755, "grad_norm": 0.1474609375, "learning_rate": 0.0002994377669613589, "loss": 0.4657, "step": 125980 }, { "epoch": 6.257574252508195, "grad_norm": 0.15234375, "learning_rate": 0.00029939803317770937, "loss": 0.5155, "step": 125990 }, { "epoch": 6.258070924803814, "grad_norm": 0.123046875, "learning_rate": 0.0002993582993940598, "loss": 0.4896, "step": 126000 }, { "epoch": 6.258567597099434, "grad_norm": 0.12890625, "learning_rate": 0.00029931856561041026, "loss": 0.4847, "step": 126010 }, { "epoch": 6.259064269395053, "grad_norm": 0.1259765625, "learning_rate": 0.00029927883182676073, "loss": 0.5099, "step": 126020 }, { "epoch": 6.259560941690673, "grad_norm": 0.1279296875, "learning_rate": 0.0002992390980431112, "loss": 0.503, "step": 126030 }, { "epoch": 6.260057613986292, "grad_norm": 0.13671875, "learning_rate": 0.0002991993642594616, "loss": 0.5044, "step": 126040 }, { "epoch": 6.260554286281911, "grad_norm": 0.1376953125, "learning_rate": 0.00029915963047581203, "loss": 0.5047, "step": 126050 }, { "epoch": 6.26105095857753, "grad_norm": 0.142578125, "learning_rate": 0.00029911989669216256, "loss": 0.4773, "step": 126060 }, { "epoch": 6.2615476308731495, "grad_norm": 0.1279296875, "learning_rate": 0.000299080162908513, "loss": 0.4588, "step": 126070 }, { "epoch": 6.26204430316877, "grad_norm": 0.126953125, "learning_rate": 0.0002990404291248634, "loss": 0.4935, "step": 126080 }, { "epoch": 6.262540975464389, "grad_norm": 0.140625, "learning_rate": 0.00029900069534121386, "loss": 0.5197, "step": 126090 }, { "epoch": 6.263037647760008, "grad_norm": 0.12353515625, "learning_rate": 0.00029896096155756434, "loss": 0.4806, "step": 126100 }, { "epoch": 6.263534320055627, "grad_norm": 0.1533203125, "learning_rate": 0.0002989212277739148, "loss": 0.4985, "step": 126110 }, { "epoch": 6.2640309923512465, "grad_norm": 0.1259765625, "learning_rate": 0.0002988814939902652, "loss": 0.4776, "step": 126120 }, { "epoch": 6.264527664646866, "grad_norm": 0.171875, "learning_rate": 0.0002988417602066157, "loss": 0.4892, "step": 126130 }, { "epoch": 6.265024336942485, "grad_norm": 0.134765625, "learning_rate": 0.00029880202642296617, "loss": 0.5194, "step": 126140 }, { "epoch": 6.265521009238105, "grad_norm": 0.1337890625, "learning_rate": 0.0002987622926393166, "loss": 0.5437, "step": 126150 }, { "epoch": 6.266017681533724, "grad_norm": 0.1181640625, "learning_rate": 0.00029872255885566706, "loss": 0.48, "step": 126160 }, { "epoch": 6.2665143538293435, "grad_norm": 0.1318359375, "learning_rate": 0.00029868282507201753, "loss": 0.4789, "step": 126170 }, { "epoch": 6.267011026124963, "grad_norm": 0.1728515625, "learning_rate": 0.00029864309128836794, "loss": 0.4868, "step": 126180 }, { "epoch": 6.267507698420582, "grad_norm": 0.134765625, "learning_rate": 0.0002986033575047184, "loss": 0.5062, "step": 126190 }, { "epoch": 6.268004370716201, "grad_norm": 0.126953125, "learning_rate": 0.00029856362372106883, "loss": 0.4907, "step": 126200 }, { "epoch": 6.26850104301182, "grad_norm": 0.1328125, "learning_rate": 0.0002985238899374193, "loss": 0.5097, "step": 126210 }, { "epoch": 6.2689977153074405, "grad_norm": 0.12890625, "learning_rate": 0.0002984841561537698, "loss": 0.4833, "step": 126220 }, { "epoch": 6.26949438760306, "grad_norm": 0.20703125, "learning_rate": 0.0002984444223701202, "loss": 0.4974, "step": 126230 }, { "epoch": 6.269991059898679, "grad_norm": 0.1357421875, "learning_rate": 0.00029840468858647066, "loss": 0.5146, "step": 126240 }, { "epoch": 6.270487732194298, "grad_norm": 0.1376953125, "learning_rate": 0.00029836495480282114, "loss": 0.4795, "step": 126250 }, { "epoch": 6.2709844044899175, "grad_norm": 0.1357421875, "learning_rate": 0.00029832522101917155, "loss": 0.4976, "step": 126260 }, { "epoch": 6.271481076785537, "grad_norm": 0.134765625, "learning_rate": 0.000298285487235522, "loss": 0.5039, "step": 126270 }, { "epoch": 6.271977749081156, "grad_norm": 0.12353515625, "learning_rate": 0.00029824575345187244, "loss": 0.488, "step": 126280 }, { "epoch": 6.272474421376776, "grad_norm": 0.11376953125, "learning_rate": 0.0002982060196682229, "loss": 0.4881, "step": 126290 }, { "epoch": 6.272971093672395, "grad_norm": 0.1474609375, "learning_rate": 0.0002981662858845734, "loss": 0.52, "step": 126300 }, { "epoch": 6.2734677659680145, "grad_norm": 0.19140625, "learning_rate": 0.0002981265521009238, "loss": 0.5048, "step": 126310 }, { "epoch": 6.273964438263634, "grad_norm": 0.138671875, "learning_rate": 0.00029808681831727427, "loss": 0.4607, "step": 126320 }, { "epoch": 6.274461110559253, "grad_norm": 0.1298828125, "learning_rate": 0.00029804708453362474, "loss": 0.5297, "step": 126330 }, { "epoch": 6.274957782854872, "grad_norm": 0.158203125, "learning_rate": 0.00029800735074997516, "loss": 0.5118, "step": 126340 }, { "epoch": 6.275454455150491, "grad_norm": 0.158203125, "learning_rate": 0.00029796761696632563, "loss": 0.4898, "step": 126350 }, { "epoch": 6.2759511274461115, "grad_norm": 0.1279296875, "learning_rate": 0.0002979278831826761, "loss": 0.4915, "step": 126360 }, { "epoch": 6.276447799741731, "grad_norm": 0.12353515625, "learning_rate": 0.0002978881493990265, "loss": 0.4895, "step": 126370 }, { "epoch": 6.27694447203735, "grad_norm": 0.162109375, "learning_rate": 0.000297848415615377, "loss": 0.5334, "step": 126380 }, { "epoch": 6.277441144332969, "grad_norm": 0.130859375, "learning_rate": 0.0002978086818317274, "loss": 0.4788, "step": 126390 }, { "epoch": 6.277937816628588, "grad_norm": 0.1201171875, "learning_rate": 0.00029776894804807793, "loss": 0.4646, "step": 126400 }, { "epoch": 6.278434488924208, "grad_norm": 0.1484375, "learning_rate": 0.00029772921426442835, "loss": 0.5157, "step": 126410 }, { "epoch": 6.278931161219827, "grad_norm": 0.12109375, "learning_rate": 0.00029768948048077877, "loss": 0.4983, "step": 126420 }, { "epoch": 6.279427833515447, "grad_norm": 0.1318359375, "learning_rate": 0.00029764974669712924, "loss": 0.4527, "step": 126430 }, { "epoch": 6.279924505811066, "grad_norm": 0.177734375, "learning_rate": 0.0002976100129134797, "loss": 0.5077, "step": 126440 }, { "epoch": 6.280421178106685, "grad_norm": 0.146484375, "learning_rate": 0.00029757027912983013, "loss": 0.5107, "step": 126450 }, { "epoch": 6.280917850402305, "grad_norm": 0.1494140625, "learning_rate": 0.0002975305453461806, "loss": 0.4965, "step": 126460 }, { "epoch": 6.281414522697924, "grad_norm": 0.12255859375, "learning_rate": 0.00029749081156253107, "loss": 0.4856, "step": 126470 }, { "epoch": 6.281911194993543, "grad_norm": 0.171875, "learning_rate": 0.00029745107777888154, "loss": 0.5022, "step": 126480 }, { "epoch": 6.282407867289162, "grad_norm": 0.1865234375, "learning_rate": 0.00029741134399523196, "loss": 0.4864, "step": 126490 }, { "epoch": 6.282904539584782, "grad_norm": 0.1259765625, "learning_rate": 0.0002973716102115824, "loss": 0.5043, "step": 126500 }, { "epoch": 6.283401211880402, "grad_norm": 0.126953125, "learning_rate": 0.0002973318764279329, "loss": 0.4887, "step": 126510 }, { "epoch": 6.283897884176021, "grad_norm": 0.142578125, "learning_rate": 0.0002972921426442833, "loss": 0.4909, "step": 126520 }, { "epoch": 6.28439455647164, "grad_norm": 0.125, "learning_rate": 0.00029725240886063374, "loss": 0.4986, "step": 126530 }, { "epoch": 6.284891228767259, "grad_norm": 0.1240234375, "learning_rate": 0.0002972126750769842, "loss": 0.4875, "step": 126540 }, { "epoch": 6.285387901062879, "grad_norm": 0.12890625, "learning_rate": 0.0002971729412933347, "loss": 0.4904, "step": 126550 }, { "epoch": 6.285884573358498, "grad_norm": 0.1318359375, "learning_rate": 0.00029713320750968515, "loss": 0.5244, "step": 126560 }, { "epoch": 6.286381245654117, "grad_norm": 0.1201171875, "learning_rate": 0.00029709347372603557, "loss": 0.4744, "step": 126570 }, { "epoch": 6.286877917949737, "grad_norm": 0.12890625, "learning_rate": 0.000297053739942386, "loss": 0.4744, "step": 126580 }, { "epoch": 6.287374590245356, "grad_norm": 0.1923828125, "learning_rate": 0.0002970140061587365, "loss": 0.4994, "step": 126590 }, { "epoch": 6.287871262540976, "grad_norm": 0.12451171875, "learning_rate": 0.00029697427237508693, "loss": 0.4812, "step": 126600 }, { "epoch": 6.288367934836595, "grad_norm": 0.125, "learning_rate": 0.0002969345385914374, "loss": 0.488, "step": 126610 }, { "epoch": 6.288864607132214, "grad_norm": 0.1416015625, "learning_rate": 0.0002968948048077878, "loss": 0.5159, "step": 126620 }, { "epoch": 6.289361279427833, "grad_norm": 0.12158203125, "learning_rate": 0.0002968550710241383, "loss": 0.4736, "step": 126630 }, { "epoch": 6.2898579517234525, "grad_norm": 0.1669921875, "learning_rate": 0.00029681533724048876, "loss": 0.4954, "step": 126640 }, { "epoch": 6.290354624019072, "grad_norm": 0.1640625, "learning_rate": 0.0002967756034568392, "loss": 0.5119, "step": 126650 }, { "epoch": 6.290851296314692, "grad_norm": 0.154296875, "learning_rate": 0.00029673586967318965, "loss": 0.5072, "step": 126660 }, { "epoch": 6.291347968610311, "grad_norm": 0.1376953125, "learning_rate": 0.0002966961358895401, "loss": 0.4805, "step": 126670 }, { "epoch": 6.29184464090593, "grad_norm": 0.12451171875, "learning_rate": 0.00029665640210589054, "loss": 0.4895, "step": 126680 }, { "epoch": 6.2923413132015495, "grad_norm": 0.146484375, "learning_rate": 0.000296616668322241, "loss": 0.5036, "step": 126690 }, { "epoch": 6.292837985497169, "grad_norm": 0.12255859375, "learning_rate": 0.0002965769345385915, "loss": 0.4635, "step": 126700 }, { "epoch": 6.293334657792788, "grad_norm": 0.138671875, "learning_rate": 0.0002965372007549419, "loss": 0.5062, "step": 126710 }, { "epoch": 6.293831330088407, "grad_norm": 0.11767578125, "learning_rate": 0.00029649746697129237, "loss": 0.4842, "step": 126720 }, { "epoch": 6.294328002384027, "grad_norm": 0.14453125, "learning_rate": 0.0002964577331876428, "loss": 0.5248, "step": 126730 }, { "epoch": 6.294824674679647, "grad_norm": 0.12890625, "learning_rate": 0.00029641799940399326, "loss": 0.4795, "step": 126740 }, { "epoch": 6.295321346975266, "grad_norm": 0.1201171875, "learning_rate": 0.00029637826562034373, "loss": 0.5147, "step": 126750 }, { "epoch": 6.295818019270885, "grad_norm": 0.1298828125, "learning_rate": 0.00029633853183669414, "loss": 0.4838, "step": 126760 }, { "epoch": 6.296314691566504, "grad_norm": 0.1357421875, "learning_rate": 0.0002962987980530446, "loss": 0.5255, "step": 126770 }, { "epoch": 6.2968113638621235, "grad_norm": 0.169921875, "learning_rate": 0.0002962590642693951, "loss": 0.5057, "step": 126780 }, { "epoch": 6.297308036157743, "grad_norm": 0.14453125, "learning_rate": 0.0002962193304857455, "loss": 0.496, "step": 126790 }, { "epoch": 6.297804708453363, "grad_norm": 0.130859375, "learning_rate": 0.000296179596702096, "loss": 0.4741, "step": 126800 }, { "epoch": 6.298301380748982, "grad_norm": 0.15625, "learning_rate": 0.00029613986291844645, "loss": 0.4893, "step": 126810 }, { "epoch": 6.298798053044601, "grad_norm": 0.1259765625, "learning_rate": 0.00029610012913479686, "loss": 0.483, "step": 126820 }, { "epoch": 6.2992947253402205, "grad_norm": 0.130859375, "learning_rate": 0.00029606039535114734, "loss": 0.5172, "step": 126830 }, { "epoch": 6.29979139763584, "grad_norm": 0.126953125, "learning_rate": 0.00029602066156749775, "loss": 0.5366, "step": 126840 }, { "epoch": 6.300288069931459, "grad_norm": 0.1240234375, "learning_rate": 0.0002959809277838482, "loss": 0.5062, "step": 126850 }, { "epoch": 6.300784742227078, "grad_norm": 0.12255859375, "learning_rate": 0.0002959411940001987, "loss": 0.4965, "step": 126860 }, { "epoch": 6.301281414522698, "grad_norm": 0.1220703125, "learning_rate": 0.0002959014602165491, "loss": 0.4638, "step": 126870 }, { "epoch": 6.3017780868183175, "grad_norm": 0.1201171875, "learning_rate": 0.0002958617264328996, "loss": 0.4834, "step": 126880 }, { "epoch": 6.302274759113937, "grad_norm": 0.126953125, "learning_rate": 0.00029582199264925006, "loss": 0.4711, "step": 126890 }, { "epoch": 6.302771431409556, "grad_norm": 0.1484375, "learning_rate": 0.00029578225886560047, "loss": 0.5093, "step": 126900 }, { "epoch": 6.303268103705175, "grad_norm": 0.11767578125, "learning_rate": 0.00029574252508195094, "loss": 0.496, "step": 126910 }, { "epoch": 6.303764776000794, "grad_norm": 0.1796875, "learning_rate": 0.00029570279129830136, "loss": 0.5124, "step": 126920 }, { "epoch": 6.304261448296414, "grad_norm": 0.1328125, "learning_rate": 0.0002956630575146519, "loss": 0.4961, "step": 126930 }, { "epoch": 6.304758120592034, "grad_norm": 0.1142578125, "learning_rate": 0.0002956233237310023, "loss": 0.4907, "step": 126940 }, { "epoch": 6.305254792887653, "grad_norm": 0.125, "learning_rate": 0.0002955835899473527, "loss": 0.4881, "step": 126950 }, { "epoch": 6.305751465183272, "grad_norm": 0.1298828125, "learning_rate": 0.0002955438561637032, "loss": 0.4682, "step": 126960 }, { "epoch": 6.3062481374788915, "grad_norm": 0.140625, "learning_rate": 0.00029550412238005366, "loss": 0.5269, "step": 126970 }, { "epoch": 6.306744809774511, "grad_norm": 0.13671875, "learning_rate": 0.0002954643885964041, "loss": 0.513, "step": 126980 }, { "epoch": 6.30724148207013, "grad_norm": 0.12890625, "learning_rate": 0.00029542465481275455, "loss": 0.4934, "step": 126990 }, { "epoch": 6.307738154365749, "grad_norm": 0.11767578125, "learning_rate": 0.000295384921029105, "loss": 0.4866, "step": 127000 }, { "epoch": 6.308234826661369, "grad_norm": 0.142578125, "learning_rate": 0.0002953451872454555, "loss": 0.4747, "step": 127010 }, { "epoch": 6.3087314989569885, "grad_norm": 0.12158203125, "learning_rate": 0.0002953054534618059, "loss": 0.5089, "step": 127020 }, { "epoch": 6.309228171252608, "grad_norm": 0.12255859375, "learning_rate": 0.00029526571967815633, "loss": 0.5182, "step": 127030 }, { "epoch": 6.309724843548227, "grad_norm": 0.1220703125, "learning_rate": 0.00029522598589450685, "loss": 0.4992, "step": 127040 }, { "epoch": 6.310221515843846, "grad_norm": 0.134765625, "learning_rate": 0.00029518625211085727, "loss": 0.4945, "step": 127050 }, { "epoch": 6.310718188139465, "grad_norm": 0.138671875, "learning_rate": 0.00029514651832720774, "loss": 0.4966, "step": 127060 }, { "epoch": 6.311214860435085, "grad_norm": 0.1328125, "learning_rate": 0.00029510678454355816, "loss": 0.5039, "step": 127070 }, { "epoch": 6.311711532730705, "grad_norm": 0.1796875, "learning_rate": 0.00029506705075990863, "loss": 0.501, "step": 127080 }, { "epoch": 6.312208205026324, "grad_norm": 0.1474609375, "learning_rate": 0.0002950273169762591, "loss": 0.5039, "step": 127090 }, { "epoch": 6.312704877321943, "grad_norm": 0.1259765625, "learning_rate": 0.0002949875831926095, "loss": 0.4992, "step": 127100 }, { "epoch": 6.313201549617562, "grad_norm": 0.154296875, "learning_rate": 0.00029494784940895994, "loss": 0.5168, "step": 127110 }, { "epoch": 6.313698221913182, "grad_norm": 0.14453125, "learning_rate": 0.00029490811562531046, "loss": 0.5004, "step": 127120 }, { "epoch": 6.314194894208801, "grad_norm": 0.12890625, "learning_rate": 0.0002948683818416609, "loss": 0.4948, "step": 127130 }, { "epoch": 6.31469156650442, "grad_norm": 0.146484375, "learning_rate": 0.00029482864805801135, "loss": 0.4946, "step": 127140 }, { "epoch": 6.315188238800039, "grad_norm": 0.1318359375, "learning_rate": 0.00029478891427436177, "loss": 0.5133, "step": 127150 }, { "epoch": 6.315684911095659, "grad_norm": 0.12451171875, "learning_rate": 0.00029474918049071224, "loss": 0.4776, "step": 127160 }, { "epoch": 6.316181583391279, "grad_norm": 0.12255859375, "learning_rate": 0.0002947094467070627, "loss": 0.5288, "step": 127170 }, { "epoch": 6.316678255686898, "grad_norm": 0.15625, "learning_rate": 0.00029466971292341313, "loss": 0.4827, "step": 127180 }, { "epoch": 6.317174927982517, "grad_norm": 0.1298828125, "learning_rate": 0.0002946299791397636, "loss": 0.504, "step": 127190 }, { "epoch": 6.317671600278136, "grad_norm": 0.1279296875, "learning_rate": 0.00029459024535611407, "loss": 0.5046, "step": 127200 }, { "epoch": 6.318168272573756, "grad_norm": 0.15234375, "learning_rate": 0.0002945505115724645, "loss": 0.4915, "step": 127210 }, { "epoch": 6.318664944869375, "grad_norm": 0.126953125, "learning_rate": 0.00029451077778881496, "loss": 0.5133, "step": 127220 }, { "epoch": 6.319161617164995, "grad_norm": 0.173828125, "learning_rate": 0.00029447104400516543, "loss": 0.5189, "step": 127230 }, { "epoch": 6.319658289460614, "grad_norm": 0.1396484375, "learning_rate": 0.00029443131022151585, "loss": 0.5, "step": 127240 }, { "epoch": 6.320154961756233, "grad_norm": 0.1318359375, "learning_rate": 0.0002943915764378663, "loss": 0.4652, "step": 127250 }, { "epoch": 6.320651634051853, "grad_norm": 0.142578125, "learning_rate": 0.00029435184265421674, "loss": 0.4974, "step": 127260 }, { "epoch": 6.321148306347472, "grad_norm": 0.1455078125, "learning_rate": 0.0002943121088705672, "loss": 0.4879, "step": 127270 }, { "epoch": 6.321644978643091, "grad_norm": 0.142578125, "learning_rate": 0.0002942723750869177, "loss": 0.5183, "step": 127280 }, { "epoch": 6.32214165093871, "grad_norm": 0.1513671875, "learning_rate": 0.0002942326413032681, "loss": 0.4927, "step": 127290 }, { "epoch": 6.32263832323433, "grad_norm": 0.126953125, "learning_rate": 0.00029419290751961857, "loss": 0.5134, "step": 127300 }, { "epoch": 6.32313499552995, "grad_norm": 0.1474609375, "learning_rate": 0.00029415317373596904, "loss": 0.4883, "step": 127310 }, { "epoch": 6.323631667825569, "grad_norm": 0.130859375, "learning_rate": 0.00029411343995231946, "loss": 0.4833, "step": 127320 }, { "epoch": 6.324128340121188, "grad_norm": 0.14453125, "learning_rate": 0.00029407370616866993, "loss": 0.504, "step": 127330 }, { "epoch": 6.324625012416807, "grad_norm": 0.1240234375, "learning_rate": 0.0002940339723850204, "loss": 0.5042, "step": 127340 }, { "epoch": 6.3251216847124265, "grad_norm": 0.1416015625, "learning_rate": 0.0002939942386013708, "loss": 0.5016, "step": 127350 }, { "epoch": 6.325618357008046, "grad_norm": 0.1630859375, "learning_rate": 0.0002939545048177213, "loss": 0.4883, "step": 127360 }, { "epoch": 6.326115029303665, "grad_norm": 0.146484375, "learning_rate": 0.0002939147710340717, "loss": 0.481, "step": 127370 }, { "epoch": 6.326611701599285, "grad_norm": 0.1494140625, "learning_rate": 0.0002938750372504222, "loss": 0.5074, "step": 127380 }, { "epoch": 6.327108373894904, "grad_norm": 0.1162109375, "learning_rate": 0.00029383530346677265, "loss": 0.5119, "step": 127390 }, { "epoch": 6.3276050461905236, "grad_norm": 0.138671875, "learning_rate": 0.00029379556968312306, "loss": 0.4492, "step": 127400 }, { "epoch": 6.328101718486143, "grad_norm": 0.1767578125, "learning_rate": 0.00029375583589947354, "loss": 0.4809, "step": 127410 }, { "epoch": 6.328598390781762, "grad_norm": 0.140625, "learning_rate": 0.000293716102115824, "loss": 0.4887, "step": 127420 }, { "epoch": 6.329095063077381, "grad_norm": 0.146484375, "learning_rate": 0.0002936763683321745, "loss": 0.5163, "step": 127430 }, { "epoch": 6.3295917353730005, "grad_norm": 0.1435546875, "learning_rate": 0.0002936366345485249, "loss": 0.5008, "step": 127440 }, { "epoch": 6.330088407668621, "grad_norm": 0.12890625, "learning_rate": 0.0002935969007648753, "loss": 0.5162, "step": 127450 }, { "epoch": 6.33058507996424, "grad_norm": 0.1298828125, "learning_rate": 0.00029355716698122584, "loss": 0.4824, "step": 127460 }, { "epoch": 6.331081752259859, "grad_norm": 0.1298828125, "learning_rate": 0.00029351743319757626, "loss": 0.4889, "step": 127470 }, { "epoch": 6.331578424555478, "grad_norm": 0.150390625, "learning_rate": 0.00029347769941392667, "loss": 0.4926, "step": 127480 }, { "epoch": 6.3320750968510975, "grad_norm": 0.150390625, "learning_rate": 0.00029343796563027714, "loss": 0.4883, "step": 127490 }, { "epoch": 6.332571769146717, "grad_norm": 0.1328125, "learning_rate": 0.0002933982318466276, "loss": 0.4851, "step": 127500 }, { "epoch": 6.333068441442336, "grad_norm": 0.22265625, "learning_rate": 0.0002933584980629781, "loss": 0.5089, "step": 127510 }, { "epoch": 6.333565113737956, "grad_norm": 0.1513671875, "learning_rate": 0.0002933187642793285, "loss": 0.4786, "step": 127520 }, { "epoch": 6.334061786033575, "grad_norm": 0.1591796875, "learning_rate": 0.000293279030495679, "loss": 0.5079, "step": 127530 }, { "epoch": 6.3345584583291945, "grad_norm": 0.12255859375, "learning_rate": 0.00029323929671202945, "loss": 0.4996, "step": 127540 }, { "epoch": 6.335055130624814, "grad_norm": 0.1298828125, "learning_rate": 0.00029319956292837986, "loss": 0.4921, "step": 127550 }, { "epoch": 6.335551802920433, "grad_norm": 0.134765625, "learning_rate": 0.0002931598291447303, "loss": 0.5025, "step": 127560 }, { "epoch": 6.336048475216052, "grad_norm": 0.1396484375, "learning_rate": 0.0002931200953610808, "loss": 0.4897, "step": 127570 }, { "epoch": 6.336545147511671, "grad_norm": 0.134765625, "learning_rate": 0.0002930803615774312, "loss": 0.481, "step": 127580 }, { "epoch": 6.3370418198072915, "grad_norm": 0.140625, "learning_rate": 0.0002930406277937817, "loss": 0.5139, "step": 127590 }, { "epoch": 6.337538492102911, "grad_norm": 0.125, "learning_rate": 0.0002930008940101321, "loss": 0.4962, "step": 127600 }, { "epoch": 6.33803516439853, "grad_norm": 0.14453125, "learning_rate": 0.0002929611602264826, "loss": 0.5171, "step": 127610 }, { "epoch": 6.338531836694149, "grad_norm": 0.154296875, "learning_rate": 0.00029292142644283305, "loss": 0.4813, "step": 127620 }, { "epoch": 6.339028508989768, "grad_norm": 0.1318359375, "learning_rate": 0.00029288169265918347, "loss": 0.4879, "step": 127630 }, { "epoch": 6.339525181285388, "grad_norm": 0.126953125, "learning_rate": 0.00029284195887553394, "loss": 0.4723, "step": 127640 }, { "epoch": 6.340021853581007, "grad_norm": 0.146484375, "learning_rate": 0.0002928022250918844, "loss": 0.4918, "step": 127650 }, { "epoch": 6.340518525876627, "grad_norm": 0.1328125, "learning_rate": 0.00029276249130823483, "loss": 0.4824, "step": 127660 }, { "epoch": 6.341015198172246, "grad_norm": 0.1240234375, "learning_rate": 0.0002927227575245853, "loss": 0.4864, "step": 127670 }, { "epoch": 6.3415118704678655, "grad_norm": 0.140625, "learning_rate": 0.0002926830237409357, "loss": 0.5111, "step": 127680 }, { "epoch": 6.342008542763485, "grad_norm": 0.15234375, "learning_rate": 0.0002926432899572862, "loss": 0.4874, "step": 127690 }, { "epoch": 6.342505215059104, "grad_norm": 0.130859375, "learning_rate": 0.00029260355617363666, "loss": 0.4807, "step": 127700 }, { "epoch": 6.343001887354723, "grad_norm": 0.140625, "learning_rate": 0.0002925638223899871, "loss": 0.504, "step": 127710 }, { "epoch": 6.343498559650342, "grad_norm": 0.1474609375, "learning_rate": 0.00029252408860633755, "loss": 0.511, "step": 127720 }, { "epoch": 6.3439952319459625, "grad_norm": 0.12158203125, "learning_rate": 0.000292484354822688, "loss": 0.4594, "step": 127730 }, { "epoch": 6.344491904241582, "grad_norm": 0.1240234375, "learning_rate": 0.00029244462103903844, "loss": 0.501, "step": 127740 }, { "epoch": 6.344988576537201, "grad_norm": 0.20703125, "learning_rate": 0.0002924048872553889, "loss": 0.504, "step": 127750 }, { "epoch": 6.34548524883282, "grad_norm": 0.1572265625, "learning_rate": 0.0002923651534717394, "loss": 0.4805, "step": 127760 }, { "epoch": 6.345981921128439, "grad_norm": 0.1259765625, "learning_rate": 0.0002923254196880898, "loss": 0.5137, "step": 127770 }, { "epoch": 6.346478593424059, "grad_norm": 0.1220703125, "learning_rate": 0.00029228568590444027, "loss": 0.4669, "step": 127780 }, { "epoch": 6.346975265719678, "grad_norm": 0.162109375, "learning_rate": 0.0002922459521207907, "loss": 0.4747, "step": 127790 }, { "epoch": 6.347471938015298, "grad_norm": 0.1708984375, "learning_rate": 0.00029220621833714116, "loss": 0.5053, "step": 127800 }, { "epoch": 6.347968610310917, "grad_norm": 0.16015625, "learning_rate": 0.00029216648455349163, "loss": 0.4982, "step": 127810 }, { "epoch": 6.348465282606536, "grad_norm": 0.13671875, "learning_rate": 0.00029212675076984205, "loss": 0.505, "step": 127820 }, { "epoch": 6.348961954902156, "grad_norm": 0.1298828125, "learning_rate": 0.0002920870169861925, "loss": 0.5074, "step": 127830 }, { "epoch": 6.349458627197775, "grad_norm": 0.115234375, "learning_rate": 0.000292047283202543, "loss": 0.4657, "step": 127840 }, { "epoch": 6.349955299493394, "grad_norm": 0.15234375, "learning_rate": 0.0002920075494188934, "loss": 0.5303, "step": 127850 }, { "epoch": 6.350451971789013, "grad_norm": 0.126953125, "learning_rate": 0.0002919678156352439, "loss": 0.4858, "step": 127860 }, { "epoch": 6.3509486440846326, "grad_norm": 0.1171875, "learning_rate": 0.00029192808185159435, "loss": 0.4904, "step": 127870 }, { "epoch": 6.351445316380253, "grad_norm": 0.1201171875, "learning_rate": 0.0002918883480679448, "loss": 0.4802, "step": 127880 }, { "epoch": 6.351941988675872, "grad_norm": 0.1259765625, "learning_rate": 0.00029184861428429524, "loss": 0.4675, "step": 127890 }, { "epoch": 6.352438660971491, "grad_norm": 0.1513671875, "learning_rate": 0.00029180888050064566, "loss": 0.509, "step": 127900 }, { "epoch": 6.35293533326711, "grad_norm": 0.1591796875, "learning_rate": 0.0002917691467169962, "loss": 0.5031, "step": 127910 }, { "epoch": 6.35343200556273, "grad_norm": 0.134765625, "learning_rate": 0.0002917294129333466, "loss": 0.4812, "step": 127920 }, { "epoch": 6.353928677858349, "grad_norm": 0.123046875, "learning_rate": 0.000291689679149697, "loss": 0.4554, "step": 127930 }, { "epoch": 6.354425350153968, "grad_norm": 0.1279296875, "learning_rate": 0.0002916499453660475, "loss": 0.5126, "step": 127940 }, { "epoch": 6.354922022449588, "grad_norm": 0.1220703125, "learning_rate": 0.00029161021158239796, "loss": 0.4959, "step": 127950 }, { "epoch": 6.355418694745207, "grad_norm": 0.185546875, "learning_rate": 0.00029157047779874843, "loss": 0.5187, "step": 127960 }, { "epoch": 6.355915367040827, "grad_norm": 0.1279296875, "learning_rate": 0.00029153074401509885, "loss": 0.505, "step": 127970 }, { "epoch": 6.356412039336446, "grad_norm": 0.126953125, "learning_rate": 0.00029149101023144926, "loss": 0.4917, "step": 127980 }, { "epoch": 6.356908711632065, "grad_norm": 0.134765625, "learning_rate": 0.0002914512764477998, "loss": 0.467, "step": 127990 }, { "epoch": 6.357405383927684, "grad_norm": 0.1337890625, "learning_rate": 0.0002914115426641502, "loss": 0.4777, "step": 128000 }, { "epoch": 6.3579020562233035, "grad_norm": 0.12158203125, "learning_rate": 0.0002913718088805006, "loss": 0.4678, "step": 128010 }, { "epoch": 6.358398728518924, "grad_norm": 0.1484375, "learning_rate": 0.0002913320750968511, "loss": 0.522, "step": 128020 }, { "epoch": 6.358895400814543, "grad_norm": 0.126953125, "learning_rate": 0.00029129234131320157, "loss": 0.4893, "step": 128030 }, { "epoch": 6.359392073110162, "grad_norm": 0.12353515625, "learning_rate": 0.00029125260752955204, "loss": 0.5169, "step": 128040 }, { "epoch": 6.359888745405781, "grad_norm": 0.1630859375, "learning_rate": 0.00029121287374590246, "loss": 0.4913, "step": 128050 }, { "epoch": 6.3603854177014005, "grad_norm": 0.123046875, "learning_rate": 0.00029117313996225293, "loss": 0.4992, "step": 128060 }, { "epoch": 6.36088208999702, "grad_norm": 0.1376953125, "learning_rate": 0.0002911334061786034, "loss": 0.5027, "step": 128070 }, { "epoch": 6.361378762292639, "grad_norm": 0.12158203125, "learning_rate": 0.0002910936723949538, "loss": 0.4832, "step": 128080 }, { "epoch": 6.361875434588258, "grad_norm": 0.1669921875, "learning_rate": 0.00029105393861130423, "loss": 0.4816, "step": 128090 }, { "epoch": 6.362372106883878, "grad_norm": 0.15234375, "learning_rate": 0.00029101420482765476, "loss": 0.4841, "step": 128100 }, { "epoch": 6.362868779179498, "grad_norm": 0.1416015625, "learning_rate": 0.0002909744710440052, "loss": 0.4955, "step": 128110 }, { "epoch": 6.363365451475117, "grad_norm": 0.12890625, "learning_rate": 0.00029093473726035565, "loss": 0.5074, "step": 128120 }, { "epoch": 6.363862123770736, "grad_norm": 0.12353515625, "learning_rate": 0.00029089500347670606, "loss": 0.4901, "step": 128130 }, { "epoch": 6.364358796066355, "grad_norm": 0.146484375, "learning_rate": 0.00029085526969305654, "loss": 0.509, "step": 128140 }, { "epoch": 6.3648554683619745, "grad_norm": 0.1279296875, "learning_rate": 0.000290815535909407, "loss": 0.4937, "step": 128150 }, { "epoch": 6.365352140657594, "grad_norm": 0.1279296875, "learning_rate": 0.0002907758021257574, "loss": 0.5029, "step": 128160 }, { "epoch": 6.365848812953214, "grad_norm": 0.12353515625, "learning_rate": 0.0002907360683421079, "loss": 0.4996, "step": 128170 }, { "epoch": 6.366345485248833, "grad_norm": 0.1318359375, "learning_rate": 0.00029069633455845837, "loss": 0.4869, "step": 128180 }, { "epoch": 6.366842157544452, "grad_norm": 0.12255859375, "learning_rate": 0.0002906566007748088, "loss": 0.5017, "step": 128190 }, { "epoch": 6.3673388298400715, "grad_norm": 0.11572265625, "learning_rate": 0.00029061686699115926, "loss": 0.4763, "step": 128200 }, { "epoch": 6.367835502135691, "grad_norm": 0.1201171875, "learning_rate": 0.0002905771332075097, "loss": 0.5134, "step": 128210 }, { "epoch": 6.36833217443131, "grad_norm": 0.1904296875, "learning_rate": 0.00029053739942386014, "loss": 0.4816, "step": 128220 }, { "epoch": 6.368828846726929, "grad_norm": 0.140625, "learning_rate": 0.0002904976656402106, "loss": 0.4838, "step": 128230 }, { "epoch": 6.369325519022549, "grad_norm": 0.125, "learning_rate": 0.00029045793185656103, "loss": 0.4561, "step": 128240 }, { "epoch": 6.3698221913181685, "grad_norm": 0.1669921875, "learning_rate": 0.0002904181980729115, "loss": 0.5005, "step": 128250 }, { "epoch": 6.370318863613788, "grad_norm": 0.12255859375, "learning_rate": 0.000290378464289262, "loss": 0.5194, "step": 128260 }, { "epoch": 6.370815535909407, "grad_norm": 0.1884765625, "learning_rate": 0.0002903387305056124, "loss": 0.4942, "step": 128270 }, { "epoch": 6.371312208205026, "grad_norm": 0.130859375, "learning_rate": 0.00029029899672196286, "loss": 0.4992, "step": 128280 }, { "epoch": 6.371808880500645, "grad_norm": 0.140625, "learning_rate": 0.00029025926293831333, "loss": 0.5089, "step": 128290 }, { "epoch": 6.372305552796265, "grad_norm": 0.146484375, "learning_rate": 0.00029021952915466375, "loss": 0.511, "step": 128300 }, { "epoch": 6.372802225091885, "grad_norm": 0.158203125, "learning_rate": 0.0002901797953710142, "loss": 0.5151, "step": 128310 }, { "epoch": 6.373298897387504, "grad_norm": 0.205078125, "learning_rate": 0.00029014006158736464, "loss": 0.495, "step": 128320 }, { "epoch": 6.373795569683123, "grad_norm": 0.1435546875, "learning_rate": 0.00029010032780371517, "loss": 0.4658, "step": 128330 }, { "epoch": 6.3742922419787424, "grad_norm": 0.140625, "learning_rate": 0.0002900605940200656, "loss": 0.4733, "step": 128340 }, { "epoch": 6.374788914274362, "grad_norm": 0.1201171875, "learning_rate": 0.000290020860236416, "loss": 0.5039, "step": 128350 }, { "epoch": 6.375285586569981, "grad_norm": 0.1357421875, "learning_rate": 0.00028998112645276647, "loss": 0.4856, "step": 128360 }, { "epoch": 6.3757822588656, "grad_norm": 0.134765625, "learning_rate": 0.00028994139266911694, "loss": 0.4915, "step": 128370 }, { "epoch": 6.37627893116122, "grad_norm": 0.12060546875, "learning_rate": 0.00028990165888546736, "loss": 0.5009, "step": 128380 }, { "epoch": 6.3767756034568395, "grad_norm": 0.1455078125, "learning_rate": 0.00028986192510181783, "loss": 0.5039, "step": 128390 }, { "epoch": 6.377272275752459, "grad_norm": 0.17578125, "learning_rate": 0.0002898221913181683, "loss": 0.5007, "step": 128400 }, { "epoch": 6.377768948048078, "grad_norm": 0.1376953125, "learning_rate": 0.0002897824575345188, "loss": 0.4989, "step": 128410 }, { "epoch": 6.378265620343697, "grad_norm": 0.1435546875, "learning_rate": 0.0002897427237508692, "loss": 0.4918, "step": 128420 }, { "epoch": 6.378762292639316, "grad_norm": 0.1787109375, "learning_rate": 0.0002897029899672196, "loss": 0.5297, "step": 128430 }, { "epoch": 6.379258964934936, "grad_norm": 0.150390625, "learning_rate": 0.00028966325618357013, "loss": 0.5047, "step": 128440 }, { "epoch": 6.379755637230556, "grad_norm": 0.1181640625, "learning_rate": 0.00028962352239992055, "loss": 0.4742, "step": 128450 }, { "epoch": 6.380252309526175, "grad_norm": 0.130859375, "learning_rate": 0.00028958378861627097, "loss": 0.486, "step": 128460 }, { "epoch": 6.380748981821794, "grad_norm": 0.13671875, "learning_rate": 0.00028954405483262144, "loss": 0.5336, "step": 128470 }, { "epoch": 6.381245654117413, "grad_norm": 0.134765625, "learning_rate": 0.0002895043210489719, "loss": 0.5085, "step": 128480 }, { "epoch": 6.381742326413033, "grad_norm": 0.12353515625, "learning_rate": 0.0002894645872653224, "loss": 0.4967, "step": 128490 }, { "epoch": 6.382238998708652, "grad_norm": 0.1298828125, "learning_rate": 0.0002894248534816728, "loss": 0.4979, "step": 128500 }, { "epoch": 6.382735671004271, "grad_norm": 0.126953125, "learning_rate": 0.00028938511969802327, "loss": 0.4953, "step": 128510 }, { "epoch": 6.383232343299891, "grad_norm": 0.1591796875, "learning_rate": 0.00028934538591437374, "loss": 0.4948, "step": 128520 }, { "epoch": 6.38372901559551, "grad_norm": 0.126953125, "learning_rate": 0.00028930565213072416, "loss": 0.517, "step": 128530 }, { "epoch": 6.38422568789113, "grad_norm": 0.12451171875, "learning_rate": 0.0002892659183470746, "loss": 0.506, "step": 128540 }, { "epoch": 6.384722360186749, "grad_norm": 0.1376953125, "learning_rate": 0.00028922618456342505, "loss": 0.4918, "step": 128550 }, { "epoch": 6.385219032482368, "grad_norm": 0.13671875, "learning_rate": 0.0002891864507797755, "loss": 0.5063, "step": 128560 }, { "epoch": 6.385715704777987, "grad_norm": 0.1318359375, "learning_rate": 0.000289146716996126, "loss": 0.5084, "step": 128570 }, { "epoch": 6.386212377073607, "grad_norm": 0.1533203125, "learning_rate": 0.0002891069832124764, "loss": 0.4974, "step": 128580 }, { "epoch": 6.386709049369226, "grad_norm": 0.1259765625, "learning_rate": 0.0002890672494288269, "loss": 0.4743, "step": 128590 }, { "epoch": 6.387205721664846, "grad_norm": 0.123046875, "learning_rate": 0.00028902751564517735, "loss": 0.4642, "step": 128600 }, { "epoch": 6.387702393960465, "grad_norm": 0.12890625, "learning_rate": 0.00028898778186152777, "loss": 0.4721, "step": 128610 }, { "epoch": 6.388199066256084, "grad_norm": 0.134765625, "learning_rate": 0.0002889480480778782, "loss": 0.4628, "step": 128620 }, { "epoch": 6.388695738551704, "grad_norm": 0.1298828125, "learning_rate": 0.0002889083142942287, "loss": 0.4595, "step": 128630 }, { "epoch": 6.389192410847323, "grad_norm": 0.1435546875, "learning_rate": 0.00028886858051057913, "loss": 0.5141, "step": 128640 }, { "epoch": 6.389689083142942, "grad_norm": 0.1181640625, "learning_rate": 0.0002888288467269296, "loss": 0.5169, "step": 128650 }, { "epoch": 6.390185755438561, "grad_norm": 0.1494140625, "learning_rate": 0.00028878911294328, "loss": 0.489, "step": 128660 }, { "epoch": 6.390682427734181, "grad_norm": 0.1591796875, "learning_rate": 0.0002887493791596305, "loss": 0.4895, "step": 128670 }, { "epoch": 6.391179100029801, "grad_norm": 0.15625, "learning_rate": 0.00028870964537598096, "loss": 0.4744, "step": 128680 }, { "epoch": 6.39167577232542, "grad_norm": 0.11474609375, "learning_rate": 0.0002886699115923314, "loss": 0.464, "step": 128690 }, { "epoch": 6.392172444621039, "grad_norm": 0.1572265625, "learning_rate": 0.00028863017780868185, "loss": 0.496, "step": 128700 }, { "epoch": 6.392669116916658, "grad_norm": 0.130859375, "learning_rate": 0.0002885904440250323, "loss": 0.4764, "step": 128710 }, { "epoch": 6.3931657892122775, "grad_norm": 0.1552734375, "learning_rate": 0.00028855071024138274, "loss": 0.4833, "step": 128720 }, { "epoch": 6.393662461507897, "grad_norm": 0.1298828125, "learning_rate": 0.0002885109764577332, "loss": 0.4572, "step": 128730 }, { "epoch": 6.394159133803516, "grad_norm": 0.12451171875, "learning_rate": 0.0002884712426740837, "loss": 0.4848, "step": 128740 }, { "epoch": 6.394655806099136, "grad_norm": 0.1240234375, "learning_rate": 0.0002884315088904341, "loss": 0.4934, "step": 128750 }, { "epoch": 6.395152478394755, "grad_norm": 0.1591796875, "learning_rate": 0.00028839177510678457, "loss": 0.5331, "step": 128760 }, { "epoch": 6.3956491506903745, "grad_norm": 0.12158203125, "learning_rate": 0.000288352041323135, "loss": 0.4802, "step": 128770 }, { "epoch": 6.396145822985994, "grad_norm": 0.1416015625, "learning_rate": 0.00028831230753948546, "loss": 0.5062, "step": 128780 }, { "epoch": 6.396642495281613, "grad_norm": 0.1513671875, "learning_rate": 0.0002882725737558359, "loss": 0.5086, "step": 128790 }, { "epoch": 6.397139167577232, "grad_norm": 0.1318359375, "learning_rate": 0.00028823283997218634, "loss": 0.5039, "step": 128800 }, { "epoch": 6.3976358398728514, "grad_norm": 0.126953125, "learning_rate": 0.0002881931061885368, "loss": 0.5243, "step": 128810 }, { "epoch": 6.398132512168472, "grad_norm": 0.1279296875, "learning_rate": 0.0002881533724048873, "loss": 0.4958, "step": 128820 }, { "epoch": 6.398629184464091, "grad_norm": 0.125, "learning_rate": 0.0002881136386212377, "loss": 0.4818, "step": 128830 }, { "epoch": 6.39912585675971, "grad_norm": 0.1328125, "learning_rate": 0.0002880739048375882, "loss": 0.4959, "step": 128840 }, { "epoch": 6.399622529055329, "grad_norm": 0.1474609375, "learning_rate": 0.0002880341710539386, "loss": 0.5146, "step": 128850 }, { "epoch": 6.4001192013509485, "grad_norm": 0.1318359375, "learning_rate": 0.0002879944372702891, "loss": 0.5259, "step": 128860 }, { "epoch": 6.400615873646568, "grad_norm": 0.12890625, "learning_rate": 0.00028795470348663953, "loss": 0.4883, "step": 128870 }, { "epoch": 6.401112545942187, "grad_norm": 0.12109375, "learning_rate": 0.00028791496970298995, "loss": 0.4779, "step": 128880 }, { "epoch": 6.401609218237807, "grad_norm": 0.1416015625, "learning_rate": 0.0002878752359193404, "loss": 0.5104, "step": 128890 }, { "epoch": 6.402105890533426, "grad_norm": 0.1435546875, "learning_rate": 0.0002878355021356909, "loss": 0.5174, "step": 128900 }, { "epoch": 6.4026025628290455, "grad_norm": 0.1435546875, "learning_rate": 0.0002877957683520413, "loss": 0.4908, "step": 128910 }, { "epoch": 6.403099235124665, "grad_norm": 0.1396484375, "learning_rate": 0.0002877560345683918, "loss": 0.4861, "step": 128920 }, { "epoch": 6.403595907420284, "grad_norm": 0.1552734375, "learning_rate": 0.00028771630078474225, "loss": 0.4865, "step": 128930 }, { "epoch": 6.404092579715903, "grad_norm": 0.1455078125, "learning_rate": 0.0002876765670010927, "loss": 0.5096, "step": 128940 }, { "epoch": 6.404589252011522, "grad_norm": 0.130859375, "learning_rate": 0.00028763683321744314, "loss": 0.5094, "step": 128950 }, { "epoch": 6.4050859243071425, "grad_norm": 0.1513671875, "learning_rate": 0.00028759709943379356, "loss": 0.4718, "step": 128960 }, { "epoch": 6.405582596602762, "grad_norm": 0.13671875, "learning_rate": 0.0002875573656501441, "loss": 0.512, "step": 128970 }, { "epoch": 6.406079268898381, "grad_norm": 0.166015625, "learning_rate": 0.0002875176318664945, "loss": 0.497, "step": 128980 }, { "epoch": 6.406575941194, "grad_norm": 0.12060546875, "learning_rate": 0.0002874778980828449, "loss": 0.4937, "step": 128990 }, { "epoch": 6.407072613489619, "grad_norm": 0.12890625, "learning_rate": 0.0002874381642991954, "loss": 0.5073, "step": 129000 }, { "epoch": 6.407569285785239, "grad_norm": 0.142578125, "learning_rate": 0.00028739843051554586, "loss": 0.5271, "step": 129010 }, { "epoch": 6.408065958080858, "grad_norm": 0.1357421875, "learning_rate": 0.00028735869673189633, "loss": 0.4926, "step": 129020 }, { "epoch": 6.408562630376478, "grad_norm": 0.123046875, "learning_rate": 0.00028731896294824675, "loss": 0.4877, "step": 129030 }, { "epoch": 6.409059302672097, "grad_norm": 0.16015625, "learning_rate": 0.0002872792291645972, "loss": 0.4803, "step": 129040 }, { "epoch": 6.4095559749677165, "grad_norm": 0.119140625, "learning_rate": 0.0002872394953809477, "loss": 0.476, "step": 129050 }, { "epoch": 6.410052647263336, "grad_norm": 0.1572265625, "learning_rate": 0.0002871997615972981, "loss": 0.5281, "step": 129060 }, { "epoch": 6.410549319558955, "grad_norm": 0.1494140625, "learning_rate": 0.00028716002781364853, "loss": 0.4813, "step": 129070 }, { "epoch": 6.411045991854574, "grad_norm": 0.146484375, "learning_rate": 0.000287120294029999, "loss": 0.4843, "step": 129080 }, { "epoch": 6.411542664150193, "grad_norm": 0.1416015625, "learning_rate": 0.00028708056024634947, "loss": 0.4956, "step": 129090 }, { "epoch": 6.4120393364458135, "grad_norm": 0.1279296875, "learning_rate": 0.00028704082646269994, "loss": 0.4909, "step": 129100 }, { "epoch": 6.412536008741433, "grad_norm": 0.12255859375, "learning_rate": 0.00028700109267905036, "loss": 0.4595, "step": 129110 }, { "epoch": 6.413032681037052, "grad_norm": 0.119140625, "learning_rate": 0.00028696135889540083, "loss": 0.4983, "step": 129120 }, { "epoch": 6.413529353332671, "grad_norm": 0.12255859375, "learning_rate": 0.0002869216251117513, "loss": 0.476, "step": 129130 }, { "epoch": 6.41402602562829, "grad_norm": 0.1337890625, "learning_rate": 0.0002868818913281017, "loss": 0.4897, "step": 129140 }, { "epoch": 6.41452269792391, "grad_norm": 0.138671875, "learning_rate": 0.0002868421575444522, "loss": 0.4898, "step": 129150 }, { "epoch": 6.415019370219529, "grad_norm": 0.130859375, "learning_rate": 0.00028680242376080266, "loss": 0.4681, "step": 129160 }, { "epoch": 6.415516042515149, "grad_norm": 0.1533203125, "learning_rate": 0.0002867626899771531, "loss": 0.4966, "step": 129170 }, { "epoch": 6.416012714810768, "grad_norm": 0.1435546875, "learning_rate": 0.00028672295619350355, "loss": 0.5034, "step": 129180 }, { "epoch": 6.416509387106387, "grad_norm": 0.11865234375, "learning_rate": 0.00028668322240985397, "loss": 0.4797, "step": 129190 }, { "epoch": 6.417006059402007, "grad_norm": 0.15234375, "learning_rate": 0.00028664348862620444, "loss": 0.5159, "step": 129200 }, { "epoch": 6.417502731697626, "grad_norm": 0.1279296875, "learning_rate": 0.0002866037548425549, "loss": 0.4948, "step": 129210 }, { "epoch": 6.417999403993245, "grad_norm": 0.15625, "learning_rate": 0.00028656402105890533, "loss": 0.513, "step": 129220 }, { "epoch": 6.418496076288864, "grad_norm": 0.1279296875, "learning_rate": 0.0002865242872752558, "loss": 0.4909, "step": 129230 }, { "epoch": 6.4189927485844835, "grad_norm": 0.14453125, "learning_rate": 0.00028648455349160627, "loss": 0.4891, "step": 129240 }, { "epoch": 6.419489420880104, "grad_norm": 0.14453125, "learning_rate": 0.0002864448197079567, "loss": 0.4745, "step": 129250 }, { "epoch": 6.419986093175723, "grad_norm": 0.16015625, "learning_rate": 0.00028640508592430716, "loss": 0.4804, "step": 129260 }, { "epoch": 6.420482765471342, "grad_norm": 0.130859375, "learning_rate": 0.00028636535214065763, "loss": 0.4977, "step": 129270 }, { "epoch": 6.420979437766961, "grad_norm": 0.134765625, "learning_rate": 0.00028632561835700805, "loss": 0.5057, "step": 129280 }, { "epoch": 6.421476110062581, "grad_norm": 0.13671875, "learning_rate": 0.0002862858845733585, "loss": 0.5113, "step": 129290 }, { "epoch": 6.4219727823582, "grad_norm": 0.12158203125, "learning_rate": 0.00028624615078970894, "loss": 0.4901, "step": 129300 }, { "epoch": 6.422469454653819, "grad_norm": 0.146484375, "learning_rate": 0.00028620641700605946, "loss": 0.5311, "step": 129310 }, { "epoch": 6.422966126949439, "grad_norm": 0.15625, "learning_rate": 0.0002861666832224099, "loss": 0.4552, "step": 129320 }, { "epoch": 6.423462799245058, "grad_norm": 0.1455078125, "learning_rate": 0.0002861269494387603, "loss": 0.4717, "step": 129330 }, { "epoch": 6.423959471540678, "grad_norm": 0.123046875, "learning_rate": 0.00028608721565511077, "loss": 0.4843, "step": 129340 }, { "epoch": 6.424456143836297, "grad_norm": 0.1484375, "learning_rate": 0.00028604748187146124, "loss": 0.5129, "step": 129350 }, { "epoch": 6.424952816131916, "grad_norm": 0.126953125, "learning_rate": 0.00028600774808781166, "loss": 0.4949, "step": 129360 }, { "epoch": 6.425449488427535, "grad_norm": 0.142578125, "learning_rate": 0.0002859680143041621, "loss": 0.4898, "step": 129370 }, { "epoch": 6.4259461607231545, "grad_norm": 0.1357421875, "learning_rate": 0.00028592828052051254, "loss": 0.4962, "step": 129380 }, { "epoch": 6.426442833018775, "grad_norm": 0.1279296875, "learning_rate": 0.00028588854673686307, "loss": 0.4867, "step": 129390 }, { "epoch": 6.426939505314394, "grad_norm": 0.1533203125, "learning_rate": 0.0002858488129532135, "loss": 0.5046, "step": 129400 }, { "epoch": 6.427436177610013, "grad_norm": 0.1474609375, "learning_rate": 0.0002858090791695639, "loss": 0.4614, "step": 129410 }, { "epoch": 6.427932849905632, "grad_norm": 0.1259765625, "learning_rate": 0.0002857693453859144, "loss": 0.4749, "step": 129420 }, { "epoch": 6.4284295222012515, "grad_norm": 0.12890625, "learning_rate": 0.00028572961160226485, "loss": 0.5002, "step": 129430 }, { "epoch": 6.428926194496871, "grad_norm": 0.14453125, "learning_rate": 0.00028568987781861526, "loss": 0.4991, "step": 129440 }, { "epoch": 6.42942286679249, "grad_norm": 0.1298828125, "learning_rate": 0.00028565014403496574, "loss": 0.4621, "step": 129450 }, { "epoch": 6.429919539088109, "grad_norm": 0.130859375, "learning_rate": 0.0002856104102513162, "loss": 0.4931, "step": 129460 }, { "epoch": 6.430416211383729, "grad_norm": 0.14453125, "learning_rate": 0.0002855706764676667, "loss": 0.4974, "step": 129470 }, { "epoch": 6.4309128836793485, "grad_norm": 0.1201171875, "learning_rate": 0.0002855309426840171, "loss": 0.4677, "step": 129480 }, { "epoch": 6.431409555974968, "grad_norm": 0.142578125, "learning_rate": 0.0002854912089003675, "loss": 0.4817, "step": 129490 }, { "epoch": 6.431906228270587, "grad_norm": 0.1435546875, "learning_rate": 0.00028545147511671804, "loss": 0.4852, "step": 129500 }, { "epoch": 6.432402900566206, "grad_norm": 0.15234375, "learning_rate": 0.00028541174133306845, "loss": 0.4982, "step": 129510 }, { "epoch": 6.4328995728618255, "grad_norm": 0.14453125, "learning_rate": 0.00028537200754941887, "loss": 0.5092, "step": 129520 }, { "epoch": 6.433396245157445, "grad_norm": 0.150390625, "learning_rate": 0.00028533227376576934, "loss": 0.476, "step": 129530 }, { "epoch": 6.433892917453065, "grad_norm": 0.12890625, "learning_rate": 0.0002852925399821198, "loss": 0.4969, "step": 129540 }, { "epoch": 6.434389589748684, "grad_norm": 0.1416015625, "learning_rate": 0.0002852528061984703, "loss": 0.4964, "step": 129550 }, { "epoch": 6.434886262044303, "grad_norm": 0.150390625, "learning_rate": 0.0002852130724148207, "loss": 0.484, "step": 129560 }, { "epoch": 6.4353829343399225, "grad_norm": 0.1298828125, "learning_rate": 0.0002851733386311712, "loss": 0.4956, "step": 129570 }, { "epoch": 6.435879606635542, "grad_norm": 0.1796875, "learning_rate": 0.00028513360484752165, "loss": 0.5129, "step": 129580 }, { "epoch": 6.436376278931161, "grad_norm": 0.1484375, "learning_rate": 0.00028509387106387206, "loss": 0.5242, "step": 129590 }, { "epoch": 6.43687295122678, "grad_norm": 0.126953125, "learning_rate": 0.00028505413728022253, "loss": 0.4968, "step": 129600 }, { "epoch": 6.4373696235224, "grad_norm": 0.1279296875, "learning_rate": 0.000285014403496573, "loss": 0.4754, "step": 129610 }, { "epoch": 6.4378662958180195, "grad_norm": 0.1669921875, "learning_rate": 0.0002849746697129234, "loss": 0.5006, "step": 129620 }, { "epoch": 6.438362968113639, "grad_norm": 0.1376953125, "learning_rate": 0.0002849349359292739, "loss": 0.4809, "step": 129630 }, { "epoch": 6.438859640409258, "grad_norm": 0.1416015625, "learning_rate": 0.0002848952021456243, "loss": 0.5065, "step": 129640 }, { "epoch": 6.439356312704877, "grad_norm": 0.13671875, "learning_rate": 0.0002848554683619748, "loss": 0.5027, "step": 129650 }, { "epoch": 6.439852985000496, "grad_norm": 0.1728515625, "learning_rate": 0.00028481573457832525, "loss": 0.4714, "step": 129660 }, { "epoch": 6.440349657296116, "grad_norm": 0.134765625, "learning_rate": 0.00028477600079467567, "loss": 0.5001, "step": 129670 }, { "epoch": 6.440846329591736, "grad_norm": 0.1435546875, "learning_rate": 0.00028473626701102614, "loss": 0.5064, "step": 129680 }, { "epoch": 6.441343001887355, "grad_norm": 0.1435546875, "learning_rate": 0.0002846965332273766, "loss": 0.4961, "step": 129690 }, { "epoch": 6.441839674182974, "grad_norm": 0.1279296875, "learning_rate": 0.00028465679944372703, "loss": 0.5148, "step": 129700 }, { "epoch": 6.442336346478593, "grad_norm": 0.1279296875, "learning_rate": 0.0002846170656600775, "loss": 0.5455, "step": 129710 }, { "epoch": 6.442833018774213, "grad_norm": 0.1279296875, "learning_rate": 0.0002845773318764279, "loss": 0.4687, "step": 129720 }, { "epoch": 6.443329691069832, "grad_norm": 0.12451171875, "learning_rate": 0.0002845375980927784, "loss": 0.4709, "step": 129730 }, { "epoch": 6.443826363365451, "grad_norm": 0.12451171875, "learning_rate": 0.00028449786430912886, "loss": 0.4912, "step": 129740 }, { "epoch": 6.444323035661071, "grad_norm": 0.1669921875, "learning_rate": 0.0002844581305254793, "loss": 0.5387, "step": 129750 }, { "epoch": 6.4448197079566905, "grad_norm": 0.123046875, "learning_rate": 0.00028441839674182975, "loss": 0.4964, "step": 129760 }, { "epoch": 6.44531638025231, "grad_norm": 0.15234375, "learning_rate": 0.0002843786629581802, "loss": 0.4962, "step": 129770 }, { "epoch": 6.445813052547929, "grad_norm": 0.123046875, "learning_rate": 0.00028433892917453064, "loss": 0.504, "step": 129780 }, { "epoch": 6.446309724843548, "grad_norm": 0.12451171875, "learning_rate": 0.0002842991953908811, "loss": 0.5037, "step": 129790 }, { "epoch": 6.446806397139167, "grad_norm": 0.1259765625, "learning_rate": 0.0002842594616072316, "loss": 0.4845, "step": 129800 }, { "epoch": 6.447303069434787, "grad_norm": 0.1376953125, "learning_rate": 0.000284219727823582, "loss": 0.5116, "step": 129810 }, { "epoch": 6.447799741730407, "grad_norm": 0.1171875, "learning_rate": 0.00028417999403993247, "loss": 0.4593, "step": 129820 }, { "epoch": 6.448296414026026, "grad_norm": 0.13671875, "learning_rate": 0.0002841402602562829, "loss": 0.4668, "step": 129830 }, { "epoch": 6.448793086321645, "grad_norm": 0.12353515625, "learning_rate": 0.0002841005264726334, "loss": 0.4476, "step": 129840 }, { "epoch": 6.449289758617264, "grad_norm": 0.146484375, "learning_rate": 0.00028406079268898383, "loss": 0.481, "step": 129850 }, { "epoch": 6.449786430912884, "grad_norm": 0.138671875, "learning_rate": 0.00028402105890533425, "loss": 0.5138, "step": 129860 }, { "epoch": 6.450283103208503, "grad_norm": 0.1337890625, "learning_rate": 0.0002839813251216847, "loss": 0.4492, "step": 129870 }, { "epoch": 6.450779775504122, "grad_norm": 0.1376953125, "learning_rate": 0.0002839415913380352, "loss": 0.4905, "step": 129880 }, { "epoch": 6.451276447799742, "grad_norm": 0.1337890625, "learning_rate": 0.0002839018575543856, "loss": 0.5089, "step": 129890 }, { "epoch": 6.451773120095361, "grad_norm": 0.140625, "learning_rate": 0.0002838621237707361, "loss": 0.4971, "step": 129900 }, { "epoch": 6.452269792390981, "grad_norm": 0.12255859375, "learning_rate": 0.00028382238998708655, "loss": 0.5208, "step": 129910 }, { "epoch": 6.4527664646866, "grad_norm": 0.171875, "learning_rate": 0.000283782656203437, "loss": 0.4988, "step": 129920 }, { "epoch": 6.453263136982219, "grad_norm": 0.134765625, "learning_rate": 0.00028374292241978744, "loss": 0.4563, "step": 129930 }, { "epoch": 6.453759809277838, "grad_norm": 0.140625, "learning_rate": 0.00028370318863613786, "loss": 0.5077, "step": 129940 }, { "epoch": 6.4542564815734575, "grad_norm": 0.15625, "learning_rate": 0.0002836634548524883, "loss": 0.4954, "step": 129950 }, { "epoch": 6.454753153869077, "grad_norm": 0.1376953125, "learning_rate": 0.0002836237210688388, "loss": 0.4795, "step": 129960 }, { "epoch": 6.455249826164697, "grad_norm": 0.134765625, "learning_rate": 0.00028358398728518927, "loss": 0.4732, "step": 129970 }, { "epoch": 6.455746498460316, "grad_norm": 0.1572265625, "learning_rate": 0.0002835442535015397, "loss": 0.4989, "step": 129980 }, { "epoch": 6.456243170755935, "grad_norm": 0.1337890625, "learning_rate": 0.00028350451971789016, "loss": 0.4869, "step": 129990 }, { "epoch": 6.456739843051555, "grad_norm": 0.171875, "learning_rate": 0.00028346478593424063, "loss": 0.488, "step": 130000 }, { "epoch": 6.457236515347174, "grad_norm": 0.11767578125, "learning_rate": 0.00028342505215059105, "loss": 0.4937, "step": 130010 }, { "epoch": 6.457733187642793, "grad_norm": 0.1416015625, "learning_rate": 0.00028338531836694146, "loss": 0.5094, "step": 130020 }, { "epoch": 6.458229859938412, "grad_norm": 0.1376953125, "learning_rate": 0.000283345584583292, "loss": 0.4815, "step": 130030 }, { "epoch": 6.458726532234032, "grad_norm": 0.130859375, "learning_rate": 0.0002833058507996424, "loss": 0.4699, "step": 130040 }, { "epoch": 6.459223204529652, "grad_norm": 0.1279296875, "learning_rate": 0.0002832661170159929, "loss": 0.4936, "step": 130050 }, { "epoch": 6.459719876825271, "grad_norm": 0.1591796875, "learning_rate": 0.0002832263832323433, "loss": 0.5095, "step": 130060 }, { "epoch": 6.46021654912089, "grad_norm": 0.13671875, "learning_rate": 0.00028318664944869377, "loss": 0.4635, "step": 130070 }, { "epoch": 6.460713221416509, "grad_norm": 0.1357421875, "learning_rate": 0.00028314691566504424, "loss": 0.5229, "step": 130080 }, { "epoch": 6.4612098937121285, "grad_norm": 0.1181640625, "learning_rate": 0.00028310718188139466, "loss": 0.4524, "step": 130090 }, { "epoch": 6.461706566007748, "grad_norm": 0.1650390625, "learning_rate": 0.0002830674480977451, "loss": 0.4919, "step": 130100 }, { "epoch": 6.462203238303368, "grad_norm": 0.1259765625, "learning_rate": 0.0002830277143140956, "loss": 0.4804, "step": 130110 }, { "epoch": 6.462699910598987, "grad_norm": 0.14453125, "learning_rate": 0.000282987980530446, "loss": 0.4807, "step": 130120 }, { "epoch": 6.463196582894606, "grad_norm": 0.1318359375, "learning_rate": 0.0002829482467467965, "loss": 0.4682, "step": 130130 }, { "epoch": 6.4636932551902255, "grad_norm": 0.1416015625, "learning_rate": 0.00028290851296314696, "loss": 0.4976, "step": 130140 }, { "epoch": 6.464189927485845, "grad_norm": 0.12890625, "learning_rate": 0.0002828687791794974, "loss": 0.5323, "step": 130150 }, { "epoch": 6.464686599781464, "grad_norm": 0.1357421875, "learning_rate": 0.00028282904539584785, "loss": 0.483, "step": 130160 }, { "epoch": 6.465183272077083, "grad_norm": 0.138671875, "learning_rate": 0.00028278931161219826, "loss": 0.5056, "step": 130170 }, { "epoch": 6.465679944372702, "grad_norm": 0.1357421875, "learning_rate": 0.00028274957782854873, "loss": 0.5006, "step": 130180 }, { "epoch": 6.4661766166683226, "grad_norm": 0.1494140625, "learning_rate": 0.0002827098440448992, "loss": 0.4996, "step": 130190 }, { "epoch": 6.466673288963942, "grad_norm": 0.2060546875, "learning_rate": 0.0002826701102612496, "loss": 0.5152, "step": 130200 }, { "epoch": 6.467169961259561, "grad_norm": 0.1416015625, "learning_rate": 0.0002826303764776001, "loss": 0.5057, "step": 130210 }, { "epoch": 6.46766663355518, "grad_norm": 0.1552734375, "learning_rate": 0.00028259064269395057, "loss": 0.5236, "step": 130220 }, { "epoch": 6.4681633058507995, "grad_norm": 0.12060546875, "learning_rate": 0.000282550908910301, "loss": 0.4987, "step": 130230 }, { "epoch": 6.468659978146419, "grad_norm": 0.1318359375, "learning_rate": 0.00028251117512665145, "loss": 0.5053, "step": 130240 }, { "epoch": 6.469156650442038, "grad_norm": 0.13671875, "learning_rate": 0.00028247144134300187, "loss": 0.4914, "step": 130250 }, { "epoch": 6.469653322737658, "grad_norm": 0.1591796875, "learning_rate": 0.00028243170755935234, "loss": 0.4776, "step": 130260 }, { "epoch": 6.470149995033277, "grad_norm": 0.134765625, "learning_rate": 0.0002823919737757028, "loss": 0.4943, "step": 130270 }, { "epoch": 6.4706466673288965, "grad_norm": 0.154296875, "learning_rate": 0.00028235223999205323, "loss": 0.4928, "step": 130280 }, { "epoch": 6.471143339624516, "grad_norm": 0.140625, "learning_rate": 0.0002823125062084037, "loss": 0.4775, "step": 130290 }, { "epoch": 6.471640011920135, "grad_norm": 0.162109375, "learning_rate": 0.0002822727724247542, "loss": 0.4926, "step": 130300 }, { "epoch": 6.472136684215754, "grad_norm": 0.119140625, "learning_rate": 0.0002822330386411046, "loss": 0.5075, "step": 130310 }, { "epoch": 6.472633356511373, "grad_norm": 0.19140625, "learning_rate": 0.00028219330485745506, "loss": 0.5062, "step": 130320 }, { "epoch": 6.4731300288069935, "grad_norm": 0.14453125, "learning_rate": 0.00028215357107380553, "loss": 0.4768, "step": 130330 }, { "epoch": 6.473626701102613, "grad_norm": 0.1259765625, "learning_rate": 0.00028211383729015595, "loss": 0.4855, "step": 130340 }, { "epoch": 6.474123373398232, "grad_norm": 0.1357421875, "learning_rate": 0.0002820741035065064, "loss": 0.4859, "step": 130350 }, { "epoch": 6.474620045693851, "grad_norm": 0.1220703125, "learning_rate": 0.00028203436972285684, "loss": 0.4693, "step": 130360 }, { "epoch": 6.47511671798947, "grad_norm": 0.1259765625, "learning_rate": 0.00028199463593920737, "loss": 0.4863, "step": 130370 }, { "epoch": 6.47561339028509, "grad_norm": 0.13671875, "learning_rate": 0.0002819549021555578, "loss": 0.5194, "step": 130380 }, { "epoch": 6.476110062580709, "grad_norm": 0.1455078125, "learning_rate": 0.0002819151683719082, "loss": 0.4944, "step": 130390 }, { "epoch": 6.476606734876329, "grad_norm": 0.138671875, "learning_rate": 0.00028187543458825867, "loss": 0.4976, "step": 130400 }, { "epoch": 6.477103407171948, "grad_norm": 0.130859375, "learning_rate": 0.00028183570080460914, "loss": 0.4932, "step": 130410 }, { "epoch": 6.477600079467567, "grad_norm": 0.1357421875, "learning_rate": 0.0002817959670209596, "loss": 0.4907, "step": 130420 }, { "epoch": 6.478096751763187, "grad_norm": 0.154296875, "learning_rate": 0.00028175623323731003, "loss": 0.4963, "step": 130430 }, { "epoch": 6.478593424058806, "grad_norm": 0.150390625, "learning_rate": 0.0002817164994536605, "loss": 0.4813, "step": 130440 }, { "epoch": 6.479090096354425, "grad_norm": 0.12890625, "learning_rate": 0.000281676765670011, "loss": 0.4851, "step": 130450 }, { "epoch": 6.479586768650044, "grad_norm": 0.1611328125, "learning_rate": 0.0002816370318863614, "loss": 0.4949, "step": 130460 }, { "epoch": 6.4800834409456645, "grad_norm": 0.1572265625, "learning_rate": 0.0002815972981027118, "loss": 0.5014, "step": 130470 }, { "epoch": 6.480580113241284, "grad_norm": 0.1611328125, "learning_rate": 0.0002815575643190623, "loss": 0.4666, "step": 130480 }, { "epoch": 6.481076785536903, "grad_norm": 0.15234375, "learning_rate": 0.00028151783053541275, "loss": 0.478, "step": 130490 }, { "epoch": 6.481573457832522, "grad_norm": 0.11083984375, "learning_rate": 0.0002814780967517632, "loss": 0.4738, "step": 130500 }, { "epoch": 6.482070130128141, "grad_norm": 0.1435546875, "learning_rate": 0.00028143836296811364, "loss": 0.4727, "step": 130510 }, { "epoch": 6.482566802423761, "grad_norm": 0.1455078125, "learning_rate": 0.0002813986291844641, "loss": 0.4706, "step": 130520 }, { "epoch": 6.48306347471938, "grad_norm": 0.1396484375, "learning_rate": 0.0002813588954008146, "loss": 0.5028, "step": 130530 }, { "epoch": 6.483560147015, "grad_norm": 0.1337890625, "learning_rate": 0.000281319161617165, "loss": 0.4949, "step": 130540 }, { "epoch": 6.484056819310619, "grad_norm": 0.11962890625, "learning_rate": 0.0002812794278335154, "loss": 0.4685, "step": 130550 }, { "epoch": 6.484553491606238, "grad_norm": 0.1806640625, "learning_rate": 0.00028123969404986594, "loss": 0.4842, "step": 130560 }, { "epoch": 6.485050163901858, "grad_norm": 0.150390625, "learning_rate": 0.00028119996026621636, "loss": 0.4985, "step": 130570 }, { "epoch": 6.485546836197477, "grad_norm": 0.1396484375, "learning_rate": 0.00028116022648256683, "loss": 0.5261, "step": 130580 }, { "epoch": 6.486043508493096, "grad_norm": 0.142578125, "learning_rate": 0.00028112049269891725, "loss": 0.5217, "step": 130590 }, { "epoch": 6.486540180788715, "grad_norm": 0.1513671875, "learning_rate": 0.0002810807589152677, "loss": 0.4858, "step": 130600 }, { "epoch": 6.487036853084335, "grad_norm": 0.1376953125, "learning_rate": 0.0002810410251316182, "loss": 0.4955, "step": 130610 }, { "epoch": 6.487533525379955, "grad_norm": 0.138671875, "learning_rate": 0.0002810012913479686, "loss": 0.5115, "step": 130620 }, { "epoch": 6.488030197675574, "grad_norm": 0.1630859375, "learning_rate": 0.0002809615575643191, "loss": 0.5005, "step": 130630 }, { "epoch": 6.488526869971193, "grad_norm": 0.1416015625, "learning_rate": 0.00028092182378066955, "loss": 0.5045, "step": 130640 }, { "epoch": 6.489023542266812, "grad_norm": 0.162109375, "learning_rate": 0.00028088208999701997, "loss": 0.5191, "step": 130650 }, { "epoch": 6.4895202145624316, "grad_norm": 0.1298828125, "learning_rate": 0.00028084235621337044, "loss": 0.5194, "step": 130660 }, { "epoch": 6.490016886858051, "grad_norm": 0.140625, "learning_rate": 0.0002808026224297209, "loss": 0.5083, "step": 130670 }, { "epoch": 6.49051355915367, "grad_norm": 0.140625, "learning_rate": 0.0002807628886460713, "loss": 0.4884, "step": 130680 }, { "epoch": 6.49101023144929, "grad_norm": 0.1279296875, "learning_rate": 0.0002807231548624218, "loss": 0.4933, "step": 130690 }, { "epoch": 6.491506903744909, "grad_norm": 0.1318359375, "learning_rate": 0.0002806834210787722, "loss": 0.5009, "step": 130700 }, { "epoch": 6.492003576040529, "grad_norm": 0.1220703125, "learning_rate": 0.0002806436872951227, "loss": 0.4687, "step": 130710 }, { "epoch": 6.492500248336148, "grad_norm": 0.1298828125, "learning_rate": 0.00028060395351147316, "loss": 0.485, "step": 130720 }, { "epoch": 6.492996920631767, "grad_norm": 0.1298828125, "learning_rate": 0.0002805642197278236, "loss": 0.4935, "step": 130730 }, { "epoch": 6.493493592927386, "grad_norm": 0.1376953125, "learning_rate": 0.00028052448594417405, "loss": 0.502, "step": 130740 }, { "epoch": 6.4939902652230055, "grad_norm": 0.1474609375, "learning_rate": 0.0002804847521605245, "loss": 0.4907, "step": 130750 }, { "epoch": 6.494486937518626, "grad_norm": 0.134765625, "learning_rate": 0.00028044501837687493, "loss": 0.5057, "step": 130760 }, { "epoch": 6.494983609814245, "grad_norm": 0.15625, "learning_rate": 0.0002804052845932254, "loss": 0.5142, "step": 130770 }, { "epoch": 6.495480282109864, "grad_norm": 0.185546875, "learning_rate": 0.0002803655508095758, "loss": 0.4872, "step": 130780 }, { "epoch": 6.495976954405483, "grad_norm": 0.1279296875, "learning_rate": 0.00028032581702592635, "loss": 0.4561, "step": 130790 }, { "epoch": 6.4964736267011025, "grad_norm": 0.12353515625, "learning_rate": 0.00028028608324227677, "loss": 0.5, "step": 130800 }, { "epoch": 6.496970298996722, "grad_norm": 0.138671875, "learning_rate": 0.0002802463494586272, "loss": 0.4935, "step": 130810 }, { "epoch": 6.497466971292341, "grad_norm": 0.119140625, "learning_rate": 0.00028020661567497765, "loss": 0.4962, "step": 130820 }, { "epoch": 6.49796364358796, "grad_norm": 0.1298828125, "learning_rate": 0.0002801668818913281, "loss": 0.4927, "step": 130830 }, { "epoch": 6.49846031588358, "grad_norm": 0.15234375, "learning_rate": 0.00028012714810767854, "loss": 0.4761, "step": 130840 }, { "epoch": 6.4989569881791995, "grad_norm": 0.1279296875, "learning_rate": 0.000280087414324029, "loss": 0.4948, "step": 130850 }, { "epoch": 6.499453660474819, "grad_norm": 0.1572265625, "learning_rate": 0.0002800476805403795, "loss": 0.4841, "step": 130860 }, { "epoch": 6.499950332770438, "grad_norm": 0.1728515625, "learning_rate": 0.00028000794675672996, "loss": 0.5016, "step": 130870 }, { "epoch": 6.500447005066057, "grad_norm": 0.12255859375, "learning_rate": 0.0002799682129730804, "loss": 0.5021, "step": 130880 }, { "epoch": 6.500943677361676, "grad_norm": 0.158203125, "learning_rate": 0.0002799284791894308, "loss": 0.4649, "step": 130890 }, { "epoch": 6.501440349657296, "grad_norm": 0.1337890625, "learning_rate": 0.0002798887454057813, "loss": 0.527, "step": 130900 }, { "epoch": 6.501937021952916, "grad_norm": 0.126953125, "learning_rate": 0.00027984901162213173, "loss": 0.4981, "step": 130910 }, { "epoch": 6.502433694248535, "grad_norm": 0.1328125, "learning_rate": 0.00027980927783848215, "loss": 0.4976, "step": 130920 }, { "epoch": 6.502930366544154, "grad_norm": 0.1298828125, "learning_rate": 0.0002797695440548326, "loss": 0.4933, "step": 130930 }, { "epoch": 6.5034270388397735, "grad_norm": 0.1484375, "learning_rate": 0.0002797298102711831, "loss": 0.5107, "step": 130940 }, { "epoch": 6.503923711135393, "grad_norm": 0.1416015625, "learning_rate": 0.00027969007648753357, "loss": 0.5004, "step": 130950 }, { "epoch": 6.504420383431012, "grad_norm": 0.12255859375, "learning_rate": 0.000279650342703884, "loss": 0.4815, "step": 130960 }, { "epoch": 6.504917055726631, "grad_norm": 0.1279296875, "learning_rate": 0.00027961060892023445, "loss": 0.4926, "step": 130970 }, { "epoch": 6.505413728022251, "grad_norm": 0.1455078125, "learning_rate": 0.0002795708751365849, "loss": 0.4914, "step": 130980 }, { "epoch": 6.5059104003178705, "grad_norm": 0.11865234375, "learning_rate": 0.00027953114135293534, "loss": 0.4831, "step": 130990 }, { "epoch": 6.50640707261349, "grad_norm": 0.14453125, "learning_rate": 0.00027949140756928576, "loss": 0.4801, "step": 131000 }, { "epoch": 6.506903744909109, "grad_norm": 0.189453125, "learning_rate": 0.0002794516737856363, "loss": 0.5064, "step": 131010 }, { "epoch": 6.507400417204728, "grad_norm": 0.1279296875, "learning_rate": 0.0002794119400019867, "loss": 0.4795, "step": 131020 }, { "epoch": 6.507897089500347, "grad_norm": 0.1376953125, "learning_rate": 0.0002793722062183372, "loss": 0.4814, "step": 131030 }, { "epoch": 6.508393761795967, "grad_norm": 0.1181640625, "learning_rate": 0.0002793324724346876, "loss": 0.4836, "step": 131040 }, { "epoch": 6.508890434091587, "grad_norm": 0.1357421875, "learning_rate": 0.00027929273865103806, "loss": 0.4854, "step": 131050 }, { "epoch": 6.509387106387206, "grad_norm": 0.140625, "learning_rate": 0.00027925300486738853, "loss": 0.4736, "step": 131060 }, { "epoch": 6.509883778682825, "grad_norm": 0.1337890625, "learning_rate": 0.00027921327108373895, "loss": 0.4926, "step": 131070 }, { "epoch": 6.510380450978444, "grad_norm": 0.1484375, "learning_rate": 0.00027917353730008937, "loss": 0.487, "step": 131080 }, { "epoch": 6.510877123274064, "grad_norm": 0.12890625, "learning_rate": 0.0002791338035164399, "loss": 0.4976, "step": 131090 }, { "epoch": 6.511373795569683, "grad_norm": 0.130859375, "learning_rate": 0.0002790940697327903, "loss": 0.4842, "step": 131100 }, { "epoch": 6.511870467865302, "grad_norm": 0.1259765625, "learning_rate": 0.0002790543359491408, "loss": 0.4664, "step": 131110 }, { "epoch": 6.512367140160922, "grad_norm": 0.1396484375, "learning_rate": 0.0002790146021654912, "loss": 0.4964, "step": 131120 }, { "epoch": 6.5128638124565414, "grad_norm": 0.1455078125, "learning_rate": 0.00027897486838184167, "loss": 0.5052, "step": 131130 }, { "epoch": 6.513360484752161, "grad_norm": 0.1396484375, "learning_rate": 0.00027893513459819214, "loss": 0.4974, "step": 131140 }, { "epoch": 6.51385715704778, "grad_norm": 0.13671875, "learning_rate": 0.00027889540081454256, "loss": 0.4822, "step": 131150 }, { "epoch": 6.514353829343399, "grad_norm": 0.1435546875, "learning_rate": 0.00027885566703089303, "loss": 0.5037, "step": 131160 }, { "epoch": 6.514850501639018, "grad_norm": 0.130859375, "learning_rate": 0.0002788159332472435, "loss": 0.4931, "step": 131170 }, { "epoch": 6.515347173934638, "grad_norm": 0.1416015625, "learning_rate": 0.0002787761994635939, "loss": 0.501, "step": 131180 }, { "epoch": 6.515843846230258, "grad_norm": 0.13671875, "learning_rate": 0.0002787364656799444, "loss": 0.4913, "step": 131190 }, { "epoch": 6.516340518525877, "grad_norm": 0.134765625, "learning_rate": 0.00027869673189629486, "loss": 0.5102, "step": 131200 }, { "epoch": 6.516837190821496, "grad_norm": 0.130859375, "learning_rate": 0.0002786569981126453, "loss": 0.5017, "step": 131210 }, { "epoch": 6.517333863117115, "grad_norm": 0.1337890625, "learning_rate": 0.00027861726432899575, "loss": 0.5112, "step": 131220 }, { "epoch": 6.517830535412735, "grad_norm": 0.1328125, "learning_rate": 0.00027857753054534617, "loss": 0.4644, "step": 131230 }, { "epoch": 6.518327207708354, "grad_norm": 0.1259765625, "learning_rate": 0.0002785377967616967, "loss": 0.48, "step": 131240 }, { "epoch": 6.518823880003973, "grad_norm": 0.130859375, "learning_rate": 0.0002784980629780471, "loss": 0.4718, "step": 131250 }, { "epoch": 6.519320552299593, "grad_norm": 0.130859375, "learning_rate": 0.0002784583291943975, "loss": 0.4539, "step": 131260 }, { "epoch": 6.519817224595212, "grad_norm": 0.1357421875, "learning_rate": 0.000278418595410748, "loss": 0.5051, "step": 131270 }, { "epoch": 6.520313896890832, "grad_norm": 0.13671875, "learning_rate": 0.00027837886162709847, "loss": 0.5004, "step": 131280 }, { "epoch": 6.520810569186451, "grad_norm": 0.14453125, "learning_rate": 0.0002783391278434489, "loss": 0.4924, "step": 131290 }, { "epoch": 6.52130724148207, "grad_norm": 0.1279296875, "learning_rate": 0.00027829939405979936, "loss": 0.4745, "step": 131300 }, { "epoch": 6.521803913777689, "grad_norm": 0.12890625, "learning_rate": 0.00027825966027614983, "loss": 0.489, "step": 131310 }, { "epoch": 6.5223005860733085, "grad_norm": 0.1513671875, "learning_rate": 0.0002782199264925003, "loss": 0.5139, "step": 131320 }, { "epoch": 6.522797258368929, "grad_norm": 0.1650390625, "learning_rate": 0.0002781801927088507, "loss": 0.5139, "step": 131330 }, { "epoch": 6.523293930664548, "grad_norm": 0.12353515625, "learning_rate": 0.00027814045892520114, "loss": 0.4829, "step": 131340 }, { "epoch": 6.523790602960167, "grad_norm": 0.1416015625, "learning_rate": 0.0002781007251415516, "loss": 0.4999, "step": 131350 }, { "epoch": 6.524287275255786, "grad_norm": 0.1455078125, "learning_rate": 0.0002780609913579021, "loss": 0.4658, "step": 131360 }, { "epoch": 6.524783947551406, "grad_norm": 0.1455078125, "learning_rate": 0.0002780212575742525, "loss": 0.4999, "step": 131370 }, { "epoch": 6.525280619847025, "grad_norm": 0.146484375, "learning_rate": 0.00027798152379060297, "loss": 0.4937, "step": 131380 }, { "epoch": 6.525777292142644, "grad_norm": 0.15234375, "learning_rate": 0.00027794179000695344, "loss": 0.4633, "step": 131390 }, { "epoch": 6.526273964438264, "grad_norm": 0.154296875, "learning_rate": 0.0002779020562233039, "loss": 0.4726, "step": 131400 }, { "epoch": 6.526770636733883, "grad_norm": 0.1728515625, "learning_rate": 0.0002778623224396543, "loss": 0.5023, "step": 131410 }, { "epoch": 6.527267309029503, "grad_norm": 0.140625, "learning_rate": 0.00027782258865600474, "loss": 0.4984, "step": 131420 }, { "epoch": 6.527763981325122, "grad_norm": 0.13671875, "learning_rate": 0.00027778285487235527, "loss": 0.5, "step": 131430 }, { "epoch": 6.528260653620741, "grad_norm": 0.1513671875, "learning_rate": 0.0002777431210887057, "loss": 0.4708, "step": 131440 }, { "epoch": 6.52875732591636, "grad_norm": 0.1259765625, "learning_rate": 0.0002777033873050561, "loss": 0.4609, "step": 131450 }, { "epoch": 6.5292539982119795, "grad_norm": 0.1484375, "learning_rate": 0.0002776636535214066, "loss": 0.4748, "step": 131460 }, { "epoch": 6.529750670507599, "grad_norm": 0.1318359375, "learning_rate": 0.00027762391973775705, "loss": 0.5029, "step": 131470 }, { "epoch": 6.530247342803218, "grad_norm": 0.1337890625, "learning_rate": 0.0002775841859541075, "loss": 0.4988, "step": 131480 }, { "epoch": 6.530744015098838, "grad_norm": 0.1337890625, "learning_rate": 0.00027754445217045793, "loss": 0.5001, "step": 131490 }, { "epoch": 6.531240687394457, "grad_norm": 0.134765625, "learning_rate": 0.0002775047183868084, "loss": 0.4659, "step": 131500 }, { "epoch": 6.5317373596900765, "grad_norm": 0.1318359375, "learning_rate": 0.0002774649846031589, "loss": 0.4866, "step": 131510 }, { "epoch": 6.532234031985696, "grad_norm": 0.134765625, "learning_rate": 0.0002774252508195093, "loss": 0.4882, "step": 131520 }, { "epoch": 6.532730704281315, "grad_norm": 0.1669921875, "learning_rate": 0.0002773855170358597, "loss": 0.4618, "step": 131530 }, { "epoch": 6.533227376576934, "grad_norm": 0.140625, "learning_rate": 0.00027734578325221024, "loss": 0.4812, "step": 131540 }, { "epoch": 6.533724048872553, "grad_norm": 0.1552734375, "learning_rate": 0.00027730604946856065, "loss": 0.4814, "step": 131550 }, { "epoch": 6.5342207211681735, "grad_norm": 0.1513671875, "learning_rate": 0.0002772663156849111, "loss": 0.4608, "step": 131560 }, { "epoch": 6.534717393463793, "grad_norm": 0.1240234375, "learning_rate": 0.00027722658190126154, "loss": 0.4921, "step": 131570 }, { "epoch": 6.535214065759412, "grad_norm": 0.1259765625, "learning_rate": 0.000277186848117612, "loss": 0.482, "step": 131580 }, { "epoch": 6.535710738055031, "grad_norm": 0.1474609375, "learning_rate": 0.0002771471143339625, "loss": 0.4734, "step": 131590 }, { "epoch": 6.5362074103506504, "grad_norm": 0.126953125, "learning_rate": 0.0002771073805503129, "loss": 0.4999, "step": 131600 }, { "epoch": 6.53670408264627, "grad_norm": 0.1640625, "learning_rate": 0.0002770676467666634, "loss": 0.4949, "step": 131610 }, { "epoch": 6.537200754941889, "grad_norm": 0.12158203125, "learning_rate": 0.00027702791298301385, "loss": 0.5104, "step": 131620 }, { "epoch": 6.537697427237509, "grad_norm": 0.134765625, "learning_rate": 0.00027698817919936426, "loss": 0.5106, "step": 131630 }, { "epoch": 6.538194099533128, "grad_norm": 0.1318359375, "learning_rate": 0.00027694844541571473, "loss": 0.4902, "step": 131640 }, { "epoch": 6.5386907718287475, "grad_norm": 0.15234375, "learning_rate": 0.00027690871163206515, "loss": 0.4874, "step": 131650 }, { "epoch": 6.539187444124367, "grad_norm": 0.134765625, "learning_rate": 0.0002768689778484156, "loss": 0.4876, "step": 131660 }, { "epoch": 6.539684116419986, "grad_norm": 0.142578125, "learning_rate": 0.0002768292440647661, "loss": 0.5096, "step": 131670 }, { "epoch": 6.540180788715605, "grad_norm": 0.1357421875, "learning_rate": 0.0002767895102811165, "loss": 0.4685, "step": 131680 }, { "epoch": 6.540677461011224, "grad_norm": 0.12451171875, "learning_rate": 0.000276749776497467, "loss": 0.5019, "step": 131690 }, { "epoch": 6.5411741333068445, "grad_norm": 0.1357421875, "learning_rate": 0.00027671004271381745, "loss": 0.5112, "step": 131700 }, { "epoch": 6.541670805602464, "grad_norm": 0.1357421875, "learning_rate": 0.00027667030893016787, "loss": 0.4696, "step": 131710 }, { "epoch": 6.542167477898083, "grad_norm": 0.1435546875, "learning_rate": 0.00027663057514651834, "loss": 0.5079, "step": 131720 }, { "epoch": 6.542664150193702, "grad_norm": 0.1279296875, "learning_rate": 0.0002765908413628688, "loss": 0.4758, "step": 131730 }, { "epoch": 6.543160822489321, "grad_norm": 0.12255859375, "learning_rate": 0.00027655110757921923, "loss": 0.5078, "step": 131740 }, { "epoch": 6.543657494784941, "grad_norm": 0.12890625, "learning_rate": 0.0002765113737955697, "loss": 0.4915, "step": 131750 }, { "epoch": 6.54415416708056, "grad_norm": 0.1298828125, "learning_rate": 0.0002764716400119201, "loss": 0.4816, "step": 131760 }, { "epoch": 6.54465083937618, "grad_norm": 0.1298828125, "learning_rate": 0.00027643190622827064, "loss": 0.4976, "step": 131770 }, { "epoch": 6.545147511671799, "grad_norm": 0.1484375, "learning_rate": 0.00027639217244462106, "loss": 0.5056, "step": 131780 }, { "epoch": 6.545644183967418, "grad_norm": 0.1318359375, "learning_rate": 0.0002763524386609715, "loss": 0.4844, "step": 131790 }, { "epoch": 6.546140856263038, "grad_norm": 0.138671875, "learning_rate": 0.00027631270487732195, "loss": 0.5009, "step": 131800 }, { "epoch": 6.546637528558657, "grad_norm": 0.1591796875, "learning_rate": 0.0002762729710936724, "loss": 0.5147, "step": 131810 }, { "epoch": 6.547134200854276, "grad_norm": 0.1328125, "learning_rate": 0.00027623323731002284, "loss": 0.472, "step": 131820 }, { "epoch": 6.547630873149895, "grad_norm": 0.15625, "learning_rate": 0.0002761935035263733, "loss": 0.512, "step": 131830 }, { "epoch": 6.5481275454455155, "grad_norm": 0.12451171875, "learning_rate": 0.0002761537697427238, "loss": 0.5148, "step": 131840 }, { "epoch": 6.548624217741135, "grad_norm": 0.1318359375, "learning_rate": 0.00027611403595907425, "loss": 0.4977, "step": 131850 }, { "epoch": 6.549120890036754, "grad_norm": 0.142578125, "learning_rate": 0.00027607430217542467, "loss": 0.4998, "step": 131860 }, { "epoch": 6.549617562332373, "grad_norm": 0.12890625, "learning_rate": 0.0002760345683917751, "loss": 0.5116, "step": 131870 }, { "epoch": 6.550114234627992, "grad_norm": 0.13671875, "learning_rate": 0.00027599483460812556, "loss": 0.5357, "step": 131880 }, { "epoch": 6.550610906923612, "grad_norm": 0.1416015625, "learning_rate": 0.00027595510082447603, "loss": 0.4937, "step": 131890 }, { "epoch": 6.551107579219231, "grad_norm": 0.1455078125, "learning_rate": 0.00027591536704082645, "loss": 0.5095, "step": 131900 }, { "epoch": 6.551604251514851, "grad_norm": 0.1298828125, "learning_rate": 0.0002758756332571769, "loss": 0.49, "step": 131910 }, { "epoch": 6.55210092381047, "grad_norm": 0.12890625, "learning_rate": 0.0002758358994735274, "loss": 0.5151, "step": 131920 }, { "epoch": 6.552597596106089, "grad_norm": 0.146484375, "learning_rate": 0.00027579616568987786, "loss": 0.4589, "step": 131930 }, { "epoch": 6.553094268401709, "grad_norm": 0.111328125, "learning_rate": 0.0002757564319062283, "loss": 0.4864, "step": 131940 }, { "epoch": 6.553590940697328, "grad_norm": 0.134765625, "learning_rate": 0.0002757166981225787, "loss": 0.4945, "step": 131950 }, { "epoch": 6.554087612992947, "grad_norm": 0.11474609375, "learning_rate": 0.0002756769643389292, "loss": 0.4959, "step": 131960 }, { "epoch": 6.554584285288566, "grad_norm": 0.140625, "learning_rate": 0.00027563723055527964, "loss": 0.4824, "step": 131970 }, { "epoch": 6.555080957584186, "grad_norm": 0.1298828125, "learning_rate": 0.00027559749677163005, "loss": 0.5406, "step": 131980 }, { "epoch": 6.555577629879806, "grad_norm": 0.140625, "learning_rate": 0.0002755577629879805, "loss": 0.4884, "step": 131990 }, { "epoch": 6.556074302175425, "grad_norm": 0.1484375, "learning_rate": 0.000275518029204331, "loss": 0.4944, "step": 132000 }, { "epoch": 6.556570974471044, "grad_norm": 0.1533203125, "learning_rate": 0.00027547829542068147, "loss": 0.49, "step": 132010 }, { "epoch": 6.557067646766663, "grad_norm": 0.138671875, "learning_rate": 0.0002754385616370319, "loss": 0.5078, "step": 132020 }, { "epoch": 6.5575643190622825, "grad_norm": 0.18359375, "learning_rate": 0.00027539882785338236, "loss": 0.4808, "step": 132030 }, { "epoch": 6.558060991357902, "grad_norm": 0.140625, "learning_rate": 0.00027535909406973283, "loss": 0.5131, "step": 132040 }, { "epoch": 6.558557663653522, "grad_norm": 0.1337890625, "learning_rate": 0.00027531936028608325, "loss": 0.5028, "step": 132050 }, { "epoch": 6.559054335949141, "grad_norm": 0.1259765625, "learning_rate": 0.0002752796265024337, "loss": 0.4891, "step": 132060 }, { "epoch": 6.55955100824476, "grad_norm": 0.134765625, "learning_rate": 0.0002752398927187842, "loss": 0.4724, "step": 132070 }, { "epoch": 6.56004768054038, "grad_norm": 0.13671875, "learning_rate": 0.0002752001589351346, "loss": 0.5097, "step": 132080 }, { "epoch": 6.560544352835999, "grad_norm": 0.1328125, "learning_rate": 0.0002751604251514851, "loss": 0.4934, "step": 132090 }, { "epoch": 6.561041025131618, "grad_norm": 0.1376953125, "learning_rate": 0.0002751206913678355, "loss": 0.4868, "step": 132100 }, { "epoch": 6.561537697427237, "grad_norm": 0.134765625, "learning_rate": 0.00027508095758418597, "loss": 0.4855, "step": 132110 }, { "epoch": 6.5620343697228565, "grad_norm": 0.12890625, "learning_rate": 0.00027504122380053644, "loss": 0.4691, "step": 132120 }, { "epoch": 6.562531042018477, "grad_norm": 0.12890625, "learning_rate": 0.00027500149001688685, "loss": 0.4996, "step": 132130 }, { "epoch": 6.563027714314096, "grad_norm": 0.150390625, "learning_rate": 0.0002749617562332373, "loss": 0.5151, "step": 132140 }, { "epoch": 6.563524386609715, "grad_norm": 0.13671875, "learning_rate": 0.0002749220224495878, "loss": 0.4846, "step": 132150 }, { "epoch": 6.564021058905334, "grad_norm": 0.1318359375, "learning_rate": 0.0002748822886659382, "loss": 0.493, "step": 132160 }, { "epoch": 6.5645177312009535, "grad_norm": 0.154296875, "learning_rate": 0.0002748425548822887, "loss": 0.4981, "step": 132170 }, { "epoch": 6.565014403496573, "grad_norm": 0.1572265625, "learning_rate": 0.0002748028210986391, "loss": 0.4957, "step": 132180 }, { "epoch": 6.565511075792192, "grad_norm": 0.1171875, "learning_rate": 0.0002747630873149896, "loss": 0.476, "step": 132190 }, { "epoch": 6.566007748087811, "grad_norm": 0.1318359375, "learning_rate": 0.00027472335353134005, "loss": 0.4689, "step": 132200 }, { "epoch": 6.566504420383431, "grad_norm": 0.1357421875, "learning_rate": 0.00027468361974769046, "loss": 0.5053, "step": 132210 }, { "epoch": 6.5670010926790505, "grad_norm": 0.1259765625, "learning_rate": 0.00027464388596404093, "loss": 0.4794, "step": 132220 }, { "epoch": 6.56749776497467, "grad_norm": 0.125, "learning_rate": 0.0002746041521803914, "loss": 0.4824, "step": 132230 }, { "epoch": 6.567994437270289, "grad_norm": 0.1357421875, "learning_rate": 0.0002745644183967418, "loss": 0.5269, "step": 132240 }, { "epoch": 6.568491109565908, "grad_norm": 0.13671875, "learning_rate": 0.0002745246846130923, "loss": 0.4838, "step": 132250 }, { "epoch": 6.568987781861527, "grad_norm": 0.142578125, "learning_rate": 0.00027448495082944277, "loss": 0.497, "step": 132260 }, { "epoch": 6.569484454157147, "grad_norm": 0.1259765625, "learning_rate": 0.0002744452170457932, "loss": 0.5044, "step": 132270 }, { "epoch": 6.569981126452767, "grad_norm": 0.1630859375, "learning_rate": 0.00027440548326214365, "loss": 0.4951, "step": 132280 }, { "epoch": 6.570477798748386, "grad_norm": 0.169921875, "learning_rate": 0.00027436574947849407, "loss": 0.5129, "step": 132290 }, { "epoch": 6.570974471044005, "grad_norm": 0.138671875, "learning_rate": 0.0002743260156948446, "loss": 0.4742, "step": 132300 }, { "epoch": 6.5714711433396245, "grad_norm": 0.150390625, "learning_rate": 0.000274286281911195, "loss": 0.4861, "step": 132310 }, { "epoch": 6.571967815635244, "grad_norm": 0.1279296875, "learning_rate": 0.00027424654812754543, "loss": 0.4985, "step": 132320 }, { "epoch": 6.572464487930863, "grad_norm": 0.1279296875, "learning_rate": 0.0002742068143438959, "loss": 0.4778, "step": 132330 }, { "epoch": 6.572961160226482, "grad_norm": 0.1435546875, "learning_rate": 0.0002741670805602464, "loss": 0.5032, "step": 132340 }, { "epoch": 6.573457832522102, "grad_norm": 0.1513671875, "learning_rate": 0.0002741273467765968, "loss": 0.4898, "step": 132350 }, { "epoch": 6.5739545048177215, "grad_norm": 0.140625, "learning_rate": 0.00027408761299294726, "loss": 0.5188, "step": 132360 }, { "epoch": 6.574451177113341, "grad_norm": 0.125, "learning_rate": 0.00027404787920929773, "loss": 0.4875, "step": 132370 }, { "epoch": 6.57494784940896, "grad_norm": 0.1318359375, "learning_rate": 0.0002740081454256482, "loss": 0.5, "step": 132380 }, { "epoch": 6.575444521704579, "grad_norm": 0.1396484375, "learning_rate": 0.0002739684116419986, "loss": 0.4891, "step": 132390 }, { "epoch": 6.575941194000198, "grad_norm": 0.1328125, "learning_rate": 0.00027392867785834904, "loss": 0.4886, "step": 132400 }, { "epoch": 6.576437866295818, "grad_norm": 0.14453125, "learning_rate": 0.00027388894407469956, "loss": 0.4917, "step": 132410 }, { "epoch": 6.576934538591438, "grad_norm": 0.125, "learning_rate": 0.00027384921029105, "loss": 0.4932, "step": 132420 }, { "epoch": 6.577431210887057, "grad_norm": 0.12255859375, "learning_rate": 0.0002738094765074004, "loss": 0.4747, "step": 132430 }, { "epoch": 6.577927883182676, "grad_norm": 0.1259765625, "learning_rate": 0.00027376974272375087, "loss": 0.5104, "step": 132440 }, { "epoch": 6.578424555478295, "grad_norm": 0.130859375, "learning_rate": 0.00027373000894010134, "loss": 0.4977, "step": 132450 }, { "epoch": 6.578921227773915, "grad_norm": 0.13671875, "learning_rate": 0.0002736902751564518, "loss": 0.4797, "step": 132460 }, { "epoch": 6.579417900069534, "grad_norm": 0.123046875, "learning_rate": 0.00027365054137280223, "loss": 0.4796, "step": 132470 }, { "epoch": 6.579914572365153, "grad_norm": 0.1318359375, "learning_rate": 0.00027361080758915265, "loss": 0.4642, "step": 132480 }, { "epoch": 6.580411244660773, "grad_norm": 0.1396484375, "learning_rate": 0.00027357107380550317, "loss": 0.4925, "step": 132490 }, { "epoch": 6.580907916956392, "grad_norm": 0.12890625, "learning_rate": 0.0002735313400218536, "loss": 0.486, "step": 132500 }, { "epoch": 6.581404589252012, "grad_norm": 0.1318359375, "learning_rate": 0.00027349160623820406, "loss": 0.4796, "step": 132510 }, { "epoch": 6.581901261547631, "grad_norm": 0.1298828125, "learning_rate": 0.0002734518724545545, "loss": 0.512, "step": 132520 }, { "epoch": 6.58239793384325, "grad_norm": 0.12353515625, "learning_rate": 0.00027341213867090495, "loss": 0.4844, "step": 132530 }, { "epoch": 6.582894606138869, "grad_norm": 0.1318359375, "learning_rate": 0.0002733724048872554, "loss": 0.4681, "step": 132540 }, { "epoch": 6.583391278434489, "grad_norm": 0.1279296875, "learning_rate": 0.00027333267110360584, "loss": 0.4842, "step": 132550 }, { "epoch": 6.583887950730109, "grad_norm": 0.154296875, "learning_rate": 0.0002732929373199563, "loss": 0.4881, "step": 132560 }, { "epoch": 6.584384623025728, "grad_norm": 0.1376953125, "learning_rate": 0.0002732532035363068, "loss": 0.5233, "step": 132570 }, { "epoch": 6.584881295321347, "grad_norm": 0.126953125, "learning_rate": 0.0002732134697526572, "loss": 0.4756, "step": 132580 }, { "epoch": 6.585377967616966, "grad_norm": 0.12890625, "learning_rate": 0.00027317373596900767, "loss": 0.4881, "step": 132590 }, { "epoch": 6.585874639912586, "grad_norm": 0.12109375, "learning_rate": 0.00027313400218535814, "loss": 0.4969, "step": 132600 }, { "epoch": 6.586371312208205, "grad_norm": 0.1298828125, "learning_rate": 0.00027309426840170856, "loss": 0.504, "step": 132610 }, { "epoch": 6.586867984503824, "grad_norm": 0.130859375, "learning_rate": 0.00027305453461805903, "loss": 0.4739, "step": 132620 }, { "epoch": 6.587364656799444, "grad_norm": 0.1318359375, "learning_rate": 0.00027301480083440945, "loss": 0.5395, "step": 132630 }, { "epoch": 6.587861329095063, "grad_norm": 0.1259765625, "learning_rate": 0.0002729750670507599, "loss": 0.4975, "step": 132640 }, { "epoch": 6.588358001390683, "grad_norm": 0.1259765625, "learning_rate": 0.0002729353332671104, "loss": 0.4969, "step": 132650 }, { "epoch": 6.588854673686302, "grad_norm": 0.1513671875, "learning_rate": 0.0002728955994834608, "loss": 0.4937, "step": 132660 }, { "epoch": 6.589351345981921, "grad_norm": 0.1396484375, "learning_rate": 0.0002728558656998113, "loss": 0.4718, "step": 132670 }, { "epoch": 6.58984801827754, "grad_norm": 0.142578125, "learning_rate": 0.00027281613191616175, "loss": 0.4972, "step": 132680 }, { "epoch": 6.5903446905731595, "grad_norm": 0.1611328125, "learning_rate": 0.00027277639813251217, "loss": 0.4969, "step": 132690 }, { "epoch": 6.59084136286878, "grad_norm": 0.138671875, "learning_rate": 0.00027273666434886264, "loss": 0.4912, "step": 132700 }, { "epoch": 6.591338035164399, "grad_norm": 0.130859375, "learning_rate": 0.0002726969305652131, "loss": 0.5153, "step": 132710 }, { "epoch": 6.591834707460018, "grad_norm": 0.130859375, "learning_rate": 0.0002726571967815635, "loss": 0.4904, "step": 132720 }, { "epoch": 6.592331379755637, "grad_norm": 0.1455078125, "learning_rate": 0.000272617462997914, "loss": 0.49, "step": 132730 }, { "epoch": 6.5928280520512565, "grad_norm": 0.1630859375, "learning_rate": 0.0002725777292142644, "loss": 0.4925, "step": 132740 }, { "epoch": 6.593324724346876, "grad_norm": 0.1376953125, "learning_rate": 0.0002725379954306149, "loss": 0.5258, "step": 132750 }, { "epoch": 6.593821396642495, "grad_norm": 0.1728515625, "learning_rate": 0.00027249826164696536, "loss": 0.4999, "step": 132760 }, { "epoch": 6.594318068938115, "grad_norm": 0.1826171875, "learning_rate": 0.0002724585278633158, "loss": 0.4972, "step": 132770 }, { "epoch": 6.594814741233734, "grad_norm": 0.1484375, "learning_rate": 0.00027241879407966625, "loss": 0.4713, "step": 132780 }, { "epoch": 6.595311413529354, "grad_norm": 0.1376953125, "learning_rate": 0.0002723790602960167, "loss": 0.49, "step": 132790 }, { "epoch": 6.595808085824973, "grad_norm": 0.1494140625, "learning_rate": 0.00027233932651236713, "loss": 0.504, "step": 132800 }, { "epoch": 6.596304758120592, "grad_norm": 0.1220703125, "learning_rate": 0.0002722995927287176, "loss": 0.4771, "step": 132810 }, { "epoch": 6.596801430416211, "grad_norm": 0.130859375, "learning_rate": 0.000272259858945068, "loss": 0.4936, "step": 132820 }, { "epoch": 6.5972981027118305, "grad_norm": 0.130859375, "learning_rate": 0.00027222012516141855, "loss": 0.4815, "step": 132830 }, { "epoch": 6.59779477500745, "grad_norm": 0.146484375, "learning_rate": 0.00027218039137776897, "loss": 0.5094, "step": 132840 }, { "epoch": 6.59829144730307, "grad_norm": 0.130859375, "learning_rate": 0.0002721406575941194, "loss": 0.5089, "step": 132850 }, { "epoch": 6.598788119598689, "grad_norm": 0.13671875, "learning_rate": 0.00027210092381046985, "loss": 0.4632, "step": 132860 }, { "epoch": 6.599284791894308, "grad_norm": 0.130859375, "learning_rate": 0.0002720611900268203, "loss": 0.483, "step": 132870 }, { "epoch": 6.5997814641899275, "grad_norm": 0.1767578125, "learning_rate": 0.00027202145624317074, "loss": 0.4891, "step": 132880 }, { "epoch": 6.600278136485547, "grad_norm": 0.138671875, "learning_rate": 0.0002719817224595212, "loss": 0.4684, "step": 132890 }, { "epoch": 6.600774808781166, "grad_norm": 0.126953125, "learning_rate": 0.0002719419886758717, "loss": 0.4608, "step": 132900 }, { "epoch": 6.601271481076785, "grad_norm": 0.2119140625, "learning_rate": 0.00027190225489222216, "loss": 0.485, "step": 132910 }, { "epoch": 6.601768153372404, "grad_norm": 0.1357421875, "learning_rate": 0.0002718625211085726, "loss": 0.4507, "step": 132920 }, { "epoch": 6.6022648256680245, "grad_norm": 0.1376953125, "learning_rate": 0.000271822787324923, "loss": 0.4968, "step": 132930 }, { "epoch": 6.602761497963644, "grad_norm": 0.1640625, "learning_rate": 0.0002717830535412735, "loss": 0.4773, "step": 132940 }, { "epoch": 6.603258170259263, "grad_norm": 0.130859375, "learning_rate": 0.00027174331975762393, "loss": 0.4796, "step": 132950 }, { "epoch": 6.603754842554882, "grad_norm": 0.1201171875, "learning_rate": 0.0002717035859739744, "loss": 0.488, "step": 132960 }, { "epoch": 6.604251514850501, "grad_norm": 0.125, "learning_rate": 0.0002716638521903248, "loss": 0.4728, "step": 132970 }, { "epoch": 6.604748187146121, "grad_norm": 0.1533203125, "learning_rate": 0.0002716241184066753, "loss": 0.5108, "step": 132980 }, { "epoch": 6.60524485944174, "grad_norm": 0.1376953125, "learning_rate": 0.00027158438462302576, "loss": 0.4891, "step": 132990 }, { "epoch": 6.60574153173736, "grad_norm": 0.134765625, "learning_rate": 0.0002715446508393762, "loss": 0.512, "step": 133000 }, { "epoch": 6.606238204032979, "grad_norm": 0.138671875, "learning_rate": 0.00027150491705572665, "loss": 0.4805, "step": 133010 }, { "epoch": 6.6067348763285985, "grad_norm": 0.1181640625, "learning_rate": 0.0002714651832720771, "loss": 0.4996, "step": 133020 }, { "epoch": 6.607231548624218, "grad_norm": 0.1376953125, "learning_rate": 0.00027142544948842754, "loss": 0.5027, "step": 133030 }, { "epoch": 6.607728220919837, "grad_norm": 0.1474609375, "learning_rate": 0.000271385715704778, "loss": 0.53, "step": 133040 }, { "epoch": 6.608224893215456, "grad_norm": 0.1494140625, "learning_rate": 0.00027134598192112843, "loss": 0.507, "step": 133050 }, { "epoch": 6.608721565511075, "grad_norm": 0.1328125, "learning_rate": 0.0002713062481374789, "loss": 0.4711, "step": 133060 }, { "epoch": 6.6092182378066955, "grad_norm": 0.1337890625, "learning_rate": 0.00027126651435382937, "loss": 0.4811, "step": 133070 }, { "epoch": 6.609714910102315, "grad_norm": 0.138671875, "learning_rate": 0.0002712267805701798, "loss": 0.4837, "step": 133080 }, { "epoch": 6.610211582397934, "grad_norm": 0.1416015625, "learning_rate": 0.00027118704678653026, "loss": 0.5085, "step": 133090 }, { "epoch": 6.610708254693553, "grad_norm": 0.1328125, "learning_rate": 0.00027114731300288073, "loss": 0.5287, "step": 133100 }, { "epoch": 6.611204926989172, "grad_norm": 0.1435546875, "learning_rate": 0.00027110757921923115, "loss": 0.5062, "step": 133110 }, { "epoch": 6.611701599284792, "grad_norm": 0.130859375, "learning_rate": 0.0002710678454355816, "loss": 0.4982, "step": 133120 }, { "epoch": 6.612198271580411, "grad_norm": 0.13671875, "learning_rate": 0.0002710281116519321, "loss": 0.4991, "step": 133130 }, { "epoch": 6.612694943876031, "grad_norm": 0.13671875, "learning_rate": 0.0002709883778682825, "loss": 0.5036, "step": 133140 }, { "epoch": 6.61319161617165, "grad_norm": 0.12890625, "learning_rate": 0.000270948644084633, "loss": 0.4817, "step": 133150 }, { "epoch": 6.613688288467269, "grad_norm": 0.1376953125, "learning_rate": 0.0002709089103009834, "loss": 0.4849, "step": 133160 }, { "epoch": 6.614184960762889, "grad_norm": 0.1396484375, "learning_rate": 0.00027086917651733387, "loss": 0.5049, "step": 133170 }, { "epoch": 6.614681633058508, "grad_norm": 0.140625, "learning_rate": 0.00027082944273368434, "loss": 0.5086, "step": 133180 }, { "epoch": 6.615178305354127, "grad_norm": 0.130859375, "learning_rate": 0.00027078970895003476, "loss": 0.4887, "step": 133190 }, { "epoch": 6.615674977649746, "grad_norm": 0.12255859375, "learning_rate": 0.00027074997516638523, "loss": 0.4756, "step": 133200 }, { "epoch": 6.616171649945366, "grad_norm": 0.1748046875, "learning_rate": 0.0002707102413827357, "loss": 0.4732, "step": 133210 }, { "epoch": 6.616668322240986, "grad_norm": 0.146484375, "learning_rate": 0.0002706705075990861, "loss": 0.4917, "step": 133220 }, { "epoch": 6.617164994536605, "grad_norm": 0.1240234375, "learning_rate": 0.0002706307738154366, "loss": 0.4943, "step": 133230 }, { "epoch": 6.617661666832224, "grad_norm": 0.134765625, "learning_rate": 0.00027059104003178706, "loss": 0.4608, "step": 133240 }, { "epoch": 6.618158339127843, "grad_norm": 0.171875, "learning_rate": 0.0002705513062481375, "loss": 0.4674, "step": 133250 }, { "epoch": 6.618655011423463, "grad_norm": 0.1376953125, "learning_rate": 0.00027051157246448795, "loss": 0.4938, "step": 133260 }, { "epoch": 6.619151683719082, "grad_norm": 0.1416015625, "learning_rate": 0.00027047183868083837, "loss": 0.4672, "step": 133270 }, { "epoch": 6.619648356014702, "grad_norm": 0.138671875, "learning_rate": 0.0002704321048971889, "loss": 0.5048, "step": 133280 }, { "epoch": 6.620145028310321, "grad_norm": 0.1728515625, "learning_rate": 0.0002703923711135393, "loss": 0.4894, "step": 133290 }, { "epoch": 6.62064170060594, "grad_norm": 0.12255859375, "learning_rate": 0.0002703526373298897, "loss": 0.4835, "step": 133300 }, { "epoch": 6.62113837290156, "grad_norm": 0.1318359375, "learning_rate": 0.0002703129035462402, "loss": 0.5134, "step": 133310 }, { "epoch": 6.621635045197179, "grad_norm": 0.1259765625, "learning_rate": 0.00027027316976259067, "loss": 0.5127, "step": 133320 }, { "epoch": 6.622131717492798, "grad_norm": 0.14453125, "learning_rate": 0.00027023343597894114, "loss": 0.4695, "step": 133330 }, { "epoch": 6.622628389788417, "grad_norm": 0.1396484375, "learning_rate": 0.00027019370219529156, "loss": 0.5054, "step": 133340 }, { "epoch": 6.623125062084037, "grad_norm": 0.142578125, "learning_rate": 0.000270153968411642, "loss": 0.5493, "step": 133350 }, { "epoch": 6.623621734379657, "grad_norm": 0.13671875, "learning_rate": 0.0002701142346279925, "loss": 0.4761, "step": 133360 }, { "epoch": 6.624118406675276, "grad_norm": 0.1318359375, "learning_rate": 0.0002700745008443429, "loss": 0.5189, "step": 133370 }, { "epoch": 6.624615078970895, "grad_norm": 0.17578125, "learning_rate": 0.00027003476706069333, "loss": 0.4894, "step": 133380 }, { "epoch": 6.625111751266514, "grad_norm": 0.1357421875, "learning_rate": 0.0002699950332770438, "loss": 0.5142, "step": 133390 }, { "epoch": 6.6256084235621335, "grad_norm": 0.1533203125, "learning_rate": 0.0002699552994933943, "loss": 0.4884, "step": 133400 }, { "epoch": 6.626105095857753, "grad_norm": 0.1796875, "learning_rate": 0.00026991556570974475, "loss": 0.4813, "step": 133410 }, { "epoch": 6.626601768153373, "grad_norm": 0.18359375, "learning_rate": 0.00026987583192609517, "loss": 0.5112, "step": 133420 }, { "epoch": 6.627098440448992, "grad_norm": 0.140625, "learning_rate": 0.00026983609814244564, "loss": 0.5361, "step": 133430 }, { "epoch": 6.627595112744611, "grad_norm": 0.1279296875, "learning_rate": 0.0002697963643587961, "loss": 0.4426, "step": 133440 }, { "epoch": 6.6280917850402306, "grad_norm": 0.150390625, "learning_rate": 0.0002697566305751465, "loss": 0.4809, "step": 133450 }, { "epoch": 6.62858845733585, "grad_norm": 0.13671875, "learning_rate": 0.00026971689679149694, "loss": 0.5134, "step": 133460 }, { "epoch": 6.629085129631469, "grad_norm": 0.140625, "learning_rate": 0.00026967716300784747, "loss": 0.5242, "step": 133470 }, { "epoch": 6.629581801927088, "grad_norm": 0.1298828125, "learning_rate": 0.0002696374292241979, "loss": 0.4854, "step": 133480 }, { "epoch": 6.630078474222708, "grad_norm": 0.130859375, "learning_rate": 0.00026959769544054836, "loss": 0.4724, "step": 133490 }, { "epoch": 6.630575146518328, "grad_norm": 0.1669921875, "learning_rate": 0.0002695579616568988, "loss": 0.5376, "step": 133500 }, { "epoch": 6.631071818813947, "grad_norm": 0.134765625, "learning_rate": 0.00026951822787324925, "loss": 0.4984, "step": 133510 }, { "epoch": 6.631568491109566, "grad_norm": 0.134765625, "learning_rate": 0.0002694784940895997, "loss": 0.5126, "step": 133520 }, { "epoch": 6.632065163405185, "grad_norm": 0.140625, "learning_rate": 0.00026943876030595013, "loss": 0.511, "step": 133530 }, { "epoch": 6.6325618357008045, "grad_norm": 0.1513671875, "learning_rate": 0.0002693990265223006, "loss": 0.4826, "step": 133540 }, { "epoch": 6.633058507996424, "grad_norm": 0.1416015625, "learning_rate": 0.0002693592927386511, "loss": 0.4895, "step": 133550 }, { "epoch": 6.633555180292043, "grad_norm": 0.1279296875, "learning_rate": 0.0002693195589550015, "loss": 0.5184, "step": 133560 }, { "epoch": 6.634051852587662, "grad_norm": 0.1259765625, "learning_rate": 0.00026927982517135196, "loss": 0.4698, "step": 133570 }, { "epoch": 6.634548524883282, "grad_norm": 0.1455078125, "learning_rate": 0.0002692400913877024, "loss": 0.5026, "step": 133580 }, { "epoch": 6.6350451971789015, "grad_norm": 0.1474609375, "learning_rate": 0.00026920035760405285, "loss": 0.461, "step": 133590 }, { "epoch": 6.635541869474521, "grad_norm": 0.1416015625, "learning_rate": 0.0002691606238204033, "loss": 0.4852, "step": 133600 }, { "epoch": 6.63603854177014, "grad_norm": 0.146484375, "learning_rate": 0.00026912089003675374, "loss": 0.5103, "step": 133610 }, { "epoch": 6.636535214065759, "grad_norm": 0.1337890625, "learning_rate": 0.0002690811562531042, "loss": 0.4986, "step": 133620 }, { "epoch": 6.637031886361378, "grad_norm": 0.1337890625, "learning_rate": 0.0002690414224694547, "loss": 0.483, "step": 133630 }, { "epoch": 6.637528558656998, "grad_norm": 0.13671875, "learning_rate": 0.0002690016886858051, "loss": 0.5236, "step": 133640 }, { "epoch": 6.638025230952618, "grad_norm": 0.1318359375, "learning_rate": 0.0002689619549021556, "loss": 0.4868, "step": 133650 }, { "epoch": 6.638521903248237, "grad_norm": 0.1318359375, "learning_rate": 0.00026892222111850604, "loss": 0.4662, "step": 133660 }, { "epoch": 6.639018575543856, "grad_norm": 0.15234375, "learning_rate": 0.00026888248733485646, "loss": 0.4987, "step": 133670 }, { "epoch": 6.639515247839475, "grad_norm": 0.125, "learning_rate": 0.00026884275355120693, "loss": 0.4798, "step": 133680 }, { "epoch": 6.640011920135095, "grad_norm": 0.1484375, "learning_rate": 0.00026880301976755735, "loss": 0.5026, "step": 133690 }, { "epoch": 6.640508592430714, "grad_norm": 0.1845703125, "learning_rate": 0.0002687632859839078, "loss": 0.5088, "step": 133700 }, { "epoch": 6.641005264726333, "grad_norm": 0.1474609375, "learning_rate": 0.0002687235522002583, "loss": 0.4896, "step": 133710 }, { "epoch": 6.641501937021953, "grad_norm": 0.1337890625, "learning_rate": 0.0002686838184166087, "loss": 0.5207, "step": 133720 }, { "epoch": 6.6419986093175725, "grad_norm": 0.1435546875, "learning_rate": 0.0002686440846329592, "loss": 0.5255, "step": 133730 }, { "epoch": 6.642495281613192, "grad_norm": 0.126953125, "learning_rate": 0.00026860435084930965, "loss": 0.4937, "step": 133740 }, { "epoch": 6.642991953908811, "grad_norm": 0.15625, "learning_rate": 0.00026856461706566007, "loss": 0.4631, "step": 133750 }, { "epoch": 6.64348862620443, "grad_norm": 0.1298828125, "learning_rate": 0.00026852488328201054, "loss": 0.5256, "step": 133760 }, { "epoch": 6.643985298500049, "grad_norm": 0.1416015625, "learning_rate": 0.000268485149498361, "loss": 0.505, "step": 133770 }, { "epoch": 6.644481970795669, "grad_norm": 0.1484375, "learning_rate": 0.0002684454157147115, "loss": 0.4995, "step": 133780 }, { "epoch": 6.644978643091289, "grad_norm": 0.1708984375, "learning_rate": 0.0002684056819310619, "loss": 0.5255, "step": 133790 }, { "epoch": 6.645475315386908, "grad_norm": 0.1162109375, "learning_rate": 0.0002683659481474123, "loss": 0.4723, "step": 133800 }, { "epoch": 6.645971987682527, "grad_norm": 0.1357421875, "learning_rate": 0.00026832621436376284, "loss": 0.5012, "step": 133810 }, { "epoch": 6.646468659978146, "grad_norm": 0.169921875, "learning_rate": 0.00026828648058011326, "loss": 0.4947, "step": 133820 }, { "epoch": 6.646965332273766, "grad_norm": 0.1630859375, "learning_rate": 0.0002682467467964637, "loss": 0.5005, "step": 133830 }, { "epoch": 6.647462004569385, "grad_norm": 0.1318359375, "learning_rate": 0.00026820701301281415, "loss": 0.5042, "step": 133840 }, { "epoch": 6.647958676865004, "grad_norm": 0.12353515625, "learning_rate": 0.0002681672792291646, "loss": 0.4901, "step": 133850 }, { "epoch": 6.648455349160624, "grad_norm": 0.138671875, "learning_rate": 0.0002681275454455151, "loss": 0.5007, "step": 133860 }, { "epoch": 6.648952021456243, "grad_norm": 0.140625, "learning_rate": 0.0002680878116618655, "loss": 0.4806, "step": 133870 }, { "epoch": 6.649448693751863, "grad_norm": 0.1396484375, "learning_rate": 0.0002680480778782159, "loss": 0.4836, "step": 133880 }, { "epoch": 6.649945366047482, "grad_norm": 0.134765625, "learning_rate": 0.00026800834409456645, "loss": 0.5106, "step": 133890 }, { "epoch": 6.650442038343101, "grad_norm": 0.1201171875, "learning_rate": 0.00026796861031091687, "loss": 0.4862, "step": 133900 }, { "epoch": 6.65093871063872, "grad_norm": 0.146484375, "learning_rate": 0.0002679288765272673, "loss": 0.4535, "step": 133910 }, { "epoch": 6.65143538293434, "grad_norm": 0.15625, "learning_rate": 0.00026788914274361776, "loss": 0.5245, "step": 133920 }, { "epoch": 6.65193205522996, "grad_norm": 0.142578125, "learning_rate": 0.00026784940895996823, "loss": 0.5196, "step": 133930 }, { "epoch": 6.652428727525579, "grad_norm": 0.13671875, "learning_rate": 0.0002678096751763187, "loss": 0.5149, "step": 133940 }, { "epoch": 6.652925399821198, "grad_norm": 0.1357421875, "learning_rate": 0.0002677699413926691, "loss": 0.4959, "step": 133950 }, { "epoch": 6.653422072116817, "grad_norm": 0.1318359375, "learning_rate": 0.0002677302076090196, "loss": 0.4859, "step": 133960 }, { "epoch": 6.653918744412437, "grad_norm": 0.1259765625, "learning_rate": 0.00026769047382537006, "loss": 0.518, "step": 133970 }, { "epoch": 6.654415416708056, "grad_norm": 0.1318359375, "learning_rate": 0.0002676507400417205, "loss": 0.477, "step": 133980 }, { "epoch": 6.654912089003675, "grad_norm": 0.197265625, "learning_rate": 0.0002676110062580709, "loss": 0.483, "step": 133990 }, { "epoch": 6.655408761299295, "grad_norm": 0.125, "learning_rate": 0.0002675712724744214, "loss": 0.4983, "step": 134000 }, { "epoch": 6.655905433594914, "grad_norm": 0.126953125, "learning_rate": 0.00026753153869077184, "loss": 0.4666, "step": 134010 }, { "epoch": 6.656402105890534, "grad_norm": 0.142578125, "learning_rate": 0.0002674918049071223, "loss": 0.5227, "step": 134020 }, { "epoch": 6.656898778186153, "grad_norm": 0.1435546875, "learning_rate": 0.0002674520711234727, "loss": 0.4894, "step": 134030 }, { "epoch": 6.657395450481772, "grad_norm": 0.1416015625, "learning_rate": 0.0002674123373398232, "loss": 0.4588, "step": 134040 }, { "epoch": 6.657892122777391, "grad_norm": 0.140625, "learning_rate": 0.00026737260355617367, "loss": 0.4926, "step": 134050 }, { "epoch": 6.6583887950730105, "grad_norm": 0.1455078125, "learning_rate": 0.0002673328697725241, "loss": 0.4998, "step": 134060 }, { "epoch": 6.658885467368631, "grad_norm": 0.12158203125, "learning_rate": 0.00026729313598887456, "loss": 0.5003, "step": 134070 }, { "epoch": 6.65938213966425, "grad_norm": 0.1484375, "learning_rate": 0.00026725340220522503, "loss": 0.5129, "step": 134080 }, { "epoch": 6.659878811959869, "grad_norm": 0.1328125, "learning_rate": 0.00026721366842157545, "loss": 0.4808, "step": 134090 }, { "epoch": 6.660375484255488, "grad_norm": 0.140625, "learning_rate": 0.0002671739346379259, "loss": 0.4807, "step": 134100 }, { "epoch": 6.6608721565511075, "grad_norm": 0.126953125, "learning_rate": 0.0002671342008542764, "loss": 0.5135, "step": 134110 }, { "epoch": 6.661368828846727, "grad_norm": 0.1298828125, "learning_rate": 0.0002670944670706268, "loss": 0.4837, "step": 134120 }, { "epoch": 6.661865501142346, "grad_norm": 0.1298828125, "learning_rate": 0.0002670547332869773, "loss": 0.478, "step": 134130 }, { "epoch": 6.662362173437966, "grad_norm": 0.138671875, "learning_rate": 0.0002670149995033277, "loss": 0.4888, "step": 134140 }, { "epoch": 6.662858845733585, "grad_norm": 0.171875, "learning_rate": 0.00026697526571967816, "loss": 0.4739, "step": 134150 }, { "epoch": 6.663355518029205, "grad_norm": 0.1533203125, "learning_rate": 0.00026693553193602864, "loss": 0.5055, "step": 134160 }, { "epoch": 6.663852190324824, "grad_norm": 0.1376953125, "learning_rate": 0.00026689579815237905, "loss": 0.4849, "step": 134170 }, { "epoch": 6.664348862620443, "grad_norm": 0.1220703125, "learning_rate": 0.0002668560643687295, "loss": 0.4776, "step": 134180 }, { "epoch": 6.664845534916062, "grad_norm": 0.154296875, "learning_rate": 0.00026681633058508, "loss": 0.5023, "step": 134190 }, { "epoch": 6.6653422072116815, "grad_norm": 0.1337890625, "learning_rate": 0.0002667765968014304, "loss": 0.4725, "step": 134200 }, { "epoch": 6.665838879507301, "grad_norm": 0.130859375, "learning_rate": 0.0002667368630177809, "loss": 0.4813, "step": 134210 }, { "epoch": 6.666335551802921, "grad_norm": 0.1357421875, "learning_rate": 0.0002666971292341313, "loss": 0.4912, "step": 134220 }, { "epoch": 6.66683222409854, "grad_norm": 0.2197265625, "learning_rate": 0.00026665739545048183, "loss": 0.5012, "step": 134230 }, { "epoch": 6.667328896394159, "grad_norm": 0.1455078125, "learning_rate": 0.00026661766166683224, "loss": 0.4752, "step": 134240 }, { "epoch": 6.6678255686897785, "grad_norm": 0.12890625, "learning_rate": 0.00026657792788318266, "loss": 0.4683, "step": 134250 }, { "epoch": 6.668322240985398, "grad_norm": 0.13671875, "learning_rate": 0.00026653819409953313, "loss": 0.507, "step": 134260 }, { "epoch": 6.668818913281017, "grad_norm": 0.158203125, "learning_rate": 0.0002664984603158836, "loss": 0.5005, "step": 134270 }, { "epoch": 6.669315585576636, "grad_norm": 0.1484375, "learning_rate": 0.000266458726532234, "loss": 0.4964, "step": 134280 }, { "epoch": 6.669812257872255, "grad_norm": 0.1298828125, "learning_rate": 0.0002664189927485845, "loss": 0.5075, "step": 134290 }, { "epoch": 6.6703089301678755, "grad_norm": 0.12890625, "learning_rate": 0.00026637925896493496, "loss": 0.4932, "step": 134300 }, { "epoch": 6.670805602463495, "grad_norm": 0.1259765625, "learning_rate": 0.00026633952518128544, "loss": 0.5096, "step": 134310 }, { "epoch": 6.671302274759114, "grad_norm": 0.125, "learning_rate": 0.00026629979139763585, "loss": 0.4881, "step": 134320 }, { "epoch": 6.671798947054733, "grad_norm": 0.171875, "learning_rate": 0.00026626005761398627, "loss": 0.4874, "step": 134330 }, { "epoch": 6.672295619350352, "grad_norm": 0.1337890625, "learning_rate": 0.0002662203238303368, "loss": 0.486, "step": 134340 }, { "epoch": 6.672792291645972, "grad_norm": 0.185546875, "learning_rate": 0.0002661805900466872, "loss": 0.5361, "step": 134350 }, { "epoch": 6.673288963941591, "grad_norm": 0.158203125, "learning_rate": 0.00026614085626303763, "loss": 0.4694, "step": 134360 }, { "epoch": 6.673785636237211, "grad_norm": 0.12890625, "learning_rate": 0.0002661011224793881, "loss": 0.4767, "step": 134370 }, { "epoch": 6.67428230853283, "grad_norm": 0.138671875, "learning_rate": 0.00026606138869573857, "loss": 0.4853, "step": 134380 }, { "epoch": 6.6747789808284494, "grad_norm": 0.142578125, "learning_rate": 0.00026602165491208904, "loss": 0.4636, "step": 134390 }, { "epoch": 6.675275653124069, "grad_norm": 0.1201171875, "learning_rate": 0.00026598192112843946, "loss": 0.4859, "step": 134400 }, { "epoch": 6.675772325419688, "grad_norm": 0.13671875, "learning_rate": 0.00026594218734478993, "loss": 0.5127, "step": 134410 }, { "epoch": 6.676268997715307, "grad_norm": 0.1357421875, "learning_rate": 0.0002659024535611404, "loss": 0.4887, "step": 134420 }, { "epoch": 6.676765670010926, "grad_norm": 0.150390625, "learning_rate": 0.0002658627197774908, "loss": 0.5102, "step": 134430 }, { "epoch": 6.6772623423065465, "grad_norm": 0.13671875, "learning_rate": 0.00026582298599384124, "loss": 0.4927, "step": 134440 }, { "epoch": 6.677759014602166, "grad_norm": 0.12109375, "learning_rate": 0.0002657832522101917, "loss": 0.5023, "step": 134450 }, { "epoch": 6.678255686897785, "grad_norm": 0.1416015625, "learning_rate": 0.0002657435184265422, "loss": 0.4924, "step": 134460 }, { "epoch": 6.678752359193404, "grad_norm": 0.1357421875, "learning_rate": 0.00026570378464289265, "loss": 0.4979, "step": 134470 }, { "epoch": 6.679249031489023, "grad_norm": 0.13671875, "learning_rate": 0.00026566405085924307, "loss": 0.4738, "step": 134480 }, { "epoch": 6.679745703784643, "grad_norm": 0.1337890625, "learning_rate": 0.00026562431707559354, "loss": 0.4945, "step": 134490 }, { "epoch": 6.680242376080262, "grad_norm": 0.1357421875, "learning_rate": 0.000265584583291944, "loss": 0.4789, "step": 134500 }, { "epoch": 6.680739048375882, "grad_norm": 0.140625, "learning_rate": 0.00026554484950829443, "loss": 0.4905, "step": 134510 }, { "epoch": 6.681235720671501, "grad_norm": 0.1435546875, "learning_rate": 0.00026550511572464485, "loss": 0.4961, "step": 134520 }, { "epoch": 6.68173239296712, "grad_norm": 0.1337890625, "learning_rate": 0.00026546538194099537, "loss": 0.5094, "step": 134530 }, { "epoch": 6.68222906526274, "grad_norm": 0.1474609375, "learning_rate": 0.0002654256481573458, "loss": 0.5259, "step": 134540 }, { "epoch": 6.682725737558359, "grad_norm": 0.1357421875, "learning_rate": 0.00026538591437369626, "loss": 0.5188, "step": 134550 }, { "epoch": 6.683222409853978, "grad_norm": 0.130859375, "learning_rate": 0.0002653461805900467, "loss": 0.4919, "step": 134560 }, { "epoch": 6.683719082149597, "grad_norm": 0.134765625, "learning_rate": 0.00026530644680639715, "loss": 0.4937, "step": 134570 }, { "epoch": 6.684215754445217, "grad_norm": 0.1318359375, "learning_rate": 0.0002652667130227476, "loss": 0.4981, "step": 134580 }, { "epoch": 6.684712426740837, "grad_norm": 0.1259765625, "learning_rate": 0.00026522697923909804, "loss": 0.478, "step": 134590 }, { "epoch": 6.685209099036456, "grad_norm": 0.171875, "learning_rate": 0.0002651872454554485, "loss": 0.482, "step": 134600 }, { "epoch": 6.685705771332075, "grad_norm": 0.1240234375, "learning_rate": 0.000265147511671799, "loss": 0.4786, "step": 134610 }, { "epoch": 6.686202443627694, "grad_norm": 0.1259765625, "learning_rate": 0.0002651077778881494, "loss": 0.5206, "step": 134620 }, { "epoch": 6.686699115923314, "grad_norm": 0.134765625, "learning_rate": 0.00026506804410449987, "loss": 0.4924, "step": 134630 }, { "epoch": 6.687195788218933, "grad_norm": 0.142578125, "learning_rate": 0.00026502831032085034, "loss": 0.487, "step": 134640 }, { "epoch": 6.687692460514553, "grad_norm": 0.1435546875, "learning_rate": 0.00026498857653720076, "loss": 0.4893, "step": 134650 }, { "epoch": 6.688189132810172, "grad_norm": 0.1826171875, "learning_rate": 0.00026494884275355123, "loss": 0.5016, "step": 134660 }, { "epoch": 6.688685805105791, "grad_norm": 0.154296875, "learning_rate": 0.00026490910896990165, "loss": 0.48, "step": 134670 }, { "epoch": 6.689182477401411, "grad_norm": 0.138671875, "learning_rate": 0.00026486937518625217, "loss": 0.4858, "step": 134680 }, { "epoch": 6.68967914969703, "grad_norm": 0.12158203125, "learning_rate": 0.0002648296414026026, "loss": 0.502, "step": 134690 }, { "epoch": 6.690175821992649, "grad_norm": 0.150390625, "learning_rate": 0.000264789907618953, "loss": 0.4786, "step": 134700 }, { "epoch": 6.690672494288268, "grad_norm": 0.1279296875, "learning_rate": 0.0002647501738353035, "loss": 0.5083, "step": 134710 }, { "epoch": 6.691169166583888, "grad_norm": 0.1298828125, "learning_rate": 0.00026471044005165395, "loss": 0.5071, "step": 134720 }, { "epoch": 6.691665838879508, "grad_norm": 0.158203125, "learning_rate": 0.00026467070626800437, "loss": 0.4903, "step": 134730 }, { "epoch": 6.692162511175127, "grad_norm": 0.1630859375, "learning_rate": 0.00026463097248435484, "loss": 0.4844, "step": 134740 }, { "epoch": 6.692659183470746, "grad_norm": 0.12109375, "learning_rate": 0.00026459123870070525, "loss": 0.5136, "step": 134750 }, { "epoch": 6.693155855766365, "grad_norm": 0.142578125, "learning_rate": 0.0002645515049170558, "loss": 0.5069, "step": 134760 }, { "epoch": 6.6936525280619845, "grad_norm": 0.130859375, "learning_rate": 0.0002645117711334062, "loss": 0.4855, "step": 134770 }, { "epoch": 6.694149200357604, "grad_norm": 0.1474609375, "learning_rate": 0.0002644720373497566, "loss": 0.4965, "step": 134780 }, { "epoch": 6.694645872653224, "grad_norm": 0.142578125, "learning_rate": 0.0002644323035661071, "loss": 0.5055, "step": 134790 }, { "epoch": 6.695142544948843, "grad_norm": 0.12255859375, "learning_rate": 0.00026439256978245756, "loss": 0.4824, "step": 134800 }, { "epoch": 6.695639217244462, "grad_norm": 0.1416015625, "learning_rate": 0.000264352835998808, "loss": 0.4634, "step": 134810 }, { "epoch": 6.6961358895400815, "grad_norm": 0.1337890625, "learning_rate": 0.00026431310221515844, "loss": 0.4952, "step": 134820 }, { "epoch": 6.696632561835701, "grad_norm": 0.142578125, "learning_rate": 0.0002642733684315089, "loss": 0.4851, "step": 134830 }, { "epoch": 6.69712923413132, "grad_norm": 0.1533203125, "learning_rate": 0.0002642336346478594, "loss": 0.4957, "step": 134840 }, { "epoch": 6.697625906426939, "grad_norm": 0.123046875, "learning_rate": 0.0002641939008642098, "loss": 0.5094, "step": 134850 }, { "epoch": 6.698122578722559, "grad_norm": 0.1328125, "learning_rate": 0.0002641541670805602, "loss": 0.4804, "step": 134860 }, { "epoch": 6.698619251018179, "grad_norm": 0.150390625, "learning_rate": 0.00026411443329691075, "loss": 0.4908, "step": 134870 }, { "epoch": 6.699115923313798, "grad_norm": 0.1337890625, "learning_rate": 0.00026407469951326116, "loss": 0.4931, "step": 134880 }, { "epoch": 6.699612595609417, "grad_norm": 0.126953125, "learning_rate": 0.0002640349657296116, "loss": 0.4713, "step": 134890 }, { "epoch": 6.700109267905036, "grad_norm": 0.1689453125, "learning_rate": 0.00026399523194596205, "loss": 0.4666, "step": 134900 }, { "epoch": 6.7006059402006555, "grad_norm": 0.1845703125, "learning_rate": 0.0002639554981623125, "loss": 0.4869, "step": 134910 }, { "epoch": 6.701102612496275, "grad_norm": 0.142578125, "learning_rate": 0.000263915764378663, "loss": 0.5096, "step": 134920 }, { "epoch": 6.701599284791894, "grad_norm": 0.1298828125, "learning_rate": 0.0002638760305950134, "loss": 0.4614, "step": 134930 }, { "epoch": 6.702095957087514, "grad_norm": 0.1259765625, "learning_rate": 0.0002638362968113639, "loss": 0.4738, "step": 134940 }, { "epoch": 6.702592629383133, "grad_norm": 0.1640625, "learning_rate": 0.00026379656302771436, "loss": 0.4953, "step": 134950 }, { "epoch": 6.7030893016787525, "grad_norm": 0.1357421875, "learning_rate": 0.00026375682924406477, "loss": 0.5163, "step": 134960 }, { "epoch": 6.703585973974372, "grad_norm": 0.15234375, "learning_rate": 0.0002637170954604152, "loss": 0.4771, "step": 134970 }, { "epoch": 6.704082646269991, "grad_norm": 0.12890625, "learning_rate": 0.0002636773616767657, "loss": 0.5037, "step": 134980 }, { "epoch": 6.70457931856561, "grad_norm": 0.1494140625, "learning_rate": 0.00026363762789311613, "loss": 0.4834, "step": 134990 }, { "epoch": 6.705075990861229, "grad_norm": 0.1484375, "learning_rate": 0.0002635978941094666, "loss": 0.5574, "step": 135000 }, { "epoch": 6.705572663156849, "grad_norm": 0.1591796875, "learning_rate": 0.000263558160325817, "loss": 0.5143, "step": 135010 }, { "epoch": 6.706069335452469, "grad_norm": 0.1201171875, "learning_rate": 0.0002635184265421675, "loss": 0.5004, "step": 135020 }, { "epoch": 6.706566007748088, "grad_norm": 0.1533203125, "learning_rate": 0.00026347869275851796, "loss": 0.5572, "step": 135030 }, { "epoch": 6.707062680043707, "grad_norm": 0.13671875, "learning_rate": 0.0002634389589748684, "loss": 0.4847, "step": 135040 }, { "epoch": 6.707559352339326, "grad_norm": 0.177734375, "learning_rate": 0.00026339922519121885, "loss": 0.4937, "step": 135050 }, { "epoch": 6.708056024634946, "grad_norm": 0.1416015625, "learning_rate": 0.0002633594914075693, "loss": 0.5069, "step": 135060 }, { "epoch": 6.708552696930565, "grad_norm": 0.1767578125, "learning_rate": 0.00026331975762391974, "loss": 0.4741, "step": 135070 }, { "epoch": 6.709049369226184, "grad_norm": 0.1552734375, "learning_rate": 0.0002632800238402702, "loss": 0.4897, "step": 135080 }, { "epoch": 6.709546041521804, "grad_norm": 0.158203125, "learning_rate": 0.00026324029005662063, "loss": 0.4932, "step": 135090 }, { "epoch": 6.7100427138174235, "grad_norm": 0.1328125, "learning_rate": 0.0002632005562729711, "loss": 0.4921, "step": 135100 }, { "epoch": 6.710539386113043, "grad_norm": 0.134765625, "learning_rate": 0.00026316082248932157, "loss": 0.4973, "step": 135110 }, { "epoch": 6.711036058408662, "grad_norm": 0.14453125, "learning_rate": 0.000263121088705672, "loss": 0.5009, "step": 135120 }, { "epoch": 6.711532730704281, "grad_norm": 0.1328125, "learning_rate": 0.00026308135492202246, "loss": 0.501, "step": 135130 }, { "epoch": 6.7120294029999, "grad_norm": 0.1376953125, "learning_rate": 0.00026304162113837293, "loss": 0.498, "step": 135140 }, { "epoch": 6.71252607529552, "grad_norm": 0.1494140625, "learning_rate": 0.00026300188735472335, "loss": 0.5147, "step": 135150 }, { "epoch": 6.71302274759114, "grad_norm": 0.126953125, "learning_rate": 0.0002629621535710738, "loss": 0.4895, "step": 135160 }, { "epoch": 6.713519419886759, "grad_norm": 0.14453125, "learning_rate": 0.0002629224197874243, "loss": 0.4905, "step": 135170 }, { "epoch": 6.714016092182378, "grad_norm": 0.1591796875, "learning_rate": 0.0002628826860037747, "loss": 0.462, "step": 135180 }, { "epoch": 6.714512764477997, "grad_norm": 0.1181640625, "learning_rate": 0.0002628429522201252, "loss": 0.5165, "step": 135190 }, { "epoch": 6.715009436773617, "grad_norm": 0.126953125, "learning_rate": 0.0002628032184364756, "loss": 0.5055, "step": 135200 }, { "epoch": 6.715506109069236, "grad_norm": 0.1533203125, "learning_rate": 0.0002627634846528261, "loss": 0.4938, "step": 135210 }, { "epoch": 6.716002781364855, "grad_norm": 0.138671875, "learning_rate": 0.00026272375086917654, "loss": 0.4777, "step": 135220 }, { "epoch": 6.716499453660475, "grad_norm": 0.1357421875, "learning_rate": 0.00026268401708552696, "loss": 0.4791, "step": 135230 }, { "epoch": 6.716996125956094, "grad_norm": 0.146484375, "learning_rate": 0.00026264428330187743, "loss": 0.4691, "step": 135240 }, { "epoch": 6.717492798251714, "grad_norm": 0.15234375, "learning_rate": 0.0002626045495182279, "loss": 0.4933, "step": 135250 }, { "epoch": 6.717989470547333, "grad_norm": 0.126953125, "learning_rate": 0.0002625648157345783, "loss": 0.5045, "step": 135260 }, { "epoch": 6.718486142842952, "grad_norm": 0.138671875, "learning_rate": 0.0002625250819509288, "loss": 0.4828, "step": 135270 }, { "epoch": 6.718982815138571, "grad_norm": 0.1552734375, "learning_rate": 0.0002624853481672792, "loss": 0.4916, "step": 135280 }, { "epoch": 6.7194794874341905, "grad_norm": 0.1689453125, "learning_rate": 0.00026244561438362973, "loss": 0.5101, "step": 135290 }, { "epoch": 6.719976159729811, "grad_norm": 0.1328125, "learning_rate": 0.00026240588059998015, "loss": 0.5071, "step": 135300 }, { "epoch": 6.72047283202543, "grad_norm": 0.150390625, "learning_rate": 0.00026236614681633057, "loss": 0.5014, "step": 135310 }, { "epoch": 6.720969504321049, "grad_norm": 0.1787109375, "learning_rate": 0.00026232641303268104, "loss": 0.4723, "step": 135320 }, { "epoch": 6.721466176616668, "grad_norm": 0.1435546875, "learning_rate": 0.0002622866792490315, "loss": 0.4911, "step": 135330 }, { "epoch": 6.721962848912288, "grad_norm": 0.12109375, "learning_rate": 0.0002622469454653819, "loss": 0.5158, "step": 135340 }, { "epoch": 6.722459521207907, "grad_norm": 0.12255859375, "learning_rate": 0.0002622072116817324, "loss": 0.4811, "step": 135350 }, { "epoch": 6.722956193503526, "grad_norm": 0.1337890625, "learning_rate": 0.00026216747789808287, "loss": 0.5049, "step": 135360 }, { "epoch": 6.723452865799146, "grad_norm": 0.1416015625, "learning_rate": 0.00026212774411443334, "loss": 0.5028, "step": 135370 }, { "epoch": 6.723949538094765, "grad_norm": 0.14453125, "learning_rate": 0.00026208801033078376, "loss": 0.5054, "step": 135380 }, { "epoch": 6.724446210390385, "grad_norm": 0.13671875, "learning_rate": 0.0002620482765471342, "loss": 0.484, "step": 135390 }, { "epoch": 6.724942882686004, "grad_norm": 0.1435546875, "learning_rate": 0.0002620085427634847, "loss": 0.4932, "step": 135400 }, { "epoch": 6.725439554981623, "grad_norm": 0.12890625, "learning_rate": 0.0002619688089798351, "loss": 0.525, "step": 135410 }, { "epoch": 6.725936227277242, "grad_norm": 0.1279296875, "learning_rate": 0.00026192907519618553, "loss": 0.4812, "step": 135420 }, { "epoch": 6.7264328995728615, "grad_norm": 0.1513671875, "learning_rate": 0.000261889341412536, "loss": 0.4896, "step": 135430 }, { "epoch": 6.726929571868482, "grad_norm": 0.1611328125, "learning_rate": 0.0002618496076288865, "loss": 0.488, "step": 135440 }, { "epoch": 6.727426244164101, "grad_norm": 0.158203125, "learning_rate": 0.00026180987384523695, "loss": 0.5218, "step": 135450 }, { "epoch": 6.72792291645972, "grad_norm": 0.1494140625, "learning_rate": 0.00026177014006158736, "loss": 0.4835, "step": 135460 }, { "epoch": 6.728419588755339, "grad_norm": 0.1357421875, "learning_rate": 0.00026173040627793784, "loss": 0.5187, "step": 135470 }, { "epoch": 6.7289162610509585, "grad_norm": 0.12890625, "learning_rate": 0.0002616906724942883, "loss": 0.4934, "step": 135480 }, { "epoch": 6.729412933346578, "grad_norm": 0.12255859375, "learning_rate": 0.0002616509387106387, "loss": 0.4806, "step": 135490 }, { "epoch": 6.729909605642197, "grad_norm": 0.12890625, "learning_rate": 0.0002616112049269892, "loss": 0.496, "step": 135500 }, { "epoch": 6.730406277937817, "grad_norm": 0.15234375, "learning_rate": 0.00026157147114333967, "loss": 0.5232, "step": 135510 }, { "epoch": 6.730902950233436, "grad_norm": 0.1279296875, "learning_rate": 0.0002615317373596901, "loss": 0.4889, "step": 135520 }, { "epoch": 6.7313996225290555, "grad_norm": 0.15234375, "learning_rate": 0.00026149200357604056, "loss": 0.5107, "step": 135530 }, { "epoch": 6.731896294824675, "grad_norm": 0.1767578125, "learning_rate": 0.00026145226979239097, "loss": 0.5284, "step": 135540 }, { "epoch": 6.732392967120294, "grad_norm": 0.1455078125, "learning_rate": 0.00026141253600874144, "loss": 0.4982, "step": 135550 }, { "epoch": 6.732889639415913, "grad_norm": 0.134765625, "learning_rate": 0.0002613728022250919, "loss": 0.5103, "step": 135560 }, { "epoch": 6.7333863117115325, "grad_norm": 0.1279296875, "learning_rate": 0.00026133306844144233, "loss": 0.4855, "step": 135570 }, { "epoch": 6.733882984007153, "grad_norm": 0.15625, "learning_rate": 0.0002612933346577928, "loss": 0.5072, "step": 135580 }, { "epoch": 6.734379656302772, "grad_norm": 0.12255859375, "learning_rate": 0.0002612536008741433, "loss": 0.4879, "step": 135590 }, { "epoch": 6.734876328598391, "grad_norm": 0.1259765625, "learning_rate": 0.0002612138670904937, "loss": 0.5167, "step": 135600 }, { "epoch": 6.73537300089401, "grad_norm": 0.134765625, "learning_rate": 0.00026117413330684416, "loss": 0.4997, "step": 135610 }, { "epoch": 6.7358696731896295, "grad_norm": 0.134765625, "learning_rate": 0.0002611343995231946, "loss": 0.5016, "step": 135620 }, { "epoch": 6.736366345485249, "grad_norm": 0.1416015625, "learning_rate": 0.00026109466573954505, "loss": 0.5042, "step": 135630 }, { "epoch": 6.736863017780868, "grad_norm": 0.1416015625, "learning_rate": 0.0002610549319558955, "loss": 0.5122, "step": 135640 }, { "epoch": 6.737359690076487, "grad_norm": 0.1318359375, "learning_rate": 0.00026101519817224594, "loss": 0.5026, "step": 135650 }, { "epoch": 6.737856362372106, "grad_norm": 0.1435546875, "learning_rate": 0.0002609754643885964, "loss": 0.4977, "step": 135660 }, { "epoch": 6.7383530346677265, "grad_norm": 0.12451171875, "learning_rate": 0.0002609357306049469, "loss": 0.452, "step": 135670 }, { "epoch": 6.738849706963346, "grad_norm": 0.1669921875, "learning_rate": 0.0002608959968212973, "loss": 0.4967, "step": 135680 }, { "epoch": 6.739346379258965, "grad_norm": 0.1357421875, "learning_rate": 0.00026085626303764777, "loss": 0.5096, "step": 135690 }, { "epoch": 6.739843051554584, "grad_norm": 0.1337890625, "learning_rate": 0.00026081652925399824, "loss": 0.5105, "step": 135700 }, { "epoch": 6.740339723850203, "grad_norm": 0.1435546875, "learning_rate": 0.00026077679547034866, "loss": 0.4877, "step": 135710 }, { "epoch": 6.740836396145823, "grad_norm": 0.150390625, "learning_rate": 0.00026073706168669913, "loss": 0.4725, "step": 135720 }, { "epoch": 6.741333068441442, "grad_norm": 0.1416015625, "learning_rate": 0.00026069732790304955, "loss": 0.4899, "step": 135730 }, { "epoch": 6.741829740737062, "grad_norm": 0.1474609375, "learning_rate": 0.0002606575941194001, "loss": 0.5124, "step": 135740 }, { "epoch": 6.742326413032681, "grad_norm": 0.1484375, "learning_rate": 0.0002606178603357505, "loss": 0.5151, "step": 135750 }, { "epoch": 6.7428230853283, "grad_norm": 0.1171875, "learning_rate": 0.0002605781265521009, "loss": 0.4804, "step": 135760 }, { "epoch": 6.74331975762392, "grad_norm": 0.1376953125, "learning_rate": 0.0002605383927684514, "loss": 0.4874, "step": 135770 }, { "epoch": 6.743816429919539, "grad_norm": 0.1533203125, "learning_rate": 0.00026049865898480185, "loss": 0.5055, "step": 135780 }, { "epoch": 6.744313102215158, "grad_norm": 0.1611328125, "learning_rate": 0.00026045892520115227, "loss": 0.4592, "step": 135790 }, { "epoch": 6.744809774510777, "grad_norm": 0.1435546875, "learning_rate": 0.00026041919141750274, "loss": 0.4691, "step": 135800 }, { "epoch": 6.7453064468063975, "grad_norm": 0.142578125, "learning_rate": 0.0002603794576338532, "loss": 0.5086, "step": 135810 }, { "epoch": 6.745803119102017, "grad_norm": 0.1220703125, "learning_rate": 0.0002603397238502037, "loss": 0.5025, "step": 135820 }, { "epoch": 6.746299791397636, "grad_norm": 0.1396484375, "learning_rate": 0.0002602999900665541, "loss": 0.5012, "step": 135830 }, { "epoch": 6.746796463693255, "grad_norm": 0.158203125, "learning_rate": 0.0002602602562829045, "loss": 0.4888, "step": 135840 }, { "epoch": 6.747293135988874, "grad_norm": 0.142578125, "learning_rate": 0.000260220522499255, "loss": 0.4955, "step": 135850 }, { "epoch": 6.747789808284494, "grad_norm": 0.1328125, "learning_rate": 0.00026018078871560546, "loss": 0.5009, "step": 135860 }, { "epoch": 6.748286480580113, "grad_norm": 0.1484375, "learning_rate": 0.00026014105493195593, "loss": 0.5116, "step": 135870 }, { "epoch": 6.748783152875733, "grad_norm": 0.1484375, "learning_rate": 0.00026010132114830635, "loss": 0.5054, "step": 135880 }, { "epoch": 6.749279825171352, "grad_norm": 0.1357421875, "learning_rate": 0.0002600615873646568, "loss": 0.4901, "step": 135890 }, { "epoch": 6.749776497466971, "grad_norm": 0.1787109375, "learning_rate": 0.0002600218535810073, "loss": 0.5006, "step": 135900 }, { "epoch": 6.750273169762591, "grad_norm": 0.1337890625, "learning_rate": 0.0002599821197973577, "loss": 0.4814, "step": 135910 }, { "epoch": 6.75076984205821, "grad_norm": 0.1279296875, "learning_rate": 0.0002599423860137081, "loss": 0.4964, "step": 135920 }, { "epoch": 6.751266514353829, "grad_norm": 0.1455078125, "learning_rate": 0.00025990265223005865, "loss": 0.5052, "step": 135930 }, { "epoch": 6.751763186649448, "grad_norm": 0.126953125, "learning_rate": 0.00025986291844640907, "loss": 0.4758, "step": 135940 }, { "epoch": 6.752259858945068, "grad_norm": 0.1611328125, "learning_rate": 0.00025982318466275954, "loss": 0.471, "step": 135950 }, { "epoch": 6.752756531240688, "grad_norm": 0.138671875, "learning_rate": 0.00025978345087910996, "loss": 0.4947, "step": 135960 }, { "epoch": 6.753253203536307, "grad_norm": 0.1533203125, "learning_rate": 0.00025974371709546043, "loss": 0.5145, "step": 135970 }, { "epoch": 6.753749875831926, "grad_norm": 0.1396484375, "learning_rate": 0.0002597039833118109, "loss": 0.5098, "step": 135980 }, { "epoch": 6.754246548127545, "grad_norm": 0.1396484375, "learning_rate": 0.0002596642495281613, "loss": 0.4997, "step": 135990 }, { "epoch": 6.7547432204231646, "grad_norm": 0.1396484375, "learning_rate": 0.0002596245157445118, "loss": 0.4737, "step": 136000 }, { "epoch": 6.755239892718784, "grad_norm": 0.13671875, "learning_rate": 0.00025958478196086226, "loss": 0.5139, "step": 136010 }, { "epoch": 6.755736565014404, "grad_norm": 0.1455078125, "learning_rate": 0.0002595450481772127, "loss": 0.5038, "step": 136020 }, { "epoch": 6.756233237310023, "grad_norm": 0.1572265625, "learning_rate": 0.00025950531439356315, "loss": 0.5041, "step": 136030 }, { "epoch": 6.756729909605642, "grad_norm": 0.1357421875, "learning_rate": 0.0002594655806099136, "loss": 0.5137, "step": 136040 }, { "epoch": 6.757226581901262, "grad_norm": 0.1640625, "learning_rate": 0.00025942584682626404, "loss": 0.4855, "step": 136050 }, { "epoch": 6.757723254196881, "grad_norm": 0.126953125, "learning_rate": 0.0002593861130426145, "loss": 0.4702, "step": 136060 }, { "epoch": 6.7582199264925, "grad_norm": 0.16015625, "learning_rate": 0.0002593463792589649, "loss": 0.5077, "step": 136070 }, { "epoch": 6.758716598788119, "grad_norm": 0.1279296875, "learning_rate": 0.0002593066454753154, "loss": 0.4921, "step": 136080 }, { "epoch": 6.759213271083739, "grad_norm": 0.1513671875, "learning_rate": 0.00025926691169166587, "loss": 0.494, "step": 136090 }, { "epoch": 6.759709943379359, "grad_norm": 0.123046875, "learning_rate": 0.0002592271779080163, "loss": 0.4766, "step": 136100 }, { "epoch": 6.760206615674978, "grad_norm": 0.150390625, "learning_rate": 0.00025918744412436676, "loss": 0.4798, "step": 136110 }, { "epoch": 6.760703287970597, "grad_norm": 0.14453125, "learning_rate": 0.00025914771034071723, "loss": 0.4939, "step": 136120 }, { "epoch": 6.761199960266216, "grad_norm": 0.1337890625, "learning_rate": 0.00025910797655706764, "loss": 0.5104, "step": 136130 }, { "epoch": 6.7616966325618355, "grad_norm": 0.142578125, "learning_rate": 0.0002590682427734181, "loss": 0.5014, "step": 136140 }, { "epoch": 6.762193304857455, "grad_norm": 0.146484375, "learning_rate": 0.00025902850898976853, "loss": 0.4916, "step": 136150 }, { "epoch": 6.762689977153075, "grad_norm": 0.146484375, "learning_rate": 0.000258988775206119, "loss": 0.5142, "step": 136160 }, { "epoch": 6.763186649448694, "grad_norm": 0.16796875, "learning_rate": 0.0002589490414224695, "loss": 0.4729, "step": 136170 }, { "epoch": 6.763683321744313, "grad_norm": 0.126953125, "learning_rate": 0.0002589093076388199, "loss": 0.4935, "step": 136180 }, { "epoch": 6.7641799940399325, "grad_norm": 0.1298828125, "learning_rate": 0.00025886957385517036, "loss": 0.4747, "step": 136190 }, { "epoch": 6.764676666335552, "grad_norm": 0.1328125, "learning_rate": 0.00025882984007152084, "loss": 0.4835, "step": 136200 }, { "epoch": 6.765173338631171, "grad_norm": 0.2373046875, "learning_rate": 0.00025879010628787125, "loss": 0.453, "step": 136210 }, { "epoch": 6.76567001092679, "grad_norm": 0.1376953125, "learning_rate": 0.0002587503725042217, "loss": 0.5033, "step": 136220 }, { "epoch": 6.76616668322241, "grad_norm": 0.1328125, "learning_rate": 0.0002587106387205722, "loss": 0.5052, "step": 136230 }, { "epoch": 6.7666633555180296, "grad_norm": 0.1435546875, "learning_rate": 0.0002586709049369226, "loss": 0.4999, "step": 136240 }, { "epoch": 6.767160027813649, "grad_norm": 0.1611328125, "learning_rate": 0.0002586311711532731, "loss": 0.4879, "step": 136250 }, { "epoch": 6.767656700109268, "grad_norm": 0.1572265625, "learning_rate": 0.0002585914373696235, "loss": 0.5078, "step": 136260 }, { "epoch": 6.768153372404887, "grad_norm": 0.1845703125, "learning_rate": 0.000258551703585974, "loss": 0.5187, "step": 136270 }, { "epoch": 6.7686500447005065, "grad_norm": 0.1474609375, "learning_rate": 0.00025851196980232444, "loss": 0.4798, "step": 136280 }, { "epoch": 6.769146716996126, "grad_norm": 0.1259765625, "learning_rate": 0.00025847223601867486, "loss": 0.4725, "step": 136290 }, { "epoch": 6.769643389291746, "grad_norm": 0.158203125, "learning_rate": 0.00025843250223502533, "loss": 0.4921, "step": 136300 }, { "epoch": 6.770140061587365, "grad_norm": 0.146484375, "learning_rate": 0.0002583927684513758, "loss": 0.5116, "step": 136310 }, { "epoch": 6.770636733882984, "grad_norm": 0.1337890625, "learning_rate": 0.0002583530346677263, "loss": 0.472, "step": 136320 }, { "epoch": 6.7711334061786035, "grad_norm": 0.1513671875, "learning_rate": 0.0002583133008840767, "loss": 0.4923, "step": 136330 }, { "epoch": 6.771630078474223, "grad_norm": 0.1337890625, "learning_rate": 0.00025827356710042716, "loss": 0.4859, "step": 136340 }, { "epoch": 6.772126750769842, "grad_norm": 0.146484375, "learning_rate": 0.00025823383331677763, "loss": 0.4872, "step": 136350 }, { "epoch": 6.772623423065461, "grad_norm": 0.1455078125, "learning_rate": 0.00025819409953312805, "loss": 0.5, "step": 136360 }, { "epoch": 6.77312009536108, "grad_norm": 0.1494140625, "learning_rate": 0.00025815436574947847, "loss": 0.4895, "step": 136370 }, { "epoch": 6.7736167676567, "grad_norm": 0.134765625, "learning_rate": 0.000258114631965829, "loss": 0.494, "step": 136380 }, { "epoch": 6.77411343995232, "grad_norm": 0.1552734375, "learning_rate": 0.0002580748981821794, "loss": 0.47, "step": 136390 }, { "epoch": 6.774610112247939, "grad_norm": 0.1396484375, "learning_rate": 0.0002580351643985299, "loss": 0.4972, "step": 136400 }, { "epoch": 6.775106784543558, "grad_norm": 0.171875, "learning_rate": 0.0002579954306148803, "loss": 0.495, "step": 136410 }, { "epoch": 6.775603456839177, "grad_norm": 0.171875, "learning_rate": 0.00025795569683123077, "loss": 0.5207, "step": 136420 }, { "epoch": 6.776100129134797, "grad_norm": 0.14453125, "learning_rate": 0.00025791596304758124, "loss": 0.5088, "step": 136430 }, { "epoch": 6.776596801430416, "grad_norm": 0.1689453125, "learning_rate": 0.00025787622926393166, "loss": 0.5184, "step": 136440 }, { "epoch": 6.777093473726035, "grad_norm": 0.1494140625, "learning_rate": 0.0002578364954802821, "loss": 0.4857, "step": 136450 }, { "epoch": 6.777590146021655, "grad_norm": 0.146484375, "learning_rate": 0.0002577967616966326, "loss": 0.4996, "step": 136460 }, { "epoch": 6.778086818317274, "grad_norm": 0.1396484375, "learning_rate": 0.000257757027912983, "loss": 0.4963, "step": 136470 }, { "epoch": 6.778583490612894, "grad_norm": 0.125, "learning_rate": 0.0002577172941293335, "loss": 0.4915, "step": 136480 }, { "epoch": 6.779080162908513, "grad_norm": 0.1494140625, "learning_rate": 0.0002576775603456839, "loss": 0.5114, "step": 136490 }, { "epoch": 6.779576835204132, "grad_norm": 0.1337890625, "learning_rate": 0.0002576378265620344, "loss": 0.4829, "step": 136500 }, { "epoch": 6.780073507499751, "grad_norm": 0.16015625, "learning_rate": 0.00025759809277838485, "loss": 0.4805, "step": 136510 }, { "epoch": 6.780570179795371, "grad_norm": 0.1328125, "learning_rate": 0.00025755835899473527, "loss": 0.4742, "step": 136520 }, { "epoch": 6.781066852090991, "grad_norm": 0.1318359375, "learning_rate": 0.00025751862521108574, "loss": 0.4905, "step": 136530 }, { "epoch": 6.78156352438661, "grad_norm": 0.1328125, "learning_rate": 0.0002574788914274362, "loss": 0.5023, "step": 136540 }, { "epoch": 6.782060196682229, "grad_norm": 0.1494140625, "learning_rate": 0.00025743915764378663, "loss": 0.4985, "step": 136550 }, { "epoch": 6.782556868977848, "grad_norm": 0.1298828125, "learning_rate": 0.0002573994238601371, "loss": 0.5227, "step": 136560 }, { "epoch": 6.783053541273468, "grad_norm": 0.1318359375, "learning_rate": 0.00025735969007648757, "loss": 0.494, "step": 136570 }, { "epoch": 6.783550213569087, "grad_norm": 0.130859375, "learning_rate": 0.000257319956292838, "loss": 0.5016, "step": 136580 }, { "epoch": 6.784046885864706, "grad_norm": 0.1650390625, "learning_rate": 0.00025728022250918846, "loss": 0.4944, "step": 136590 }, { "epoch": 6.784543558160326, "grad_norm": 0.1572265625, "learning_rate": 0.0002572404887255389, "loss": 0.4921, "step": 136600 }, { "epoch": 6.785040230455945, "grad_norm": 0.140625, "learning_rate": 0.00025720075494188935, "loss": 0.4741, "step": 136610 }, { "epoch": 6.785536902751565, "grad_norm": 0.134765625, "learning_rate": 0.0002571610211582398, "loss": 0.4933, "step": 136620 }, { "epoch": 6.786033575047184, "grad_norm": 0.146484375, "learning_rate": 0.00025712128737459024, "loss": 0.4757, "step": 136630 }, { "epoch": 6.786530247342803, "grad_norm": 0.130859375, "learning_rate": 0.0002570815535909407, "loss": 0.4882, "step": 136640 }, { "epoch": 6.787026919638422, "grad_norm": 0.1357421875, "learning_rate": 0.0002570418198072912, "loss": 0.4844, "step": 136650 }, { "epoch": 6.7875235919340415, "grad_norm": 0.1337890625, "learning_rate": 0.0002570020860236416, "loss": 0.5032, "step": 136660 }, { "epoch": 6.788020264229662, "grad_norm": 0.1396484375, "learning_rate": 0.00025696235223999207, "loss": 0.4978, "step": 136670 }, { "epoch": 6.788516936525281, "grad_norm": 0.142578125, "learning_rate": 0.00025692261845634254, "loss": 0.5073, "step": 136680 }, { "epoch": 6.7890136088209, "grad_norm": 0.146484375, "learning_rate": 0.000256882884672693, "loss": 0.4952, "step": 136690 }, { "epoch": 6.789510281116519, "grad_norm": 0.1484375, "learning_rate": 0.00025684315088904343, "loss": 0.5072, "step": 136700 }, { "epoch": 6.790006953412139, "grad_norm": 0.13671875, "learning_rate": 0.00025680341710539384, "loss": 0.4873, "step": 136710 }, { "epoch": 6.790503625707758, "grad_norm": 0.1435546875, "learning_rate": 0.0002567636833217443, "loss": 0.5001, "step": 136720 }, { "epoch": 6.791000298003377, "grad_norm": 0.125, "learning_rate": 0.0002567239495380948, "loss": 0.4856, "step": 136730 }, { "epoch": 6.791496970298997, "grad_norm": 0.1416015625, "learning_rate": 0.0002566842157544452, "loss": 0.4997, "step": 136740 }, { "epoch": 6.791993642594616, "grad_norm": 0.1396484375, "learning_rate": 0.0002566444819707957, "loss": 0.4973, "step": 136750 }, { "epoch": 6.792490314890236, "grad_norm": 0.171875, "learning_rate": 0.00025660474818714615, "loss": 0.5042, "step": 136760 }, { "epoch": 6.792986987185855, "grad_norm": 0.1591796875, "learning_rate": 0.0002565650144034966, "loss": 0.5157, "step": 136770 }, { "epoch": 6.793483659481474, "grad_norm": 0.1298828125, "learning_rate": 0.00025652528061984704, "loss": 0.5132, "step": 136780 }, { "epoch": 6.793980331777093, "grad_norm": 0.1240234375, "learning_rate": 0.00025648554683619745, "loss": 0.4975, "step": 136790 }, { "epoch": 6.7944770040727125, "grad_norm": 0.15625, "learning_rate": 0.000256445813052548, "loss": 0.5163, "step": 136800 }, { "epoch": 6.794973676368333, "grad_norm": 0.1611328125, "learning_rate": 0.0002564060792688984, "loss": 0.4743, "step": 136810 }, { "epoch": 6.795470348663952, "grad_norm": 0.140625, "learning_rate": 0.0002563663454852488, "loss": 0.4745, "step": 136820 }, { "epoch": 6.795967020959571, "grad_norm": 0.13671875, "learning_rate": 0.0002563266117015993, "loss": 0.5037, "step": 136830 }, { "epoch": 6.79646369325519, "grad_norm": 0.1552734375, "learning_rate": 0.00025628687791794976, "loss": 0.453, "step": 136840 }, { "epoch": 6.7969603655508095, "grad_norm": 0.1416015625, "learning_rate": 0.0002562471441343002, "loss": 0.4808, "step": 136850 }, { "epoch": 6.797457037846429, "grad_norm": 0.126953125, "learning_rate": 0.00025620741035065064, "loss": 0.4868, "step": 136860 }, { "epoch": 6.797953710142048, "grad_norm": 0.15234375, "learning_rate": 0.0002561676765670011, "loss": 0.4915, "step": 136870 }, { "epoch": 6.798450382437668, "grad_norm": 0.1435546875, "learning_rate": 0.0002561279427833516, "loss": 0.4938, "step": 136880 }, { "epoch": 6.798947054733287, "grad_norm": 0.1279296875, "learning_rate": 0.000256088208999702, "loss": 0.4555, "step": 136890 }, { "epoch": 6.7994437270289065, "grad_norm": 0.15625, "learning_rate": 0.0002560484752160524, "loss": 0.5093, "step": 136900 }, { "epoch": 6.799940399324526, "grad_norm": 0.134765625, "learning_rate": 0.00025600874143240295, "loss": 0.4832, "step": 136910 }, { "epoch": 6.800437071620145, "grad_norm": 0.1455078125, "learning_rate": 0.00025596900764875336, "loss": 0.4586, "step": 136920 }, { "epoch": 6.800933743915764, "grad_norm": 0.12890625, "learning_rate": 0.00025592927386510384, "loss": 0.4659, "step": 136930 }, { "epoch": 6.8014304162113834, "grad_norm": 0.1298828125, "learning_rate": 0.00025588954008145425, "loss": 0.4829, "step": 136940 }, { "epoch": 6.801927088507004, "grad_norm": 0.15625, "learning_rate": 0.0002558498062978047, "loss": 0.4846, "step": 136950 }, { "epoch": 6.802423760802623, "grad_norm": 0.1591796875, "learning_rate": 0.0002558100725141552, "loss": 0.5186, "step": 136960 }, { "epoch": 6.802920433098242, "grad_norm": 0.1337890625, "learning_rate": 0.0002557703387305056, "loss": 0.4911, "step": 136970 }, { "epoch": 6.803417105393861, "grad_norm": 0.138671875, "learning_rate": 0.00025573060494685603, "loss": 0.4802, "step": 136980 }, { "epoch": 6.8039137776894805, "grad_norm": 0.1376953125, "learning_rate": 0.00025569087116320655, "loss": 0.5097, "step": 136990 }, { "epoch": 6.8044104499851, "grad_norm": 0.1357421875, "learning_rate": 0.00025565113737955697, "loss": 0.4759, "step": 137000 }, { "epoch": 6.804907122280719, "grad_norm": 0.140625, "learning_rate": 0.00025561140359590744, "loss": 0.4875, "step": 137010 }, { "epoch": 6.805403794576338, "grad_norm": 0.1904296875, "learning_rate": 0.00025557166981225786, "loss": 0.5124, "step": 137020 }, { "epoch": 6.805900466871958, "grad_norm": 0.1318359375, "learning_rate": 0.00025553193602860833, "loss": 0.4782, "step": 137030 }, { "epoch": 6.8063971391675775, "grad_norm": 0.1494140625, "learning_rate": 0.0002554922022449588, "loss": 0.476, "step": 137040 }, { "epoch": 6.806893811463197, "grad_norm": 0.1484375, "learning_rate": 0.0002554524684613092, "loss": 0.4942, "step": 137050 }, { "epoch": 6.807390483758816, "grad_norm": 0.130859375, "learning_rate": 0.0002554127346776597, "loss": 0.5152, "step": 137060 }, { "epoch": 6.807887156054435, "grad_norm": 0.1748046875, "learning_rate": 0.00025537300089401016, "loss": 0.5123, "step": 137070 }, { "epoch": 6.808383828350054, "grad_norm": 0.17578125, "learning_rate": 0.0002553332671103606, "loss": 0.4966, "step": 137080 }, { "epoch": 6.808880500645674, "grad_norm": 0.1416015625, "learning_rate": 0.00025529353332671105, "loss": 0.4836, "step": 137090 }, { "epoch": 6.809377172941293, "grad_norm": 0.1279296875, "learning_rate": 0.0002552537995430615, "loss": 0.4629, "step": 137100 }, { "epoch": 6.809873845236913, "grad_norm": 0.126953125, "learning_rate": 0.00025521406575941194, "loss": 0.4719, "step": 137110 }, { "epoch": 6.810370517532532, "grad_norm": 0.134765625, "learning_rate": 0.0002551743319757624, "loss": 0.4963, "step": 137120 }, { "epoch": 6.810867189828151, "grad_norm": 0.1513671875, "learning_rate": 0.00025513459819211283, "loss": 0.4692, "step": 137130 }, { "epoch": 6.811363862123771, "grad_norm": 0.15625, "learning_rate": 0.00025509486440846335, "loss": 0.5022, "step": 137140 }, { "epoch": 6.81186053441939, "grad_norm": 0.1357421875, "learning_rate": 0.00025505513062481377, "loss": 0.463, "step": 137150 }, { "epoch": 6.812357206715009, "grad_norm": 0.1376953125, "learning_rate": 0.0002550153968411642, "loss": 0.5002, "step": 137160 }, { "epoch": 6.812853879010628, "grad_norm": 0.1640625, "learning_rate": 0.00025497566305751466, "loss": 0.4814, "step": 137170 }, { "epoch": 6.8133505513062484, "grad_norm": 0.1416015625, "learning_rate": 0.00025493592927386513, "loss": 0.5067, "step": 137180 }, { "epoch": 6.813847223601868, "grad_norm": 0.1640625, "learning_rate": 0.00025489619549021555, "loss": 0.4942, "step": 137190 }, { "epoch": 6.814343895897487, "grad_norm": 0.154296875, "learning_rate": 0.000254856461706566, "loss": 0.5138, "step": 137200 }, { "epoch": 6.814840568193106, "grad_norm": 0.1298828125, "learning_rate": 0.0002548167279229165, "loss": 0.4945, "step": 137210 }, { "epoch": 6.815337240488725, "grad_norm": 0.1416015625, "learning_rate": 0.00025477699413926696, "loss": 0.4939, "step": 137220 }, { "epoch": 6.815833912784345, "grad_norm": 0.134765625, "learning_rate": 0.0002547372603556174, "loss": 0.5035, "step": 137230 }, { "epoch": 6.816330585079964, "grad_norm": 0.1611328125, "learning_rate": 0.0002546975265719678, "loss": 0.4963, "step": 137240 }, { "epoch": 6.816827257375584, "grad_norm": 0.130859375, "learning_rate": 0.00025465779278831827, "loss": 0.4881, "step": 137250 }, { "epoch": 6.817323929671203, "grad_norm": 0.12158203125, "learning_rate": 0.00025461805900466874, "loss": 0.4813, "step": 137260 }, { "epoch": 6.817820601966822, "grad_norm": 0.142578125, "learning_rate": 0.00025457832522101916, "loss": 0.5031, "step": 137270 }, { "epoch": 6.818317274262442, "grad_norm": 0.130859375, "learning_rate": 0.00025453859143736963, "loss": 0.4995, "step": 137280 }, { "epoch": 6.818813946558061, "grad_norm": 0.12353515625, "learning_rate": 0.0002544988576537201, "loss": 0.4715, "step": 137290 }, { "epoch": 6.81931061885368, "grad_norm": 0.1298828125, "learning_rate": 0.00025445912387007057, "loss": 0.4713, "step": 137300 }, { "epoch": 6.819807291149299, "grad_norm": 0.1337890625, "learning_rate": 0.000254419390086421, "loss": 0.4607, "step": 137310 }, { "epoch": 6.820303963444919, "grad_norm": 0.1298828125, "learning_rate": 0.0002543796563027714, "loss": 0.4896, "step": 137320 }, { "epoch": 6.820800635740539, "grad_norm": 0.126953125, "learning_rate": 0.00025433992251912193, "loss": 0.4689, "step": 137330 }, { "epoch": 6.821297308036158, "grad_norm": 0.1376953125, "learning_rate": 0.00025430018873547235, "loss": 0.51, "step": 137340 }, { "epoch": 6.821793980331777, "grad_norm": 0.177734375, "learning_rate": 0.00025426045495182276, "loss": 0.4849, "step": 137350 }, { "epoch": 6.822290652627396, "grad_norm": 0.13671875, "learning_rate": 0.00025422072116817324, "loss": 0.5056, "step": 137360 }, { "epoch": 6.8227873249230155, "grad_norm": 0.125, "learning_rate": 0.0002541809873845237, "loss": 0.4617, "step": 137370 }, { "epoch": 6.823283997218635, "grad_norm": 0.138671875, "learning_rate": 0.0002541412536008742, "loss": 0.5141, "step": 137380 }, { "epoch": 6.823780669514255, "grad_norm": 0.13671875, "learning_rate": 0.0002541015198172246, "loss": 0.487, "step": 137390 }, { "epoch": 6.824277341809874, "grad_norm": 0.130859375, "learning_rate": 0.00025406178603357507, "loss": 0.5098, "step": 137400 }, { "epoch": 6.824774014105493, "grad_norm": 0.1328125, "learning_rate": 0.00025402205224992554, "loss": 0.5122, "step": 137410 }, { "epoch": 6.825270686401113, "grad_norm": 0.13671875, "learning_rate": 0.00025398231846627596, "loss": 0.5249, "step": 137420 }, { "epoch": 6.825767358696732, "grad_norm": 0.1318359375, "learning_rate": 0.00025394258468262637, "loss": 0.473, "step": 137430 }, { "epoch": 6.826264030992351, "grad_norm": 0.1474609375, "learning_rate": 0.0002539028508989769, "loss": 0.515, "step": 137440 }, { "epoch": 6.82676070328797, "grad_norm": 0.1484375, "learning_rate": 0.0002538631171153273, "loss": 0.4566, "step": 137450 }, { "epoch": 6.82725737558359, "grad_norm": 0.134765625, "learning_rate": 0.0002538233833316778, "loss": 0.47, "step": 137460 }, { "epoch": 6.82775404787921, "grad_norm": 0.146484375, "learning_rate": 0.0002537836495480282, "loss": 0.5116, "step": 137470 }, { "epoch": 6.828250720174829, "grad_norm": 0.138671875, "learning_rate": 0.0002537439157643787, "loss": 0.4773, "step": 137480 }, { "epoch": 6.828747392470448, "grad_norm": 0.1396484375, "learning_rate": 0.00025370418198072915, "loss": 0.5096, "step": 137490 }, { "epoch": 6.829244064766067, "grad_norm": 0.1455078125, "learning_rate": 0.00025366444819707956, "loss": 0.4903, "step": 137500 }, { "epoch": 6.8297407370616865, "grad_norm": 0.158203125, "learning_rate": 0.00025362471441343004, "loss": 0.5225, "step": 137510 }, { "epoch": 6.830237409357306, "grad_norm": 0.138671875, "learning_rate": 0.0002535849806297805, "loss": 0.5026, "step": 137520 }, { "epoch": 6.830734081652926, "grad_norm": 0.14453125, "learning_rate": 0.0002535452468461309, "loss": 0.5412, "step": 137530 }, { "epoch": 6.831230753948545, "grad_norm": 0.138671875, "learning_rate": 0.0002535055130624814, "loss": 0.5015, "step": 137540 }, { "epoch": 6.831727426244164, "grad_norm": 0.134765625, "learning_rate": 0.0002534657792788318, "loss": 0.5025, "step": 137550 }, { "epoch": 6.8322240985397835, "grad_norm": 0.142578125, "learning_rate": 0.0002534260454951823, "loss": 0.4904, "step": 137560 }, { "epoch": 6.832720770835403, "grad_norm": 0.142578125, "learning_rate": 0.00025338631171153275, "loss": 0.4763, "step": 137570 }, { "epoch": 6.833217443131022, "grad_norm": 0.12890625, "learning_rate": 0.00025334657792788317, "loss": 0.4824, "step": 137580 }, { "epoch": 6.833714115426641, "grad_norm": 0.1474609375, "learning_rate": 0.00025330684414423364, "loss": 0.5054, "step": 137590 }, { "epoch": 6.834210787722261, "grad_norm": 0.1357421875, "learning_rate": 0.0002532671103605841, "loss": 0.4908, "step": 137600 }, { "epoch": 6.8347074600178805, "grad_norm": 0.1455078125, "learning_rate": 0.00025322737657693453, "loss": 0.4919, "step": 137610 }, { "epoch": 6.8352041323135, "grad_norm": 0.1640625, "learning_rate": 0.000253187642793285, "loss": 0.5096, "step": 137620 }, { "epoch": 6.835700804609119, "grad_norm": 0.154296875, "learning_rate": 0.0002531479090096355, "loss": 0.4745, "step": 137630 }, { "epoch": 6.836197476904738, "grad_norm": 0.138671875, "learning_rate": 0.0002531081752259859, "loss": 0.4789, "step": 137640 }, { "epoch": 6.8366941492003575, "grad_norm": 0.1826171875, "learning_rate": 0.00025306844144233636, "loss": 0.5087, "step": 137650 }, { "epoch": 6.837190821495977, "grad_norm": 0.171875, "learning_rate": 0.0002530287076586868, "loss": 0.5004, "step": 137660 }, { "epoch": 6.837687493791597, "grad_norm": 0.126953125, "learning_rate": 0.0002529889738750373, "loss": 0.4793, "step": 137670 }, { "epoch": 6.838184166087216, "grad_norm": 0.1455078125, "learning_rate": 0.0002529492400913877, "loss": 0.5153, "step": 137680 }, { "epoch": 6.838680838382835, "grad_norm": 0.1181640625, "learning_rate": 0.00025290950630773814, "loss": 0.4701, "step": 137690 }, { "epoch": 6.8391775106784545, "grad_norm": 0.1357421875, "learning_rate": 0.0002528697725240886, "loss": 0.4855, "step": 137700 }, { "epoch": 6.839674182974074, "grad_norm": 0.1435546875, "learning_rate": 0.0002528300387404391, "loss": 0.5083, "step": 137710 }, { "epoch": 6.840170855269693, "grad_norm": 0.1435546875, "learning_rate": 0.0002527903049567895, "loss": 0.4645, "step": 137720 }, { "epoch": 6.840667527565312, "grad_norm": 0.150390625, "learning_rate": 0.00025275057117313997, "loss": 0.497, "step": 137730 }, { "epoch": 6.841164199860931, "grad_norm": 0.244140625, "learning_rate": 0.00025271083738949044, "loss": 0.4876, "step": 137740 }, { "epoch": 6.841660872156551, "grad_norm": 0.1337890625, "learning_rate": 0.0002526711036058409, "loss": 0.488, "step": 137750 }, { "epoch": 6.842157544452171, "grad_norm": 0.1650390625, "learning_rate": 0.00025263136982219133, "loss": 0.5133, "step": 137760 }, { "epoch": 6.84265421674779, "grad_norm": 0.1865234375, "learning_rate": 0.00025259163603854175, "loss": 0.4872, "step": 137770 }, { "epoch": 6.843150889043409, "grad_norm": 0.12890625, "learning_rate": 0.0002525519022548923, "loss": 0.5093, "step": 137780 }, { "epoch": 6.843647561339028, "grad_norm": 0.138671875, "learning_rate": 0.0002525121684712427, "loss": 0.5031, "step": 137790 }, { "epoch": 6.844144233634648, "grad_norm": 0.12890625, "learning_rate": 0.0002524724346875931, "loss": 0.4943, "step": 137800 }, { "epoch": 6.844640905930267, "grad_norm": 0.1416015625, "learning_rate": 0.0002524327009039436, "loss": 0.4916, "step": 137810 }, { "epoch": 6.845137578225886, "grad_norm": 0.14453125, "learning_rate": 0.00025239296712029405, "loss": 0.5025, "step": 137820 }, { "epoch": 6.845634250521506, "grad_norm": 0.13671875, "learning_rate": 0.0002523532333366445, "loss": 0.5072, "step": 137830 }, { "epoch": 6.846130922817125, "grad_norm": 0.1259765625, "learning_rate": 0.00025231349955299494, "loss": 0.5001, "step": 137840 }, { "epoch": 6.846627595112745, "grad_norm": 0.146484375, "learning_rate": 0.00025227376576934536, "loss": 0.4927, "step": 137850 }, { "epoch": 6.847124267408364, "grad_norm": 0.158203125, "learning_rate": 0.0002522340319856959, "loss": 0.5367, "step": 137860 }, { "epoch": 6.847620939703983, "grad_norm": 0.1416015625, "learning_rate": 0.0002521942982020463, "loss": 0.4874, "step": 137870 }, { "epoch": 6.848117611999602, "grad_norm": 0.130859375, "learning_rate": 0.0002521545644183967, "loss": 0.4807, "step": 137880 }, { "epoch": 6.848614284295222, "grad_norm": 0.1337890625, "learning_rate": 0.0002521148306347472, "loss": 0.4903, "step": 137890 }, { "epoch": 6.849110956590842, "grad_norm": 0.1396484375, "learning_rate": 0.00025207509685109766, "loss": 0.5168, "step": 137900 }, { "epoch": 6.849607628886461, "grad_norm": 0.166015625, "learning_rate": 0.00025203536306744813, "loss": 0.481, "step": 137910 }, { "epoch": 6.85010430118208, "grad_norm": 0.1259765625, "learning_rate": 0.00025199562928379855, "loss": 0.4804, "step": 137920 }, { "epoch": 6.850600973477699, "grad_norm": 0.146484375, "learning_rate": 0.000251955895500149, "loss": 0.5242, "step": 137930 }, { "epoch": 6.851097645773319, "grad_norm": 0.154296875, "learning_rate": 0.0002519161617164995, "loss": 0.5137, "step": 137940 }, { "epoch": 6.851594318068938, "grad_norm": 0.1298828125, "learning_rate": 0.0002518764279328499, "loss": 0.516, "step": 137950 }, { "epoch": 6.852090990364557, "grad_norm": 0.1455078125, "learning_rate": 0.0002518366941492004, "loss": 0.5078, "step": 137960 }, { "epoch": 6.852587662660177, "grad_norm": 0.1318359375, "learning_rate": 0.00025179696036555085, "loss": 0.4554, "step": 137970 }, { "epoch": 6.853084334955796, "grad_norm": 0.1435546875, "learning_rate": 0.00025175722658190127, "loss": 0.5063, "step": 137980 }, { "epoch": 6.853581007251416, "grad_norm": 0.1435546875, "learning_rate": 0.00025171749279825174, "loss": 0.4849, "step": 137990 }, { "epoch": 6.854077679547035, "grad_norm": 0.1396484375, "learning_rate": 0.00025167775901460216, "loss": 0.4813, "step": 138000 }, { "epoch": 6.854574351842654, "grad_norm": 0.138671875, "learning_rate": 0.00025163802523095263, "loss": 0.4909, "step": 138010 }, { "epoch": 6.855071024138273, "grad_norm": 0.13671875, "learning_rate": 0.0002515982914473031, "loss": 0.4865, "step": 138020 }, { "epoch": 6.8555676964338925, "grad_norm": 0.201171875, "learning_rate": 0.0002515585576636535, "loss": 0.4834, "step": 138030 }, { "epoch": 6.856064368729513, "grad_norm": 0.1435546875, "learning_rate": 0.000251518823880004, "loss": 0.4737, "step": 138040 }, { "epoch": 6.856561041025132, "grad_norm": 0.1572265625, "learning_rate": 0.00025147909009635446, "loss": 0.4934, "step": 138050 }, { "epoch": 6.857057713320751, "grad_norm": 0.14453125, "learning_rate": 0.0002514393563127049, "loss": 0.4939, "step": 138060 }, { "epoch": 6.85755438561637, "grad_norm": 0.1328125, "learning_rate": 0.00025139962252905535, "loss": 0.5089, "step": 138070 }, { "epoch": 6.8580510579119895, "grad_norm": 0.1689453125, "learning_rate": 0.0002513598887454058, "loss": 0.525, "step": 138080 }, { "epoch": 6.858547730207609, "grad_norm": 0.1328125, "learning_rate": 0.00025132015496175624, "loss": 0.5115, "step": 138090 }, { "epoch": 6.859044402503228, "grad_norm": 0.130859375, "learning_rate": 0.0002512804211781067, "loss": 0.481, "step": 138100 }, { "epoch": 6.859541074798848, "grad_norm": 0.1435546875, "learning_rate": 0.0002512406873944571, "loss": 0.502, "step": 138110 }, { "epoch": 6.860037747094467, "grad_norm": 0.1513671875, "learning_rate": 0.0002512009536108076, "loss": 0.5023, "step": 138120 }, { "epoch": 6.860534419390087, "grad_norm": 0.15625, "learning_rate": 0.00025116121982715807, "loss": 0.5104, "step": 138130 }, { "epoch": 6.861031091685706, "grad_norm": 0.126953125, "learning_rate": 0.0002511214860435085, "loss": 0.4992, "step": 138140 }, { "epoch": 6.861527763981325, "grad_norm": 0.1748046875, "learning_rate": 0.00025108175225985896, "loss": 0.4933, "step": 138150 }, { "epoch": 6.862024436276944, "grad_norm": 0.162109375, "learning_rate": 0.0002510420184762094, "loss": 0.4964, "step": 138160 }, { "epoch": 6.8625211085725635, "grad_norm": 0.1435546875, "learning_rate": 0.00025100228469255984, "loss": 0.4791, "step": 138170 }, { "epoch": 6.863017780868184, "grad_norm": 0.1552734375, "learning_rate": 0.0002509625509089103, "loss": 0.5395, "step": 138180 }, { "epoch": 6.863514453163803, "grad_norm": 0.158203125, "learning_rate": 0.00025092281712526073, "loss": 0.5023, "step": 138190 }, { "epoch": 6.864011125459422, "grad_norm": 0.138671875, "learning_rate": 0.00025088308334161126, "loss": 0.5093, "step": 138200 }, { "epoch": 6.864507797755041, "grad_norm": 0.1875, "learning_rate": 0.0002508433495579617, "loss": 0.5053, "step": 138210 }, { "epoch": 6.8650044700506605, "grad_norm": 0.13671875, "learning_rate": 0.0002508036157743121, "loss": 0.4806, "step": 138220 }, { "epoch": 6.86550114234628, "grad_norm": 0.1240234375, "learning_rate": 0.00025076388199066256, "loss": 0.4829, "step": 138230 }, { "epoch": 6.865997814641899, "grad_norm": 0.1494140625, "learning_rate": 0.00025072414820701303, "loss": 0.5352, "step": 138240 }, { "epoch": 6.866494486937519, "grad_norm": 0.140625, "learning_rate": 0.00025068441442336345, "loss": 0.4913, "step": 138250 }, { "epoch": 6.866991159233138, "grad_norm": 0.20703125, "learning_rate": 0.0002506446806397139, "loss": 0.4969, "step": 138260 }, { "epoch": 6.8674878315287575, "grad_norm": 0.1396484375, "learning_rate": 0.0002506049468560644, "loss": 0.4607, "step": 138270 }, { "epoch": 6.867984503824377, "grad_norm": 0.1484375, "learning_rate": 0.00025056521307241487, "loss": 0.5346, "step": 138280 }, { "epoch": 6.868481176119996, "grad_norm": 0.1259765625, "learning_rate": 0.0002505254792887653, "loss": 0.4662, "step": 138290 }, { "epoch": 6.868977848415615, "grad_norm": 0.1630859375, "learning_rate": 0.0002504857455051157, "loss": 0.5014, "step": 138300 }, { "epoch": 6.869474520711234, "grad_norm": 0.1318359375, "learning_rate": 0.0002504460117214662, "loss": 0.4955, "step": 138310 }, { "epoch": 6.8699711930068545, "grad_norm": 0.138671875, "learning_rate": 0.00025040627793781664, "loss": 0.5091, "step": 138320 }, { "epoch": 6.870467865302474, "grad_norm": 0.12890625, "learning_rate": 0.00025036654415416706, "loss": 0.4925, "step": 138330 }, { "epoch": 6.870964537598093, "grad_norm": 0.1708984375, "learning_rate": 0.00025032681037051753, "loss": 0.4968, "step": 138340 }, { "epoch": 6.871461209893712, "grad_norm": 0.13671875, "learning_rate": 0.000250287076586868, "loss": 0.5242, "step": 138350 }, { "epoch": 6.8719578821893315, "grad_norm": 0.1875, "learning_rate": 0.0002502473428032185, "loss": 0.5039, "step": 138360 }, { "epoch": 6.872454554484951, "grad_norm": 0.1279296875, "learning_rate": 0.0002502076090195689, "loss": 0.5184, "step": 138370 }, { "epoch": 6.87295122678057, "grad_norm": 0.142578125, "learning_rate": 0.00025016787523591936, "loss": 0.512, "step": 138380 }, { "epoch": 6.87344789907619, "grad_norm": 0.1240234375, "learning_rate": 0.00025012814145226983, "loss": 0.476, "step": 138390 }, { "epoch": 6.873944571371809, "grad_norm": 0.177734375, "learning_rate": 0.00025008840766862025, "loss": 0.4987, "step": 138400 }, { "epoch": 6.8744412436674285, "grad_norm": 0.13671875, "learning_rate": 0.0002500486738849707, "loss": 0.4966, "step": 138410 }, { "epoch": 6.874937915963048, "grad_norm": 0.201171875, "learning_rate": 0.00025000894010132114, "loss": 0.4859, "step": 138420 }, { "epoch": 6.875434588258667, "grad_norm": 0.1455078125, "learning_rate": 0.0002499692063176716, "loss": 0.5372, "step": 138430 }, { "epoch": 6.875931260554286, "grad_norm": 0.18359375, "learning_rate": 0.0002499294725340221, "loss": 0.4881, "step": 138440 }, { "epoch": 6.876427932849905, "grad_norm": 0.1396484375, "learning_rate": 0.0002498897387503725, "loss": 0.4923, "step": 138450 }, { "epoch": 6.876924605145525, "grad_norm": 0.1455078125, "learning_rate": 0.00024985000496672297, "loss": 0.5507, "step": 138460 }, { "epoch": 6.877421277441144, "grad_norm": 0.130859375, "learning_rate": 0.00024981027118307344, "loss": 0.4744, "step": 138470 }, { "epoch": 6.877917949736764, "grad_norm": 0.1416015625, "learning_rate": 0.00024977053739942386, "loss": 0.5066, "step": 138480 }, { "epoch": 6.878414622032383, "grad_norm": 0.1328125, "learning_rate": 0.00024973080361577433, "loss": 0.4819, "step": 138490 }, { "epoch": 6.878911294328002, "grad_norm": 0.1455078125, "learning_rate": 0.0002496910698321248, "loss": 0.4991, "step": 138500 }, { "epoch": 6.879407966623622, "grad_norm": 0.130859375, "learning_rate": 0.0002496513360484752, "loss": 0.4835, "step": 138510 }, { "epoch": 6.879904638919241, "grad_norm": 0.138671875, "learning_rate": 0.0002496116022648257, "loss": 0.4923, "step": 138520 }, { "epoch": 6.88040131121486, "grad_norm": 0.1337890625, "learning_rate": 0.0002495718684811761, "loss": 0.502, "step": 138530 }, { "epoch": 6.880897983510479, "grad_norm": 0.130859375, "learning_rate": 0.0002495321346975266, "loss": 0.5097, "step": 138540 }, { "epoch": 6.881394655806099, "grad_norm": 0.1357421875, "learning_rate": 0.00024949240091387705, "loss": 0.5251, "step": 138550 }, { "epoch": 6.881891328101719, "grad_norm": 0.162109375, "learning_rate": 0.00024945266713022747, "loss": 0.497, "step": 138560 }, { "epoch": 6.882388000397338, "grad_norm": 0.1455078125, "learning_rate": 0.00024941293334657794, "loss": 0.5, "step": 138570 }, { "epoch": 6.882884672692957, "grad_norm": 0.1513671875, "learning_rate": 0.0002493731995629284, "loss": 0.5033, "step": 138580 }, { "epoch": 6.883381344988576, "grad_norm": 0.1533203125, "learning_rate": 0.00024933346577927883, "loss": 0.4907, "step": 138590 }, { "epoch": 6.883878017284196, "grad_norm": 0.1708984375, "learning_rate": 0.0002492937319956293, "loss": 0.5065, "step": 138600 }, { "epoch": 6.884374689579815, "grad_norm": 0.140625, "learning_rate": 0.00024925399821197977, "loss": 0.4797, "step": 138610 }, { "epoch": 6.884871361875435, "grad_norm": 0.134765625, "learning_rate": 0.0002492142644283302, "loss": 0.4651, "step": 138620 }, { "epoch": 6.885368034171054, "grad_norm": 0.1494140625, "learning_rate": 0.00024917453064468066, "loss": 0.5183, "step": 138630 }, { "epoch": 6.885864706466673, "grad_norm": 0.146484375, "learning_rate": 0.0002491347968610311, "loss": 0.511, "step": 138640 }, { "epoch": 6.886361378762293, "grad_norm": 0.158203125, "learning_rate": 0.00024909506307738155, "loss": 0.4801, "step": 138650 }, { "epoch": 6.886858051057912, "grad_norm": 0.1337890625, "learning_rate": 0.000249055329293732, "loss": 0.4738, "step": 138660 }, { "epoch": 6.887354723353531, "grad_norm": 0.1259765625, "learning_rate": 0.00024901559551008244, "loss": 0.4818, "step": 138670 }, { "epoch": 6.88785139564915, "grad_norm": 0.125, "learning_rate": 0.0002489758617264329, "loss": 0.4866, "step": 138680 }, { "epoch": 6.88834806794477, "grad_norm": 0.11962890625, "learning_rate": 0.0002489361279427834, "loss": 0.4494, "step": 138690 }, { "epoch": 6.88884474024039, "grad_norm": 0.126953125, "learning_rate": 0.0002488963941591338, "loss": 0.5095, "step": 138700 }, { "epoch": 6.889341412536009, "grad_norm": 0.130859375, "learning_rate": 0.00024885666037548427, "loss": 0.506, "step": 138710 }, { "epoch": 6.889838084831628, "grad_norm": 0.1796875, "learning_rate": 0.0002488169265918347, "loss": 0.4734, "step": 138720 }, { "epoch": 6.890334757127247, "grad_norm": 0.140625, "learning_rate": 0.0002487771928081852, "loss": 0.4911, "step": 138730 }, { "epoch": 6.8908314294228665, "grad_norm": 0.1376953125, "learning_rate": 0.0002487374590245356, "loss": 0.5237, "step": 138740 }, { "epoch": 6.891328101718486, "grad_norm": 0.12451171875, "learning_rate": 0.00024869772524088604, "loss": 0.5002, "step": 138750 }, { "epoch": 6.891824774014106, "grad_norm": 0.146484375, "learning_rate": 0.0002486579914572365, "loss": 0.4975, "step": 138760 }, { "epoch": 6.892321446309725, "grad_norm": 0.1328125, "learning_rate": 0.000248618257673587, "loss": 0.5047, "step": 138770 }, { "epoch": 6.892818118605344, "grad_norm": 0.1513671875, "learning_rate": 0.0002485785238899374, "loss": 0.5074, "step": 138780 }, { "epoch": 6.8933147909009636, "grad_norm": 0.1328125, "learning_rate": 0.0002485387901062879, "loss": 0.478, "step": 138790 }, { "epoch": 6.893811463196583, "grad_norm": 0.138671875, "learning_rate": 0.00024849905632263835, "loss": 0.4823, "step": 138800 }, { "epoch": 6.894308135492202, "grad_norm": 0.1494140625, "learning_rate": 0.0002484593225389888, "loss": 0.4991, "step": 138810 }, { "epoch": 6.894804807787821, "grad_norm": 0.13671875, "learning_rate": 0.00024841958875533923, "loss": 0.5135, "step": 138820 }, { "epoch": 6.895301480083441, "grad_norm": 0.154296875, "learning_rate": 0.00024837985497168965, "loss": 0.5066, "step": 138830 }, { "epoch": 6.895798152379061, "grad_norm": 0.1376953125, "learning_rate": 0.0002483401211880402, "loss": 0.4946, "step": 138840 }, { "epoch": 6.89629482467468, "grad_norm": 0.1650390625, "learning_rate": 0.0002483003874043906, "loss": 0.4861, "step": 138850 }, { "epoch": 6.896791496970299, "grad_norm": 0.1484375, "learning_rate": 0.00024826065362074107, "loss": 0.5071, "step": 138860 }, { "epoch": 6.897288169265918, "grad_norm": 0.1376953125, "learning_rate": 0.0002482209198370915, "loss": 0.4995, "step": 138870 }, { "epoch": 6.8977848415615375, "grad_norm": 0.1484375, "learning_rate": 0.00024818118605344195, "loss": 0.4846, "step": 138880 }, { "epoch": 6.898281513857157, "grad_norm": 0.15625, "learning_rate": 0.0002481414522697924, "loss": 0.4874, "step": 138890 }, { "epoch": 6.898778186152777, "grad_norm": 0.1455078125, "learning_rate": 0.00024810171848614284, "loss": 0.5222, "step": 138900 }, { "epoch": 6.899274858448396, "grad_norm": 0.1328125, "learning_rate": 0.0002480619847024933, "loss": 0.4916, "step": 138910 }, { "epoch": 6.899771530744015, "grad_norm": 0.193359375, "learning_rate": 0.0002480222509188438, "loss": 0.4765, "step": 138920 }, { "epoch": 6.9002682030396345, "grad_norm": 0.150390625, "learning_rate": 0.0002479825171351942, "loss": 0.5114, "step": 138930 }, { "epoch": 6.900764875335254, "grad_norm": 0.126953125, "learning_rate": 0.0002479427833515447, "loss": 0.4979, "step": 138940 }, { "epoch": 6.901261547630873, "grad_norm": 0.1396484375, "learning_rate": 0.0002479030495678951, "loss": 0.4637, "step": 138950 }, { "epoch": 6.901758219926492, "grad_norm": 0.1826171875, "learning_rate": 0.00024786331578424556, "loss": 0.5241, "step": 138960 }, { "epoch": 6.902254892222112, "grad_norm": 0.1640625, "learning_rate": 0.00024782358200059603, "loss": 0.4974, "step": 138970 }, { "epoch": 6.9027515645177315, "grad_norm": 0.205078125, "learning_rate": 0.00024778384821694645, "loss": 0.5086, "step": 138980 }, { "epoch": 6.903248236813351, "grad_norm": 0.1796875, "learning_rate": 0.0002477441144332969, "loss": 0.4754, "step": 138990 }, { "epoch": 6.90374490910897, "grad_norm": 0.1298828125, "learning_rate": 0.0002477043806496474, "loss": 0.5033, "step": 139000 }, { "epoch": 6.904241581404589, "grad_norm": 0.14453125, "learning_rate": 0.0002476646468659978, "loss": 0.5201, "step": 139010 }, { "epoch": 6.904738253700208, "grad_norm": 0.134765625, "learning_rate": 0.0002476249130823483, "loss": 0.5056, "step": 139020 }, { "epoch": 6.905234925995828, "grad_norm": 0.1298828125, "learning_rate": 0.00024758517929869875, "loss": 0.4932, "step": 139030 }, { "epoch": 6.905731598291448, "grad_norm": 0.1337890625, "learning_rate": 0.00024754544551504917, "loss": 0.4779, "step": 139040 }, { "epoch": 6.906228270587067, "grad_norm": 0.14453125, "learning_rate": 0.00024750571173139964, "loss": 0.5336, "step": 139050 }, { "epoch": 6.906724942882686, "grad_norm": 0.1650390625, "learning_rate": 0.00024746597794775006, "loss": 0.5058, "step": 139060 }, { "epoch": 6.9072216151783055, "grad_norm": 0.146484375, "learning_rate": 0.00024742624416410053, "loss": 0.5215, "step": 139070 }, { "epoch": 6.907718287473925, "grad_norm": 0.1298828125, "learning_rate": 0.000247386510380451, "loss": 0.4882, "step": 139080 }, { "epoch": 6.908214959769544, "grad_norm": 0.1435546875, "learning_rate": 0.0002473467765968014, "loss": 0.4795, "step": 139090 }, { "epoch": 6.908711632065163, "grad_norm": 0.13671875, "learning_rate": 0.0002473070428131519, "loss": 0.486, "step": 139100 }, { "epoch": 6.909208304360782, "grad_norm": 0.13671875, "learning_rate": 0.00024726730902950236, "loss": 0.4911, "step": 139110 }, { "epoch": 6.9097049766564025, "grad_norm": 0.1416015625, "learning_rate": 0.0002472275752458528, "loss": 0.4978, "step": 139120 }, { "epoch": 6.910201648952022, "grad_norm": 0.126953125, "learning_rate": 0.00024718784146220325, "loss": 0.4819, "step": 139130 }, { "epoch": 6.910698321247641, "grad_norm": 0.197265625, "learning_rate": 0.0002471481076785537, "loss": 0.4984, "step": 139140 }, { "epoch": 6.91119499354326, "grad_norm": 0.16796875, "learning_rate": 0.00024710837389490414, "loss": 0.5102, "step": 139150 }, { "epoch": 6.911691665838879, "grad_norm": 0.138671875, "learning_rate": 0.0002470686401112546, "loss": 0.4817, "step": 139160 }, { "epoch": 6.912188338134499, "grad_norm": 0.140625, "learning_rate": 0.00024702890632760503, "loss": 0.5105, "step": 139170 }, { "epoch": 6.912685010430118, "grad_norm": 0.1259765625, "learning_rate": 0.00024698917254395555, "loss": 0.4879, "step": 139180 }, { "epoch": 6.913181682725737, "grad_norm": 0.15234375, "learning_rate": 0.00024694943876030597, "loss": 0.507, "step": 139190 }, { "epoch": 6.913678355021357, "grad_norm": 0.126953125, "learning_rate": 0.0002469097049766564, "loss": 0.5073, "step": 139200 }, { "epoch": 6.914175027316976, "grad_norm": 0.13671875, "learning_rate": 0.00024686997119300686, "loss": 0.4896, "step": 139210 }, { "epoch": 6.914671699612596, "grad_norm": 0.138671875, "learning_rate": 0.00024683023740935733, "loss": 0.4983, "step": 139220 }, { "epoch": 6.915168371908215, "grad_norm": 0.12890625, "learning_rate": 0.0002467905036257078, "loss": 0.5106, "step": 139230 }, { "epoch": 6.915665044203834, "grad_norm": 0.1279296875, "learning_rate": 0.0002467507698420582, "loss": 0.4939, "step": 139240 }, { "epoch": 6.916161716499453, "grad_norm": 0.1474609375, "learning_rate": 0.00024671103605840864, "loss": 0.541, "step": 139250 }, { "epoch": 6.9166583887950726, "grad_norm": 0.134765625, "learning_rate": 0.00024667130227475916, "loss": 0.5167, "step": 139260 }, { "epoch": 6.917155061090693, "grad_norm": 0.1533203125, "learning_rate": 0.0002466315684911096, "loss": 0.5126, "step": 139270 }, { "epoch": 6.917651733386312, "grad_norm": 0.150390625, "learning_rate": 0.00024659183470746, "loss": 0.5017, "step": 139280 }, { "epoch": 6.918148405681931, "grad_norm": 0.16015625, "learning_rate": 0.00024655210092381047, "loss": 0.5083, "step": 139290 }, { "epoch": 6.91864507797755, "grad_norm": 0.1474609375, "learning_rate": 0.00024651236714016094, "loss": 0.5055, "step": 139300 }, { "epoch": 6.91914175027317, "grad_norm": 0.1357421875, "learning_rate": 0.0002464726333565114, "loss": 0.5173, "step": 139310 }, { "epoch": 6.919638422568789, "grad_norm": 0.12890625, "learning_rate": 0.0002464328995728618, "loss": 0.4768, "step": 139320 }, { "epoch": 6.920135094864408, "grad_norm": 0.1474609375, "learning_rate": 0.0002463931657892123, "loss": 0.4742, "step": 139330 }, { "epoch": 6.920631767160028, "grad_norm": 0.140625, "learning_rate": 0.00024635343200556277, "loss": 0.4894, "step": 139340 }, { "epoch": 6.921128439455647, "grad_norm": 0.1572265625, "learning_rate": 0.0002463136982219132, "loss": 0.4996, "step": 139350 }, { "epoch": 6.921625111751267, "grad_norm": 0.1435546875, "learning_rate": 0.0002462739644382636, "loss": 0.5211, "step": 139360 }, { "epoch": 6.922121784046886, "grad_norm": 0.1259765625, "learning_rate": 0.00024623423065461413, "loss": 0.4981, "step": 139370 }, { "epoch": 6.922618456342505, "grad_norm": 0.1240234375, "learning_rate": 0.00024619449687096455, "loss": 0.5044, "step": 139380 }, { "epoch": 6.923115128638124, "grad_norm": 0.1591796875, "learning_rate": 0.000246154763087315, "loss": 0.502, "step": 139390 }, { "epoch": 6.9236118009337435, "grad_norm": 0.1376953125, "learning_rate": 0.00024611502930366544, "loss": 0.4946, "step": 139400 }, { "epoch": 6.924108473229364, "grad_norm": 0.162109375, "learning_rate": 0.0002460752955200159, "loss": 0.5054, "step": 139410 }, { "epoch": 6.924605145524983, "grad_norm": 0.1796875, "learning_rate": 0.0002460355617363664, "loss": 0.4877, "step": 139420 }, { "epoch": 6.925101817820602, "grad_norm": 0.140625, "learning_rate": 0.0002459958279527168, "loss": 0.4919, "step": 139430 }, { "epoch": 6.925598490116221, "grad_norm": 0.130859375, "learning_rate": 0.00024595609416906727, "loss": 0.4642, "step": 139440 }, { "epoch": 6.9260951624118405, "grad_norm": 0.1669921875, "learning_rate": 0.00024591636038541774, "loss": 0.5025, "step": 139450 }, { "epoch": 6.92659183470746, "grad_norm": 0.130859375, "learning_rate": 0.00024587662660176815, "loss": 0.4847, "step": 139460 }, { "epoch": 6.927088507003079, "grad_norm": 0.1279296875, "learning_rate": 0.0002458368928181186, "loss": 0.5113, "step": 139470 }, { "epoch": 6.927585179298699, "grad_norm": 0.1484375, "learning_rate": 0.0002457971590344691, "loss": 0.4855, "step": 139480 }, { "epoch": 6.928081851594318, "grad_norm": 0.1533203125, "learning_rate": 0.0002457574252508195, "loss": 0.4971, "step": 139490 }, { "epoch": 6.928578523889938, "grad_norm": 0.1435546875, "learning_rate": 0.00024571769146717, "loss": 0.5003, "step": 139500 }, { "epoch": 6.929075196185557, "grad_norm": 0.126953125, "learning_rate": 0.0002456779576835204, "loss": 0.4825, "step": 139510 }, { "epoch": 6.929571868481176, "grad_norm": 0.1396484375, "learning_rate": 0.0002456382238998709, "loss": 0.501, "step": 139520 }, { "epoch": 6.930068540776795, "grad_norm": 0.1787109375, "learning_rate": 0.00024559849011622135, "loss": 0.4937, "step": 139530 }, { "epoch": 6.9305652130724145, "grad_norm": 0.1416015625, "learning_rate": 0.00024555875633257176, "loss": 0.4989, "step": 139540 }, { "epoch": 6.931061885368035, "grad_norm": 0.1875, "learning_rate": 0.00024551902254892223, "loss": 0.4682, "step": 139550 }, { "epoch": 6.931558557663654, "grad_norm": 0.1337890625, "learning_rate": 0.0002454792887652727, "loss": 0.5281, "step": 139560 }, { "epoch": 6.932055229959273, "grad_norm": 0.169921875, "learning_rate": 0.0002454395549816231, "loss": 0.5126, "step": 139570 }, { "epoch": 6.932551902254892, "grad_norm": 0.1494140625, "learning_rate": 0.0002453998211979736, "loss": 0.4697, "step": 139580 }, { "epoch": 6.9330485745505115, "grad_norm": 0.1318359375, "learning_rate": 0.000245360087414324, "loss": 0.4833, "step": 139590 }, { "epoch": 6.933545246846131, "grad_norm": 0.1689453125, "learning_rate": 0.0002453203536306745, "loss": 0.4721, "step": 139600 }, { "epoch": 6.93404191914175, "grad_norm": 0.14453125, "learning_rate": 0.00024528061984702495, "loss": 0.492, "step": 139610 }, { "epoch": 6.93453859143737, "grad_norm": 0.13671875, "learning_rate": 0.00024524088606337537, "loss": 0.5299, "step": 139620 }, { "epoch": 6.935035263732989, "grad_norm": 0.1396484375, "learning_rate": 0.00024520115227972584, "loss": 0.4887, "step": 139630 }, { "epoch": 6.9355319360286085, "grad_norm": 0.1357421875, "learning_rate": 0.0002451614184960763, "loss": 0.4836, "step": 139640 }, { "epoch": 6.936028608324228, "grad_norm": 0.146484375, "learning_rate": 0.00024512168471242673, "loss": 0.4728, "step": 139650 }, { "epoch": 6.936525280619847, "grad_norm": 0.1640625, "learning_rate": 0.0002450819509287772, "loss": 0.4863, "step": 139660 }, { "epoch": 6.937021952915466, "grad_norm": 0.142578125, "learning_rate": 0.0002450422171451277, "loss": 0.4935, "step": 139670 }, { "epoch": 6.937518625211085, "grad_norm": 0.1240234375, "learning_rate": 0.00024500248336147815, "loss": 0.486, "step": 139680 }, { "epoch": 6.9380152975067055, "grad_norm": 0.142578125, "learning_rate": 0.00024496274957782856, "loss": 0.4928, "step": 139690 }, { "epoch": 6.938511969802325, "grad_norm": 0.1337890625, "learning_rate": 0.000244923015794179, "loss": 0.4964, "step": 139700 }, { "epoch": 6.939008642097944, "grad_norm": 0.134765625, "learning_rate": 0.0002448832820105295, "loss": 0.4789, "step": 139710 }, { "epoch": 6.939505314393563, "grad_norm": 0.13671875, "learning_rate": 0.0002448435482268799, "loss": 0.4749, "step": 139720 }, { "epoch": 6.9400019866891824, "grad_norm": 0.140625, "learning_rate": 0.00024480381444323034, "loss": 0.4678, "step": 139730 }, { "epoch": 6.940498658984802, "grad_norm": 0.130859375, "learning_rate": 0.0002447640806595808, "loss": 0.4679, "step": 139740 }, { "epoch": 6.940995331280421, "grad_norm": 0.14453125, "learning_rate": 0.0002447243468759313, "loss": 0.4952, "step": 139750 }, { "epoch": 6.941492003576041, "grad_norm": 0.146484375, "learning_rate": 0.00024468461309228175, "loss": 0.4943, "step": 139760 }, { "epoch": 6.94198867587166, "grad_norm": 0.12451171875, "learning_rate": 0.00024464487930863217, "loss": 0.4898, "step": 139770 }, { "epoch": 6.9424853481672795, "grad_norm": 0.1435546875, "learning_rate": 0.00024460514552498264, "loss": 0.5022, "step": 139780 }, { "epoch": 6.942982020462899, "grad_norm": 0.125, "learning_rate": 0.0002445654117413331, "loss": 0.5112, "step": 139790 }, { "epoch": 6.943478692758518, "grad_norm": 0.1455078125, "learning_rate": 0.00024452567795768353, "loss": 0.4766, "step": 139800 }, { "epoch": 6.943975365054137, "grad_norm": 0.169921875, "learning_rate": 0.00024448594417403395, "loss": 0.4633, "step": 139810 }, { "epoch": 6.944472037349756, "grad_norm": 0.1474609375, "learning_rate": 0.0002444462103903844, "loss": 0.4725, "step": 139820 }, { "epoch": 6.944968709645376, "grad_norm": 0.1328125, "learning_rate": 0.0002444064766067349, "loss": 0.4767, "step": 139830 }, { "epoch": 6.945465381940996, "grad_norm": 0.130859375, "learning_rate": 0.00024436674282308536, "loss": 0.5269, "step": 139840 }, { "epoch": 6.945962054236615, "grad_norm": 0.142578125, "learning_rate": 0.0002443270090394358, "loss": 0.4814, "step": 139850 }, { "epoch": 6.946458726532234, "grad_norm": 0.1376953125, "learning_rate": 0.00024428727525578625, "loss": 0.4934, "step": 139860 }, { "epoch": 6.946955398827853, "grad_norm": 0.1435546875, "learning_rate": 0.0002442475414721367, "loss": 0.4957, "step": 139870 }, { "epoch": 6.947452071123473, "grad_norm": 0.158203125, "learning_rate": 0.00024420780768848714, "loss": 0.5135, "step": 139880 }, { "epoch": 6.947948743419092, "grad_norm": 0.138671875, "learning_rate": 0.00024416807390483756, "loss": 0.4758, "step": 139890 }, { "epoch": 6.948445415714711, "grad_norm": 0.1484375, "learning_rate": 0.00024412834012118805, "loss": 0.5051, "step": 139900 }, { "epoch": 6.94894208801033, "grad_norm": 0.1328125, "learning_rate": 0.0002440886063375385, "loss": 0.4878, "step": 139910 }, { "epoch": 6.94943876030595, "grad_norm": 0.14453125, "learning_rate": 0.00024404887255388897, "loss": 0.4885, "step": 139920 }, { "epoch": 6.94993543260157, "grad_norm": 0.1298828125, "learning_rate": 0.00024400913877023941, "loss": 0.4816, "step": 139930 }, { "epoch": 6.950432104897189, "grad_norm": 0.1396484375, "learning_rate": 0.00024396940498658983, "loss": 0.505, "step": 139940 }, { "epoch": 6.950928777192808, "grad_norm": 0.134765625, "learning_rate": 0.00024392967120294033, "loss": 0.5088, "step": 139950 }, { "epoch": 6.951425449488427, "grad_norm": 0.1435546875, "learning_rate": 0.00024388993741929075, "loss": 0.4806, "step": 139960 }, { "epoch": 6.951922121784047, "grad_norm": 0.1455078125, "learning_rate": 0.0002438502036356412, "loss": 0.4938, "step": 139970 }, { "epoch": 6.952418794079666, "grad_norm": 0.1318359375, "learning_rate": 0.00024381046985199166, "loss": 0.4716, "step": 139980 }, { "epoch": 6.952915466375286, "grad_norm": 0.138671875, "learning_rate": 0.0002437707360683421, "loss": 0.4989, "step": 139990 }, { "epoch": 6.953412138670905, "grad_norm": 0.13671875, "learning_rate": 0.00024373100228469258, "loss": 0.4904, "step": 140000 }, { "epoch": 6.953908810966524, "grad_norm": 0.14453125, "learning_rate": 0.00024369126850104302, "loss": 0.5107, "step": 140010 }, { "epoch": 6.954405483262144, "grad_norm": 0.130859375, "learning_rate": 0.00024365153471739347, "loss": 0.5227, "step": 140020 }, { "epoch": 6.954902155557763, "grad_norm": 0.140625, "learning_rate": 0.00024361180093374394, "loss": 0.4979, "step": 140030 }, { "epoch": 6.955398827853382, "grad_norm": 0.1318359375, "learning_rate": 0.00024357206715009438, "loss": 0.4795, "step": 140040 }, { "epoch": 6.955895500149001, "grad_norm": 0.1279296875, "learning_rate": 0.0002435323333664448, "loss": 0.485, "step": 140050 }, { "epoch": 6.956392172444621, "grad_norm": 0.12451171875, "learning_rate": 0.0002434925995827953, "loss": 0.484, "step": 140060 }, { "epoch": 6.956888844740241, "grad_norm": 0.1552734375, "learning_rate": 0.00024345286579914571, "loss": 0.5158, "step": 140070 }, { "epoch": 6.95738551703586, "grad_norm": 0.1396484375, "learning_rate": 0.0002434131320154962, "loss": 0.4753, "step": 140080 }, { "epoch": 6.957882189331479, "grad_norm": 0.1513671875, "learning_rate": 0.00024337339823184663, "loss": 0.4996, "step": 140090 }, { "epoch": 6.958378861627098, "grad_norm": 0.173828125, "learning_rate": 0.00024333366444819707, "loss": 0.4916, "step": 140100 }, { "epoch": 6.9588755339227175, "grad_norm": 0.1376953125, "learning_rate": 0.00024329393066454755, "loss": 0.4938, "step": 140110 }, { "epoch": 6.959372206218337, "grad_norm": 0.138671875, "learning_rate": 0.000243254196880898, "loss": 0.5164, "step": 140120 }, { "epoch": 6.959868878513957, "grad_norm": 0.1298828125, "learning_rate": 0.00024321446309724846, "loss": 0.4918, "step": 140130 }, { "epoch": 6.960365550809576, "grad_norm": 0.1435546875, "learning_rate": 0.0002431747293135989, "loss": 0.5013, "step": 140140 }, { "epoch": 6.960862223105195, "grad_norm": 0.1474609375, "learning_rate": 0.00024313499552994935, "loss": 0.4931, "step": 140150 }, { "epoch": 6.9613588954008145, "grad_norm": 0.1298828125, "learning_rate": 0.00024309526174629982, "loss": 0.4834, "step": 140160 }, { "epoch": 6.961855567696434, "grad_norm": 0.1396484375, "learning_rate": 0.00024305552796265027, "loss": 0.4915, "step": 140170 }, { "epoch": 6.962352239992053, "grad_norm": 0.1748046875, "learning_rate": 0.00024301579417900068, "loss": 0.4947, "step": 140180 }, { "epoch": 6.962848912287672, "grad_norm": 0.1591796875, "learning_rate": 0.00024297606039535118, "loss": 0.5211, "step": 140190 }, { "epoch": 6.963345584583292, "grad_norm": 0.150390625, "learning_rate": 0.0002429363266117016, "loss": 0.4868, "step": 140200 }, { "epoch": 6.963842256878912, "grad_norm": 0.1376953125, "learning_rate": 0.00024289659282805207, "loss": 0.4687, "step": 140210 }, { "epoch": 6.964338929174531, "grad_norm": 0.130859375, "learning_rate": 0.00024285685904440251, "loss": 0.4978, "step": 140220 }, { "epoch": 6.96483560147015, "grad_norm": 0.138671875, "learning_rate": 0.00024281712526075296, "loss": 0.494, "step": 140230 }, { "epoch": 6.965332273765769, "grad_norm": 0.158203125, "learning_rate": 0.00024277739147710343, "loss": 0.4935, "step": 140240 }, { "epoch": 6.9658289460613885, "grad_norm": 0.177734375, "learning_rate": 0.00024273765769345387, "loss": 0.4853, "step": 140250 }, { "epoch": 6.966325618357008, "grad_norm": 0.13671875, "learning_rate": 0.0002426979239098043, "loss": 0.4798, "step": 140260 }, { "epoch": 6.966822290652628, "grad_norm": 0.134765625, "learning_rate": 0.0002426581901261548, "loss": 0.4909, "step": 140270 }, { "epoch": 6.967318962948247, "grad_norm": 0.1357421875, "learning_rate": 0.0002426184563425052, "loss": 0.5131, "step": 140280 }, { "epoch": 6.967815635243866, "grad_norm": 0.17578125, "learning_rate": 0.0002425787225588557, "loss": 0.4682, "step": 140290 }, { "epoch": 6.9683123075394855, "grad_norm": 0.1259765625, "learning_rate": 0.00024253898877520612, "loss": 0.4771, "step": 140300 }, { "epoch": 6.968808979835105, "grad_norm": 0.1328125, "learning_rate": 0.00024249925499155657, "loss": 0.4993, "step": 140310 }, { "epoch": 6.969305652130724, "grad_norm": 0.1455078125, "learning_rate": 0.00024245952120790704, "loss": 0.4717, "step": 140320 }, { "epoch": 6.969802324426343, "grad_norm": 0.134765625, "learning_rate": 0.00024241978742425748, "loss": 0.4884, "step": 140330 }, { "epoch": 6.970298996721963, "grad_norm": 0.1357421875, "learning_rate": 0.00024238005364060793, "loss": 0.4904, "step": 140340 }, { "epoch": 6.9707956690175825, "grad_norm": 0.1396484375, "learning_rate": 0.0002423403198569584, "loss": 0.4857, "step": 140350 }, { "epoch": 6.971292341313202, "grad_norm": 0.158203125, "learning_rate": 0.00024230058607330884, "loss": 0.4607, "step": 140360 }, { "epoch": 6.971789013608821, "grad_norm": 0.162109375, "learning_rate": 0.0002422608522896593, "loss": 0.4999, "step": 140370 }, { "epoch": 6.97228568590444, "grad_norm": 0.140625, "learning_rate": 0.00024222111850600976, "loss": 0.4969, "step": 140380 }, { "epoch": 6.972782358200059, "grad_norm": 0.1416015625, "learning_rate": 0.00024218138472236017, "loss": 0.5017, "step": 140390 }, { "epoch": 6.973279030495679, "grad_norm": 0.15625, "learning_rate": 0.00024214165093871067, "loss": 0.54, "step": 140400 }, { "epoch": 6.973775702791299, "grad_norm": 0.14453125, "learning_rate": 0.0002421019171550611, "loss": 0.4736, "step": 140410 }, { "epoch": 6.974272375086918, "grad_norm": 0.134765625, "learning_rate": 0.00024206218337141153, "loss": 0.4898, "step": 140420 }, { "epoch": 6.974769047382537, "grad_norm": 0.1435546875, "learning_rate": 0.000242022449587762, "loss": 0.5352, "step": 140430 }, { "epoch": 6.9752657196781565, "grad_norm": 0.146484375, "learning_rate": 0.00024198271580411245, "loss": 0.4869, "step": 140440 }, { "epoch": 6.975762391973776, "grad_norm": 0.15234375, "learning_rate": 0.00024194298202046292, "loss": 0.4762, "step": 140450 }, { "epoch": 6.976259064269395, "grad_norm": 0.138671875, "learning_rate": 0.00024190324823681337, "loss": 0.4969, "step": 140460 }, { "epoch": 6.976755736565014, "grad_norm": 0.138671875, "learning_rate": 0.0002418635144531638, "loss": 0.5136, "step": 140470 }, { "epoch": 6.977252408860634, "grad_norm": 0.13671875, "learning_rate": 0.00024182378066951428, "loss": 0.514, "step": 140480 }, { "epoch": 6.9777490811562535, "grad_norm": 0.138671875, "learning_rate": 0.00024178404688586473, "loss": 0.4833, "step": 140490 }, { "epoch": 6.978245753451873, "grad_norm": 0.1650390625, "learning_rate": 0.0002417443131022152, "loss": 0.4871, "step": 140500 }, { "epoch": 6.978742425747492, "grad_norm": 0.1513671875, "learning_rate": 0.00024170457931856561, "loss": 0.5071, "step": 140510 }, { "epoch": 6.979239098043111, "grad_norm": 0.15234375, "learning_rate": 0.00024166484553491606, "loss": 0.5003, "step": 140520 }, { "epoch": 6.97973577033873, "grad_norm": 0.1318359375, "learning_rate": 0.00024162511175126653, "loss": 0.4776, "step": 140530 }, { "epoch": 6.98023244263435, "grad_norm": 0.1337890625, "learning_rate": 0.00024158537796761697, "loss": 0.5007, "step": 140540 }, { "epoch": 6.980729114929969, "grad_norm": 0.13671875, "learning_rate": 0.00024154564418396742, "loss": 0.4972, "step": 140550 }, { "epoch": 6.981225787225588, "grad_norm": 0.1689453125, "learning_rate": 0.0002415059104003179, "loss": 0.4996, "step": 140560 }, { "epoch": 6.981722459521208, "grad_norm": 0.2138671875, "learning_rate": 0.00024146617661666833, "loss": 0.4959, "step": 140570 }, { "epoch": 6.982219131816827, "grad_norm": 0.1591796875, "learning_rate": 0.0002414264428330188, "loss": 0.5053, "step": 140580 }, { "epoch": 6.982715804112447, "grad_norm": 0.142578125, "learning_rate": 0.00024138670904936925, "loss": 0.5081, "step": 140590 }, { "epoch": 6.983212476408066, "grad_norm": 0.1376953125, "learning_rate": 0.00024134697526571967, "loss": 0.4818, "step": 140600 }, { "epoch": 6.983709148703685, "grad_norm": 0.1552734375, "learning_rate": 0.00024130724148207017, "loss": 0.5042, "step": 140610 }, { "epoch": 6.984205820999304, "grad_norm": 0.1328125, "learning_rate": 0.00024126750769842058, "loss": 0.5098, "step": 140620 }, { "epoch": 6.9847024932949235, "grad_norm": 0.1318359375, "learning_rate": 0.00024122777391477103, "loss": 0.5089, "step": 140630 }, { "epoch": 6.985199165590544, "grad_norm": 0.1474609375, "learning_rate": 0.0002411880401311215, "loss": 0.4998, "step": 140640 }, { "epoch": 6.985695837886163, "grad_norm": 0.1669921875, "learning_rate": 0.00024114830634747194, "loss": 0.5101, "step": 140650 }, { "epoch": 6.986192510181782, "grad_norm": 0.162109375, "learning_rate": 0.00024110857256382241, "loss": 0.4872, "step": 140660 }, { "epoch": 6.986689182477401, "grad_norm": 0.158203125, "learning_rate": 0.00024106883878017286, "loss": 0.4679, "step": 140670 }, { "epoch": 6.987185854773021, "grad_norm": 0.1337890625, "learning_rate": 0.0002410291049965233, "loss": 0.4831, "step": 140680 }, { "epoch": 6.98768252706864, "grad_norm": 0.1533203125, "learning_rate": 0.00024098937121287377, "loss": 0.474, "step": 140690 }, { "epoch": 6.988179199364259, "grad_norm": 0.126953125, "learning_rate": 0.00024094963742922422, "loss": 0.492, "step": 140700 }, { "epoch": 6.988675871659879, "grad_norm": 0.1416015625, "learning_rate": 0.00024090990364557463, "loss": 0.5069, "step": 140710 }, { "epoch": 6.989172543955498, "grad_norm": 0.1376953125, "learning_rate": 0.00024087016986192513, "loss": 0.5026, "step": 140720 }, { "epoch": 6.989669216251118, "grad_norm": 0.1298828125, "learning_rate": 0.00024083043607827555, "loss": 0.484, "step": 140730 }, { "epoch": 6.990165888546737, "grad_norm": 0.1669921875, "learning_rate": 0.00024079070229462605, "loss": 0.498, "step": 140740 }, { "epoch": 6.990662560842356, "grad_norm": 0.189453125, "learning_rate": 0.00024075096851097647, "loss": 0.4986, "step": 140750 }, { "epoch": 6.991159233137975, "grad_norm": 0.1435546875, "learning_rate": 0.0002407112347273269, "loss": 0.5142, "step": 140760 }, { "epoch": 6.9916559054335945, "grad_norm": 0.154296875, "learning_rate": 0.00024067150094367738, "loss": 0.4693, "step": 140770 }, { "epoch": 6.992152577729215, "grad_norm": 0.1396484375, "learning_rate": 0.00024063176716002783, "loss": 0.522, "step": 140780 }, { "epoch": 6.992649250024834, "grad_norm": 0.1728515625, "learning_rate": 0.00024059203337637824, "loss": 0.4882, "step": 140790 }, { "epoch": 6.993145922320453, "grad_norm": 0.1474609375, "learning_rate": 0.00024055229959272874, "loss": 0.502, "step": 140800 }, { "epoch": 6.993642594616072, "grad_norm": 0.146484375, "learning_rate": 0.00024051256580907916, "loss": 0.4914, "step": 140810 }, { "epoch": 6.9941392669116915, "grad_norm": 0.15625, "learning_rate": 0.00024047283202542966, "loss": 0.4719, "step": 140820 }, { "epoch": 6.994635939207311, "grad_norm": 0.1396484375, "learning_rate": 0.00024043309824178007, "loss": 0.472, "step": 140830 }, { "epoch": 6.99513261150293, "grad_norm": 0.146484375, "learning_rate": 0.00024039336445813052, "loss": 0.4798, "step": 140840 }, { "epoch": 6.99562928379855, "grad_norm": 0.13671875, "learning_rate": 0.000240353630674481, "loss": 0.5351, "step": 140850 }, { "epoch": 6.996125956094169, "grad_norm": 0.1630859375, "learning_rate": 0.00024031389689083143, "loss": 0.5033, "step": 140860 }, { "epoch": 6.9966226283897885, "grad_norm": 0.1494140625, "learning_rate": 0.00024027416310718188, "loss": 0.4998, "step": 140870 }, { "epoch": 6.997119300685408, "grad_norm": 0.1328125, "learning_rate": 0.00024023442932353235, "loss": 0.4827, "step": 140880 }, { "epoch": 6.997615972981027, "grad_norm": 0.15625, "learning_rate": 0.0002401946955398828, "loss": 0.5031, "step": 140890 }, { "epoch": 6.998112645276646, "grad_norm": 0.13671875, "learning_rate": 0.00024015496175623327, "loss": 0.5173, "step": 140900 }, { "epoch": 6.9986093175722655, "grad_norm": 0.171875, "learning_rate": 0.0002401152279725837, "loss": 0.482, "step": 140910 }, { "epoch": 6.999105989867886, "grad_norm": 0.138671875, "learning_rate": 0.00024007549418893413, "loss": 0.4702, "step": 140920 }, { "epoch": 6.999602662163505, "grad_norm": 0.150390625, "learning_rate": 0.00024003576040528463, "loss": 0.4797, "step": 140930 }, { "epoch": 7.000099334459124, "grad_norm": 0.1630859375, "learning_rate": 0.00023999602662163504, "loss": 0.5283, "step": 140940 }, { "epoch": 7.000596006754743, "grad_norm": 0.15234375, "learning_rate": 0.00023995629283798554, "loss": 0.4986, "step": 140950 }, { "epoch": 7.0010926790503625, "grad_norm": 0.1328125, "learning_rate": 0.00023991655905433596, "loss": 0.4611, "step": 140960 }, { "epoch": 7.001589351345982, "grad_norm": 0.1689453125, "learning_rate": 0.0002398768252706864, "loss": 0.4936, "step": 140970 }, { "epoch": 7.002086023641601, "grad_norm": 0.12890625, "learning_rate": 0.00023983709148703687, "loss": 0.4864, "step": 140980 }, { "epoch": 7.002582695937221, "grad_norm": 0.162109375, "learning_rate": 0.00023979735770338732, "loss": 0.4895, "step": 140990 }, { "epoch": 7.00307936823284, "grad_norm": 0.1357421875, "learning_rate": 0.00023975762391973776, "loss": 0.4758, "step": 141000 }, { "epoch": 7.0035760405284595, "grad_norm": 0.13671875, "learning_rate": 0.00023971789013608823, "loss": 0.4665, "step": 141010 }, { "epoch": 7.004072712824079, "grad_norm": 0.13671875, "learning_rate": 0.00023967815635243868, "loss": 0.4966, "step": 141020 }, { "epoch": 7.004569385119698, "grad_norm": 0.1357421875, "learning_rate": 0.00023963842256878915, "loss": 0.4663, "step": 141030 }, { "epoch": 7.005066057415317, "grad_norm": 0.1728515625, "learning_rate": 0.0002395986887851396, "loss": 0.5, "step": 141040 }, { "epoch": 7.005562729710936, "grad_norm": 0.1474609375, "learning_rate": 0.00023955895500149, "loss": 0.5082, "step": 141050 }, { "epoch": 7.006059402006556, "grad_norm": 0.1220703125, "learning_rate": 0.00023951922121784048, "loss": 0.4666, "step": 141060 }, { "epoch": 7.006556074302176, "grad_norm": 0.142578125, "learning_rate": 0.00023947948743419093, "loss": 0.4773, "step": 141070 }, { "epoch": 7.007052746597795, "grad_norm": 0.162109375, "learning_rate": 0.00023943975365054137, "loss": 0.4744, "step": 141080 }, { "epoch": 7.007549418893414, "grad_norm": 0.1357421875, "learning_rate": 0.00023940001986689184, "loss": 0.4914, "step": 141090 }, { "epoch": 7.008046091189033, "grad_norm": 0.138671875, "learning_rate": 0.00023936028608324229, "loss": 0.4801, "step": 141100 }, { "epoch": 7.008542763484653, "grad_norm": 0.154296875, "learning_rate": 0.00023932055229959276, "loss": 0.4721, "step": 141110 }, { "epoch": 7.009039435780272, "grad_norm": 0.14453125, "learning_rate": 0.0002392808185159432, "loss": 0.4794, "step": 141120 }, { "epoch": 7.009536108075891, "grad_norm": 0.130859375, "learning_rate": 0.00023924108473229362, "loss": 0.5034, "step": 141130 }, { "epoch": 7.010032780371511, "grad_norm": 0.2138671875, "learning_rate": 0.00023920135094864412, "loss": 0.4798, "step": 141140 }, { "epoch": 7.0105294526671305, "grad_norm": 0.1474609375, "learning_rate": 0.00023916161716499453, "loss": 0.4586, "step": 141150 }, { "epoch": 7.01102612496275, "grad_norm": 0.138671875, "learning_rate": 0.00023912188338134498, "loss": 0.4931, "step": 141160 }, { "epoch": 7.011522797258369, "grad_norm": 0.1337890625, "learning_rate": 0.00023908214959769545, "loss": 0.4674, "step": 141170 }, { "epoch": 7.012019469553988, "grad_norm": 0.140625, "learning_rate": 0.0002390424158140459, "loss": 0.496, "step": 141180 }, { "epoch": 7.012516141849607, "grad_norm": 0.166015625, "learning_rate": 0.00023900268203039637, "loss": 0.4728, "step": 141190 }, { "epoch": 7.013012814145227, "grad_norm": 0.1318359375, "learning_rate": 0.0002389629482467468, "loss": 0.4576, "step": 141200 }, { "epoch": 7.013509486440847, "grad_norm": 0.130859375, "learning_rate": 0.00023892321446309725, "loss": 0.4785, "step": 141210 }, { "epoch": 7.014006158736466, "grad_norm": 0.134765625, "learning_rate": 0.00023888348067944773, "loss": 0.4936, "step": 141220 }, { "epoch": 7.014502831032085, "grad_norm": 0.140625, "learning_rate": 0.00023884374689579817, "loss": 0.4896, "step": 141230 }, { "epoch": 7.014999503327704, "grad_norm": 0.12890625, "learning_rate": 0.0002388040131121486, "loss": 0.4937, "step": 141240 }, { "epoch": 7.015496175623324, "grad_norm": 0.1484375, "learning_rate": 0.00023876427932849909, "loss": 0.4706, "step": 141250 }, { "epoch": 7.015992847918943, "grad_norm": 0.15625, "learning_rate": 0.0002387245455448495, "loss": 0.5109, "step": 141260 }, { "epoch": 7.016489520214562, "grad_norm": 0.1357421875, "learning_rate": 0.0002386848117612, "loss": 0.4743, "step": 141270 }, { "epoch": 7.016986192510182, "grad_norm": 0.140625, "learning_rate": 0.00023864507797755042, "loss": 0.4832, "step": 141280 }, { "epoch": 7.017482864805801, "grad_norm": 0.142578125, "learning_rate": 0.00023860534419390086, "loss": 0.5202, "step": 141290 }, { "epoch": 7.017979537101421, "grad_norm": 0.1630859375, "learning_rate": 0.00023856561041025133, "loss": 0.519, "step": 141300 }, { "epoch": 7.01847620939704, "grad_norm": 0.1484375, "learning_rate": 0.00023852587662660178, "loss": 0.4728, "step": 141310 }, { "epoch": 7.018972881692659, "grad_norm": 0.1806640625, "learning_rate": 0.00023848614284295225, "loss": 0.4885, "step": 141320 }, { "epoch": 7.019469553988278, "grad_norm": 0.1474609375, "learning_rate": 0.0002384464090593027, "loss": 0.4653, "step": 141330 }, { "epoch": 7.0199662262838975, "grad_norm": 0.1435546875, "learning_rate": 0.0002384066752756531, "loss": 0.4746, "step": 141340 }, { "epoch": 7.020462898579518, "grad_norm": 0.1357421875, "learning_rate": 0.0002383669414920036, "loss": 0.4718, "step": 141350 }, { "epoch": 7.020959570875137, "grad_norm": 0.1474609375, "learning_rate": 0.00023832720770835403, "loss": 0.4733, "step": 141360 }, { "epoch": 7.021456243170756, "grad_norm": 0.13671875, "learning_rate": 0.00023828747392470447, "loss": 0.4683, "step": 141370 }, { "epoch": 7.021952915466375, "grad_norm": 0.1455078125, "learning_rate": 0.00023824774014105494, "loss": 0.4735, "step": 141380 }, { "epoch": 7.022449587761995, "grad_norm": 0.15234375, "learning_rate": 0.00023820800635740539, "loss": 0.4853, "step": 141390 }, { "epoch": 7.022946260057614, "grad_norm": 0.1298828125, "learning_rate": 0.00023816827257375586, "loss": 0.4634, "step": 141400 }, { "epoch": 7.023442932353233, "grad_norm": 0.14453125, "learning_rate": 0.0002381285387901063, "loss": 0.4686, "step": 141410 }, { "epoch": 7.023939604648852, "grad_norm": 0.201171875, "learning_rate": 0.00023808880500645675, "loss": 0.5208, "step": 141420 }, { "epoch": 7.024436276944472, "grad_norm": 0.1337890625, "learning_rate": 0.00023804907122280722, "loss": 0.4788, "step": 141430 }, { "epoch": 7.024932949240092, "grad_norm": 0.15625, "learning_rate": 0.00023800933743915766, "loss": 0.5135, "step": 141440 }, { "epoch": 7.025429621535711, "grad_norm": 0.154296875, "learning_rate": 0.00023796960365550808, "loss": 0.4973, "step": 141450 }, { "epoch": 7.02592629383133, "grad_norm": 0.130859375, "learning_rate": 0.00023792986987185858, "loss": 0.4715, "step": 141460 }, { "epoch": 7.026422966126949, "grad_norm": 0.1533203125, "learning_rate": 0.000237890136088209, "loss": 0.5018, "step": 141470 }, { "epoch": 7.0269196384225685, "grad_norm": 0.201171875, "learning_rate": 0.0002378504023045595, "loss": 0.5192, "step": 141480 }, { "epoch": 7.027416310718188, "grad_norm": 0.1630859375, "learning_rate": 0.0002378106685209099, "loss": 0.5151, "step": 141490 }, { "epoch": 7.027912983013808, "grad_norm": 0.1376953125, "learning_rate": 0.00023777093473726035, "loss": 0.4586, "step": 141500 }, { "epoch": 7.028409655309427, "grad_norm": 0.1533203125, "learning_rate": 0.00023773120095361083, "loss": 0.4731, "step": 141510 }, { "epoch": 7.028906327605046, "grad_norm": 0.1552734375, "learning_rate": 0.00023769146716996127, "loss": 0.4952, "step": 141520 }, { "epoch": 7.0294029999006655, "grad_norm": 0.1884765625, "learning_rate": 0.00023765173338631171, "loss": 0.4799, "step": 141530 }, { "epoch": 7.029899672196285, "grad_norm": 0.1357421875, "learning_rate": 0.00023761199960266219, "loss": 0.5019, "step": 141540 }, { "epoch": 7.030396344491904, "grad_norm": 0.1494140625, "learning_rate": 0.00023757226581901263, "loss": 0.4547, "step": 141550 }, { "epoch": 7.030893016787523, "grad_norm": 0.181640625, "learning_rate": 0.0002375325320353631, "loss": 0.502, "step": 141560 }, { "epoch": 7.031389689083143, "grad_norm": 0.13671875, "learning_rate": 0.00023749279825171355, "loss": 0.5338, "step": 141570 }, { "epoch": 7.0318863613787626, "grad_norm": 0.1875, "learning_rate": 0.00023745306446806396, "loss": 0.4831, "step": 141580 }, { "epoch": 7.032383033674382, "grad_norm": 0.1435546875, "learning_rate": 0.00023741333068441446, "loss": 0.4868, "step": 141590 }, { "epoch": 7.032879705970001, "grad_norm": 0.12890625, "learning_rate": 0.00023737359690076488, "loss": 0.482, "step": 141600 }, { "epoch": 7.03337637826562, "grad_norm": 0.1435546875, "learning_rate": 0.00023733386311711532, "loss": 0.4651, "step": 141610 }, { "epoch": 7.0338730505612395, "grad_norm": 0.1240234375, "learning_rate": 0.0002372941293334658, "loss": 0.47, "step": 141620 }, { "epoch": 7.034369722856859, "grad_norm": 0.1337890625, "learning_rate": 0.00023725439554981624, "loss": 0.4866, "step": 141630 }, { "epoch": 7.034866395152479, "grad_norm": 0.138671875, "learning_rate": 0.0002372146617661667, "loss": 0.4972, "step": 141640 }, { "epoch": 7.035363067448098, "grad_norm": 0.1484375, "learning_rate": 0.00023717492798251715, "loss": 0.4595, "step": 141650 }, { "epoch": 7.035859739743717, "grad_norm": 0.134765625, "learning_rate": 0.00023713519419886757, "loss": 0.459, "step": 141660 }, { "epoch": 7.0363564120393365, "grad_norm": 0.1435546875, "learning_rate": 0.00023709546041521807, "loss": 0.491, "step": 141670 }, { "epoch": 7.036853084334956, "grad_norm": 0.140625, "learning_rate": 0.00023705572663156849, "loss": 0.4738, "step": 141680 }, { "epoch": 7.037349756630575, "grad_norm": 0.16015625, "learning_rate": 0.00023701599284791893, "loss": 0.4718, "step": 141690 }, { "epoch": 7.037846428926194, "grad_norm": 0.16015625, "learning_rate": 0.0002369762590642694, "loss": 0.4733, "step": 141700 }, { "epoch": 7.038343101221814, "grad_norm": 0.1494140625, "learning_rate": 0.00023693652528061985, "loss": 0.4742, "step": 141710 }, { "epoch": 7.0388397735174335, "grad_norm": 0.1376953125, "learning_rate": 0.00023689679149697032, "loss": 0.4897, "step": 141720 }, { "epoch": 7.039336445813053, "grad_norm": 0.15625, "learning_rate": 0.00023685705771332076, "loss": 0.5069, "step": 141730 }, { "epoch": 7.039833118108672, "grad_norm": 0.1357421875, "learning_rate": 0.0002368173239296712, "loss": 0.4781, "step": 141740 }, { "epoch": 7.040329790404291, "grad_norm": 0.1298828125, "learning_rate": 0.00023677759014602168, "loss": 0.4808, "step": 141750 }, { "epoch": 7.04082646269991, "grad_norm": 0.1318359375, "learning_rate": 0.00023673785636237212, "loss": 0.5024, "step": 141760 }, { "epoch": 7.04132313499553, "grad_norm": 0.138671875, "learning_rate": 0.0002366981225787226, "loss": 0.4859, "step": 141770 }, { "epoch": 7.041819807291149, "grad_norm": 0.1337890625, "learning_rate": 0.00023665838879507304, "loss": 0.4987, "step": 141780 }, { "epoch": 7.042316479586769, "grad_norm": 0.1220703125, "learning_rate": 0.00023661865501142345, "loss": 0.458, "step": 141790 }, { "epoch": 7.042813151882388, "grad_norm": 0.2109375, "learning_rate": 0.00023657892122777395, "loss": 0.5042, "step": 141800 }, { "epoch": 7.043309824178007, "grad_norm": 0.1416015625, "learning_rate": 0.00023653918744412437, "loss": 0.4712, "step": 141810 }, { "epoch": 7.043806496473627, "grad_norm": 0.1484375, "learning_rate": 0.00023649945366047481, "loss": 0.4868, "step": 141820 }, { "epoch": 7.044303168769246, "grad_norm": 0.154296875, "learning_rate": 0.00023645971987682529, "loss": 0.4499, "step": 141830 }, { "epoch": 7.044799841064865, "grad_norm": 0.1826171875, "learning_rate": 0.00023641998609317573, "loss": 0.4485, "step": 141840 }, { "epoch": 7.045296513360484, "grad_norm": 0.140625, "learning_rate": 0.0002363802523095262, "loss": 0.4767, "step": 141850 }, { "epoch": 7.0457931856561045, "grad_norm": 0.12890625, "learning_rate": 0.00023634051852587665, "loss": 0.485, "step": 141860 }, { "epoch": 7.046289857951724, "grad_norm": 0.1455078125, "learning_rate": 0.0002363007847422271, "loss": 0.4986, "step": 141870 }, { "epoch": 7.046786530247343, "grad_norm": 0.13671875, "learning_rate": 0.00023626105095857756, "loss": 0.4705, "step": 141880 }, { "epoch": 7.047283202542962, "grad_norm": 0.146484375, "learning_rate": 0.000236221317174928, "loss": 0.4768, "step": 141890 }, { "epoch": 7.047779874838581, "grad_norm": 0.13671875, "learning_rate": 0.00023618158339127842, "loss": 0.4889, "step": 141900 }, { "epoch": 7.048276547134201, "grad_norm": 0.1572265625, "learning_rate": 0.0002361418496076289, "loss": 0.4702, "step": 141910 }, { "epoch": 7.04877321942982, "grad_norm": 0.13671875, "learning_rate": 0.00023610211582397934, "loss": 0.4579, "step": 141920 }, { "epoch": 7.04926989172544, "grad_norm": 0.1474609375, "learning_rate": 0.0002360623820403298, "loss": 0.4773, "step": 141930 }, { "epoch": 7.049766564021059, "grad_norm": 0.1474609375, "learning_rate": 0.00023602264825668025, "loss": 0.4548, "step": 141940 }, { "epoch": 7.050263236316678, "grad_norm": 0.12890625, "learning_rate": 0.0002359829144730307, "loss": 0.4606, "step": 141950 }, { "epoch": 7.050759908612298, "grad_norm": 0.130859375, "learning_rate": 0.00023594318068938117, "loss": 0.4856, "step": 141960 }, { "epoch": 7.051256580907917, "grad_norm": 0.1416015625, "learning_rate": 0.0002359034469057316, "loss": 0.5125, "step": 141970 }, { "epoch": 7.051753253203536, "grad_norm": 0.1484375, "learning_rate": 0.00023586371312208203, "loss": 0.4888, "step": 141980 }, { "epoch": 7.052249925499155, "grad_norm": 0.134765625, "learning_rate": 0.00023582397933843253, "loss": 0.4924, "step": 141990 }, { "epoch": 7.052746597794775, "grad_norm": 0.14453125, "learning_rate": 0.00023578424555478295, "loss": 0.4722, "step": 142000 }, { "epoch": 7.053243270090395, "grad_norm": 0.1416015625, "learning_rate": 0.00023574451177113344, "loss": 0.4774, "step": 142010 }, { "epoch": 7.053739942386014, "grad_norm": 0.130859375, "learning_rate": 0.00023570477798748386, "loss": 0.4848, "step": 142020 }, { "epoch": 7.054236614681633, "grad_norm": 0.1796875, "learning_rate": 0.0002356650442038343, "loss": 0.4656, "step": 142030 }, { "epoch": 7.054733286977252, "grad_norm": 0.15234375, "learning_rate": 0.00023562531042018478, "loss": 0.5006, "step": 142040 }, { "epoch": 7.0552299592728716, "grad_norm": 0.2041015625, "learning_rate": 0.00023558557663653522, "loss": 0.4561, "step": 142050 }, { "epoch": 7.055726631568491, "grad_norm": 0.1337890625, "learning_rate": 0.00023554584285288567, "loss": 0.5041, "step": 142060 }, { "epoch": 7.056223303864111, "grad_norm": 0.1376953125, "learning_rate": 0.00023550610906923614, "loss": 0.4918, "step": 142070 }, { "epoch": 7.05671997615973, "grad_norm": 0.1474609375, "learning_rate": 0.00023546637528558658, "loss": 0.4901, "step": 142080 }, { "epoch": 7.057216648455349, "grad_norm": 0.15625, "learning_rate": 0.00023542664150193705, "loss": 0.4613, "step": 142090 }, { "epoch": 7.057713320750969, "grad_norm": 0.1328125, "learning_rate": 0.0002353869077182875, "loss": 0.4986, "step": 142100 }, { "epoch": 7.058209993046588, "grad_norm": 0.150390625, "learning_rate": 0.00023534717393463791, "loss": 0.4867, "step": 142110 }, { "epoch": 7.058706665342207, "grad_norm": 0.177734375, "learning_rate": 0.0002353074401509884, "loss": 0.4918, "step": 142120 }, { "epoch": 7.059203337637826, "grad_norm": 0.1357421875, "learning_rate": 0.00023526770636733883, "loss": 0.4797, "step": 142130 }, { "epoch": 7.0597000099334455, "grad_norm": 0.12890625, "learning_rate": 0.00023522797258368927, "loss": 0.4604, "step": 142140 }, { "epoch": 7.060196682229066, "grad_norm": 0.1337890625, "learning_rate": 0.00023518823880003975, "loss": 0.4562, "step": 142150 }, { "epoch": 7.060693354524685, "grad_norm": 0.177734375, "learning_rate": 0.0002351485050163902, "loss": 0.4999, "step": 142160 }, { "epoch": 7.061190026820304, "grad_norm": 0.14453125, "learning_rate": 0.00023510877123274066, "loss": 0.5229, "step": 142170 }, { "epoch": 7.061686699115923, "grad_norm": 0.150390625, "learning_rate": 0.0002350690374490911, "loss": 0.4936, "step": 142180 }, { "epoch": 7.0621833714115425, "grad_norm": 0.1513671875, "learning_rate": 0.00023502930366544152, "loss": 0.4659, "step": 142190 }, { "epoch": 7.062680043707162, "grad_norm": 0.150390625, "learning_rate": 0.00023498956988179202, "loss": 0.5114, "step": 142200 }, { "epoch": 7.063176716002781, "grad_norm": 0.1611328125, "learning_rate": 0.00023494983609814244, "loss": 0.4818, "step": 142210 }, { "epoch": 7.063673388298401, "grad_norm": 0.1357421875, "learning_rate": 0.00023491010231449294, "loss": 0.4942, "step": 142220 }, { "epoch": 7.06417006059402, "grad_norm": 0.1640625, "learning_rate": 0.00023487036853084335, "loss": 0.4806, "step": 142230 }, { "epoch": 7.0646667328896395, "grad_norm": 0.1328125, "learning_rate": 0.0002348306347471938, "loss": 0.5028, "step": 142240 }, { "epoch": 7.065163405185259, "grad_norm": 0.146484375, "learning_rate": 0.00023479090096354427, "loss": 0.4712, "step": 142250 }, { "epoch": 7.065660077480878, "grad_norm": 0.134765625, "learning_rate": 0.0002347511671798947, "loss": 0.4996, "step": 142260 }, { "epoch": 7.066156749776497, "grad_norm": 0.1337890625, "learning_rate": 0.00023471143339624516, "loss": 0.4829, "step": 142270 }, { "epoch": 7.066653422072116, "grad_norm": 0.1494140625, "learning_rate": 0.00023467169961259563, "loss": 0.5225, "step": 142280 }, { "epoch": 7.067150094367737, "grad_norm": 0.13671875, "learning_rate": 0.00023463196582894607, "loss": 0.4617, "step": 142290 }, { "epoch": 7.067646766663356, "grad_norm": 0.13671875, "learning_rate": 0.00023459223204529654, "loss": 0.4767, "step": 142300 }, { "epoch": 7.068143438958975, "grad_norm": 0.1572265625, "learning_rate": 0.000234552498261647, "loss": 0.5274, "step": 142310 }, { "epoch": 7.068640111254594, "grad_norm": 0.15625, "learning_rate": 0.0002345127644779974, "loss": 0.4848, "step": 142320 }, { "epoch": 7.0691367835502135, "grad_norm": 0.140625, "learning_rate": 0.0002344730306943479, "loss": 0.4745, "step": 142330 }, { "epoch": 7.069633455845833, "grad_norm": 0.146484375, "learning_rate": 0.00023443329691069832, "loss": 0.4779, "step": 142340 }, { "epoch": 7.070130128141452, "grad_norm": 0.15625, "learning_rate": 0.00023439356312704877, "loss": 0.4938, "step": 142350 }, { "epoch": 7.070626800437072, "grad_norm": 0.1630859375, "learning_rate": 0.00023435382934339924, "loss": 0.5046, "step": 142360 }, { "epoch": 7.071123472732691, "grad_norm": 0.140625, "learning_rate": 0.00023431409555974968, "loss": 0.4688, "step": 142370 }, { "epoch": 7.0716201450283105, "grad_norm": 0.154296875, "learning_rate": 0.00023427436177610015, "loss": 0.4591, "step": 142380 }, { "epoch": 7.07211681732393, "grad_norm": 0.12158203125, "learning_rate": 0.0002342346279924506, "loss": 0.4907, "step": 142390 }, { "epoch": 7.072613489619549, "grad_norm": 0.1318359375, "learning_rate": 0.00023419489420880104, "loss": 0.4604, "step": 142400 }, { "epoch": 7.073110161915168, "grad_norm": 0.130859375, "learning_rate": 0.0002341551604251515, "loss": 0.503, "step": 142410 }, { "epoch": 7.073606834210787, "grad_norm": 0.12451171875, "learning_rate": 0.00023411542664150196, "loss": 0.4991, "step": 142420 }, { "epoch": 7.0741035065064075, "grad_norm": 0.1396484375, "learning_rate": 0.00023407569285785237, "loss": 0.472, "step": 142430 }, { "epoch": 7.074600178802027, "grad_norm": 0.1357421875, "learning_rate": 0.00023403595907420287, "loss": 0.4888, "step": 142440 }, { "epoch": 7.075096851097646, "grad_norm": 0.1328125, "learning_rate": 0.0002339962252905533, "loss": 0.4842, "step": 142450 }, { "epoch": 7.075593523393265, "grad_norm": 0.146484375, "learning_rate": 0.00023395649150690376, "loss": 0.4966, "step": 142460 }, { "epoch": 7.076090195688884, "grad_norm": 0.1572265625, "learning_rate": 0.0002339167577232542, "loss": 0.4971, "step": 142470 }, { "epoch": 7.076586867984504, "grad_norm": 0.169921875, "learning_rate": 0.00023387702393960465, "loss": 0.4729, "step": 142480 }, { "epoch": 7.077083540280123, "grad_norm": 0.18359375, "learning_rate": 0.00023383729015595512, "loss": 0.5048, "step": 142490 }, { "epoch": 7.077580212575742, "grad_norm": 0.140625, "learning_rate": 0.00023379755637230557, "loss": 0.4961, "step": 142500 }, { "epoch": 7.078076884871362, "grad_norm": 0.14453125, "learning_rate": 0.00023375782258865598, "loss": 0.497, "step": 142510 }, { "epoch": 7.0785735571669814, "grad_norm": 0.158203125, "learning_rate": 0.00023371808880500648, "loss": 0.4709, "step": 142520 }, { "epoch": 7.079070229462601, "grad_norm": 0.140625, "learning_rate": 0.0002336783550213569, "loss": 0.514, "step": 142530 }, { "epoch": 7.07956690175822, "grad_norm": 0.1328125, "learning_rate": 0.0002336386212377074, "loss": 0.4985, "step": 142540 }, { "epoch": 7.080063574053839, "grad_norm": 0.1552734375, "learning_rate": 0.0002335988874540578, "loss": 0.4723, "step": 142550 }, { "epoch": 7.080560246349458, "grad_norm": 0.1533203125, "learning_rate": 0.00023355915367040826, "loss": 0.5067, "step": 142560 }, { "epoch": 7.081056918645078, "grad_norm": 0.1376953125, "learning_rate": 0.00023351941988675873, "loss": 0.4608, "step": 142570 }, { "epoch": 7.081553590940698, "grad_norm": 0.1552734375, "learning_rate": 0.00023347968610310917, "loss": 0.4775, "step": 142580 }, { "epoch": 7.082050263236317, "grad_norm": 0.1611328125, "learning_rate": 0.00023343995231945964, "loss": 0.4837, "step": 142590 }, { "epoch": 7.082546935531936, "grad_norm": 0.1416015625, "learning_rate": 0.0002334002185358101, "loss": 0.4759, "step": 142600 }, { "epoch": 7.083043607827555, "grad_norm": 0.1513671875, "learning_rate": 0.00023336048475216053, "loss": 0.4956, "step": 142610 }, { "epoch": 7.083540280123175, "grad_norm": 0.1328125, "learning_rate": 0.000233320750968511, "loss": 0.4865, "step": 142620 }, { "epoch": 7.084036952418794, "grad_norm": 0.130859375, "learning_rate": 0.00023328101718486145, "loss": 0.4843, "step": 142630 }, { "epoch": 7.084533624714413, "grad_norm": 0.138671875, "learning_rate": 0.00023324128340121187, "loss": 0.4738, "step": 142640 }, { "epoch": 7.085030297010033, "grad_norm": 0.1513671875, "learning_rate": 0.00023320154961756236, "loss": 0.4977, "step": 142650 }, { "epoch": 7.085526969305652, "grad_norm": 0.125, "learning_rate": 0.00023316181583391278, "loss": 0.4792, "step": 142660 }, { "epoch": 7.086023641601272, "grad_norm": 0.1513671875, "learning_rate": 0.00023312208205026328, "loss": 0.4879, "step": 142670 }, { "epoch": 7.086520313896891, "grad_norm": 0.1513671875, "learning_rate": 0.0002330823482666137, "loss": 0.5005, "step": 142680 }, { "epoch": 7.08701698619251, "grad_norm": 0.1279296875, "learning_rate": 0.00023304261448296414, "loss": 0.4676, "step": 142690 }, { "epoch": 7.087513658488129, "grad_norm": 0.1494140625, "learning_rate": 0.0002330028806993146, "loss": 0.4853, "step": 142700 }, { "epoch": 7.0880103307837485, "grad_norm": 0.1318359375, "learning_rate": 0.00023296314691566506, "loss": 0.5005, "step": 142710 }, { "epoch": 7.088507003079369, "grad_norm": 0.158203125, "learning_rate": 0.0002329234131320155, "loss": 0.4629, "step": 142720 }, { "epoch": 7.089003675374988, "grad_norm": 0.134765625, "learning_rate": 0.00023288367934836597, "loss": 0.4831, "step": 142730 }, { "epoch": 7.089500347670607, "grad_norm": 0.1533203125, "learning_rate": 0.00023284394556471642, "loss": 0.5082, "step": 142740 }, { "epoch": 7.089997019966226, "grad_norm": 0.126953125, "learning_rate": 0.0002328042117810669, "loss": 0.4694, "step": 142750 }, { "epoch": 7.090493692261846, "grad_norm": 0.158203125, "learning_rate": 0.0002327644779974173, "loss": 0.4828, "step": 142760 }, { "epoch": 7.090990364557465, "grad_norm": 0.15234375, "learning_rate": 0.00023272474421376775, "loss": 0.5281, "step": 142770 }, { "epoch": 7.091487036853084, "grad_norm": 0.1376953125, "learning_rate": 0.00023268501043011822, "loss": 0.4916, "step": 142780 }, { "epoch": 7.091983709148703, "grad_norm": 0.1748046875, "learning_rate": 0.00023264527664646867, "loss": 0.49, "step": 142790 }, { "epoch": 7.092480381444323, "grad_norm": 0.1416015625, "learning_rate": 0.0002326055428628191, "loss": 0.4367, "step": 142800 }, { "epoch": 7.092977053739943, "grad_norm": 0.1474609375, "learning_rate": 0.00023256580907916958, "loss": 0.4794, "step": 142810 }, { "epoch": 7.093473726035562, "grad_norm": 0.1533203125, "learning_rate": 0.00023252607529552003, "loss": 0.468, "step": 142820 }, { "epoch": 7.093970398331181, "grad_norm": 0.1591796875, "learning_rate": 0.0002324863415118705, "loss": 0.506, "step": 142830 }, { "epoch": 7.0944670706268, "grad_norm": 0.1298828125, "learning_rate": 0.00023244660772822094, "loss": 0.4884, "step": 142840 }, { "epoch": 7.0949637429224195, "grad_norm": 0.1484375, "learning_rate": 0.00023240687394457136, "loss": 0.5043, "step": 142850 }, { "epoch": 7.095460415218039, "grad_norm": 0.1640625, "learning_rate": 0.00023236714016092186, "loss": 0.4999, "step": 142860 }, { "epoch": 7.095957087513659, "grad_norm": 0.1533203125, "learning_rate": 0.00023232740637727227, "loss": 0.4508, "step": 142870 }, { "epoch": 7.096453759809278, "grad_norm": 0.154296875, "learning_rate": 0.00023228767259362272, "loss": 0.4809, "step": 142880 }, { "epoch": 7.096950432104897, "grad_norm": 0.14453125, "learning_rate": 0.0002322479388099732, "loss": 0.4774, "step": 142890 }, { "epoch": 7.0974471044005165, "grad_norm": 0.154296875, "learning_rate": 0.00023220820502632363, "loss": 0.4958, "step": 142900 }, { "epoch": 7.097943776696136, "grad_norm": 0.158203125, "learning_rate": 0.0002321684712426741, "loss": 0.5162, "step": 142910 }, { "epoch": 7.098440448991755, "grad_norm": 0.1767578125, "learning_rate": 0.00023212873745902455, "loss": 0.4788, "step": 142920 }, { "epoch": 7.098937121287374, "grad_norm": 0.1376953125, "learning_rate": 0.000232089003675375, "loss": 0.4658, "step": 142930 }, { "epoch": 7.099433793582994, "grad_norm": 0.1533203125, "learning_rate": 0.00023204926989172546, "loss": 0.5278, "step": 142940 }, { "epoch": 7.0999304658786135, "grad_norm": 0.1298828125, "learning_rate": 0.0002320095361080759, "loss": 0.5104, "step": 142950 }, { "epoch": 7.100427138174233, "grad_norm": 0.1943359375, "learning_rate": 0.00023196980232442633, "loss": 0.4983, "step": 142960 }, { "epoch": 7.100923810469852, "grad_norm": 0.140625, "learning_rate": 0.00023193006854077682, "loss": 0.4831, "step": 142970 }, { "epoch": 7.101420482765471, "grad_norm": 0.2216796875, "learning_rate": 0.00023189033475712724, "loss": 0.4977, "step": 142980 }, { "epoch": 7.1019171550610904, "grad_norm": 0.1484375, "learning_rate": 0.00023185060097347774, "loss": 0.481, "step": 142990 }, { "epoch": 7.10241382735671, "grad_norm": 0.134765625, "learning_rate": 0.00023181086718982816, "loss": 0.5166, "step": 143000 }, { "epoch": 7.10291049965233, "grad_norm": 0.16015625, "learning_rate": 0.0002317711334061786, "loss": 0.5247, "step": 143010 }, { "epoch": 7.103407171947949, "grad_norm": 0.12890625, "learning_rate": 0.00023173139962252907, "loss": 0.4746, "step": 143020 }, { "epoch": 7.103903844243568, "grad_norm": 0.13671875, "learning_rate": 0.00023169166583887952, "loss": 0.5252, "step": 143030 }, { "epoch": 7.1044005165391875, "grad_norm": 0.1416015625, "learning_rate": 0.00023165193205523, "loss": 0.4897, "step": 143040 }, { "epoch": 7.104897188834807, "grad_norm": 0.189453125, "learning_rate": 0.00023161219827158043, "loss": 0.4883, "step": 143050 }, { "epoch": 7.105393861130426, "grad_norm": 0.1357421875, "learning_rate": 0.00023157246448793085, "loss": 0.5, "step": 143060 }, { "epoch": 7.105890533426045, "grad_norm": 0.1826171875, "learning_rate": 0.00023153273070428135, "loss": 0.4721, "step": 143070 }, { "epoch": 7.106387205721665, "grad_norm": 0.1357421875, "learning_rate": 0.00023149299692063177, "loss": 0.4643, "step": 143080 }, { "epoch": 7.1068838780172845, "grad_norm": 0.1591796875, "learning_rate": 0.0002314532631369822, "loss": 0.461, "step": 143090 }, { "epoch": 7.107380550312904, "grad_norm": 0.1298828125, "learning_rate": 0.00023141352935333268, "loss": 0.5495, "step": 143100 }, { "epoch": 7.107877222608523, "grad_norm": 0.1396484375, "learning_rate": 0.00023137379556968313, "loss": 0.4675, "step": 143110 }, { "epoch": 7.108373894904142, "grad_norm": 0.1396484375, "learning_rate": 0.0002313340617860336, "loss": 0.4878, "step": 143120 }, { "epoch": 7.108870567199761, "grad_norm": 0.1484375, "learning_rate": 0.00023129432800238404, "loss": 0.4986, "step": 143130 }, { "epoch": 7.109367239495381, "grad_norm": 0.1455078125, "learning_rate": 0.00023125459421873449, "loss": 0.4905, "step": 143140 }, { "epoch": 7.109863911791001, "grad_norm": 0.126953125, "learning_rate": 0.00023121486043508496, "loss": 0.4642, "step": 143150 }, { "epoch": 7.11036058408662, "grad_norm": 0.1435546875, "learning_rate": 0.0002311751266514354, "loss": 0.4544, "step": 143160 }, { "epoch": 7.110857256382239, "grad_norm": 0.1455078125, "learning_rate": 0.00023113539286778582, "loss": 0.4911, "step": 143170 }, { "epoch": 7.111353928677858, "grad_norm": 0.1337890625, "learning_rate": 0.00023109565908413632, "loss": 0.4653, "step": 143180 }, { "epoch": 7.111850600973478, "grad_norm": 0.1337890625, "learning_rate": 0.00023105592530048673, "loss": 0.4804, "step": 143190 }, { "epoch": 7.112347273269097, "grad_norm": 0.171875, "learning_rate": 0.00023101619151683723, "loss": 0.4988, "step": 143200 }, { "epoch": 7.112843945564716, "grad_norm": 0.205078125, "learning_rate": 0.00023097645773318765, "loss": 0.4689, "step": 143210 }, { "epoch": 7.113340617860335, "grad_norm": 0.1357421875, "learning_rate": 0.0002309367239495381, "loss": 0.506, "step": 143220 }, { "epoch": 7.1138372901559555, "grad_norm": 0.150390625, "learning_rate": 0.00023089699016588856, "loss": 0.5052, "step": 143230 }, { "epoch": 7.114333962451575, "grad_norm": 0.1435546875, "learning_rate": 0.000230857256382239, "loss": 0.4921, "step": 143240 }, { "epoch": 7.114830634747194, "grad_norm": 0.1591796875, "learning_rate": 0.00023081752259858945, "loss": 0.5034, "step": 143250 }, { "epoch": 7.115327307042813, "grad_norm": 0.142578125, "learning_rate": 0.00023077778881493992, "loss": 0.4747, "step": 143260 }, { "epoch": 7.115823979338432, "grad_norm": 0.142578125, "learning_rate": 0.00023073805503129037, "loss": 0.4833, "step": 143270 }, { "epoch": 7.116320651634052, "grad_norm": 0.1328125, "learning_rate": 0.00023069832124764084, "loss": 0.4776, "step": 143280 }, { "epoch": 7.116817323929671, "grad_norm": 0.126953125, "learning_rate": 0.00023065858746399128, "loss": 0.452, "step": 143290 }, { "epoch": 7.117313996225291, "grad_norm": 0.1513671875, "learning_rate": 0.0002306188536803417, "loss": 0.499, "step": 143300 }, { "epoch": 7.11781066852091, "grad_norm": 0.15234375, "learning_rate": 0.00023057911989669217, "loss": 0.5051, "step": 143310 }, { "epoch": 7.118307340816529, "grad_norm": 0.1357421875, "learning_rate": 0.00023053938611304262, "loss": 0.487, "step": 143320 }, { "epoch": 7.118804013112149, "grad_norm": 0.1259765625, "learning_rate": 0.00023049965232939306, "loss": 0.505, "step": 143330 }, { "epoch": 7.119300685407768, "grad_norm": 0.2021484375, "learning_rate": 0.00023045991854574353, "loss": 0.5393, "step": 143340 }, { "epoch": 7.119797357703387, "grad_norm": 0.12353515625, "learning_rate": 0.00023042018476209398, "loss": 0.4801, "step": 143350 }, { "epoch": 7.120294029999006, "grad_norm": 0.1708984375, "learning_rate": 0.00023038045097844445, "loss": 0.5147, "step": 143360 }, { "epoch": 7.120790702294626, "grad_norm": 0.1455078125, "learning_rate": 0.0002303407171947949, "loss": 0.4547, "step": 143370 }, { "epoch": 7.121287374590246, "grad_norm": 0.1279296875, "learning_rate": 0.0002303009834111453, "loss": 0.4809, "step": 143380 }, { "epoch": 7.121784046885865, "grad_norm": 0.1474609375, "learning_rate": 0.0002302612496274958, "loss": 0.5019, "step": 143390 }, { "epoch": 7.122280719181484, "grad_norm": 0.138671875, "learning_rate": 0.00023022151584384623, "loss": 0.5201, "step": 143400 }, { "epoch": 7.122777391477103, "grad_norm": 0.1630859375, "learning_rate": 0.00023018178206019667, "loss": 0.4755, "step": 143410 }, { "epoch": 7.1232740637727225, "grad_norm": 0.1357421875, "learning_rate": 0.00023014204827654714, "loss": 0.4496, "step": 143420 }, { "epoch": 7.123770736068342, "grad_norm": 0.158203125, "learning_rate": 0.00023010231449289759, "loss": 0.5056, "step": 143430 }, { "epoch": 7.124267408363962, "grad_norm": 0.1396484375, "learning_rate": 0.00023006258070924806, "loss": 0.4708, "step": 143440 }, { "epoch": 7.124764080659581, "grad_norm": 0.1474609375, "learning_rate": 0.0002300228469255985, "loss": 0.5275, "step": 143450 }, { "epoch": 7.1252607529552, "grad_norm": 0.1396484375, "learning_rate": 0.00022998311314194895, "loss": 0.4782, "step": 143460 }, { "epoch": 7.12575742525082, "grad_norm": 0.1669921875, "learning_rate": 0.00022994337935829942, "loss": 0.4861, "step": 143470 }, { "epoch": 7.126254097546439, "grad_norm": 0.138671875, "learning_rate": 0.00022990364557464986, "loss": 0.5009, "step": 143480 }, { "epoch": 7.126750769842058, "grad_norm": 0.1962890625, "learning_rate": 0.00022986391179100033, "loss": 0.4984, "step": 143490 }, { "epoch": 7.127247442137677, "grad_norm": 0.171875, "learning_rate": 0.00022982417800735078, "loss": 0.4795, "step": 143500 }, { "epoch": 7.1277441144332965, "grad_norm": 0.1533203125, "learning_rate": 0.0002297844442237012, "loss": 0.498, "step": 143510 }, { "epoch": 7.128240786728917, "grad_norm": 0.1640625, "learning_rate": 0.0002297447104400517, "loss": 0.4772, "step": 143520 }, { "epoch": 7.128737459024536, "grad_norm": 0.1630859375, "learning_rate": 0.0002297049766564021, "loss": 0.4932, "step": 143530 }, { "epoch": 7.129234131320155, "grad_norm": 0.138671875, "learning_rate": 0.00022966524287275255, "loss": 0.5033, "step": 143540 }, { "epoch": 7.129730803615774, "grad_norm": 0.134765625, "learning_rate": 0.00022962550908910302, "loss": 0.4864, "step": 143550 }, { "epoch": 7.1302274759113935, "grad_norm": 0.169921875, "learning_rate": 0.00022958577530545347, "loss": 0.4603, "step": 143560 }, { "epoch": 7.130724148207013, "grad_norm": 0.15625, "learning_rate": 0.00022954604152180394, "loss": 0.4982, "step": 143570 }, { "epoch": 7.131220820502632, "grad_norm": 0.134765625, "learning_rate": 0.00022950630773815438, "loss": 0.4506, "step": 143580 }, { "epoch": 7.131717492798252, "grad_norm": 0.1455078125, "learning_rate": 0.00022946657395450483, "loss": 0.5013, "step": 143590 }, { "epoch": 7.132214165093871, "grad_norm": 0.146484375, "learning_rate": 0.0002294268401708553, "loss": 0.4878, "step": 143600 }, { "epoch": 7.1327108373894905, "grad_norm": 0.1416015625, "learning_rate": 0.00022938710638720572, "loss": 0.5103, "step": 143610 }, { "epoch": 7.13320750968511, "grad_norm": 0.18359375, "learning_rate": 0.00022934737260355616, "loss": 0.4955, "step": 143620 }, { "epoch": 7.133704181980729, "grad_norm": 0.171875, "learning_rate": 0.00022930763881990663, "loss": 0.4691, "step": 143630 }, { "epoch": 7.134200854276348, "grad_norm": 0.154296875, "learning_rate": 0.00022926790503625708, "loss": 0.4869, "step": 143640 }, { "epoch": 7.134697526571967, "grad_norm": 0.1396484375, "learning_rate": 0.00022922817125260755, "loss": 0.5061, "step": 143650 }, { "epoch": 7.1351941988675875, "grad_norm": 0.15625, "learning_rate": 0.000229188437468958, "loss": 0.4892, "step": 143660 }, { "epoch": 7.135690871163207, "grad_norm": 0.142578125, "learning_rate": 0.00022914870368530844, "loss": 0.4848, "step": 143670 }, { "epoch": 7.136187543458826, "grad_norm": 0.140625, "learning_rate": 0.0002291089699016589, "loss": 0.4725, "step": 143680 }, { "epoch": 7.136684215754445, "grad_norm": 0.1435546875, "learning_rate": 0.00022906923611800935, "loss": 0.4675, "step": 143690 }, { "epoch": 7.1371808880500645, "grad_norm": 0.138671875, "learning_rate": 0.00022902950233435977, "loss": 0.4967, "step": 143700 }, { "epoch": 7.137677560345684, "grad_norm": 0.16796875, "learning_rate": 0.00022898976855071027, "loss": 0.486, "step": 143710 }, { "epoch": 7.138174232641303, "grad_norm": 0.14453125, "learning_rate": 0.00022895003476706069, "loss": 0.5012, "step": 143720 }, { "epoch": 7.138670904936923, "grad_norm": 0.1435546875, "learning_rate": 0.00022891030098341118, "loss": 0.492, "step": 143730 }, { "epoch": 7.139167577232542, "grad_norm": 0.1494140625, "learning_rate": 0.0002288705671997616, "loss": 0.462, "step": 143740 }, { "epoch": 7.1396642495281615, "grad_norm": 0.2060546875, "learning_rate": 0.00022883083341611205, "loss": 0.4899, "step": 143750 }, { "epoch": 7.140160921823781, "grad_norm": 0.1318359375, "learning_rate": 0.00022879109963246252, "loss": 0.4951, "step": 143760 }, { "epoch": 7.1406575941194, "grad_norm": 0.1396484375, "learning_rate": 0.00022875136584881296, "loss": 0.4571, "step": 143770 }, { "epoch": 7.141154266415019, "grad_norm": 0.1318359375, "learning_rate": 0.0002287116320651634, "loss": 0.4952, "step": 143780 }, { "epoch": 7.141650938710638, "grad_norm": 0.126953125, "learning_rate": 0.00022867189828151388, "loss": 0.4873, "step": 143790 }, { "epoch": 7.1421476110062585, "grad_norm": 0.1474609375, "learning_rate": 0.00022863216449786432, "loss": 0.4599, "step": 143800 }, { "epoch": 7.142644283301878, "grad_norm": 0.16796875, "learning_rate": 0.0002285924307142148, "loss": 0.4905, "step": 143810 }, { "epoch": 7.143140955597497, "grad_norm": 0.1689453125, "learning_rate": 0.00022855269693056524, "loss": 0.4881, "step": 143820 }, { "epoch": 7.143637627893116, "grad_norm": 0.1591796875, "learning_rate": 0.00022851296314691565, "loss": 0.4771, "step": 143830 }, { "epoch": 7.144134300188735, "grad_norm": 0.150390625, "learning_rate": 0.00022847322936326615, "loss": 0.4965, "step": 143840 }, { "epoch": 7.144630972484355, "grad_norm": 0.1357421875, "learning_rate": 0.00022843349557961657, "loss": 0.5024, "step": 143850 }, { "epoch": 7.145127644779974, "grad_norm": 0.1474609375, "learning_rate": 0.00022839376179596704, "loss": 0.4866, "step": 143860 }, { "epoch": 7.145624317075594, "grad_norm": 0.140625, "learning_rate": 0.00022835402801231748, "loss": 0.5024, "step": 143870 }, { "epoch": 7.146120989371213, "grad_norm": 0.1455078125, "learning_rate": 0.00022831429422866793, "loss": 0.4544, "step": 143880 }, { "epoch": 7.146617661666832, "grad_norm": 0.1484375, "learning_rate": 0.0002282745604450184, "loss": 0.4847, "step": 143890 }, { "epoch": 7.147114333962452, "grad_norm": 0.1220703125, "learning_rate": 0.00022823482666136884, "loss": 0.4629, "step": 143900 }, { "epoch": 7.147611006258071, "grad_norm": 0.1572265625, "learning_rate": 0.00022819509287771926, "loss": 0.5042, "step": 143910 }, { "epoch": 7.14810767855369, "grad_norm": 0.140625, "learning_rate": 0.00022815535909406976, "loss": 0.4641, "step": 143920 }, { "epoch": 7.148604350849309, "grad_norm": 0.15234375, "learning_rate": 0.00022811562531042018, "loss": 0.4813, "step": 143930 }, { "epoch": 7.149101023144929, "grad_norm": 0.150390625, "learning_rate": 0.00022807589152677068, "loss": 0.4937, "step": 143940 }, { "epoch": 7.149597695440549, "grad_norm": 0.15234375, "learning_rate": 0.0002280361577431211, "loss": 0.4746, "step": 143950 }, { "epoch": 7.150094367736168, "grad_norm": 0.150390625, "learning_rate": 0.00022799642395947154, "loss": 0.4798, "step": 143960 }, { "epoch": 7.150591040031787, "grad_norm": 0.134765625, "learning_rate": 0.000227956690175822, "loss": 0.5005, "step": 143970 }, { "epoch": 7.151087712327406, "grad_norm": 0.12890625, "learning_rate": 0.00022791695639217245, "loss": 0.4803, "step": 143980 }, { "epoch": 7.151584384623026, "grad_norm": 0.130859375, "learning_rate": 0.0002278772226085229, "loss": 0.4651, "step": 143990 }, { "epoch": 7.152081056918645, "grad_norm": 0.140625, "learning_rate": 0.00022783748882487337, "loss": 0.4723, "step": 144000 }, { "epoch": 7.152577729214264, "grad_norm": 0.17578125, "learning_rate": 0.0002277977550412238, "loss": 0.4977, "step": 144010 }, { "epoch": 7.153074401509884, "grad_norm": 0.1494140625, "learning_rate": 0.00022775802125757428, "loss": 0.5051, "step": 144020 }, { "epoch": 7.153571073805503, "grad_norm": 0.12890625, "learning_rate": 0.00022771828747392473, "loss": 0.5129, "step": 144030 }, { "epoch": 7.154067746101123, "grad_norm": 0.1376953125, "learning_rate": 0.00022767855369027515, "loss": 0.4887, "step": 144040 }, { "epoch": 7.154564418396742, "grad_norm": 0.17578125, "learning_rate": 0.00022763881990662564, "loss": 0.5253, "step": 144050 }, { "epoch": 7.155061090692361, "grad_norm": 0.134765625, "learning_rate": 0.00022759908612297606, "loss": 0.4954, "step": 144060 }, { "epoch": 7.15555776298798, "grad_norm": 0.1513671875, "learning_rate": 0.0002275593523393265, "loss": 0.4925, "step": 144070 }, { "epoch": 7.1560544352835995, "grad_norm": 0.1298828125, "learning_rate": 0.00022751961855567698, "loss": 0.4706, "step": 144080 }, { "epoch": 7.15655110757922, "grad_norm": 0.1669921875, "learning_rate": 0.00022747988477202742, "loss": 0.4558, "step": 144090 }, { "epoch": 7.157047779874839, "grad_norm": 0.1474609375, "learning_rate": 0.0002274401509883779, "loss": 0.4933, "step": 144100 }, { "epoch": 7.157544452170458, "grad_norm": 0.1572265625, "learning_rate": 0.00022740041720472834, "loss": 0.484, "step": 144110 }, { "epoch": 7.158041124466077, "grad_norm": 0.1357421875, "learning_rate": 0.00022736068342107878, "loss": 0.5118, "step": 144120 }, { "epoch": 7.1585377967616965, "grad_norm": 0.1328125, "learning_rate": 0.00022732094963742925, "loss": 0.4969, "step": 144130 }, { "epoch": 7.159034469057316, "grad_norm": 0.16015625, "learning_rate": 0.0002272812158537797, "loss": 0.4972, "step": 144140 }, { "epoch": 7.159531141352935, "grad_norm": 0.1552734375, "learning_rate": 0.0002272414820701301, "loss": 0.5192, "step": 144150 }, { "epoch": 7.160027813648554, "grad_norm": 0.146484375, "learning_rate": 0.00022720174828648058, "loss": 0.4866, "step": 144160 }, { "epoch": 7.160524485944174, "grad_norm": 0.14453125, "learning_rate": 0.00022716201450283103, "loss": 0.4867, "step": 144170 }, { "epoch": 7.161021158239794, "grad_norm": 0.1328125, "learning_rate": 0.0002271222807191815, "loss": 0.5067, "step": 144180 }, { "epoch": 7.161517830535413, "grad_norm": 0.1474609375, "learning_rate": 0.00022708254693553194, "loss": 0.4791, "step": 144190 }, { "epoch": 7.162014502831032, "grad_norm": 0.1787109375, "learning_rate": 0.0002270428131518824, "loss": 0.4805, "step": 144200 }, { "epoch": 7.162511175126651, "grad_norm": 0.1630859375, "learning_rate": 0.00022700307936823286, "loss": 0.4814, "step": 144210 }, { "epoch": 7.1630078474222705, "grad_norm": 0.146484375, "learning_rate": 0.0002269633455845833, "loss": 0.4961, "step": 144220 }, { "epoch": 7.16350451971789, "grad_norm": 0.150390625, "learning_rate": 0.00022692361180093372, "loss": 0.4874, "step": 144230 }, { "epoch": 7.16400119201351, "grad_norm": 0.1591796875, "learning_rate": 0.00022688387801728422, "loss": 0.5099, "step": 144240 }, { "epoch": 7.164497864309129, "grad_norm": 0.154296875, "learning_rate": 0.00022684414423363464, "loss": 0.5031, "step": 144250 }, { "epoch": 7.164994536604748, "grad_norm": 0.14453125, "learning_rate": 0.00022680441044998514, "loss": 0.5086, "step": 144260 }, { "epoch": 7.1654912089003675, "grad_norm": 0.138671875, "learning_rate": 0.00022676467666633555, "loss": 0.4491, "step": 144270 }, { "epoch": 7.165987881195987, "grad_norm": 0.1923828125, "learning_rate": 0.000226724942882686, "loss": 0.4925, "step": 144280 }, { "epoch": 7.166484553491606, "grad_norm": 0.15625, "learning_rate": 0.00022668520909903647, "loss": 0.4784, "step": 144290 }, { "epoch": 7.166981225787225, "grad_norm": 0.1943359375, "learning_rate": 0.0002266454753153869, "loss": 0.5344, "step": 144300 }, { "epoch": 7.167477898082845, "grad_norm": 0.1435546875, "learning_rate": 0.00022660574153173738, "loss": 0.4703, "step": 144310 }, { "epoch": 7.1679745703784645, "grad_norm": 0.13671875, "learning_rate": 0.00022656600774808783, "loss": 0.502, "step": 144320 }, { "epoch": 7.168471242674084, "grad_norm": 0.169921875, "learning_rate": 0.00022652627396443827, "loss": 0.4975, "step": 144330 }, { "epoch": 7.168967914969703, "grad_norm": 0.2041015625, "learning_rate": 0.00022648654018078874, "loss": 0.4867, "step": 144340 }, { "epoch": 7.169464587265322, "grad_norm": 0.142578125, "learning_rate": 0.0002264468063971392, "loss": 0.4843, "step": 144350 }, { "epoch": 7.169961259560941, "grad_norm": 0.181640625, "learning_rate": 0.0002264070726134896, "loss": 0.4771, "step": 144360 }, { "epoch": 7.170457931856561, "grad_norm": 0.1494140625, "learning_rate": 0.0002263673388298401, "loss": 0.4978, "step": 144370 }, { "epoch": 7.170954604152181, "grad_norm": 0.146484375, "learning_rate": 0.00022632760504619052, "loss": 0.4906, "step": 144380 }, { "epoch": 7.1714512764478, "grad_norm": 0.1337890625, "learning_rate": 0.00022628787126254102, "loss": 0.4696, "step": 144390 }, { "epoch": 7.171947948743419, "grad_norm": 0.140625, "learning_rate": 0.00022624813747889144, "loss": 0.4589, "step": 144400 }, { "epoch": 7.1724446210390385, "grad_norm": 0.1484375, "learning_rate": 0.00022620840369524188, "loss": 0.4837, "step": 144410 }, { "epoch": 7.172941293334658, "grad_norm": 0.15234375, "learning_rate": 0.00022616866991159235, "loss": 0.4701, "step": 144420 }, { "epoch": 7.173437965630277, "grad_norm": 0.142578125, "learning_rate": 0.0002261289361279428, "loss": 0.4935, "step": 144430 }, { "epoch": 7.173934637925896, "grad_norm": 0.158203125, "learning_rate": 0.00022608920234429324, "loss": 0.5182, "step": 144440 }, { "epoch": 7.174431310221516, "grad_norm": 0.1474609375, "learning_rate": 0.0002260494685606437, "loss": 0.4686, "step": 144450 }, { "epoch": 7.1749279825171355, "grad_norm": 0.1630859375, "learning_rate": 0.00022600973477699413, "loss": 0.4775, "step": 144460 }, { "epoch": 7.175424654812755, "grad_norm": 0.14453125, "learning_rate": 0.00022597000099334463, "loss": 0.5098, "step": 144470 }, { "epoch": 7.175921327108374, "grad_norm": 0.146484375, "learning_rate": 0.00022593026720969504, "loss": 0.4768, "step": 144480 }, { "epoch": 7.176417999403993, "grad_norm": 0.1455078125, "learning_rate": 0.0002258905334260455, "loss": 0.4488, "step": 144490 }, { "epoch": 7.176914671699612, "grad_norm": 0.1669921875, "learning_rate": 0.00022585079964239596, "loss": 0.4852, "step": 144500 }, { "epoch": 7.177411343995232, "grad_norm": 0.1552734375, "learning_rate": 0.0002258110658587464, "loss": 0.4838, "step": 144510 }, { "epoch": 7.177908016290852, "grad_norm": 0.1357421875, "learning_rate": 0.00022577133207509685, "loss": 0.4947, "step": 144520 }, { "epoch": 7.178404688586471, "grad_norm": 0.130859375, "learning_rate": 0.00022573159829144732, "loss": 0.4788, "step": 144530 }, { "epoch": 7.17890136088209, "grad_norm": 0.1796875, "learning_rate": 0.00022569186450779776, "loss": 0.4921, "step": 144540 }, { "epoch": 7.179398033177709, "grad_norm": 0.1494140625, "learning_rate": 0.00022565213072414824, "loss": 0.473, "step": 144550 }, { "epoch": 7.179894705473329, "grad_norm": 0.1396484375, "learning_rate": 0.00022561239694049868, "loss": 0.4832, "step": 144560 }, { "epoch": 7.180391377768948, "grad_norm": 0.162109375, "learning_rate": 0.0002255726631568491, "loss": 0.4857, "step": 144570 }, { "epoch": 7.180888050064567, "grad_norm": 0.1611328125, "learning_rate": 0.0002255329293731996, "loss": 0.4534, "step": 144580 }, { "epoch": 7.181384722360186, "grad_norm": 0.1435546875, "learning_rate": 0.00022549319558955, "loss": 0.5032, "step": 144590 }, { "epoch": 7.181881394655806, "grad_norm": 0.130859375, "learning_rate": 0.00022545346180590046, "loss": 0.4603, "step": 144600 }, { "epoch": 7.182378066951426, "grad_norm": 0.1416015625, "learning_rate": 0.00022541372802225093, "loss": 0.4819, "step": 144610 }, { "epoch": 7.182874739247045, "grad_norm": 0.171875, "learning_rate": 0.00022537399423860137, "loss": 0.5002, "step": 144620 }, { "epoch": 7.183371411542664, "grad_norm": 0.1396484375, "learning_rate": 0.00022533426045495184, "loss": 0.4795, "step": 144630 }, { "epoch": 7.183868083838283, "grad_norm": 0.171875, "learning_rate": 0.0002252945266713023, "loss": 0.4916, "step": 144640 }, { "epoch": 7.184364756133903, "grad_norm": 0.1435546875, "learning_rate": 0.00022525479288765273, "loss": 0.4951, "step": 144650 }, { "epoch": 7.184861428429522, "grad_norm": 0.138671875, "learning_rate": 0.0002252150591040032, "loss": 0.5165, "step": 144660 }, { "epoch": 7.185358100725142, "grad_norm": 0.140625, "learning_rate": 0.00022517532532035365, "loss": 0.4848, "step": 144670 }, { "epoch": 7.185854773020761, "grad_norm": 0.1416015625, "learning_rate": 0.00022513559153670407, "loss": 0.4596, "step": 144680 }, { "epoch": 7.18635144531638, "grad_norm": 0.140625, "learning_rate": 0.00022509585775305456, "loss": 0.4846, "step": 144690 }, { "epoch": 7.186848117612, "grad_norm": 0.154296875, "learning_rate": 0.00022505612396940498, "loss": 0.4743, "step": 144700 }, { "epoch": 7.187344789907619, "grad_norm": 0.140625, "learning_rate": 0.00022501639018575545, "loss": 0.5068, "step": 144710 }, { "epoch": 7.187841462203238, "grad_norm": 0.142578125, "learning_rate": 0.0002249766564021059, "loss": 0.4759, "step": 144720 }, { "epoch": 7.188338134498857, "grad_norm": 0.1328125, "learning_rate": 0.00022493692261845634, "loss": 0.4783, "step": 144730 }, { "epoch": 7.188834806794477, "grad_norm": 0.12255859375, "learning_rate": 0.0002248971888348068, "loss": 0.4935, "step": 144740 }, { "epoch": 7.189331479090097, "grad_norm": 0.150390625, "learning_rate": 0.00022485745505115726, "loss": 0.4897, "step": 144750 }, { "epoch": 7.189828151385716, "grad_norm": 0.1337890625, "learning_rate": 0.00022481772126750773, "loss": 0.5137, "step": 144760 }, { "epoch": 7.190324823681335, "grad_norm": 0.1552734375, "learning_rate": 0.00022477798748385817, "loss": 0.4782, "step": 144770 }, { "epoch": 7.190821495976954, "grad_norm": 0.169921875, "learning_rate": 0.0002247382537002086, "loss": 0.4985, "step": 144780 }, { "epoch": 7.1913181682725735, "grad_norm": 0.1337890625, "learning_rate": 0.0002246985199165591, "loss": 0.4739, "step": 144790 }, { "epoch": 7.191814840568193, "grad_norm": 0.1669921875, "learning_rate": 0.0002246587861329095, "loss": 0.4992, "step": 144800 }, { "epoch": 7.192311512863813, "grad_norm": 0.14453125, "learning_rate": 0.00022461905234925995, "loss": 0.4707, "step": 144810 }, { "epoch": 7.192808185159432, "grad_norm": 0.1376953125, "learning_rate": 0.00022457931856561042, "loss": 0.4752, "step": 144820 }, { "epoch": 7.193304857455051, "grad_norm": 0.142578125, "learning_rate": 0.00022453958478196086, "loss": 0.4861, "step": 144830 }, { "epoch": 7.1938015297506706, "grad_norm": 0.1640625, "learning_rate": 0.00022449985099831134, "loss": 0.4754, "step": 144840 }, { "epoch": 7.19429820204629, "grad_norm": 0.1884765625, "learning_rate": 0.00022446011721466178, "loss": 0.483, "step": 144850 }, { "epoch": 7.194794874341909, "grad_norm": 0.150390625, "learning_rate": 0.00022442038343101222, "loss": 0.4937, "step": 144860 }, { "epoch": 7.195291546637528, "grad_norm": 0.17578125, "learning_rate": 0.0002243806496473627, "loss": 0.4744, "step": 144870 }, { "epoch": 7.1957882189331475, "grad_norm": 0.142578125, "learning_rate": 0.00022434091586371314, "loss": 0.502, "step": 144880 }, { "epoch": 7.196284891228768, "grad_norm": 0.14453125, "learning_rate": 0.00022430118208006356, "loss": 0.4647, "step": 144890 }, { "epoch": 7.196781563524387, "grad_norm": 0.1474609375, "learning_rate": 0.00022426144829641406, "loss": 0.5108, "step": 144900 }, { "epoch": 7.197278235820006, "grad_norm": 0.146484375, "learning_rate": 0.00022422171451276447, "loss": 0.5119, "step": 144910 }, { "epoch": 7.197774908115625, "grad_norm": 0.166015625, "learning_rate": 0.00022418198072911497, "loss": 0.4668, "step": 144920 }, { "epoch": 7.1982715804112445, "grad_norm": 0.1552734375, "learning_rate": 0.0002241422469454654, "loss": 0.513, "step": 144930 }, { "epoch": 7.198768252706864, "grad_norm": 0.154296875, "learning_rate": 0.00022410251316181583, "loss": 0.4853, "step": 144940 }, { "epoch": 7.199264925002483, "grad_norm": 0.1416015625, "learning_rate": 0.0002240627793781663, "loss": 0.4552, "step": 144950 }, { "epoch": 7.199761597298103, "grad_norm": 0.1572265625, "learning_rate": 0.00022402304559451675, "loss": 0.4684, "step": 144960 }, { "epoch": 7.200258269593722, "grad_norm": 0.1318359375, "learning_rate": 0.0002239833118108672, "loss": 0.4928, "step": 144970 }, { "epoch": 7.2007549418893415, "grad_norm": 0.16015625, "learning_rate": 0.00022394357802721766, "loss": 0.5074, "step": 144980 }, { "epoch": 7.201251614184961, "grad_norm": 0.1435546875, "learning_rate": 0.0002239038442435681, "loss": 0.4706, "step": 144990 }, { "epoch": 7.20174828648058, "grad_norm": 0.15625, "learning_rate": 0.00022386411045991858, "loss": 0.5032, "step": 145000 }, { "epoch": 7.202244958776199, "grad_norm": 0.146484375, "learning_rate": 0.000223824376676269, "loss": 0.4878, "step": 145010 }, { "epoch": 7.202741631071818, "grad_norm": 0.138671875, "learning_rate": 0.00022378464289261944, "loss": 0.4989, "step": 145020 }, { "epoch": 7.2032383033674385, "grad_norm": 0.1357421875, "learning_rate": 0.0002237449091089699, "loss": 0.4722, "step": 145030 }, { "epoch": 7.203734975663058, "grad_norm": 0.1474609375, "learning_rate": 0.00022370517532532036, "loss": 0.4766, "step": 145040 }, { "epoch": 7.204231647958677, "grad_norm": 0.1474609375, "learning_rate": 0.0002236654415416708, "loss": 0.5251, "step": 145050 }, { "epoch": 7.204728320254296, "grad_norm": 0.140625, "learning_rate": 0.00022362570775802127, "loss": 0.4952, "step": 145060 }, { "epoch": 7.205224992549915, "grad_norm": 0.189453125, "learning_rate": 0.00022358597397437172, "loss": 0.4873, "step": 145070 }, { "epoch": 7.205721664845535, "grad_norm": 0.1416015625, "learning_rate": 0.0002235462401907222, "loss": 0.4997, "step": 145080 }, { "epoch": 7.206218337141154, "grad_norm": 0.1796875, "learning_rate": 0.00022350650640707263, "loss": 0.4896, "step": 145090 }, { "epoch": 7.206715009436774, "grad_norm": 0.1455078125, "learning_rate": 0.00022346677262342305, "loss": 0.4994, "step": 145100 }, { "epoch": 7.207211681732393, "grad_norm": 0.1328125, "learning_rate": 0.00022342703883977355, "loss": 0.5009, "step": 145110 }, { "epoch": 7.2077083540280125, "grad_norm": 0.1474609375, "learning_rate": 0.00022338730505612396, "loss": 0.4934, "step": 145120 }, { "epoch": 7.208205026323632, "grad_norm": 0.1298828125, "learning_rate": 0.00022334757127247446, "loss": 0.4681, "step": 145130 }, { "epoch": 7.208701698619251, "grad_norm": 0.2001953125, "learning_rate": 0.00022330783748882488, "loss": 0.4923, "step": 145140 }, { "epoch": 7.20919837091487, "grad_norm": 0.189453125, "learning_rate": 0.00022326810370517532, "loss": 0.4874, "step": 145150 }, { "epoch": 7.209695043210489, "grad_norm": 0.142578125, "learning_rate": 0.0002232283699215258, "loss": 0.501, "step": 145160 }, { "epoch": 7.2101917155061095, "grad_norm": 0.1455078125, "learning_rate": 0.00022318863613787624, "loss": 0.4875, "step": 145170 }, { "epoch": 7.210688387801729, "grad_norm": 0.134765625, "learning_rate": 0.00022314890235422668, "loss": 0.4814, "step": 145180 }, { "epoch": 7.211185060097348, "grad_norm": 0.1591796875, "learning_rate": 0.00022310916857057716, "loss": 0.4958, "step": 145190 }, { "epoch": 7.211681732392967, "grad_norm": 0.142578125, "learning_rate": 0.0002230694347869276, "loss": 0.4566, "step": 145200 }, { "epoch": 7.212178404688586, "grad_norm": 0.154296875, "learning_rate": 0.00022302970100327807, "loss": 0.5105, "step": 145210 }, { "epoch": 7.212675076984206, "grad_norm": 0.1435546875, "learning_rate": 0.00022298996721962852, "loss": 0.4881, "step": 145220 }, { "epoch": 7.213171749279825, "grad_norm": 0.1494140625, "learning_rate": 0.00022295023343597893, "loss": 0.4852, "step": 145230 }, { "epoch": 7.213668421575445, "grad_norm": 0.1513671875, "learning_rate": 0.00022291049965232943, "loss": 0.4856, "step": 145240 }, { "epoch": 7.214165093871064, "grad_norm": 0.1357421875, "learning_rate": 0.00022287076586867985, "loss": 0.5042, "step": 145250 }, { "epoch": 7.214661766166683, "grad_norm": 0.16015625, "learning_rate": 0.0002228310320850303, "loss": 0.4929, "step": 145260 }, { "epoch": 7.215158438462303, "grad_norm": 0.1513671875, "learning_rate": 0.00022279129830138076, "loss": 0.4727, "step": 145270 }, { "epoch": 7.215655110757922, "grad_norm": 0.1875, "learning_rate": 0.0002227515645177312, "loss": 0.5022, "step": 145280 }, { "epoch": 7.216151783053541, "grad_norm": 0.1513671875, "learning_rate": 0.00022271183073408168, "loss": 0.4897, "step": 145290 }, { "epoch": 7.21664845534916, "grad_norm": 0.1455078125, "learning_rate": 0.00022267209695043212, "loss": 0.5057, "step": 145300 }, { "epoch": 7.21714512764478, "grad_norm": 0.150390625, "learning_rate": 0.00022263236316678254, "loss": 0.464, "step": 145310 }, { "epoch": 7.2176417999404, "grad_norm": 0.1357421875, "learning_rate": 0.00022259262938313304, "loss": 0.4611, "step": 145320 }, { "epoch": 7.218138472236019, "grad_norm": 0.1376953125, "learning_rate": 0.00022255289559948346, "loss": 0.4799, "step": 145330 }, { "epoch": 7.218635144531638, "grad_norm": 0.142578125, "learning_rate": 0.0002225131618158339, "loss": 0.4788, "step": 145340 }, { "epoch": 7.219131816827257, "grad_norm": 0.146484375, "learning_rate": 0.00022247342803218437, "loss": 0.4758, "step": 145350 }, { "epoch": 7.219628489122877, "grad_norm": 0.1650390625, "learning_rate": 0.00022243369424853482, "loss": 0.4607, "step": 145360 }, { "epoch": 7.220125161418496, "grad_norm": 0.1650390625, "learning_rate": 0.0002223939604648853, "loss": 0.512, "step": 145370 }, { "epoch": 7.220621833714115, "grad_norm": 0.1806640625, "learning_rate": 0.00022235422668123573, "loss": 0.4601, "step": 145380 }, { "epoch": 7.221118506009735, "grad_norm": 0.138671875, "learning_rate": 0.00022231449289758618, "loss": 0.4845, "step": 145390 }, { "epoch": 7.221615178305354, "grad_norm": 0.150390625, "learning_rate": 0.00022227475911393665, "loss": 0.4872, "step": 145400 }, { "epoch": 7.222111850600974, "grad_norm": 0.1640625, "learning_rate": 0.0002222350253302871, "loss": 0.4676, "step": 145410 }, { "epoch": 7.222608522896593, "grad_norm": 0.1953125, "learning_rate": 0.0002221952915466375, "loss": 0.4826, "step": 145420 }, { "epoch": 7.223105195192212, "grad_norm": 0.1572265625, "learning_rate": 0.000222155557762988, "loss": 0.5007, "step": 145430 }, { "epoch": 7.223601867487831, "grad_norm": 0.1376953125, "learning_rate": 0.00022211582397933842, "loss": 0.4959, "step": 145440 }, { "epoch": 7.2240985397834505, "grad_norm": 0.158203125, "learning_rate": 0.00022207609019568892, "loss": 0.5069, "step": 145450 }, { "epoch": 7.224595212079071, "grad_norm": 0.166015625, "learning_rate": 0.00022203635641203934, "loss": 0.5187, "step": 145460 }, { "epoch": 7.22509188437469, "grad_norm": 0.1376953125, "learning_rate": 0.00022199662262838978, "loss": 0.4887, "step": 145470 }, { "epoch": 7.225588556670309, "grad_norm": 0.1513671875, "learning_rate": 0.00022195688884474026, "loss": 0.4891, "step": 145480 }, { "epoch": 7.226085228965928, "grad_norm": 0.15234375, "learning_rate": 0.0002219171550610907, "loss": 0.5209, "step": 145490 }, { "epoch": 7.2265819012615475, "grad_norm": 0.154296875, "learning_rate": 0.00022187742127744114, "loss": 0.4798, "step": 145500 }, { "epoch": 7.227078573557167, "grad_norm": 0.1279296875, "learning_rate": 0.00022183768749379162, "loss": 0.529, "step": 145510 }, { "epoch": 7.227575245852786, "grad_norm": 0.140625, "learning_rate": 0.00022179795371014206, "loss": 0.4911, "step": 145520 }, { "epoch": 7.228071918148406, "grad_norm": 0.13671875, "learning_rate": 0.00022175821992649253, "loss": 0.499, "step": 145530 }, { "epoch": 7.228568590444025, "grad_norm": 0.1708984375, "learning_rate": 0.00022171848614284298, "loss": 0.49, "step": 145540 }, { "epoch": 7.229065262739645, "grad_norm": 0.134765625, "learning_rate": 0.0002216787523591934, "loss": 0.5261, "step": 145550 }, { "epoch": 7.229561935035264, "grad_norm": 0.14453125, "learning_rate": 0.00022163901857554386, "loss": 0.4873, "step": 145560 }, { "epoch": 7.230058607330883, "grad_norm": 0.220703125, "learning_rate": 0.0002215992847918943, "loss": 0.494, "step": 145570 }, { "epoch": 7.230555279626502, "grad_norm": 0.1728515625, "learning_rate": 0.00022155955100824478, "loss": 0.4886, "step": 145580 }, { "epoch": 7.2310519519221215, "grad_norm": 0.1416015625, "learning_rate": 0.00022151981722459522, "loss": 0.4963, "step": 145590 }, { "epoch": 7.231548624217741, "grad_norm": 0.1328125, "learning_rate": 0.00022148008344094567, "loss": 0.4728, "step": 145600 }, { "epoch": 7.232045296513361, "grad_norm": 0.1767578125, "learning_rate": 0.00022144034965729614, "loss": 0.4815, "step": 145610 }, { "epoch": 7.23254196880898, "grad_norm": 0.173828125, "learning_rate": 0.00022140061587364658, "loss": 0.5064, "step": 145620 }, { "epoch": 7.233038641104599, "grad_norm": 0.1435546875, "learning_rate": 0.000221360882089997, "loss": 0.4569, "step": 145630 }, { "epoch": 7.2335353134002185, "grad_norm": 0.1630859375, "learning_rate": 0.0002213211483063475, "loss": 0.4598, "step": 145640 }, { "epoch": 7.234031985695838, "grad_norm": 0.1533203125, "learning_rate": 0.00022128141452269792, "loss": 0.4865, "step": 145650 }, { "epoch": 7.234528657991457, "grad_norm": 0.138671875, "learning_rate": 0.00022124168073904841, "loss": 0.5023, "step": 145660 }, { "epoch": 7.235025330287076, "grad_norm": 0.1552734375, "learning_rate": 0.00022120194695539883, "loss": 0.4864, "step": 145670 }, { "epoch": 7.235522002582696, "grad_norm": 0.1591796875, "learning_rate": 0.00022116221317174928, "loss": 0.5131, "step": 145680 }, { "epoch": 7.2360186748783155, "grad_norm": 0.1533203125, "learning_rate": 0.00022112247938809975, "loss": 0.4828, "step": 145690 }, { "epoch": 7.236515347173935, "grad_norm": 0.1455078125, "learning_rate": 0.0002210827456044502, "loss": 0.4809, "step": 145700 }, { "epoch": 7.237012019469554, "grad_norm": 0.1396484375, "learning_rate": 0.00022104301182080064, "loss": 0.4749, "step": 145710 }, { "epoch": 7.237508691765173, "grad_norm": 0.1416015625, "learning_rate": 0.0002210032780371511, "loss": 0.5027, "step": 145720 }, { "epoch": 7.238005364060792, "grad_norm": 0.154296875, "learning_rate": 0.00022096354425350155, "loss": 0.521, "step": 145730 }, { "epoch": 7.238502036356412, "grad_norm": 0.1650390625, "learning_rate": 0.00022092381046985202, "loss": 0.4808, "step": 145740 }, { "epoch": 7.238998708652032, "grad_norm": 0.15625, "learning_rate": 0.00022088407668620247, "loss": 0.5053, "step": 145750 }, { "epoch": 7.239495380947651, "grad_norm": 0.1337890625, "learning_rate": 0.00022084434290255288, "loss": 0.4967, "step": 145760 }, { "epoch": 7.23999205324327, "grad_norm": 0.134765625, "learning_rate": 0.00022080460911890338, "loss": 0.4675, "step": 145770 }, { "epoch": 7.2404887255388894, "grad_norm": 0.1376953125, "learning_rate": 0.0002207648753352538, "loss": 0.5069, "step": 145780 }, { "epoch": 7.240985397834509, "grad_norm": 0.1357421875, "learning_rate": 0.00022072514155160424, "loss": 0.4736, "step": 145790 }, { "epoch": 7.241482070130128, "grad_norm": 0.1357421875, "learning_rate": 0.00022068540776795472, "loss": 0.4937, "step": 145800 }, { "epoch": 7.241978742425747, "grad_norm": 0.1328125, "learning_rate": 0.00022064567398430516, "loss": 0.4887, "step": 145810 }, { "epoch": 7.242475414721367, "grad_norm": 0.1513671875, "learning_rate": 0.00022060594020065563, "loss": 0.5057, "step": 145820 }, { "epoch": 7.2429720870169865, "grad_norm": 0.1416015625, "learning_rate": 0.00022056620641700608, "loss": 0.4826, "step": 145830 }, { "epoch": 7.243468759312606, "grad_norm": 0.1494140625, "learning_rate": 0.00022052647263335652, "loss": 0.4793, "step": 145840 }, { "epoch": 7.243965431608225, "grad_norm": 0.150390625, "learning_rate": 0.000220486738849707, "loss": 0.4725, "step": 145850 }, { "epoch": 7.244462103903844, "grad_norm": 0.1669921875, "learning_rate": 0.0002204470050660574, "loss": 0.5033, "step": 145860 }, { "epoch": 7.244958776199463, "grad_norm": 0.1337890625, "learning_rate": 0.00022040727128240785, "loss": 0.4814, "step": 145870 }, { "epoch": 7.245455448495083, "grad_norm": 0.1455078125, "learning_rate": 0.00022036753749875832, "loss": 0.4836, "step": 145880 }, { "epoch": 7.245952120790703, "grad_norm": 0.1298828125, "learning_rate": 0.00022032780371510877, "loss": 0.4845, "step": 145890 }, { "epoch": 7.246448793086322, "grad_norm": 0.142578125, "learning_rate": 0.00022028806993145924, "loss": 0.4973, "step": 145900 }, { "epoch": 7.246945465381941, "grad_norm": 0.13671875, "learning_rate": 0.00022024833614780968, "loss": 0.4808, "step": 145910 }, { "epoch": 7.24744213767756, "grad_norm": 0.1513671875, "learning_rate": 0.00022020860236416013, "loss": 0.494, "step": 145920 }, { "epoch": 7.24793880997318, "grad_norm": 0.1474609375, "learning_rate": 0.0002201688685805106, "loss": 0.4976, "step": 145930 }, { "epoch": 7.248435482268799, "grad_norm": 0.146484375, "learning_rate": 0.00022012913479686104, "loss": 0.4904, "step": 145940 }, { "epoch": 7.248932154564418, "grad_norm": 0.1611328125, "learning_rate": 0.00022008940101321146, "loss": 0.4855, "step": 145950 }, { "epoch": 7.249428826860038, "grad_norm": 0.1376953125, "learning_rate": 0.00022004966722956196, "loss": 0.4814, "step": 145960 }, { "epoch": 7.249925499155657, "grad_norm": 0.142578125, "learning_rate": 0.00022000993344591238, "loss": 0.4844, "step": 145970 }, { "epoch": 7.250422171451277, "grad_norm": 0.1630859375, "learning_rate": 0.00021997019966226287, "loss": 0.4592, "step": 145980 }, { "epoch": 7.250918843746896, "grad_norm": 0.1376953125, "learning_rate": 0.0002199304658786133, "loss": 0.4822, "step": 145990 }, { "epoch": 7.251415516042515, "grad_norm": 0.1357421875, "learning_rate": 0.00021989073209496374, "loss": 0.5044, "step": 146000 }, { "epoch": 7.251912188338134, "grad_norm": 0.12890625, "learning_rate": 0.0002198509983113142, "loss": 0.4562, "step": 146010 }, { "epoch": 7.252408860633754, "grad_norm": 0.1484375, "learning_rate": 0.00021981126452766465, "loss": 0.4825, "step": 146020 }, { "epoch": 7.252905532929373, "grad_norm": 0.146484375, "learning_rate": 0.00021977153074401512, "loss": 0.4722, "step": 146030 }, { "epoch": 7.253402205224993, "grad_norm": 0.140625, "learning_rate": 0.00021973179696036557, "loss": 0.5035, "step": 146040 }, { "epoch": 7.253898877520612, "grad_norm": 0.1279296875, "learning_rate": 0.000219692063176716, "loss": 0.4719, "step": 146050 }, { "epoch": 7.254395549816231, "grad_norm": 0.15625, "learning_rate": 0.00021965232939306648, "loss": 0.471, "step": 146060 }, { "epoch": 7.254892222111851, "grad_norm": 0.16015625, "learning_rate": 0.00021961259560941693, "loss": 0.4632, "step": 146070 }, { "epoch": 7.25538889440747, "grad_norm": 0.1826171875, "learning_rate": 0.00021957286182576734, "loss": 0.4776, "step": 146080 }, { "epoch": 7.255885566703089, "grad_norm": 0.158203125, "learning_rate": 0.00021953312804211784, "loss": 0.4698, "step": 146090 }, { "epoch": 7.256382238998708, "grad_norm": 0.1298828125, "learning_rate": 0.00021949339425846826, "loss": 0.4762, "step": 146100 }, { "epoch": 7.256878911294328, "grad_norm": 0.1455078125, "learning_rate": 0.00021945366047481876, "loss": 0.4938, "step": 146110 }, { "epoch": 7.257375583589948, "grad_norm": 0.142578125, "learning_rate": 0.00021941392669116918, "loss": 0.4769, "step": 146120 }, { "epoch": 7.257872255885567, "grad_norm": 0.1708984375, "learning_rate": 0.00021937419290751962, "loss": 0.5028, "step": 146130 }, { "epoch": 7.258368928181186, "grad_norm": 0.1474609375, "learning_rate": 0.0002193344591238701, "loss": 0.5018, "step": 146140 }, { "epoch": 7.258865600476805, "grad_norm": 0.1533203125, "learning_rate": 0.00021929472534022054, "loss": 0.4779, "step": 146150 }, { "epoch": 7.2593622727724245, "grad_norm": 0.142578125, "learning_rate": 0.00021925499155657095, "loss": 0.4888, "step": 146160 }, { "epoch": 7.259858945068044, "grad_norm": 0.1455078125, "learning_rate": 0.00021921525777292145, "loss": 0.5051, "step": 146170 }, { "epoch": 7.260355617363664, "grad_norm": 0.1416015625, "learning_rate": 0.00021917552398927187, "loss": 0.5003, "step": 146180 }, { "epoch": 7.260852289659283, "grad_norm": 0.134765625, "learning_rate": 0.00021913579020562237, "loss": 0.48, "step": 146190 }, { "epoch": 7.261348961954902, "grad_norm": 0.16796875, "learning_rate": 0.00021909605642197278, "loss": 0.493, "step": 146200 }, { "epoch": 7.2618456342505215, "grad_norm": 0.1396484375, "learning_rate": 0.00021905632263832323, "loss": 0.4842, "step": 146210 }, { "epoch": 7.262342306546141, "grad_norm": 0.1767578125, "learning_rate": 0.0002190165888546737, "loss": 0.493, "step": 146220 }, { "epoch": 7.26283897884176, "grad_norm": 0.12353515625, "learning_rate": 0.00021897685507102414, "loss": 0.4798, "step": 146230 }, { "epoch": 7.263335651137379, "grad_norm": 0.140625, "learning_rate": 0.0002189371212873746, "loss": 0.4905, "step": 146240 }, { "epoch": 7.2638323234329985, "grad_norm": 0.1513671875, "learning_rate": 0.00021889738750372506, "loss": 0.4747, "step": 146250 }, { "epoch": 7.264328995728619, "grad_norm": 0.1455078125, "learning_rate": 0.0002188576537200755, "loss": 0.4507, "step": 146260 }, { "epoch": 7.264825668024238, "grad_norm": 0.15234375, "learning_rate": 0.00021881791993642598, "loss": 0.4637, "step": 146270 }, { "epoch": 7.265322340319857, "grad_norm": 0.140625, "learning_rate": 0.00021877818615277642, "loss": 0.4874, "step": 146280 }, { "epoch": 7.265819012615476, "grad_norm": 0.1533203125, "learning_rate": 0.00021873845236912684, "loss": 0.5362, "step": 146290 }, { "epoch": 7.2663156849110955, "grad_norm": 0.138671875, "learning_rate": 0.00021869871858547733, "loss": 0.477, "step": 146300 }, { "epoch": 7.266812357206715, "grad_norm": 0.181640625, "learning_rate": 0.00021865898480182775, "loss": 0.4905, "step": 146310 }, { "epoch": 7.267309029502334, "grad_norm": 0.1689453125, "learning_rate": 0.0002186192510181782, "loss": 0.4728, "step": 146320 }, { "epoch": 7.267805701797954, "grad_norm": 0.13671875, "learning_rate": 0.00021857951723452867, "loss": 0.4751, "step": 146330 }, { "epoch": 7.268302374093573, "grad_norm": 0.181640625, "learning_rate": 0.0002185397834508791, "loss": 0.5212, "step": 146340 }, { "epoch": 7.2687990463891925, "grad_norm": 0.146484375, "learning_rate": 0.00021850004966722958, "loss": 0.4893, "step": 146350 }, { "epoch": 7.269295718684812, "grad_norm": 0.1376953125, "learning_rate": 0.00021846031588358003, "loss": 0.4892, "step": 146360 }, { "epoch": 7.269792390980431, "grad_norm": 0.12890625, "learning_rate": 0.00021842058209993047, "loss": 0.4533, "step": 146370 }, { "epoch": 7.27028906327605, "grad_norm": 0.1513671875, "learning_rate": 0.00021838084831628094, "loss": 0.4917, "step": 146380 }, { "epoch": 7.270785735571669, "grad_norm": 0.150390625, "learning_rate": 0.0002183411145326314, "loss": 0.4851, "step": 146390 }, { "epoch": 7.2712824078672895, "grad_norm": 0.21875, "learning_rate": 0.00021830138074898186, "loss": 0.5024, "step": 146400 }, { "epoch": 7.271779080162909, "grad_norm": 0.185546875, "learning_rate": 0.00021826164696533228, "loss": 0.5082, "step": 146410 }, { "epoch": 7.272275752458528, "grad_norm": 0.2041015625, "learning_rate": 0.00021822191318168272, "loss": 0.465, "step": 146420 }, { "epoch": 7.272772424754147, "grad_norm": 0.140625, "learning_rate": 0.0002181821793980332, "loss": 0.496, "step": 146430 }, { "epoch": 7.273269097049766, "grad_norm": 0.1318359375, "learning_rate": 0.00021814244561438364, "loss": 0.4859, "step": 146440 }, { "epoch": 7.273765769345386, "grad_norm": 0.1396484375, "learning_rate": 0.00021810271183073408, "loss": 0.502, "step": 146450 }, { "epoch": 7.274262441641005, "grad_norm": 0.142578125, "learning_rate": 0.00021806297804708455, "loss": 0.5207, "step": 146460 }, { "epoch": 7.274759113936625, "grad_norm": 0.15625, "learning_rate": 0.000218023244263435, "loss": 0.4643, "step": 146470 }, { "epoch": 7.275255786232244, "grad_norm": 0.134765625, "learning_rate": 0.00021798351047978547, "loss": 0.4876, "step": 146480 }, { "epoch": 7.2757524585278635, "grad_norm": 0.15234375, "learning_rate": 0.0002179437766961359, "loss": 0.4683, "step": 146490 }, { "epoch": 7.276249130823483, "grad_norm": 0.166015625, "learning_rate": 0.00021790404291248633, "loss": 0.4881, "step": 146500 }, { "epoch": 7.276745803119102, "grad_norm": 0.1455078125, "learning_rate": 0.00021786430912883683, "loss": 0.4642, "step": 146510 }, { "epoch": 7.277242475414721, "grad_norm": 0.1455078125, "learning_rate": 0.00021782457534518724, "loss": 0.4739, "step": 146520 }, { "epoch": 7.27773914771034, "grad_norm": 0.1396484375, "learning_rate": 0.0002177848415615377, "loss": 0.467, "step": 146530 }, { "epoch": 7.2782358200059605, "grad_norm": 0.1826171875, "learning_rate": 0.00021774510777788816, "loss": 0.5216, "step": 146540 }, { "epoch": 7.27873249230158, "grad_norm": 0.1455078125, "learning_rate": 0.0002177053739942386, "loss": 0.5084, "step": 146550 }, { "epoch": 7.279229164597199, "grad_norm": 0.1630859375, "learning_rate": 0.00021766564021058908, "loss": 0.5143, "step": 146560 }, { "epoch": 7.279725836892818, "grad_norm": 0.1513671875, "learning_rate": 0.00021762590642693952, "loss": 0.519, "step": 146570 }, { "epoch": 7.280222509188437, "grad_norm": 0.1435546875, "learning_rate": 0.00021758617264328996, "loss": 0.4847, "step": 146580 }, { "epoch": 7.280719181484057, "grad_norm": 0.14453125, "learning_rate": 0.00021754643885964043, "loss": 0.4794, "step": 146590 }, { "epoch": 7.281215853779676, "grad_norm": 0.1328125, "learning_rate": 0.00021750670507599088, "loss": 0.4951, "step": 146600 }, { "epoch": 7.281712526075296, "grad_norm": 0.1416015625, "learning_rate": 0.0002174669712923413, "loss": 0.5, "step": 146610 }, { "epoch": 7.282209198370915, "grad_norm": 0.1513671875, "learning_rate": 0.0002174272375086918, "loss": 0.4903, "step": 146620 }, { "epoch": 7.282705870666534, "grad_norm": 0.1513671875, "learning_rate": 0.0002173875037250422, "loss": 0.4716, "step": 146630 }, { "epoch": 7.283202542962154, "grad_norm": 0.1337890625, "learning_rate": 0.0002173477699413927, "loss": 0.4708, "step": 146640 }, { "epoch": 7.283699215257773, "grad_norm": 0.140625, "learning_rate": 0.00021730803615774313, "loss": 0.5213, "step": 146650 }, { "epoch": 7.284195887553392, "grad_norm": 0.1630859375, "learning_rate": 0.00021726830237409357, "loss": 0.5167, "step": 146660 }, { "epoch": 7.284692559849011, "grad_norm": 0.140625, "learning_rate": 0.00021722856859044404, "loss": 0.4967, "step": 146670 }, { "epoch": 7.285189232144631, "grad_norm": 0.1767578125, "learning_rate": 0.0002171888348067945, "loss": 0.5061, "step": 146680 }, { "epoch": 7.285685904440251, "grad_norm": 0.142578125, "learning_rate": 0.00021714910102314493, "loss": 0.4926, "step": 146690 }, { "epoch": 7.28618257673587, "grad_norm": 0.1376953125, "learning_rate": 0.0002171093672394954, "loss": 0.4887, "step": 146700 }, { "epoch": 7.286679249031489, "grad_norm": 0.1572265625, "learning_rate": 0.00021706963345584582, "loss": 0.5073, "step": 146710 }, { "epoch": 7.287175921327108, "grad_norm": 0.16015625, "learning_rate": 0.00021702989967219632, "loss": 0.4765, "step": 146720 }, { "epoch": 7.287672593622728, "grad_norm": 0.15234375, "learning_rate": 0.00021699016588854674, "loss": 0.4991, "step": 146730 }, { "epoch": 7.288169265918347, "grad_norm": 0.140625, "learning_rate": 0.00021695043210489718, "loss": 0.4757, "step": 146740 }, { "epoch": 7.288665938213966, "grad_norm": 0.15625, "learning_rate": 0.00021691069832124765, "loss": 0.4766, "step": 146750 }, { "epoch": 7.289162610509586, "grad_norm": 0.146484375, "learning_rate": 0.0002168709645375981, "loss": 0.5002, "step": 146760 }, { "epoch": 7.289659282805205, "grad_norm": 0.1357421875, "learning_rate": 0.00021683123075394854, "loss": 0.4682, "step": 146770 }, { "epoch": 7.290155955100825, "grad_norm": 0.1513671875, "learning_rate": 0.000216791496970299, "loss": 0.4651, "step": 146780 }, { "epoch": 7.290652627396444, "grad_norm": 0.1376953125, "learning_rate": 0.00021675176318664946, "loss": 0.4819, "step": 146790 }, { "epoch": 7.291149299692063, "grad_norm": 0.1357421875, "learning_rate": 0.00021671202940299993, "loss": 0.4793, "step": 146800 }, { "epoch": 7.291645971987682, "grad_norm": 0.185546875, "learning_rate": 0.00021667229561935037, "loss": 0.4844, "step": 146810 }, { "epoch": 7.2921426442833015, "grad_norm": 0.154296875, "learning_rate": 0.0002166325618357008, "loss": 0.4949, "step": 146820 }, { "epoch": 7.292639316578922, "grad_norm": 0.1484375, "learning_rate": 0.0002165928280520513, "loss": 0.4655, "step": 146830 }, { "epoch": 7.293135988874541, "grad_norm": 0.1572265625, "learning_rate": 0.0002165530942684017, "loss": 0.4366, "step": 146840 }, { "epoch": 7.29363266117016, "grad_norm": 0.134765625, "learning_rate": 0.0002165133604847522, "loss": 0.4994, "step": 146850 }, { "epoch": 7.294129333465779, "grad_norm": 0.14453125, "learning_rate": 0.00021647362670110262, "loss": 0.4936, "step": 146860 }, { "epoch": 7.2946260057613985, "grad_norm": 0.14453125, "learning_rate": 0.00021643389291745306, "loss": 0.4645, "step": 146870 }, { "epoch": 7.295122678057018, "grad_norm": 0.1630859375, "learning_rate": 0.00021639415913380354, "loss": 0.4965, "step": 146880 }, { "epoch": 7.295619350352637, "grad_norm": 0.1455078125, "learning_rate": 0.00021635442535015398, "loss": 0.5206, "step": 146890 }, { "epoch": 7.296116022648257, "grad_norm": 0.1416015625, "learning_rate": 0.00021631469156650442, "loss": 0.4899, "step": 146900 }, { "epoch": 7.296612694943876, "grad_norm": 0.1572265625, "learning_rate": 0.0002162749577828549, "loss": 0.4816, "step": 146910 }, { "epoch": 7.2971093672394955, "grad_norm": 0.142578125, "learning_rate": 0.00021623522399920534, "loss": 0.4872, "step": 146920 }, { "epoch": 7.297606039535115, "grad_norm": 0.1435546875, "learning_rate": 0.0002161954902155558, "loss": 0.5007, "step": 146930 }, { "epoch": 7.298102711830734, "grad_norm": 0.15625, "learning_rate": 0.00021615575643190625, "loss": 0.4545, "step": 146940 }, { "epoch": 7.298599384126353, "grad_norm": 0.1572265625, "learning_rate": 0.00021611602264825667, "loss": 0.484, "step": 146950 }, { "epoch": 7.2990960564219725, "grad_norm": 0.1474609375, "learning_rate": 0.00021607628886460717, "loss": 0.4755, "step": 146960 }, { "epoch": 7.299592728717592, "grad_norm": 0.169921875, "learning_rate": 0.0002160365550809576, "loss": 0.4786, "step": 146970 }, { "epoch": 7.300089401013212, "grad_norm": 0.1533203125, "learning_rate": 0.00021599682129730803, "loss": 0.4916, "step": 146980 }, { "epoch": 7.300586073308831, "grad_norm": 0.158203125, "learning_rate": 0.0002159570875136585, "loss": 0.4628, "step": 146990 }, { "epoch": 7.30108274560445, "grad_norm": 0.1474609375, "learning_rate": 0.00021591735373000895, "loss": 0.4911, "step": 147000 }, { "epoch": 7.3015794179000695, "grad_norm": 0.134765625, "learning_rate": 0.00021587761994635942, "loss": 0.4863, "step": 147010 }, { "epoch": 7.302076090195689, "grad_norm": 0.138671875, "learning_rate": 0.00021583788616270986, "loss": 0.4797, "step": 147020 }, { "epoch": 7.302572762491308, "grad_norm": 0.1484375, "learning_rate": 0.00021579815237906028, "loss": 0.4951, "step": 147030 }, { "epoch": 7.303069434786927, "grad_norm": 0.1630859375, "learning_rate": 0.00021575841859541078, "loss": 0.5233, "step": 147040 }, { "epoch": 7.303566107082547, "grad_norm": 0.134765625, "learning_rate": 0.0002157186848117612, "loss": 0.4678, "step": 147050 }, { "epoch": 7.3040627793781665, "grad_norm": 0.14453125, "learning_rate": 0.00021567895102811164, "loss": 0.4642, "step": 147060 }, { "epoch": 7.304559451673786, "grad_norm": 0.1708984375, "learning_rate": 0.0002156392172444621, "loss": 0.5134, "step": 147070 }, { "epoch": 7.305056123969405, "grad_norm": 0.15234375, "learning_rate": 0.00021559948346081256, "loss": 0.5011, "step": 147080 }, { "epoch": 7.305552796265024, "grad_norm": 0.142578125, "learning_rate": 0.00021555974967716303, "loss": 0.5014, "step": 147090 }, { "epoch": 7.306049468560643, "grad_norm": 0.1689453125, "learning_rate": 0.00021552001589351347, "loss": 0.4747, "step": 147100 }, { "epoch": 7.306546140856263, "grad_norm": 0.140625, "learning_rate": 0.00021548028210986392, "loss": 0.4914, "step": 147110 }, { "epoch": 7.307042813151883, "grad_norm": 0.1806640625, "learning_rate": 0.0002154405483262144, "loss": 0.4917, "step": 147120 }, { "epoch": 7.307539485447502, "grad_norm": 0.181640625, "learning_rate": 0.00021540081454256483, "loss": 0.4962, "step": 147130 }, { "epoch": 7.308036157743121, "grad_norm": 0.1337890625, "learning_rate": 0.00021536108075891525, "loss": 0.5183, "step": 147140 }, { "epoch": 7.30853283003874, "grad_norm": 0.1474609375, "learning_rate": 0.00021532134697526575, "loss": 0.4937, "step": 147150 }, { "epoch": 7.30902950233436, "grad_norm": 0.1494140625, "learning_rate": 0.00021528161319161616, "loss": 0.5082, "step": 147160 }, { "epoch": 7.309526174629979, "grad_norm": 0.150390625, "learning_rate": 0.00021524187940796666, "loss": 0.4975, "step": 147170 }, { "epoch": 7.310022846925598, "grad_norm": 0.1455078125, "learning_rate": 0.00021520214562431708, "loss": 0.4993, "step": 147180 }, { "epoch": 7.310519519221218, "grad_norm": 0.140625, "learning_rate": 0.00021516241184066752, "loss": 0.4736, "step": 147190 }, { "epoch": 7.3110161915168375, "grad_norm": 0.134765625, "learning_rate": 0.000215122678057018, "loss": 0.4645, "step": 147200 }, { "epoch": 7.311512863812457, "grad_norm": 0.162109375, "learning_rate": 0.00021508294427336844, "loss": 0.4641, "step": 147210 }, { "epoch": 7.312009536108076, "grad_norm": 0.1552734375, "learning_rate": 0.0002150432104897189, "loss": 0.4738, "step": 147220 }, { "epoch": 7.312506208403695, "grad_norm": 0.181640625, "learning_rate": 0.00021500347670606935, "loss": 0.5394, "step": 147230 }, { "epoch": 7.313002880699314, "grad_norm": 0.1533203125, "learning_rate": 0.0002149637429224198, "loss": 0.4848, "step": 147240 }, { "epoch": 7.313499552994934, "grad_norm": 0.1591796875, "learning_rate": 0.00021492400913877027, "loss": 0.4947, "step": 147250 }, { "epoch": 7.313996225290554, "grad_norm": 0.1630859375, "learning_rate": 0.0002148842753551207, "loss": 0.4941, "step": 147260 }, { "epoch": 7.314492897586173, "grad_norm": 0.1328125, "learning_rate": 0.00021484454157147113, "loss": 0.484, "step": 147270 }, { "epoch": 7.314989569881792, "grad_norm": 0.1474609375, "learning_rate": 0.0002148048077878216, "loss": 0.496, "step": 147280 }, { "epoch": 7.315486242177411, "grad_norm": 0.134765625, "learning_rate": 0.00021476507400417205, "loss": 0.4739, "step": 147290 }, { "epoch": 7.315982914473031, "grad_norm": 0.1337890625, "learning_rate": 0.00021472534022052252, "loss": 0.5054, "step": 147300 }, { "epoch": 7.31647958676865, "grad_norm": 0.1416015625, "learning_rate": 0.00021468560643687296, "loss": 0.4693, "step": 147310 }, { "epoch": 7.316976259064269, "grad_norm": 0.1376953125, "learning_rate": 0.0002146458726532234, "loss": 0.4808, "step": 147320 }, { "epoch": 7.317472931359889, "grad_norm": 0.15234375, "learning_rate": 0.00021460613886957388, "loss": 0.488, "step": 147330 }, { "epoch": 7.317969603655508, "grad_norm": 0.1455078125, "learning_rate": 0.00021456640508592432, "loss": 0.5135, "step": 147340 }, { "epoch": 7.318466275951128, "grad_norm": 0.1826171875, "learning_rate": 0.00021452667130227474, "loss": 0.4665, "step": 147350 }, { "epoch": 7.318962948246747, "grad_norm": 0.1572265625, "learning_rate": 0.00021448693751862524, "loss": 0.4793, "step": 147360 }, { "epoch": 7.319459620542366, "grad_norm": 0.1396484375, "learning_rate": 0.00021444720373497566, "loss": 0.5066, "step": 147370 }, { "epoch": 7.319956292837985, "grad_norm": 0.171875, "learning_rate": 0.00021440746995132615, "loss": 0.4914, "step": 147380 }, { "epoch": 7.3204529651336046, "grad_norm": 0.140625, "learning_rate": 0.00021436773616767657, "loss": 0.487, "step": 147390 }, { "epoch": 7.320949637429225, "grad_norm": 0.14453125, "learning_rate": 0.00021432800238402702, "loss": 0.501, "step": 147400 }, { "epoch": 7.321446309724844, "grad_norm": 0.16015625, "learning_rate": 0.0002142882686003775, "loss": 0.4784, "step": 147410 }, { "epoch": 7.321942982020463, "grad_norm": 0.150390625, "learning_rate": 0.00021424853481672793, "loss": 0.5204, "step": 147420 }, { "epoch": 7.322439654316082, "grad_norm": 0.150390625, "learning_rate": 0.00021420880103307838, "loss": 0.4816, "step": 147430 }, { "epoch": 7.322936326611702, "grad_norm": 0.150390625, "learning_rate": 0.00021416906724942885, "loss": 0.4802, "step": 147440 }, { "epoch": 7.323432998907321, "grad_norm": 0.1484375, "learning_rate": 0.0002141293334657793, "loss": 0.4924, "step": 147450 }, { "epoch": 7.32392967120294, "grad_norm": 0.1416015625, "learning_rate": 0.00021408959968212976, "loss": 0.4798, "step": 147460 }, { "epoch": 7.324426343498559, "grad_norm": 0.1494140625, "learning_rate": 0.0002140498658984802, "loss": 0.4849, "step": 147470 }, { "epoch": 7.324923015794179, "grad_norm": 0.134765625, "learning_rate": 0.00021401013211483062, "loss": 0.4835, "step": 147480 }, { "epoch": 7.325419688089799, "grad_norm": 0.1396484375, "learning_rate": 0.00021397039833118112, "loss": 0.4794, "step": 147490 }, { "epoch": 7.325916360385418, "grad_norm": 0.1552734375, "learning_rate": 0.00021393066454753154, "loss": 0.4891, "step": 147500 }, { "epoch": 7.326413032681037, "grad_norm": 0.1708984375, "learning_rate": 0.00021389093076388198, "loss": 0.4946, "step": 147510 }, { "epoch": 7.326909704976656, "grad_norm": 0.1640625, "learning_rate": 0.00021385119698023246, "loss": 0.509, "step": 147520 }, { "epoch": 7.3274063772722755, "grad_norm": 0.15234375, "learning_rate": 0.0002138114631965829, "loss": 0.4729, "step": 147530 }, { "epoch": 7.327903049567895, "grad_norm": 0.1787109375, "learning_rate": 0.00021377172941293337, "loss": 0.4801, "step": 147540 }, { "epoch": 7.328399721863515, "grad_norm": 0.162109375, "learning_rate": 0.00021373199562928381, "loss": 0.5032, "step": 147550 }, { "epoch": 7.328896394159134, "grad_norm": 0.1435546875, "learning_rate": 0.00021369226184563423, "loss": 0.4887, "step": 147560 }, { "epoch": 7.329393066454753, "grad_norm": 0.1328125, "learning_rate": 0.00021365252806198473, "loss": 0.4697, "step": 147570 }, { "epoch": 7.3298897387503725, "grad_norm": 0.1396484375, "learning_rate": 0.00021361279427833515, "loss": 0.4832, "step": 147580 }, { "epoch": 7.330386411045992, "grad_norm": 0.12890625, "learning_rate": 0.0002135730604946856, "loss": 0.5004, "step": 147590 }, { "epoch": 7.330883083341611, "grad_norm": 0.140625, "learning_rate": 0.00021353332671103606, "loss": 0.5154, "step": 147600 }, { "epoch": 7.33137975563723, "grad_norm": 0.1435546875, "learning_rate": 0.0002134935929273865, "loss": 0.4936, "step": 147610 }, { "epoch": 7.331876427932849, "grad_norm": 0.1474609375, "learning_rate": 0.00021345385914373698, "loss": 0.4782, "step": 147620 }, { "epoch": 7.3323731002284696, "grad_norm": 0.13671875, "learning_rate": 0.00021341412536008742, "loss": 0.4783, "step": 147630 }, { "epoch": 7.332869772524089, "grad_norm": 0.1484375, "learning_rate": 0.00021337439157643787, "loss": 0.4588, "step": 147640 }, { "epoch": 7.333366444819708, "grad_norm": 0.1396484375, "learning_rate": 0.00021333465779278834, "loss": 0.4956, "step": 147650 }, { "epoch": 7.333863117115327, "grad_norm": 0.1552734375, "learning_rate": 0.00021329492400913878, "loss": 0.4903, "step": 147660 }, { "epoch": 7.3343597894109465, "grad_norm": 0.1474609375, "learning_rate": 0.00021325519022548925, "loss": 0.4925, "step": 147670 }, { "epoch": 7.334856461706566, "grad_norm": 0.140625, "learning_rate": 0.0002132154564418397, "loss": 0.4826, "step": 147680 }, { "epoch": 7.335353134002185, "grad_norm": 0.1484375, "learning_rate": 0.00021317572265819012, "loss": 0.5, "step": 147690 }, { "epoch": 7.335849806297805, "grad_norm": 0.1357421875, "learning_rate": 0.00021313598887454061, "loss": 0.4536, "step": 147700 }, { "epoch": 7.336346478593424, "grad_norm": 0.158203125, "learning_rate": 0.00021309625509089103, "loss": 0.4697, "step": 147710 }, { "epoch": 7.3368431508890435, "grad_norm": 0.1318359375, "learning_rate": 0.00021305652130724148, "loss": 0.4588, "step": 147720 }, { "epoch": 7.337339823184663, "grad_norm": 0.142578125, "learning_rate": 0.00021301678752359195, "loss": 0.4895, "step": 147730 }, { "epoch": 7.337836495480282, "grad_norm": 0.14453125, "learning_rate": 0.0002129770537399424, "loss": 0.4911, "step": 147740 }, { "epoch": 7.338333167775901, "grad_norm": 0.193359375, "learning_rate": 0.00021293731995629286, "loss": 0.4846, "step": 147750 }, { "epoch": 7.33882984007152, "grad_norm": 0.1318359375, "learning_rate": 0.0002128975861726433, "loss": 0.4762, "step": 147760 }, { "epoch": 7.3393265123671405, "grad_norm": 0.1376953125, "learning_rate": 0.00021285785238899375, "loss": 0.4769, "step": 147770 }, { "epoch": 7.33982318466276, "grad_norm": 0.14453125, "learning_rate": 0.00021281811860534422, "loss": 0.496, "step": 147780 }, { "epoch": 7.340319856958379, "grad_norm": 0.146484375, "learning_rate": 0.00021277838482169467, "loss": 0.4716, "step": 147790 }, { "epoch": 7.340816529253998, "grad_norm": 0.1484375, "learning_rate": 0.00021273865103804508, "loss": 0.4979, "step": 147800 }, { "epoch": 7.341313201549617, "grad_norm": 0.146484375, "learning_rate": 0.00021269891725439558, "loss": 0.4991, "step": 147810 }, { "epoch": 7.341809873845237, "grad_norm": 0.14453125, "learning_rate": 0.000212659183470746, "loss": 0.5062, "step": 147820 }, { "epoch": 7.342306546140856, "grad_norm": 0.140625, "learning_rate": 0.00021261944968709647, "loss": 0.5022, "step": 147830 }, { "epoch": 7.342803218436476, "grad_norm": 0.1494140625, "learning_rate": 0.00021257971590344691, "loss": 0.5222, "step": 147840 }, { "epoch": 7.343299890732095, "grad_norm": 0.138671875, "learning_rate": 0.00021253998211979736, "loss": 0.4817, "step": 147850 }, { "epoch": 7.343796563027714, "grad_norm": 0.1591796875, "learning_rate": 0.00021250024833614783, "loss": 0.4652, "step": 147860 }, { "epoch": 7.344293235323334, "grad_norm": 0.14453125, "learning_rate": 0.00021246051455249827, "loss": 0.4908, "step": 147870 }, { "epoch": 7.344789907618953, "grad_norm": 0.14453125, "learning_rate": 0.0002124207807688487, "loss": 0.4937, "step": 147880 }, { "epoch": 7.345286579914572, "grad_norm": 0.1259765625, "learning_rate": 0.0002123810469851992, "loss": 0.4813, "step": 147890 }, { "epoch": 7.345783252210191, "grad_norm": 0.150390625, "learning_rate": 0.0002123413132015496, "loss": 0.4756, "step": 147900 }, { "epoch": 7.3462799245058115, "grad_norm": 0.1337890625, "learning_rate": 0.0002123015794179001, "loss": 0.4978, "step": 147910 }, { "epoch": 7.346776596801431, "grad_norm": 0.158203125, "learning_rate": 0.00021226184563425052, "loss": 0.4986, "step": 147920 }, { "epoch": 7.34727326909705, "grad_norm": 0.134765625, "learning_rate": 0.00021222211185060097, "loss": 0.4819, "step": 147930 }, { "epoch": 7.347769941392669, "grad_norm": 0.1513671875, "learning_rate": 0.00021218237806695144, "loss": 0.4643, "step": 147940 }, { "epoch": 7.348266613688288, "grad_norm": 0.1474609375, "learning_rate": 0.00021214264428330188, "loss": 0.4582, "step": 147950 }, { "epoch": 7.348763285983908, "grad_norm": 0.1484375, "learning_rate": 0.00021210291049965233, "loss": 0.4971, "step": 147960 }, { "epoch": 7.349259958279527, "grad_norm": 0.1611328125, "learning_rate": 0.0002120631767160028, "loss": 0.4929, "step": 147970 }, { "epoch": 7.349756630575147, "grad_norm": 0.1767578125, "learning_rate": 0.00021202344293235324, "loss": 0.4662, "step": 147980 }, { "epoch": 7.350253302870766, "grad_norm": 0.12890625, "learning_rate": 0.00021198370914870371, "loss": 0.4468, "step": 147990 }, { "epoch": 7.350749975166385, "grad_norm": 0.1435546875, "learning_rate": 0.00021194397536505416, "loss": 0.5046, "step": 148000 }, { "epoch": 7.351246647462005, "grad_norm": 0.13671875, "learning_rate": 0.00021190424158140458, "loss": 0.4765, "step": 148010 }, { "epoch": 7.351743319757624, "grad_norm": 0.1640625, "learning_rate": 0.00021186450779775507, "loss": 0.4758, "step": 148020 }, { "epoch": 7.352239992053243, "grad_norm": 0.142578125, "learning_rate": 0.0002118247740141055, "loss": 0.4849, "step": 148030 }, { "epoch": 7.352736664348862, "grad_norm": 0.1357421875, "learning_rate": 0.00021178504023045594, "loss": 0.4877, "step": 148040 }, { "epoch": 7.353233336644482, "grad_norm": 0.1455078125, "learning_rate": 0.0002117453064468064, "loss": 0.4915, "step": 148050 }, { "epoch": 7.353730008940102, "grad_norm": 0.140625, "learning_rate": 0.00021170557266315685, "loss": 0.5046, "step": 148060 }, { "epoch": 7.354226681235721, "grad_norm": 0.171875, "learning_rate": 0.00021166583887950732, "loss": 0.4907, "step": 148070 }, { "epoch": 7.35472335353134, "grad_norm": 0.1484375, "learning_rate": 0.00021162610509585777, "loss": 0.4744, "step": 148080 }, { "epoch": 7.355220025826959, "grad_norm": 0.1435546875, "learning_rate": 0.0002115863713122082, "loss": 0.4648, "step": 148090 }, { "epoch": 7.355716698122579, "grad_norm": 0.1533203125, "learning_rate": 0.00021154663752855868, "loss": 0.5027, "step": 148100 }, { "epoch": 7.356213370418198, "grad_norm": 0.1455078125, "learning_rate": 0.0002115069037449091, "loss": 0.4799, "step": 148110 }, { "epoch": 7.356710042713817, "grad_norm": 0.1533203125, "learning_rate": 0.0002114671699612596, "loss": 0.4903, "step": 148120 }, { "epoch": 7.357206715009437, "grad_norm": 0.1455078125, "learning_rate": 0.00021142743617761002, "loss": 0.4935, "step": 148130 }, { "epoch": 7.357703387305056, "grad_norm": 0.154296875, "learning_rate": 0.00021138770239396046, "loss": 0.4986, "step": 148140 }, { "epoch": 7.358200059600676, "grad_norm": 0.146484375, "learning_rate": 0.00021134796861031093, "loss": 0.4964, "step": 148150 }, { "epoch": 7.358696731896295, "grad_norm": 0.1474609375, "learning_rate": 0.00021130823482666137, "loss": 0.4879, "step": 148160 }, { "epoch": 7.359193404191914, "grad_norm": 0.1337890625, "learning_rate": 0.00021126850104301182, "loss": 0.5062, "step": 148170 }, { "epoch": 7.359690076487533, "grad_norm": 0.138671875, "learning_rate": 0.0002112287672593623, "loss": 0.5151, "step": 148180 }, { "epoch": 7.3601867487831525, "grad_norm": 0.158203125, "learning_rate": 0.00021118903347571273, "loss": 0.5111, "step": 148190 }, { "epoch": 7.360683421078773, "grad_norm": 0.1328125, "learning_rate": 0.0002111492996920632, "loss": 0.5055, "step": 148200 }, { "epoch": 7.361180093374392, "grad_norm": 0.154296875, "learning_rate": 0.00021110956590841365, "loss": 0.4919, "step": 148210 }, { "epoch": 7.361676765670011, "grad_norm": 0.12255859375, "learning_rate": 0.00021106983212476407, "loss": 0.4405, "step": 148220 }, { "epoch": 7.36217343796563, "grad_norm": 0.13671875, "learning_rate": 0.00021103009834111457, "loss": 0.4251, "step": 148230 }, { "epoch": 7.3626701102612495, "grad_norm": 0.169921875, "learning_rate": 0.00021099036455746498, "loss": 0.5104, "step": 148240 }, { "epoch": 7.363166782556869, "grad_norm": 0.1396484375, "learning_rate": 0.00021095063077381543, "loss": 0.4527, "step": 148250 }, { "epoch": 7.363663454852488, "grad_norm": 0.1650390625, "learning_rate": 0.0002109108969901659, "loss": 0.4996, "step": 148260 }, { "epoch": 7.364160127148108, "grad_norm": 0.1416015625, "learning_rate": 0.00021087116320651634, "loss": 0.4782, "step": 148270 }, { "epoch": 7.364656799443727, "grad_norm": 0.17578125, "learning_rate": 0.00021083142942286681, "loss": 0.5066, "step": 148280 }, { "epoch": 7.3651534717393465, "grad_norm": 0.1416015625, "learning_rate": 0.00021079169563921726, "loss": 0.4703, "step": 148290 }, { "epoch": 7.365650144034966, "grad_norm": 0.158203125, "learning_rate": 0.0002107519618555677, "loss": 0.4993, "step": 148300 }, { "epoch": 7.366146816330585, "grad_norm": 0.1396484375, "learning_rate": 0.00021071222807191817, "loss": 0.4699, "step": 148310 }, { "epoch": 7.366643488626204, "grad_norm": 0.16015625, "learning_rate": 0.00021067249428826862, "loss": 0.4557, "step": 148320 }, { "epoch": 7.367140160921823, "grad_norm": 0.162109375, "learning_rate": 0.00021063276050461904, "loss": 0.4764, "step": 148330 }, { "epoch": 7.367636833217443, "grad_norm": 0.1796875, "learning_rate": 0.00021059302672096953, "loss": 0.471, "step": 148340 }, { "epoch": 7.368133505513063, "grad_norm": 0.14453125, "learning_rate": 0.00021055329293731995, "loss": 0.5214, "step": 148350 }, { "epoch": 7.368630177808682, "grad_norm": 0.138671875, "learning_rate": 0.00021051355915367045, "loss": 0.4973, "step": 148360 }, { "epoch": 7.369126850104301, "grad_norm": 0.1376953125, "learning_rate": 0.00021047382537002087, "loss": 0.4635, "step": 148370 }, { "epoch": 7.3696235223999205, "grad_norm": 0.1884765625, "learning_rate": 0.0002104340915863713, "loss": 0.4943, "step": 148380 }, { "epoch": 7.37012019469554, "grad_norm": 0.150390625, "learning_rate": 0.00021039435780272178, "loss": 0.5044, "step": 148390 }, { "epoch": 7.370616866991159, "grad_norm": 0.1435546875, "learning_rate": 0.00021035462401907223, "loss": 0.492, "step": 148400 }, { "epoch": 7.371113539286778, "grad_norm": 0.142578125, "learning_rate": 0.00021031489023542264, "loss": 0.4722, "step": 148410 }, { "epoch": 7.371610211582398, "grad_norm": 0.14453125, "learning_rate": 0.00021027515645177314, "loss": 0.4814, "step": 148420 }, { "epoch": 7.3721068838780175, "grad_norm": 0.1435546875, "learning_rate": 0.00021023542266812356, "loss": 0.4697, "step": 148430 }, { "epoch": 7.372603556173637, "grad_norm": 0.1904296875, "learning_rate": 0.00021019568888447406, "loss": 0.4714, "step": 148440 }, { "epoch": 7.373100228469256, "grad_norm": 0.15234375, "learning_rate": 0.00021015595510082448, "loss": 0.4722, "step": 148450 }, { "epoch": 7.373596900764875, "grad_norm": 0.15234375, "learning_rate": 0.00021011622131717492, "loss": 0.4903, "step": 148460 }, { "epoch": 7.374093573060494, "grad_norm": 0.150390625, "learning_rate": 0.0002100764875335254, "loss": 0.4621, "step": 148470 }, { "epoch": 7.374590245356114, "grad_norm": 0.1640625, "learning_rate": 0.00021003675374987583, "loss": 0.4817, "step": 148480 }, { "epoch": 7.375086917651734, "grad_norm": 0.1298828125, "learning_rate": 0.0002099970199662263, "loss": 0.5023, "step": 148490 }, { "epoch": 7.375583589947353, "grad_norm": 0.166015625, "learning_rate": 0.00020995728618257675, "loss": 0.5172, "step": 148500 }, { "epoch": 7.376080262242972, "grad_norm": 0.1787109375, "learning_rate": 0.0002099175523989272, "loss": 0.4956, "step": 148510 }, { "epoch": 7.376576934538591, "grad_norm": 0.1806640625, "learning_rate": 0.00020987781861527767, "loss": 0.5087, "step": 148520 }, { "epoch": 7.377073606834211, "grad_norm": 0.1396484375, "learning_rate": 0.0002098380848316281, "loss": 0.5029, "step": 148530 }, { "epoch": 7.37757027912983, "grad_norm": 0.16015625, "learning_rate": 0.00020979835104797853, "loss": 0.4689, "step": 148540 }, { "epoch": 7.378066951425449, "grad_norm": 0.1474609375, "learning_rate": 0.00020975861726432903, "loss": 0.495, "step": 148550 }, { "epoch": 7.378563623721069, "grad_norm": 0.15625, "learning_rate": 0.00020971888348067944, "loss": 0.4624, "step": 148560 }, { "epoch": 7.3790602960166884, "grad_norm": 0.201171875, "learning_rate": 0.00020967914969702994, "loss": 0.4616, "step": 148570 }, { "epoch": 7.379556968312308, "grad_norm": 0.1435546875, "learning_rate": 0.00020963941591338036, "loss": 0.4881, "step": 148580 }, { "epoch": 7.380053640607927, "grad_norm": 0.130859375, "learning_rate": 0.0002095996821297308, "loss": 0.4747, "step": 148590 }, { "epoch": 7.380550312903546, "grad_norm": 0.1796875, "learning_rate": 0.00020955994834608127, "loss": 0.5043, "step": 148600 }, { "epoch": 7.381046985199165, "grad_norm": 0.1416015625, "learning_rate": 0.00020952021456243172, "loss": 0.4839, "step": 148610 }, { "epoch": 7.381543657494785, "grad_norm": 0.154296875, "learning_rate": 0.00020948048077878216, "loss": 0.4851, "step": 148620 }, { "epoch": 7.382040329790405, "grad_norm": 0.1640625, "learning_rate": 0.00020944074699513263, "loss": 0.478, "step": 148630 }, { "epoch": 7.382537002086024, "grad_norm": 0.140625, "learning_rate": 0.00020940101321148308, "loss": 0.4842, "step": 148640 }, { "epoch": 7.383033674381643, "grad_norm": 0.1552734375, "learning_rate": 0.00020936127942783355, "loss": 0.4903, "step": 148650 }, { "epoch": 7.383530346677262, "grad_norm": 0.21875, "learning_rate": 0.000209321545644184, "loss": 0.5152, "step": 148660 }, { "epoch": 7.384027018972882, "grad_norm": 0.1552734375, "learning_rate": 0.0002092818118605344, "loss": 0.4867, "step": 148670 }, { "epoch": 7.384523691268501, "grad_norm": 0.1337890625, "learning_rate": 0.00020924207807688488, "loss": 0.4878, "step": 148680 }, { "epoch": 7.38502036356412, "grad_norm": 0.1533203125, "learning_rate": 0.00020920234429323533, "loss": 0.5052, "step": 148690 }, { "epoch": 7.38551703585974, "grad_norm": 0.1748046875, "learning_rate": 0.00020916261050958577, "loss": 0.4869, "step": 148700 }, { "epoch": 7.386013708155359, "grad_norm": 0.1396484375, "learning_rate": 0.00020912287672593624, "loss": 0.4599, "step": 148710 }, { "epoch": 7.386510380450979, "grad_norm": 0.162109375, "learning_rate": 0.0002090831429422867, "loss": 0.4942, "step": 148720 }, { "epoch": 7.387007052746598, "grad_norm": 0.1826171875, "learning_rate": 0.00020904340915863716, "loss": 0.5085, "step": 148730 }, { "epoch": 7.387503725042217, "grad_norm": 0.15234375, "learning_rate": 0.0002090036753749876, "loss": 0.5101, "step": 148740 }, { "epoch": 7.388000397337836, "grad_norm": 0.1572265625, "learning_rate": 0.00020896394159133802, "loss": 0.4933, "step": 148750 }, { "epoch": 7.3884970696334555, "grad_norm": 0.142578125, "learning_rate": 0.00020892420780768852, "loss": 0.4913, "step": 148760 }, { "epoch": 7.388993741929076, "grad_norm": 0.2431640625, "learning_rate": 0.00020888447402403894, "loss": 0.4553, "step": 148770 }, { "epoch": 7.389490414224695, "grad_norm": 0.1494140625, "learning_rate": 0.00020884474024038938, "loss": 0.4928, "step": 148780 }, { "epoch": 7.389987086520314, "grad_norm": 0.16796875, "learning_rate": 0.00020880500645673985, "loss": 0.4955, "step": 148790 }, { "epoch": 7.390483758815933, "grad_norm": 0.1474609375, "learning_rate": 0.0002087652726730903, "loss": 0.4831, "step": 148800 }, { "epoch": 7.390980431111553, "grad_norm": 0.1474609375, "learning_rate": 0.00020872553888944077, "loss": 0.4747, "step": 148810 }, { "epoch": 7.391477103407172, "grad_norm": 0.1376953125, "learning_rate": 0.0002086858051057912, "loss": 0.4693, "step": 148820 }, { "epoch": 7.391973775702791, "grad_norm": 0.138671875, "learning_rate": 0.00020864607132214165, "loss": 0.4614, "step": 148830 }, { "epoch": 7.39247044799841, "grad_norm": 0.1669921875, "learning_rate": 0.00020860633753849213, "loss": 0.4809, "step": 148840 }, { "epoch": 7.39296712029403, "grad_norm": 0.140625, "learning_rate": 0.00020856660375484257, "loss": 0.4973, "step": 148850 }, { "epoch": 7.39346379258965, "grad_norm": 0.1435546875, "learning_rate": 0.000208526869971193, "loss": 0.4956, "step": 148860 }, { "epoch": 7.393960464885269, "grad_norm": 0.142578125, "learning_rate": 0.00020848713618754349, "loss": 0.4837, "step": 148870 }, { "epoch": 7.394457137180888, "grad_norm": 0.1484375, "learning_rate": 0.0002084474024038939, "loss": 0.4962, "step": 148880 }, { "epoch": 7.394953809476507, "grad_norm": 0.1767578125, "learning_rate": 0.0002084076686202444, "loss": 0.4933, "step": 148890 }, { "epoch": 7.3954504817721265, "grad_norm": 0.16015625, "learning_rate": 0.00020836793483659482, "loss": 0.4801, "step": 148900 }, { "epoch": 7.395947154067746, "grad_norm": 0.14453125, "learning_rate": 0.00020832820105294526, "loss": 0.5075, "step": 148910 }, { "epoch": 7.396443826363366, "grad_norm": 0.150390625, "learning_rate": 0.00020828846726929573, "loss": 0.5015, "step": 148920 }, { "epoch": 7.396940498658985, "grad_norm": 0.1474609375, "learning_rate": 0.00020824873348564618, "loss": 0.4751, "step": 148930 }, { "epoch": 7.397437170954604, "grad_norm": 0.140625, "learning_rate": 0.00020820899970199665, "loss": 0.4797, "step": 148940 }, { "epoch": 7.3979338432502235, "grad_norm": 0.1533203125, "learning_rate": 0.0002081692659183471, "loss": 0.4828, "step": 148950 }, { "epoch": 7.398430515545843, "grad_norm": 0.1474609375, "learning_rate": 0.0002081295321346975, "loss": 0.5123, "step": 148960 }, { "epoch": 7.398927187841462, "grad_norm": 0.158203125, "learning_rate": 0.000208089798351048, "loss": 0.4848, "step": 148970 }, { "epoch": 7.399423860137081, "grad_norm": 0.142578125, "learning_rate": 0.00020805006456739843, "loss": 0.4856, "step": 148980 }, { "epoch": 7.399920532432701, "grad_norm": 0.1455078125, "learning_rate": 0.00020801033078374887, "loss": 0.4928, "step": 148990 }, { "epoch": 7.4004172047283205, "grad_norm": 0.1455078125, "learning_rate": 0.00020797059700009934, "loss": 0.5128, "step": 149000 }, { "epoch": 7.40091387702394, "grad_norm": 0.146484375, "learning_rate": 0.0002079308632164498, "loss": 0.5278, "step": 149010 }, { "epoch": 7.401410549319559, "grad_norm": 0.1357421875, "learning_rate": 0.00020789112943280026, "loss": 0.4889, "step": 149020 }, { "epoch": 7.401907221615178, "grad_norm": 0.142578125, "learning_rate": 0.0002078513956491507, "loss": 0.4796, "step": 149030 }, { "epoch": 7.4024038939107974, "grad_norm": 0.142578125, "learning_rate": 0.00020781166186550115, "loss": 0.4768, "step": 149040 }, { "epoch": 7.402900566206417, "grad_norm": 0.1357421875, "learning_rate": 0.00020777192808185162, "loss": 0.4833, "step": 149050 }, { "epoch": 7.403397238502036, "grad_norm": 0.1484375, "learning_rate": 0.00020773219429820206, "loss": 0.4826, "step": 149060 }, { "epoch": 7.403893910797656, "grad_norm": 0.1533203125, "learning_rate": 0.00020769246051455248, "loss": 0.5, "step": 149070 }, { "epoch": 7.404390583093275, "grad_norm": 0.1494140625, "learning_rate": 0.00020765272673090298, "loss": 0.479, "step": 149080 }, { "epoch": 7.4048872553888945, "grad_norm": 0.1640625, "learning_rate": 0.0002076129929472534, "loss": 0.4818, "step": 149090 }, { "epoch": 7.405383927684514, "grad_norm": 0.1640625, "learning_rate": 0.0002075732591636039, "loss": 0.498, "step": 149100 }, { "epoch": 7.405880599980133, "grad_norm": 0.142578125, "learning_rate": 0.0002075335253799543, "loss": 0.4568, "step": 149110 }, { "epoch": 7.406377272275752, "grad_norm": 0.1640625, "learning_rate": 0.00020749379159630475, "loss": 0.483, "step": 149120 }, { "epoch": 7.406873944571371, "grad_norm": 0.13671875, "learning_rate": 0.00020745405781265523, "loss": 0.4718, "step": 149130 }, { "epoch": 7.4073706168669915, "grad_norm": 0.1455078125, "learning_rate": 0.00020741432402900567, "loss": 0.5066, "step": 149140 }, { "epoch": 7.407867289162611, "grad_norm": 0.1796875, "learning_rate": 0.00020737459024535611, "loss": 0.4941, "step": 149150 }, { "epoch": 7.40836396145823, "grad_norm": 0.1376953125, "learning_rate": 0.00020733485646170659, "loss": 0.4757, "step": 149160 }, { "epoch": 7.408860633753849, "grad_norm": 0.1474609375, "learning_rate": 0.00020729512267805703, "loss": 0.5042, "step": 149170 }, { "epoch": 7.409357306049468, "grad_norm": 0.15625, "learning_rate": 0.0002072553888944075, "loss": 0.5143, "step": 149180 }, { "epoch": 7.409853978345088, "grad_norm": 0.1630859375, "learning_rate": 0.00020721565511075795, "loss": 0.4808, "step": 149190 }, { "epoch": 7.410350650640707, "grad_norm": 0.150390625, "learning_rate": 0.00020717592132710836, "loss": 0.5003, "step": 149200 }, { "epoch": 7.410847322936327, "grad_norm": 0.1923828125, "learning_rate": 0.00020713618754345886, "loss": 0.4878, "step": 149210 }, { "epoch": 7.411343995231946, "grad_norm": 0.1552734375, "learning_rate": 0.00020709645375980928, "loss": 0.504, "step": 149220 }, { "epoch": 7.411840667527565, "grad_norm": 0.16015625, "learning_rate": 0.00020705671997615972, "loss": 0.481, "step": 149230 }, { "epoch": 7.412337339823185, "grad_norm": 0.166015625, "learning_rate": 0.0002070169861925102, "loss": 0.4732, "step": 149240 }, { "epoch": 7.412834012118804, "grad_norm": 0.1533203125, "learning_rate": 0.00020697725240886064, "loss": 0.5001, "step": 149250 }, { "epoch": 7.413330684414423, "grad_norm": 0.1943359375, "learning_rate": 0.0002069375186252111, "loss": 0.4797, "step": 149260 }, { "epoch": 7.413827356710042, "grad_norm": 0.162109375, "learning_rate": 0.00020689778484156155, "loss": 0.5091, "step": 149270 }, { "epoch": 7.4143240290056625, "grad_norm": 0.1396484375, "learning_rate": 0.00020685805105791197, "loss": 0.4502, "step": 149280 }, { "epoch": 7.414820701301282, "grad_norm": 0.1552734375, "learning_rate": 0.00020681831727426247, "loss": 0.4787, "step": 149290 }, { "epoch": 7.415317373596901, "grad_norm": 0.142578125, "learning_rate": 0.0002067785834906129, "loss": 0.5072, "step": 149300 }, { "epoch": 7.41581404589252, "grad_norm": 0.16015625, "learning_rate": 0.00020673884970696333, "loss": 0.5042, "step": 149310 }, { "epoch": 7.416310718188139, "grad_norm": 0.1630859375, "learning_rate": 0.0002066991159233138, "loss": 0.4981, "step": 149320 }, { "epoch": 7.416807390483759, "grad_norm": 0.150390625, "learning_rate": 0.00020665938213966425, "loss": 0.4738, "step": 149330 }, { "epoch": 7.417304062779378, "grad_norm": 0.1357421875, "learning_rate": 0.00020661964835601472, "loss": 0.4689, "step": 149340 }, { "epoch": 7.417800735074998, "grad_norm": 0.150390625, "learning_rate": 0.00020657991457236516, "loss": 0.4857, "step": 149350 }, { "epoch": 7.418297407370617, "grad_norm": 0.1552734375, "learning_rate": 0.0002065401807887156, "loss": 0.5077, "step": 149360 }, { "epoch": 7.418794079666236, "grad_norm": 0.14453125, "learning_rate": 0.00020650044700506608, "loss": 0.4643, "step": 149370 }, { "epoch": 7.419290751961856, "grad_norm": 0.150390625, "learning_rate": 0.00020646071322141652, "loss": 0.4682, "step": 149380 }, { "epoch": 7.419787424257475, "grad_norm": 0.177734375, "learning_rate": 0.000206420979437767, "loss": 0.4662, "step": 149390 }, { "epoch": 7.420284096553094, "grad_norm": 0.1474609375, "learning_rate": 0.00020638124565411744, "loss": 0.4841, "step": 149400 }, { "epoch": 7.420780768848713, "grad_norm": 0.255859375, "learning_rate": 0.00020634151187046785, "loss": 0.4925, "step": 149410 }, { "epoch": 7.421277441144333, "grad_norm": 0.1474609375, "learning_rate": 0.00020630177808681835, "loss": 0.4395, "step": 149420 }, { "epoch": 7.421774113439953, "grad_norm": 0.13671875, "learning_rate": 0.00020626204430316877, "loss": 0.4531, "step": 149430 }, { "epoch": 7.422270785735572, "grad_norm": 0.12890625, "learning_rate": 0.00020622231051951921, "loss": 0.4508, "step": 149440 }, { "epoch": 7.422767458031191, "grad_norm": 0.1796875, "learning_rate": 0.00020618257673586969, "loss": 0.4889, "step": 149450 }, { "epoch": 7.42326413032681, "grad_norm": 0.1796875, "learning_rate": 0.00020614284295222013, "loss": 0.5129, "step": 149460 }, { "epoch": 7.4237608026224295, "grad_norm": 0.15234375, "learning_rate": 0.0002061031091685706, "loss": 0.5116, "step": 149470 }, { "epoch": 7.424257474918049, "grad_norm": 0.1572265625, "learning_rate": 0.00020606337538492105, "loss": 0.4896, "step": 149480 }, { "epoch": 7.424754147213669, "grad_norm": 0.1962890625, "learning_rate": 0.0002060236416012715, "loss": 0.5167, "step": 149490 }, { "epoch": 7.425250819509288, "grad_norm": 0.1484375, "learning_rate": 0.00020598390781762196, "loss": 0.4773, "step": 149500 }, { "epoch": 7.425747491804907, "grad_norm": 0.1416015625, "learning_rate": 0.0002059441740339724, "loss": 0.4978, "step": 149510 }, { "epoch": 7.426244164100527, "grad_norm": 0.1474609375, "learning_rate": 0.00020590444025032282, "loss": 0.4872, "step": 149520 }, { "epoch": 7.426740836396146, "grad_norm": 0.16796875, "learning_rate": 0.0002058647064666733, "loss": 0.5036, "step": 149530 }, { "epoch": 7.427237508691765, "grad_norm": 0.1328125, "learning_rate": 0.00020582497268302374, "loss": 0.4777, "step": 149540 }, { "epoch": 7.427734180987384, "grad_norm": 0.15625, "learning_rate": 0.0002057852388993742, "loss": 0.4929, "step": 149550 }, { "epoch": 7.4282308532830035, "grad_norm": 0.13671875, "learning_rate": 0.00020574550511572465, "loss": 0.503, "step": 149560 }, { "epoch": 7.428727525578624, "grad_norm": 0.140625, "learning_rate": 0.0002057057713320751, "loss": 0.4705, "step": 149570 }, { "epoch": 7.429224197874243, "grad_norm": 0.1689453125, "learning_rate": 0.00020566603754842557, "loss": 0.4755, "step": 149580 }, { "epoch": 7.429720870169862, "grad_norm": 0.15234375, "learning_rate": 0.00020562630376477601, "loss": 0.4845, "step": 149590 }, { "epoch": 7.430217542465481, "grad_norm": 0.142578125, "learning_rate": 0.00020558656998112643, "loss": 0.5069, "step": 149600 }, { "epoch": 7.4307142147611005, "grad_norm": 0.220703125, "learning_rate": 0.00020554683619747693, "loss": 0.4874, "step": 149610 }, { "epoch": 7.43121088705672, "grad_norm": 0.138671875, "learning_rate": 0.00020550710241382735, "loss": 0.482, "step": 149620 }, { "epoch": 7.431707559352339, "grad_norm": 0.166015625, "learning_rate": 0.00020546736863017785, "loss": 0.4415, "step": 149630 }, { "epoch": 7.432204231647959, "grad_norm": 0.140625, "learning_rate": 0.00020542763484652826, "loss": 0.4703, "step": 149640 }, { "epoch": 7.432700903943578, "grad_norm": 0.140625, "learning_rate": 0.0002053879010628787, "loss": 0.4841, "step": 149650 }, { "epoch": 7.4331975762391975, "grad_norm": 0.15234375, "learning_rate": 0.00020534816727922918, "loss": 0.5322, "step": 149660 }, { "epoch": 7.433694248534817, "grad_norm": 0.140625, "learning_rate": 0.00020530843349557962, "loss": 0.4818, "step": 149670 }, { "epoch": 7.434190920830436, "grad_norm": 0.1416015625, "learning_rate": 0.00020526869971193007, "loss": 0.4964, "step": 149680 }, { "epoch": 7.434687593126055, "grad_norm": 0.2119140625, "learning_rate": 0.00020522896592828054, "loss": 0.4843, "step": 149690 }, { "epoch": 7.435184265421674, "grad_norm": 0.1376953125, "learning_rate": 0.00020518923214463098, "loss": 0.4871, "step": 149700 }, { "epoch": 7.435680937717294, "grad_norm": 0.15234375, "learning_rate": 0.00020514949836098145, "loss": 0.5375, "step": 149710 }, { "epoch": 7.436177610012914, "grad_norm": 0.154296875, "learning_rate": 0.0002051097645773319, "loss": 0.4897, "step": 149720 }, { "epoch": 7.436674282308533, "grad_norm": 0.1494140625, "learning_rate": 0.00020507003079368231, "loss": 0.4603, "step": 149730 }, { "epoch": 7.437170954604152, "grad_norm": 0.162109375, "learning_rate": 0.0002050302970100328, "loss": 0.473, "step": 149740 }, { "epoch": 7.4376676268997715, "grad_norm": 0.203125, "learning_rate": 0.00020499056322638323, "loss": 0.5023, "step": 149750 }, { "epoch": 7.438164299195391, "grad_norm": 0.166015625, "learning_rate": 0.00020495082944273373, "loss": 0.475, "step": 149760 }, { "epoch": 7.43866097149101, "grad_norm": 0.1640625, "learning_rate": 0.00020491109565908415, "loss": 0.4797, "step": 149770 }, { "epoch": 7.439157643786629, "grad_norm": 0.1474609375, "learning_rate": 0.0002048713618754346, "loss": 0.4915, "step": 149780 }, { "epoch": 7.439654316082249, "grad_norm": 0.1708984375, "learning_rate": 0.00020483162809178506, "loss": 0.4932, "step": 149790 }, { "epoch": 7.4401509883778685, "grad_norm": 0.1806640625, "learning_rate": 0.0002047918943081355, "loss": 0.4973, "step": 149800 }, { "epoch": 7.440647660673488, "grad_norm": 0.15234375, "learning_rate": 0.00020475216052448592, "loss": 0.4873, "step": 149810 }, { "epoch": 7.441144332969107, "grad_norm": 0.14453125, "learning_rate": 0.00020471242674083642, "loss": 0.474, "step": 149820 }, { "epoch": 7.441641005264726, "grad_norm": 0.1669921875, "learning_rate": 0.00020467269295718684, "loss": 0.4897, "step": 149830 }, { "epoch": 7.442137677560345, "grad_norm": 0.16015625, "learning_rate": 0.00020463295917353734, "loss": 0.4911, "step": 149840 }, { "epoch": 7.442634349855965, "grad_norm": 0.16015625, "learning_rate": 0.00020459322538988775, "loss": 0.5272, "step": 149850 }, { "epoch": 7.443131022151585, "grad_norm": 0.1572265625, "learning_rate": 0.0002045534916062382, "loss": 0.511, "step": 149860 }, { "epoch": 7.443627694447204, "grad_norm": 0.1416015625, "learning_rate": 0.00020451375782258867, "loss": 0.5126, "step": 149870 }, { "epoch": 7.444124366742823, "grad_norm": 0.1474609375, "learning_rate": 0.00020447402403893911, "loss": 0.4711, "step": 149880 }, { "epoch": 7.444621039038442, "grad_norm": 0.1708984375, "learning_rate": 0.00020443429025528956, "loss": 0.4834, "step": 149890 }, { "epoch": 7.445117711334062, "grad_norm": 0.1357421875, "learning_rate": 0.00020439455647164003, "loss": 0.5153, "step": 149900 }, { "epoch": 7.445614383629681, "grad_norm": 0.150390625, "learning_rate": 0.00020435482268799047, "loss": 0.4782, "step": 149910 }, { "epoch": 7.4461110559253, "grad_norm": 0.1484375, "learning_rate": 0.00020431508890434095, "loss": 0.468, "step": 149920 }, { "epoch": 7.44660772822092, "grad_norm": 0.140625, "learning_rate": 0.0002042753551206914, "loss": 0.4964, "step": 149930 }, { "epoch": 7.447104400516539, "grad_norm": 0.205078125, "learning_rate": 0.0002042356213370418, "loss": 0.5064, "step": 149940 }, { "epoch": 7.447601072812159, "grad_norm": 0.158203125, "learning_rate": 0.0002041958875533923, "loss": 0.4847, "step": 149950 }, { "epoch": 7.448097745107778, "grad_norm": 0.142578125, "learning_rate": 0.00020415615376974272, "loss": 0.4819, "step": 149960 }, { "epoch": 7.448594417403397, "grad_norm": 0.162109375, "learning_rate": 0.00020411641998609317, "loss": 0.4937, "step": 149970 }, { "epoch": 7.449091089699016, "grad_norm": 0.1455078125, "learning_rate": 0.00020407668620244364, "loss": 0.4924, "step": 149980 }, { "epoch": 7.449587761994636, "grad_norm": 0.158203125, "learning_rate": 0.00020403695241879408, "loss": 0.4743, "step": 149990 }, { "epoch": 7.450084434290256, "grad_norm": 0.1591796875, "learning_rate": 0.00020399721863514455, "loss": 0.4696, "step": 150000 }, { "epoch": 7.450581106585875, "grad_norm": 0.1396484375, "learning_rate": 0.000203957484851495, "loss": 0.5106, "step": 150010 }, { "epoch": 7.451077778881494, "grad_norm": 0.1572265625, "learning_rate": 0.00020391775106784544, "loss": 0.4905, "step": 150020 }, { "epoch": 7.451574451177113, "grad_norm": 0.1669921875, "learning_rate": 0.0002038780172841959, "loss": 0.496, "step": 150030 }, { "epoch": 7.452071123472733, "grad_norm": 0.1484375, "learning_rate": 0.00020383828350054636, "loss": 0.4744, "step": 150040 }, { "epoch": 7.452567795768352, "grad_norm": 0.1806640625, "learning_rate": 0.00020379854971689677, "loss": 0.4868, "step": 150050 }, { "epoch": 7.453064468063971, "grad_norm": 0.1396484375, "learning_rate": 0.00020375881593324727, "loss": 0.4942, "step": 150060 }, { "epoch": 7.453561140359591, "grad_norm": 0.1513671875, "learning_rate": 0.0002037190821495977, "loss": 0.497, "step": 150070 }, { "epoch": 7.45405781265521, "grad_norm": 0.158203125, "learning_rate": 0.00020367934836594816, "loss": 0.4981, "step": 150080 }, { "epoch": 7.45455448495083, "grad_norm": 0.15625, "learning_rate": 0.0002036396145822986, "loss": 0.4988, "step": 150090 }, { "epoch": 7.455051157246449, "grad_norm": 0.1494140625, "learning_rate": 0.00020359988079864905, "loss": 0.479, "step": 150100 }, { "epoch": 7.455547829542068, "grad_norm": 0.1416015625, "learning_rate": 0.00020356014701499952, "loss": 0.4834, "step": 150110 }, { "epoch": 7.456044501837687, "grad_norm": 0.1708984375, "learning_rate": 0.00020352041323134997, "loss": 0.4881, "step": 150120 }, { "epoch": 7.4565411741333065, "grad_norm": 0.140625, "learning_rate": 0.00020348067944770038, "loss": 0.4548, "step": 150130 }, { "epoch": 7.457037846428927, "grad_norm": 0.1396484375, "learning_rate": 0.00020344094566405088, "loss": 0.4853, "step": 150140 }, { "epoch": 7.457534518724546, "grad_norm": 0.1650390625, "learning_rate": 0.0002034012118804013, "loss": 0.5291, "step": 150150 }, { "epoch": 7.458031191020165, "grad_norm": 0.154296875, "learning_rate": 0.0002033614780967518, "loss": 0.4977, "step": 150160 }, { "epoch": 7.458527863315784, "grad_norm": 0.1669921875, "learning_rate": 0.00020332174431310221, "loss": 0.482, "step": 150170 }, { "epoch": 7.4590245356114036, "grad_norm": 0.158203125, "learning_rate": 0.00020328201052945266, "loss": 0.463, "step": 150180 }, { "epoch": 7.459521207907023, "grad_norm": 0.150390625, "learning_rate": 0.00020324227674580313, "loss": 0.504, "step": 150190 }, { "epoch": 7.460017880202642, "grad_norm": 0.1513671875, "learning_rate": 0.00020320254296215357, "loss": 0.5334, "step": 150200 }, { "epoch": 7.460514552498261, "grad_norm": 0.1435546875, "learning_rate": 0.00020316280917850405, "loss": 0.4615, "step": 150210 }, { "epoch": 7.461011224793881, "grad_norm": 0.1396484375, "learning_rate": 0.0002031230753948545, "loss": 0.4766, "step": 150220 }, { "epoch": 7.461507897089501, "grad_norm": 0.1328125, "learning_rate": 0.00020308334161120493, "loss": 0.4841, "step": 150230 }, { "epoch": 7.46200456938512, "grad_norm": 0.1591796875, "learning_rate": 0.0002030436078275554, "loss": 0.4853, "step": 150240 }, { "epoch": 7.462501241680739, "grad_norm": 0.1513671875, "learning_rate": 0.00020300387404390585, "loss": 0.4894, "step": 150250 }, { "epoch": 7.462997913976358, "grad_norm": 0.140625, "learning_rate": 0.00020296414026025627, "loss": 0.4651, "step": 150260 }, { "epoch": 7.4634945862719775, "grad_norm": 0.1826171875, "learning_rate": 0.00020292440647660677, "loss": 0.4855, "step": 150270 }, { "epoch": 7.463991258567597, "grad_norm": 0.1630859375, "learning_rate": 0.00020288467269295718, "loss": 0.4818, "step": 150280 }, { "epoch": 7.464487930863217, "grad_norm": 0.14453125, "learning_rate": 0.00020284493890930768, "loss": 0.4988, "step": 150290 }, { "epoch": 7.464984603158836, "grad_norm": 0.1396484375, "learning_rate": 0.0002028052051256581, "loss": 0.487, "step": 150300 }, { "epoch": 7.465481275454455, "grad_norm": 0.1611328125, "learning_rate": 0.00020276547134200854, "loss": 0.4859, "step": 150310 }, { "epoch": 7.4659779477500745, "grad_norm": 0.1689453125, "learning_rate": 0.00020272573755835901, "loss": 0.4853, "step": 150320 }, { "epoch": 7.466474620045694, "grad_norm": 0.13671875, "learning_rate": 0.00020268600377470946, "loss": 0.4654, "step": 150330 }, { "epoch": 7.466971292341313, "grad_norm": 0.1328125, "learning_rate": 0.0002026462699910599, "loss": 0.4836, "step": 150340 }, { "epoch": 7.467467964636932, "grad_norm": 0.1435546875, "learning_rate": 0.00020260653620741037, "loss": 0.4624, "step": 150350 }, { "epoch": 7.467964636932552, "grad_norm": 0.138671875, "learning_rate": 0.00020256680242376082, "loss": 0.4759, "step": 150360 }, { "epoch": 7.4684613092281715, "grad_norm": 0.150390625, "learning_rate": 0.0002025270686401113, "loss": 0.4909, "step": 150370 }, { "epoch": 7.468957981523791, "grad_norm": 0.1416015625, "learning_rate": 0.0002024873348564617, "loss": 0.4793, "step": 150380 }, { "epoch": 7.46945465381941, "grad_norm": 0.169921875, "learning_rate": 0.00020244760107281215, "loss": 0.5134, "step": 150390 }, { "epoch": 7.469951326115029, "grad_norm": 0.13671875, "learning_rate": 0.00020240786728916262, "loss": 0.448, "step": 150400 }, { "epoch": 7.470447998410648, "grad_norm": 0.1650390625, "learning_rate": 0.00020236813350551307, "loss": 0.4737, "step": 150410 }, { "epoch": 7.470944670706268, "grad_norm": 0.1533203125, "learning_rate": 0.0002023283997218635, "loss": 0.4889, "step": 150420 }, { "epoch": 7.471441343001887, "grad_norm": 0.138671875, "learning_rate": 0.00020228866593821398, "loss": 0.4734, "step": 150430 }, { "epoch": 7.471938015297507, "grad_norm": 0.1650390625, "learning_rate": 0.00020224893215456443, "loss": 0.457, "step": 150440 }, { "epoch": 7.472434687593126, "grad_norm": 0.150390625, "learning_rate": 0.0002022091983709149, "loss": 0.5119, "step": 150450 }, { "epoch": 7.4729313598887455, "grad_norm": 0.1357421875, "learning_rate": 0.00020216946458726534, "loss": 0.4664, "step": 150460 }, { "epoch": 7.473428032184365, "grad_norm": 0.1572265625, "learning_rate": 0.00020212973080361576, "loss": 0.4786, "step": 150470 }, { "epoch": 7.473924704479984, "grad_norm": 0.154296875, "learning_rate": 0.00020208999701996626, "loss": 0.4814, "step": 150480 }, { "epoch": 7.474421376775603, "grad_norm": 0.1435546875, "learning_rate": 0.00020205026323631667, "loss": 0.4903, "step": 150490 }, { "epoch": 7.474918049071222, "grad_norm": 0.1591796875, "learning_rate": 0.00020201052945266712, "loss": 0.4863, "step": 150500 }, { "epoch": 7.4754147213668425, "grad_norm": 0.1552734375, "learning_rate": 0.0002019707956690176, "loss": 0.5055, "step": 150510 }, { "epoch": 7.475911393662462, "grad_norm": 0.166015625, "learning_rate": 0.00020193106188536803, "loss": 0.5019, "step": 150520 }, { "epoch": 7.476408065958081, "grad_norm": 0.1337890625, "learning_rate": 0.0002018913281017185, "loss": 0.4812, "step": 150530 }, { "epoch": 7.4769047382537, "grad_norm": 0.15625, "learning_rate": 0.00020185159431806895, "loss": 0.5075, "step": 150540 }, { "epoch": 7.477401410549319, "grad_norm": 0.1396484375, "learning_rate": 0.0002018118605344194, "loss": 0.4877, "step": 150550 }, { "epoch": 7.477898082844939, "grad_norm": 0.1494140625, "learning_rate": 0.00020177212675076987, "loss": 0.4877, "step": 150560 }, { "epoch": 7.478394755140558, "grad_norm": 0.1552734375, "learning_rate": 0.0002017323929671203, "loss": 0.4917, "step": 150570 }, { "epoch": 7.478891427436178, "grad_norm": 0.1357421875, "learning_rate": 0.00020169265918347073, "loss": 0.4761, "step": 150580 }, { "epoch": 7.479388099731797, "grad_norm": 0.13671875, "learning_rate": 0.00020165292539982123, "loss": 0.5167, "step": 150590 }, { "epoch": 7.479884772027416, "grad_norm": 0.1669921875, "learning_rate": 0.00020161319161617164, "loss": 0.4985, "step": 150600 }, { "epoch": 7.480381444323036, "grad_norm": 0.1689453125, "learning_rate": 0.00020157345783252214, "loss": 0.4823, "step": 150610 }, { "epoch": 7.480878116618655, "grad_norm": 0.1416015625, "learning_rate": 0.00020153372404887256, "loss": 0.4774, "step": 150620 }, { "epoch": 7.481374788914274, "grad_norm": 0.1513671875, "learning_rate": 0.000201493990265223, "loss": 0.5035, "step": 150630 }, { "epoch": 7.481871461209893, "grad_norm": 0.1552734375, "learning_rate": 0.00020145425648157347, "loss": 0.4775, "step": 150640 }, { "epoch": 7.482368133505513, "grad_norm": 0.158203125, "learning_rate": 0.00020141452269792392, "loss": 0.4709, "step": 150650 }, { "epoch": 7.482864805801133, "grad_norm": 0.1474609375, "learning_rate": 0.0002013747889142744, "loss": 0.5085, "step": 150660 }, { "epoch": 7.483361478096752, "grad_norm": 0.1435546875, "learning_rate": 0.00020133505513062483, "loss": 0.4913, "step": 150670 }, { "epoch": 7.483858150392371, "grad_norm": 0.1484375, "learning_rate": 0.00020129532134697525, "loss": 0.5026, "step": 150680 }, { "epoch": 7.48435482268799, "grad_norm": 0.1435546875, "learning_rate": 0.00020125558756332575, "loss": 0.5076, "step": 150690 }, { "epoch": 7.48485149498361, "grad_norm": 0.15625, "learning_rate": 0.00020121585377967617, "loss": 0.4643, "step": 150700 }, { "epoch": 7.485348167279229, "grad_norm": 0.1494140625, "learning_rate": 0.0002011761199960266, "loss": 0.4904, "step": 150710 }, { "epoch": 7.485844839574849, "grad_norm": 0.166015625, "learning_rate": 0.00020113638621237708, "loss": 0.4684, "step": 150720 }, { "epoch": 7.486341511870468, "grad_norm": 0.2265625, "learning_rate": 0.00020109665242872753, "loss": 0.4995, "step": 150730 }, { "epoch": 7.486838184166087, "grad_norm": 0.15234375, "learning_rate": 0.000201056918645078, "loss": 0.4827, "step": 150740 }, { "epoch": 7.487334856461707, "grad_norm": 0.1474609375, "learning_rate": 0.00020101718486142844, "loss": 0.4878, "step": 150750 }, { "epoch": 7.487831528757326, "grad_norm": 0.1533203125, "learning_rate": 0.00020097745107777889, "loss": 0.5121, "step": 150760 }, { "epoch": 7.488328201052945, "grad_norm": 0.158203125, "learning_rate": 0.00020093771729412936, "loss": 0.5013, "step": 150770 }, { "epoch": 7.488824873348564, "grad_norm": 0.1435546875, "learning_rate": 0.0002008979835104798, "loss": 0.4898, "step": 150780 }, { "epoch": 7.489321545644184, "grad_norm": 0.1611328125, "learning_rate": 0.00020085824972683022, "loss": 0.5036, "step": 150790 }, { "epoch": 7.489818217939804, "grad_norm": 0.1396484375, "learning_rate": 0.00020081851594318072, "loss": 0.4944, "step": 150800 }, { "epoch": 7.490314890235423, "grad_norm": 0.146484375, "learning_rate": 0.00020077878215953113, "loss": 0.5034, "step": 150810 }, { "epoch": 7.490811562531042, "grad_norm": 0.16015625, "learning_rate": 0.00020073904837588163, "loss": 0.5013, "step": 150820 }, { "epoch": 7.491308234826661, "grad_norm": 0.1708984375, "learning_rate": 0.00020069931459223205, "loss": 0.4785, "step": 150830 }, { "epoch": 7.4918049071222805, "grad_norm": 0.146484375, "learning_rate": 0.0002006595808085825, "loss": 0.4658, "step": 150840 }, { "epoch": 7.4923015794179, "grad_norm": 0.1533203125, "learning_rate": 0.00020061984702493297, "loss": 0.4785, "step": 150850 }, { "epoch": 7.49279825171352, "grad_norm": 0.15625, "learning_rate": 0.0002005801132412834, "loss": 0.4904, "step": 150860 }, { "epoch": 7.493294924009139, "grad_norm": 0.14453125, "learning_rate": 0.00020054037945763385, "loss": 0.4657, "step": 150870 }, { "epoch": 7.493791596304758, "grad_norm": 0.16796875, "learning_rate": 0.00020050064567398433, "loss": 0.5018, "step": 150880 }, { "epoch": 7.494288268600378, "grad_norm": 0.134765625, "learning_rate": 0.00020046091189033477, "loss": 0.5251, "step": 150890 }, { "epoch": 7.494784940895997, "grad_norm": 0.1455078125, "learning_rate": 0.00020042117810668524, "loss": 0.481, "step": 150900 }, { "epoch": 7.495281613191616, "grad_norm": 0.138671875, "learning_rate": 0.00020038144432303569, "loss": 0.4986, "step": 150910 }, { "epoch": 7.495778285487235, "grad_norm": 0.1513671875, "learning_rate": 0.0002003417105393861, "loss": 0.5211, "step": 150920 }, { "epoch": 7.4962749577828545, "grad_norm": 0.15234375, "learning_rate": 0.00020030197675573657, "loss": 0.4996, "step": 150930 }, { "epoch": 7.496771630078475, "grad_norm": 0.154296875, "learning_rate": 0.00020026224297208702, "loss": 0.4629, "step": 150940 }, { "epoch": 7.497268302374094, "grad_norm": 0.1494140625, "learning_rate": 0.00020022250918843746, "loss": 0.5066, "step": 150950 }, { "epoch": 7.497764974669713, "grad_norm": 0.1708984375, "learning_rate": 0.00020018277540478793, "loss": 0.4895, "step": 150960 }, { "epoch": 7.498261646965332, "grad_norm": 0.1435546875, "learning_rate": 0.00020014304162113838, "loss": 0.4812, "step": 150970 }, { "epoch": 7.4987583192609515, "grad_norm": 0.1416015625, "learning_rate": 0.00020010330783748885, "loss": 0.4761, "step": 150980 }, { "epoch": 7.499254991556571, "grad_norm": 0.150390625, "learning_rate": 0.0002000635740538393, "loss": 0.5024, "step": 150990 }, { "epoch": 7.49975166385219, "grad_norm": 0.1513671875, "learning_rate": 0.0002000238402701897, "loss": 0.4823, "step": 151000 }, { "epoch": 7.50024833614781, "grad_norm": 0.150390625, "learning_rate": 0.0001999841064865402, "loss": 0.4834, "step": 151010 }, { "epoch": 7.500745008443429, "grad_norm": 0.150390625, "learning_rate": 0.00019994437270289063, "loss": 0.4755, "step": 151020 }, { "epoch": 7.5012416807390485, "grad_norm": 0.1611328125, "learning_rate": 0.0001999046389192411, "loss": 0.4558, "step": 151030 }, { "epoch": 7.501738353034668, "grad_norm": 0.150390625, "learning_rate": 0.00019986490513559154, "loss": 0.5269, "step": 151040 }, { "epoch": 7.502235025330287, "grad_norm": 0.1884765625, "learning_rate": 0.000199825171351942, "loss": 0.5016, "step": 151050 }, { "epoch": 7.502731697625906, "grad_norm": 0.1796875, "learning_rate": 0.00019978543756829246, "loss": 0.4781, "step": 151060 }, { "epoch": 7.503228369921525, "grad_norm": 0.1591796875, "learning_rate": 0.0001997457037846429, "loss": 0.5139, "step": 151070 }, { "epoch": 7.503725042217145, "grad_norm": 0.15625, "learning_rate": 0.00019970597000099335, "loss": 0.4934, "step": 151080 }, { "epoch": 7.504221714512765, "grad_norm": 0.1357421875, "learning_rate": 0.00019966623621734382, "loss": 0.5015, "step": 151090 }, { "epoch": 7.504718386808384, "grad_norm": 0.1572265625, "learning_rate": 0.00019962650243369426, "loss": 0.4734, "step": 151100 }, { "epoch": 7.505215059104003, "grad_norm": 0.146484375, "learning_rate": 0.0001995867686500447, "loss": 0.4978, "step": 151110 }, { "epoch": 7.505711731399622, "grad_norm": 0.138671875, "learning_rate": 0.00019954703486639518, "loss": 0.5078, "step": 151120 }, { "epoch": 7.506208403695242, "grad_norm": 0.158203125, "learning_rate": 0.00019950730108274562, "loss": 0.5149, "step": 151130 }, { "epoch": 7.506705075990861, "grad_norm": 0.1455078125, "learning_rate": 0.00019946756729909607, "loss": 0.4712, "step": 151140 }, { "epoch": 7.50720174828648, "grad_norm": 0.1484375, "learning_rate": 0.0001994278335154465, "loss": 0.4818, "step": 151150 }, { "epoch": 7.5076984205821, "grad_norm": 0.171875, "learning_rate": 0.00019938809973179698, "loss": 0.5044, "step": 151160 }, { "epoch": 7.5081950928777195, "grad_norm": 0.1435546875, "learning_rate": 0.00019934836594814743, "loss": 0.4519, "step": 151170 }, { "epoch": 7.508691765173339, "grad_norm": 0.1484375, "learning_rate": 0.00019930863216449787, "loss": 0.5022, "step": 151180 }, { "epoch": 7.509188437468958, "grad_norm": 0.16796875, "learning_rate": 0.00019926889838084831, "loss": 0.4742, "step": 151190 }, { "epoch": 7.509685109764577, "grad_norm": 0.15234375, "learning_rate": 0.00019922916459719879, "loss": 0.5147, "step": 151200 }, { "epoch": 7.510181782060196, "grad_norm": 0.1416015625, "learning_rate": 0.00019918943081354923, "loss": 0.5044, "step": 151210 }, { "epoch": 7.510678454355816, "grad_norm": 0.1474609375, "learning_rate": 0.00019914969702989967, "loss": 0.5168, "step": 151220 }, { "epoch": 7.511175126651436, "grad_norm": 0.162109375, "learning_rate": 0.00019910996324625012, "loss": 0.4839, "step": 151230 }, { "epoch": 7.511671798947055, "grad_norm": 0.1572265625, "learning_rate": 0.0001990702294626006, "loss": 0.4637, "step": 151240 }, { "epoch": 7.512168471242674, "grad_norm": 0.1552734375, "learning_rate": 0.00019903049567895103, "loss": 0.4912, "step": 151250 }, { "epoch": 7.512665143538293, "grad_norm": 0.140625, "learning_rate": 0.00019899076189530148, "loss": 0.4905, "step": 151260 }, { "epoch": 7.513161815833913, "grad_norm": 0.1474609375, "learning_rate": 0.00019895102811165195, "loss": 0.4811, "step": 151270 }, { "epoch": 7.513658488129532, "grad_norm": 0.1484375, "learning_rate": 0.0001989112943280024, "loss": 0.4526, "step": 151280 }, { "epoch": 7.514155160425151, "grad_norm": 0.1611328125, "learning_rate": 0.00019887156054435286, "loss": 0.4655, "step": 151290 }, { "epoch": 7.514651832720771, "grad_norm": 0.1669921875, "learning_rate": 0.00019883182676070328, "loss": 0.4852, "step": 151300 }, { "epoch": 7.51514850501639, "grad_norm": 0.1474609375, "learning_rate": 0.00019879209297705375, "loss": 0.4661, "step": 151310 }, { "epoch": 7.51564517731201, "grad_norm": 0.1474609375, "learning_rate": 0.0001987523591934042, "loss": 0.4959, "step": 151320 }, { "epoch": 7.516141849607629, "grad_norm": 0.140625, "learning_rate": 0.00019871262540975467, "loss": 0.4951, "step": 151330 }, { "epoch": 7.516638521903248, "grad_norm": 0.1474609375, "learning_rate": 0.00019867289162610509, "loss": 0.4933, "step": 151340 }, { "epoch": 7.517135194198867, "grad_norm": 0.1416015625, "learning_rate": 0.00019863315784245556, "loss": 0.4918, "step": 151350 }, { "epoch": 7.517631866494487, "grad_norm": 0.1513671875, "learning_rate": 0.000198593424058806, "loss": 0.4697, "step": 151360 }, { "epoch": 7.518128538790107, "grad_norm": 0.14453125, "learning_rate": 0.00019855369027515647, "loss": 0.4969, "step": 151370 }, { "epoch": 7.518625211085726, "grad_norm": 0.142578125, "learning_rate": 0.0001985139564915069, "loss": 0.5117, "step": 151380 }, { "epoch": 7.519121883381345, "grad_norm": 0.150390625, "learning_rate": 0.00019847422270785736, "loss": 0.5019, "step": 151390 }, { "epoch": 7.519618555676964, "grad_norm": 0.1513671875, "learning_rate": 0.0001984344889242078, "loss": 0.5078, "step": 151400 }, { "epoch": 7.520115227972584, "grad_norm": 0.150390625, "learning_rate": 0.00019839475514055828, "loss": 0.4668, "step": 151410 }, { "epoch": 7.520611900268203, "grad_norm": 0.1513671875, "learning_rate": 0.00019835502135690872, "loss": 0.4808, "step": 151420 }, { "epoch": 7.521108572563822, "grad_norm": 0.1767578125, "learning_rate": 0.00019831528757325917, "loss": 0.4847, "step": 151430 }, { "epoch": 7.521605244859442, "grad_norm": 0.173828125, "learning_rate": 0.00019827555378960964, "loss": 0.4861, "step": 151440 }, { "epoch": 7.522101917155061, "grad_norm": 0.1767578125, "learning_rate": 0.00019823582000596008, "loss": 0.4836, "step": 151450 }, { "epoch": 7.522598589450681, "grad_norm": 0.1357421875, "learning_rate": 0.00019819608622231055, "loss": 0.4662, "step": 151460 }, { "epoch": 7.5230952617463, "grad_norm": 0.162109375, "learning_rate": 0.00019815635243866097, "loss": 0.5157, "step": 151470 }, { "epoch": 7.523591934041919, "grad_norm": 0.16015625, "learning_rate": 0.00019811661865501144, "loss": 0.4875, "step": 151480 }, { "epoch": 7.524088606337538, "grad_norm": 0.1396484375, "learning_rate": 0.00019807688487136189, "loss": 0.4465, "step": 151490 }, { "epoch": 7.5245852786331575, "grad_norm": 0.150390625, "learning_rate": 0.00019803715108771236, "loss": 0.4683, "step": 151500 }, { "epoch": 7.525081950928778, "grad_norm": 0.1376953125, "learning_rate": 0.00019799741730406277, "loss": 0.4887, "step": 151510 }, { "epoch": 7.525578623224397, "grad_norm": 0.130859375, "learning_rate": 0.00019795768352041325, "loss": 0.5133, "step": 151520 }, { "epoch": 7.526075295520016, "grad_norm": 0.1494140625, "learning_rate": 0.0001979179497367637, "loss": 0.5277, "step": 151530 }, { "epoch": 7.526571967815635, "grad_norm": 0.142578125, "learning_rate": 0.00019787821595311416, "loss": 0.4574, "step": 151540 }, { "epoch": 7.5270686401112545, "grad_norm": 0.14453125, "learning_rate": 0.00019783848216946458, "loss": 0.5024, "step": 151550 }, { "epoch": 7.527565312406874, "grad_norm": 0.1572265625, "learning_rate": 0.00019779874838581505, "loss": 0.4973, "step": 151560 }, { "epoch": 7.528061984702493, "grad_norm": 0.140625, "learning_rate": 0.0001977590146021655, "loss": 0.496, "step": 151570 }, { "epoch": 7.528558656998113, "grad_norm": 0.1552734375, "learning_rate": 0.00019771928081851596, "loss": 0.4705, "step": 151580 }, { "epoch": 7.529055329293732, "grad_norm": 0.1572265625, "learning_rate": 0.0001976795470348664, "loss": 0.4755, "step": 151590 }, { "epoch": 7.529552001589352, "grad_norm": 0.1630859375, "learning_rate": 0.00019763981325121685, "loss": 0.4684, "step": 151600 }, { "epoch": 7.530048673884971, "grad_norm": 0.1650390625, "learning_rate": 0.00019760007946756732, "loss": 0.4966, "step": 151610 }, { "epoch": 7.53054534618059, "grad_norm": 0.16796875, "learning_rate": 0.00019756034568391777, "loss": 0.5127, "step": 151620 }, { "epoch": 7.531042018476209, "grad_norm": 0.1650390625, "learning_rate": 0.0001975206119002682, "loss": 0.4807, "step": 151630 }, { "epoch": 7.5315386907718285, "grad_norm": 0.146484375, "learning_rate": 0.00019748087811661866, "loss": 0.4676, "step": 151640 }, { "epoch": 7.532035363067449, "grad_norm": 0.15234375, "learning_rate": 0.00019744114433296913, "loss": 0.4945, "step": 151650 }, { "epoch": 7.532532035363068, "grad_norm": 0.1435546875, "learning_rate": 0.00019740141054931957, "loss": 0.4862, "step": 151660 }, { "epoch": 7.533028707658687, "grad_norm": 0.1630859375, "learning_rate": 0.00019736167676567002, "loss": 0.4681, "step": 151670 }, { "epoch": 7.533525379954306, "grad_norm": 0.15234375, "learning_rate": 0.00019732194298202046, "loss": 0.4649, "step": 151680 }, { "epoch": 7.5340220522499255, "grad_norm": 0.1650390625, "learning_rate": 0.00019728220919837093, "loss": 0.5175, "step": 151690 }, { "epoch": 7.534518724545545, "grad_norm": 0.1728515625, "learning_rate": 0.00019724247541472138, "loss": 0.4863, "step": 151700 }, { "epoch": 7.535015396841164, "grad_norm": 0.1494140625, "learning_rate": 0.00019720274163107182, "loss": 0.4879, "step": 151710 }, { "epoch": 7.535512069136783, "grad_norm": 0.158203125, "learning_rate": 0.00019716300784742227, "loss": 0.4997, "step": 151720 }, { "epoch": 7.536008741432402, "grad_norm": 0.1552734375, "learning_rate": 0.00019712327406377274, "loss": 0.4874, "step": 151730 }, { "epoch": 7.5365054137280225, "grad_norm": 0.1494140625, "learning_rate": 0.00019708354028012318, "loss": 0.4761, "step": 151740 }, { "epoch": 7.537002086023642, "grad_norm": 0.224609375, "learning_rate": 0.00019704380649647363, "loss": 0.4938, "step": 151750 }, { "epoch": 7.537498758319261, "grad_norm": 0.19140625, "learning_rate": 0.0001970040727128241, "loss": 0.4834, "step": 151760 }, { "epoch": 7.53799543061488, "grad_norm": 0.1435546875, "learning_rate": 0.00019696433892917454, "loss": 0.4996, "step": 151770 }, { "epoch": 7.538492102910499, "grad_norm": 0.1484375, "learning_rate": 0.00019692460514552499, "loss": 0.4935, "step": 151780 }, { "epoch": 7.538988775206119, "grad_norm": 0.1484375, "learning_rate": 0.00019688487136187543, "loss": 0.4975, "step": 151790 }, { "epoch": 7.539485447501738, "grad_norm": 0.150390625, "learning_rate": 0.0001968451375782259, "loss": 0.4957, "step": 151800 }, { "epoch": 7.539982119797358, "grad_norm": 0.1533203125, "learning_rate": 0.00019680540379457635, "loss": 0.484, "step": 151810 }, { "epoch": 7.540478792092977, "grad_norm": 0.1337890625, "learning_rate": 0.00019676567001092682, "loss": 0.5024, "step": 151820 }, { "epoch": 7.5409754643885964, "grad_norm": 0.17578125, "learning_rate": 0.00019672593622727723, "loss": 0.4973, "step": 151830 }, { "epoch": 7.541472136684216, "grad_norm": 0.16796875, "learning_rate": 0.0001966862024436277, "loss": 0.5094, "step": 151840 }, { "epoch": 7.541968808979835, "grad_norm": 0.1318359375, "learning_rate": 0.00019664646865997815, "loss": 0.4825, "step": 151850 }, { "epoch": 7.542465481275454, "grad_norm": 0.177734375, "learning_rate": 0.00019660673487632862, "loss": 0.4917, "step": 151860 }, { "epoch": 7.542962153571073, "grad_norm": 0.15234375, "learning_rate": 0.00019656700109267907, "loss": 0.4823, "step": 151870 }, { "epoch": 7.5434588258666935, "grad_norm": 0.1328125, "learning_rate": 0.0001965272673090295, "loss": 0.4685, "step": 151880 }, { "epoch": 7.543955498162313, "grad_norm": 0.14453125, "learning_rate": 0.00019648753352537995, "loss": 0.4929, "step": 151890 }, { "epoch": 7.544452170457932, "grad_norm": 0.1552734375, "learning_rate": 0.00019644779974173042, "loss": 0.498, "step": 151900 }, { "epoch": 7.544948842753551, "grad_norm": 0.21484375, "learning_rate": 0.00019640806595808087, "loss": 0.5111, "step": 151910 }, { "epoch": 7.54544551504917, "grad_norm": 0.16796875, "learning_rate": 0.0001963683321744313, "loss": 0.4959, "step": 151920 }, { "epoch": 7.54594218734479, "grad_norm": 0.1494140625, "learning_rate": 0.00019632859839078176, "loss": 0.4935, "step": 151930 }, { "epoch": 7.546438859640409, "grad_norm": 0.158203125, "learning_rate": 0.00019628886460713223, "loss": 0.5074, "step": 151940 }, { "epoch": 7.546935531936029, "grad_norm": 0.142578125, "learning_rate": 0.00019624913082348267, "loss": 0.4886, "step": 151950 }, { "epoch": 7.547432204231648, "grad_norm": 0.1708984375, "learning_rate": 0.00019620939703983312, "loss": 0.4911, "step": 151960 }, { "epoch": 7.547928876527267, "grad_norm": 0.166015625, "learning_rate": 0.0001961696632561836, "loss": 0.4812, "step": 151970 }, { "epoch": 7.548425548822887, "grad_norm": 0.14453125, "learning_rate": 0.00019612992947253403, "loss": 0.498, "step": 151980 }, { "epoch": 7.548922221118506, "grad_norm": 0.1611328125, "learning_rate": 0.0001960901956888845, "loss": 0.4915, "step": 151990 }, { "epoch": 7.549418893414125, "grad_norm": 0.134765625, "learning_rate": 0.00019605046190523492, "loss": 0.5126, "step": 152000 }, { "epoch": 7.549915565709744, "grad_norm": 0.1484375, "learning_rate": 0.0001960107281215854, "loss": 0.4934, "step": 152010 }, { "epoch": 7.550412238005364, "grad_norm": 0.1474609375, "learning_rate": 0.00019597099433793584, "loss": 0.5036, "step": 152020 }, { "epoch": 7.550908910300984, "grad_norm": 0.1513671875, "learning_rate": 0.0001959312605542863, "loss": 0.5045, "step": 152030 }, { "epoch": 7.551405582596603, "grad_norm": 0.1591796875, "learning_rate": 0.00019589152677063673, "loss": 0.5075, "step": 152040 }, { "epoch": 7.551902254892222, "grad_norm": 0.21875, "learning_rate": 0.0001958517929869872, "loss": 0.4903, "step": 152050 }, { "epoch": 7.552398927187841, "grad_norm": 0.1787109375, "learning_rate": 0.00019581205920333764, "loss": 0.5088, "step": 152060 }, { "epoch": 7.552895599483461, "grad_norm": 0.1513671875, "learning_rate": 0.0001957723254196881, "loss": 0.4865, "step": 152070 }, { "epoch": 7.55339227177908, "grad_norm": 0.17578125, "learning_rate": 0.00019573259163603853, "loss": 0.4959, "step": 152080 }, { "epoch": 7.5538889440747, "grad_norm": 0.1318359375, "learning_rate": 0.000195692857852389, "loss": 0.4927, "step": 152090 }, { "epoch": 7.554385616370319, "grad_norm": 0.15625, "learning_rate": 0.00019565312406873945, "loss": 0.4903, "step": 152100 }, { "epoch": 7.554882288665938, "grad_norm": 0.1572265625, "learning_rate": 0.00019561339028508992, "loss": 0.4768, "step": 152110 }, { "epoch": 7.555378960961558, "grad_norm": 0.1396484375, "learning_rate": 0.00019557365650144036, "loss": 0.466, "step": 152120 }, { "epoch": 7.555875633257177, "grad_norm": 0.158203125, "learning_rate": 0.0001955339227177908, "loss": 0.4701, "step": 152130 }, { "epoch": 7.556372305552796, "grad_norm": 0.1591796875, "learning_rate": 0.00019549418893414128, "loss": 0.4693, "step": 152140 }, { "epoch": 7.556868977848415, "grad_norm": 0.1611328125, "learning_rate": 0.00019545445515049172, "loss": 0.5133, "step": 152150 }, { "epoch": 7.557365650144035, "grad_norm": 0.14453125, "learning_rate": 0.00019541472136684217, "loss": 0.4872, "step": 152160 }, { "epoch": 7.557862322439655, "grad_norm": 0.1748046875, "learning_rate": 0.0001953749875831926, "loss": 0.4954, "step": 152170 }, { "epoch": 7.558358994735274, "grad_norm": 0.15234375, "learning_rate": 0.00019533525379954308, "loss": 0.486, "step": 152180 }, { "epoch": 7.558855667030893, "grad_norm": 0.1396484375, "learning_rate": 0.00019529552001589353, "loss": 0.5108, "step": 152190 }, { "epoch": 7.559352339326512, "grad_norm": 0.1396484375, "learning_rate": 0.00019525578623224397, "loss": 0.4682, "step": 152200 }, { "epoch": 7.5598490116221315, "grad_norm": 0.1357421875, "learning_rate": 0.0001952160524485944, "loss": 0.4867, "step": 152210 }, { "epoch": 7.560345683917751, "grad_norm": 0.1279296875, "learning_rate": 0.00019517631866494488, "loss": 0.4885, "step": 152220 }, { "epoch": 7.560842356213371, "grad_norm": 0.146484375, "learning_rate": 0.00019513658488129533, "loss": 0.5012, "step": 152230 }, { "epoch": 7.56133902850899, "grad_norm": 0.1435546875, "learning_rate": 0.00019509685109764577, "loss": 0.4948, "step": 152240 }, { "epoch": 7.561835700804609, "grad_norm": 0.166015625, "learning_rate": 0.00019505711731399622, "loss": 0.5236, "step": 152250 }, { "epoch": 7.5623323731002285, "grad_norm": 0.1416015625, "learning_rate": 0.0001950173835303467, "loss": 0.4801, "step": 152260 }, { "epoch": 7.562829045395848, "grad_norm": 0.1611328125, "learning_rate": 0.00019497764974669713, "loss": 0.5022, "step": 152270 }, { "epoch": 7.563325717691467, "grad_norm": 0.1396484375, "learning_rate": 0.0001949379159630476, "loss": 0.4811, "step": 152280 }, { "epoch": 7.563822389987086, "grad_norm": 0.1630859375, "learning_rate": 0.00019489818217939805, "loss": 0.5052, "step": 152290 }, { "epoch": 7.564319062282706, "grad_norm": 0.15625, "learning_rate": 0.0001948584483957485, "loss": 0.5147, "step": 152300 }, { "epoch": 7.564815734578326, "grad_norm": 0.150390625, "learning_rate": 0.00019481871461209896, "loss": 0.4936, "step": 152310 }, { "epoch": 7.565312406873945, "grad_norm": 0.1494140625, "learning_rate": 0.0001947789808284494, "loss": 0.4627, "step": 152320 }, { "epoch": 7.565809079169564, "grad_norm": 0.1689453125, "learning_rate": 0.00019473924704479985, "loss": 0.4492, "step": 152330 }, { "epoch": 7.566305751465183, "grad_norm": 0.162109375, "learning_rate": 0.0001946995132611503, "loss": 0.4764, "step": 152340 }, { "epoch": 7.5668024237608025, "grad_norm": 0.1689453125, "learning_rate": 0.00019465977947750077, "loss": 0.5023, "step": 152350 }, { "epoch": 7.567299096056422, "grad_norm": 0.14453125, "learning_rate": 0.0001946200456938512, "loss": 0.5026, "step": 152360 }, { "epoch": 7.567795768352042, "grad_norm": 0.19140625, "learning_rate": 0.00019458031191020166, "loss": 0.4972, "step": 152370 }, { "epoch": 7.568292440647661, "grad_norm": 0.1376953125, "learning_rate": 0.0001945405781265521, "loss": 0.4831, "step": 152380 }, { "epoch": 7.56878911294328, "grad_norm": 0.158203125, "learning_rate": 0.00019450084434290257, "loss": 0.4975, "step": 152390 }, { "epoch": 7.5692857852388995, "grad_norm": 0.140625, "learning_rate": 0.00019446111055925302, "loss": 0.4622, "step": 152400 }, { "epoch": 7.569782457534519, "grad_norm": 0.1474609375, "learning_rate": 0.00019442137677560346, "loss": 0.5062, "step": 152410 }, { "epoch": 7.570279129830138, "grad_norm": 0.1669921875, "learning_rate": 0.0001943816429919539, "loss": 0.4771, "step": 152420 }, { "epoch": 7.570775802125757, "grad_norm": 0.1611328125, "learning_rate": 0.00019434190920830438, "loss": 0.4967, "step": 152430 }, { "epoch": 7.571272474421376, "grad_norm": 0.13671875, "learning_rate": 0.00019430217542465482, "loss": 0.492, "step": 152440 }, { "epoch": 7.571769146716996, "grad_norm": 0.1337890625, "learning_rate": 0.00019426244164100527, "loss": 0.5033, "step": 152450 }, { "epoch": 7.572265819012616, "grad_norm": 0.1455078125, "learning_rate": 0.00019422270785735574, "loss": 0.4958, "step": 152460 }, { "epoch": 7.572762491308235, "grad_norm": 0.1474609375, "learning_rate": 0.00019418297407370618, "loss": 0.4741, "step": 152470 }, { "epoch": 7.573259163603854, "grad_norm": 0.1630859375, "learning_rate": 0.00019414324029005663, "loss": 0.4976, "step": 152480 }, { "epoch": 7.573755835899473, "grad_norm": 0.169921875, "learning_rate": 0.00019410350650640707, "loss": 0.4931, "step": 152490 }, { "epoch": 7.574252508195093, "grad_norm": 0.140625, "learning_rate": 0.00019406377272275754, "loss": 0.487, "step": 152500 }, { "epoch": 7.574749180490712, "grad_norm": 0.15625, "learning_rate": 0.00019402403893910798, "loss": 0.4996, "step": 152510 }, { "epoch": 7.575245852786331, "grad_norm": 0.1669921875, "learning_rate": 0.00019398430515545846, "loss": 0.4514, "step": 152520 }, { "epoch": 7.575742525081951, "grad_norm": 0.1591796875, "learning_rate": 0.00019394457137180887, "loss": 0.4785, "step": 152530 }, { "epoch": 7.5762391973775705, "grad_norm": 0.1943359375, "learning_rate": 0.00019390483758815934, "loss": 0.499, "step": 152540 }, { "epoch": 7.57673586967319, "grad_norm": 0.1396484375, "learning_rate": 0.0001938651038045098, "loss": 0.4918, "step": 152550 }, { "epoch": 7.577232541968809, "grad_norm": 0.1513671875, "learning_rate": 0.00019382537002086026, "loss": 0.47, "step": 152560 }, { "epoch": 7.577729214264428, "grad_norm": 0.162109375, "learning_rate": 0.00019378563623721068, "loss": 0.4788, "step": 152570 }, { "epoch": 7.578225886560047, "grad_norm": 0.1611328125, "learning_rate": 0.00019374590245356115, "loss": 0.4927, "step": 152580 }, { "epoch": 7.578722558855667, "grad_norm": 0.166015625, "learning_rate": 0.0001937061686699116, "loss": 0.4955, "step": 152590 }, { "epoch": 7.579219231151287, "grad_norm": 0.14453125, "learning_rate": 0.00019366643488626206, "loss": 0.4595, "step": 152600 }, { "epoch": 7.579715903446906, "grad_norm": 0.1494140625, "learning_rate": 0.0001936267011026125, "loss": 0.482, "step": 152610 }, { "epoch": 7.580212575742525, "grad_norm": 0.1552734375, "learning_rate": 0.00019358696731896295, "loss": 0.4559, "step": 152620 }, { "epoch": 7.580709248038144, "grad_norm": 0.1640625, "learning_rate": 0.0001935472335353134, "loss": 0.4868, "step": 152630 }, { "epoch": 7.581205920333764, "grad_norm": 0.1611328125, "learning_rate": 0.00019350749975166387, "loss": 0.498, "step": 152640 }, { "epoch": 7.581702592629383, "grad_norm": 0.1796875, "learning_rate": 0.0001934677659680143, "loss": 0.5297, "step": 152650 }, { "epoch": 7.582199264925002, "grad_norm": 0.1533203125, "learning_rate": 0.00019342803218436476, "loss": 0.4542, "step": 152660 }, { "epoch": 7.582695937220622, "grad_norm": 0.1533203125, "learning_rate": 0.00019338829840071523, "loss": 0.4785, "step": 152670 }, { "epoch": 7.583192609516241, "grad_norm": 0.1572265625, "learning_rate": 0.00019334856461706567, "loss": 0.4759, "step": 152680 }, { "epoch": 7.583689281811861, "grad_norm": 0.1513671875, "learning_rate": 0.00019330883083341612, "loss": 0.5101, "step": 152690 }, { "epoch": 7.58418595410748, "grad_norm": 0.1591796875, "learning_rate": 0.00019326909704976656, "loss": 0.4911, "step": 152700 }, { "epoch": 7.584682626403099, "grad_norm": 0.1416015625, "learning_rate": 0.00019322936326611703, "loss": 0.4462, "step": 152710 }, { "epoch": 7.585179298698718, "grad_norm": 0.171875, "learning_rate": 0.00019318962948246748, "loss": 0.4964, "step": 152720 }, { "epoch": 7.5856759709943375, "grad_norm": 0.18359375, "learning_rate": 0.00019314989569881795, "loss": 0.4997, "step": 152730 }, { "epoch": 7.586172643289958, "grad_norm": 0.158203125, "learning_rate": 0.00019311016191516837, "loss": 0.4711, "step": 152740 }, { "epoch": 7.586669315585577, "grad_norm": 0.169921875, "learning_rate": 0.00019307042813151884, "loss": 0.4773, "step": 152750 }, { "epoch": 7.587165987881196, "grad_norm": 0.1416015625, "learning_rate": 0.00019303069434786928, "loss": 0.4628, "step": 152760 }, { "epoch": 7.587662660176815, "grad_norm": 0.1494140625, "learning_rate": 0.00019299096056421975, "loss": 0.4686, "step": 152770 }, { "epoch": 7.588159332472435, "grad_norm": 0.1728515625, "learning_rate": 0.00019295122678057017, "loss": 0.4697, "step": 152780 }, { "epoch": 7.588656004768054, "grad_norm": 0.1357421875, "learning_rate": 0.00019291149299692064, "loss": 0.4738, "step": 152790 }, { "epoch": 7.589152677063673, "grad_norm": 0.1455078125, "learning_rate": 0.00019287175921327109, "loss": 0.4983, "step": 152800 }, { "epoch": 7.589649349359293, "grad_norm": 0.14453125, "learning_rate": 0.00019283202542962156, "loss": 0.4926, "step": 152810 }, { "epoch": 7.590146021654912, "grad_norm": 0.15234375, "learning_rate": 0.000192792291645972, "loss": 0.4742, "step": 152820 }, { "epoch": 7.590642693950532, "grad_norm": 0.154296875, "learning_rate": 0.00019275255786232244, "loss": 0.4904, "step": 152830 }, { "epoch": 7.591139366246151, "grad_norm": 0.1513671875, "learning_rate": 0.00019271282407867292, "loss": 0.4825, "step": 152840 }, { "epoch": 7.59163603854177, "grad_norm": 0.1474609375, "learning_rate": 0.00019267309029502336, "loss": 0.4604, "step": 152850 }, { "epoch": 7.592132710837389, "grad_norm": 0.1484375, "learning_rate": 0.0001926333565113738, "loss": 0.4852, "step": 152860 }, { "epoch": 7.5926293831330085, "grad_norm": 0.1640625, "learning_rate": 0.00019259362272772425, "loss": 0.4775, "step": 152870 }, { "epoch": 7.593126055428629, "grad_norm": 0.177734375, "learning_rate": 0.00019255388894407472, "loss": 0.4789, "step": 152880 }, { "epoch": 7.593622727724248, "grad_norm": 0.1552734375, "learning_rate": 0.00019251415516042516, "loss": 0.4874, "step": 152890 }, { "epoch": 7.594119400019867, "grad_norm": 0.134765625, "learning_rate": 0.0001924744213767756, "loss": 0.4828, "step": 152900 }, { "epoch": 7.594616072315486, "grad_norm": 0.1689453125, "learning_rate": 0.00019243468759312605, "loss": 0.4898, "step": 152910 }, { "epoch": 7.5951127446111055, "grad_norm": 0.1533203125, "learning_rate": 0.00019239495380947652, "loss": 0.4894, "step": 152920 }, { "epoch": 7.595609416906725, "grad_norm": 0.150390625, "learning_rate": 0.00019235522002582697, "loss": 0.5232, "step": 152930 }, { "epoch": 7.596106089202344, "grad_norm": 0.1552734375, "learning_rate": 0.0001923154862421774, "loss": 0.4549, "step": 152940 }, { "epoch": 7.596602761497964, "grad_norm": 0.1455078125, "learning_rate": 0.00019227575245852786, "loss": 0.469, "step": 152950 }, { "epoch": 7.597099433793583, "grad_norm": 0.1591796875, "learning_rate": 0.00019223601867487833, "loss": 0.517, "step": 152960 }, { "epoch": 7.5975961060892026, "grad_norm": 0.1474609375, "learning_rate": 0.00019219628489122877, "loss": 0.4966, "step": 152970 }, { "epoch": 7.598092778384822, "grad_norm": 0.1396484375, "learning_rate": 0.00019215655110757922, "loss": 0.4698, "step": 152980 }, { "epoch": 7.598589450680441, "grad_norm": 0.1513671875, "learning_rate": 0.0001921168173239297, "loss": 0.4923, "step": 152990 }, { "epoch": 7.59908612297606, "grad_norm": 0.154296875, "learning_rate": 0.00019207708354028013, "loss": 0.4954, "step": 153000 }, { "epoch": 7.5995827952716795, "grad_norm": 0.138671875, "learning_rate": 0.0001920373497566306, "loss": 0.5022, "step": 153010 }, { "epoch": 7.6000794675673, "grad_norm": 0.1474609375, "learning_rate": 0.00019199761597298102, "loss": 0.4913, "step": 153020 }, { "epoch": 7.600576139862919, "grad_norm": 0.1689453125, "learning_rate": 0.0001919578821893315, "loss": 0.5118, "step": 153030 }, { "epoch": 7.601072812158538, "grad_norm": 0.1484375, "learning_rate": 0.00019191814840568194, "loss": 0.4902, "step": 153040 }, { "epoch": 7.601569484454157, "grad_norm": 0.1611328125, "learning_rate": 0.0001918784146220324, "loss": 0.4935, "step": 153050 }, { "epoch": 7.6020661567497765, "grad_norm": 0.14453125, "learning_rate": 0.00019183868083838283, "loss": 0.498, "step": 153060 }, { "epoch": 7.602562829045396, "grad_norm": 0.14453125, "learning_rate": 0.0001917989470547333, "loss": 0.4913, "step": 153070 }, { "epoch": 7.603059501341015, "grad_norm": 0.1552734375, "learning_rate": 0.00019175921327108374, "loss": 0.477, "step": 153080 }, { "epoch": 7.603556173636634, "grad_norm": 0.16796875, "learning_rate": 0.0001917194794874342, "loss": 0.4654, "step": 153090 }, { "epoch": 7.604052845932254, "grad_norm": 0.142578125, "learning_rate": 0.00019167974570378463, "loss": 0.4991, "step": 153100 }, { "epoch": 7.6045495182278735, "grad_norm": 0.146484375, "learning_rate": 0.0001916400119201351, "loss": 0.4697, "step": 153110 }, { "epoch": 7.605046190523493, "grad_norm": 0.154296875, "learning_rate": 0.00019160027813648555, "loss": 0.4818, "step": 153120 }, { "epoch": 7.605542862819112, "grad_norm": 0.154296875, "learning_rate": 0.00019156054435283602, "loss": 0.4834, "step": 153130 }, { "epoch": 7.606039535114731, "grad_norm": 0.1748046875, "learning_rate": 0.00019152081056918646, "loss": 0.4731, "step": 153140 }, { "epoch": 7.60653620741035, "grad_norm": 0.1298828125, "learning_rate": 0.0001914810767855369, "loss": 0.4613, "step": 153150 }, { "epoch": 7.60703287970597, "grad_norm": 0.1435546875, "learning_rate": 0.00019144134300188738, "loss": 0.5042, "step": 153160 }, { "epoch": 7.607529552001589, "grad_norm": 0.2001953125, "learning_rate": 0.00019140160921823782, "loss": 0.5001, "step": 153170 }, { "epoch": 7.608026224297209, "grad_norm": 0.1474609375, "learning_rate": 0.00019136187543458826, "loss": 0.484, "step": 153180 }, { "epoch": 7.608522896592828, "grad_norm": 0.1513671875, "learning_rate": 0.0001913221416509387, "loss": 0.4745, "step": 153190 }, { "epoch": 7.609019568888447, "grad_norm": 0.1435546875, "learning_rate": 0.00019128240786728918, "loss": 0.4809, "step": 153200 }, { "epoch": 7.609516241184067, "grad_norm": 0.16796875, "learning_rate": 0.00019124267408363962, "loss": 0.4721, "step": 153210 }, { "epoch": 7.610012913479686, "grad_norm": 0.140625, "learning_rate": 0.0001912029402999901, "loss": 0.4792, "step": 153220 }, { "epoch": 7.610509585775305, "grad_norm": 0.146484375, "learning_rate": 0.0001911632065163405, "loss": 0.4953, "step": 153230 }, { "epoch": 7.611006258070924, "grad_norm": 0.1357421875, "learning_rate": 0.00019112347273269098, "loss": 0.4718, "step": 153240 }, { "epoch": 7.6115029303665445, "grad_norm": 0.15625, "learning_rate": 0.00019108373894904143, "loss": 0.5329, "step": 153250 }, { "epoch": 7.611999602662164, "grad_norm": 0.1689453125, "learning_rate": 0.0001910440051653919, "loss": 0.5006, "step": 153260 }, { "epoch": 7.612496274957783, "grad_norm": 0.158203125, "learning_rate": 0.00019100427138174232, "loss": 0.4735, "step": 153270 }, { "epoch": 7.612992947253402, "grad_norm": 0.14453125, "learning_rate": 0.0001909645375980928, "loss": 0.5047, "step": 153280 }, { "epoch": 7.613489619549021, "grad_norm": 0.1591796875, "learning_rate": 0.00019092480381444323, "loss": 0.5039, "step": 153290 }, { "epoch": 7.613986291844641, "grad_norm": 0.1376953125, "learning_rate": 0.0001908850700307937, "loss": 0.4499, "step": 153300 }, { "epoch": 7.61448296414026, "grad_norm": 0.1416015625, "learning_rate": 0.00019084533624714415, "loss": 0.5066, "step": 153310 }, { "epoch": 7.61497963643588, "grad_norm": 0.1494140625, "learning_rate": 0.0001908056024634946, "loss": 0.4999, "step": 153320 }, { "epoch": 7.615476308731499, "grad_norm": 0.15234375, "learning_rate": 0.00019076586867984504, "loss": 0.5119, "step": 153330 }, { "epoch": 7.615972981027118, "grad_norm": 0.16796875, "learning_rate": 0.0001907261348961955, "loss": 0.489, "step": 153340 }, { "epoch": 7.616469653322738, "grad_norm": 0.146484375, "learning_rate": 0.00019068640111254595, "loss": 0.4963, "step": 153350 }, { "epoch": 7.616966325618357, "grad_norm": 0.162109375, "learning_rate": 0.0001906466673288964, "loss": 0.5032, "step": 153360 }, { "epoch": 7.617462997913976, "grad_norm": 0.1630859375, "learning_rate": 0.00019060693354524687, "loss": 0.4976, "step": 153370 }, { "epoch": 7.617959670209595, "grad_norm": 0.17578125, "learning_rate": 0.0001905671997615973, "loss": 0.4896, "step": 153380 }, { "epoch": 7.618456342505215, "grad_norm": 0.1640625, "learning_rate": 0.00019052746597794776, "loss": 0.5082, "step": 153390 }, { "epoch": 7.618953014800835, "grad_norm": 0.1689453125, "learning_rate": 0.0001904877321942982, "loss": 0.495, "step": 153400 }, { "epoch": 7.619449687096454, "grad_norm": 0.1748046875, "learning_rate": 0.00019044799841064867, "loss": 0.5106, "step": 153410 }, { "epoch": 7.619946359392073, "grad_norm": 0.14453125, "learning_rate": 0.00019040826462699912, "loss": 0.4946, "step": 153420 }, { "epoch": 7.620443031687692, "grad_norm": 0.1474609375, "learning_rate": 0.00019036853084334956, "loss": 0.4776, "step": 153430 }, { "epoch": 7.6209397039833116, "grad_norm": 0.1484375, "learning_rate": 0.0001903287970597, "loss": 0.5085, "step": 153440 }, { "epoch": 7.621436376278931, "grad_norm": 0.1474609375, "learning_rate": 0.00019028906327605048, "loss": 0.5323, "step": 153450 }, { "epoch": 7.621933048574551, "grad_norm": 0.158203125, "learning_rate": 0.00019024932949240092, "loss": 0.4859, "step": 153460 }, { "epoch": 7.62242972087017, "grad_norm": 0.1455078125, "learning_rate": 0.00019020959570875136, "loss": 0.5257, "step": 153470 }, { "epoch": 7.622926393165789, "grad_norm": 0.1953125, "learning_rate": 0.0001901698619251018, "loss": 0.4878, "step": 153480 }, { "epoch": 7.623423065461409, "grad_norm": 0.1494140625, "learning_rate": 0.00019013012814145228, "loss": 0.4682, "step": 153490 }, { "epoch": 7.623919737757028, "grad_norm": 0.1376953125, "learning_rate": 0.00019009039435780272, "loss": 0.4855, "step": 153500 }, { "epoch": 7.624416410052647, "grad_norm": 0.1533203125, "learning_rate": 0.00019005066057415317, "loss": 0.4881, "step": 153510 }, { "epoch": 7.624913082348266, "grad_norm": 0.1689453125, "learning_rate": 0.00019001092679050364, "loss": 0.4935, "step": 153520 }, { "epoch": 7.625409754643886, "grad_norm": 0.166015625, "learning_rate": 0.00018997119300685408, "loss": 0.4882, "step": 153530 }, { "epoch": 7.625906426939506, "grad_norm": 0.1533203125, "learning_rate": 0.00018993145922320456, "loss": 0.5026, "step": 153540 }, { "epoch": 7.626403099235125, "grad_norm": 0.1865234375, "learning_rate": 0.000189891725439555, "loss": 0.4697, "step": 153550 }, { "epoch": 7.626899771530744, "grad_norm": 0.1552734375, "learning_rate": 0.00018985199165590544, "loss": 0.493, "step": 153560 }, { "epoch": 7.627396443826363, "grad_norm": 0.1787109375, "learning_rate": 0.0001898122578722559, "loss": 0.5156, "step": 153570 }, { "epoch": 7.6278931161219825, "grad_norm": 0.1552734375, "learning_rate": 0.00018977252408860636, "loss": 0.5061, "step": 153580 }, { "epoch": 7.628389788417602, "grad_norm": 0.1640625, "learning_rate": 0.0001897327903049568, "loss": 0.4553, "step": 153590 }, { "epoch": 7.628886460713222, "grad_norm": 0.1396484375, "learning_rate": 0.00018969305652130725, "loss": 0.4785, "step": 153600 }, { "epoch": 7.629383133008841, "grad_norm": 0.154296875, "learning_rate": 0.0001896533227376577, "loss": 0.4759, "step": 153610 }, { "epoch": 7.62987980530446, "grad_norm": 0.1787109375, "learning_rate": 0.00018961358895400816, "loss": 0.4862, "step": 153620 }, { "epoch": 7.6303764776000795, "grad_norm": 0.1650390625, "learning_rate": 0.0001895738551703586, "loss": 0.5018, "step": 153630 }, { "epoch": 7.630873149895699, "grad_norm": 0.138671875, "learning_rate": 0.00018953412138670905, "loss": 0.484, "step": 153640 }, { "epoch": 7.631369822191318, "grad_norm": 0.1689453125, "learning_rate": 0.0001894943876030595, "loss": 0.5266, "step": 153650 }, { "epoch": 7.631866494486937, "grad_norm": 0.15234375, "learning_rate": 0.00018945465381940997, "loss": 0.4716, "step": 153660 }, { "epoch": 7.632363166782557, "grad_norm": 0.138671875, "learning_rate": 0.0001894149200357604, "loss": 0.4972, "step": 153670 }, { "epoch": 7.632859839078177, "grad_norm": 0.1357421875, "learning_rate": 0.00018937518625211086, "loss": 0.4969, "step": 153680 }, { "epoch": 7.633356511373796, "grad_norm": 0.1572265625, "learning_rate": 0.00018933545246846133, "loss": 0.5109, "step": 153690 }, { "epoch": 7.633853183669415, "grad_norm": 0.1611328125, "learning_rate": 0.00018929571868481177, "loss": 0.4701, "step": 153700 }, { "epoch": 7.634349855965034, "grad_norm": 0.1396484375, "learning_rate": 0.00018925598490116224, "loss": 0.4861, "step": 153710 }, { "epoch": 7.6348465282606535, "grad_norm": 0.1396484375, "learning_rate": 0.00018921625111751266, "loss": 0.508, "step": 153720 }, { "epoch": 7.635343200556273, "grad_norm": 0.1552734375, "learning_rate": 0.00018917651733386313, "loss": 0.4445, "step": 153730 }, { "epoch": 7.635839872851893, "grad_norm": 0.1572265625, "learning_rate": 0.00018913678355021358, "loss": 0.5065, "step": 153740 }, { "epoch": 7.636336545147512, "grad_norm": 0.1533203125, "learning_rate": 0.00018909704976656405, "loss": 0.5, "step": 153750 }, { "epoch": 7.636833217443131, "grad_norm": 0.1572265625, "learning_rate": 0.00018905731598291446, "loss": 0.4926, "step": 153760 }, { "epoch": 7.6373298897387505, "grad_norm": 0.1611328125, "learning_rate": 0.00018901758219926494, "loss": 0.501, "step": 153770 }, { "epoch": 7.63782656203437, "grad_norm": 0.1787109375, "learning_rate": 0.00018897784841561538, "loss": 0.4738, "step": 153780 }, { "epoch": 7.638323234329989, "grad_norm": 0.15625, "learning_rate": 0.00018893811463196585, "loss": 0.4654, "step": 153790 }, { "epoch": 7.638819906625608, "grad_norm": 0.1396484375, "learning_rate": 0.00018889838084831627, "loss": 0.4611, "step": 153800 }, { "epoch": 7.639316578921227, "grad_norm": 0.1875, "learning_rate": 0.00018885864706466674, "loss": 0.5073, "step": 153810 }, { "epoch": 7.6398132512168475, "grad_norm": 0.1435546875, "learning_rate": 0.00018881891328101718, "loss": 0.5262, "step": 153820 }, { "epoch": 7.640309923512467, "grad_norm": 0.1865234375, "learning_rate": 0.00018877917949736766, "loss": 0.4801, "step": 153830 }, { "epoch": 7.640806595808086, "grad_norm": 0.1533203125, "learning_rate": 0.0001887394457137181, "loss": 0.4817, "step": 153840 }, { "epoch": 7.641303268103705, "grad_norm": 0.1396484375, "learning_rate": 0.00018869971193006854, "loss": 0.4774, "step": 153850 }, { "epoch": 7.641799940399324, "grad_norm": 0.1650390625, "learning_rate": 0.00018865997814641902, "loss": 0.5228, "step": 153860 }, { "epoch": 7.642296612694944, "grad_norm": 0.1455078125, "learning_rate": 0.00018862024436276946, "loss": 0.4823, "step": 153870 }, { "epoch": 7.642793284990563, "grad_norm": 0.1552734375, "learning_rate": 0.0001885805105791199, "loss": 0.491, "step": 153880 }, { "epoch": 7.643289957286182, "grad_norm": 0.1455078125, "learning_rate": 0.00018854077679547035, "loss": 0.4891, "step": 153890 }, { "epoch": 7.643786629581802, "grad_norm": 0.1474609375, "learning_rate": 0.00018850104301182082, "loss": 0.4896, "step": 153900 }, { "epoch": 7.644283301877421, "grad_norm": 0.1796875, "learning_rate": 0.00018846130922817126, "loss": 0.4967, "step": 153910 }, { "epoch": 7.644779974173041, "grad_norm": 0.1484375, "learning_rate": 0.0001884215754445217, "loss": 0.4747, "step": 153920 }, { "epoch": 7.64527664646866, "grad_norm": 0.169921875, "learning_rate": 0.00018838184166087215, "loss": 0.4724, "step": 153930 }, { "epoch": 7.645773318764279, "grad_norm": 0.1689453125, "learning_rate": 0.00018834210787722262, "loss": 0.5126, "step": 153940 }, { "epoch": 7.646269991059898, "grad_norm": 0.1513671875, "learning_rate": 0.00018830237409357307, "loss": 0.4926, "step": 153950 }, { "epoch": 7.646766663355518, "grad_norm": 0.1591796875, "learning_rate": 0.00018826264030992354, "loss": 0.5141, "step": 153960 }, { "epoch": 7.647263335651138, "grad_norm": 0.1669921875, "learning_rate": 0.00018822290652627396, "loss": 0.4958, "step": 153970 }, { "epoch": 7.647760007946757, "grad_norm": 0.154296875, "learning_rate": 0.00018818317274262443, "loss": 0.488, "step": 153980 }, { "epoch": 7.648256680242376, "grad_norm": 0.158203125, "learning_rate": 0.00018814343895897487, "loss": 0.5178, "step": 153990 }, { "epoch": 7.648753352537995, "grad_norm": 0.17578125, "learning_rate": 0.00018810370517532534, "loss": 0.4964, "step": 154000 }, { "epoch": 7.649250024833615, "grad_norm": 0.1650390625, "learning_rate": 0.0001880639713916758, "loss": 0.4906, "step": 154010 }, { "epoch": 7.649746697129234, "grad_norm": 0.158203125, "learning_rate": 0.00018802423760802623, "loss": 0.4911, "step": 154020 }, { "epoch": 7.650243369424853, "grad_norm": 0.1513671875, "learning_rate": 0.00018798450382437668, "loss": 0.5231, "step": 154030 }, { "epoch": 7.650740041720473, "grad_norm": 0.138671875, "learning_rate": 0.00018794477004072715, "loss": 0.485, "step": 154040 }, { "epoch": 7.651236714016092, "grad_norm": 0.15234375, "learning_rate": 0.0001879050362570776, "loss": 0.4854, "step": 154050 }, { "epoch": 7.651733386311712, "grad_norm": 0.1611328125, "learning_rate": 0.00018786530247342804, "loss": 0.5198, "step": 154060 }, { "epoch": 7.652230058607331, "grad_norm": 0.1591796875, "learning_rate": 0.0001878255686897785, "loss": 0.5184, "step": 154070 }, { "epoch": 7.65272673090295, "grad_norm": 0.1650390625, "learning_rate": 0.00018778583490612895, "loss": 0.5059, "step": 154080 }, { "epoch": 7.653223403198569, "grad_norm": 0.1484375, "learning_rate": 0.0001877461011224794, "loss": 0.4949, "step": 154090 }, { "epoch": 7.6537200754941885, "grad_norm": 0.181640625, "learning_rate": 0.00018770636733882984, "loss": 0.4948, "step": 154100 }, { "epoch": 7.654216747789809, "grad_norm": 0.1474609375, "learning_rate": 0.0001876666335551803, "loss": 0.462, "step": 154110 }, { "epoch": 7.654713420085428, "grad_norm": 0.166015625, "learning_rate": 0.00018762689977153076, "loss": 0.4503, "step": 154120 }, { "epoch": 7.655210092381047, "grad_norm": 0.1552734375, "learning_rate": 0.0001875871659878812, "loss": 0.4983, "step": 154130 }, { "epoch": 7.655706764676666, "grad_norm": 0.1552734375, "learning_rate": 0.00018754743220423164, "loss": 0.4772, "step": 154140 }, { "epoch": 7.656203436972286, "grad_norm": 0.1826171875, "learning_rate": 0.00018750769842058212, "loss": 0.5236, "step": 154150 }, { "epoch": 7.656700109267905, "grad_norm": 0.173828125, "learning_rate": 0.00018746796463693256, "loss": 0.4789, "step": 154160 }, { "epoch": 7.657196781563524, "grad_norm": 0.1572265625, "learning_rate": 0.000187428230853283, "loss": 0.4866, "step": 154170 }, { "epoch": 7.657693453859144, "grad_norm": 0.1484375, "learning_rate": 0.00018738849706963345, "loss": 0.4892, "step": 154180 }, { "epoch": 7.658190126154763, "grad_norm": 0.1513671875, "learning_rate": 0.00018734876328598392, "loss": 0.506, "step": 154190 }, { "epoch": 7.658686798450383, "grad_norm": 0.1494140625, "learning_rate": 0.00018730902950233436, "loss": 0.4922, "step": 154200 }, { "epoch": 7.659183470746002, "grad_norm": 0.1708984375, "learning_rate": 0.0001872692957186848, "loss": 0.4795, "step": 154210 }, { "epoch": 7.659680143041621, "grad_norm": 0.1357421875, "learning_rate": 0.00018722956193503528, "loss": 0.5183, "step": 154220 }, { "epoch": 7.66017681533724, "grad_norm": 0.16015625, "learning_rate": 0.00018718982815138572, "loss": 0.4634, "step": 154230 }, { "epoch": 7.6606734876328595, "grad_norm": 0.1396484375, "learning_rate": 0.0001871500943677362, "loss": 0.5115, "step": 154240 }, { "epoch": 7.66117015992848, "grad_norm": 0.1533203125, "learning_rate": 0.0001871103605840866, "loss": 0.5029, "step": 154250 }, { "epoch": 7.661666832224099, "grad_norm": 0.169921875, "learning_rate": 0.00018707062680043708, "loss": 0.5018, "step": 154260 }, { "epoch": 7.662163504519718, "grad_norm": 0.1357421875, "learning_rate": 0.00018703089301678753, "loss": 0.4781, "step": 154270 }, { "epoch": 7.662660176815337, "grad_norm": 0.15234375, "learning_rate": 0.000186991159233138, "loss": 0.4766, "step": 154280 }, { "epoch": 7.6631568491109565, "grad_norm": 0.146484375, "learning_rate": 0.00018695142544948842, "loss": 0.4757, "step": 154290 }, { "epoch": 7.663653521406576, "grad_norm": 0.14453125, "learning_rate": 0.0001869116916658389, "loss": 0.5078, "step": 154300 }, { "epoch": 7.664150193702195, "grad_norm": 0.12890625, "learning_rate": 0.00018687195788218933, "loss": 0.4786, "step": 154310 }, { "epoch": 7.664646865997815, "grad_norm": 0.16015625, "learning_rate": 0.0001868322240985398, "loss": 0.5029, "step": 154320 }, { "epoch": 7.665143538293434, "grad_norm": 0.19921875, "learning_rate": 0.00018679249031489022, "loss": 0.4848, "step": 154330 }, { "epoch": 7.6656402105890535, "grad_norm": 0.158203125, "learning_rate": 0.0001867527565312407, "loss": 0.499, "step": 154340 }, { "epoch": 7.666136882884673, "grad_norm": 0.154296875, "learning_rate": 0.00018671302274759114, "loss": 0.4852, "step": 154350 }, { "epoch": 7.666633555180292, "grad_norm": 0.1630859375, "learning_rate": 0.0001866732889639416, "loss": 0.4741, "step": 154360 }, { "epoch": 7.667130227475911, "grad_norm": 0.1455078125, "learning_rate": 0.00018663355518029205, "loss": 0.4989, "step": 154370 }, { "epoch": 7.6676268997715304, "grad_norm": 0.1435546875, "learning_rate": 0.0001865938213966425, "loss": 0.5169, "step": 154380 }, { "epoch": 7.668123572067151, "grad_norm": 0.150390625, "learning_rate": 0.00018655408761299297, "loss": 0.4676, "step": 154390 }, { "epoch": 7.66862024436277, "grad_norm": 0.1376953125, "learning_rate": 0.0001865143538293434, "loss": 0.4761, "step": 154400 }, { "epoch": 7.669116916658389, "grad_norm": 0.1572265625, "learning_rate": 0.00018647462004569388, "loss": 0.5073, "step": 154410 }, { "epoch": 7.669613588954008, "grad_norm": 0.1416015625, "learning_rate": 0.0001864348862620443, "loss": 0.4982, "step": 154420 }, { "epoch": 7.6701102612496275, "grad_norm": 0.14453125, "learning_rate": 0.00018639515247839477, "loss": 0.4747, "step": 154430 }, { "epoch": 7.670606933545247, "grad_norm": 0.1474609375, "learning_rate": 0.00018635541869474522, "loss": 0.4954, "step": 154440 }, { "epoch": 7.671103605840866, "grad_norm": 0.154296875, "learning_rate": 0.0001863156849110957, "loss": 0.4677, "step": 154450 }, { "epoch": 7.671600278136486, "grad_norm": 0.171875, "learning_rate": 0.0001862759511274461, "loss": 0.5054, "step": 154460 }, { "epoch": 7.672096950432105, "grad_norm": 0.1396484375, "learning_rate": 0.00018623621734379658, "loss": 0.4732, "step": 154470 }, { "epoch": 7.6725936227277245, "grad_norm": 0.1591796875, "learning_rate": 0.00018619648356014702, "loss": 0.486, "step": 154480 }, { "epoch": 7.673090295023344, "grad_norm": 0.146484375, "learning_rate": 0.0001861567497764975, "loss": 0.5007, "step": 154490 }, { "epoch": 7.673586967318963, "grad_norm": 0.162109375, "learning_rate": 0.0001861170159928479, "loss": 0.4551, "step": 154500 }, { "epoch": 7.674083639614582, "grad_norm": 0.1650390625, "learning_rate": 0.00018607728220919838, "loss": 0.512, "step": 154510 }, { "epoch": 7.674580311910201, "grad_norm": 0.1416015625, "learning_rate": 0.00018603754842554882, "loss": 0.5094, "step": 154520 }, { "epoch": 7.675076984205821, "grad_norm": 0.1494140625, "learning_rate": 0.0001859978146418993, "loss": 0.4922, "step": 154530 }, { "epoch": 7.67557365650144, "grad_norm": 0.1669921875, "learning_rate": 0.00018595808085824974, "loss": 0.4668, "step": 154540 }, { "epoch": 7.67607032879706, "grad_norm": 0.1396484375, "learning_rate": 0.00018591834707460018, "loss": 0.4956, "step": 154550 }, { "epoch": 7.676567001092679, "grad_norm": 0.166015625, "learning_rate": 0.00018587861329095066, "loss": 0.5197, "step": 154560 }, { "epoch": 7.677063673388298, "grad_norm": 0.15625, "learning_rate": 0.0001858388795073011, "loss": 0.466, "step": 154570 }, { "epoch": 7.677560345683918, "grad_norm": 0.1435546875, "learning_rate": 0.00018579914572365154, "loss": 0.4589, "step": 154580 }, { "epoch": 7.678057017979537, "grad_norm": 0.181640625, "learning_rate": 0.000185759411940002, "loss": 0.4978, "step": 154590 }, { "epoch": 7.678553690275156, "grad_norm": 0.1396484375, "learning_rate": 0.00018571967815635246, "loss": 0.4784, "step": 154600 }, { "epoch": 7.679050362570775, "grad_norm": 0.17578125, "learning_rate": 0.0001856799443727029, "loss": 0.4704, "step": 154610 }, { "epoch": 7.6795470348663954, "grad_norm": 0.1591796875, "learning_rate": 0.00018564021058905335, "loss": 0.4839, "step": 154620 }, { "epoch": 7.680043707162015, "grad_norm": 0.154296875, "learning_rate": 0.0001856004768054038, "loss": 0.4889, "step": 154630 }, { "epoch": 7.680540379457634, "grad_norm": 0.1513671875, "learning_rate": 0.00018556074302175426, "loss": 0.4979, "step": 154640 }, { "epoch": 7.681037051753253, "grad_norm": 0.150390625, "learning_rate": 0.0001855210092381047, "loss": 0.5, "step": 154650 }, { "epoch": 7.681533724048872, "grad_norm": 0.1376953125, "learning_rate": 0.00018548127545445515, "loss": 0.504, "step": 154660 }, { "epoch": 7.682030396344492, "grad_norm": 0.1513671875, "learning_rate": 0.0001854415416708056, "loss": 0.4719, "step": 154670 }, { "epoch": 7.682527068640111, "grad_norm": 0.1494140625, "learning_rate": 0.00018540180788715607, "loss": 0.4961, "step": 154680 }, { "epoch": 7.683023740935731, "grad_norm": 0.1630859375, "learning_rate": 0.0001853620741035065, "loss": 0.4992, "step": 154690 }, { "epoch": 7.68352041323135, "grad_norm": 0.1923828125, "learning_rate": 0.00018532234031985696, "loss": 0.4729, "step": 154700 }, { "epoch": 7.684017085526969, "grad_norm": 0.1533203125, "learning_rate": 0.00018528260653620743, "loss": 0.5064, "step": 154710 }, { "epoch": 7.684513757822589, "grad_norm": 0.158203125, "learning_rate": 0.00018524287275255787, "loss": 0.4831, "step": 154720 }, { "epoch": 7.685010430118208, "grad_norm": 0.1650390625, "learning_rate": 0.00018520313896890832, "loss": 0.4868, "step": 154730 }, { "epoch": 7.685507102413827, "grad_norm": 0.16015625, "learning_rate": 0.00018516340518525876, "loss": 0.4677, "step": 154740 }, { "epoch": 7.686003774709446, "grad_norm": 0.15625, "learning_rate": 0.00018512367140160923, "loss": 0.5095, "step": 154750 }, { "epoch": 7.686500447005066, "grad_norm": 0.1640625, "learning_rate": 0.00018508393761795968, "loss": 0.5201, "step": 154760 }, { "epoch": 7.686997119300686, "grad_norm": 0.150390625, "learning_rate": 0.00018504420383431015, "loss": 0.4974, "step": 154770 }, { "epoch": 7.687493791596305, "grad_norm": 0.1474609375, "learning_rate": 0.00018500447005066056, "loss": 0.479, "step": 154780 }, { "epoch": 7.687990463891924, "grad_norm": 0.1904296875, "learning_rate": 0.00018496473626701104, "loss": 0.5237, "step": 154790 }, { "epoch": 7.688487136187543, "grad_norm": 0.154296875, "learning_rate": 0.00018492500248336148, "loss": 0.4972, "step": 154800 }, { "epoch": 7.6889838084831625, "grad_norm": 0.166015625, "learning_rate": 0.00018488526869971195, "loss": 0.5134, "step": 154810 }, { "epoch": 7.689480480778782, "grad_norm": 0.1689453125, "learning_rate": 0.0001848455349160624, "loss": 0.4614, "step": 154820 }, { "epoch": 7.689977153074402, "grad_norm": 0.1484375, "learning_rate": 0.00018480580113241284, "loss": 0.508, "step": 154830 }, { "epoch": 7.690473825370021, "grad_norm": 0.1435546875, "learning_rate": 0.00018476606734876328, "loss": 0.4976, "step": 154840 }, { "epoch": 7.69097049766564, "grad_norm": 0.1650390625, "learning_rate": 0.00018472633356511376, "loss": 0.4854, "step": 154850 }, { "epoch": 7.69146716996126, "grad_norm": 0.1630859375, "learning_rate": 0.0001846865997814642, "loss": 0.4516, "step": 154860 }, { "epoch": 7.691963842256879, "grad_norm": 0.1416015625, "learning_rate": 0.00018464686599781464, "loss": 0.4901, "step": 154870 }, { "epoch": 7.692460514552498, "grad_norm": 0.1513671875, "learning_rate": 0.0001846071322141651, "loss": 0.484, "step": 154880 }, { "epoch": 7.692957186848117, "grad_norm": 0.16015625, "learning_rate": 0.00018456739843051556, "loss": 0.446, "step": 154890 }, { "epoch": 7.693453859143737, "grad_norm": 0.2041015625, "learning_rate": 0.000184527664646866, "loss": 0.4732, "step": 154900 }, { "epoch": 7.693950531439357, "grad_norm": 0.1484375, "learning_rate": 0.00018448793086321645, "loss": 0.518, "step": 154910 }, { "epoch": 7.694447203734976, "grad_norm": 0.150390625, "learning_rate": 0.00018444819707956692, "loss": 0.4795, "step": 154920 }, { "epoch": 7.694943876030595, "grad_norm": 0.150390625, "learning_rate": 0.00018440846329591736, "loss": 0.5062, "step": 154930 }, { "epoch": 7.695440548326214, "grad_norm": 0.1435546875, "learning_rate": 0.00018436872951226784, "loss": 0.4908, "step": 154940 }, { "epoch": 7.6959372206218335, "grad_norm": 0.13671875, "learning_rate": 0.00018432899572861825, "loss": 0.4745, "step": 154950 }, { "epoch": 7.696433892917453, "grad_norm": 0.146484375, "learning_rate": 0.00018428926194496872, "loss": 0.5064, "step": 154960 }, { "epoch": 7.696930565213073, "grad_norm": 0.16796875, "learning_rate": 0.00018424952816131917, "loss": 0.4763, "step": 154970 }, { "epoch": 7.697427237508692, "grad_norm": 0.1611328125, "learning_rate": 0.00018420979437766964, "loss": 0.5162, "step": 154980 }, { "epoch": 7.697923909804311, "grad_norm": 0.1552734375, "learning_rate": 0.00018417006059402006, "loss": 0.4906, "step": 154990 }, { "epoch": 7.6984205820999305, "grad_norm": 0.14453125, "learning_rate": 0.00018413032681037053, "loss": 0.4674, "step": 155000 }, { "epoch": 7.69891725439555, "grad_norm": 0.1748046875, "learning_rate": 0.00018409059302672097, "loss": 0.5068, "step": 155010 }, { "epoch": 7.699413926691169, "grad_norm": 0.1796875, "learning_rate": 0.00018405085924307144, "loss": 0.4977, "step": 155020 }, { "epoch": 7.699910598986788, "grad_norm": 0.1630859375, "learning_rate": 0.00018401112545942186, "loss": 0.4931, "step": 155030 }, { "epoch": 7.700407271282408, "grad_norm": 0.189453125, "learning_rate": 0.00018397139167577233, "loss": 0.5055, "step": 155040 }, { "epoch": 7.7009039435780275, "grad_norm": 0.171875, "learning_rate": 0.00018393165789212278, "loss": 0.4862, "step": 155050 }, { "epoch": 7.701400615873647, "grad_norm": 0.1533203125, "learning_rate": 0.00018389192410847325, "loss": 0.4901, "step": 155060 }, { "epoch": 7.701897288169266, "grad_norm": 0.1494140625, "learning_rate": 0.0001838521903248237, "loss": 0.4955, "step": 155070 }, { "epoch": 7.702393960464885, "grad_norm": 0.1552734375, "learning_rate": 0.00018381245654117414, "loss": 0.4724, "step": 155080 }, { "epoch": 7.7028906327605045, "grad_norm": 0.1396484375, "learning_rate": 0.0001837727227575246, "loss": 0.4718, "step": 155090 }, { "epoch": 7.703387305056124, "grad_norm": 0.158203125, "learning_rate": 0.00018373298897387505, "loss": 0.484, "step": 155100 }, { "epoch": 7.703883977351744, "grad_norm": 0.1572265625, "learning_rate": 0.0001836932551902255, "loss": 0.4619, "step": 155110 }, { "epoch": 7.704380649647363, "grad_norm": 0.162109375, "learning_rate": 0.00018365352140657594, "loss": 0.5065, "step": 155120 }, { "epoch": 7.704877321942982, "grad_norm": 0.146484375, "learning_rate": 0.0001836137876229264, "loss": 0.4808, "step": 155130 }, { "epoch": 7.7053739942386015, "grad_norm": 0.1494140625, "learning_rate": 0.00018357405383927686, "loss": 0.4847, "step": 155140 }, { "epoch": 7.705870666534221, "grad_norm": 0.1591796875, "learning_rate": 0.0001835343200556273, "loss": 0.4682, "step": 155150 }, { "epoch": 7.70636733882984, "grad_norm": 0.1865234375, "learning_rate": 0.00018349458627197774, "loss": 0.504, "step": 155160 }, { "epoch": 7.706864011125459, "grad_norm": 0.1611328125, "learning_rate": 0.00018345485248832822, "loss": 0.4837, "step": 155170 }, { "epoch": 7.707360683421078, "grad_norm": 0.1552734375, "learning_rate": 0.00018341511870467866, "loss": 0.4956, "step": 155180 }, { "epoch": 7.7078573557166985, "grad_norm": 0.16015625, "learning_rate": 0.0001833753849210291, "loss": 0.5038, "step": 155190 }, { "epoch": 7.708354028012318, "grad_norm": 0.13671875, "learning_rate": 0.00018333565113737955, "loss": 0.4812, "step": 155200 }, { "epoch": 7.708850700307937, "grad_norm": 0.1484375, "learning_rate": 0.00018329591735373002, "loss": 0.5041, "step": 155210 }, { "epoch": 7.709347372603556, "grad_norm": 0.142578125, "learning_rate": 0.00018325618357008046, "loss": 0.4985, "step": 155220 }, { "epoch": 7.709844044899175, "grad_norm": 0.140625, "learning_rate": 0.00018321644978643094, "loss": 0.4971, "step": 155230 }, { "epoch": 7.710340717194795, "grad_norm": 0.1611328125, "learning_rate": 0.00018317671600278138, "loss": 0.481, "step": 155240 }, { "epoch": 7.710837389490414, "grad_norm": 0.154296875, "learning_rate": 0.00018313698221913182, "loss": 0.4365, "step": 155250 }, { "epoch": 7.711334061786033, "grad_norm": 0.15625, "learning_rate": 0.0001830972484354823, "loss": 0.4678, "step": 155260 }, { "epoch": 7.711830734081653, "grad_norm": 0.1416015625, "learning_rate": 0.00018305751465183274, "loss": 0.466, "step": 155270 }, { "epoch": 7.712327406377272, "grad_norm": 0.1552734375, "learning_rate": 0.00018301778086818318, "loss": 0.4843, "step": 155280 }, { "epoch": 7.712824078672892, "grad_norm": 0.1416015625, "learning_rate": 0.00018297804708453363, "loss": 0.4702, "step": 155290 }, { "epoch": 7.713320750968511, "grad_norm": 0.1650390625, "learning_rate": 0.0001829383133008841, "loss": 0.4972, "step": 155300 }, { "epoch": 7.71381742326413, "grad_norm": 0.1484375, "learning_rate": 0.00018289857951723454, "loss": 0.5, "step": 155310 }, { "epoch": 7.714314095559749, "grad_norm": 0.1435546875, "learning_rate": 0.000182858845733585, "loss": 0.5008, "step": 155320 }, { "epoch": 7.714810767855369, "grad_norm": 0.138671875, "learning_rate": 0.00018281911194993543, "loss": 0.4864, "step": 155330 }, { "epoch": 7.715307440150989, "grad_norm": 0.158203125, "learning_rate": 0.0001827793781662859, "loss": 0.4841, "step": 155340 }, { "epoch": 7.715804112446608, "grad_norm": 0.1376953125, "learning_rate": 0.00018273964438263635, "loss": 0.4886, "step": 155350 }, { "epoch": 7.716300784742227, "grad_norm": 0.16796875, "learning_rate": 0.0001826999105989868, "loss": 0.5069, "step": 155360 }, { "epoch": 7.716797457037846, "grad_norm": 0.1513671875, "learning_rate": 0.00018266017681533724, "loss": 0.4672, "step": 155370 }, { "epoch": 7.717294129333466, "grad_norm": 0.1611328125, "learning_rate": 0.0001826204430316877, "loss": 0.4969, "step": 155380 }, { "epoch": 7.717790801629085, "grad_norm": 0.1630859375, "learning_rate": 0.00018258070924803815, "loss": 0.5053, "step": 155390 }, { "epoch": 7.718287473924704, "grad_norm": 0.1416015625, "learning_rate": 0.0001825409754643886, "loss": 0.4804, "step": 155400 }, { "epoch": 7.718784146220324, "grad_norm": 0.14453125, "learning_rate": 0.00018250124168073907, "loss": 0.4871, "step": 155410 }, { "epoch": 7.719280818515943, "grad_norm": 0.1533203125, "learning_rate": 0.0001824615078970895, "loss": 0.5047, "step": 155420 }, { "epoch": 7.719777490811563, "grad_norm": 0.146484375, "learning_rate": 0.00018242177411343996, "loss": 0.4768, "step": 155430 }, { "epoch": 7.720274163107182, "grad_norm": 0.1474609375, "learning_rate": 0.0001823820403297904, "loss": 0.4859, "step": 155440 }, { "epoch": 7.720770835402801, "grad_norm": 0.150390625, "learning_rate": 0.00018234230654614087, "loss": 0.4843, "step": 155450 }, { "epoch": 7.72126750769842, "grad_norm": 0.15625, "learning_rate": 0.00018230257276249132, "loss": 0.4914, "step": 155460 }, { "epoch": 7.7217641799940395, "grad_norm": 0.169921875, "learning_rate": 0.0001822628389788418, "loss": 0.4985, "step": 155470 }, { "epoch": 7.72226085228966, "grad_norm": 0.1572265625, "learning_rate": 0.0001822231051951922, "loss": 0.4893, "step": 155480 }, { "epoch": 7.722757524585279, "grad_norm": 0.146484375, "learning_rate": 0.00018218337141154268, "loss": 0.5087, "step": 155490 }, { "epoch": 7.723254196880898, "grad_norm": 0.150390625, "learning_rate": 0.00018214363762789312, "loss": 0.4684, "step": 155500 }, { "epoch": 7.723750869176517, "grad_norm": 0.1396484375, "learning_rate": 0.0001821039038442436, "loss": 0.492, "step": 155510 }, { "epoch": 7.7242475414721365, "grad_norm": 0.1533203125, "learning_rate": 0.000182064170060594, "loss": 0.4771, "step": 155520 }, { "epoch": 7.724744213767756, "grad_norm": 0.16015625, "learning_rate": 0.00018202443627694448, "loss": 0.4561, "step": 155530 }, { "epoch": 7.725240886063375, "grad_norm": 0.169921875, "learning_rate": 0.00018198470249329492, "loss": 0.4831, "step": 155540 }, { "epoch": 7.725737558358995, "grad_norm": 0.16015625, "learning_rate": 0.0001819449687096454, "loss": 0.4935, "step": 155550 }, { "epoch": 7.726234230654614, "grad_norm": 0.1376953125, "learning_rate": 0.00018190523492599584, "loss": 0.4966, "step": 155560 }, { "epoch": 7.726730902950234, "grad_norm": 0.16015625, "learning_rate": 0.00018186550114234628, "loss": 0.48, "step": 155570 }, { "epoch": 7.727227575245853, "grad_norm": 0.2060546875, "learning_rate": 0.00018182576735869673, "loss": 0.5067, "step": 155580 }, { "epoch": 7.727724247541472, "grad_norm": 0.166015625, "learning_rate": 0.0001817860335750472, "loss": 0.4902, "step": 155590 }, { "epoch": 7.728220919837091, "grad_norm": 0.146484375, "learning_rate": 0.00018174629979139764, "loss": 0.4832, "step": 155600 }, { "epoch": 7.7287175921327105, "grad_norm": 0.1591796875, "learning_rate": 0.0001817065660077481, "loss": 0.5113, "step": 155610 }, { "epoch": 7.729214264428331, "grad_norm": 0.21484375, "learning_rate": 0.00018166683222409856, "loss": 0.4901, "step": 155620 }, { "epoch": 7.72971093672395, "grad_norm": 0.162109375, "learning_rate": 0.000181627098440449, "loss": 0.4785, "step": 155630 }, { "epoch": 7.730207609019569, "grad_norm": 0.1611328125, "learning_rate": 0.00018158736465679947, "loss": 0.4777, "step": 155640 }, { "epoch": 7.730704281315188, "grad_norm": 0.138671875, "learning_rate": 0.0001815476308731499, "loss": 0.4751, "step": 155650 }, { "epoch": 7.7312009536108075, "grad_norm": 0.177734375, "learning_rate": 0.00018150789708950036, "loss": 0.4931, "step": 155660 }, { "epoch": 7.731697625906427, "grad_norm": 0.177734375, "learning_rate": 0.0001814681633058508, "loss": 0.5069, "step": 155670 }, { "epoch": 7.732194298202046, "grad_norm": 0.15234375, "learning_rate": 0.00018142842952220128, "loss": 0.5336, "step": 155680 }, { "epoch": 7.732690970497666, "grad_norm": 0.1650390625, "learning_rate": 0.0001813886957385517, "loss": 0.4794, "step": 155690 }, { "epoch": 7.733187642793285, "grad_norm": 0.1455078125, "learning_rate": 0.00018134896195490217, "loss": 0.4496, "step": 155700 }, { "epoch": 7.7336843150889045, "grad_norm": 0.140625, "learning_rate": 0.0001813092281712526, "loss": 0.506, "step": 155710 }, { "epoch": 7.734180987384524, "grad_norm": 0.1474609375, "learning_rate": 0.00018126949438760308, "loss": 0.4618, "step": 155720 }, { "epoch": 7.734677659680143, "grad_norm": 0.154296875, "learning_rate": 0.0001812297606039535, "loss": 0.4909, "step": 155730 }, { "epoch": 7.735174331975762, "grad_norm": 0.1552734375, "learning_rate": 0.00018119002682030397, "loss": 0.5024, "step": 155740 }, { "epoch": 7.735671004271381, "grad_norm": 0.1533203125, "learning_rate": 0.00018115029303665442, "loss": 0.5077, "step": 155750 }, { "epoch": 7.7361676765670015, "grad_norm": 0.15234375, "learning_rate": 0.0001811105592530049, "loss": 0.4669, "step": 155760 }, { "epoch": 7.736664348862621, "grad_norm": 0.1552734375, "learning_rate": 0.00018107082546935533, "loss": 0.5059, "step": 155770 }, { "epoch": 7.73716102115824, "grad_norm": 0.1630859375, "learning_rate": 0.00018103109168570578, "loss": 0.49, "step": 155780 }, { "epoch": 7.737657693453859, "grad_norm": 0.1474609375, "learning_rate": 0.00018099135790205625, "loss": 0.5105, "step": 155790 }, { "epoch": 7.7381543657494785, "grad_norm": 0.1630859375, "learning_rate": 0.0001809516241184067, "loss": 0.4722, "step": 155800 }, { "epoch": 7.738651038045098, "grad_norm": 0.15234375, "learning_rate": 0.00018091189033475714, "loss": 0.484, "step": 155810 }, { "epoch": 7.739147710340717, "grad_norm": 0.1494140625, "learning_rate": 0.00018087215655110758, "loss": 0.4742, "step": 155820 }, { "epoch": 7.739644382636337, "grad_norm": 0.1474609375, "learning_rate": 0.00018083242276745805, "loss": 0.5169, "step": 155830 }, { "epoch": 7.740141054931956, "grad_norm": 0.1513671875, "learning_rate": 0.0001807926889838085, "loss": 0.462, "step": 155840 }, { "epoch": 7.7406377272275755, "grad_norm": 0.146484375, "learning_rate": 0.00018075295520015894, "loss": 0.4676, "step": 155850 }, { "epoch": 7.741134399523195, "grad_norm": 0.205078125, "learning_rate": 0.00018071322141650938, "loss": 0.5185, "step": 155860 }, { "epoch": 7.741631071818814, "grad_norm": 0.1484375, "learning_rate": 0.00018067348763285986, "loss": 0.4933, "step": 155870 }, { "epoch": 7.742127744114433, "grad_norm": 0.154296875, "learning_rate": 0.0001806337538492103, "loss": 0.4851, "step": 155880 }, { "epoch": 7.742624416410052, "grad_norm": 0.1376953125, "learning_rate": 0.00018059402006556074, "loss": 0.498, "step": 155890 }, { "epoch": 7.743121088705672, "grad_norm": 0.1748046875, "learning_rate": 0.0001805542862819112, "loss": 0.4972, "step": 155900 }, { "epoch": 7.743617761001292, "grad_norm": 0.1474609375, "learning_rate": 0.00018051455249826166, "loss": 0.4893, "step": 155910 }, { "epoch": 7.744114433296911, "grad_norm": 0.146484375, "learning_rate": 0.0001804748187146121, "loss": 0.491, "step": 155920 }, { "epoch": 7.74461110559253, "grad_norm": 0.171875, "learning_rate": 0.00018043508493096255, "loss": 0.5067, "step": 155930 }, { "epoch": 7.745107777888149, "grad_norm": 0.150390625, "learning_rate": 0.00018039535114731302, "loss": 0.4909, "step": 155940 }, { "epoch": 7.745604450183769, "grad_norm": 0.14453125, "learning_rate": 0.00018035561736366346, "loss": 0.5073, "step": 155950 }, { "epoch": 7.746101122479388, "grad_norm": 0.134765625, "learning_rate": 0.00018031588358001393, "loss": 0.4952, "step": 155960 }, { "epoch": 7.746597794775007, "grad_norm": 0.146484375, "learning_rate": 0.00018027614979636435, "loss": 0.485, "step": 155970 }, { "epoch": 7.747094467070626, "grad_norm": 0.1474609375, "learning_rate": 0.00018023641601271482, "loss": 0.4798, "step": 155980 }, { "epoch": 7.747591139366246, "grad_norm": 0.138671875, "learning_rate": 0.00018019668222906527, "loss": 0.4683, "step": 155990 }, { "epoch": 7.748087811661866, "grad_norm": 0.1552734375, "learning_rate": 0.00018015694844541574, "loss": 0.4895, "step": 156000 }, { "epoch": 7.748584483957485, "grad_norm": 0.16015625, "learning_rate": 0.00018011721466176616, "loss": 0.4628, "step": 156010 }, { "epoch": 7.749081156253104, "grad_norm": 0.1826171875, "learning_rate": 0.00018007748087811663, "loss": 0.4879, "step": 156020 }, { "epoch": 7.749577828548723, "grad_norm": 0.1708984375, "learning_rate": 0.00018003774709446707, "loss": 0.4913, "step": 156030 }, { "epoch": 7.750074500844343, "grad_norm": 0.150390625, "learning_rate": 0.00017999801331081754, "loss": 0.4876, "step": 156040 }, { "epoch": 7.750571173139962, "grad_norm": 0.1552734375, "learning_rate": 0.00017995827952716796, "loss": 0.5229, "step": 156050 }, { "epoch": 7.751067845435582, "grad_norm": 0.1728515625, "learning_rate": 0.00017991854574351843, "loss": 0.4982, "step": 156060 }, { "epoch": 7.751564517731201, "grad_norm": 0.1650390625, "learning_rate": 0.00017987881195986888, "loss": 0.4882, "step": 156070 }, { "epoch": 7.75206119002682, "grad_norm": 0.1640625, "learning_rate": 0.00017983907817621935, "loss": 0.5045, "step": 156080 }, { "epoch": 7.75255786232244, "grad_norm": 0.1591796875, "learning_rate": 0.0001797993443925698, "loss": 0.456, "step": 156090 }, { "epoch": 7.753054534618059, "grad_norm": 0.162109375, "learning_rate": 0.00017975961060892024, "loss": 0.5028, "step": 156100 }, { "epoch": 7.753551206913678, "grad_norm": 0.15625, "learning_rate": 0.0001797198768252707, "loss": 0.5156, "step": 156110 }, { "epoch": 7.754047879209297, "grad_norm": 0.1650390625, "learning_rate": 0.00017968014304162115, "loss": 0.4691, "step": 156120 }, { "epoch": 7.754544551504917, "grad_norm": 0.142578125, "learning_rate": 0.00017964040925797162, "loss": 0.4989, "step": 156130 }, { "epoch": 7.755041223800537, "grad_norm": 0.15234375, "learning_rate": 0.00017960067547432204, "loss": 0.4972, "step": 156140 }, { "epoch": 7.755537896096156, "grad_norm": 0.169921875, "learning_rate": 0.0001795609416906725, "loss": 0.4985, "step": 156150 }, { "epoch": 7.756034568391775, "grad_norm": 0.1611328125, "learning_rate": 0.00017952120790702296, "loss": 0.5022, "step": 156160 }, { "epoch": 7.756531240687394, "grad_norm": 0.1455078125, "learning_rate": 0.00017948147412337343, "loss": 0.5003, "step": 156170 }, { "epoch": 7.7570279129830135, "grad_norm": 0.1533203125, "learning_rate": 0.00017944174033972384, "loss": 0.5014, "step": 156180 }, { "epoch": 7.757524585278633, "grad_norm": 0.1474609375, "learning_rate": 0.00017940200655607432, "loss": 0.4945, "step": 156190 }, { "epoch": 7.758021257574253, "grad_norm": 0.1640625, "learning_rate": 0.00017936227277242476, "loss": 0.5087, "step": 156200 }, { "epoch": 7.758517929869872, "grad_norm": 0.1513671875, "learning_rate": 0.00017932253898877523, "loss": 0.4933, "step": 156210 }, { "epoch": 7.759014602165491, "grad_norm": 0.1474609375, "learning_rate": 0.00017928280520512565, "loss": 0.5283, "step": 156220 }, { "epoch": 7.7595112744611106, "grad_norm": 0.1572265625, "learning_rate": 0.00017924307142147612, "loss": 0.4793, "step": 156230 }, { "epoch": 7.76000794675673, "grad_norm": 0.1611328125, "learning_rate": 0.00017920333763782656, "loss": 0.4575, "step": 156240 }, { "epoch": 7.760504619052349, "grad_norm": 0.1640625, "learning_rate": 0.00017916360385417703, "loss": 0.5082, "step": 156250 }, { "epoch": 7.761001291347968, "grad_norm": 0.1806640625, "learning_rate": 0.00017912387007052748, "loss": 0.4818, "step": 156260 }, { "epoch": 7.761497963643588, "grad_norm": 0.14453125, "learning_rate": 0.00017908413628687792, "loss": 0.4817, "step": 156270 }, { "epoch": 7.761994635939208, "grad_norm": 0.1640625, "learning_rate": 0.00017904440250322837, "loss": 0.4939, "step": 156280 }, { "epoch": 7.762491308234827, "grad_norm": 0.1572265625, "learning_rate": 0.00017900466871957884, "loss": 0.4987, "step": 156290 }, { "epoch": 7.762987980530446, "grad_norm": 0.1376953125, "learning_rate": 0.00017896493493592928, "loss": 0.4516, "step": 156300 }, { "epoch": 7.763484652826065, "grad_norm": 0.16015625, "learning_rate": 0.00017892520115227973, "loss": 0.4825, "step": 156310 }, { "epoch": 7.7639813251216845, "grad_norm": 0.173828125, "learning_rate": 0.0001788854673686302, "loss": 0.5048, "step": 156320 }, { "epoch": 7.764477997417304, "grad_norm": 0.1767578125, "learning_rate": 0.00017884573358498064, "loss": 0.5065, "step": 156330 }, { "epoch": 7.764974669712924, "grad_norm": 0.150390625, "learning_rate": 0.0001788059998013311, "loss": 0.4905, "step": 156340 }, { "epoch": 7.765471342008543, "grad_norm": 0.177734375, "learning_rate": 0.00017876626601768153, "loss": 0.4914, "step": 156350 }, { "epoch": 7.765968014304162, "grad_norm": 0.1611328125, "learning_rate": 0.000178726532234032, "loss": 0.5184, "step": 156360 }, { "epoch": 7.7664646865997815, "grad_norm": 0.16015625, "learning_rate": 0.00017868679845038245, "loss": 0.4882, "step": 156370 }, { "epoch": 7.766961358895401, "grad_norm": 0.154296875, "learning_rate": 0.0001786470646667329, "loss": 0.501, "step": 156380 }, { "epoch": 7.76745803119102, "grad_norm": 0.1513671875, "learning_rate": 0.00017860733088308334, "loss": 0.5198, "step": 156390 }, { "epoch": 7.767954703486639, "grad_norm": 0.1669921875, "learning_rate": 0.0001785675970994338, "loss": 0.4973, "step": 156400 }, { "epoch": 7.768451375782259, "grad_norm": 0.1552734375, "learning_rate": 0.00017852786331578425, "loss": 0.5214, "step": 156410 }, { "epoch": 7.7689480480778785, "grad_norm": 0.146484375, "learning_rate": 0.0001784881295321347, "loss": 0.4826, "step": 156420 }, { "epoch": 7.769444720373498, "grad_norm": 0.1484375, "learning_rate": 0.00017844839574848514, "loss": 0.4648, "step": 156430 }, { "epoch": 7.769941392669117, "grad_norm": 0.1552734375, "learning_rate": 0.0001784086619648356, "loss": 0.5546, "step": 156440 }, { "epoch": 7.770438064964736, "grad_norm": 0.1591796875, "learning_rate": 0.00017836892818118606, "loss": 0.502, "step": 156450 }, { "epoch": 7.770934737260355, "grad_norm": 0.1591796875, "learning_rate": 0.0001783291943975365, "loss": 0.4913, "step": 156460 }, { "epoch": 7.771431409555975, "grad_norm": 0.158203125, "learning_rate": 0.00017828946061388697, "loss": 0.4959, "step": 156470 }, { "epoch": 7.771928081851595, "grad_norm": 0.15234375, "learning_rate": 0.00017824972683023742, "loss": 0.4986, "step": 156480 }, { "epoch": 7.772424754147214, "grad_norm": 0.154296875, "learning_rate": 0.0001782099930465879, "loss": 0.4853, "step": 156490 }, { "epoch": 7.772921426442833, "grad_norm": 0.16015625, "learning_rate": 0.00017817025926293833, "loss": 0.4826, "step": 156500 }, { "epoch": 7.7734180987384525, "grad_norm": 0.15234375, "learning_rate": 0.00017813052547928878, "loss": 0.5112, "step": 156510 }, { "epoch": 7.773914771034072, "grad_norm": 0.17578125, "learning_rate": 0.00017809079169563922, "loss": 0.4555, "step": 156520 }, { "epoch": 7.774411443329691, "grad_norm": 0.154296875, "learning_rate": 0.0001780510579119897, "loss": 0.5022, "step": 156530 }, { "epoch": 7.77490811562531, "grad_norm": 0.1572265625, "learning_rate": 0.00017801132412834014, "loss": 0.4864, "step": 156540 }, { "epoch": 7.77540478792093, "grad_norm": 0.1767578125, "learning_rate": 0.00017797159034469058, "loss": 0.5203, "step": 156550 }, { "epoch": 7.7759014602165495, "grad_norm": 0.150390625, "learning_rate": 0.00017793185656104102, "loss": 0.4664, "step": 156560 }, { "epoch": 7.776398132512169, "grad_norm": 0.134765625, "learning_rate": 0.0001778921227773915, "loss": 0.4759, "step": 156570 }, { "epoch": 7.776894804807788, "grad_norm": 0.1435546875, "learning_rate": 0.00017785238899374194, "loss": 0.5151, "step": 156580 }, { "epoch": 7.777391477103407, "grad_norm": 0.1591796875, "learning_rate": 0.00017781265521009238, "loss": 0.5092, "step": 156590 }, { "epoch": 7.777888149399026, "grad_norm": 0.1728515625, "learning_rate": 0.00017777292142644283, "loss": 0.4774, "step": 156600 }, { "epoch": 7.778384821694646, "grad_norm": 0.1455078125, "learning_rate": 0.0001777331876427933, "loss": 0.481, "step": 156610 }, { "epoch": 7.778881493990265, "grad_norm": 0.169921875, "learning_rate": 0.00017769345385914374, "loss": 0.4945, "step": 156620 }, { "epoch": 7.779378166285884, "grad_norm": 0.1572265625, "learning_rate": 0.0001776537200754942, "loss": 0.4764, "step": 156630 }, { "epoch": 7.779874838581504, "grad_norm": 0.1884765625, "learning_rate": 0.00017761398629184466, "loss": 0.4602, "step": 156640 }, { "epoch": 7.780371510877123, "grad_norm": 0.1494140625, "learning_rate": 0.0001775742525081951, "loss": 0.5032, "step": 156650 }, { "epoch": 7.780868183172743, "grad_norm": 0.162109375, "learning_rate": 0.00017753451872454557, "loss": 0.4916, "step": 156660 }, { "epoch": 7.781364855468362, "grad_norm": 0.1650390625, "learning_rate": 0.000177494784940896, "loss": 0.4989, "step": 156670 }, { "epoch": 7.781861527763981, "grad_norm": 0.17578125, "learning_rate": 0.00017745505115724646, "loss": 0.5072, "step": 156680 }, { "epoch": 7.7823582000596, "grad_norm": 0.1572265625, "learning_rate": 0.0001774153173735969, "loss": 0.4669, "step": 156690 }, { "epoch": 7.7828548723552196, "grad_norm": 0.16015625, "learning_rate": 0.00017737558358994738, "loss": 0.4916, "step": 156700 }, { "epoch": 7.78335154465084, "grad_norm": 0.1689453125, "learning_rate": 0.0001773358498062978, "loss": 0.5019, "step": 156710 }, { "epoch": 7.783848216946459, "grad_norm": 0.18359375, "learning_rate": 0.00017729611602264827, "loss": 0.5173, "step": 156720 }, { "epoch": 7.784344889242078, "grad_norm": 0.1416015625, "learning_rate": 0.0001772563822389987, "loss": 0.4668, "step": 156730 }, { "epoch": 7.784841561537697, "grad_norm": 0.1689453125, "learning_rate": 0.00017721664845534918, "loss": 0.4851, "step": 156740 }, { "epoch": 7.785338233833317, "grad_norm": 0.162109375, "learning_rate": 0.0001771769146716996, "loss": 0.4818, "step": 156750 }, { "epoch": 7.785834906128936, "grad_norm": 0.1396484375, "learning_rate": 0.00017713718088805007, "loss": 0.5166, "step": 156760 }, { "epoch": 7.786331578424555, "grad_norm": 0.1640625, "learning_rate": 0.00017709744710440052, "loss": 0.4916, "step": 156770 }, { "epoch": 7.786828250720175, "grad_norm": 0.1494140625, "learning_rate": 0.000177057713320751, "loss": 0.483, "step": 156780 }, { "epoch": 7.787324923015794, "grad_norm": 0.171875, "learning_rate": 0.00017701797953710143, "loss": 0.4881, "step": 156790 }, { "epoch": 7.787821595311414, "grad_norm": 0.162109375, "learning_rate": 0.00017697824575345188, "loss": 0.493, "step": 156800 }, { "epoch": 7.788318267607033, "grad_norm": 0.15234375, "learning_rate": 0.00017693851196980235, "loss": 0.4836, "step": 156810 }, { "epoch": 7.788814939902652, "grad_norm": 0.1884765625, "learning_rate": 0.0001768987781861528, "loss": 0.4688, "step": 156820 }, { "epoch": 7.789311612198271, "grad_norm": 0.1630859375, "learning_rate": 0.00017685904440250324, "loss": 0.5103, "step": 156830 }, { "epoch": 7.7898082844938905, "grad_norm": 0.150390625, "learning_rate": 0.00017681931061885368, "loss": 0.5353, "step": 156840 }, { "epoch": 7.790304956789511, "grad_norm": 0.1591796875, "learning_rate": 0.00017677957683520415, "loss": 0.493, "step": 156850 }, { "epoch": 7.79080162908513, "grad_norm": 0.14453125, "learning_rate": 0.0001767398430515546, "loss": 0.488, "step": 156860 }, { "epoch": 7.791298301380749, "grad_norm": 0.138671875, "learning_rate": 0.00017670010926790504, "loss": 0.4948, "step": 156870 }, { "epoch": 7.791794973676368, "grad_norm": 0.1435546875, "learning_rate": 0.00017666037548425548, "loss": 0.5081, "step": 156880 }, { "epoch": 7.7922916459719875, "grad_norm": 0.1669921875, "learning_rate": 0.00017662064170060595, "loss": 0.483, "step": 156890 }, { "epoch": 7.792788318267607, "grad_norm": 0.142578125, "learning_rate": 0.0001765809079169564, "loss": 0.5121, "step": 156900 }, { "epoch": 7.793284990563226, "grad_norm": 0.1611328125, "learning_rate": 0.00017654117413330687, "loss": 0.4898, "step": 156910 }, { "epoch": 7.793781662858846, "grad_norm": 0.16796875, "learning_rate": 0.0001765014403496573, "loss": 0.4897, "step": 156920 }, { "epoch": 7.794278335154465, "grad_norm": 0.1552734375, "learning_rate": 0.00017646170656600776, "loss": 0.4827, "step": 156930 }, { "epoch": 7.794775007450085, "grad_norm": 0.150390625, "learning_rate": 0.0001764219727823582, "loss": 0.4754, "step": 156940 }, { "epoch": 7.795271679745704, "grad_norm": 0.17578125, "learning_rate": 0.00017638223899870867, "loss": 0.5168, "step": 156950 }, { "epoch": 7.795768352041323, "grad_norm": 0.142578125, "learning_rate": 0.00017634250521505912, "loss": 0.4789, "step": 156960 }, { "epoch": 7.796265024336942, "grad_norm": 0.162109375, "learning_rate": 0.00017630277143140956, "loss": 0.4859, "step": 156970 }, { "epoch": 7.7967616966325615, "grad_norm": 0.1396484375, "learning_rate": 0.00017626303764776003, "loss": 0.5107, "step": 156980 }, { "epoch": 7.797258368928182, "grad_norm": 0.16015625, "learning_rate": 0.00017622330386411048, "loss": 0.4651, "step": 156990 }, { "epoch": 7.797755041223801, "grad_norm": 0.1845703125, "learning_rate": 0.00017618357008046092, "loss": 0.468, "step": 157000 }, { "epoch": 7.79825171351942, "grad_norm": 0.162109375, "learning_rate": 0.00017614383629681137, "loss": 0.4886, "step": 157010 }, { "epoch": 7.798748385815039, "grad_norm": 0.1416015625, "learning_rate": 0.00017610410251316184, "loss": 0.4947, "step": 157020 }, { "epoch": 7.7992450581106585, "grad_norm": 0.158203125, "learning_rate": 0.00017606436872951228, "loss": 0.5017, "step": 157030 }, { "epoch": 7.799741730406278, "grad_norm": 0.177734375, "learning_rate": 0.00017602463494586273, "loss": 0.4941, "step": 157040 }, { "epoch": 7.800238402701897, "grad_norm": 0.15234375, "learning_rate": 0.00017598490116221317, "loss": 0.4735, "step": 157050 }, { "epoch": 7.800735074997517, "grad_norm": 0.1708984375, "learning_rate": 0.00017594516737856364, "loss": 0.4988, "step": 157060 }, { "epoch": 7.801231747293136, "grad_norm": 0.158203125, "learning_rate": 0.0001759054335949141, "loss": 0.4839, "step": 157070 }, { "epoch": 7.8017284195887555, "grad_norm": 0.201171875, "learning_rate": 0.00017586569981126453, "loss": 0.4763, "step": 157080 }, { "epoch": 7.802225091884375, "grad_norm": 0.1591796875, "learning_rate": 0.00017582596602761498, "loss": 0.5064, "step": 157090 }, { "epoch": 7.802721764179994, "grad_norm": 0.16796875, "learning_rate": 0.00017578623224396545, "loss": 0.4735, "step": 157100 }, { "epoch": 7.803218436475613, "grad_norm": 0.1572265625, "learning_rate": 0.0001757464984603159, "loss": 0.483, "step": 157110 }, { "epoch": 7.803715108771232, "grad_norm": 0.154296875, "learning_rate": 0.00017570676467666634, "loss": 0.4756, "step": 157120 }, { "epoch": 7.8042117810668525, "grad_norm": 0.171875, "learning_rate": 0.00017566703089301678, "loss": 0.4639, "step": 157130 }, { "epoch": 7.804708453362472, "grad_norm": 0.1767578125, "learning_rate": 0.00017562729710936725, "loss": 0.4743, "step": 157140 }, { "epoch": 7.805205125658091, "grad_norm": 0.173828125, "learning_rate": 0.0001755875633257177, "loss": 0.5498, "step": 157150 }, { "epoch": 7.80570179795371, "grad_norm": 0.1455078125, "learning_rate": 0.00017554782954206814, "loss": 0.4827, "step": 157160 }, { "epoch": 7.8061984702493294, "grad_norm": 0.1689453125, "learning_rate": 0.0001755080957584186, "loss": 0.4982, "step": 157170 }, { "epoch": 7.806695142544949, "grad_norm": 0.1689453125, "learning_rate": 0.00017546836197476905, "loss": 0.5006, "step": 157180 }, { "epoch": 7.807191814840568, "grad_norm": 0.1552734375, "learning_rate": 0.00017542862819111953, "loss": 0.4829, "step": 157190 }, { "epoch": 7.807688487136188, "grad_norm": 0.14453125, "learning_rate": 0.00017538889440746994, "loss": 0.4748, "step": 157200 }, { "epoch": 7.808185159431807, "grad_norm": 0.2060546875, "learning_rate": 0.00017534916062382041, "loss": 0.4986, "step": 157210 }, { "epoch": 7.8086818317274265, "grad_norm": 0.1630859375, "learning_rate": 0.00017530942684017086, "loss": 0.4668, "step": 157220 }, { "epoch": 7.809178504023046, "grad_norm": 0.173828125, "learning_rate": 0.00017526969305652133, "loss": 0.484, "step": 157230 }, { "epoch": 7.809675176318665, "grad_norm": 0.1513671875, "learning_rate": 0.00017522995927287175, "loss": 0.4552, "step": 157240 }, { "epoch": 7.810171848614284, "grad_norm": 0.1796875, "learning_rate": 0.00017519022548922222, "loss": 0.4988, "step": 157250 }, { "epoch": 7.810668520909903, "grad_norm": 0.16015625, "learning_rate": 0.00017515049170557266, "loss": 0.4594, "step": 157260 }, { "epoch": 7.8111651932055235, "grad_norm": 0.1396484375, "learning_rate": 0.00017511075792192313, "loss": 0.4824, "step": 157270 }, { "epoch": 7.811661865501143, "grad_norm": 0.16015625, "learning_rate": 0.00017507102413827355, "loss": 0.5047, "step": 157280 }, { "epoch": 7.812158537796762, "grad_norm": 0.1474609375, "learning_rate": 0.00017503129035462402, "loss": 0.5078, "step": 157290 }, { "epoch": 7.812655210092381, "grad_norm": 0.169921875, "learning_rate": 0.00017499155657097447, "loss": 0.4975, "step": 157300 }, { "epoch": 7.813151882388, "grad_norm": 0.138671875, "learning_rate": 0.00017495182278732494, "loss": 0.4623, "step": 157310 }, { "epoch": 7.81364855468362, "grad_norm": 0.15234375, "learning_rate": 0.00017491208900367538, "loss": 0.4954, "step": 157320 }, { "epoch": 7.814145226979239, "grad_norm": 0.17578125, "learning_rate": 0.00017487235522002583, "loss": 0.4881, "step": 157330 }, { "epoch": 7.814641899274858, "grad_norm": 0.1552734375, "learning_rate": 0.0001748326214363763, "loss": 0.4988, "step": 157340 }, { "epoch": 7.815138571570477, "grad_norm": 0.1435546875, "learning_rate": 0.00017479288765272674, "loss": 0.4848, "step": 157350 }, { "epoch": 7.815635243866097, "grad_norm": 0.18359375, "learning_rate": 0.00017475315386907721, "loss": 0.4845, "step": 157360 }, { "epoch": 7.816131916161717, "grad_norm": 0.162109375, "learning_rate": 0.00017471342008542763, "loss": 0.503, "step": 157370 }, { "epoch": 7.816628588457336, "grad_norm": 0.1904296875, "learning_rate": 0.0001746736863017781, "loss": 0.478, "step": 157380 }, { "epoch": 7.817125260752955, "grad_norm": 0.1513671875, "learning_rate": 0.00017463395251812855, "loss": 0.4855, "step": 157390 }, { "epoch": 7.817621933048574, "grad_norm": 0.1650390625, "learning_rate": 0.00017459421873447902, "loss": 0.4919, "step": 157400 }, { "epoch": 7.818118605344194, "grad_norm": 0.171875, "learning_rate": 0.00017455448495082944, "loss": 0.493, "step": 157410 }, { "epoch": 7.818615277639813, "grad_norm": 0.1591796875, "learning_rate": 0.0001745147511671799, "loss": 0.4962, "step": 157420 }, { "epoch": 7.819111949935433, "grad_norm": 0.1630859375, "learning_rate": 0.00017447501738353035, "loss": 0.4884, "step": 157430 }, { "epoch": 7.819608622231052, "grad_norm": 0.1591796875, "learning_rate": 0.00017443528359988082, "loss": 0.5151, "step": 157440 }, { "epoch": 7.820105294526671, "grad_norm": 0.1572265625, "learning_rate": 0.00017439554981623124, "loss": 0.4866, "step": 157450 }, { "epoch": 7.820601966822291, "grad_norm": 0.1708984375, "learning_rate": 0.0001743558160325817, "loss": 0.5049, "step": 157460 }, { "epoch": 7.82109863911791, "grad_norm": 0.2138671875, "learning_rate": 0.00017431608224893216, "loss": 0.4865, "step": 157470 }, { "epoch": 7.821595311413529, "grad_norm": 0.1630859375, "learning_rate": 0.00017427634846528263, "loss": 0.5304, "step": 157480 }, { "epoch": 7.822091983709148, "grad_norm": 0.1396484375, "learning_rate": 0.00017423661468163307, "loss": 0.4809, "step": 157490 }, { "epoch": 7.822588656004768, "grad_norm": 0.150390625, "learning_rate": 0.00017419688089798351, "loss": 0.5039, "step": 157500 }, { "epoch": 7.823085328300388, "grad_norm": 0.146484375, "learning_rate": 0.00017415714711433399, "loss": 0.4733, "step": 157510 }, { "epoch": 7.823582000596007, "grad_norm": 0.173828125, "learning_rate": 0.00017411741333068443, "loss": 0.4945, "step": 157520 }, { "epoch": 7.824078672891626, "grad_norm": 0.1474609375, "learning_rate": 0.00017407767954703487, "loss": 0.4967, "step": 157530 }, { "epoch": 7.824575345187245, "grad_norm": 0.1630859375, "learning_rate": 0.00017403794576338532, "loss": 0.4992, "step": 157540 }, { "epoch": 7.8250720174828645, "grad_norm": 0.15625, "learning_rate": 0.0001739982119797358, "loss": 0.4455, "step": 157550 }, { "epoch": 7.825568689778484, "grad_norm": 0.16796875, "learning_rate": 0.00017395847819608623, "loss": 0.4939, "step": 157560 }, { "epoch": 7.826065362074104, "grad_norm": 0.1728515625, "learning_rate": 0.00017391874441243668, "loss": 0.4764, "step": 157570 }, { "epoch": 7.826562034369723, "grad_norm": 0.1474609375, "learning_rate": 0.00017387901062878712, "loss": 0.4906, "step": 157580 }, { "epoch": 7.827058706665342, "grad_norm": 0.166015625, "learning_rate": 0.0001738392768451376, "loss": 0.5121, "step": 157590 }, { "epoch": 7.8275553789609615, "grad_norm": 0.16015625, "learning_rate": 0.00017379954306148804, "loss": 0.4899, "step": 157600 }, { "epoch": 7.828052051256581, "grad_norm": 0.16015625, "learning_rate": 0.00017375980927783848, "loss": 0.5032, "step": 157610 }, { "epoch": 7.8285487235522, "grad_norm": 0.1669921875, "learning_rate": 0.00017372007549418893, "loss": 0.5003, "step": 157620 }, { "epoch": 7.829045395847819, "grad_norm": 0.1552734375, "learning_rate": 0.0001736803417105394, "loss": 0.5219, "step": 157630 }, { "epoch": 7.829542068143439, "grad_norm": 0.1962890625, "learning_rate": 0.00017364060792688984, "loss": 0.4936, "step": 157640 }, { "epoch": 7.830038740439059, "grad_norm": 0.1474609375, "learning_rate": 0.0001736008741432403, "loss": 0.4803, "step": 157650 }, { "epoch": 7.830535412734678, "grad_norm": 0.16015625, "learning_rate": 0.00017356114035959076, "loss": 0.5206, "step": 157660 }, { "epoch": 7.831032085030297, "grad_norm": 0.1591796875, "learning_rate": 0.0001735214065759412, "loss": 0.5155, "step": 157670 }, { "epoch": 7.831528757325916, "grad_norm": 0.1796875, "learning_rate": 0.00017348167279229167, "loss": 0.4948, "step": 157680 }, { "epoch": 7.8320254296215355, "grad_norm": 0.1708984375, "learning_rate": 0.0001734419390086421, "loss": 0.4792, "step": 157690 }, { "epoch": 7.832522101917155, "grad_norm": 0.16796875, "learning_rate": 0.00017340220522499256, "loss": 0.4773, "step": 157700 }, { "epoch": 7.833018774212775, "grad_norm": 0.1494140625, "learning_rate": 0.000173362471441343, "loss": 0.4869, "step": 157710 }, { "epoch": 7.833515446508394, "grad_norm": 0.17578125, "learning_rate": 0.00017332273765769348, "loss": 0.4808, "step": 157720 }, { "epoch": 7.834012118804013, "grad_norm": 0.2109375, "learning_rate": 0.0001732830038740439, "loss": 0.4866, "step": 157730 }, { "epoch": 7.8345087910996325, "grad_norm": 0.1787109375, "learning_rate": 0.00017324327009039437, "loss": 0.4477, "step": 157740 }, { "epoch": 7.835005463395252, "grad_norm": 0.162109375, "learning_rate": 0.0001732035363067448, "loss": 0.483, "step": 157750 }, { "epoch": 7.835502135690871, "grad_norm": 0.169921875, "learning_rate": 0.00017316380252309528, "loss": 0.5277, "step": 157760 }, { "epoch": 7.83599880798649, "grad_norm": 0.1494140625, "learning_rate": 0.00017312406873944573, "loss": 0.4935, "step": 157770 }, { "epoch": 7.83649548028211, "grad_norm": 0.1708984375, "learning_rate": 0.00017308433495579617, "loss": 0.4675, "step": 157780 }, { "epoch": 7.8369921525777295, "grad_norm": 0.1435546875, "learning_rate": 0.00017304460117214662, "loss": 0.4856, "step": 157790 }, { "epoch": 7.837488824873349, "grad_norm": 0.177734375, "learning_rate": 0.00017300486738849709, "loss": 0.4605, "step": 157800 }, { "epoch": 7.837985497168968, "grad_norm": 0.1748046875, "learning_rate": 0.00017296513360484753, "loss": 0.4991, "step": 157810 }, { "epoch": 7.838482169464587, "grad_norm": 0.154296875, "learning_rate": 0.00017292539982119797, "loss": 0.5, "step": 157820 }, { "epoch": 7.838978841760206, "grad_norm": 0.1435546875, "learning_rate": 0.00017288566603754845, "loss": 0.4949, "step": 157830 }, { "epoch": 7.839475514055826, "grad_norm": 0.17578125, "learning_rate": 0.0001728459322538989, "loss": 0.4982, "step": 157840 }, { "epoch": 7.839972186351446, "grad_norm": 0.1513671875, "learning_rate": 0.00017280619847024933, "loss": 0.4761, "step": 157850 }, { "epoch": 7.840468858647065, "grad_norm": 0.1796875, "learning_rate": 0.00017276646468659978, "loss": 0.4898, "step": 157860 }, { "epoch": 7.840965530942684, "grad_norm": 0.1669921875, "learning_rate": 0.00017272673090295025, "loss": 0.5283, "step": 157870 }, { "epoch": 7.8414622032383035, "grad_norm": 0.146484375, "learning_rate": 0.0001726869971193007, "loss": 0.4675, "step": 157880 }, { "epoch": 7.841958875533923, "grad_norm": 0.140625, "learning_rate": 0.00017264726333565117, "loss": 0.4695, "step": 157890 }, { "epoch": 7.842455547829542, "grad_norm": 0.1650390625, "learning_rate": 0.00017260752955200158, "loss": 0.4885, "step": 157900 }, { "epoch": 7.842952220125161, "grad_norm": 0.1484375, "learning_rate": 0.00017256779576835205, "loss": 0.4774, "step": 157910 }, { "epoch": 7.843448892420781, "grad_norm": 0.1728515625, "learning_rate": 0.0001725280619847025, "loss": 0.489, "step": 157920 }, { "epoch": 7.8439455647164005, "grad_norm": 0.1650390625, "learning_rate": 0.00017248832820105297, "loss": 0.4955, "step": 157930 }, { "epoch": 7.84444223701202, "grad_norm": 0.16015625, "learning_rate": 0.0001724485944174034, "loss": 0.4899, "step": 157940 }, { "epoch": 7.844938909307639, "grad_norm": 0.1669921875, "learning_rate": 0.00017240886063375386, "loss": 0.5077, "step": 157950 }, { "epoch": 7.845435581603258, "grad_norm": 0.146484375, "learning_rate": 0.0001723691268501043, "loss": 0.5141, "step": 157960 }, { "epoch": 7.845932253898877, "grad_norm": 0.1826171875, "learning_rate": 0.00017232939306645477, "loss": 0.486, "step": 157970 }, { "epoch": 7.846428926194497, "grad_norm": 0.169921875, "learning_rate": 0.0001722896592828052, "loss": 0.4688, "step": 157980 }, { "epoch": 7.846925598490116, "grad_norm": 0.146484375, "learning_rate": 0.00017224992549915566, "loss": 0.4759, "step": 157990 }, { "epoch": 7.847422270785736, "grad_norm": 0.1572265625, "learning_rate": 0.0001722101917155061, "loss": 0.5069, "step": 158000 }, { "epoch": 7.847918943081355, "grad_norm": 0.173828125, "learning_rate": 0.00017217045793185658, "loss": 0.4946, "step": 158010 }, { "epoch": 7.848415615376974, "grad_norm": 0.189453125, "learning_rate": 0.00017213072414820702, "loss": 0.515, "step": 158020 }, { "epoch": 7.848912287672594, "grad_norm": 0.1875, "learning_rate": 0.00017209099036455747, "loss": 0.4999, "step": 158030 }, { "epoch": 7.849408959968213, "grad_norm": 0.15625, "learning_rate": 0.00017205125658090794, "loss": 0.4776, "step": 158040 }, { "epoch": 7.849905632263832, "grad_norm": 0.1640625, "learning_rate": 0.00017201152279725838, "loss": 0.487, "step": 158050 }, { "epoch": 7.850402304559451, "grad_norm": 0.203125, "learning_rate": 0.00017197178901360883, "loss": 0.5035, "step": 158060 }, { "epoch": 7.8508989768550705, "grad_norm": 0.1376953125, "learning_rate": 0.00017193205522995927, "loss": 0.488, "step": 158070 }, { "epoch": 7.851395649150691, "grad_norm": 0.1591796875, "learning_rate": 0.00017189232144630974, "loss": 0.5068, "step": 158080 }, { "epoch": 7.85189232144631, "grad_norm": 0.140625, "learning_rate": 0.0001718525876626602, "loss": 0.4852, "step": 158090 }, { "epoch": 7.852388993741929, "grad_norm": 0.1513671875, "learning_rate": 0.00017181285387901063, "loss": 0.5238, "step": 158100 }, { "epoch": 7.852885666037548, "grad_norm": 0.15625, "learning_rate": 0.00017177312009536107, "loss": 0.498, "step": 158110 }, { "epoch": 7.853382338333168, "grad_norm": 0.1611328125, "learning_rate": 0.00017173338631171155, "loss": 0.485, "step": 158120 }, { "epoch": 7.853879010628787, "grad_norm": 0.173828125, "learning_rate": 0.000171693652528062, "loss": 0.4411, "step": 158130 }, { "epoch": 7.854375682924406, "grad_norm": 0.1552734375, "learning_rate": 0.00017165391874441243, "loss": 0.5216, "step": 158140 }, { "epoch": 7.854872355220026, "grad_norm": 0.1513671875, "learning_rate": 0.00017161418496076288, "loss": 0.494, "step": 158150 }, { "epoch": 7.855369027515645, "grad_norm": 0.1396484375, "learning_rate": 0.00017157445117711335, "loss": 0.5004, "step": 158160 }, { "epoch": 7.855865699811265, "grad_norm": 0.162109375, "learning_rate": 0.0001715347173934638, "loss": 0.4854, "step": 158170 }, { "epoch": 7.856362372106884, "grad_norm": 0.158203125, "learning_rate": 0.00017149498360981427, "loss": 0.467, "step": 158180 }, { "epoch": 7.856859044402503, "grad_norm": 0.162109375, "learning_rate": 0.0001714552498261647, "loss": 0.4838, "step": 158190 }, { "epoch": 7.857355716698122, "grad_norm": 0.14453125, "learning_rate": 0.00017141551604251515, "loss": 0.5075, "step": 158200 }, { "epoch": 7.8578523889937415, "grad_norm": 0.171875, "learning_rate": 0.00017137578225886563, "loss": 0.4666, "step": 158210 }, { "epoch": 7.858349061289362, "grad_norm": 0.1865234375, "learning_rate": 0.00017133604847521607, "loss": 0.495, "step": 158220 }, { "epoch": 7.858845733584981, "grad_norm": 0.1513671875, "learning_rate": 0.00017129631469156651, "loss": 0.4623, "step": 158230 }, { "epoch": 7.8593424058806, "grad_norm": 0.1513671875, "learning_rate": 0.00017125658090791696, "loss": 0.4929, "step": 158240 }, { "epoch": 7.859839078176219, "grad_norm": 0.1513671875, "learning_rate": 0.00017121684712426743, "loss": 0.4998, "step": 158250 }, { "epoch": 7.8603357504718385, "grad_norm": 0.1494140625, "learning_rate": 0.00017117711334061787, "loss": 0.465, "step": 158260 }, { "epoch": 7.860832422767458, "grad_norm": 0.162109375, "learning_rate": 0.00017113737955696832, "loss": 0.5087, "step": 158270 }, { "epoch": 7.861329095063077, "grad_norm": 0.169921875, "learning_rate": 0.00017109764577331876, "loss": 0.4851, "step": 158280 }, { "epoch": 7.861825767358697, "grad_norm": 0.171875, "learning_rate": 0.00017105791198966923, "loss": 0.4744, "step": 158290 }, { "epoch": 7.862322439654316, "grad_norm": 0.1572265625, "learning_rate": 0.00017101817820601968, "loss": 0.505, "step": 158300 }, { "epoch": 7.8628191119499355, "grad_norm": 0.1484375, "learning_rate": 0.00017097844442237012, "loss": 0.4845, "step": 158310 }, { "epoch": 7.863315784245555, "grad_norm": 0.150390625, "learning_rate": 0.00017093871063872057, "loss": 0.5029, "step": 158320 }, { "epoch": 7.863812456541174, "grad_norm": 0.18359375, "learning_rate": 0.00017089897685507104, "loss": 0.4545, "step": 158330 }, { "epoch": 7.864309128836793, "grad_norm": 0.16796875, "learning_rate": 0.00017085924307142148, "loss": 0.4649, "step": 158340 }, { "epoch": 7.8648058011324125, "grad_norm": 0.1533203125, "learning_rate": 0.00017081950928777193, "loss": 0.5021, "step": 158350 }, { "epoch": 7.865302473428033, "grad_norm": 0.17578125, "learning_rate": 0.0001707797755041224, "loss": 0.4561, "step": 158360 }, { "epoch": 7.865799145723652, "grad_norm": 0.181640625, "learning_rate": 0.00017074004172047284, "loss": 0.5059, "step": 158370 }, { "epoch": 7.866295818019271, "grad_norm": 0.150390625, "learning_rate": 0.00017070030793682331, "loss": 0.5086, "step": 158380 }, { "epoch": 7.86679249031489, "grad_norm": 0.1455078125, "learning_rate": 0.00017066057415317373, "loss": 0.4865, "step": 158390 }, { "epoch": 7.8672891626105095, "grad_norm": 0.1708984375, "learning_rate": 0.0001706208403695242, "loss": 0.4936, "step": 158400 }, { "epoch": 7.867785834906129, "grad_norm": 0.14453125, "learning_rate": 0.00017058110658587465, "loss": 0.4558, "step": 158410 }, { "epoch": 7.868282507201748, "grad_norm": 0.173828125, "learning_rate": 0.00017054137280222512, "loss": 0.5057, "step": 158420 }, { "epoch": 7.868779179497368, "grad_norm": 0.169921875, "learning_rate": 0.00017050163901857553, "loss": 0.4906, "step": 158430 }, { "epoch": 7.869275851792987, "grad_norm": 0.1474609375, "learning_rate": 0.000170461905234926, "loss": 0.5208, "step": 158440 }, { "epoch": 7.8697725240886065, "grad_norm": 0.1650390625, "learning_rate": 0.00017042217145127645, "loss": 0.4733, "step": 158450 }, { "epoch": 7.870269196384226, "grad_norm": 0.1865234375, "learning_rate": 0.00017038243766762692, "loss": 0.4874, "step": 158460 }, { "epoch": 7.870765868679845, "grad_norm": 0.158203125, "learning_rate": 0.00017034270388397734, "loss": 0.4597, "step": 158470 }, { "epoch": 7.871262540975464, "grad_norm": 0.150390625, "learning_rate": 0.0001703029701003278, "loss": 0.5403, "step": 158480 }, { "epoch": 7.871759213271083, "grad_norm": 0.1689453125, "learning_rate": 0.00017026323631667825, "loss": 0.525, "step": 158490 }, { "epoch": 7.8722558855667035, "grad_norm": 0.173828125, "learning_rate": 0.00017022350253302873, "loss": 0.4927, "step": 158500 }, { "epoch": 7.872752557862323, "grad_norm": 0.15234375, "learning_rate": 0.00017018376874937917, "loss": 0.5222, "step": 158510 }, { "epoch": 7.873249230157942, "grad_norm": 0.14453125, "learning_rate": 0.00017014403496572961, "loss": 0.4882, "step": 158520 }, { "epoch": 7.873745902453561, "grad_norm": 0.1845703125, "learning_rate": 0.00017010430118208009, "loss": 0.4862, "step": 158530 }, { "epoch": 7.87424257474918, "grad_norm": 0.1689453125, "learning_rate": 0.00017006456739843053, "loss": 0.4564, "step": 158540 }, { "epoch": 7.8747392470448, "grad_norm": 0.1650390625, "learning_rate": 0.00017002483361478097, "loss": 0.5056, "step": 158550 }, { "epoch": 7.875235919340419, "grad_norm": 0.140625, "learning_rate": 0.00016998509983113142, "loss": 0.49, "step": 158560 }, { "epoch": 7.875732591636039, "grad_norm": 0.1640625, "learning_rate": 0.0001699453660474819, "loss": 0.4968, "step": 158570 }, { "epoch": 7.876229263931658, "grad_norm": 0.15234375, "learning_rate": 0.00016990563226383233, "loss": 0.4868, "step": 158580 }, { "epoch": 7.8767259362272775, "grad_norm": 0.177734375, "learning_rate": 0.0001698658984801828, "loss": 0.4979, "step": 158590 }, { "epoch": 7.877222608522897, "grad_norm": 0.1708984375, "learning_rate": 0.00016982616469653322, "loss": 0.4879, "step": 158600 }, { "epoch": 7.877719280818516, "grad_norm": 0.1767578125, "learning_rate": 0.0001697864309128837, "loss": 0.5487, "step": 158610 }, { "epoch": 7.878215953114135, "grad_norm": 0.150390625, "learning_rate": 0.00016974669712923414, "loss": 0.4763, "step": 158620 }, { "epoch": 7.878712625409754, "grad_norm": 0.1591796875, "learning_rate": 0.0001697069633455846, "loss": 0.4907, "step": 158630 }, { "epoch": 7.8792092977053745, "grad_norm": 0.166015625, "learning_rate": 0.00016966722956193503, "loss": 0.4819, "step": 158640 }, { "epoch": 7.879705970000994, "grad_norm": 0.1767578125, "learning_rate": 0.0001696274957782855, "loss": 0.4651, "step": 158650 }, { "epoch": 7.880202642296613, "grad_norm": 0.1708984375, "learning_rate": 0.00016958776199463594, "loss": 0.5158, "step": 158660 }, { "epoch": 7.880699314592232, "grad_norm": 0.1591796875, "learning_rate": 0.00016954802821098641, "loss": 0.4985, "step": 158670 }, { "epoch": 7.881195986887851, "grad_norm": 0.154296875, "learning_rate": 0.00016950829442733683, "loss": 0.4626, "step": 158680 }, { "epoch": 7.881692659183471, "grad_norm": 0.15625, "learning_rate": 0.0001694685606436873, "loss": 0.4984, "step": 158690 }, { "epoch": 7.88218933147909, "grad_norm": 0.1572265625, "learning_rate": 0.00016942882686003775, "loss": 0.4647, "step": 158700 }, { "epoch": 7.882686003774709, "grad_norm": 0.1513671875, "learning_rate": 0.00016938909307638822, "loss": 0.4911, "step": 158710 }, { "epoch": 7.883182676070328, "grad_norm": 0.1787109375, "learning_rate": 0.00016934935929273866, "loss": 0.5046, "step": 158720 }, { "epoch": 7.883679348365948, "grad_norm": 0.154296875, "learning_rate": 0.0001693096255090891, "loss": 0.4652, "step": 158730 }, { "epoch": 7.884176020661568, "grad_norm": 0.1591796875, "learning_rate": 0.00016926989172543958, "loss": 0.5216, "step": 158740 }, { "epoch": 7.884672692957187, "grad_norm": 0.15625, "learning_rate": 0.00016923015794179002, "loss": 0.4934, "step": 158750 }, { "epoch": 7.885169365252806, "grad_norm": 0.14453125, "learning_rate": 0.00016919042415814047, "loss": 0.5015, "step": 158760 }, { "epoch": 7.885666037548425, "grad_norm": 0.1611328125, "learning_rate": 0.0001691506903744909, "loss": 0.5578, "step": 158770 }, { "epoch": 7.8861627098440445, "grad_norm": 0.1455078125, "learning_rate": 0.00016911095659084138, "loss": 0.4995, "step": 158780 }, { "epoch": 7.886659382139664, "grad_norm": 0.1806640625, "learning_rate": 0.00016907122280719183, "loss": 0.5216, "step": 158790 }, { "epoch": 7.887156054435284, "grad_norm": 0.1435546875, "learning_rate": 0.00016903148902354227, "loss": 0.4803, "step": 158800 }, { "epoch": 7.887652726730903, "grad_norm": 0.1435546875, "learning_rate": 0.00016899175523989271, "loss": 0.4816, "step": 158810 }, { "epoch": 7.888149399026522, "grad_norm": 0.1611328125, "learning_rate": 0.00016895202145624319, "loss": 0.4879, "step": 158820 }, { "epoch": 7.888646071322142, "grad_norm": 0.1728515625, "learning_rate": 0.00016891228767259363, "loss": 0.4714, "step": 158830 }, { "epoch": 7.889142743617761, "grad_norm": 0.158203125, "learning_rate": 0.00016887255388894407, "loss": 0.4949, "step": 158840 }, { "epoch": 7.88963941591338, "grad_norm": 0.15625, "learning_rate": 0.00016883282010529452, "loss": 0.5038, "step": 158850 }, { "epoch": 7.890136088208999, "grad_norm": 0.1513671875, "learning_rate": 0.000168793086321645, "loss": 0.4899, "step": 158860 }, { "epoch": 7.890632760504619, "grad_norm": 0.1552734375, "learning_rate": 0.00016875335253799543, "loss": 0.5325, "step": 158870 }, { "epoch": 7.891129432800239, "grad_norm": 0.1982421875, "learning_rate": 0.00016871361875434588, "loss": 0.4795, "step": 158880 }, { "epoch": 7.891626105095858, "grad_norm": 0.1591796875, "learning_rate": 0.00016867388497069635, "loss": 0.5102, "step": 158890 }, { "epoch": 7.892122777391477, "grad_norm": 0.1474609375, "learning_rate": 0.0001686341511870468, "loss": 0.4802, "step": 158900 }, { "epoch": 7.892619449687096, "grad_norm": 0.1552734375, "learning_rate": 0.00016859441740339727, "loss": 0.4854, "step": 158910 }, { "epoch": 7.8931161219827155, "grad_norm": 0.158203125, "learning_rate": 0.00016855468361974768, "loss": 0.4677, "step": 158920 }, { "epoch": 7.893612794278335, "grad_norm": 0.1474609375, "learning_rate": 0.00016851494983609815, "loss": 0.4833, "step": 158930 }, { "epoch": 7.894109466573955, "grad_norm": 0.169921875, "learning_rate": 0.0001684752160524486, "loss": 0.4998, "step": 158940 }, { "epoch": 7.894606138869574, "grad_norm": 0.138671875, "learning_rate": 0.00016843548226879907, "loss": 0.4769, "step": 158950 }, { "epoch": 7.895102811165193, "grad_norm": 0.146484375, "learning_rate": 0.0001683957484851495, "loss": 0.4937, "step": 158960 }, { "epoch": 7.8955994834608125, "grad_norm": 0.1796875, "learning_rate": 0.00016835601470149996, "loss": 0.487, "step": 158970 }, { "epoch": 7.896096155756432, "grad_norm": 0.1484375, "learning_rate": 0.0001683162809178504, "loss": 0.5119, "step": 158980 }, { "epoch": 7.896592828052051, "grad_norm": 0.15625, "learning_rate": 0.00016827654713420087, "loss": 0.4928, "step": 158990 }, { "epoch": 7.89708950034767, "grad_norm": 0.1591796875, "learning_rate": 0.0001682368133505513, "loss": 0.4702, "step": 159000 }, { "epoch": 7.89758617264329, "grad_norm": 0.1904296875, "learning_rate": 0.00016819707956690176, "loss": 0.481, "step": 159010 }, { "epoch": 7.8980828449389096, "grad_norm": 0.1357421875, "learning_rate": 0.0001681573457832522, "loss": 0.5237, "step": 159020 }, { "epoch": 7.898579517234529, "grad_norm": 0.1806640625, "learning_rate": 0.00016811761199960268, "loss": 0.4982, "step": 159030 }, { "epoch": 7.899076189530148, "grad_norm": 0.1650390625, "learning_rate": 0.00016807787821595312, "loss": 0.4826, "step": 159040 }, { "epoch": 7.899572861825767, "grad_norm": 0.154296875, "learning_rate": 0.00016803814443230357, "loss": 0.4898, "step": 159050 }, { "epoch": 7.9000695341213865, "grad_norm": 0.166015625, "learning_rate": 0.00016799841064865404, "loss": 0.479, "step": 159060 }, { "epoch": 7.900566206417006, "grad_norm": 0.17578125, "learning_rate": 0.00016795867686500448, "loss": 0.4809, "step": 159070 }, { "epoch": 7.901062878712626, "grad_norm": 0.1806640625, "learning_rate": 0.00016791894308135495, "loss": 0.4751, "step": 159080 }, { "epoch": 7.901559551008245, "grad_norm": 0.171875, "learning_rate": 0.00016787920929770537, "loss": 0.5325, "step": 159090 }, { "epoch": 7.902056223303864, "grad_norm": 0.1669921875, "learning_rate": 0.00016783947551405584, "loss": 0.4959, "step": 159100 }, { "epoch": 7.9025528955994835, "grad_norm": 0.154296875, "learning_rate": 0.00016779974173040629, "loss": 0.4888, "step": 159110 }, { "epoch": 7.903049567895103, "grad_norm": 0.1796875, "learning_rate": 0.00016776000794675676, "loss": 0.5317, "step": 159120 }, { "epoch": 7.903546240190722, "grad_norm": 0.1630859375, "learning_rate": 0.00016772027416310717, "loss": 0.4846, "step": 159130 }, { "epoch": 7.904042912486341, "grad_norm": 0.1787109375, "learning_rate": 0.00016768054037945765, "loss": 0.5025, "step": 159140 }, { "epoch": 7.904539584781961, "grad_norm": 0.171875, "learning_rate": 0.0001676408065958081, "loss": 0.5048, "step": 159150 }, { "epoch": 7.9050362570775805, "grad_norm": 0.146484375, "learning_rate": 0.00016760107281215856, "loss": 0.5299, "step": 159160 }, { "epoch": 7.9055329293732, "grad_norm": 0.173828125, "learning_rate": 0.00016756133902850898, "loss": 0.4928, "step": 159170 }, { "epoch": 7.906029601668819, "grad_norm": 0.1513671875, "learning_rate": 0.00016752160524485945, "loss": 0.491, "step": 159180 }, { "epoch": 7.906526273964438, "grad_norm": 0.1796875, "learning_rate": 0.0001674818714612099, "loss": 0.4857, "step": 159190 }, { "epoch": 7.907022946260057, "grad_norm": 0.1474609375, "learning_rate": 0.00016744213767756037, "loss": 0.4391, "step": 159200 }, { "epoch": 7.907519618555677, "grad_norm": 0.154296875, "learning_rate": 0.0001674024038939108, "loss": 0.5006, "step": 159210 }, { "epoch": 7.908016290851297, "grad_norm": 0.1689453125, "learning_rate": 0.00016736267011026125, "loss": 0.5145, "step": 159220 }, { "epoch": 7.908512963146916, "grad_norm": 0.1669921875, "learning_rate": 0.00016732293632661173, "loss": 0.4822, "step": 159230 }, { "epoch": 7.909009635442535, "grad_norm": 0.1552734375, "learning_rate": 0.00016728320254296217, "loss": 0.4696, "step": 159240 }, { "epoch": 7.909506307738154, "grad_norm": 0.1533203125, "learning_rate": 0.00016724346875931261, "loss": 0.4848, "step": 159250 }, { "epoch": 7.910002980033774, "grad_norm": 0.1416015625, "learning_rate": 0.00016720373497566306, "loss": 0.4845, "step": 159260 }, { "epoch": 7.910499652329393, "grad_norm": 0.1650390625, "learning_rate": 0.00016716400119201353, "loss": 0.5213, "step": 159270 }, { "epoch": 7.910996324625012, "grad_norm": 0.15625, "learning_rate": 0.00016712426740836397, "loss": 0.4884, "step": 159280 }, { "epoch": 7.911492996920632, "grad_norm": 0.1435546875, "learning_rate": 0.00016708453362471442, "loss": 0.4857, "step": 159290 }, { "epoch": 7.9119896692162515, "grad_norm": 0.150390625, "learning_rate": 0.00016704479984106486, "loss": 0.4944, "step": 159300 }, { "epoch": 7.912486341511871, "grad_norm": 0.1640625, "learning_rate": 0.00016700506605741533, "loss": 0.5093, "step": 159310 }, { "epoch": 7.91298301380749, "grad_norm": 0.1533203125, "learning_rate": 0.00016696533227376578, "loss": 0.4598, "step": 159320 }, { "epoch": 7.913479686103109, "grad_norm": 0.1689453125, "learning_rate": 0.00016692559849011622, "loss": 0.5011, "step": 159330 }, { "epoch": 7.913976358398728, "grad_norm": 0.1494140625, "learning_rate": 0.00016688586470646667, "loss": 0.4991, "step": 159340 }, { "epoch": 7.914473030694348, "grad_norm": 0.1689453125, "learning_rate": 0.00016684613092281714, "loss": 0.4935, "step": 159350 }, { "epoch": 7.914969702989968, "grad_norm": 0.166015625, "learning_rate": 0.00016680639713916758, "loss": 0.4924, "step": 159360 }, { "epoch": 7.915466375285587, "grad_norm": 0.166015625, "learning_rate": 0.00016676666335551803, "loss": 0.503, "step": 159370 }, { "epoch": 7.915963047581206, "grad_norm": 0.1474609375, "learning_rate": 0.0001667269295718685, "loss": 0.4942, "step": 159380 }, { "epoch": 7.916459719876825, "grad_norm": 0.1611328125, "learning_rate": 0.00016668719578821894, "loss": 0.4976, "step": 159390 }, { "epoch": 7.916956392172445, "grad_norm": 0.146484375, "learning_rate": 0.00016664746200456939, "loss": 0.4774, "step": 159400 }, { "epoch": 7.917453064468064, "grad_norm": 0.1435546875, "learning_rate": 0.00016660772822091983, "loss": 0.4851, "step": 159410 }, { "epoch": 7.917949736763683, "grad_norm": 0.1474609375, "learning_rate": 0.0001665679944372703, "loss": 0.4923, "step": 159420 }, { "epoch": 7.918446409059302, "grad_norm": 0.1591796875, "learning_rate": 0.00016652826065362075, "loss": 0.4775, "step": 159430 }, { "epoch": 7.9189430813549215, "grad_norm": 0.169921875, "learning_rate": 0.00016648852686997122, "loss": 0.5103, "step": 159440 }, { "epoch": 7.919439753650542, "grad_norm": 0.1572265625, "learning_rate": 0.00016644879308632166, "loss": 0.5057, "step": 159450 }, { "epoch": 7.919936425946161, "grad_norm": 0.1474609375, "learning_rate": 0.0001664090593026721, "loss": 0.4869, "step": 159460 }, { "epoch": 7.92043309824178, "grad_norm": 0.158203125, "learning_rate": 0.00016636932551902255, "loss": 0.4947, "step": 159470 }, { "epoch": 7.920929770537399, "grad_norm": 0.1474609375, "learning_rate": 0.00016632959173537302, "loss": 0.4713, "step": 159480 }, { "epoch": 7.9214264428330186, "grad_norm": 0.15625, "learning_rate": 0.00016628985795172347, "loss": 0.4921, "step": 159490 }, { "epoch": 7.921923115128638, "grad_norm": 0.15234375, "learning_rate": 0.0001662501241680739, "loss": 0.4989, "step": 159500 }, { "epoch": 7.922419787424257, "grad_norm": 0.1884765625, "learning_rate": 0.00016621039038442435, "loss": 0.5064, "step": 159510 }, { "epoch": 7.922916459719877, "grad_norm": 0.1796875, "learning_rate": 0.00016617065660077483, "loss": 0.4692, "step": 159520 }, { "epoch": 7.923413132015496, "grad_norm": 0.150390625, "learning_rate": 0.00016613092281712527, "loss": 0.5094, "step": 159530 }, { "epoch": 7.923909804311116, "grad_norm": 0.1416015625, "learning_rate": 0.00016609118903347571, "loss": 0.5177, "step": 159540 }, { "epoch": 7.924406476606735, "grad_norm": 0.1650390625, "learning_rate": 0.00016605145524982616, "loss": 0.5124, "step": 159550 }, { "epoch": 7.924903148902354, "grad_norm": 0.2041015625, "learning_rate": 0.00016601172146617663, "loss": 0.5021, "step": 159560 }, { "epoch": 7.925399821197973, "grad_norm": 0.154296875, "learning_rate": 0.00016597198768252707, "loss": 0.5097, "step": 159570 }, { "epoch": 7.9258964934935925, "grad_norm": 0.1669921875, "learning_rate": 0.00016593225389887752, "loss": 0.4946, "step": 159580 }, { "epoch": 7.926393165789213, "grad_norm": 0.1787109375, "learning_rate": 0.000165892520115228, "loss": 0.5066, "step": 159590 }, { "epoch": 7.926889838084832, "grad_norm": 0.181640625, "learning_rate": 0.00016585278633157843, "loss": 0.5075, "step": 159600 }, { "epoch": 7.927386510380451, "grad_norm": 0.1884765625, "learning_rate": 0.0001658130525479289, "loss": 0.4872, "step": 159610 }, { "epoch": 7.92788318267607, "grad_norm": 0.185546875, "learning_rate": 0.00016577331876427932, "loss": 0.4864, "step": 159620 }, { "epoch": 7.9283798549716895, "grad_norm": 0.181640625, "learning_rate": 0.0001657335849806298, "loss": 0.4786, "step": 159630 }, { "epoch": 7.928876527267309, "grad_norm": 0.1748046875, "learning_rate": 0.00016569385119698024, "loss": 0.511, "step": 159640 }, { "epoch": 7.929373199562928, "grad_norm": 0.15234375, "learning_rate": 0.0001656541174133307, "loss": 0.4909, "step": 159650 }, { "epoch": 7.929869871858548, "grad_norm": 0.1640625, "learning_rate": 0.00016561438362968113, "loss": 0.4978, "step": 159660 }, { "epoch": 7.930366544154167, "grad_norm": 0.169921875, "learning_rate": 0.0001655746498460316, "loss": 0.475, "step": 159670 }, { "epoch": 7.9308632164497865, "grad_norm": 0.158203125, "learning_rate": 0.00016553491606238204, "loss": 0.4994, "step": 159680 }, { "epoch": 7.931359888745406, "grad_norm": 0.189453125, "learning_rate": 0.0001654951822787325, "loss": 0.5013, "step": 159690 }, { "epoch": 7.931856561041025, "grad_norm": 0.1669921875, "learning_rate": 0.00016545544849508293, "loss": 0.5227, "step": 159700 }, { "epoch": 7.932353233336644, "grad_norm": 0.19921875, "learning_rate": 0.0001654157147114334, "loss": 0.4901, "step": 159710 }, { "epoch": 7.932849905632263, "grad_norm": 0.146484375, "learning_rate": 0.00016537598092778385, "loss": 0.5077, "step": 159720 }, { "epoch": 7.933346577927884, "grad_norm": 0.162109375, "learning_rate": 0.00016533624714413432, "loss": 0.4844, "step": 159730 }, { "epoch": 7.933843250223503, "grad_norm": 0.1796875, "learning_rate": 0.00016529651336048476, "loss": 0.4831, "step": 159740 }, { "epoch": 7.934339922519122, "grad_norm": 0.1533203125, "learning_rate": 0.0001652567795768352, "loss": 0.4687, "step": 159750 }, { "epoch": 7.934836594814741, "grad_norm": 0.1611328125, "learning_rate": 0.00016521704579318568, "loss": 0.4882, "step": 159760 }, { "epoch": 7.9353332671103605, "grad_norm": 0.1513671875, "learning_rate": 0.00016517731200953612, "loss": 0.5135, "step": 159770 }, { "epoch": 7.93582993940598, "grad_norm": 0.16015625, "learning_rate": 0.00016513757822588657, "loss": 0.5015, "step": 159780 }, { "epoch": 7.936326611701599, "grad_norm": 0.15234375, "learning_rate": 0.000165097844442237, "loss": 0.4614, "step": 159790 }, { "epoch": 7.936823283997219, "grad_norm": 0.1572265625, "learning_rate": 0.00016505811065858748, "loss": 0.4798, "step": 159800 }, { "epoch": 7.937319956292838, "grad_norm": 0.1787109375, "learning_rate": 0.00016501837687493793, "loss": 0.4945, "step": 159810 }, { "epoch": 7.9378166285884575, "grad_norm": 0.1591796875, "learning_rate": 0.00016497864309128837, "loss": 0.4811, "step": 159820 }, { "epoch": 7.938313300884077, "grad_norm": 0.1533203125, "learning_rate": 0.00016493890930763881, "loss": 0.497, "step": 159830 }, { "epoch": 7.938809973179696, "grad_norm": 0.15625, "learning_rate": 0.00016489917552398929, "loss": 0.5042, "step": 159840 }, { "epoch": 7.939306645475315, "grad_norm": 0.1484375, "learning_rate": 0.00016485944174033973, "loss": 0.496, "step": 159850 }, { "epoch": 7.939803317770934, "grad_norm": 0.1533203125, "learning_rate": 0.0001648197079566902, "loss": 0.5029, "step": 159860 }, { "epoch": 7.9402999900665545, "grad_norm": 0.20703125, "learning_rate": 0.00016477997417304062, "loss": 0.479, "step": 159870 }, { "epoch": 7.940796662362174, "grad_norm": 0.158203125, "learning_rate": 0.0001647402403893911, "loss": 0.5002, "step": 159880 }, { "epoch": 7.941293334657793, "grad_norm": 0.1826171875, "learning_rate": 0.00016470050660574153, "loss": 0.4777, "step": 159890 }, { "epoch": 7.941790006953412, "grad_norm": 0.203125, "learning_rate": 0.000164660772822092, "loss": 0.4911, "step": 159900 }, { "epoch": 7.942286679249031, "grad_norm": 0.1904296875, "learning_rate": 0.00016462103903844245, "loss": 0.4854, "step": 159910 }, { "epoch": 7.942783351544651, "grad_norm": 0.1484375, "learning_rate": 0.0001645813052547929, "loss": 0.5135, "step": 159920 }, { "epoch": 7.94328002384027, "grad_norm": 0.14453125, "learning_rate": 0.00016454157147114337, "loss": 0.4886, "step": 159930 }, { "epoch": 7.94377669613589, "grad_norm": 0.162109375, "learning_rate": 0.0001645018376874938, "loss": 0.4596, "step": 159940 }, { "epoch": 7.944273368431509, "grad_norm": 0.173828125, "learning_rate": 0.00016446210390384425, "loss": 0.4739, "step": 159950 }, { "epoch": 7.9447700407271284, "grad_norm": 0.158203125, "learning_rate": 0.0001644223701201947, "loss": 0.4765, "step": 159960 }, { "epoch": 7.945266713022748, "grad_norm": 0.1416015625, "learning_rate": 0.00016438263633654517, "loss": 0.4598, "step": 159970 }, { "epoch": 7.945763385318367, "grad_norm": 0.1591796875, "learning_rate": 0.0001643429025528956, "loss": 0.5043, "step": 159980 }, { "epoch": 7.946260057613986, "grad_norm": 0.1708984375, "learning_rate": 0.00016430316876924606, "loss": 0.514, "step": 159990 }, { "epoch": 7.946756729909605, "grad_norm": 0.1787109375, "learning_rate": 0.0001642634349855965, "loss": 0.4984, "step": 160000 }, { "epoch": 7.9472534022052255, "grad_norm": 0.142578125, "learning_rate": 0.00016422370120194697, "loss": 0.498, "step": 160010 }, { "epoch": 7.947750074500845, "grad_norm": 0.16796875, "learning_rate": 0.00016418396741829742, "loss": 0.51, "step": 160020 }, { "epoch": 7.948246746796464, "grad_norm": 0.1875, "learning_rate": 0.00016414423363464786, "loss": 0.5087, "step": 160030 }, { "epoch": 7.948743419092083, "grad_norm": 0.150390625, "learning_rate": 0.0001641044998509983, "loss": 0.5029, "step": 160040 }, { "epoch": 7.949240091387702, "grad_norm": 0.1455078125, "learning_rate": 0.00016406476606734878, "loss": 0.5312, "step": 160050 }, { "epoch": 7.949736763683322, "grad_norm": 0.16015625, "learning_rate": 0.00016402503228369922, "loss": 0.4919, "step": 160060 }, { "epoch": 7.950233435978941, "grad_norm": 0.181640625, "learning_rate": 0.00016398529850004967, "loss": 0.4937, "step": 160070 }, { "epoch": 7.95073010827456, "grad_norm": 0.1416015625, "learning_rate": 0.00016394556471640014, "loss": 0.475, "step": 160080 }, { "epoch": 7.95122678057018, "grad_norm": 0.14453125, "learning_rate": 0.00016390583093275058, "loss": 0.4837, "step": 160090 }, { "epoch": 7.951723452865799, "grad_norm": 0.1572265625, "learning_rate": 0.00016386609714910103, "loss": 0.4909, "step": 160100 }, { "epoch": 7.952220125161419, "grad_norm": 0.2080078125, "learning_rate": 0.00016382636336545147, "loss": 0.5178, "step": 160110 }, { "epoch": 7.952716797457038, "grad_norm": 0.15234375, "learning_rate": 0.00016378662958180194, "loss": 0.5154, "step": 160120 }, { "epoch": 7.953213469752657, "grad_norm": 0.142578125, "learning_rate": 0.00016374689579815239, "loss": 0.4721, "step": 160130 }, { "epoch": 7.953710142048276, "grad_norm": 0.158203125, "learning_rate": 0.00016370716201450286, "loss": 0.5122, "step": 160140 }, { "epoch": 7.9542068143438955, "grad_norm": 0.154296875, "learning_rate": 0.00016366742823085327, "loss": 0.4571, "step": 160150 }, { "epoch": 7.954703486639515, "grad_norm": 0.1689453125, "learning_rate": 0.00016362769444720375, "loss": 0.4744, "step": 160160 }, { "epoch": 7.955200158935135, "grad_norm": 0.158203125, "learning_rate": 0.0001635879606635542, "loss": 0.498, "step": 160170 }, { "epoch": 7.955696831230754, "grad_norm": 0.146484375, "learning_rate": 0.00016354822687990466, "loss": 0.4578, "step": 160180 }, { "epoch": 7.956193503526373, "grad_norm": 0.14453125, "learning_rate": 0.00016350849309625508, "loss": 0.4896, "step": 160190 }, { "epoch": 7.956690175821993, "grad_norm": 0.1455078125, "learning_rate": 0.00016346875931260555, "loss": 0.4738, "step": 160200 }, { "epoch": 7.957186848117612, "grad_norm": 0.1591796875, "learning_rate": 0.000163429025528956, "loss": 0.5022, "step": 160210 }, { "epoch": 7.957683520413231, "grad_norm": 0.158203125, "learning_rate": 0.00016338929174530647, "loss": 0.4943, "step": 160220 }, { "epoch": 7.95818019270885, "grad_norm": 0.1611328125, "learning_rate": 0.0001633495579616569, "loss": 0.4896, "step": 160230 }, { "epoch": 7.95867686500447, "grad_norm": 0.1640625, "learning_rate": 0.00016330982417800735, "loss": 0.492, "step": 160240 }, { "epoch": 7.95917353730009, "grad_norm": 0.1572265625, "learning_rate": 0.0001632700903943578, "loss": 0.5101, "step": 160250 }, { "epoch": 7.959670209595709, "grad_norm": 0.2001953125, "learning_rate": 0.00016323035661070827, "loss": 0.5329, "step": 160260 }, { "epoch": 7.960166881891328, "grad_norm": 0.15234375, "learning_rate": 0.00016319062282705871, "loss": 0.476, "step": 160270 }, { "epoch": 7.960663554186947, "grad_norm": 0.1552734375, "learning_rate": 0.00016315088904340916, "loss": 0.4737, "step": 160280 }, { "epoch": 7.9611602264825665, "grad_norm": 0.1572265625, "learning_rate": 0.00016311115525975963, "loss": 0.4942, "step": 160290 }, { "epoch": 7.961656898778186, "grad_norm": 0.15625, "learning_rate": 0.00016307142147611007, "loss": 0.5531, "step": 160300 }, { "epoch": 7.962153571073806, "grad_norm": 0.1533203125, "learning_rate": 0.00016303168769246054, "loss": 0.5119, "step": 160310 }, { "epoch": 7.962650243369425, "grad_norm": 0.146484375, "learning_rate": 0.00016299195390881096, "loss": 0.5132, "step": 160320 }, { "epoch": 7.963146915665044, "grad_norm": 0.1767578125, "learning_rate": 0.00016295222012516143, "loss": 0.479, "step": 160330 }, { "epoch": 7.9636435879606635, "grad_norm": 0.1611328125, "learning_rate": 0.00016291248634151188, "loss": 0.5006, "step": 160340 }, { "epoch": 7.964140260256283, "grad_norm": 0.1484375, "learning_rate": 0.00016287275255786235, "loss": 0.4917, "step": 160350 }, { "epoch": 7.964636932551902, "grad_norm": 0.169921875, "learning_rate": 0.00016283301877421277, "loss": 0.5246, "step": 160360 }, { "epoch": 7.965133604847521, "grad_norm": 0.1953125, "learning_rate": 0.00016279328499056324, "loss": 0.4925, "step": 160370 }, { "epoch": 7.965630277143141, "grad_norm": 0.16015625, "learning_rate": 0.00016275355120691368, "loss": 0.474, "step": 160380 }, { "epoch": 7.9661269494387605, "grad_norm": 0.1708984375, "learning_rate": 0.00016271381742326415, "loss": 0.4957, "step": 160390 }, { "epoch": 7.96662362173438, "grad_norm": 0.16796875, "learning_rate": 0.00016267408363961457, "loss": 0.4688, "step": 160400 }, { "epoch": 7.967120294029999, "grad_norm": 0.166015625, "learning_rate": 0.00016263434985596504, "loss": 0.4886, "step": 160410 }, { "epoch": 7.967616966325618, "grad_norm": 0.1552734375, "learning_rate": 0.00016259461607231549, "loss": 0.4947, "step": 160420 }, { "epoch": 7.9681136386212374, "grad_norm": 0.166015625, "learning_rate": 0.00016255488228866596, "loss": 0.4888, "step": 160430 }, { "epoch": 7.968610310916857, "grad_norm": 0.166015625, "learning_rate": 0.0001625151485050164, "loss": 0.4831, "step": 160440 }, { "epoch": 7.969106983212477, "grad_norm": 0.16015625, "learning_rate": 0.00016247541472136685, "loss": 0.5021, "step": 160450 }, { "epoch": 7.969603655508096, "grad_norm": 0.16796875, "learning_rate": 0.00016243568093771732, "loss": 0.4852, "step": 160460 }, { "epoch": 7.970100327803715, "grad_norm": 0.1552734375, "learning_rate": 0.00016239594715406776, "loss": 0.4923, "step": 160470 }, { "epoch": 7.9705970000993345, "grad_norm": 0.1728515625, "learning_rate": 0.0001623562133704182, "loss": 0.4684, "step": 160480 }, { "epoch": 7.971093672394954, "grad_norm": 0.166015625, "learning_rate": 0.00016231647958676865, "loss": 0.4737, "step": 160490 }, { "epoch": 7.971590344690573, "grad_norm": 0.1689453125, "learning_rate": 0.00016227674580311912, "loss": 0.521, "step": 160500 }, { "epoch": 7.972087016986192, "grad_norm": 0.1474609375, "learning_rate": 0.00016223701201946957, "loss": 0.468, "step": 160510 }, { "epoch": 7.972583689281812, "grad_norm": 0.1552734375, "learning_rate": 0.00016219727823582, "loss": 0.5087, "step": 160520 }, { "epoch": 7.9730803615774315, "grad_norm": 0.15625, "learning_rate": 0.00016215754445217045, "loss": 0.4756, "step": 160530 }, { "epoch": 7.973577033873051, "grad_norm": 0.2099609375, "learning_rate": 0.00016211781066852093, "loss": 0.5339, "step": 160540 }, { "epoch": 7.97407370616867, "grad_norm": 0.1484375, "learning_rate": 0.00016207807688487137, "loss": 0.4721, "step": 160550 }, { "epoch": 7.974570378464289, "grad_norm": 0.14453125, "learning_rate": 0.00016203834310122181, "loss": 0.5024, "step": 160560 }, { "epoch": 7.975067050759908, "grad_norm": 0.1416015625, "learning_rate": 0.00016199860931757226, "loss": 0.4412, "step": 160570 }, { "epoch": 7.975563723055528, "grad_norm": 0.189453125, "learning_rate": 0.00016195887553392273, "loss": 0.4727, "step": 160580 }, { "epoch": 7.976060395351148, "grad_norm": 0.138671875, "learning_rate": 0.00016191914175027317, "loss": 0.5233, "step": 160590 }, { "epoch": 7.976557067646767, "grad_norm": 0.1943359375, "learning_rate": 0.00016187940796662362, "loss": 0.4815, "step": 160600 }, { "epoch": 7.977053739942386, "grad_norm": 0.16796875, "learning_rate": 0.0001618396741829741, "loss": 0.4791, "step": 160610 }, { "epoch": 7.977550412238005, "grad_norm": 0.154296875, "learning_rate": 0.00016179994039932453, "loss": 0.5128, "step": 160620 }, { "epoch": 7.978047084533625, "grad_norm": 0.1591796875, "learning_rate": 0.000161760206615675, "loss": 0.4934, "step": 160630 }, { "epoch": 7.978543756829244, "grad_norm": 0.15234375, "learning_rate": 0.00016172047283202542, "loss": 0.5397, "step": 160640 }, { "epoch": 7.979040429124863, "grad_norm": 0.1650390625, "learning_rate": 0.0001616807390483759, "loss": 0.4757, "step": 160650 }, { "epoch": 7.979537101420483, "grad_norm": 0.1494140625, "learning_rate": 0.00016164100526472634, "loss": 0.4689, "step": 160660 }, { "epoch": 7.9800337737161025, "grad_norm": 0.1767578125, "learning_rate": 0.0001616012714810768, "loss": 0.5168, "step": 160670 }, { "epoch": 7.980530446011722, "grad_norm": 0.1572265625, "learning_rate": 0.00016156153769742723, "loss": 0.4812, "step": 160680 }, { "epoch": 7.981027118307341, "grad_norm": 0.15234375, "learning_rate": 0.0001615218039137777, "loss": 0.5067, "step": 160690 }, { "epoch": 7.98152379060296, "grad_norm": 0.1455078125, "learning_rate": 0.00016148207013012814, "loss": 0.4903, "step": 160700 }, { "epoch": 7.982020462898579, "grad_norm": 0.142578125, "learning_rate": 0.0001614423363464786, "loss": 0.4981, "step": 160710 }, { "epoch": 7.982517135194199, "grad_norm": 0.1962890625, "learning_rate": 0.00016140260256282906, "loss": 0.5052, "step": 160720 }, { "epoch": 7.983013807489819, "grad_norm": 0.1474609375, "learning_rate": 0.0001613628687791795, "loss": 0.4815, "step": 160730 }, { "epoch": 7.983510479785438, "grad_norm": 0.15625, "learning_rate": 0.00016132313499552995, "loss": 0.5113, "step": 160740 }, { "epoch": 7.984007152081057, "grad_norm": 0.171875, "learning_rate": 0.00016128340121188042, "loss": 0.492, "step": 160750 }, { "epoch": 7.984503824376676, "grad_norm": 0.1689453125, "learning_rate": 0.00016124366742823086, "loss": 0.5066, "step": 160760 }, { "epoch": 7.985000496672296, "grad_norm": 0.19140625, "learning_rate": 0.0001612039336445813, "loss": 0.497, "step": 160770 }, { "epoch": 7.985497168967915, "grad_norm": 0.1513671875, "learning_rate": 0.00016116419986093178, "loss": 0.5048, "step": 160780 }, { "epoch": 7.985993841263534, "grad_norm": 0.224609375, "learning_rate": 0.00016112446607728222, "loss": 0.4885, "step": 160790 }, { "epoch": 7.986490513559153, "grad_norm": 0.1611328125, "learning_rate": 0.00016108473229363267, "loss": 0.4969, "step": 160800 }, { "epoch": 7.986987185854773, "grad_norm": 0.1533203125, "learning_rate": 0.0001610449985099831, "loss": 0.4893, "step": 160810 }, { "epoch": 7.987483858150393, "grad_norm": 0.1708984375, "learning_rate": 0.00016100526472633358, "loss": 0.4677, "step": 160820 }, { "epoch": 7.987980530446012, "grad_norm": 0.1796875, "learning_rate": 0.00016096553094268403, "loss": 0.4925, "step": 160830 }, { "epoch": 7.988477202741631, "grad_norm": 0.1640625, "learning_rate": 0.0001609257971590345, "loss": 0.489, "step": 160840 }, { "epoch": 7.98897387503725, "grad_norm": 0.17578125, "learning_rate": 0.00016088606337538491, "loss": 0.4792, "step": 160850 }, { "epoch": 7.9894705473328695, "grad_norm": 0.1708984375, "learning_rate": 0.00016084632959173539, "loss": 0.5002, "step": 160860 }, { "epoch": 7.989967219628489, "grad_norm": 0.16015625, "learning_rate": 0.00016080659580808583, "loss": 0.4733, "step": 160870 }, { "epoch": 7.990463891924108, "grad_norm": 0.166015625, "learning_rate": 0.0001607668620244363, "loss": 0.4713, "step": 160880 }, { "epoch": 7.990960564219728, "grad_norm": 0.1533203125, "learning_rate": 0.00016072712824078672, "loss": 0.5031, "step": 160890 }, { "epoch": 7.991457236515347, "grad_norm": 0.1708984375, "learning_rate": 0.0001606873944571372, "loss": 0.4917, "step": 160900 }, { "epoch": 7.991953908810967, "grad_norm": 0.146484375, "learning_rate": 0.00016064766067348763, "loss": 0.4995, "step": 160910 }, { "epoch": 7.992450581106586, "grad_norm": 0.1826171875, "learning_rate": 0.0001606079268898381, "loss": 0.5008, "step": 160920 }, { "epoch": 7.992947253402205, "grad_norm": 0.1904296875, "learning_rate": 0.00016056819310618855, "loss": 0.4829, "step": 160930 }, { "epoch": 7.993443925697824, "grad_norm": 0.1455078125, "learning_rate": 0.000160528459322539, "loss": 0.5061, "step": 160940 }, { "epoch": 7.9939405979934435, "grad_norm": 0.1484375, "learning_rate": 0.00016048872553888944, "loss": 0.4793, "step": 160950 }, { "epoch": 7.994437270289064, "grad_norm": 0.15625, "learning_rate": 0.0001604489917552399, "loss": 0.4777, "step": 160960 }, { "epoch": 7.994933942584683, "grad_norm": 0.166015625, "learning_rate": 0.00016040925797159035, "loss": 0.4421, "step": 160970 }, { "epoch": 7.995430614880302, "grad_norm": 0.150390625, "learning_rate": 0.0001603695241879408, "loss": 0.4834, "step": 160980 }, { "epoch": 7.995927287175921, "grad_norm": 0.20703125, "learning_rate": 0.00016032979040429127, "loss": 0.5118, "step": 160990 }, { "epoch": 7.9964239594715405, "grad_norm": 0.1796875, "learning_rate": 0.0001602900566206417, "loss": 0.4914, "step": 161000 }, { "epoch": 7.99692063176716, "grad_norm": 0.150390625, "learning_rate": 0.00016025032283699216, "loss": 0.4723, "step": 161010 }, { "epoch": 7.997417304062779, "grad_norm": 0.1630859375, "learning_rate": 0.0001602105890533426, "loss": 0.5193, "step": 161020 }, { "epoch": 7.997913976358399, "grad_norm": 0.1884765625, "learning_rate": 0.00016017085526969307, "loss": 0.5055, "step": 161030 }, { "epoch": 7.998410648654018, "grad_norm": 0.1552734375, "learning_rate": 0.00016013112148604352, "loss": 0.454, "step": 161040 }, { "epoch": 7.9989073209496375, "grad_norm": 0.1484375, "learning_rate": 0.00016009138770239396, "loss": 0.4973, "step": 161050 }, { "epoch": 7.999403993245257, "grad_norm": 0.166015625, "learning_rate": 0.0001600516539187444, "loss": 0.4694, "step": 161060 }, { "epoch": 7.999900665540876, "grad_norm": 0.173828125, "learning_rate": 0.00016001192013509488, "loss": 0.5157, "step": 161070 }, { "epoch": 8.000397337836496, "grad_norm": 0.162109375, "learning_rate": 0.00015997218635144532, "loss": 0.5245, "step": 161080 }, { "epoch": 8.000894010132114, "grad_norm": 0.1689453125, "learning_rate": 0.00015993245256779577, "loss": 0.4829, "step": 161090 }, { "epoch": 8.001390682427735, "grad_norm": 0.1708984375, "learning_rate": 0.0001598927187841462, "loss": 0.4883, "step": 161100 }, { "epoch": 8.001887354723353, "grad_norm": 0.1767578125, "learning_rate": 0.00015985298500049668, "loss": 0.478, "step": 161110 }, { "epoch": 8.002384027018973, "grad_norm": 0.150390625, "learning_rate": 0.00015981325121684713, "loss": 0.508, "step": 161120 }, { "epoch": 8.002880699314593, "grad_norm": 0.2021484375, "learning_rate": 0.0001597735174331976, "loss": 0.4908, "step": 161130 }, { "epoch": 8.003377371610211, "grad_norm": 0.146484375, "learning_rate": 0.00015973378364954804, "loss": 0.4701, "step": 161140 }, { "epoch": 8.003874043905832, "grad_norm": 0.1552734375, "learning_rate": 0.00015969404986589849, "loss": 0.459, "step": 161150 }, { "epoch": 8.00437071620145, "grad_norm": 0.1591796875, "learning_rate": 0.00015965431608224896, "loss": 0.4498, "step": 161160 }, { "epoch": 8.00486738849707, "grad_norm": 0.177734375, "learning_rate": 0.0001596145822985994, "loss": 0.4693, "step": 161170 }, { "epoch": 8.005364060792688, "grad_norm": 0.15625, "learning_rate": 0.00015957484851494985, "loss": 0.4906, "step": 161180 }, { "epoch": 8.005860733088308, "grad_norm": 0.15625, "learning_rate": 0.0001595351147313003, "loss": 0.478, "step": 161190 }, { "epoch": 8.006357405383927, "grad_norm": 0.142578125, "learning_rate": 0.00015949538094765076, "loss": 0.4693, "step": 161200 }, { "epoch": 8.006854077679547, "grad_norm": 0.158203125, "learning_rate": 0.0001594556471640012, "loss": 0.4263, "step": 161210 }, { "epoch": 8.007350749975167, "grad_norm": 0.1708984375, "learning_rate": 0.00015941591338035165, "loss": 0.4807, "step": 161220 }, { "epoch": 8.007847422270785, "grad_norm": 0.169921875, "learning_rate": 0.0001593761795967021, "loss": 0.541, "step": 161230 }, { "epoch": 8.008344094566406, "grad_norm": 0.1875, "learning_rate": 0.00015933644581305256, "loss": 0.4734, "step": 161240 }, { "epoch": 8.008840766862024, "grad_norm": 0.18359375, "learning_rate": 0.000159296712029403, "loss": 0.4799, "step": 161250 }, { "epoch": 8.009337439157644, "grad_norm": 0.142578125, "learning_rate": 0.00015925697824575345, "loss": 0.4768, "step": 161260 }, { "epoch": 8.009834111453262, "grad_norm": 0.15625, "learning_rate": 0.0001592172444621039, "loss": 0.5115, "step": 161270 }, { "epoch": 8.010330783748882, "grad_norm": 0.154296875, "learning_rate": 0.00015917751067845437, "loss": 0.4876, "step": 161280 }, { "epoch": 8.010827456044503, "grad_norm": 0.15234375, "learning_rate": 0.0001591377768948048, "loss": 0.4816, "step": 161290 }, { "epoch": 8.01132412834012, "grad_norm": 0.14453125, "learning_rate": 0.00015909804311115526, "loss": 0.4597, "step": 161300 }, { "epoch": 8.011820800635741, "grad_norm": 0.1572265625, "learning_rate": 0.00015905830932750573, "loss": 0.4957, "step": 161310 }, { "epoch": 8.01231747293136, "grad_norm": 0.185546875, "learning_rate": 0.00015901857554385617, "loss": 0.4762, "step": 161320 }, { "epoch": 8.01281414522698, "grad_norm": 0.1806640625, "learning_rate": 0.00015897884176020664, "loss": 0.4954, "step": 161330 }, { "epoch": 8.013310817522598, "grad_norm": 0.154296875, "learning_rate": 0.00015893910797655706, "loss": 0.4877, "step": 161340 }, { "epoch": 8.013807489818218, "grad_norm": 0.1806640625, "learning_rate": 0.00015889937419290753, "loss": 0.4988, "step": 161350 }, { "epoch": 8.014304162113838, "grad_norm": 0.158203125, "learning_rate": 0.00015885964040925798, "loss": 0.4862, "step": 161360 }, { "epoch": 8.014800834409456, "grad_norm": 0.1552734375, "learning_rate": 0.00015881990662560845, "loss": 0.4652, "step": 161370 }, { "epoch": 8.015297506705076, "grad_norm": 0.162109375, "learning_rate": 0.00015878017284195887, "loss": 0.4944, "step": 161380 }, { "epoch": 8.015794179000695, "grad_norm": 0.1689453125, "learning_rate": 0.00015874043905830934, "loss": 0.4868, "step": 161390 }, { "epoch": 8.016290851296315, "grad_norm": 0.1611328125, "learning_rate": 0.00015870070527465978, "loss": 0.527, "step": 161400 }, { "epoch": 8.016787523591933, "grad_norm": 0.158203125, "learning_rate": 0.00015866097149101025, "loss": 0.4963, "step": 161410 }, { "epoch": 8.017284195887553, "grad_norm": 0.1572265625, "learning_rate": 0.00015862123770736067, "loss": 0.4732, "step": 161420 }, { "epoch": 8.017780868183173, "grad_norm": 0.1806640625, "learning_rate": 0.00015858150392371114, "loss": 0.4751, "step": 161430 }, { "epoch": 8.018277540478792, "grad_norm": 0.1728515625, "learning_rate": 0.00015854177014006159, "loss": 0.45, "step": 161440 }, { "epoch": 8.018774212774412, "grad_norm": 0.1591796875, "learning_rate": 0.00015850203635641206, "loss": 0.495, "step": 161450 }, { "epoch": 8.01927088507003, "grad_norm": 0.1875, "learning_rate": 0.0001584623025727625, "loss": 0.4826, "step": 161460 }, { "epoch": 8.01976755736565, "grad_norm": 0.171875, "learning_rate": 0.00015842256878911295, "loss": 0.5109, "step": 161470 }, { "epoch": 8.020264229661269, "grad_norm": 0.16015625, "learning_rate": 0.00015838283500546342, "loss": 0.4613, "step": 161480 }, { "epoch": 8.020760901956889, "grad_norm": 0.15234375, "learning_rate": 0.00015834310122181386, "loss": 0.4482, "step": 161490 }, { "epoch": 8.021257574252509, "grad_norm": 0.1953125, "learning_rate": 0.0001583033674381643, "loss": 0.4715, "step": 161500 }, { "epoch": 8.021754246548127, "grad_norm": 0.1572265625, "learning_rate": 0.00015826363365451475, "loss": 0.5069, "step": 161510 }, { "epoch": 8.022250918843747, "grad_norm": 0.1533203125, "learning_rate": 0.00015822389987086522, "loss": 0.4852, "step": 161520 }, { "epoch": 8.022747591139366, "grad_norm": 0.1455078125, "learning_rate": 0.00015818416608721566, "loss": 0.4645, "step": 161530 }, { "epoch": 8.023244263434986, "grad_norm": 0.1845703125, "learning_rate": 0.00015814443230356614, "loss": 0.4513, "step": 161540 }, { "epoch": 8.023740935730604, "grad_norm": 0.2060546875, "learning_rate": 0.00015810469851991655, "loss": 0.4544, "step": 161550 }, { "epoch": 8.024237608026224, "grad_norm": 0.1865234375, "learning_rate": 0.00015806496473626702, "loss": 0.4841, "step": 161560 }, { "epoch": 8.024734280321844, "grad_norm": 0.1572265625, "learning_rate": 0.00015802523095261747, "loss": 0.5216, "step": 161570 }, { "epoch": 8.025230952617463, "grad_norm": 0.1650390625, "learning_rate": 0.00015798549716896794, "loss": 0.5066, "step": 161580 }, { "epoch": 8.025727624913083, "grad_norm": 0.1591796875, "learning_rate": 0.00015794576338531836, "loss": 0.4658, "step": 161590 }, { "epoch": 8.026224297208701, "grad_norm": 0.162109375, "learning_rate": 0.00015790602960166883, "loss": 0.4869, "step": 161600 }, { "epoch": 8.026720969504321, "grad_norm": 0.1591796875, "learning_rate": 0.00015786629581801927, "loss": 0.4999, "step": 161610 }, { "epoch": 8.02721764179994, "grad_norm": 0.181640625, "learning_rate": 0.00015782656203436974, "loss": 0.483, "step": 161620 }, { "epoch": 8.02771431409556, "grad_norm": 0.185546875, "learning_rate": 0.0001577868282507202, "loss": 0.491, "step": 161630 }, { "epoch": 8.02821098639118, "grad_norm": 0.15625, "learning_rate": 0.00015774709446707063, "loss": 0.4676, "step": 161640 }, { "epoch": 8.028707658686798, "grad_norm": 0.1572265625, "learning_rate": 0.00015770736068342108, "loss": 0.466, "step": 161650 }, { "epoch": 8.029204330982418, "grad_norm": 0.173828125, "learning_rate": 0.00015766762689977155, "loss": 0.5024, "step": 161660 }, { "epoch": 8.029701003278037, "grad_norm": 0.201171875, "learning_rate": 0.000157627893116122, "loss": 0.4902, "step": 161670 }, { "epoch": 8.030197675573657, "grad_norm": 0.189453125, "learning_rate": 0.00015758815933247244, "loss": 0.4683, "step": 161680 }, { "epoch": 8.030694347869275, "grad_norm": 0.1640625, "learning_rate": 0.0001575484255488229, "loss": 0.484, "step": 161690 }, { "epoch": 8.031191020164895, "grad_norm": 0.16015625, "learning_rate": 0.00015750869176517335, "loss": 0.5135, "step": 161700 }, { "epoch": 8.031687692460515, "grad_norm": 0.1484375, "learning_rate": 0.0001574689579815238, "loss": 0.4741, "step": 161710 }, { "epoch": 8.032184364756134, "grad_norm": 0.1826171875, "learning_rate": 0.00015742922419787424, "loss": 0.4908, "step": 161720 }, { "epoch": 8.032681037051754, "grad_norm": 0.1943359375, "learning_rate": 0.0001573894904142247, "loss": 0.4432, "step": 161730 }, { "epoch": 8.033177709347372, "grad_norm": 0.1728515625, "learning_rate": 0.00015734975663057516, "loss": 0.4724, "step": 161740 }, { "epoch": 8.033674381642992, "grad_norm": 0.158203125, "learning_rate": 0.0001573100228469256, "loss": 0.4892, "step": 161750 }, { "epoch": 8.03417105393861, "grad_norm": 0.1689453125, "learning_rate": 0.00015727028906327605, "loss": 0.4908, "step": 161760 }, { "epoch": 8.03466772623423, "grad_norm": 0.1611328125, "learning_rate": 0.00015723055527962652, "loss": 0.493, "step": 161770 }, { "epoch": 8.03516439852985, "grad_norm": 0.201171875, "learning_rate": 0.00015719082149597696, "loss": 0.4561, "step": 161780 }, { "epoch": 8.03566107082547, "grad_norm": 0.162109375, "learning_rate": 0.0001571510877123274, "loss": 0.5212, "step": 161790 }, { "epoch": 8.03615774312109, "grad_norm": 0.1708984375, "learning_rate": 0.00015711135392867785, "loss": 0.4831, "step": 161800 }, { "epoch": 8.036654415416708, "grad_norm": 0.1865234375, "learning_rate": 0.00015707162014502832, "loss": 0.4874, "step": 161810 }, { "epoch": 8.037151087712328, "grad_norm": 0.1484375, "learning_rate": 0.00015703188636137877, "loss": 0.4625, "step": 161820 }, { "epoch": 8.037647760007946, "grad_norm": 0.171875, "learning_rate": 0.0001569921525777292, "loss": 0.4806, "step": 161830 }, { "epoch": 8.038144432303566, "grad_norm": 0.1572265625, "learning_rate": 0.00015695241879407968, "loss": 0.4679, "step": 161840 }, { "epoch": 8.038641104599186, "grad_norm": 0.171875, "learning_rate": 0.00015691268501043012, "loss": 0.4581, "step": 161850 }, { "epoch": 8.039137776894805, "grad_norm": 0.1455078125, "learning_rate": 0.0001568729512267806, "loss": 0.4933, "step": 161860 }, { "epoch": 8.039634449190425, "grad_norm": 0.1669921875, "learning_rate": 0.000156833217443131, "loss": 0.4792, "step": 161870 }, { "epoch": 8.040131121486043, "grad_norm": 0.1708984375, "learning_rate": 0.00015679348365948148, "loss": 0.4653, "step": 161880 }, { "epoch": 8.040627793781663, "grad_norm": 0.181640625, "learning_rate": 0.00015675374987583193, "loss": 0.4661, "step": 161890 }, { "epoch": 8.041124466077282, "grad_norm": 0.1650390625, "learning_rate": 0.0001567140160921824, "loss": 0.4943, "step": 161900 }, { "epoch": 8.041621138372902, "grad_norm": 0.1611328125, "learning_rate": 0.00015667428230853282, "loss": 0.5065, "step": 161910 }, { "epoch": 8.04211781066852, "grad_norm": 0.1552734375, "learning_rate": 0.0001566345485248833, "loss": 0.4755, "step": 161920 }, { "epoch": 8.04261448296414, "grad_norm": 0.15234375, "learning_rate": 0.00015659481474123373, "loss": 0.4787, "step": 161930 }, { "epoch": 8.04311115525976, "grad_norm": 0.15625, "learning_rate": 0.0001565550809575842, "loss": 0.4992, "step": 161940 }, { "epoch": 8.043607827555379, "grad_norm": 0.1572265625, "learning_rate": 0.00015651534717393462, "loss": 0.4444, "step": 161950 }, { "epoch": 8.044104499850999, "grad_norm": 0.1572265625, "learning_rate": 0.0001564756133902851, "loss": 0.4849, "step": 161960 }, { "epoch": 8.044601172146617, "grad_norm": 0.2138671875, "learning_rate": 0.00015643587960663554, "loss": 0.5151, "step": 161970 }, { "epoch": 8.045097844442237, "grad_norm": 0.162109375, "learning_rate": 0.000156396145822986, "loss": 0.4721, "step": 161980 }, { "epoch": 8.045594516737856, "grad_norm": 0.1640625, "learning_rate": 0.00015635641203933645, "loss": 0.4897, "step": 161990 }, { "epoch": 8.046091189033476, "grad_norm": 0.1396484375, "learning_rate": 0.0001563166782556869, "loss": 0.4776, "step": 162000 }, { "epoch": 8.046587861329096, "grad_norm": 0.1396484375, "learning_rate": 0.00015627694447203737, "loss": 0.4532, "step": 162010 }, { "epoch": 8.047084533624714, "grad_norm": 0.177734375, "learning_rate": 0.0001562372106883878, "loss": 0.4664, "step": 162020 }, { "epoch": 8.047581205920334, "grad_norm": 0.1572265625, "learning_rate": 0.00015619747690473828, "loss": 0.4775, "step": 162030 }, { "epoch": 8.048077878215953, "grad_norm": 0.1689453125, "learning_rate": 0.0001561577431210887, "loss": 0.4718, "step": 162040 }, { "epoch": 8.048574550511573, "grad_norm": 0.1494140625, "learning_rate": 0.00015611800933743917, "loss": 0.47, "step": 162050 }, { "epoch": 8.049071222807191, "grad_norm": 0.1796875, "learning_rate": 0.00015607827555378962, "loss": 0.5064, "step": 162060 }, { "epoch": 8.049567895102811, "grad_norm": 0.189453125, "learning_rate": 0.0001560385417701401, "loss": 0.4787, "step": 162070 }, { "epoch": 8.050064567398431, "grad_norm": 0.203125, "learning_rate": 0.0001559988079864905, "loss": 0.4881, "step": 162080 }, { "epoch": 8.05056123969405, "grad_norm": 0.1552734375, "learning_rate": 0.00015595907420284098, "loss": 0.4707, "step": 162090 }, { "epoch": 8.05105791198967, "grad_norm": 0.15625, "learning_rate": 0.00015591934041919142, "loss": 0.4533, "step": 162100 }, { "epoch": 8.051554584285288, "grad_norm": 0.1787109375, "learning_rate": 0.0001558796066355419, "loss": 0.4785, "step": 162110 }, { "epoch": 8.052051256580908, "grad_norm": 0.1884765625, "learning_rate": 0.0001558398728518923, "loss": 0.4745, "step": 162120 }, { "epoch": 8.052547928876526, "grad_norm": 0.15625, "learning_rate": 0.00015580013906824278, "loss": 0.4967, "step": 162130 }, { "epoch": 8.053044601172147, "grad_norm": 0.166015625, "learning_rate": 0.00015576040528459323, "loss": 0.4866, "step": 162140 }, { "epoch": 8.053541273467767, "grad_norm": 0.1865234375, "learning_rate": 0.0001557206715009437, "loss": 0.4914, "step": 162150 }, { "epoch": 8.054037945763385, "grad_norm": 0.1611328125, "learning_rate": 0.00015568093771729414, "loss": 0.4741, "step": 162160 }, { "epoch": 8.054534618059005, "grad_norm": 0.16796875, "learning_rate": 0.00015564120393364458, "loss": 0.4544, "step": 162170 }, { "epoch": 8.055031290354624, "grad_norm": 0.1669921875, "learning_rate": 0.00015560147014999506, "loss": 0.5047, "step": 162180 }, { "epoch": 8.055527962650244, "grad_norm": 0.1982421875, "learning_rate": 0.0001555617363663455, "loss": 0.4983, "step": 162190 }, { "epoch": 8.056024634945862, "grad_norm": 0.1494140625, "learning_rate": 0.00015552200258269594, "loss": 0.4744, "step": 162200 }, { "epoch": 8.056521307241482, "grad_norm": 0.15625, "learning_rate": 0.0001554822687990464, "loss": 0.5062, "step": 162210 }, { "epoch": 8.057017979537102, "grad_norm": 0.1494140625, "learning_rate": 0.00015544253501539686, "loss": 0.4831, "step": 162220 }, { "epoch": 8.05751465183272, "grad_norm": 0.154296875, "learning_rate": 0.0001554028012317473, "loss": 0.4767, "step": 162230 }, { "epoch": 8.05801132412834, "grad_norm": 0.1650390625, "learning_rate": 0.00015536306744809775, "loss": 0.457, "step": 162240 }, { "epoch": 8.058507996423959, "grad_norm": 0.1826171875, "learning_rate": 0.0001553233336644482, "loss": 0.4757, "step": 162250 }, { "epoch": 8.059004668719579, "grad_norm": 0.177734375, "learning_rate": 0.00015528359988079866, "loss": 0.486, "step": 162260 }, { "epoch": 8.059501341015197, "grad_norm": 0.1640625, "learning_rate": 0.0001552438660971491, "loss": 0.4749, "step": 162270 }, { "epoch": 8.059998013310818, "grad_norm": 0.158203125, "learning_rate": 0.00015520413231349955, "loss": 0.4829, "step": 162280 }, { "epoch": 8.060494685606438, "grad_norm": 0.1787109375, "learning_rate": 0.00015516439852985, "loss": 0.4866, "step": 162290 }, { "epoch": 8.060991357902056, "grad_norm": 0.1455078125, "learning_rate": 0.00015512466474620047, "loss": 0.4831, "step": 162300 }, { "epoch": 8.061488030197676, "grad_norm": 0.173828125, "learning_rate": 0.0001550849309625509, "loss": 0.4949, "step": 162310 }, { "epoch": 8.061984702493294, "grad_norm": 0.18359375, "learning_rate": 0.00015504519717890136, "loss": 0.4942, "step": 162320 }, { "epoch": 8.062481374788915, "grad_norm": 0.1650390625, "learning_rate": 0.00015500546339525183, "loss": 0.4613, "step": 162330 }, { "epoch": 8.062978047084533, "grad_norm": 0.1650390625, "learning_rate": 0.00015496572961160227, "loss": 0.4588, "step": 162340 }, { "epoch": 8.063474719380153, "grad_norm": 0.1513671875, "learning_rate": 0.00015492599582795272, "loss": 0.4524, "step": 162350 }, { "epoch": 8.063971391675773, "grad_norm": 0.1708984375, "learning_rate": 0.00015488626204430316, "loss": 0.4873, "step": 162360 }, { "epoch": 8.064468063971391, "grad_norm": 0.1962890625, "learning_rate": 0.00015484652826065363, "loss": 0.523, "step": 162370 }, { "epoch": 8.064964736267012, "grad_norm": 0.1728515625, "learning_rate": 0.00015480679447700408, "loss": 0.4733, "step": 162380 }, { "epoch": 8.06546140856263, "grad_norm": 0.15234375, "learning_rate": 0.00015476706069335455, "loss": 0.4955, "step": 162390 }, { "epoch": 8.06595808085825, "grad_norm": 0.1875, "learning_rate": 0.000154727326909705, "loss": 0.5039, "step": 162400 }, { "epoch": 8.066454753153868, "grad_norm": 0.142578125, "learning_rate": 0.00015468759312605544, "loss": 0.489, "step": 162410 }, { "epoch": 8.066951425449489, "grad_norm": 0.16015625, "learning_rate": 0.00015464785934240588, "loss": 0.479, "step": 162420 }, { "epoch": 8.067448097745109, "grad_norm": 0.1669921875, "learning_rate": 0.00015460812555875635, "loss": 0.4693, "step": 162430 }, { "epoch": 8.067944770040727, "grad_norm": 0.1640625, "learning_rate": 0.0001545683917751068, "loss": 0.4643, "step": 162440 }, { "epoch": 8.068441442336347, "grad_norm": 0.1611328125, "learning_rate": 0.00015452865799145724, "loss": 0.4647, "step": 162450 }, { "epoch": 8.068938114631965, "grad_norm": 0.1611328125, "learning_rate": 0.00015448892420780769, "loss": 0.482, "step": 162460 }, { "epoch": 8.069434786927586, "grad_norm": 0.138671875, "learning_rate": 0.00015444919042415816, "loss": 0.4824, "step": 162470 }, { "epoch": 8.069931459223204, "grad_norm": 0.1611328125, "learning_rate": 0.0001544094566405086, "loss": 0.4639, "step": 162480 }, { "epoch": 8.070428131518824, "grad_norm": 0.154296875, "learning_rate": 0.00015436972285685904, "loss": 0.4406, "step": 162490 }, { "epoch": 8.070924803814444, "grad_norm": 0.1591796875, "learning_rate": 0.0001543299890732095, "loss": 0.478, "step": 162500 }, { "epoch": 8.071421476110062, "grad_norm": 0.154296875, "learning_rate": 0.00015429025528955996, "loss": 0.4737, "step": 162510 }, { "epoch": 8.071918148405683, "grad_norm": 0.203125, "learning_rate": 0.0001542505215059104, "loss": 0.4339, "step": 162520 }, { "epoch": 8.072414820701301, "grad_norm": 0.1708984375, "learning_rate": 0.00015421078772226085, "loss": 0.498, "step": 162530 }, { "epoch": 8.072911492996921, "grad_norm": 0.1591796875, "learning_rate": 0.00015417105393861132, "loss": 0.4988, "step": 162540 }, { "epoch": 8.07340816529254, "grad_norm": 0.158203125, "learning_rate": 0.00015413132015496176, "loss": 0.4765, "step": 162550 }, { "epoch": 8.07390483758816, "grad_norm": 0.1630859375, "learning_rate": 0.00015409158637131224, "loss": 0.4536, "step": 162560 }, { "epoch": 8.074401509883778, "grad_norm": 0.1494140625, "learning_rate": 0.00015405185258766265, "loss": 0.5135, "step": 162570 }, { "epoch": 8.074898182179398, "grad_norm": 0.1533203125, "learning_rate": 0.00015401211880401312, "loss": 0.4666, "step": 162580 }, { "epoch": 8.075394854475018, "grad_norm": 0.1591796875, "learning_rate": 0.00015397238502036357, "loss": 0.4725, "step": 162590 }, { "epoch": 8.075891526770636, "grad_norm": 0.16796875, "learning_rate": 0.00015393265123671404, "loss": 0.4554, "step": 162600 }, { "epoch": 8.076388199066256, "grad_norm": 0.15625, "learning_rate": 0.00015389291745306446, "loss": 0.4599, "step": 162610 }, { "epoch": 8.076884871361875, "grad_norm": 0.1533203125, "learning_rate": 0.00015385318366941493, "loss": 0.4925, "step": 162620 }, { "epoch": 8.077381543657495, "grad_norm": 0.1796875, "learning_rate": 0.00015381344988576537, "loss": 0.4482, "step": 162630 }, { "epoch": 8.077878215953113, "grad_norm": 0.1728515625, "learning_rate": 0.00015377371610211584, "loss": 0.4978, "step": 162640 }, { "epoch": 8.078374888248733, "grad_norm": 0.2041015625, "learning_rate": 0.00015373398231846626, "loss": 0.469, "step": 162650 }, { "epoch": 8.078871560544354, "grad_norm": 0.1630859375, "learning_rate": 0.00015369424853481673, "loss": 0.4678, "step": 162660 }, { "epoch": 8.079368232839972, "grad_norm": 0.1669921875, "learning_rate": 0.00015365451475116718, "loss": 0.4798, "step": 162670 }, { "epoch": 8.079864905135592, "grad_norm": 0.169921875, "learning_rate": 0.00015361478096751765, "loss": 0.4581, "step": 162680 }, { "epoch": 8.08036157743121, "grad_norm": 0.1630859375, "learning_rate": 0.0001535750471838681, "loss": 0.4758, "step": 162690 }, { "epoch": 8.08085824972683, "grad_norm": 0.1826171875, "learning_rate": 0.00015353531340021854, "loss": 0.5031, "step": 162700 }, { "epoch": 8.081354922022449, "grad_norm": 0.1611328125, "learning_rate": 0.000153495579616569, "loss": 0.4785, "step": 162710 }, { "epoch": 8.081851594318069, "grad_norm": 0.16015625, "learning_rate": 0.00015345584583291945, "loss": 0.4417, "step": 162720 }, { "epoch": 8.082348266613689, "grad_norm": 0.1806640625, "learning_rate": 0.0001534161120492699, "loss": 0.5085, "step": 162730 }, { "epoch": 8.082844938909307, "grad_norm": 0.158203125, "learning_rate": 0.00015337637826562034, "loss": 0.4757, "step": 162740 }, { "epoch": 8.083341611204927, "grad_norm": 0.1513671875, "learning_rate": 0.0001533366444819708, "loss": 0.4616, "step": 162750 }, { "epoch": 8.083838283500546, "grad_norm": 0.1748046875, "learning_rate": 0.00015329691069832126, "loss": 0.4805, "step": 162760 }, { "epoch": 8.084334955796166, "grad_norm": 0.169921875, "learning_rate": 0.0001532571769146717, "loss": 0.5201, "step": 162770 }, { "epoch": 8.084831628091784, "grad_norm": 0.138671875, "learning_rate": 0.00015321744313102214, "loss": 0.4898, "step": 162780 }, { "epoch": 8.085328300387404, "grad_norm": 0.15625, "learning_rate": 0.00015317770934737262, "loss": 0.5156, "step": 162790 }, { "epoch": 8.085824972683024, "grad_norm": 0.1904296875, "learning_rate": 0.00015313797556372306, "loss": 0.5232, "step": 162800 }, { "epoch": 8.086321644978643, "grad_norm": 0.150390625, "learning_rate": 0.00015309824178007353, "loss": 0.5013, "step": 162810 }, { "epoch": 8.086818317274263, "grad_norm": 0.1650390625, "learning_rate": 0.00015305850799642395, "loss": 0.5049, "step": 162820 }, { "epoch": 8.087314989569881, "grad_norm": 0.1923828125, "learning_rate": 0.00015301877421277442, "loss": 0.4755, "step": 162830 }, { "epoch": 8.087811661865501, "grad_norm": 0.1591796875, "learning_rate": 0.00015297904042912486, "loss": 0.4835, "step": 162840 }, { "epoch": 8.08830833416112, "grad_norm": 0.1630859375, "learning_rate": 0.00015293930664547534, "loss": 0.5212, "step": 162850 }, { "epoch": 8.08880500645674, "grad_norm": 0.1513671875, "learning_rate": 0.00015289957286182578, "loss": 0.488, "step": 162860 }, { "epoch": 8.08930167875236, "grad_norm": 0.16796875, "learning_rate": 0.00015285983907817622, "loss": 0.5045, "step": 162870 }, { "epoch": 8.089798351047978, "grad_norm": 0.16015625, "learning_rate": 0.0001528201052945267, "loss": 0.4862, "step": 162880 }, { "epoch": 8.090295023343598, "grad_norm": 0.1484375, "learning_rate": 0.00015278037151087714, "loss": 0.4815, "step": 162890 }, { "epoch": 8.090791695639217, "grad_norm": 0.1572265625, "learning_rate": 0.00015274063772722758, "loss": 0.5271, "step": 162900 }, { "epoch": 8.091288367934837, "grad_norm": 0.1689453125, "learning_rate": 0.00015270090394357803, "loss": 0.5063, "step": 162910 }, { "epoch": 8.091785040230455, "grad_norm": 0.1669921875, "learning_rate": 0.0001526611701599285, "loss": 0.4708, "step": 162920 }, { "epoch": 8.092281712526075, "grad_norm": 0.1611328125, "learning_rate": 0.00015262143637627894, "loss": 0.4968, "step": 162930 }, { "epoch": 8.092778384821695, "grad_norm": 0.1767578125, "learning_rate": 0.0001525817025926294, "loss": 0.5011, "step": 162940 }, { "epoch": 8.093275057117314, "grad_norm": 0.1669921875, "learning_rate": 0.00015254196880897983, "loss": 0.488, "step": 162950 }, { "epoch": 8.093771729412934, "grad_norm": 0.154296875, "learning_rate": 0.0001525022350253303, "loss": 0.4811, "step": 162960 }, { "epoch": 8.094268401708552, "grad_norm": 0.169921875, "learning_rate": 0.00015246250124168075, "loss": 0.4724, "step": 162970 }, { "epoch": 8.094765074004172, "grad_norm": 0.2080078125, "learning_rate": 0.0001524227674580312, "loss": 0.5173, "step": 162980 }, { "epoch": 8.09526174629979, "grad_norm": 0.1396484375, "learning_rate": 0.00015238303367438164, "loss": 0.4809, "step": 162990 }, { "epoch": 8.09575841859541, "grad_norm": 0.166015625, "learning_rate": 0.0001523432998907321, "loss": 0.5081, "step": 163000 }, { "epoch": 8.096255090891031, "grad_norm": 0.1591796875, "learning_rate": 0.00015230356610708255, "loss": 0.4998, "step": 163010 }, { "epoch": 8.09675176318665, "grad_norm": 0.1533203125, "learning_rate": 0.000152263832323433, "loss": 0.4874, "step": 163020 }, { "epoch": 8.09724843548227, "grad_norm": 0.1611328125, "learning_rate": 0.00015222409853978347, "loss": 0.4991, "step": 163030 }, { "epoch": 8.097745107777888, "grad_norm": 0.197265625, "learning_rate": 0.0001521843647561339, "loss": 0.4939, "step": 163040 }, { "epoch": 8.098241780073508, "grad_norm": 0.1552734375, "learning_rate": 0.00015214463097248436, "loss": 0.4714, "step": 163050 }, { "epoch": 8.098738452369126, "grad_norm": 0.150390625, "learning_rate": 0.0001521048971888348, "loss": 0.4791, "step": 163060 }, { "epoch": 8.099235124664746, "grad_norm": 0.171875, "learning_rate": 0.00015206516340518527, "loss": 0.4741, "step": 163070 }, { "epoch": 8.099731796960366, "grad_norm": 0.146484375, "learning_rate": 0.00015202542962153572, "loss": 0.5234, "step": 163080 }, { "epoch": 8.100228469255985, "grad_norm": 0.1533203125, "learning_rate": 0.0001519856958378862, "loss": 0.5092, "step": 163090 }, { "epoch": 8.100725141551605, "grad_norm": 0.1533203125, "learning_rate": 0.0001519459620542366, "loss": 0.4809, "step": 163100 }, { "epoch": 8.101221813847223, "grad_norm": 0.1767578125, "learning_rate": 0.00015190622827058708, "loss": 0.4739, "step": 163110 }, { "epoch": 8.101718486142843, "grad_norm": 0.1806640625, "learning_rate": 0.00015186649448693752, "loss": 0.5185, "step": 163120 }, { "epoch": 8.102215158438462, "grad_norm": 0.1591796875, "learning_rate": 0.000151826760703288, "loss": 0.4776, "step": 163130 }, { "epoch": 8.102711830734082, "grad_norm": 0.1455078125, "learning_rate": 0.0001517870269196384, "loss": 0.4871, "step": 163140 }, { "epoch": 8.103208503029702, "grad_norm": 0.19140625, "learning_rate": 0.00015174729313598888, "loss": 0.4446, "step": 163150 }, { "epoch": 8.10370517532532, "grad_norm": 0.154296875, "learning_rate": 0.00015170755935233932, "loss": 0.4787, "step": 163160 }, { "epoch": 8.10420184762094, "grad_norm": 0.1787109375, "learning_rate": 0.0001516678255686898, "loss": 0.4722, "step": 163170 }, { "epoch": 8.104698519916559, "grad_norm": 0.1591796875, "learning_rate": 0.00015162809178504024, "loss": 0.5063, "step": 163180 }, { "epoch": 8.105195192212179, "grad_norm": 0.177734375, "learning_rate": 0.00015158835800139068, "loss": 0.5127, "step": 163190 }, { "epoch": 8.105691864507797, "grad_norm": 0.15625, "learning_rate": 0.00015154862421774113, "loss": 0.4697, "step": 163200 }, { "epoch": 8.106188536803417, "grad_norm": 0.1591796875, "learning_rate": 0.0001515088904340916, "loss": 0.5029, "step": 163210 }, { "epoch": 8.106685209099037, "grad_norm": 0.1572265625, "learning_rate": 0.00015146915665044204, "loss": 0.4469, "step": 163220 }, { "epoch": 8.107181881394656, "grad_norm": 0.15234375, "learning_rate": 0.0001514294228667925, "loss": 0.4893, "step": 163230 }, { "epoch": 8.107678553690276, "grad_norm": 0.150390625, "learning_rate": 0.00015138968908314296, "loss": 0.5028, "step": 163240 }, { "epoch": 8.108175225985894, "grad_norm": 0.1953125, "learning_rate": 0.0001513499552994934, "loss": 0.5145, "step": 163250 }, { "epoch": 8.108671898281514, "grad_norm": 0.1533203125, "learning_rate": 0.00015131022151584388, "loss": 0.4838, "step": 163260 }, { "epoch": 8.109168570577133, "grad_norm": 0.1669921875, "learning_rate": 0.0001512704877321943, "loss": 0.4886, "step": 163270 }, { "epoch": 8.109665242872753, "grad_norm": 0.15625, "learning_rate": 0.00015123075394854476, "loss": 0.4737, "step": 163280 }, { "epoch": 8.110161915168371, "grad_norm": 0.19140625, "learning_rate": 0.0001511910201648952, "loss": 0.5064, "step": 163290 }, { "epoch": 8.110658587463991, "grad_norm": 0.1650390625, "learning_rate": 0.00015115128638124568, "loss": 0.4509, "step": 163300 }, { "epoch": 8.111155259759611, "grad_norm": 0.1533203125, "learning_rate": 0.0001511115525975961, "loss": 0.4886, "step": 163310 }, { "epoch": 8.11165193205523, "grad_norm": 0.1572265625, "learning_rate": 0.00015107181881394657, "loss": 0.4764, "step": 163320 }, { "epoch": 8.11214860435085, "grad_norm": 0.1435546875, "learning_rate": 0.000151032085030297, "loss": 0.4824, "step": 163330 }, { "epoch": 8.112645276646468, "grad_norm": 0.1708984375, "learning_rate": 0.00015099235124664748, "loss": 0.4774, "step": 163340 }, { "epoch": 8.113141948942088, "grad_norm": 0.1943359375, "learning_rate": 0.0001509526174629979, "loss": 0.4948, "step": 163350 }, { "epoch": 8.113638621237707, "grad_norm": 0.1494140625, "learning_rate": 0.00015091288367934837, "loss": 0.4568, "step": 163360 }, { "epoch": 8.114135293533327, "grad_norm": 0.158203125, "learning_rate": 0.00015087314989569882, "loss": 0.4688, "step": 163370 }, { "epoch": 8.114631965828947, "grad_norm": 0.1787109375, "learning_rate": 0.0001508334161120493, "loss": 0.4855, "step": 163380 }, { "epoch": 8.115128638124565, "grad_norm": 0.16015625, "learning_rate": 0.00015079368232839973, "loss": 0.4662, "step": 163390 }, { "epoch": 8.115625310420185, "grad_norm": 0.1552734375, "learning_rate": 0.00015075394854475018, "loss": 0.491, "step": 163400 }, { "epoch": 8.116121982715804, "grad_norm": 0.1806640625, "learning_rate": 0.00015071421476110065, "loss": 0.4785, "step": 163410 }, { "epoch": 8.116618655011424, "grad_norm": 0.1806640625, "learning_rate": 0.0001506744809774511, "loss": 0.4968, "step": 163420 }, { "epoch": 8.117115327307042, "grad_norm": 0.1640625, "learning_rate": 0.00015063474719380154, "loss": 0.4812, "step": 163430 }, { "epoch": 8.117611999602662, "grad_norm": 0.15625, "learning_rate": 0.00015059501341015198, "loss": 0.5179, "step": 163440 }, { "epoch": 8.118108671898282, "grad_norm": 0.1708984375, "learning_rate": 0.00015055527962650245, "loss": 0.5029, "step": 163450 }, { "epoch": 8.1186053441939, "grad_norm": 0.1494140625, "learning_rate": 0.0001505155458428529, "loss": 0.5008, "step": 163460 }, { "epoch": 8.11910201648952, "grad_norm": 0.1748046875, "learning_rate": 0.00015047581205920334, "loss": 0.5056, "step": 163470 }, { "epoch": 8.119598688785139, "grad_norm": 0.154296875, "learning_rate": 0.00015043607827555378, "loss": 0.4666, "step": 163480 }, { "epoch": 8.12009536108076, "grad_norm": 0.150390625, "learning_rate": 0.00015039634449190426, "loss": 0.5242, "step": 163490 }, { "epoch": 8.120592033376377, "grad_norm": 0.1630859375, "learning_rate": 0.0001503566107082547, "loss": 0.4416, "step": 163500 }, { "epoch": 8.121088705671998, "grad_norm": 0.1591796875, "learning_rate": 0.00015031687692460514, "loss": 0.5143, "step": 163510 }, { "epoch": 8.121585377967618, "grad_norm": 0.1591796875, "learning_rate": 0.0001502771431409556, "loss": 0.4992, "step": 163520 }, { "epoch": 8.122082050263236, "grad_norm": 0.1572265625, "learning_rate": 0.00015023740935730606, "loss": 0.4799, "step": 163530 }, { "epoch": 8.122578722558856, "grad_norm": 0.1552734375, "learning_rate": 0.0001501976755736565, "loss": 0.4654, "step": 163540 }, { "epoch": 8.123075394854474, "grad_norm": 0.1884765625, "learning_rate": 0.00015015794179000695, "loss": 0.5201, "step": 163550 }, { "epoch": 8.123572067150095, "grad_norm": 0.162109375, "learning_rate": 0.00015011820800635742, "loss": 0.4963, "step": 163560 }, { "epoch": 8.124068739445713, "grad_norm": 0.1494140625, "learning_rate": 0.00015007847422270786, "loss": 0.477, "step": 163570 }, { "epoch": 8.124565411741333, "grad_norm": 0.185546875, "learning_rate": 0.00015003874043905834, "loss": 0.4793, "step": 163580 }, { "epoch": 8.125062084036953, "grad_norm": 0.1591796875, "learning_rate": 0.00014999900665540875, "loss": 0.4878, "step": 163590 }, { "epoch": 8.125558756332572, "grad_norm": 0.1787109375, "learning_rate": 0.00014995927287175922, "loss": 0.4903, "step": 163600 }, { "epoch": 8.126055428628192, "grad_norm": 0.1513671875, "learning_rate": 0.00014991953908810967, "loss": 0.4818, "step": 163610 }, { "epoch": 8.12655210092381, "grad_norm": 0.1796875, "learning_rate": 0.00014987980530446014, "loss": 0.48, "step": 163620 }, { "epoch": 8.12704877321943, "grad_norm": 0.19140625, "learning_rate": 0.00014984007152081056, "loss": 0.4938, "step": 163630 }, { "epoch": 8.127545445515048, "grad_norm": 0.16796875, "learning_rate": 0.00014980033773716103, "loss": 0.484, "step": 163640 }, { "epoch": 8.128042117810669, "grad_norm": 0.158203125, "learning_rate": 0.00014976060395351147, "loss": 0.4536, "step": 163650 }, { "epoch": 8.128538790106289, "grad_norm": 0.1533203125, "learning_rate": 0.00014972087016986194, "loss": 0.4506, "step": 163660 }, { "epoch": 8.129035462401907, "grad_norm": 0.185546875, "learning_rate": 0.0001496811363862124, "loss": 0.4585, "step": 163670 }, { "epoch": 8.129532134697527, "grad_norm": 0.169921875, "learning_rate": 0.00014964140260256283, "loss": 0.4976, "step": 163680 }, { "epoch": 8.130028806993145, "grad_norm": 0.1611328125, "learning_rate": 0.00014960166881891328, "loss": 0.4749, "step": 163690 }, { "epoch": 8.130525479288766, "grad_norm": 0.1591796875, "learning_rate": 0.00014956193503526375, "loss": 0.4705, "step": 163700 }, { "epoch": 8.131022151584384, "grad_norm": 0.14453125, "learning_rate": 0.0001495222012516142, "loss": 0.4701, "step": 163710 }, { "epoch": 8.131518823880004, "grad_norm": 0.169921875, "learning_rate": 0.00014948246746796464, "loss": 0.5111, "step": 163720 }, { "epoch": 8.132015496175624, "grad_norm": 0.154296875, "learning_rate": 0.0001494427336843151, "loss": 0.4588, "step": 163730 }, { "epoch": 8.132512168471242, "grad_norm": 0.1826171875, "learning_rate": 0.00014940299990066555, "loss": 0.4914, "step": 163740 }, { "epoch": 8.133008840766863, "grad_norm": 0.1611328125, "learning_rate": 0.000149363266117016, "loss": 0.5111, "step": 163750 }, { "epoch": 8.133505513062481, "grad_norm": 0.1806640625, "learning_rate": 0.00014932353233336644, "loss": 0.529, "step": 163760 }, { "epoch": 8.134002185358101, "grad_norm": 0.1982421875, "learning_rate": 0.0001492837985497169, "loss": 0.4805, "step": 163770 }, { "epoch": 8.13449885765372, "grad_norm": 0.1474609375, "learning_rate": 0.00014924406476606736, "loss": 0.4843, "step": 163780 }, { "epoch": 8.13499552994934, "grad_norm": 0.15234375, "learning_rate": 0.00014920433098241783, "loss": 0.4816, "step": 163790 }, { "epoch": 8.13549220224496, "grad_norm": 0.1474609375, "learning_rate": 0.00014916459719876824, "loss": 0.5106, "step": 163800 }, { "epoch": 8.135988874540578, "grad_norm": 0.1630859375, "learning_rate": 0.00014912486341511872, "loss": 0.4676, "step": 163810 }, { "epoch": 8.136485546836198, "grad_norm": 0.193359375, "learning_rate": 0.00014908512963146916, "loss": 0.4662, "step": 163820 }, { "epoch": 8.136982219131816, "grad_norm": 0.2001953125, "learning_rate": 0.00014904539584781963, "loss": 0.4769, "step": 163830 }, { "epoch": 8.137478891427437, "grad_norm": 0.173828125, "learning_rate": 0.00014900566206417005, "loss": 0.4501, "step": 163840 }, { "epoch": 8.137975563723055, "grad_norm": 0.177734375, "learning_rate": 0.00014896592828052052, "loss": 0.4867, "step": 163850 }, { "epoch": 8.138472236018675, "grad_norm": 0.1669921875, "learning_rate": 0.00014892619449687096, "loss": 0.4927, "step": 163860 }, { "epoch": 8.138968908314295, "grad_norm": 0.1669921875, "learning_rate": 0.00014888646071322144, "loss": 0.4846, "step": 163870 }, { "epoch": 8.139465580609913, "grad_norm": 0.171875, "learning_rate": 0.00014884672692957188, "loss": 0.4848, "step": 163880 }, { "epoch": 8.139962252905534, "grad_norm": 0.16796875, "learning_rate": 0.00014880699314592232, "loss": 0.5082, "step": 163890 }, { "epoch": 8.140458925201152, "grad_norm": 0.1591796875, "learning_rate": 0.00014876725936227277, "loss": 0.4832, "step": 163900 }, { "epoch": 8.140955597496772, "grad_norm": 0.150390625, "learning_rate": 0.00014872752557862324, "loss": 0.4672, "step": 163910 }, { "epoch": 8.14145226979239, "grad_norm": 0.1611328125, "learning_rate": 0.00014868779179497368, "loss": 0.4833, "step": 163920 }, { "epoch": 8.14194894208801, "grad_norm": 0.16015625, "learning_rate": 0.00014864805801132413, "loss": 0.484, "step": 163930 }, { "epoch": 8.142445614383629, "grad_norm": 0.15625, "learning_rate": 0.0001486083242276746, "loss": 0.498, "step": 163940 }, { "epoch": 8.142942286679249, "grad_norm": 0.15625, "learning_rate": 0.00014856859044402504, "loss": 0.4554, "step": 163950 }, { "epoch": 8.143438958974869, "grad_norm": 0.154296875, "learning_rate": 0.0001485288566603755, "loss": 0.4805, "step": 163960 }, { "epoch": 8.143935631270487, "grad_norm": 0.171875, "learning_rate": 0.00014848912287672593, "loss": 0.446, "step": 163970 }, { "epoch": 8.144432303566107, "grad_norm": 0.158203125, "learning_rate": 0.0001484493890930764, "loss": 0.4562, "step": 163980 }, { "epoch": 8.144928975861726, "grad_norm": 0.1591796875, "learning_rate": 0.00014840965530942685, "loss": 0.4906, "step": 163990 }, { "epoch": 8.145425648157346, "grad_norm": 0.1708984375, "learning_rate": 0.0001483699215257773, "loss": 0.4687, "step": 164000 }, { "epoch": 8.145922320452964, "grad_norm": 0.1669921875, "learning_rate": 0.00014833018774212774, "loss": 0.4506, "step": 164010 }, { "epoch": 8.146418992748584, "grad_norm": 0.171875, "learning_rate": 0.0001482904539584782, "loss": 0.4859, "step": 164020 }, { "epoch": 8.146915665044205, "grad_norm": 0.1572265625, "learning_rate": 0.00014825072017482865, "loss": 0.493, "step": 164030 }, { "epoch": 8.147412337339823, "grad_norm": 0.16015625, "learning_rate": 0.0001482109863911791, "loss": 0.4749, "step": 164040 }, { "epoch": 8.147909009635443, "grad_norm": 0.1689453125, "learning_rate": 0.00014817125260752954, "loss": 0.4579, "step": 164050 }, { "epoch": 8.148405681931061, "grad_norm": 0.16015625, "learning_rate": 0.00014813151882388, "loss": 0.5064, "step": 164060 }, { "epoch": 8.148902354226681, "grad_norm": 0.1650390625, "learning_rate": 0.00014809178504023046, "loss": 0.4737, "step": 164070 }, { "epoch": 8.1493990265223, "grad_norm": 0.162109375, "learning_rate": 0.00014805205125658093, "loss": 0.5041, "step": 164080 }, { "epoch": 8.14989569881792, "grad_norm": 0.1591796875, "learning_rate": 0.00014801231747293137, "loss": 0.49, "step": 164090 }, { "epoch": 8.15039237111354, "grad_norm": 0.158203125, "learning_rate": 0.00014797258368928182, "loss": 0.4585, "step": 164100 }, { "epoch": 8.150889043409158, "grad_norm": 0.1474609375, "learning_rate": 0.0001479328499056323, "loss": 0.4613, "step": 164110 }, { "epoch": 8.151385715704778, "grad_norm": 0.197265625, "learning_rate": 0.00014789311612198273, "loss": 0.4807, "step": 164120 }, { "epoch": 8.151882388000397, "grad_norm": 0.15234375, "learning_rate": 0.00014785338233833318, "loss": 0.4522, "step": 164130 }, { "epoch": 8.152379060296017, "grad_norm": 0.1650390625, "learning_rate": 0.00014781364855468362, "loss": 0.4512, "step": 164140 }, { "epoch": 8.152875732591635, "grad_norm": 0.169921875, "learning_rate": 0.0001477739147710341, "loss": 0.4485, "step": 164150 }, { "epoch": 8.153372404887255, "grad_norm": 0.1650390625, "learning_rate": 0.00014773418098738454, "loss": 0.5078, "step": 164160 }, { "epoch": 8.153869077182875, "grad_norm": 0.1640625, "learning_rate": 0.00014769444720373498, "loss": 0.4984, "step": 164170 }, { "epoch": 8.154365749478494, "grad_norm": 0.142578125, "learning_rate": 0.00014765471342008542, "loss": 0.5128, "step": 164180 }, { "epoch": 8.154862421774114, "grad_norm": 0.1640625, "learning_rate": 0.0001476149796364359, "loss": 0.4821, "step": 164190 }, { "epoch": 8.155359094069732, "grad_norm": 0.1669921875, "learning_rate": 0.00014757524585278634, "loss": 0.4764, "step": 164200 }, { "epoch": 8.155855766365352, "grad_norm": 0.1591796875, "learning_rate": 0.00014753551206913678, "loss": 0.4622, "step": 164210 }, { "epoch": 8.15635243866097, "grad_norm": 0.173828125, "learning_rate": 0.00014749577828548723, "loss": 0.4858, "step": 164220 }, { "epoch": 8.15684911095659, "grad_norm": 0.169921875, "learning_rate": 0.0001474560445018377, "loss": 0.4987, "step": 164230 }, { "epoch": 8.157345783252211, "grad_norm": 0.1904296875, "learning_rate": 0.00014741631071818814, "loss": 0.4995, "step": 164240 }, { "epoch": 8.15784245554783, "grad_norm": 0.154296875, "learning_rate": 0.0001473765769345386, "loss": 0.4922, "step": 164250 }, { "epoch": 8.15833912784345, "grad_norm": 0.1669921875, "learning_rate": 0.00014733684315088906, "loss": 0.5072, "step": 164260 }, { "epoch": 8.158835800139068, "grad_norm": 0.177734375, "learning_rate": 0.0001472971093672395, "loss": 0.4662, "step": 164270 }, { "epoch": 8.159332472434688, "grad_norm": 0.1962890625, "learning_rate": 0.00014725737558358998, "loss": 0.4912, "step": 164280 }, { "epoch": 8.159829144730306, "grad_norm": 0.1875, "learning_rate": 0.0001472176417999404, "loss": 0.4743, "step": 164290 }, { "epoch": 8.160325817025926, "grad_norm": 0.1728515625, "learning_rate": 0.00014717790801629086, "loss": 0.5222, "step": 164300 }, { "epoch": 8.160822489321546, "grad_norm": 0.1787109375, "learning_rate": 0.0001471381742326413, "loss": 0.4899, "step": 164310 }, { "epoch": 8.161319161617165, "grad_norm": 0.1552734375, "learning_rate": 0.00014709844044899178, "loss": 0.5119, "step": 164320 }, { "epoch": 8.161815833912785, "grad_norm": 0.1796875, "learning_rate": 0.0001470587066653422, "loss": 0.4994, "step": 164330 }, { "epoch": 8.162312506208403, "grad_norm": 0.15625, "learning_rate": 0.00014701897288169267, "loss": 0.4725, "step": 164340 }, { "epoch": 8.162809178504023, "grad_norm": 0.166015625, "learning_rate": 0.0001469792390980431, "loss": 0.4845, "step": 164350 }, { "epoch": 8.163305850799642, "grad_norm": 0.166015625, "learning_rate": 0.00014693950531439358, "loss": 0.5011, "step": 164360 }, { "epoch": 8.163802523095262, "grad_norm": 0.1787109375, "learning_rate": 0.000146899771530744, "loss": 0.4875, "step": 164370 }, { "epoch": 8.164299195390882, "grad_norm": 0.1533203125, "learning_rate": 0.00014686003774709447, "loss": 0.4793, "step": 164380 }, { "epoch": 8.1647958676865, "grad_norm": 0.1708984375, "learning_rate": 0.00014682030396344492, "loss": 0.4925, "step": 164390 }, { "epoch": 8.16529253998212, "grad_norm": 0.169921875, "learning_rate": 0.0001467805701797954, "loss": 0.5141, "step": 164400 }, { "epoch": 8.165789212277739, "grad_norm": 0.15234375, "learning_rate": 0.00014674083639614583, "loss": 0.4963, "step": 164410 }, { "epoch": 8.166285884573359, "grad_norm": 0.173828125, "learning_rate": 0.00014670110261249628, "loss": 0.4683, "step": 164420 }, { "epoch": 8.166782556868977, "grad_norm": 0.1650390625, "learning_rate": 0.00014666136882884675, "loss": 0.4896, "step": 164430 }, { "epoch": 8.167279229164597, "grad_norm": 0.15625, "learning_rate": 0.0001466216350451972, "loss": 0.4534, "step": 164440 }, { "epoch": 8.167775901460217, "grad_norm": 0.1611328125, "learning_rate": 0.00014658190126154764, "loss": 0.4682, "step": 164450 }, { "epoch": 8.168272573755836, "grad_norm": 0.1669921875, "learning_rate": 0.00014654216747789808, "loss": 0.4751, "step": 164460 }, { "epoch": 8.168769246051456, "grad_norm": 0.158203125, "learning_rate": 0.00014650243369424855, "loss": 0.4642, "step": 164470 }, { "epoch": 8.169265918347074, "grad_norm": 0.1611328125, "learning_rate": 0.000146462699910599, "loss": 0.4879, "step": 164480 }, { "epoch": 8.169762590642694, "grad_norm": 0.1650390625, "learning_rate": 0.00014642296612694947, "loss": 0.497, "step": 164490 }, { "epoch": 8.170259262938313, "grad_norm": 0.1494140625, "learning_rate": 0.00014638323234329988, "loss": 0.4932, "step": 164500 }, { "epoch": 8.170755935233933, "grad_norm": 0.16796875, "learning_rate": 0.00014634349855965036, "loss": 0.5398, "step": 164510 }, { "epoch": 8.171252607529553, "grad_norm": 0.16796875, "learning_rate": 0.0001463037647760008, "loss": 0.4897, "step": 164520 }, { "epoch": 8.171749279825171, "grad_norm": 0.1796875, "learning_rate": 0.00014626403099235127, "loss": 0.4901, "step": 164530 }, { "epoch": 8.172245952120791, "grad_norm": 0.15234375, "learning_rate": 0.0001462242972087017, "loss": 0.446, "step": 164540 }, { "epoch": 8.17274262441641, "grad_norm": 0.15625, "learning_rate": 0.00014618456342505216, "loss": 0.4869, "step": 164550 }, { "epoch": 8.17323929671203, "grad_norm": 0.1875, "learning_rate": 0.0001461448296414026, "loss": 0.4803, "step": 164560 }, { "epoch": 8.173735969007648, "grad_norm": 0.17578125, "learning_rate": 0.00014610509585775308, "loss": 0.4773, "step": 164570 }, { "epoch": 8.174232641303268, "grad_norm": 0.1416015625, "learning_rate": 0.00014606536207410352, "loss": 0.4384, "step": 164580 }, { "epoch": 8.174729313598888, "grad_norm": 0.189453125, "learning_rate": 0.00014602562829045396, "loss": 0.4849, "step": 164590 }, { "epoch": 8.175225985894507, "grad_norm": 0.1591796875, "learning_rate": 0.0001459858945068044, "loss": 0.4766, "step": 164600 }, { "epoch": 8.175722658190127, "grad_norm": 0.154296875, "learning_rate": 0.00014594616072315488, "loss": 0.4943, "step": 164610 }, { "epoch": 8.176219330485745, "grad_norm": 0.1572265625, "learning_rate": 0.00014590642693950532, "loss": 0.4399, "step": 164620 }, { "epoch": 8.176716002781365, "grad_norm": 0.158203125, "learning_rate": 0.00014586669315585577, "loss": 0.4539, "step": 164630 }, { "epoch": 8.177212675076984, "grad_norm": 0.17578125, "learning_rate": 0.00014582695937220624, "loss": 0.5025, "step": 164640 }, { "epoch": 8.177709347372604, "grad_norm": 0.251953125, "learning_rate": 0.00014578722558855668, "loss": 0.5087, "step": 164650 }, { "epoch": 8.178206019668224, "grad_norm": 0.169921875, "learning_rate": 0.00014574749180490713, "loss": 0.5102, "step": 164660 }, { "epoch": 8.178702691963842, "grad_norm": 0.1728515625, "learning_rate": 0.00014570775802125757, "loss": 0.4862, "step": 164670 }, { "epoch": 8.179199364259462, "grad_norm": 0.2021484375, "learning_rate": 0.00014566802423760804, "loss": 0.4795, "step": 164680 }, { "epoch": 8.17969603655508, "grad_norm": 0.1630859375, "learning_rate": 0.0001456282904539585, "loss": 0.5009, "step": 164690 }, { "epoch": 8.1801927088507, "grad_norm": 0.1669921875, "learning_rate": 0.00014558855667030893, "loss": 0.4772, "step": 164700 }, { "epoch": 8.180689381146319, "grad_norm": 0.166015625, "learning_rate": 0.00014554882288665938, "loss": 0.5138, "step": 164710 }, { "epoch": 8.18118605344194, "grad_norm": 0.16796875, "learning_rate": 0.00014550908910300985, "loss": 0.4987, "step": 164720 }, { "epoch": 8.181682725737558, "grad_norm": 0.16015625, "learning_rate": 0.0001454693553193603, "loss": 0.4644, "step": 164730 }, { "epoch": 8.182179398033178, "grad_norm": 0.1708984375, "learning_rate": 0.00014542962153571074, "loss": 0.481, "step": 164740 }, { "epoch": 8.182676070328798, "grad_norm": 0.158203125, "learning_rate": 0.00014538988775206118, "loss": 0.4189, "step": 164750 }, { "epoch": 8.183172742624416, "grad_norm": 0.16796875, "learning_rate": 0.00014535015396841165, "loss": 0.4587, "step": 164760 }, { "epoch": 8.183669414920036, "grad_norm": 0.1728515625, "learning_rate": 0.0001453104201847621, "loss": 0.4845, "step": 164770 }, { "epoch": 8.184166087215655, "grad_norm": 0.2080078125, "learning_rate": 0.00014527068640111254, "loss": 0.4905, "step": 164780 }, { "epoch": 8.184662759511275, "grad_norm": 0.1611328125, "learning_rate": 0.000145230952617463, "loss": 0.4633, "step": 164790 }, { "epoch": 8.185159431806893, "grad_norm": 0.16796875, "learning_rate": 0.00014519121883381346, "loss": 0.4864, "step": 164800 }, { "epoch": 8.185656104102513, "grad_norm": 0.1767578125, "learning_rate": 0.00014515148505016393, "loss": 0.501, "step": 164810 }, { "epoch": 8.186152776398133, "grad_norm": 0.1748046875, "learning_rate": 0.00014511175126651434, "loss": 0.4567, "step": 164820 }, { "epoch": 8.186649448693752, "grad_norm": 0.177734375, "learning_rate": 0.00014507201748286482, "loss": 0.4716, "step": 164830 }, { "epoch": 8.187146120989372, "grad_norm": 0.1767578125, "learning_rate": 0.00014503228369921526, "loss": 0.4943, "step": 164840 }, { "epoch": 8.18764279328499, "grad_norm": 0.1552734375, "learning_rate": 0.00014499254991556573, "loss": 0.4988, "step": 164850 }, { "epoch": 8.18813946558061, "grad_norm": 0.14453125, "learning_rate": 0.00014495281613191615, "loss": 0.4557, "step": 164860 }, { "epoch": 8.188636137876228, "grad_norm": 0.15625, "learning_rate": 0.00014491308234826662, "loss": 0.4808, "step": 164870 }, { "epoch": 8.189132810171849, "grad_norm": 0.1728515625, "learning_rate": 0.00014487334856461706, "loss": 0.4897, "step": 164880 }, { "epoch": 8.189629482467469, "grad_norm": 0.19140625, "learning_rate": 0.00014483361478096754, "loss": 0.4593, "step": 164890 }, { "epoch": 8.190126154763087, "grad_norm": 0.189453125, "learning_rate": 0.00014479388099731795, "loss": 0.5139, "step": 164900 }, { "epoch": 8.190622827058707, "grad_norm": 0.189453125, "learning_rate": 0.00014475414721366842, "loss": 0.4925, "step": 164910 }, { "epoch": 8.191119499354325, "grad_norm": 0.162109375, "learning_rate": 0.00014471441343001887, "loss": 0.4642, "step": 164920 }, { "epoch": 8.191616171649946, "grad_norm": 0.1533203125, "learning_rate": 0.00014467467964636934, "loss": 0.4647, "step": 164930 }, { "epoch": 8.192112843945564, "grad_norm": 0.1767578125, "learning_rate": 0.00014463494586271978, "loss": 0.4486, "step": 164940 }, { "epoch": 8.192609516241184, "grad_norm": 0.16796875, "learning_rate": 0.00014459521207907023, "loss": 0.5145, "step": 164950 }, { "epoch": 8.193106188536804, "grad_norm": 0.16796875, "learning_rate": 0.0001445554782954207, "loss": 0.4644, "step": 164960 }, { "epoch": 8.193602860832423, "grad_norm": 0.1611328125, "learning_rate": 0.00014451574451177114, "loss": 0.4735, "step": 164970 }, { "epoch": 8.194099533128043, "grad_norm": 0.1572265625, "learning_rate": 0.00014447601072812161, "loss": 0.4492, "step": 164980 }, { "epoch": 8.194596205423661, "grad_norm": 0.1767578125, "learning_rate": 0.00014443627694447203, "loss": 0.4675, "step": 164990 }, { "epoch": 8.195092877719281, "grad_norm": 0.1572265625, "learning_rate": 0.0001443965431608225, "loss": 0.4816, "step": 165000 }, { "epoch": 8.1955895500149, "grad_norm": 0.158203125, "learning_rate": 0.00014435680937717295, "loss": 0.4675, "step": 165010 }, { "epoch": 8.19608622231052, "grad_norm": 0.1689453125, "learning_rate": 0.00014431707559352342, "loss": 0.4893, "step": 165020 }, { "epoch": 8.19658289460614, "grad_norm": 0.1591796875, "learning_rate": 0.00014427734180987384, "loss": 0.4923, "step": 165030 }, { "epoch": 8.197079566901758, "grad_norm": 0.15234375, "learning_rate": 0.0001442376080262243, "loss": 0.5033, "step": 165040 }, { "epoch": 8.197576239197378, "grad_norm": 0.1923828125, "learning_rate": 0.00014419787424257475, "loss": 0.454, "step": 165050 }, { "epoch": 8.198072911492996, "grad_norm": 0.1669921875, "learning_rate": 0.00014415814045892522, "loss": 0.4845, "step": 165060 }, { "epoch": 8.198569583788617, "grad_norm": 0.177734375, "learning_rate": 0.00014411840667527564, "loss": 0.4828, "step": 165070 }, { "epoch": 8.199066256084235, "grad_norm": 0.2001953125, "learning_rate": 0.0001440786728916261, "loss": 0.4931, "step": 165080 }, { "epoch": 8.199562928379855, "grad_norm": 0.169921875, "learning_rate": 0.00014403893910797656, "loss": 0.5006, "step": 165090 }, { "epoch": 8.200059600675475, "grad_norm": 0.1669921875, "learning_rate": 0.00014399920532432703, "loss": 0.4702, "step": 165100 }, { "epoch": 8.200556272971093, "grad_norm": 0.1572265625, "learning_rate": 0.00014395947154067747, "loss": 0.4977, "step": 165110 }, { "epoch": 8.201052945266714, "grad_norm": 0.16796875, "learning_rate": 0.00014391973775702792, "loss": 0.4561, "step": 165120 }, { "epoch": 8.201549617562332, "grad_norm": 0.189453125, "learning_rate": 0.0001438800039733784, "loss": 0.5116, "step": 165130 }, { "epoch": 8.202046289857952, "grad_norm": 0.1513671875, "learning_rate": 0.00014384027018972883, "loss": 0.4462, "step": 165140 }, { "epoch": 8.20254296215357, "grad_norm": 0.2119140625, "learning_rate": 0.00014380053640607928, "loss": 0.4528, "step": 165150 }, { "epoch": 8.20303963444919, "grad_norm": 0.15625, "learning_rate": 0.00014376080262242972, "loss": 0.4725, "step": 165160 }, { "epoch": 8.20353630674481, "grad_norm": 0.1787109375, "learning_rate": 0.0001437210688387802, "loss": 0.483, "step": 165170 }, { "epoch": 8.204032979040429, "grad_norm": 0.1611328125, "learning_rate": 0.00014368133505513064, "loss": 0.4928, "step": 165180 }, { "epoch": 8.204529651336049, "grad_norm": 0.173828125, "learning_rate": 0.00014364160127148108, "loss": 0.4657, "step": 165190 }, { "epoch": 8.205026323631667, "grad_norm": 0.171875, "learning_rate": 0.00014360186748783152, "loss": 0.4717, "step": 165200 }, { "epoch": 8.205522995927288, "grad_norm": 0.162109375, "learning_rate": 0.000143562133704182, "loss": 0.457, "step": 165210 }, { "epoch": 8.206019668222906, "grad_norm": 0.16015625, "learning_rate": 0.00014352239992053244, "loss": 0.5057, "step": 165220 }, { "epoch": 8.206516340518526, "grad_norm": 0.1513671875, "learning_rate": 0.00014348266613688288, "loss": 0.4999, "step": 165230 }, { "epoch": 8.207013012814146, "grad_norm": 0.1689453125, "learning_rate": 0.00014344293235323333, "loss": 0.4944, "step": 165240 }, { "epoch": 8.207509685109764, "grad_norm": 0.1630859375, "learning_rate": 0.0001434031985695838, "loss": 0.4968, "step": 165250 }, { "epoch": 8.208006357405385, "grad_norm": 0.16796875, "learning_rate": 0.00014336346478593424, "loss": 0.4919, "step": 165260 }, { "epoch": 8.208503029701003, "grad_norm": 0.208984375, "learning_rate": 0.0001433237310022847, "loss": 0.5149, "step": 165270 }, { "epoch": 8.208999701996623, "grad_norm": 0.1513671875, "learning_rate": 0.00014328399721863516, "loss": 0.4786, "step": 165280 }, { "epoch": 8.209496374292241, "grad_norm": 0.158203125, "learning_rate": 0.0001432442634349856, "loss": 0.5123, "step": 165290 }, { "epoch": 8.209993046587861, "grad_norm": 0.1669921875, "learning_rate": 0.00014320452965133605, "loss": 0.4706, "step": 165300 }, { "epoch": 8.21048971888348, "grad_norm": 0.1796875, "learning_rate": 0.0001431647958676865, "loss": 0.5235, "step": 165310 }, { "epoch": 8.2109863911791, "grad_norm": 0.2041015625, "learning_rate": 0.00014312506208403696, "loss": 0.4814, "step": 165320 }, { "epoch": 8.21148306347472, "grad_norm": 0.1806640625, "learning_rate": 0.0001430853283003874, "loss": 0.464, "step": 165330 }, { "epoch": 8.211979735770338, "grad_norm": 0.1767578125, "learning_rate": 0.00014304559451673788, "loss": 0.5068, "step": 165340 }, { "epoch": 8.212476408065958, "grad_norm": 0.1796875, "learning_rate": 0.00014300586073308832, "loss": 0.5117, "step": 165350 }, { "epoch": 8.212973080361577, "grad_norm": 0.16796875, "learning_rate": 0.00014296612694943877, "loss": 0.4758, "step": 165360 }, { "epoch": 8.213469752657197, "grad_norm": 0.1591796875, "learning_rate": 0.0001429263931657892, "loss": 0.4912, "step": 165370 }, { "epoch": 8.213966424952815, "grad_norm": 0.1513671875, "learning_rate": 0.00014288665938213968, "loss": 0.476, "step": 165380 }, { "epoch": 8.214463097248435, "grad_norm": 0.1806640625, "learning_rate": 0.00014284692559849013, "loss": 0.5403, "step": 165390 }, { "epoch": 8.214959769544055, "grad_norm": 0.1572265625, "learning_rate": 0.00014280719181484057, "loss": 0.4758, "step": 165400 }, { "epoch": 8.215456441839674, "grad_norm": 0.2080078125, "learning_rate": 0.00014276745803119102, "loss": 0.4988, "step": 165410 }, { "epoch": 8.215953114135294, "grad_norm": 0.166015625, "learning_rate": 0.0001427277242475415, "loss": 0.4871, "step": 165420 }, { "epoch": 8.216449786430912, "grad_norm": 0.15234375, "learning_rate": 0.00014268799046389193, "loss": 0.4574, "step": 165430 }, { "epoch": 8.216946458726532, "grad_norm": 0.189453125, "learning_rate": 0.00014264825668024238, "loss": 0.4665, "step": 165440 }, { "epoch": 8.21744313102215, "grad_norm": 0.1708984375, "learning_rate": 0.00014260852289659282, "loss": 0.4975, "step": 165450 }, { "epoch": 8.21793980331777, "grad_norm": 0.17578125, "learning_rate": 0.0001425687891129433, "loss": 0.479, "step": 165460 }, { "epoch": 8.218436475613391, "grad_norm": 0.1748046875, "learning_rate": 0.00014252905532929374, "loss": 0.4817, "step": 165470 }, { "epoch": 8.21893314790901, "grad_norm": 0.1630859375, "learning_rate": 0.00014248932154564418, "loss": 0.4735, "step": 165480 }, { "epoch": 8.21942982020463, "grad_norm": 0.150390625, "learning_rate": 0.00014244958776199465, "loss": 0.4687, "step": 165490 }, { "epoch": 8.219926492500248, "grad_norm": 0.208984375, "learning_rate": 0.0001424098539783451, "loss": 0.4996, "step": 165500 }, { "epoch": 8.220423164795868, "grad_norm": 0.1474609375, "learning_rate": 0.00014237012019469557, "loss": 0.4872, "step": 165510 }, { "epoch": 8.220919837091486, "grad_norm": 0.166015625, "learning_rate": 0.00014233038641104598, "loss": 0.5011, "step": 165520 }, { "epoch": 8.221416509387106, "grad_norm": 0.1630859375, "learning_rate": 0.00014229065262739646, "loss": 0.4914, "step": 165530 }, { "epoch": 8.221913181682726, "grad_norm": 0.185546875, "learning_rate": 0.0001422509188437469, "loss": 0.4813, "step": 165540 }, { "epoch": 8.222409853978345, "grad_norm": 0.1611328125, "learning_rate": 0.00014221118506009737, "loss": 0.4656, "step": 165550 }, { "epoch": 8.222906526273965, "grad_norm": 0.1552734375, "learning_rate": 0.0001421714512764478, "loss": 0.4844, "step": 165560 }, { "epoch": 8.223403198569583, "grad_norm": 0.240234375, "learning_rate": 0.00014213171749279826, "loss": 0.5205, "step": 165570 }, { "epoch": 8.223899870865203, "grad_norm": 0.1591796875, "learning_rate": 0.0001420919837091487, "loss": 0.4786, "step": 165580 }, { "epoch": 8.224396543160822, "grad_norm": 0.169921875, "learning_rate": 0.00014205224992549917, "loss": 0.5083, "step": 165590 }, { "epoch": 8.224893215456442, "grad_norm": 0.154296875, "learning_rate": 0.0001420125161418496, "loss": 0.4834, "step": 165600 }, { "epoch": 8.225389887752062, "grad_norm": 0.181640625, "learning_rate": 0.00014197278235820006, "loss": 0.4658, "step": 165610 }, { "epoch": 8.22588656004768, "grad_norm": 0.181640625, "learning_rate": 0.0001419330485745505, "loss": 0.5276, "step": 165620 }, { "epoch": 8.2263832323433, "grad_norm": 0.1533203125, "learning_rate": 0.00014189331479090098, "loss": 0.4855, "step": 165630 }, { "epoch": 8.226879904638919, "grad_norm": 0.162109375, "learning_rate": 0.00014185358100725142, "loss": 0.4836, "step": 165640 }, { "epoch": 8.227376576934539, "grad_norm": 0.1669921875, "learning_rate": 0.00014181384722360187, "loss": 0.4809, "step": 165650 }, { "epoch": 8.227873249230157, "grad_norm": 0.1513671875, "learning_rate": 0.00014177411343995234, "loss": 0.498, "step": 165660 }, { "epoch": 8.228369921525777, "grad_norm": 0.1748046875, "learning_rate": 0.00014173437965630278, "loss": 0.5017, "step": 165670 }, { "epoch": 8.228866593821397, "grad_norm": 0.1796875, "learning_rate": 0.00014169464587265323, "loss": 0.4874, "step": 165680 }, { "epoch": 8.229363266117016, "grad_norm": 0.169921875, "learning_rate": 0.00014165491208900367, "loss": 0.4943, "step": 165690 }, { "epoch": 8.229859938412636, "grad_norm": 0.1552734375, "learning_rate": 0.00014161517830535414, "loss": 0.4968, "step": 165700 }, { "epoch": 8.230356610708254, "grad_norm": 0.1796875, "learning_rate": 0.0001415754445217046, "loss": 0.4797, "step": 165710 }, { "epoch": 8.230853283003874, "grad_norm": 0.1884765625, "learning_rate": 0.00014153571073805503, "loss": 0.4862, "step": 165720 }, { "epoch": 8.231349955299493, "grad_norm": 0.171875, "learning_rate": 0.00014149597695440548, "loss": 0.4464, "step": 165730 }, { "epoch": 8.231846627595113, "grad_norm": 0.1689453125, "learning_rate": 0.00014145624317075595, "loss": 0.4743, "step": 165740 }, { "epoch": 8.232343299890733, "grad_norm": 0.1630859375, "learning_rate": 0.0001414165093871064, "loss": 0.5065, "step": 165750 }, { "epoch": 8.232839972186351, "grad_norm": 0.1796875, "learning_rate": 0.00014137677560345686, "loss": 0.4617, "step": 165760 }, { "epoch": 8.233336644481971, "grad_norm": 0.16015625, "learning_rate": 0.00014133704181980728, "loss": 0.494, "step": 165770 }, { "epoch": 8.23383331677759, "grad_norm": 0.16796875, "learning_rate": 0.00014129730803615775, "loss": 0.4749, "step": 165780 }, { "epoch": 8.23432998907321, "grad_norm": 0.150390625, "learning_rate": 0.0001412575742525082, "loss": 0.467, "step": 165790 }, { "epoch": 8.234826661368828, "grad_norm": 0.1845703125, "learning_rate": 0.00014121784046885867, "loss": 0.4945, "step": 165800 }, { "epoch": 8.235323333664448, "grad_norm": 0.166015625, "learning_rate": 0.0001411781066852091, "loss": 0.5129, "step": 165810 }, { "epoch": 8.235820005960068, "grad_norm": 0.22265625, "learning_rate": 0.00014113837290155956, "loss": 0.5185, "step": 165820 }, { "epoch": 8.236316678255687, "grad_norm": 0.150390625, "learning_rate": 0.00014109863911791003, "loss": 0.5067, "step": 165830 }, { "epoch": 8.236813350551307, "grad_norm": 0.1533203125, "learning_rate": 0.00014105890533426047, "loss": 0.4612, "step": 165840 }, { "epoch": 8.237310022846925, "grad_norm": 0.1669921875, "learning_rate": 0.00014101917155061092, "loss": 0.5043, "step": 165850 }, { "epoch": 8.237806695142545, "grad_norm": 0.1640625, "learning_rate": 0.00014097943776696136, "loss": 0.4934, "step": 165860 }, { "epoch": 8.238303367438164, "grad_norm": 0.1669921875, "learning_rate": 0.00014093970398331183, "loss": 0.495, "step": 165870 }, { "epoch": 8.238800039733784, "grad_norm": 0.1611328125, "learning_rate": 0.00014089997019966228, "loss": 0.4677, "step": 165880 }, { "epoch": 8.239296712029404, "grad_norm": 0.1953125, "learning_rate": 0.00014086023641601272, "loss": 0.4895, "step": 165890 }, { "epoch": 8.239793384325022, "grad_norm": 0.1787109375, "learning_rate": 0.00014082050263236316, "loss": 0.4675, "step": 165900 }, { "epoch": 8.240290056620642, "grad_norm": 0.1650390625, "learning_rate": 0.00014078076884871363, "loss": 0.4758, "step": 165910 }, { "epoch": 8.24078672891626, "grad_norm": 0.162109375, "learning_rate": 0.00014074103506506408, "loss": 0.4697, "step": 165920 }, { "epoch": 8.24128340121188, "grad_norm": 0.1669921875, "learning_rate": 0.00014070130128141452, "loss": 0.4968, "step": 165930 }, { "epoch": 8.241780073507499, "grad_norm": 0.1640625, "learning_rate": 0.00014066156749776497, "loss": 0.477, "step": 165940 }, { "epoch": 8.24227674580312, "grad_norm": 0.171875, "learning_rate": 0.00014062183371411544, "loss": 0.4564, "step": 165950 }, { "epoch": 8.24277341809874, "grad_norm": 0.1650390625, "learning_rate": 0.00014058209993046588, "loss": 0.4658, "step": 165960 }, { "epoch": 8.243270090394358, "grad_norm": 0.1572265625, "learning_rate": 0.00014054236614681633, "loss": 0.4952, "step": 165970 }, { "epoch": 8.243766762689978, "grad_norm": 0.203125, "learning_rate": 0.0001405026323631668, "loss": 0.4651, "step": 165980 }, { "epoch": 8.244263434985596, "grad_norm": 0.2138671875, "learning_rate": 0.00014046289857951724, "loss": 0.4815, "step": 165990 }, { "epoch": 8.244760107281216, "grad_norm": 0.171875, "learning_rate": 0.00014042316479586771, "loss": 0.5055, "step": 166000 }, { "epoch": 8.245256779576835, "grad_norm": 0.1552734375, "learning_rate": 0.00014038343101221813, "loss": 0.5135, "step": 166010 }, { "epoch": 8.245753451872455, "grad_norm": 0.1552734375, "learning_rate": 0.0001403436972285686, "loss": 0.52, "step": 166020 }, { "epoch": 8.246250124168075, "grad_norm": 0.1875, "learning_rate": 0.00014030396344491905, "loss": 0.4857, "step": 166030 }, { "epoch": 8.246746796463693, "grad_norm": 0.18359375, "learning_rate": 0.00014026422966126952, "loss": 0.5122, "step": 166040 }, { "epoch": 8.247243468759313, "grad_norm": 0.1845703125, "learning_rate": 0.00014022449587761994, "loss": 0.489, "step": 166050 }, { "epoch": 8.247740141054932, "grad_norm": 0.166015625, "learning_rate": 0.0001401847620939704, "loss": 0.4951, "step": 166060 }, { "epoch": 8.248236813350552, "grad_norm": 0.1875, "learning_rate": 0.00014014502831032085, "loss": 0.5143, "step": 166070 }, { "epoch": 8.24873348564617, "grad_norm": 0.1826171875, "learning_rate": 0.00014010529452667132, "loss": 0.4649, "step": 166080 }, { "epoch": 8.24923015794179, "grad_norm": 0.19921875, "learning_rate": 0.00014006556074302174, "loss": 0.4765, "step": 166090 }, { "epoch": 8.249726830237408, "grad_norm": 0.154296875, "learning_rate": 0.0001400258269593722, "loss": 0.4906, "step": 166100 }, { "epoch": 8.250223502533029, "grad_norm": 0.15234375, "learning_rate": 0.00013998609317572266, "loss": 0.461, "step": 166110 }, { "epoch": 8.250720174828649, "grad_norm": 0.1572265625, "learning_rate": 0.00013994635939207313, "loss": 0.4916, "step": 166120 }, { "epoch": 8.251216847124267, "grad_norm": 0.173828125, "learning_rate": 0.00013990662560842357, "loss": 0.478, "step": 166130 }, { "epoch": 8.251713519419887, "grad_norm": 0.2021484375, "learning_rate": 0.00013986689182477402, "loss": 0.5184, "step": 166140 }, { "epoch": 8.252210191715506, "grad_norm": 0.15625, "learning_rate": 0.00013982715804112446, "loss": 0.4736, "step": 166150 }, { "epoch": 8.252706864011126, "grad_norm": 0.1748046875, "learning_rate": 0.00013978742425747493, "loss": 0.4896, "step": 166160 }, { "epoch": 8.253203536306744, "grad_norm": 0.166015625, "learning_rate": 0.00013974769047382538, "loss": 0.4874, "step": 166170 }, { "epoch": 8.253700208602364, "grad_norm": 0.1611328125, "learning_rate": 0.00013970795669017582, "loss": 0.46, "step": 166180 }, { "epoch": 8.254196880897984, "grad_norm": 0.197265625, "learning_rate": 0.0001396682229065263, "loss": 0.486, "step": 166190 }, { "epoch": 8.254693553193603, "grad_norm": 0.1611328125, "learning_rate": 0.00013962848912287673, "loss": 0.4872, "step": 166200 }, { "epoch": 8.255190225489223, "grad_norm": 0.1962890625, "learning_rate": 0.0001395887553392272, "loss": 0.5024, "step": 166210 }, { "epoch": 8.255686897784841, "grad_norm": 0.158203125, "learning_rate": 0.00013954902155557762, "loss": 0.4826, "step": 166220 }, { "epoch": 8.256183570080461, "grad_norm": 0.1669921875, "learning_rate": 0.0001395092877719281, "loss": 0.4846, "step": 166230 }, { "epoch": 8.25668024237608, "grad_norm": 0.1513671875, "learning_rate": 0.00013946955398827854, "loss": 0.4903, "step": 166240 }, { "epoch": 8.2571769146717, "grad_norm": 0.1767578125, "learning_rate": 0.000139429820204629, "loss": 0.5027, "step": 166250 }, { "epoch": 8.25767358696732, "grad_norm": 0.1728515625, "learning_rate": 0.00013939008642097943, "loss": 0.4871, "step": 166260 }, { "epoch": 8.258170259262938, "grad_norm": 0.162109375, "learning_rate": 0.0001393503526373299, "loss": 0.481, "step": 166270 }, { "epoch": 8.258666931558558, "grad_norm": 0.1650390625, "learning_rate": 0.00013931061885368034, "loss": 0.4921, "step": 166280 }, { "epoch": 8.259163603854176, "grad_norm": 0.1669921875, "learning_rate": 0.00013927088507003081, "loss": 0.491, "step": 166290 }, { "epoch": 8.259660276149797, "grad_norm": 0.1572265625, "learning_rate": 0.00013923115128638123, "loss": 0.4899, "step": 166300 }, { "epoch": 8.260156948445415, "grad_norm": 0.177734375, "learning_rate": 0.0001391914175027317, "loss": 0.4881, "step": 166310 }, { "epoch": 8.260653620741035, "grad_norm": 0.1513671875, "learning_rate": 0.00013915168371908215, "loss": 0.4838, "step": 166320 }, { "epoch": 8.261150293036655, "grad_norm": 0.216796875, "learning_rate": 0.00013911194993543262, "loss": 0.4698, "step": 166330 }, { "epoch": 8.261646965332273, "grad_norm": 0.1904296875, "learning_rate": 0.00013907221615178306, "loss": 0.4901, "step": 166340 }, { "epoch": 8.262143637627894, "grad_norm": 0.1513671875, "learning_rate": 0.0001390324823681335, "loss": 0.46, "step": 166350 }, { "epoch": 8.262640309923512, "grad_norm": 0.1630859375, "learning_rate": 0.00013899274858448398, "loss": 0.5303, "step": 166360 }, { "epoch": 8.263136982219132, "grad_norm": 0.173828125, "learning_rate": 0.00013895301480083442, "loss": 0.5084, "step": 166370 }, { "epoch": 8.26363365451475, "grad_norm": 0.1630859375, "learning_rate": 0.00013891328101718487, "loss": 0.5177, "step": 166380 }, { "epoch": 8.26413032681037, "grad_norm": 0.1689453125, "learning_rate": 0.0001388735472335353, "loss": 0.4443, "step": 166390 }, { "epoch": 8.26462699910599, "grad_norm": 0.1669921875, "learning_rate": 0.00013883381344988578, "loss": 0.4598, "step": 166400 }, { "epoch": 8.265123671401609, "grad_norm": 0.1630859375, "learning_rate": 0.00013879407966623623, "loss": 0.482, "step": 166410 }, { "epoch": 8.265620343697229, "grad_norm": 0.1630859375, "learning_rate": 0.00013875434588258667, "loss": 0.5358, "step": 166420 }, { "epoch": 8.266117015992847, "grad_norm": 0.1630859375, "learning_rate": 0.00013871461209893712, "loss": 0.4901, "step": 166430 }, { "epoch": 8.266613688288468, "grad_norm": 0.166015625, "learning_rate": 0.0001386748783152876, "loss": 0.5012, "step": 166440 }, { "epoch": 8.267110360584086, "grad_norm": 0.1708984375, "learning_rate": 0.00013863514453163803, "loss": 0.5218, "step": 166450 }, { "epoch": 8.267607032879706, "grad_norm": 0.171875, "learning_rate": 0.00013859541074798848, "loss": 0.4715, "step": 166460 }, { "epoch": 8.268103705175326, "grad_norm": 0.162109375, "learning_rate": 0.00013855567696433892, "loss": 0.4567, "step": 166470 }, { "epoch": 8.268600377470944, "grad_norm": 0.1630859375, "learning_rate": 0.0001385159431806894, "loss": 0.4753, "step": 166480 }, { "epoch": 8.269097049766565, "grad_norm": 0.1640625, "learning_rate": 0.00013847620939703984, "loss": 0.4756, "step": 166490 }, { "epoch": 8.269593722062183, "grad_norm": 0.1748046875, "learning_rate": 0.00013843647561339028, "loss": 0.4921, "step": 166500 }, { "epoch": 8.270090394357803, "grad_norm": 0.1591796875, "learning_rate": 0.00013839674182974075, "loss": 0.4789, "step": 166510 }, { "epoch": 8.270587066653421, "grad_norm": 0.1591796875, "learning_rate": 0.0001383570080460912, "loss": 0.4963, "step": 166520 }, { "epoch": 8.271083738949041, "grad_norm": 0.185546875, "learning_rate": 0.00013831727426244167, "loss": 0.485, "step": 166530 }, { "epoch": 8.271580411244662, "grad_norm": 0.1630859375, "learning_rate": 0.00013827754047879208, "loss": 0.4849, "step": 166540 }, { "epoch": 8.27207708354028, "grad_norm": 0.15625, "learning_rate": 0.00013823780669514255, "loss": 0.4716, "step": 166550 }, { "epoch": 8.2725737558359, "grad_norm": 0.18359375, "learning_rate": 0.000138198072911493, "loss": 0.5257, "step": 166560 }, { "epoch": 8.273070428131518, "grad_norm": 0.1826171875, "learning_rate": 0.00013815833912784347, "loss": 0.5066, "step": 166570 }, { "epoch": 8.273567100427138, "grad_norm": 0.166015625, "learning_rate": 0.0001381186053441939, "loss": 0.4887, "step": 166580 }, { "epoch": 8.274063772722757, "grad_norm": 0.1689453125, "learning_rate": 0.00013807887156054436, "loss": 0.4754, "step": 166590 }, { "epoch": 8.274560445018377, "grad_norm": 0.16015625, "learning_rate": 0.0001380391377768948, "loss": 0.4981, "step": 166600 }, { "epoch": 8.275057117313997, "grad_norm": 0.2158203125, "learning_rate": 0.00013799940399324527, "loss": 0.4992, "step": 166610 }, { "epoch": 8.275553789609615, "grad_norm": 0.193359375, "learning_rate": 0.00013795967020959572, "loss": 0.4977, "step": 166620 }, { "epoch": 8.276050461905236, "grad_norm": 0.158203125, "learning_rate": 0.00013791993642594616, "loss": 0.464, "step": 166630 }, { "epoch": 8.276547134200854, "grad_norm": 0.15234375, "learning_rate": 0.0001378802026422966, "loss": 0.475, "step": 166640 }, { "epoch": 8.277043806496474, "grad_norm": 0.197265625, "learning_rate": 0.00013784046885864708, "loss": 0.5013, "step": 166650 }, { "epoch": 8.277540478792092, "grad_norm": 0.1650390625, "learning_rate": 0.00013780073507499752, "loss": 0.4751, "step": 166660 }, { "epoch": 8.278037151087712, "grad_norm": 0.1533203125, "learning_rate": 0.00013776100129134797, "loss": 0.4773, "step": 166670 }, { "epoch": 8.27853382338333, "grad_norm": 0.1767578125, "learning_rate": 0.00013772126750769844, "loss": 0.4894, "step": 166680 }, { "epoch": 8.27903049567895, "grad_norm": 0.193359375, "learning_rate": 0.00013768153372404888, "loss": 0.4941, "step": 166690 }, { "epoch": 8.279527167974571, "grad_norm": 0.181640625, "learning_rate": 0.00013764179994039935, "loss": 0.5007, "step": 166700 }, { "epoch": 8.28002384027019, "grad_norm": 0.166015625, "learning_rate": 0.00013760206615674977, "loss": 0.4881, "step": 166710 }, { "epoch": 8.28052051256581, "grad_norm": 0.1591796875, "learning_rate": 0.00013756233237310024, "loss": 0.4957, "step": 166720 }, { "epoch": 8.281017184861428, "grad_norm": 0.1640625, "learning_rate": 0.0001375225985894507, "loss": 0.4816, "step": 166730 }, { "epoch": 8.281513857157048, "grad_norm": 0.173828125, "learning_rate": 0.00013748286480580116, "loss": 0.4573, "step": 166740 }, { "epoch": 8.282010529452666, "grad_norm": 0.16015625, "learning_rate": 0.00013744313102215158, "loss": 0.4782, "step": 166750 }, { "epoch": 8.282507201748286, "grad_norm": 0.1650390625, "learning_rate": 0.00013740339723850205, "loss": 0.508, "step": 166760 }, { "epoch": 8.283003874043906, "grad_norm": 0.1689453125, "learning_rate": 0.0001373636634548525, "loss": 0.486, "step": 166770 }, { "epoch": 8.283500546339525, "grad_norm": 0.1689453125, "learning_rate": 0.00013732392967120296, "loss": 0.4679, "step": 166780 }, { "epoch": 8.283997218635145, "grad_norm": 0.1669921875, "learning_rate": 0.00013728419588755338, "loss": 0.4881, "step": 166790 }, { "epoch": 8.284493890930763, "grad_norm": 0.171875, "learning_rate": 0.00013724446210390385, "loss": 0.4946, "step": 166800 }, { "epoch": 8.284990563226383, "grad_norm": 0.1611328125, "learning_rate": 0.0001372047283202543, "loss": 0.5021, "step": 166810 }, { "epoch": 8.285487235522002, "grad_norm": 0.16015625, "learning_rate": 0.00013716499453660477, "loss": 0.4723, "step": 166820 }, { "epoch": 8.285983907817622, "grad_norm": 0.1669921875, "learning_rate": 0.0001371252607529552, "loss": 0.4937, "step": 166830 }, { "epoch": 8.286480580113242, "grad_norm": 0.1552734375, "learning_rate": 0.00013708552696930565, "loss": 0.4536, "step": 166840 }, { "epoch": 8.28697725240886, "grad_norm": 0.169921875, "learning_rate": 0.00013704579318565613, "loss": 0.5034, "step": 166850 }, { "epoch": 8.28747392470448, "grad_norm": 0.185546875, "learning_rate": 0.00013700605940200657, "loss": 0.475, "step": 166860 }, { "epoch": 8.287970597000099, "grad_norm": 0.1513671875, "learning_rate": 0.00013696632561835701, "loss": 0.4466, "step": 166870 }, { "epoch": 8.288467269295719, "grad_norm": 0.1689453125, "learning_rate": 0.00013692659183470746, "loss": 0.4629, "step": 166880 }, { "epoch": 8.288963941591337, "grad_norm": 0.1796875, "learning_rate": 0.00013688685805105793, "loss": 0.4828, "step": 166890 }, { "epoch": 8.289460613886957, "grad_norm": 0.1513671875, "learning_rate": 0.00013684712426740837, "loss": 0.4674, "step": 166900 }, { "epoch": 8.289957286182577, "grad_norm": 0.18359375, "learning_rate": 0.00013680739048375882, "loss": 0.4985, "step": 166910 }, { "epoch": 8.290453958478196, "grad_norm": 0.1748046875, "learning_rate": 0.00013676765670010926, "loss": 0.462, "step": 166920 }, { "epoch": 8.290950630773816, "grad_norm": 0.17578125, "learning_rate": 0.00013672792291645973, "loss": 0.4734, "step": 166930 }, { "epoch": 8.291447303069434, "grad_norm": 0.177734375, "learning_rate": 0.00013668818913281018, "loss": 0.4651, "step": 166940 }, { "epoch": 8.291943975365054, "grad_norm": 0.154296875, "learning_rate": 0.00013664845534916062, "loss": 0.473, "step": 166950 }, { "epoch": 8.292440647660673, "grad_norm": 0.181640625, "learning_rate": 0.00013660872156551107, "loss": 0.5003, "step": 166960 }, { "epoch": 8.292937319956293, "grad_norm": 0.162109375, "learning_rate": 0.00013656898778186154, "loss": 0.4831, "step": 166970 }, { "epoch": 8.293433992251913, "grad_norm": 0.1689453125, "learning_rate": 0.00013652925399821198, "loss": 0.4685, "step": 166980 }, { "epoch": 8.293930664547531, "grad_norm": 0.16796875, "learning_rate": 0.00013648952021456243, "loss": 0.4819, "step": 166990 }, { "epoch": 8.294427336843151, "grad_norm": 0.169921875, "learning_rate": 0.00013644978643091287, "loss": 0.4569, "step": 167000 }, { "epoch": 8.29492400913877, "grad_norm": 0.189453125, "learning_rate": 0.00013641005264726334, "loss": 0.4882, "step": 167010 }, { "epoch": 8.29542068143439, "grad_norm": 0.173828125, "learning_rate": 0.0001363703188636138, "loss": 0.4702, "step": 167020 }, { "epoch": 8.295917353730008, "grad_norm": 0.189453125, "learning_rate": 0.00013633058507996426, "loss": 0.4986, "step": 167030 }, { "epoch": 8.296414026025628, "grad_norm": 0.181640625, "learning_rate": 0.0001362908512963147, "loss": 0.4749, "step": 167040 }, { "epoch": 8.296910698321248, "grad_norm": 0.1630859375, "learning_rate": 0.00013625111751266515, "loss": 0.4875, "step": 167050 }, { "epoch": 8.297407370616867, "grad_norm": 0.1484375, "learning_rate": 0.00013621138372901562, "loss": 0.4534, "step": 167060 }, { "epoch": 8.297904042912487, "grad_norm": 0.1904296875, "learning_rate": 0.00013617164994536606, "loss": 0.5031, "step": 167070 }, { "epoch": 8.298400715208105, "grad_norm": 0.166015625, "learning_rate": 0.0001361319161617165, "loss": 0.4927, "step": 167080 }, { "epoch": 8.298897387503725, "grad_norm": 0.1669921875, "learning_rate": 0.00013609218237806695, "loss": 0.4837, "step": 167090 }, { "epoch": 8.299394059799344, "grad_norm": 0.150390625, "learning_rate": 0.00013605244859441742, "loss": 0.4649, "step": 167100 }, { "epoch": 8.299890732094964, "grad_norm": 0.1669921875, "learning_rate": 0.00013601271481076787, "loss": 0.4737, "step": 167110 }, { "epoch": 8.300387404390584, "grad_norm": 0.1572265625, "learning_rate": 0.0001359729810271183, "loss": 0.4946, "step": 167120 }, { "epoch": 8.300884076686202, "grad_norm": 0.1943359375, "learning_rate": 0.00013593324724346876, "loss": 0.4786, "step": 167130 }, { "epoch": 8.301380748981822, "grad_norm": 0.1591796875, "learning_rate": 0.00013589351345981923, "loss": 0.4839, "step": 167140 }, { "epoch": 8.30187742127744, "grad_norm": 0.1796875, "learning_rate": 0.00013585377967616967, "loss": 0.4802, "step": 167150 }, { "epoch": 8.30237409357306, "grad_norm": 0.1533203125, "learning_rate": 0.00013581404589252011, "loss": 0.5023, "step": 167160 }, { "epoch": 8.30287076586868, "grad_norm": 0.201171875, "learning_rate": 0.00013577431210887056, "loss": 0.497, "step": 167170 }, { "epoch": 8.3033674381643, "grad_norm": 0.1904296875, "learning_rate": 0.00013573457832522103, "loss": 0.4474, "step": 167180 }, { "epoch": 8.30386411045992, "grad_norm": 0.1708984375, "learning_rate": 0.00013569484454157147, "loss": 0.5103, "step": 167190 }, { "epoch": 8.304360782755538, "grad_norm": 0.197265625, "learning_rate": 0.00013565511075792192, "loss": 0.503, "step": 167200 }, { "epoch": 8.304857455051158, "grad_norm": 0.1806640625, "learning_rate": 0.0001356153769742724, "loss": 0.5081, "step": 167210 }, { "epoch": 8.305354127346776, "grad_norm": 0.162109375, "learning_rate": 0.00013557564319062283, "loss": 0.494, "step": 167220 }, { "epoch": 8.305850799642396, "grad_norm": 0.1689453125, "learning_rate": 0.0001355359094069733, "loss": 0.4958, "step": 167230 }, { "epoch": 8.306347471938015, "grad_norm": 0.1552734375, "learning_rate": 0.00013549617562332372, "loss": 0.4953, "step": 167240 }, { "epoch": 8.306844144233635, "grad_norm": 0.1708984375, "learning_rate": 0.0001354564418396742, "loss": 0.5154, "step": 167250 }, { "epoch": 8.307340816529255, "grad_norm": 0.1650390625, "learning_rate": 0.00013541670805602464, "loss": 0.4744, "step": 167260 }, { "epoch": 8.307837488824873, "grad_norm": 0.1650390625, "learning_rate": 0.0001353769742723751, "loss": 0.4729, "step": 167270 }, { "epoch": 8.308334161120493, "grad_norm": 0.169921875, "learning_rate": 0.00013533724048872553, "loss": 0.5144, "step": 167280 }, { "epoch": 8.308830833416112, "grad_norm": 0.17578125, "learning_rate": 0.000135297506705076, "loss": 0.4924, "step": 167290 }, { "epoch": 8.309327505711732, "grad_norm": 0.1669921875, "learning_rate": 0.00013525777292142644, "loss": 0.4912, "step": 167300 }, { "epoch": 8.30982417800735, "grad_norm": 0.16796875, "learning_rate": 0.00013521803913777691, "loss": 0.4705, "step": 167310 }, { "epoch": 8.31032085030297, "grad_norm": 0.166015625, "learning_rate": 0.00013517830535412733, "loss": 0.5122, "step": 167320 }, { "epoch": 8.31081752259859, "grad_norm": 0.154296875, "learning_rate": 0.0001351385715704778, "loss": 0.4926, "step": 167330 }, { "epoch": 8.311314194894209, "grad_norm": 0.171875, "learning_rate": 0.00013509883778682825, "loss": 0.4612, "step": 167340 }, { "epoch": 8.311810867189829, "grad_norm": 0.18359375, "learning_rate": 0.00013505910400317872, "loss": 0.4623, "step": 167350 }, { "epoch": 8.312307539485447, "grad_norm": 0.171875, "learning_rate": 0.00013501937021952916, "loss": 0.4782, "step": 167360 }, { "epoch": 8.312804211781067, "grad_norm": 0.158203125, "learning_rate": 0.0001349796364358796, "loss": 0.4786, "step": 167370 }, { "epoch": 8.313300884076686, "grad_norm": 0.1572265625, "learning_rate": 0.00013493990265223008, "loss": 0.4938, "step": 167380 }, { "epoch": 8.313797556372306, "grad_norm": 0.1708984375, "learning_rate": 0.00013490016886858052, "loss": 0.4889, "step": 167390 }, { "epoch": 8.314294228667926, "grad_norm": 0.2001953125, "learning_rate": 0.00013486043508493097, "loss": 0.528, "step": 167400 }, { "epoch": 8.314790900963544, "grad_norm": 0.1669921875, "learning_rate": 0.0001348207013012814, "loss": 0.4834, "step": 167410 }, { "epoch": 8.315287573259164, "grad_norm": 0.1630859375, "learning_rate": 0.00013478096751763188, "loss": 0.4942, "step": 167420 }, { "epoch": 8.315784245554783, "grad_norm": 0.1796875, "learning_rate": 0.00013474123373398233, "loss": 0.4927, "step": 167430 }, { "epoch": 8.316280917850403, "grad_norm": 0.1904296875, "learning_rate": 0.0001347014999503328, "loss": 0.482, "step": 167440 }, { "epoch": 8.316777590146021, "grad_norm": 0.154296875, "learning_rate": 0.00013466176616668321, "loss": 0.4717, "step": 167450 }, { "epoch": 8.317274262441641, "grad_norm": 0.1650390625, "learning_rate": 0.00013462203238303369, "loss": 0.4825, "step": 167460 }, { "epoch": 8.317770934737261, "grad_norm": 0.16796875, "learning_rate": 0.00013458229859938413, "loss": 0.4978, "step": 167470 }, { "epoch": 8.31826760703288, "grad_norm": 0.1962890625, "learning_rate": 0.0001345425648157346, "loss": 0.5176, "step": 167480 }, { "epoch": 8.3187642793285, "grad_norm": 0.1650390625, "learning_rate": 0.00013450283103208502, "loss": 0.4729, "step": 167490 }, { "epoch": 8.319260951624118, "grad_norm": 0.1640625, "learning_rate": 0.0001344630972484355, "loss": 0.5029, "step": 167500 }, { "epoch": 8.319757623919738, "grad_norm": 0.1669921875, "learning_rate": 0.00013442336346478593, "loss": 0.4453, "step": 167510 }, { "epoch": 8.320254296215357, "grad_norm": 0.19140625, "learning_rate": 0.0001343836296811364, "loss": 0.5059, "step": 167520 }, { "epoch": 8.320750968510977, "grad_norm": 0.171875, "learning_rate": 0.00013434389589748685, "loss": 0.4966, "step": 167530 }, { "epoch": 8.321247640806595, "grad_norm": 0.185546875, "learning_rate": 0.0001343041621138373, "loss": 0.4385, "step": 167540 }, { "epoch": 8.321744313102215, "grad_norm": 0.1728515625, "learning_rate": 0.00013426442833018777, "loss": 0.4618, "step": 167550 }, { "epoch": 8.322240985397835, "grad_norm": 0.1708984375, "learning_rate": 0.0001342246945465382, "loss": 0.4689, "step": 167560 }, { "epoch": 8.322737657693454, "grad_norm": 0.1611328125, "learning_rate": 0.00013418496076288865, "loss": 0.4645, "step": 167570 }, { "epoch": 8.323234329989074, "grad_norm": 0.1640625, "learning_rate": 0.0001341452269792391, "loss": 0.4748, "step": 167580 }, { "epoch": 8.323731002284692, "grad_norm": 0.1748046875, "learning_rate": 0.00013410549319558957, "loss": 0.4818, "step": 167590 }, { "epoch": 8.324227674580312, "grad_norm": 0.1806640625, "learning_rate": 0.00013406575941194001, "loss": 0.4575, "step": 167600 }, { "epoch": 8.32472434687593, "grad_norm": 0.1845703125, "learning_rate": 0.00013402602562829046, "loss": 0.4976, "step": 167610 }, { "epoch": 8.32522101917155, "grad_norm": 0.1875, "learning_rate": 0.0001339862918446409, "loss": 0.4791, "step": 167620 }, { "epoch": 8.32571769146717, "grad_norm": 0.2001953125, "learning_rate": 0.00013394655806099137, "loss": 0.4636, "step": 167630 }, { "epoch": 8.326214363762789, "grad_norm": 0.1728515625, "learning_rate": 0.00013390682427734182, "loss": 0.4777, "step": 167640 }, { "epoch": 8.32671103605841, "grad_norm": 0.158203125, "learning_rate": 0.00013386709049369226, "loss": 0.4811, "step": 167650 }, { "epoch": 8.327207708354027, "grad_norm": 0.1796875, "learning_rate": 0.0001338273567100427, "loss": 0.4824, "step": 167660 }, { "epoch": 8.327704380649648, "grad_norm": 0.1591796875, "learning_rate": 0.00013378762292639318, "loss": 0.464, "step": 167670 }, { "epoch": 8.328201052945266, "grad_norm": 0.189453125, "learning_rate": 0.00013374788914274362, "loss": 0.4885, "step": 167680 }, { "epoch": 8.328697725240886, "grad_norm": 0.169921875, "learning_rate": 0.00013370815535909407, "loss": 0.4901, "step": 167690 }, { "epoch": 8.329194397536506, "grad_norm": 0.19921875, "learning_rate": 0.00013366842157544454, "loss": 0.4973, "step": 167700 }, { "epoch": 8.329691069832124, "grad_norm": 0.1767578125, "learning_rate": 0.00013362868779179498, "loss": 0.4965, "step": 167710 }, { "epoch": 8.330187742127745, "grad_norm": 0.1953125, "learning_rate": 0.00013358895400814543, "loss": 0.4656, "step": 167720 }, { "epoch": 8.330684414423363, "grad_norm": 0.1630859375, "learning_rate": 0.00013354922022449587, "loss": 0.4857, "step": 167730 }, { "epoch": 8.331181086718983, "grad_norm": 0.1748046875, "learning_rate": 0.00013350948644084634, "loss": 0.494, "step": 167740 }, { "epoch": 8.331677759014601, "grad_norm": 0.185546875, "learning_rate": 0.0001334697526571968, "loss": 0.4538, "step": 167750 }, { "epoch": 8.332174431310222, "grad_norm": 0.1572265625, "learning_rate": 0.00013343001887354726, "loss": 0.48, "step": 167760 }, { "epoch": 8.332671103605842, "grad_norm": 0.2216796875, "learning_rate": 0.00013339028508989767, "loss": 0.4763, "step": 167770 }, { "epoch": 8.33316777590146, "grad_norm": 0.201171875, "learning_rate": 0.00013335055130624815, "loss": 0.4868, "step": 167780 }, { "epoch": 8.33366444819708, "grad_norm": 0.1767578125, "learning_rate": 0.0001333108175225986, "loss": 0.4968, "step": 167790 }, { "epoch": 8.334161120492698, "grad_norm": 0.1689453125, "learning_rate": 0.00013327108373894906, "loss": 0.4734, "step": 167800 }, { "epoch": 8.334657792788319, "grad_norm": 0.154296875, "learning_rate": 0.00013323134995529948, "loss": 0.5021, "step": 167810 }, { "epoch": 8.335154465083937, "grad_norm": 0.1630859375, "learning_rate": 0.00013319161617164995, "loss": 0.4863, "step": 167820 }, { "epoch": 8.335651137379557, "grad_norm": 0.2001953125, "learning_rate": 0.0001331518823880004, "loss": 0.5015, "step": 167830 }, { "epoch": 8.336147809675177, "grad_norm": 0.1552734375, "learning_rate": 0.00013311214860435087, "loss": 0.5054, "step": 167840 }, { "epoch": 8.336644481970795, "grad_norm": 0.169921875, "learning_rate": 0.00013307241482070128, "loss": 0.4841, "step": 167850 }, { "epoch": 8.337141154266416, "grad_norm": 0.181640625, "learning_rate": 0.00013303268103705175, "loss": 0.4526, "step": 167860 }, { "epoch": 8.337637826562034, "grad_norm": 0.1533203125, "learning_rate": 0.0001329929472534022, "loss": 0.508, "step": 167870 }, { "epoch": 8.338134498857654, "grad_norm": 0.169921875, "learning_rate": 0.00013295321346975267, "loss": 0.5082, "step": 167880 }, { "epoch": 8.338631171153272, "grad_norm": 0.173828125, "learning_rate": 0.00013291347968610311, "loss": 0.5039, "step": 167890 }, { "epoch": 8.339127843448892, "grad_norm": 0.1728515625, "learning_rate": 0.00013287374590245356, "loss": 0.4792, "step": 167900 }, { "epoch": 8.339624515744513, "grad_norm": 0.197265625, "learning_rate": 0.00013283401211880403, "loss": 0.4819, "step": 167910 }, { "epoch": 8.340121188040131, "grad_norm": 0.17578125, "learning_rate": 0.00013279427833515447, "loss": 0.5087, "step": 167920 }, { "epoch": 8.340617860335751, "grad_norm": 0.20703125, "learning_rate": 0.00013275454455150495, "loss": 0.4872, "step": 167930 }, { "epoch": 8.34111453263137, "grad_norm": 0.1826171875, "learning_rate": 0.00013271481076785536, "loss": 0.4954, "step": 167940 }, { "epoch": 8.34161120492699, "grad_norm": 0.1640625, "learning_rate": 0.00013267507698420583, "loss": 0.4834, "step": 167950 }, { "epoch": 8.342107877222608, "grad_norm": 0.1865234375, "learning_rate": 0.00013263534320055628, "loss": 0.4951, "step": 167960 }, { "epoch": 8.342604549518228, "grad_norm": 0.1806640625, "learning_rate": 0.00013259560941690675, "loss": 0.464, "step": 167970 }, { "epoch": 8.343101221813848, "grad_norm": 0.1611328125, "learning_rate": 0.00013255587563325717, "loss": 0.4987, "step": 167980 }, { "epoch": 8.343597894109466, "grad_norm": 0.1611328125, "learning_rate": 0.00013251614184960764, "loss": 0.5149, "step": 167990 }, { "epoch": 8.344094566405087, "grad_norm": 0.162109375, "learning_rate": 0.00013247640806595808, "loss": 0.4995, "step": 168000 }, { "epoch": 8.344591238700705, "grad_norm": 0.19140625, "learning_rate": 0.00013243667428230855, "loss": 0.4931, "step": 168010 }, { "epoch": 8.345087910996325, "grad_norm": 0.1884765625, "learning_rate": 0.00013239694049865897, "loss": 0.5091, "step": 168020 }, { "epoch": 8.345584583291943, "grad_norm": 0.1650390625, "learning_rate": 0.00013235720671500944, "loss": 0.4792, "step": 168030 }, { "epoch": 8.346081255587563, "grad_norm": 0.1845703125, "learning_rate": 0.0001323174729313599, "loss": 0.4816, "step": 168040 }, { "epoch": 8.346577927883184, "grad_norm": 0.1875, "learning_rate": 0.00013227773914771036, "loss": 0.483, "step": 168050 }, { "epoch": 8.347074600178802, "grad_norm": 0.173828125, "learning_rate": 0.0001322380053640608, "loss": 0.4836, "step": 168060 }, { "epoch": 8.347571272474422, "grad_norm": 0.169921875, "learning_rate": 0.00013219827158041125, "loss": 0.4806, "step": 168070 }, { "epoch": 8.34806794477004, "grad_norm": 0.197265625, "learning_rate": 0.00013215853779676172, "loss": 0.4986, "step": 168080 }, { "epoch": 8.34856461706566, "grad_norm": 0.1669921875, "learning_rate": 0.00013211880401311216, "loss": 0.49, "step": 168090 }, { "epoch": 8.349061289361279, "grad_norm": 0.1669921875, "learning_rate": 0.0001320790702294626, "loss": 0.4752, "step": 168100 }, { "epoch": 8.349557961656899, "grad_norm": 0.1865234375, "learning_rate": 0.00013203933644581305, "loss": 0.4681, "step": 168110 }, { "epoch": 8.350054633952517, "grad_norm": 0.1923828125, "learning_rate": 0.00013199960266216352, "loss": 0.4882, "step": 168120 }, { "epoch": 8.350551306248137, "grad_norm": 0.1845703125, "learning_rate": 0.00013195986887851397, "loss": 0.4639, "step": 168130 }, { "epoch": 8.351047978543757, "grad_norm": 0.171875, "learning_rate": 0.0001319201350948644, "loss": 0.5005, "step": 168140 }, { "epoch": 8.351544650839376, "grad_norm": 0.166015625, "learning_rate": 0.00013188040131121485, "loss": 0.4698, "step": 168150 }, { "epoch": 8.352041323134996, "grad_norm": 0.1982421875, "learning_rate": 0.00013184066752756533, "loss": 0.4761, "step": 168160 }, { "epoch": 8.352537995430614, "grad_norm": 0.1748046875, "learning_rate": 0.00013180093374391577, "loss": 0.4914, "step": 168170 }, { "epoch": 8.353034667726234, "grad_norm": 0.1689453125, "learning_rate": 0.00013176119996026621, "loss": 0.4655, "step": 168180 }, { "epoch": 8.353531340021853, "grad_norm": 0.1708984375, "learning_rate": 0.00013172146617661666, "loss": 0.4935, "step": 168190 }, { "epoch": 8.354028012317473, "grad_norm": 0.1533203125, "learning_rate": 0.00013168173239296713, "loss": 0.4744, "step": 168200 }, { "epoch": 8.354524684613093, "grad_norm": 0.1865234375, "learning_rate": 0.00013164199860931757, "loss": 0.4751, "step": 168210 }, { "epoch": 8.355021356908711, "grad_norm": 0.1650390625, "learning_rate": 0.00013160226482566802, "loss": 0.4717, "step": 168220 }, { "epoch": 8.355518029204331, "grad_norm": 0.1787109375, "learning_rate": 0.0001315625310420185, "loss": 0.4954, "step": 168230 }, { "epoch": 8.35601470149995, "grad_norm": 0.1845703125, "learning_rate": 0.00013152279725836893, "loss": 0.5185, "step": 168240 }, { "epoch": 8.35651137379557, "grad_norm": 0.1708984375, "learning_rate": 0.0001314830634747194, "loss": 0.4877, "step": 168250 }, { "epoch": 8.357008046091188, "grad_norm": 0.1611328125, "learning_rate": 0.00013144332969106982, "loss": 0.4824, "step": 168260 }, { "epoch": 8.357504718386808, "grad_norm": 0.17578125, "learning_rate": 0.0001314035959074203, "loss": 0.5182, "step": 168270 }, { "epoch": 8.358001390682428, "grad_norm": 0.1728515625, "learning_rate": 0.00013136386212377074, "loss": 0.4792, "step": 168280 }, { "epoch": 8.358498062978047, "grad_norm": 0.162109375, "learning_rate": 0.0001313241283401212, "loss": 0.479, "step": 168290 }, { "epoch": 8.358994735273667, "grad_norm": 0.1630859375, "learning_rate": 0.00013128439455647165, "loss": 0.513, "step": 168300 }, { "epoch": 8.359491407569285, "grad_norm": 0.189453125, "learning_rate": 0.0001312446607728221, "loss": 0.481, "step": 168310 }, { "epoch": 8.359988079864905, "grad_norm": 0.1806640625, "learning_rate": 0.00013120492698917254, "loss": 0.462, "step": 168320 }, { "epoch": 8.360484752160524, "grad_norm": 0.1650390625, "learning_rate": 0.00013116519320552301, "loss": 0.5018, "step": 168330 }, { "epoch": 8.360981424456144, "grad_norm": 0.1708984375, "learning_rate": 0.00013112545942187346, "loss": 0.4758, "step": 168340 }, { "epoch": 8.361478096751764, "grad_norm": 0.1806640625, "learning_rate": 0.0001310857256382239, "loss": 0.4626, "step": 168350 }, { "epoch": 8.361974769047382, "grad_norm": 0.16015625, "learning_rate": 0.00013104599185457435, "loss": 0.4703, "step": 168360 }, { "epoch": 8.362471441343002, "grad_norm": 0.1845703125, "learning_rate": 0.00013100625807092482, "loss": 0.478, "step": 168370 }, { "epoch": 8.36296811363862, "grad_norm": 0.154296875, "learning_rate": 0.00013096652428727526, "loss": 0.4709, "step": 168380 }, { "epoch": 8.36346478593424, "grad_norm": 0.1591796875, "learning_rate": 0.0001309267905036257, "loss": 0.4842, "step": 168390 }, { "epoch": 8.36396145822986, "grad_norm": 0.2021484375, "learning_rate": 0.00013088705671997618, "loss": 0.5029, "step": 168400 }, { "epoch": 8.36445813052548, "grad_norm": 0.2177734375, "learning_rate": 0.00013084732293632662, "loss": 0.4755, "step": 168410 }, { "epoch": 8.3649548028211, "grad_norm": 0.1708984375, "learning_rate": 0.00013080758915267707, "loss": 0.4617, "step": 168420 }, { "epoch": 8.365451475116718, "grad_norm": 0.19140625, "learning_rate": 0.0001307678553690275, "loss": 0.4698, "step": 168430 }, { "epoch": 8.365948147412338, "grad_norm": 0.185546875, "learning_rate": 0.00013072812158537798, "loss": 0.4857, "step": 168440 }, { "epoch": 8.366444819707956, "grad_norm": 0.1611328125, "learning_rate": 0.00013068838780172843, "loss": 0.4522, "step": 168450 }, { "epoch": 8.366941492003576, "grad_norm": 0.1669921875, "learning_rate": 0.0001306486540180789, "loss": 0.5127, "step": 168460 }, { "epoch": 8.367438164299195, "grad_norm": 0.171875, "learning_rate": 0.00013060892023442931, "loss": 0.4891, "step": 168470 }, { "epoch": 8.367934836594815, "grad_norm": 0.1708984375, "learning_rate": 0.00013056918645077979, "loss": 0.4853, "step": 168480 }, { "epoch": 8.368431508890435, "grad_norm": 0.1728515625, "learning_rate": 0.00013052945266713023, "loss": 0.4682, "step": 168490 }, { "epoch": 8.368928181186053, "grad_norm": 0.1650390625, "learning_rate": 0.0001304897188834807, "loss": 0.4787, "step": 168500 }, { "epoch": 8.369424853481673, "grad_norm": 0.162109375, "learning_rate": 0.00013044998509983112, "loss": 0.4798, "step": 168510 }, { "epoch": 8.369921525777292, "grad_norm": 0.193359375, "learning_rate": 0.0001304102513161816, "loss": 0.4655, "step": 168520 }, { "epoch": 8.370418198072912, "grad_norm": 0.185546875, "learning_rate": 0.00013037051753253203, "loss": 0.4892, "step": 168530 }, { "epoch": 8.37091487036853, "grad_norm": 0.1689453125, "learning_rate": 0.0001303307837488825, "loss": 0.5185, "step": 168540 }, { "epoch": 8.37141154266415, "grad_norm": 0.16796875, "learning_rate": 0.00013029104996523295, "loss": 0.4781, "step": 168550 }, { "epoch": 8.37190821495977, "grad_norm": 0.16796875, "learning_rate": 0.0001302513161815834, "loss": 0.4573, "step": 168560 }, { "epoch": 8.372404887255389, "grad_norm": 0.17578125, "learning_rate": 0.00013021158239793384, "loss": 0.4944, "step": 168570 }, { "epoch": 8.372901559551009, "grad_norm": 0.2109375, "learning_rate": 0.0001301718486142843, "loss": 0.4696, "step": 168580 }, { "epoch": 8.373398231846627, "grad_norm": 0.1845703125, "learning_rate": 0.00013013211483063475, "loss": 0.4993, "step": 168590 }, { "epoch": 8.373894904142247, "grad_norm": 0.166015625, "learning_rate": 0.0001300923810469852, "loss": 0.4979, "step": 168600 }, { "epoch": 8.374391576437866, "grad_norm": 0.171875, "learning_rate": 0.00013005264726333567, "loss": 0.4934, "step": 168610 }, { "epoch": 8.374888248733486, "grad_norm": 0.1669921875, "learning_rate": 0.00013001291347968611, "loss": 0.4886, "step": 168620 }, { "epoch": 8.375384921029106, "grad_norm": 0.177734375, "learning_rate": 0.00012997317969603656, "loss": 0.4948, "step": 168630 }, { "epoch": 8.375881593324724, "grad_norm": 0.1787109375, "learning_rate": 0.000129933445912387, "loss": 0.4783, "step": 168640 }, { "epoch": 8.376378265620344, "grad_norm": 0.1640625, "learning_rate": 0.00012989371212873747, "loss": 0.4894, "step": 168650 }, { "epoch": 8.376874937915963, "grad_norm": 0.181640625, "learning_rate": 0.00012985397834508792, "loss": 0.4993, "step": 168660 }, { "epoch": 8.377371610211583, "grad_norm": 0.18359375, "learning_rate": 0.00012981424456143836, "loss": 0.4976, "step": 168670 }, { "epoch": 8.377868282507201, "grad_norm": 0.16015625, "learning_rate": 0.0001297745107777888, "loss": 0.4744, "step": 168680 }, { "epoch": 8.378364954802821, "grad_norm": 0.1650390625, "learning_rate": 0.00012973477699413928, "loss": 0.4681, "step": 168690 }, { "epoch": 8.378861627098441, "grad_norm": 0.16796875, "learning_rate": 0.00012969504321048972, "loss": 0.5058, "step": 168700 }, { "epoch": 8.37935829939406, "grad_norm": 0.1650390625, "learning_rate": 0.0001296553094268402, "loss": 0.5096, "step": 168710 }, { "epoch": 8.37985497168968, "grad_norm": 0.1611328125, "learning_rate": 0.0001296155756431906, "loss": 0.4912, "step": 168720 }, { "epoch": 8.380351643985298, "grad_norm": 0.1748046875, "learning_rate": 0.00012957584185954108, "loss": 0.5029, "step": 168730 }, { "epoch": 8.380848316280918, "grad_norm": 0.1875, "learning_rate": 0.00012953610807589153, "loss": 0.5057, "step": 168740 }, { "epoch": 8.381344988576537, "grad_norm": 0.1845703125, "learning_rate": 0.000129496374292242, "loss": 0.4688, "step": 168750 }, { "epoch": 8.381841660872157, "grad_norm": 0.19140625, "learning_rate": 0.00012945664050859244, "loss": 0.4389, "step": 168760 }, { "epoch": 8.382338333167777, "grad_norm": 0.1611328125, "learning_rate": 0.00012941690672494289, "loss": 0.4644, "step": 168770 }, { "epoch": 8.382835005463395, "grad_norm": 0.173828125, "learning_rate": 0.00012937717294129336, "loss": 0.5107, "step": 168780 }, { "epoch": 8.383331677759015, "grad_norm": 0.2021484375, "learning_rate": 0.0001293374391576438, "loss": 0.4563, "step": 168790 }, { "epoch": 8.383828350054634, "grad_norm": 0.173828125, "learning_rate": 0.00012929770537399425, "loss": 0.5044, "step": 168800 }, { "epoch": 8.384325022350254, "grad_norm": 0.1923828125, "learning_rate": 0.0001292579715903447, "loss": 0.4981, "step": 168810 }, { "epoch": 8.384821694645872, "grad_norm": 0.1728515625, "learning_rate": 0.00012921823780669516, "loss": 0.4397, "step": 168820 }, { "epoch": 8.385318366941492, "grad_norm": 0.1689453125, "learning_rate": 0.0001291785040230456, "loss": 0.4958, "step": 168830 }, { "epoch": 8.385815039237112, "grad_norm": 0.171875, "learning_rate": 0.00012913877023939605, "loss": 0.4877, "step": 168840 }, { "epoch": 8.38631171153273, "grad_norm": 0.1640625, "learning_rate": 0.0001290990364557465, "loss": 0.5225, "step": 168850 }, { "epoch": 8.38680838382835, "grad_norm": 0.162109375, "learning_rate": 0.00012905930267209697, "loss": 0.4748, "step": 168860 }, { "epoch": 8.387305056123969, "grad_norm": 0.1689453125, "learning_rate": 0.0001290195688884474, "loss": 0.4867, "step": 168870 }, { "epoch": 8.38780172841959, "grad_norm": 0.166015625, "learning_rate": 0.00012897983510479785, "loss": 0.4634, "step": 168880 }, { "epoch": 8.388298400715207, "grad_norm": 0.1611328125, "learning_rate": 0.0001289401013211483, "loss": 0.4952, "step": 168890 }, { "epoch": 8.388795073010828, "grad_norm": 0.1708984375, "learning_rate": 0.00012890036753749877, "loss": 0.5218, "step": 168900 }, { "epoch": 8.389291745306446, "grad_norm": 0.1689453125, "learning_rate": 0.00012886063375384921, "loss": 0.4845, "step": 168910 }, { "epoch": 8.389788417602066, "grad_norm": 0.1796875, "learning_rate": 0.00012882089997019966, "loss": 0.488, "step": 168920 }, { "epoch": 8.390285089897686, "grad_norm": 0.169921875, "learning_rate": 0.00012878116618655013, "loss": 0.539, "step": 168930 }, { "epoch": 8.390781762193305, "grad_norm": 0.1806640625, "learning_rate": 0.00012874143240290057, "loss": 0.4901, "step": 168940 }, { "epoch": 8.391278434488925, "grad_norm": 0.166015625, "learning_rate": 0.00012870169861925105, "loss": 0.4447, "step": 168950 }, { "epoch": 8.391775106784543, "grad_norm": 0.1611328125, "learning_rate": 0.00012866196483560146, "loss": 0.5132, "step": 168960 }, { "epoch": 8.392271779080163, "grad_norm": 0.1787109375, "learning_rate": 0.00012862223105195193, "loss": 0.4866, "step": 168970 }, { "epoch": 8.392768451375781, "grad_norm": 0.1875, "learning_rate": 0.00012858249726830238, "loss": 0.4917, "step": 168980 }, { "epoch": 8.393265123671402, "grad_norm": 0.158203125, "learning_rate": 0.00012854276348465285, "loss": 0.4911, "step": 168990 }, { "epoch": 8.393761795967022, "grad_norm": 0.16015625, "learning_rate": 0.00012850302970100327, "loss": 0.4666, "step": 169000 }, { "epoch": 8.39425846826264, "grad_norm": 0.1796875, "learning_rate": 0.00012846329591735374, "loss": 0.4556, "step": 169010 }, { "epoch": 8.39475514055826, "grad_norm": 0.18359375, "learning_rate": 0.00012842356213370418, "loss": 0.5018, "step": 169020 }, { "epoch": 8.395251812853878, "grad_norm": 0.1650390625, "learning_rate": 0.00012838382835005465, "loss": 0.4684, "step": 169030 }, { "epoch": 8.395748485149499, "grad_norm": 0.181640625, "learning_rate": 0.00012834409456640507, "loss": 0.4969, "step": 169040 }, { "epoch": 8.396245157445117, "grad_norm": 0.1611328125, "learning_rate": 0.00012830436078275554, "loss": 0.4415, "step": 169050 }, { "epoch": 8.396741829740737, "grad_norm": 0.1845703125, "learning_rate": 0.00012826462699910599, "loss": 0.486, "step": 169060 }, { "epoch": 8.397238502036357, "grad_norm": 0.1494140625, "learning_rate": 0.00012822489321545646, "loss": 0.472, "step": 169070 }, { "epoch": 8.397735174331975, "grad_norm": 0.1513671875, "learning_rate": 0.0001281851594318069, "loss": 0.5017, "step": 169080 }, { "epoch": 8.398231846627596, "grad_norm": 0.1552734375, "learning_rate": 0.00012814542564815735, "loss": 0.4763, "step": 169090 }, { "epoch": 8.398728518923214, "grad_norm": 0.1748046875, "learning_rate": 0.00012810569186450782, "loss": 0.498, "step": 169100 }, { "epoch": 8.399225191218834, "grad_norm": 0.189453125, "learning_rate": 0.00012806595808085826, "loss": 0.5043, "step": 169110 }, { "epoch": 8.399721863514452, "grad_norm": 0.1787109375, "learning_rate": 0.0001280262242972087, "loss": 0.4787, "step": 169120 }, { "epoch": 8.400218535810072, "grad_norm": 0.19140625, "learning_rate": 0.00012798649051355915, "loss": 0.4647, "step": 169130 }, { "epoch": 8.400715208105693, "grad_norm": 0.173828125, "learning_rate": 0.00012794675672990962, "loss": 0.4939, "step": 169140 }, { "epoch": 8.401211880401311, "grad_norm": 0.1689453125, "learning_rate": 0.00012790702294626007, "loss": 0.473, "step": 169150 }, { "epoch": 8.401708552696931, "grad_norm": 0.1865234375, "learning_rate": 0.00012786728916261054, "loss": 0.4581, "step": 169160 }, { "epoch": 8.40220522499255, "grad_norm": 0.1630859375, "learning_rate": 0.00012782755537896095, "loss": 0.4621, "step": 169170 }, { "epoch": 8.40270189728817, "grad_norm": 0.1884765625, "learning_rate": 0.00012778782159531143, "loss": 0.4803, "step": 169180 }, { "epoch": 8.403198569583788, "grad_norm": 0.166015625, "learning_rate": 0.00012774808781166187, "loss": 0.4912, "step": 169190 }, { "epoch": 8.403695241879408, "grad_norm": 0.166015625, "learning_rate": 0.00012770835402801234, "loss": 0.4673, "step": 169200 }, { "epoch": 8.404191914175028, "grad_norm": 0.158203125, "learning_rate": 0.00012766862024436276, "loss": 0.5012, "step": 169210 }, { "epoch": 8.404688586470646, "grad_norm": 0.1640625, "learning_rate": 0.00012762888646071323, "loss": 0.4878, "step": 169220 }, { "epoch": 8.405185258766267, "grad_norm": 0.205078125, "learning_rate": 0.00012758915267706367, "loss": 0.5079, "step": 169230 }, { "epoch": 8.405681931061885, "grad_norm": 0.17578125, "learning_rate": 0.00012754941889341415, "loss": 0.4728, "step": 169240 }, { "epoch": 8.406178603357505, "grad_norm": 0.185546875, "learning_rate": 0.0001275096851097646, "loss": 0.4736, "step": 169250 }, { "epoch": 8.406675275653123, "grad_norm": 0.181640625, "learning_rate": 0.00012746995132611503, "loss": 0.4654, "step": 169260 }, { "epoch": 8.407171947948743, "grad_norm": 0.2041015625, "learning_rate": 0.00012743021754246548, "loss": 0.4865, "step": 169270 }, { "epoch": 8.407668620244364, "grad_norm": 0.16796875, "learning_rate": 0.00012739048375881595, "loss": 0.4786, "step": 169280 }, { "epoch": 8.408165292539982, "grad_norm": 0.1787109375, "learning_rate": 0.0001273507499751664, "loss": 0.4763, "step": 169290 }, { "epoch": 8.408661964835602, "grad_norm": 0.19921875, "learning_rate": 0.00012731101619151684, "loss": 0.4851, "step": 169300 }, { "epoch": 8.40915863713122, "grad_norm": 0.16015625, "learning_rate": 0.0001272712824078673, "loss": 0.4838, "step": 169310 }, { "epoch": 8.40965530942684, "grad_norm": 0.173828125, "learning_rate": 0.00012723154862421775, "loss": 0.4796, "step": 169320 }, { "epoch": 8.410151981722459, "grad_norm": 0.1669921875, "learning_rate": 0.0001271918148405682, "loss": 0.5066, "step": 169330 }, { "epoch": 8.410648654018079, "grad_norm": 0.17578125, "learning_rate": 0.00012715208105691864, "loss": 0.5034, "step": 169340 }, { "epoch": 8.411145326313699, "grad_norm": 0.1884765625, "learning_rate": 0.0001271123472732691, "loss": 0.4853, "step": 169350 }, { "epoch": 8.411641998609317, "grad_norm": 0.171875, "learning_rate": 0.00012707261348961956, "loss": 0.4663, "step": 169360 }, { "epoch": 8.412138670904937, "grad_norm": 0.16796875, "learning_rate": 0.00012703287970597, "loss": 0.4583, "step": 169370 }, { "epoch": 8.412635343200556, "grad_norm": 0.1708984375, "learning_rate": 0.00012699314592232045, "loss": 0.5136, "step": 169380 }, { "epoch": 8.413132015496176, "grad_norm": 0.17578125, "learning_rate": 0.00012695341213867092, "loss": 0.5158, "step": 169390 }, { "epoch": 8.413628687791794, "grad_norm": 0.181640625, "learning_rate": 0.00012691367835502136, "loss": 0.4755, "step": 169400 }, { "epoch": 8.414125360087414, "grad_norm": 0.181640625, "learning_rate": 0.0001268739445713718, "loss": 0.4769, "step": 169410 }, { "epoch": 8.414622032383035, "grad_norm": 0.1748046875, "learning_rate": 0.00012683421078772225, "loss": 0.4683, "step": 169420 }, { "epoch": 8.415118704678653, "grad_norm": 0.16015625, "learning_rate": 0.00012679447700407272, "loss": 0.4729, "step": 169430 }, { "epoch": 8.415615376974273, "grad_norm": 0.1748046875, "learning_rate": 0.00012675474322042317, "loss": 0.4856, "step": 169440 }, { "epoch": 8.416112049269891, "grad_norm": 0.1962890625, "learning_rate": 0.0001267150094367736, "loss": 0.5044, "step": 169450 }, { "epoch": 8.416608721565511, "grad_norm": 0.1591796875, "learning_rate": 0.00012667527565312408, "loss": 0.4702, "step": 169460 }, { "epoch": 8.41710539386113, "grad_norm": 0.1787109375, "learning_rate": 0.00012663554186947453, "loss": 0.4685, "step": 169470 }, { "epoch": 8.41760206615675, "grad_norm": 0.185546875, "learning_rate": 0.000126595808085825, "loss": 0.4905, "step": 169480 }, { "epoch": 8.418098738452368, "grad_norm": 0.177734375, "learning_rate": 0.00012655607430217541, "loss": 0.4802, "step": 169490 }, { "epoch": 8.418595410747988, "grad_norm": 0.169921875, "learning_rate": 0.00012651634051852589, "loss": 0.4804, "step": 169500 }, { "epoch": 8.419092083043608, "grad_norm": 0.19921875, "learning_rate": 0.00012647660673487633, "loss": 0.4988, "step": 169510 }, { "epoch": 8.419588755339227, "grad_norm": 0.162109375, "learning_rate": 0.0001264368729512268, "loss": 0.5153, "step": 169520 }, { "epoch": 8.420085427634847, "grad_norm": 0.1826171875, "learning_rate": 0.00012639713916757722, "loss": 0.4659, "step": 169530 }, { "epoch": 8.420582099930465, "grad_norm": 0.181640625, "learning_rate": 0.0001263574053839277, "loss": 0.4871, "step": 169540 }, { "epoch": 8.421078772226085, "grad_norm": 0.193359375, "learning_rate": 0.00012631767160027813, "loss": 0.4974, "step": 169550 }, { "epoch": 8.421575444521704, "grad_norm": 0.1640625, "learning_rate": 0.0001262779378166286, "loss": 0.4921, "step": 169560 }, { "epoch": 8.422072116817324, "grad_norm": 0.162109375, "learning_rate": 0.00012623820403297905, "loss": 0.5091, "step": 169570 }, { "epoch": 8.422568789112944, "grad_norm": 0.1728515625, "learning_rate": 0.0001261984702493295, "loss": 0.472, "step": 169580 }, { "epoch": 8.423065461408562, "grad_norm": 0.2080078125, "learning_rate": 0.00012615873646567994, "loss": 0.4821, "step": 169590 }, { "epoch": 8.423562133704182, "grad_norm": 0.2041015625, "learning_rate": 0.0001261190026820304, "loss": 0.5078, "step": 169600 }, { "epoch": 8.4240588059998, "grad_norm": 0.1767578125, "learning_rate": 0.00012607926889838085, "loss": 0.4889, "step": 169610 }, { "epoch": 8.42455547829542, "grad_norm": 0.1591796875, "learning_rate": 0.0001260395351147313, "loss": 0.4557, "step": 169620 }, { "epoch": 8.42505215059104, "grad_norm": 0.1728515625, "learning_rate": 0.00012599980133108177, "loss": 0.4891, "step": 169630 }, { "epoch": 8.42554882288666, "grad_norm": 0.1904296875, "learning_rate": 0.0001259600675474322, "loss": 0.4704, "step": 169640 }, { "epoch": 8.42604549518228, "grad_norm": 0.1748046875, "learning_rate": 0.00012592033376378268, "loss": 0.4891, "step": 169650 }, { "epoch": 8.426542167477898, "grad_norm": 0.169921875, "learning_rate": 0.0001258805999801331, "loss": 0.5038, "step": 169660 }, { "epoch": 8.427038839773518, "grad_norm": 0.181640625, "learning_rate": 0.00012584086619648357, "loss": 0.5051, "step": 169670 }, { "epoch": 8.427535512069136, "grad_norm": 0.17578125, "learning_rate": 0.00012580113241283402, "loss": 0.4965, "step": 169680 }, { "epoch": 8.428032184364756, "grad_norm": 0.248046875, "learning_rate": 0.0001257613986291845, "loss": 0.4932, "step": 169690 }, { "epoch": 8.428528856660375, "grad_norm": 0.1875, "learning_rate": 0.0001257216648455349, "loss": 0.5121, "step": 169700 }, { "epoch": 8.429025528955995, "grad_norm": 0.169921875, "learning_rate": 0.00012568193106188538, "loss": 0.4726, "step": 169710 }, { "epoch": 8.429522201251615, "grad_norm": 0.18359375, "learning_rate": 0.00012564219727823582, "loss": 0.4629, "step": 169720 }, { "epoch": 8.430018873547233, "grad_norm": 0.1826171875, "learning_rate": 0.0001256024634945863, "loss": 0.4679, "step": 169730 }, { "epoch": 8.430515545842853, "grad_norm": 0.2001953125, "learning_rate": 0.0001255627297109367, "loss": 0.4935, "step": 169740 }, { "epoch": 8.431012218138472, "grad_norm": 0.1796875, "learning_rate": 0.00012552299592728718, "loss": 0.4959, "step": 169750 }, { "epoch": 8.431508890434092, "grad_norm": 0.1865234375, "learning_rate": 0.00012548326214363763, "loss": 0.4851, "step": 169760 }, { "epoch": 8.43200556272971, "grad_norm": 0.1953125, "learning_rate": 0.0001254435283599881, "loss": 0.4833, "step": 169770 }, { "epoch": 8.43250223502533, "grad_norm": 0.1552734375, "learning_rate": 0.00012540379457633854, "loss": 0.4903, "step": 169780 }, { "epoch": 8.43299890732095, "grad_norm": 0.1787109375, "learning_rate": 0.00012536406079268899, "loss": 0.4767, "step": 169790 }, { "epoch": 8.433495579616569, "grad_norm": 0.1728515625, "learning_rate": 0.00012532432700903946, "loss": 0.4849, "step": 169800 }, { "epoch": 8.433992251912189, "grad_norm": 0.1826171875, "learning_rate": 0.0001252845932253899, "loss": 0.5229, "step": 169810 }, { "epoch": 8.434488924207807, "grad_norm": 0.1728515625, "learning_rate": 0.00012524485944174035, "loss": 0.4598, "step": 169820 }, { "epoch": 8.434985596503427, "grad_norm": 0.1875, "learning_rate": 0.0001252051256580908, "loss": 0.5054, "step": 169830 }, { "epoch": 8.435482268799046, "grad_norm": 0.158203125, "learning_rate": 0.00012516539187444126, "loss": 0.459, "step": 169840 }, { "epoch": 8.435978941094666, "grad_norm": 0.1513671875, "learning_rate": 0.0001251256580907917, "loss": 0.491, "step": 169850 }, { "epoch": 8.436475613390286, "grad_norm": 0.1806640625, "learning_rate": 0.00012508592430714215, "loss": 0.4843, "step": 169860 }, { "epoch": 8.436972285685904, "grad_norm": 0.189453125, "learning_rate": 0.0001250461905234926, "loss": 0.5034, "step": 169870 }, { "epoch": 8.437468957981524, "grad_norm": 0.1845703125, "learning_rate": 0.00012500645673984307, "loss": 0.4848, "step": 169880 }, { "epoch": 8.437965630277143, "grad_norm": 0.25, "learning_rate": 0.0001249667229561935, "loss": 0.5074, "step": 169890 }, { "epoch": 8.438462302572763, "grad_norm": 0.1884765625, "learning_rate": 0.00012492698917254395, "loss": 0.5188, "step": 169900 }, { "epoch": 8.438958974868381, "grad_norm": 0.203125, "learning_rate": 0.0001248872553888944, "loss": 0.5135, "step": 169910 }, { "epoch": 8.439455647164001, "grad_norm": 0.15625, "learning_rate": 0.00012484752160524487, "loss": 0.457, "step": 169920 }, { "epoch": 8.439952319459621, "grad_norm": 0.208984375, "learning_rate": 0.00012480778782159531, "loss": 0.5279, "step": 169930 }, { "epoch": 8.44044899175524, "grad_norm": 0.1923828125, "learning_rate": 0.00012476805403794576, "loss": 0.4814, "step": 169940 }, { "epoch": 8.44094566405086, "grad_norm": 0.1572265625, "learning_rate": 0.00012472832025429623, "loss": 0.485, "step": 169950 }, { "epoch": 8.441442336346478, "grad_norm": 0.1708984375, "learning_rate": 0.00012468858647064667, "loss": 0.5091, "step": 169960 }, { "epoch": 8.441939008642098, "grad_norm": 0.19140625, "learning_rate": 0.00012464885268699712, "loss": 0.4557, "step": 169970 }, { "epoch": 8.442435680937717, "grad_norm": 0.173828125, "learning_rate": 0.0001246091189033476, "loss": 0.4626, "step": 169980 }, { "epoch": 8.442932353233337, "grad_norm": 0.1689453125, "learning_rate": 0.00012456938511969803, "loss": 0.4495, "step": 169990 }, { "epoch": 8.443429025528957, "grad_norm": 0.1630859375, "learning_rate": 0.00012452965133604848, "loss": 0.4806, "step": 170000 }, { "epoch": 8.443925697824575, "grad_norm": 0.189453125, "learning_rate": 0.00012448991755239895, "loss": 0.4817, "step": 170010 }, { "epoch": 8.444422370120195, "grad_norm": 0.1572265625, "learning_rate": 0.0001244501837687494, "loss": 0.4995, "step": 170020 }, { "epoch": 8.444919042415814, "grad_norm": 0.1728515625, "learning_rate": 0.00012441044998509984, "loss": 0.4927, "step": 170030 }, { "epoch": 8.445415714711434, "grad_norm": 0.1650390625, "learning_rate": 0.00012437071620145028, "loss": 0.5012, "step": 170040 }, { "epoch": 8.445912387007052, "grad_norm": 0.1650390625, "learning_rate": 0.00012433098241780075, "loss": 0.493, "step": 170050 }, { "epoch": 8.446409059302672, "grad_norm": 0.166015625, "learning_rate": 0.0001242912486341512, "loss": 0.525, "step": 170060 }, { "epoch": 8.446905731598292, "grad_norm": 0.1748046875, "learning_rate": 0.00012425151485050164, "loss": 0.4894, "step": 170070 }, { "epoch": 8.44740240389391, "grad_norm": 0.1708984375, "learning_rate": 0.00012421178106685209, "loss": 0.4814, "step": 170080 }, { "epoch": 8.44789907618953, "grad_norm": 0.173828125, "learning_rate": 0.00012417204728320256, "loss": 0.4779, "step": 170090 }, { "epoch": 8.448395748485149, "grad_norm": 0.1650390625, "learning_rate": 0.000124132313499553, "loss": 0.4791, "step": 170100 }, { "epoch": 8.44889242078077, "grad_norm": 0.1767578125, "learning_rate": 0.00012409257971590345, "loss": 0.4647, "step": 170110 }, { "epoch": 8.449389093076388, "grad_norm": 0.1875, "learning_rate": 0.0001240528459322539, "loss": 0.5275, "step": 170120 }, { "epoch": 8.449885765372008, "grad_norm": 0.16015625, "learning_rate": 0.00012401311214860436, "loss": 0.4688, "step": 170130 }, { "epoch": 8.450382437667628, "grad_norm": 0.17578125, "learning_rate": 0.0001239733783649548, "loss": 0.458, "step": 170140 }, { "epoch": 8.450879109963246, "grad_norm": 0.224609375, "learning_rate": 0.00012393364458130525, "loss": 0.4909, "step": 170150 }, { "epoch": 8.451375782258866, "grad_norm": 0.1591796875, "learning_rate": 0.00012389391079765572, "loss": 0.5083, "step": 170160 }, { "epoch": 8.451872454554485, "grad_norm": 0.169921875, "learning_rate": 0.00012385417701400617, "loss": 0.5134, "step": 170170 }, { "epoch": 8.452369126850105, "grad_norm": 0.181640625, "learning_rate": 0.00012381444323035664, "loss": 0.4914, "step": 170180 }, { "epoch": 8.452865799145723, "grad_norm": 0.1640625, "learning_rate": 0.00012377470944670705, "loss": 0.4646, "step": 170190 }, { "epoch": 8.453362471441343, "grad_norm": 0.1630859375, "learning_rate": 0.00012373497566305753, "loss": 0.4769, "step": 170200 }, { "epoch": 8.453859143736963, "grad_norm": 0.1669921875, "learning_rate": 0.00012369524187940797, "loss": 0.4946, "step": 170210 }, { "epoch": 8.454355816032582, "grad_norm": 0.185546875, "learning_rate": 0.00012365550809575844, "loss": 0.503, "step": 170220 }, { "epoch": 8.454852488328202, "grad_norm": 0.181640625, "learning_rate": 0.00012361577431210886, "loss": 0.5124, "step": 170230 }, { "epoch": 8.45534916062382, "grad_norm": 0.1630859375, "learning_rate": 0.00012357604052845933, "loss": 0.4763, "step": 170240 }, { "epoch": 8.45584583291944, "grad_norm": 0.166015625, "learning_rate": 0.00012353630674480977, "loss": 0.4672, "step": 170250 }, { "epoch": 8.456342505215058, "grad_norm": 0.162109375, "learning_rate": 0.00012349657296116024, "loss": 0.5051, "step": 170260 }, { "epoch": 8.456839177510679, "grad_norm": 0.1748046875, "learning_rate": 0.00012345683917751066, "loss": 0.4574, "step": 170270 }, { "epoch": 8.457335849806299, "grad_norm": 0.16015625, "learning_rate": 0.00012341710539386113, "loss": 0.4711, "step": 170280 }, { "epoch": 8.457832522101917, "grad_norm": 0.1982421875, "learning_rate": 0.00012337737161021158, "loss": 0.4805, "step": 170290 }, { "epoch": 8.458329194397537, "grad_norm": 0.1630859375, "learning_rate": 0.00012333763782656205, "loss": 0.4692, "step": 170300 }, { "epoch": 8.458825866693156, "grad_norm": 0.1708984375, "learning_rate": 0.0001232979040429125, "loss": 0.4767, "step": 170310 }, { "epoch": 8.459322538988776, "grad_norm": 0.1689453125, "learning_rate": 0.00012325817025926294, "loss": 0.4833, "step": 170320 }, { "epoch": 8.459819211284394, "grad_norm": 0.2099609375, "learning_rate": 0.0001232184364756134, "loss": 0.488, "step": 170330 }, { "epoch": 8.460315883580014, "grad_norm": 0.1796875, "learning_rate": 0.00012317870269196385, "loss": 0.529, "step": 170340 }, { "epoch": 8.460812555875632, "grad_norm": 0.1806640625, "learning_rate": 0.0001231389689083143, "loss": 0.4887, "step": 170350 }, { "epoch": 8.461309228171253, "grad_norm": 0.1767578125, "learning_rate": 0.00012309923512466474, "loss": 0.5137, "step": 170360 }, { "epoch": 8.461805900466873, "grad_norm": 0.197265625, "learning_rate": 0.0001230595013410152, "loss": 0.499, "step": 170370 }, { "epoch": 8.462302572762491, "grad_norm": 0.150390625, "learning_rate": 0.00012301976755736566, "loss": 0.5015, "step": 170380 }, { "epoch": 8.462799245058111, "grad_norm": 0.162109375, "learning_rate": 0.00012298003377371613, "loss": 0.4844, "step": 170390 }, { "epoch": 8.46329591735373, "grad_norm": 0.1796875, "learning_rate": 0.00012294029999006655, "loss": 0.4932, "step": 170400 }, { "epoch": 8.46379258964935, "grad_norm": 0.1669921875, "learning_rate": 0.00012290056620641702, "loss": 0.4661, "step": 170410 }, { "epoch": 8.464289261944968, "grad_norm": 0.185546875, "learning_rate": 0.00012286083242276746, "loss": 0.504, "step": 170420 }, { "epoch": 8.464785934240588, "grad_norm": 0.1630859375, "learning_rate": 0.00012282109863911793, "loss": 0.4773, "step": 170430 }, { "epoch": 8.465282606536208, "grad_norm": 0.16015625, "learning_rate": 0.00012278136485546835, "loss": 0.4944, "step": 170440 }, { "epoch": 8.465779278831826, "grad_norm": 0.1904296875, "learning_rate": 0.00012274163107181882, "loss": 0.4932, "step": 170450 }, { "epoch": 8.466275951127447, "grad_norm": 0.1689453125, "learning_rate": 0.00012270189728816927, "loss": 0.4765, "step": 170460 }, { "epoch": 8.466772623423065, "grad_norm": 0.1650390625, "learning_rate": 0.00012266216350451974, "loss": 0.487, "step": 170470 }, { "epoch": 8.467269295718685, "grad_norm": 0.2119140625, "learning_rate": 0.00012262242972087018, "loss": 0.4692, "step": 170480 }, { "epoch": 8.467765968014303, "grad_norm": 0.171875, "learning_rate": 0.00012258269593722063, "loss": 0.4835, "step": 170490 }, { "epoch": 8.468262640309923, "grad_norm": 0.1591796875, "learning_rate": 0.0001225429621535711, "loss": 0.4907, "step": 170500 }, { "epoch": 8.468759312605544, "grad_norm": 0.17578125, "learning_rate": 0.00012250322836992154, "loss": 0.4705, "step": 170510 }, { "epoch": 8.469255984901162, "grad_norm": 0.1982421875, "learning_rate": 0.00012246349458627199, "loss": 0.4721, "step": 170520 }, { "epoch": 8.469752657196782, "grad_norm": 0.16796875, "learning_rate": 0.00012242376080262243, "loss": 0.4743, "step": 170530 }, { "epoch": 8.4702493294924, "grad_norm": 0.1826171875, "learning_rate": 0.0001223840270189729, "loss": 0.4876, "step": 170540 }, { "epoch": 8.47074600178802, "grad_norm": 0.1640625, "learning_rate": 0.00012234429323532335, "loss": 0.5025, "step": 170550 }, { "epoch": 8.471242674083639, "grad_norm": 0.1923828125, "learning_rate": 0.0001223045594516738, "loss": 0.4845, "step": 170560 }, { "epoch": 8.471739346379259, "grad_norm": 0.1640625, "learning_rate": 0.00012226482566802423, "loss": 0.5168, "step": 170570 }, { "epoch": 8.472236018674879, "grad_norm": 0.1767578125, "learning_rate": 0.0001222250918843747, "loss": 0.498, "step": 170580 }, { "epoch": 8.472732690970497, "grad_norm": 0.1650390625, "learning_rate": 0.00012218535810072515, "loss": 0.4583, "step": 170590 }, { "epoch": 8.473229363266118, "grad_norm": 0.1572265625, "learning_rate": 0.0001221456243170756, "loss": 0.4809, "step": 170600 }, { "epoch": 8.473726035561736, "grad_norm": 0.1845703125, "learning_rate": 0.00012210589053342604, "loss": 0.5027, "step": 170610 }, { "epoch": 8.474222707857356, "grad_norm": 0.16015625, "learning_rate": 0.00012206615674977651, "loss": 0.459, "step": 170620 }, { "epoch": 8.474719380152974, "grad_norm": 0.1669921875, "learning_rate": 0.00012202642296612697, "loss": 0.4806, "step": 170630 }, { "epoch": 8.475216052448594, "grad_norm": 0.1923828125, "learning_rate": 0.0001219866891824774, "loss": 0.4887, "step": 170640 }, { "epoch": 8.475712724744215, "grad_norm": 0.1865234375, "learning_rate": 0.00012194695539882786, "loss": 0.4781, "step": 170650 }, { "epoch": 8.476209397039833, "grad_norm": 0.177734375, "learning_rate": 0.00012190722161517831, "loss": 0.4878, "step": 170660 }, { "epoch": 8.476706069335453, "grad_norm": 0.19921875, "learning_rate": 0.00012186748783152877, "loss": 0.5021, "step": 170670 }, { "epoch": 8.477202741631071, "grad_norm": 0.17578125, "learning_rate": 0.0001218277540478792, "loss": 0.4751, "step": 170680 }, { "epoch": 8.477699413926691, "grad_norm": 0.1728515625, "learning_rate": 0.00012178802026422966, "loss": 0.4795, "step": 170690 }, { "epoch": 8.47819608622231, "grad_norm": 0.197265625, "learning_rate": 0.00012174828648058012, "loss": 0.5012, "step": 170700 }, { "epoch": 8.47869275851793, "grad_norm": 0.1953125, "learning_rate": 0.00012170855269693058, "loss": 0.5053, "step": 170710 }, { "epoch": 8.47918943081355, "grad_norm": 0.1640625, "learning_rate": 0.00012166881891328102, "loss": 0.4818, "step": 170720 }, { "epoch": 8.479686103109168, "grad_norm": 0.173828125, "learning_rate": 0.00012162908512963148, "loss": 0.4805, "step": 170730 }, { "epoch": 8.480182775404788, "grad_norm": 0.171875, "learning_rate": 0.00012158935134598192, "loss": 0.5127, "step": 170740 }, { "epoch": 8.480679447700407, "grad_norm": 0.1630859375, "learning_rate": 0.00012154961756233238, "loss": 0.5017, "step": 170750 }, { "epoch": 8.481176119996027, "grad_norm": 0.1787109375, "learning_rate": 0.00012150988377868282, "loss": 0.4945, "step": 170760 }, { "epoch": 8.481672792291645, "grad_norm": 0.185546875, "learning_rate": 0.00012147014999503328, "loss": 0.4775, "step": 170770 }, { "epoch": 8.482169464587265, "grad_norm": 0.197265625, "learning_rate": 0.00012143041621138374, "loss": 0.4812, "step": 170780 }, { "epoch": 8.482666136882886, "grad_norm": 0.1552734375, "learning_rate": 0.0001213906824277342, "loss": 0.4761, "step": 170790 }, { "epoch": 8.483162809178504, "grad_norm": 0.173828125, "learning_rate": 0.00012135094864408463, "loss": 0.4856, "step": 170800 }, { "epoch": 8.483659481474124, "grad_norm": 0.1611328125, "learning_rate": 0.00012131121486043509, "loss": 0.4723, "step": 170810 }, { "epoch": 8.484156153769742, "grad_norm": 0.189453125, "learning_rate": 0.00012127148107678554, "loss": 0.5, "step": 170820 }, { "epoch": 8.484652826065362, "grad_norm": 0.1796875, "learning_rate": 0.000121231747293136, "loss": 0.4597, "step": 170830 }, { "epoch": 8.48514949836098, "grad_norm": 0.1572265625, "learning_rate": 0.00012119201350948646, "loss": 0.4632, "step": 170840 }, { "epoch": 8.4856461706566, "grad_norm": 0.1591796875, "learning_rate": 0.00012115227972583689, "loss": 0.4383, "step": 170850 }, { "epoch": 8.48614284295222, "grad_norm": 0.19140625, "learning_rate": 0.00012111254594218735, "loss": 0.4637, "step": 170860 }, { "epoch": 8.48663951524784, "grad_norm": 0.150390625, "learning_rate": 0.0001210728121585378, "loss": 0.485, "step": 170870 }, { "epoch": 8.48713618754346, "grad_norm": 0.1884765625, "learning_rate": 0.00012103307837488826, "loss": 0.4719, "step": 170880 }, { "epoch": 8.487632859839078, "grad_norm": 0.1630859375, "learning_rate": 0.0001209933445912387, "loss": 0.4889, "step": 170890 }, { "epoch": 8.488129532134698, "grad_norm": 0.1767578125, "learning_rate": 0.00012095361080758915, "loss": 0.5083, "step": 170900 }, { "epoch": 8.488626204430316, "grad_norm": 0.1640625, "learning_rate": 0.00012091387702393961, "loss": 0.4648, "step": 170910 }, { "epoch": 8.489122876725936, "grad_norm": 0.1884765625, "learning_rate": 0.00012087414324029007, "loss": 0.462, "step": 170920 }, { "epoch": 8.489619549021555, "grad_norm": 0.17578125, "learning_rate": 0.00012083440945664051, "loss": 0.4874, "step": 170930 }, { "epoch": 8.490116221317175, "grad_norm": 0.2099609375, "learning_rate": 0.00012079467567299097, "loss": 0.4802, "step": 170940 }, { "epoch": 8.490612893612795, "grad_norm": 0.17578125, "learning_rate": 0.00012075494188934143, "loss": 0.5076, "step": 170950 }, { "epoch": 8.491109565908413, "grad_norm": 0.15234375, "learning_rate": 0.00012071520810569188, "loss": 0.4954, "step": 170960 }, { "epoch": 8.491606238204033, "grad_norm": 0.1845703125, "learning_rate": 0.00012067547432204232, "loss": 0.497, "step": 170970 }, { "epoch": 8.492102910499652, "grad_norm": 0.169921875, "learning_rate": 0.00012063574053839277, "loss": 0.5424, "step": 170980 }, { "epoch": 8.492599582795272, "grad_norm": 0.1611328125, "learning_rate": 0.00012059600675474323, "loss": 0.5002, "step": 170990 }, { "epoch": 8.49309625509089, "grad_norm": 0.1767578125, "learning_rate": 0.00012055627297109369, "loss": 0.4813, "step": 171000 }, { "epoch": 8.49359292738651, "grad_norm": 0.16796875, "learning_rate": 0.00012051653918744412, "loss": 0.5072, "step": 171010 }, { "epoch": 8.49408959968213, "grad_norm": 0.185546875, "learning_rate": 0.00012047680540379458, "loss": 0.5017, "step": 171020 }, { "epoch": 8.494586271977749, "grad_norm": 0.169921875, "learning_rate": 0.00012043707162014503, "loss": 0.4959, "step": 171030 }, { "epoch": 8.495082944273369, "grad_norm": 0.18359375, "learning_rate": 0.00012039733783649549, "loss": 0.4379, "step": 171040 }, { "epoch": 8.495579616568987, "grad_norm": 0.158203125, "learning_rate": 0.00012035760405284592, "loss": 0.4611, "step": 171050 }, { "epoch": 8.496076288864607, "grad_norm": 0.1884765625, "learning_rate": 0.00012031787026919638, "loss": 0.5006, "step": 171060 }, { "epoch": 8.496572961160226, "grad_norm": 0.1708984375, "learning_rate": 0.00012027813648554684, "loss": 0.4515, "step": 171070 }, { "epoch": 8.497069633455846, "grad_norm": 0.197265625, "learning_rate": 0.0001202384027018973, "loss": 0.487, "step": 171080 }, { "epoch": 8.497566305751466, "grad_norm": 0.2041015625, "learning_rate": 0.00012019866891824774, "loss": 0.484, "step": 171090 }, { "epoch": 8.498062978047084, "grad_norm": 0.1962890625, "learning_rate": 0.0001201589351345982, "loss": 0.4772, "step": 171100 }, { "epoch": 8.498559650342704, "grad_norm": 0.16796875, "learning_rate": 0.00012011920135094866, "loss": 0.4699, "step": 171110 }, { "epoch": 8.499056322638323, "grad_norm": 0.1689453125, "learning_rate": 0.00012007946756729911, "loss": 0.4888, "step": 171120 }, { "epoch": 8.499552994933943, "grad_norm": 0.1787109375, "learning_rate": 0.00012003973378364955, "loss": 0.461, "step": 171130 }, { "epoch": 8.500049667229561, "grad_norm": 0.1787109375, "learning_rate": 0.00012, "loss": 0.5063, "step": 171140 }, { "epoch": 8.500546339525181, "grad_norm": 0.1845703125, "learning_rate": 0.00011996026621635046, "loss": 0.4832, "step": 171150 }, { "epoch": 8.501043011820801, "grad_norm": 0.1689453125, "learning_rate": 0.00011992053243270092, "loss": 0.4892, "step": 171160 }, { "epoch": 8.50153968411642, "grad_norm": 0.1767578125, "learning_rate": 0.00011988079864905135, "loss": 0.4947, "step": 171170 }, { "epoch": 8.50203635641204, "grad_norm": 0.17578125, "learning_rate": 0.00011984106486540181, "loss": 0.4856, "step": 171180 }, { "epoch": 8.502533028707658, "grad_norm": 0.1845703125, "learning_rate": 0.00011980133108175226, "loss": 0.4755, "step": 171190 }, { "epoch": 8.503029701003278, "grad_norm": 0.171875, "learning_rate": 0.00011976159729810272, "loss": 0.5107, "step": 171200 }, { "epoch": 8.503526373298897, "grad_norm": 0.17578125, "learning_rate": 0.00011972186351445315, "loss": 0.5227, "step": 171210 }, { "epoch": 8.504023045594517, "grad_norm": 0.1552734375, "learning_rate": 0.00011968212973080361, "loss": 0.4864, "step": 171220 }, { "epoch": 8.504519717890137, "grad_norm": 0.1748046875, "learning_rate": 0.00011964239594715407, "loss": 0.4398, "step": 171230 }, { "epoch": 8.505016390185755, "grad_norm": 0.1875, "learning_rate": 0.00011960266216350453, "loss": 0.4832, "step": 171240 }, { "epoch": 8.505513062481375, "grad_norm": 0.1796875, "learning_rate": 0.00011956292837985498, "loss": 0.4942, "step": 171250 }, { "epoch": 8.506009734776994, "grad_norm": 0.1611328125, "learning_rate": 0.00011952319459620543, "loss": 0.4764, "step": 171260 }, { "epoch": 8.506506407072614, "grad_norm": 0.18359375, "learning_rate": 0.00011948346081255589, "loss": 0.4805, "step": 171270 }, { "epoch": 8.507003079368232, "grad_norm": 0.1943359375, "learning_rate": 0.00011944372702890634, "loss": 0.5, "step": 171280 }, { "epoch": 8.507499751663852, "grad_norm": 0.15234375, "learning_rate": 0.00011940399324525679, "loss": 0.4862, "step": 171290 }, { "epoch": 8.507996423959472, "grad_norm": 0.197265625, "learning_rate": 0.00011936425946160723, "loss": 0.4842, "step": 171300 }, { "epoch": 8.50849309625509, "grad_norm": 0.169921875, "learning_rate": 0.00011932452567795769, "loss": 0.4655, "step": 171310 }, { "epoch": 8.50898976855071, "grad_norm": 0.166015625, "learning_rate": 0.00011928479189430815, "loss": 0.4868, "step": 171320 }, { "epoch": 8.509486440846329, "grad_norm": 0.177734375, "learning_rate": 0.0001192450581106586, "loss": 0.4836, "step": 171330 }, { "epoch": 8.50998311314195, "grad_norm": 0.1728515625, "learning_rate": 0.00011920532432700904, "loss": 0.4904, "step": 171340 }, { "epoch": 8.510479785437568, "grad_norm": 0.1650390625, "learning_rate": 0.0001191655905433595, "loss": 0.4697, "step": 171350 }, { "epoch": 8.510976457733188, "grad_norm": 0.181640625, "learning_rate": 0.00011912585675970995, "loss": 0.5178, "step": 171360 }, { "epoch": 8.511473130028808, "grad_norm": 0.1748046875, "learning_rate": 0.00011908612297606041, "loss": 0.4777, "step": 171370 }, { "epoch": 8.511969802324426, "grad_norm": 0.197265625, "learning_rate": 0.00011904638919241084, "loss": 0.5055, "step": 171380 }, { "epoch": 8.512466474620046, "grad_norm": 0.1787109375, "learning_rate": 0.0001190066554087613, "loss": 0.4759, "step": 171390 }, { "epoch": 8.512963146915665, "grad_norm": 0.177734375, "learning_rate": 0.00011896692162511176, "loss": 0.4742, "step": 171400 }, { "epoch": 8.513459819211285, "grad_norm": 0.1767578125, "learning_rate": 0.00011892718784146221, "loss": 0.4928, "step": 171410 }, { "epoch": 8.513956491506903, "grad_norm": 0.1767578125, "learning_rate": 0.00011888745405781266, "loss": 0.4833, "step": 171420 }, { "epoch": 8.514453163802523, "grad_norm": 0.171875, "learning_rate": 0.00011884772027416312, "loss": 0.514, "step": 171430 }, { "epoch": 8.514949836098143, "grad_norm": 0.1767578125, "learning_rate": 0.00011880798649051356, "loss": 0.4703, "step": 171440 }, { "epoch": 8.515446508393762, "grad_norm": 0.1611328125, "learning_rate": 0.00011876825270686402, "loss": 0.4729, "step": 171450 }, { "epoch": 8.515943180689382, "grad_norm": 0.1689453125, "learning_rate": 0.00011872851892321446, "loss": 0.4881, "step": 171460 }, { "epoch": 8.516439852985, "grad_norm": 0.208984375, "learning_rate": 0.00011868878513956492, "loss": 0.4682, "step": 171470 }, { "epoch": 8.51693652528062, "grad_norm": 0.162109375, "learning_rate": 0.00011864905135591538, "loss": 0.4637, "step": 171480 }, { "epoch": 8.517433197576239, "grad_norm": 0.1728515625, "learning_rate": 0.00011860931757226584, "loss": 0.4956, "step": 171490 }, { "epoch": 8.517929869871859, "grad_norm": 0.1806640625, "learning_rate": 0.00011856958378861627, "loss": 0.4511, "step": 171500 }, { "epoch": 8.518426542167479, "grad_norm": 0.1962890625, "learning_rate": 0.00011852985000496672, "loss": 0.4889, "step": 171510 }, { "epoch": 8.518923214463097, "grad_norm": 0.166015625, "learning_rate": 0.00011849011622131718, "loss": 0.45, "step": 171520 }, { "epoch": 8.519419886758717, "grad_norm": 0.1650390625, "learning_rate": 0.00011845038243766764, "loss": 0.4617, "step": 171530 }, { "epoch": 8.519916559054336, "grad_norm": 0.181640625, "learning_rate": 0.00011841064865401807, "loss": 0.5195, "step": 171540 }, { "epoch": 8.520413231349956, "grad_norm": 0.17578125, "learning_rate": 0.00011837091487036853, "loss": 0.4709, "step": 171550 }, { "epoch": 8.520909903645574, "grad_norm": 0.1689453125, "learning_rate": 0.00011833118108671899, "loss": 0.4762, "step": 171560 }, { "epoch": 8.521406575941194, "grad_norm": 0.16796875, "learning_rate": 0.00011829144730306944, "loss": 0.4603, "step": 171570 }, { "epoch": 8.521903248236814, "grad_norm": 0.1650390625, "learning_rate": 0.00011825171351941989, "loss": 0.4745, "step": 171580 }, { "epoch": 8.522399920532433, "grad_norm": 0.19140625, "learning_rate": 0.00011821197973577033, "loss": 0.501, "step": 171590 }, { "epoch": 8.522896592828053, "grad_norm": 0.1796875, "learning_rate": 0.00011817224595212079, "loss": 0.4991, "step": 171600 }, { "epoch": 8.523393265123671, "grad_norm": 0.1748046875, "learning_rate": 0.00011813251216847125, "loss": 0.4872, "step": 171610 }, { "epoch": 8.523889937419291, "grad_norm": 0.166015625, "learning_rate": 0.00011809277838482169, "loss": 0.4998, "step": 171620 }, { "epoch": 8.52438660971491, "grad_norm": 0.1787109375, "learning_rate": 0.00011805304460117215, "loss": 0.4902, "step": 171630 }, { "epoch": 8.52488328201053, "grad_norm": 0.1630859375, "learning_rate": 0.00011801331081752261, "loss": 0.4906, "step": 171640 }, { "epoch": 8.52537995430615, "grad_norm": 0.169921875, "learning_rate": 0.00011797357703387307, "loss": 0.4801, "step": 171650 }, { "epoch": 8.525876626601768, "grad_norm": 0.1689453125, "learning_rate": 0.00011793384325022352, "loss": 0.4798, "step": 171660 }, { "epoch": 8.526373298897388, "grad_norm": 0.1884765625, "learning_rate": 0.00011789410946657395, "loss": 0.4599, "step": 171670 }, { "epoch": 8.526869971193006, "grad_norm": 0.197265625, "learning_rate": 0.00011785437568292441, "loss": 0.4866, "step": 171680 }, { "epoch": 8.527366643488627, "grad_norm": 0.177734375, "learning_rate": 0.00011781464189927487, "loss": 0.5076, "step": 171690 }, { "epoch": 8.527863315784245, "grad_norm": 0.1630859375, "learning_rate": 0.00011777490811562533, "loss": 0.4867, "step": 171700 }, { "epoch": 8.528359988079865, "grad_norm": 0.16796875, "learning_rate": 0.00011773517433197576, "loss": 0.4963, "step": 171710 }, { "epoch": 8.528856660375485, "grad_norm": 0.1845703125, "learning_rate": 0.00011769544054832622, "loss": 0.5126, "step": 171720 }, { "epoch": 8.529353332671104, "grad_norm": 0.1708984375, "learning_rate": 0.00011765570676467667, "loss": 0.4964, "step": 171730 }, { "epoch": 8.529850004966724, "grad_norm": 0.1611328125, "learning_rate": 0.00011761597298102713, "loss": 0.4977, "step": 171740 }, { "epoch": 8.530346677262342, "grad_norm": 0.1640625, "learning_rate": 0.00011757623919737756, "loss": 0.5222, "step": 171750 }, { "epoch": 8.530843349557962, "grad_norm": 0.1845703125, "learning_rate": 0.00011753650541372802, "loss": 0.4701, "step": 171760 }, { "epoch": 8.53134002185358, "grad_norm": 0.171875, "learning_rate": 0.00011749677163007848, "loss": 0.4865, "step": 171770 }, { "epoch": 8.5318366941492, "grad_norm": 0.193359375, "learning_rate": 0.00011745703784642894, "loss": 0.4622, "step": 171780 }, { "epoch": 8.532333366444819, "grad_norm": 0.1708984375, "learning_rate": 0.00011741730406277938, "loss": 0.4803, "step": 171790 }, { "epoch": 8.532830038740439, "grad_norm": 0.16796875, "learning_rate": 0.00011737757027912984, "loss": 0.4632, "step": 171800 }, { "epoch": 8.533326711036059, "grad_norm": 0.1787109375, "learning_rate": 0.0001173378364954803, "loss": 0.4871, "step": 171810 }, { "epoch": 8.533823383331677, "grad_norm": 0.185546875, "learning_rate": 0.00011729810271183075, "loss": 0.5087, "step": 171820 }, { "epoch": 8.534320055627298, "grad_norm": 0.1640625, "learning_rate": 0.00011725836892818118, "loss": 0.4876, "step": 171830 }, { "epoch": 8.534816727922916, "grad_norm": 0.2021484375, "learning_rate": 0.00011721863514453164, "loss": 0.4843, "step": 171840 }, { "epoch": 8.535313400218536, "grad_norm": 0.1669921875, "learning_rate": 0.0001171789013608821, "loss": 0.4838, "step": 171850 }, { "epoch": 8.535810072514154, "grad_norm": 0.1708984375, "learning_rate": 0.00011713916757723256, "loss": 0.463, "step": 171860 }, { "epoch": 8.536306744809774, "grad_norm": 0.1669921875, "learning_rate": 0.00011709943379358299, "loss": 0.4703, "step": 171870 }, { "epoch": 8.536803417105395, "grad_norm": 0.16796875, "learning_rate": 0.00011705970000993345, "loss": 0.4724, "step": 171880 }, { "epoch": 8.537300089401013, "grad_norm": 0.189453125, "learning_rate": 0.0001170199662262839, "loss": 0.5123, "step": 171890 }, { "epoch": 8.537796761696633, "grad_norm": 0.1884765625, "learning_rate": 0.00011698023244263436, "loss": 0.4887, "step": 171900 }, { "epoch": 8.538293433992251, "grad_norm": 0.17578125, "learning_rate": 0.00011694049865898479, "loss": 0.4584, "step": 171910 }, { "epoch": 8.538790106287871, "grad_norm": 0.1826171875, "learning_rate": 0.00011690076487533525, "loss": 0.4788, "step": 171920 }, { "epoch": 8.53928677858349, "grad_norm": 0.171875, "learning_rate": 0.00011686103109168571, "loss": 0.5075, "step": 171930 }, { "epoch": 8.53978345087911, "grad_norm": 0.16796875, "learning_rate": 0.00011682129730803617, "loss": 0.5154, "step": 171940 }, { "epoch": 8.54028012317473, "grad_norm": 0.1669921875, "learning_rate": 0.00011678156352438661, "loss": 0.4921, "step": 171950 }, { "epoch": 8.540776795470348, "grad_norm": 0.1787109375, "learning_rate": 0.00011674182974073707, "loss": 0.4844, "step": 171960 }, { "epoch": 8.541273467765969, "grad_norm": 0.1943359375, "learning_rate": 0.00011670209595708753, "loss": 0.4855, "step": 171970 }, { "epoch": 8.541770140061587, "grad_norm": 0.1796875, "learning_rate": 0.00011666236217343798, "loss": 0.4719, "step": 171980 }, { "epoch": 8.542266812357207, "grad_norm": 0.2373046875, "learning_rate": 0.00011662262838978841, "loss": 0.4725, "step": 171990 }, { "epoch": 8.542763484652825, "grad_norm": 0.177734375, "learning_rate": 0.00011658289460613887, "loss": 0.4709, "step": 172000 }, { "epoch": 8.543260156948445, "grad_norm": 0.1865234375, "learning_rate": 0.00011654316082248933, "loss": 0.4989, "step": 172010 }, { "epoch": 8.543756829244066, "grad_norm": 0.1796875, "learning_rate": 0.00011650342703883979, "loss": 0.4963, "step": 172020 }, { "epoch": 8.544253501539684, "grad_norm": 0.1748046875, "learning_rate": 0.00011646369325519022, "loss": 0.4863, "step": 172030 }, { "epoch": 8.544750173835304, "grad_norm": 0.1708984375, "learning_rate": 0.00011642395947154068, "loss": 0.4976, "step": 172040 }, { "epoch": 8.545246846130922, "grad_norm": 0.1884765625, "learning_rate": 0.00011638422568789113, "loss": 0.4842, "step": 172050 }, { "epoch": 8.545743518426542, "grad_norm": 0.1611328125, "learning_rate": 0.00011634449190424159, "loss": 0.4863, "step": 172060 }, { "epoch": 8.54624019072216, "grad_norm": 0.2041015625, "learning_rate": 0.00011630475812059205, "loss": 0.4896, "step": 172070 }, { "epoch": 8.546736863017781, "grad_norm": 0.1728515625, "learning_rate": 0.00011626502433694248, "loss": 0.495, "step": 172080 }, { "epoch": 8.547233535313401, "grad_norm": 0.18359375, "learning_rate": 0.00011622529055329294, "loss": 0.4976, "step": 172090 }, { "epoch": 8.54773020760902, "grad_norm": 0.1669921875, "learning_rate": 0.0001161855567696434, "loss": 0.5086, "step": 172100 }, { "epoch": 8.54822687990464, "grad_norm": 0.1962890625, "learning_rate": 0.00011614582298599385, "loss": 0.4871, "step": 172110 }, { "epoch": 8.548723552200258, "grad_norm": 0.171875, "learning_rate": 0.0001161060892023443, "loss": 0.4809, "step": 172120 }, { "epoch": 8.549220224495878, "grad_norm": 0.1806640625, "learning_rate": 0.00011606635541869476, "loss": 0.4747, "step": 172130 }, { "epoch": 8.549716896791496, "grad_norm": 0.1630859375, "learning_rate": 0.0001160266216350452, "loss": 0.5212, "step": 172140 }, { "epoch": 8.550213569087116, "grad_norm": 0.1865234375, "learning_rate": 0.00011598688785139566, "loss": 0.4953, "step": 172150 }, { "epoch": 8.550710241382736, "grad_norm": 0.1806640625, "learning_rate": 0.0001159471540677461, "loss": 0.4812, "step": 172160 }, { "epoch": 8.551206913678355, "grad_norm": 0.1591796875, "learning_rate": 0.00011590742028409656, "loss": 0.4604, "step": 172170 }, { "epoch": 8.551703585973975, "grad_norm": 0.177734375, "learning_rate": 0.00011586768650044702, "loss": 0.4723, "step": 172180 }, { "epoch": 8.552200258269593, "grad_norm": 0.2021484375, "learning_rate": 0.00011582795271679748, "loss": 0.4686, "step": 172190 }, { "epoch": 8.552696930565213, "grad_norm": 0.1689453125, "learning_rate": 0.0001157882189331479, "loss": 0.4577, "step": 172200 }, { "epoch": 8.553193602860832, "grad_norm": 0.197265625, "learning_rate": 0.00011574848514949836, "loss": 0.4925, "step": 172210 }, { "epoch": 8.553690275156452, "grad_norm": 0.1796875, "learning_rate": 0.00011570875136584882, "loss": 0.5066, "step": 172220 }, { "epoch": 8.55418694745207, "grad_norm": 0.1708984375, "learning_rate": 0.00011566901758219928, "loss": 0.4873, "step": 172230 }, { "epoch": 8.55468361974769, "grad_norm": 0.1669921875, "learning_rate": 0.00011562928379854971, "loss": 0.4874, "step": 172240 }, { "epoch": 8.55518029204331, "grad_norm": 0.1748046875, "learning_rate": 0.00011558955001490017, "loss": 0.4744, "step": 172250 }, { "epoch": 8.555676964338929, "grad_norm": 0.16796875, "learning_rate": 0.00011554981623125063, "loss": 0.4727, "step": 172260 }, { "epoch": 8.556173636634549, "grad_norm": 0.1591796875, "learning_rate": 0.00011551008244760108, "loss": 0.4891, "step": 172270 }, { "epoch": 8.556670308930167, "grad_norm": 0.1806640625, "learning_rate": 0.00011547034866395153, "loss": 0.4895, "step": 172280 }, { "epoch": 8.557166981225787, "grad_norm": 0.1796875, "learning_rate": 0.00011543061488030197, "loss": 0.4906, "step": 172290 }, { "epoch": 8.557663653521406, "grad_norm": 0.2041015625, "learning_rate": 0.00011539088109665243, "loss": 0.493, "step": 172300 }, { "epoch": 8.558160325817026, "grad_norm": 0.1806640625, "learning_rate": 0.00011535114731300289, "loss": 0.5026, "step": 172310 }, { "epoch": 8.558656998112646, "grad_norm": 0.1826171875, "learning_rate": 0.00011531141352935333, "loss": 0.4587, "step": 172320 }, { "epoch": 8.559153670408264, "grad_norm": 0.1572265625, "learning_rate": 0.00011527167974570379, "loss": 0.4745, "step": 172330 }, { "epoch": 8.559650342703884, "grad_norm": 0.19921875, "learning_rate": 0.00011523194596205425, "loss": 0.4961, "step": 172340 }, { "epoch": 8.560147014999503, "grad_norm": 0.1748046875, "learning_rate": 0.0001151922121784047, "loss": 0.5004, "step": 172350 }, { "epoch": 8.560643687295123, "grad_norm": 0.1728515625, "learning_rate": 0.00011515247839475514, "loss": 0.4838, "step": 172360 }, { "epoch": 8.561140359590741, "grad_norm": 0.19140625, "learning_rate": 0.0001151127446111056, "loss": 0.5345, "step": 172370 }, { "epoch": 8.561637031886361, "grad_norm": 0.16796875, "learning_rate": 0.00011507301082745605, "loss": 0.4883, "step": 172380 }, { "epoch": 8.562133704181981, "grad_norm": 0.19140625, "learning_rate": 0.00011503327704380651, "loss": 0.4802, "step": 172390 }, { "epoch": 8.5626303764776, "grad_norm": 0.1787109375, "learning_rate": 0.00011499354326015694, "loss": 0.477, "step": 172400 }, { "epoch": 8.56312704877322, "grad_norm": 0.18359375, "learning_rate": 0.0001149538094765074, "loss": 0.4884, "step": 172410 }, { "epoch": 8.563623721068838, "grad_norm": 0.177734375, "learning_rate": 0.00011491407569285786, "loss": 0.4693, "step": 172420 }, { "epoch": 8.564120393364458, "grad_norm": 0.1865234375, "learning_rate": 0.00011487434190920831, "loss": 0.4957, "step": 172430 }, { "epoch": 8.564617065660077, "grad_norm": 0.1669921875, "learning_rate": 0.00011483460812555874, "loss": 0.4737, "step": 172440 }, { "epoch": 8.565113737955697, "grad_norm": 0.1640625, "learning_rate": 0.0001147948743419092, "loss": 0.5143, "step": 172450 }, { "epoch": 8.565610410251317, "grad_norm": 0.1689453125, "learning_rate": 0.00011475514055825966, "loss": 0.4957, "step": 172460 }, { "epoch": 8.566107082546935, "grad_norm": 0.1708984375, "learning_rate": 0.00011471540677461012, "loss": 0.475, "step": 172470 }, { "epoch": 8.566603754842555, "grad_norm": 0.1591796875, "learning_rate": 0.00011467567299096056, "loss": 0.4944, "step": 172480 }, { "epoch": 8.567100427138174, "grad_norm": 0.1865234375, "learning_rate": 0.00011463593920731102, "loss": 0.4913, "step": 172490 }, { "epoch": 8.567597099433794, "grad_norm": 0.1650390625, "learning_rate": 0.00011459620542366148, "loss": 0.4978, "step": 172500 }, { "epoch": 8.568093771729412, "grad_norm": 0.17578125, "learning_rate": 0.00011455647164001194, "loss": 0.4725, "step": 172510 }, { "epoch": 8.568590444025032, "grad_norm": 0.19140625, "learning_rate": 0.0001145167378563624, "loss": 0.5416, "step": 172520 }, { "epoch": 8.569087116320652, "grad_norm": 0.1787109375, "learning_rate": 0.00011447700407271282, "loss": 0.4676, "step": 172530 }, { "epoch": 8.56958378861627, "grad_norm": 0.1640625, "learning_rate": 0.00011443727028906328, "loss": 0.4885, "step": 172540 }, { "epoch": 8.57008046091189, "grad_norm": 0.2158203125, "learning_rate": 0.00011439753650541374, "loss": 0.4661, "step": 172550 }, { "epoch": 8.57057713320751, "grad_norm": 0.169921875, "learning_rate": 0.0001143578027217642, "loss": 0.4737, "step": 172560 }, { "epoch": 8.57107380550313, "grad_norm": 0.1669921875, "learning_rate": 0.00011431806893811463, "loss": 0.481, "step": 172570 }, { "epoch": 8.571570477798748, "grad_norm": 0.1591796875, "learning_rate": 0.00011427833515446509, "loss": 0.4818, "step": 172580 }, { "epoch": 8.572067150094368, "grad_norm": 0.197265625, "learning_rate": 0.00011423860137081554, "loss": 0.4792, "step": 172590 }, { "epoch": 8.572563822389988, "grad_norm": 0.171875, "learning_rate": 0.000114198867587166, "loss": 0.5084, "step": 172600 }, { "epoch": 8.573060494685606, "grad_norm": 0.189453125, "learning_rate": 0.00011415913380351643, "loss": 0.5065, "step": 172610 }, { "epoch": 8.573557166981226, "grad_norm": 0.1640625, "learning_rate": 0.00011411940001986689, "loss": 0.5042, "step": 172620 }, { "epoch": 8.574053839276845, "grad_norm": 0.17578125, "learning_rate": 0.00011407966623621735, "loss": 0.5009, "step": 172630 }, { "epoch": 8.574550511572465, "grad_norm": 0.169921875, "learning_rate": 0.0001140399324525678, "loss": 0.4828, "step": 172640 }, { "epoch": 8.575047183868083, "grad_norm": 0.19140625, "learning_rate": 0.00011400019866891825, "loss": 0.4809, "step": 172650 }, { "epoch": 8.575543856163703, "grad_norm": 0.16796875, "learning_rate": 0.00011396046488526871, "loss": 0.4985, "step": 172660 }, { "epoch": 8.576040528459323, "grad_norm": 0.1611328125, "learning_rate": 0.00011392073110161917, "loss": 0.4727, "step": 172670 }, { "epoch": 8.576537200754942, "grad_norm": 0.181640625, "learning_rate": 0.00011388099731796962, "loss": 0.4763, "step": 172680 }, { "epoch": 8.577033873050562, "grad_norm": 0.1923828125, "learning_rate": 0.00011384126353432005, "loss": 0.4663, "step": 172690 }, { "epoch": 8.57753054534618, "grad_norm": 0.2001953125, "learning_rate": 0.00011380152975067051, "loss": 0.4856, "step": 172700 }, { "epoch": 8.5780272176418, "grad_norm": 0.1572265625, "learning_rate": 0.00011376179596702097, "loss": 0.4837, "step": 172710 }, { "epoch": 8.578523889937419, "grad_norm": 0.1767578125, "learning_rate": 0.00011372206218337143, "loss": 0.49, "step": 172720 }, { "epoch": 8.579020562233039, "grad_norm": 0.181640625, "learning_rate": 0.00011368232839972186, "loss": 0.4778, "step": 172730 }, { "epoch": 8.579517234528659, "grad_norm": 0.1943359375, "learning_rate": 0.00011364259461607232, "loss": 0.5232, "step": 172740 }, { "epoch": 8.580013906824277, "grad_norm": 0.1669921875, "learning_rate": 0.00011360286083242277, "loss": 0.5044, "step": 172750 }, { "epoch": 8.580510579119897, "grad_norm": 0.177734375, "learning_rate": 0.00011356312704877323, "loss": 0.5027, "step": 172760 }, { "epoch": 8.581007251415516, "grad_norm": 0.2080078125, "learning_rate": 0.00011352339326512366, "loss": 0.473, "step": 172770 }, { "epoch": 8.581503923711136, "grad_norm": 0.1630859375, "learning_rate": 0.00011348365948147412, "loss": 0.4845, "step": 172780 }, { "epoch": 8.582000596006754, "grad_norm": 0.1708984375, "learning_rate": 0.00011344392569782458, "loss": 0.4811, "step": 172790 }, { "epoch": 8.582497268302374, "grad_norm": 0.16015625, "learning_rate": 0.00011340419191417504, "loss": 0.4984, "step": 172800 }, { "epoch": 8.582993940597994, "grad_norm": 0.1826171875, "learning_rate": 0.00011336445813052548, "loss": 0.4727, "step": 172810 }, { "epoch": 8.583490612893613, "grad_norm": 0.1943359375, "learning_rate": 0.00011332472434687594, "loss": 0.5024, "step": 172820 }, { "epoch": 8.583987285189233, "grad_norm": 0.1669921875, "learning_rate": 0.0001132849905632264, "loss": 0.4802, "step": 172830 }, { "epoch": 8.584483957484851, "grad_norm": 0.171875, "learning_rate": 0.00011324525677957685, "loss": 0.4615, "step": 172840 }, { "epoch": 8.584980629780471, "grad_norm": 0.1904296875, "learning_rate": 0.00011320552299592728, "loss": 0.5001, "step": 172850 }, { "epoch": 8.58547730207609, "grad_norm": 0.1640625, "learning_rate": 0.00011316578921227774, "loss": 0.48, "step": 172860 }, { "epoch": 8.58597397437171, "grad_norm": 0.177734375, "learning_rate": 0.0001131260554286282, "loss": 0.4833, "step": 172870 }, { "epoch": 8.58647064666733, "grad_norm": 0.2177734375, "learning_rate": 0.00011308632164497866, "loss": 0.4868, "step": 172880 }, { "epoch": 8.586967318962948, "grad_norm": 0.1845703125, "learning_rate": 0.00011304658786132909, "loss": 0.4963, "step": 172890 }, { "epoch": 8.587463991258568, "grad_norm": 0.2080078125, "learning_rate": 0.00011300685407767955, "loss": 0.4841, "step": 172900 }, { "epoch": 8.587960663554187, "grad_norm": 0.1884765625, "learning_rate": 0.00011296712029403, "loss": 0.5264, "step": 172910 }, { "epoch": 8.588457335849807, "grad_norm": 0.16796875, "learning_rate": 0.00011292738651038046, "loss": 0.4982, "step": 172920 }, { "epoch": 8.588954008145425, "grad_norm": 0.1650390625, "learning_rate": 0.00011288765272673092, "loss": 0.4825, "step": 172930 }, { "epoch": 8.589450680441045, "grad_norm": 0.193359375, "learning_rate": 0.00011284791894308135, "loss": 0.5479, "step": 172940 }, { "epoch": 8.589947352736665, "grad_norm": 0.1796875, "learning_rate": 0.00011280818515943181, "loss": 0.4673, "step": 172950 }, { "epoch": 8.590444025032284, "grad_norm": 0.197265625, "learning_rate": 0.00011276845137578227, "loss": 0.4555, "step": 172960 }, { "epoch": 8.590940697327904, "grad_norm": 0.1884765625, "learning_rate": 0.00011272871759213272, "loss": 0.501, "step": 172970 }, { "epoch": 8.591437369623522, "grad_norm": 0.1669921875, "learning_rate": 0.00011268898380848317, "loss": 0.4642, "step": 172980 }, { "epoch": 8.591934041919142, "grad_norm": 0.17578125, "learning_rate": 0.00011264925002483361, "loss": 0.4943, "step": 172990 }, { "epoch": 8.59243071421476, "grad_norm": 0.2060546875, "learning_rate": 0.00011260951624118407, "loss": 0.4543, "step": 173000 }, { "epoch": 8.59292738651038, "grad_norm": 0.158203125, "learning_rate": 0.00011256978245753453, "loss": 0.4762, "step": 173010 }, { "epoch": 8.593424058806, "grad_norm": 0.181640625, "learning_rate": 0.00011253004867388497, "loss": 0.4986, "step": 173020 }, { "epoch": 8.593920731101619, "grad_norm": 0.2392578125, "learning_rate": 0.00011249031489023543, "loss": 0.5028, "step": 173030 }, { "epoch": 8.59441740339724, "grad_norm": 0.1806640625, "learning_rate": 0.00011245058110658589, "loss": 0.5066, "step": 173040 }, { "epoch": 8.594914075692857, "grad_norm": 0.1865234375, "learning_rate": 0.00011241084732293635, "loss": 0.4852, "step": 173050 }, { "epoch": 8.595410747988478, "grad_norm": 0.17578125, "learning_rate": 0.00011237111353928678, "loss": 0.4797, "step": 173060 }, { "epoch": 8.595907420284096, "grad_norm": 0.20703125, "learning_rate": 0.00011233137975563723, "loss": 0.4802, "step": 173070 }, { "epoch": 8.596404092579716, "grad_norm": 0.1669921875, "learning_rate": 0.00011229164597198769, "loss": 0.5015, "step": 173080 }, { "epoch": 8.596900764875336, "grad_norm": 0.234375, "learning_rate": 0.00011225191218833815, "loss": 0.4566, "step": 173090 }, { "epoch": 8.597397437170955, "grad_norm": 0.251953125, "learning_rate": 0.00011221217840468858, "loss": 0.4922, "step": 173100 }, { "epoch": 8.597894109466575, "grad_norm": 0.1796875, "learning_rate": 0.00011217244462103904, "loss": 0.5129, "step": 173110 }, { "epoch": 8.598390781762193, "grad_norm": 0.1669921875, "learning_rate": 0.0001121327108373895, "loss": 0.5091, "step": 173120 }, { "epoch": 8.598887454057813, "grad_norm": 0.1845703125, "learning_rate": 0.00011209297705373995, "loss": 0.5011, "step": 173130 }, { "epoch": 8.599384126353431, "grad_norm": 0.1875, "learning_rate": 0.00011205324327009038, "loss": 0.4902, "step": 173140 }, { "epoch": 8.599880798649052, "grad_norm": 0.166015625, "learning_rate": 0.00011201350948644084, "loss": 0.4722, "step": 173150 }, { "epoch": 8.600377470944672, "grad_norm": 0.224609375, "learning_rate": 0.0001119737757027913, "loss": 0.4924, "step": 173160 }, { "epoch": 8.60087414324029, "grad_norm": 0.1796875, "learning_rate": 0.00011193404191914176, "loss": 0.5015, "step": 173170 }, { "epoch": 8.60137081553591, "grad_norm": 0.19921875, "learning_rate": 0.0001118943081354922, "loss": 0.4771, "step": 173180 }, { "epoch": 8.601867487831528, "grad_norm": 0.181640625, "learning_rate": 0.00011185457435184266, "loss": 0.4567, "step": 173190 }, { "epoch": 8.602364160127149, "grad_norm": 0.15625, "learning_rate": 0.00011181484056819312, "loss": 0.5131, "step": 173200 }, { "epoch": 8.602860832422767, "grad_norm": 0.1767578125, "learning_rate": 0.00011177510678454358, "loss": 0.4573, "step": 173210 }, { "epoch": 8.603357504718387, "grad_norm": 0.1728515625, "learning_rate": 0.000111735373000894, "loss": 0.4729, "step": 173220 }, { "epoch": 8.603854177014005, "grad_norm": 0.1708984375, "learning_rate": 0.00011169563921724446, "loss": 0.5176, "step": 173230 }, { "epoch": 8.604350849309625, "grad_norm": 0.2041015625, "learning_rate": 0.00011165590543359492, "loss": 0.4943, "step": 173240 }, { "epoch": 8.604847521605246, "grad_norm": 0.1904296875, "learning_rate": 0.00011161617164994538, "loss": 0.4649, "step": 173250 }, { "epoch": 8.605344193900864, "grad_norm": 0.1982421875, "learning_rate": 0.00011157643786629581, "loss": 0.5047, "step": 173260 }, { "epoch": 8.605840866196484, "grad_norm": 0.1767578125, "learning_rate": 0.00011153670408264627, "loss": 0.4832, "step": 173270 }, { "epoch": 8.606337538492102, "grad_norm": 0.189453125, "learning_rate": 0.00011149697029899673, "loss": 0.4853, "step": 173280 }, { "epoch": 8.606834210787722, "grad_norm": 0.1865234375, "learning_rate": 0.00011145723651534718, "loss": 0.4814, "step": 173290 }, { "epoch": 8.60733088308334, "grad_norm": 0.15625, "learning_rate": 0.00011141750273169761, "loss": 0.4646, "step": 173300 }, { "epoch": 8.607827555378961, "grad_norm": 0.189453125, "learning_rate": 0.00011137776894804807, "loss": 0.4631, "step": 173310 }, { "epoch": 8.608324227674581, "grad_norm": 0.1806640625, "learning_rate": 0.00011133803516439853, "loss": 0.4459, "step": 173320 }, { "epoch": 8.6088208999702, "grad_norm": 0.181640625, "learning_rate": 0.00011129830138074899, "loss": 0.4805, "step": 173330 }, { "epoch": 8.60931757226582, "grad_norm": 0.1943359375, "learning_rate": 0.00011125856759709945, "loss": 0.4772, "step": 173340 }, { "epoch": 8.609814244561438, "grad_norm": 0.1875, "learning_rate": 0.00011121883381344989, "loss": 0.4769, "step": 173350 }, { "epoch": 8.610310916857058, "grad_norm": 0.162109375, "learning_rate": 0.00011117910002980035, "loss": 0.4761, "step": 173360 }, { "epoch": 8.610807589152676, "grad_norm": 0.1728515625, "learning_rate": 0.0001111393662461508, "loss": 0.4986, "step": 173370 }, { "epoch": 8.611304261448296, "grad_norm": 0.234375, "learning_rate": 0.00011109963246250126, "loss": 0.4688, "step": 173380 }, { "epoch": 8.611800933743917, "grad_norm": 0.197265625, "learning_rate": 0.0001110598986788517, "loss": 0.4831, "step": 173390 }, { "epoch": 8.612297606039535, "grad_norm": 0.1767578125, "learning_rate": 0.00011102016489520215, "loss": 0.5004, "step": 173400 }, { "epoch": 8.612794278335155, "grad_norm": 0.189453125, "learning_rate": 0.00011098043111155261, "loss": 0.4872, "step": 173410 }, { "epoch": 8.613290950630773, "grad_norm": 0.1611328125, "learning_rate": 0.00011094069732790307, "loss": 0.4881, "step": 173420 }, { "epoch": 8.613787622926393, "grad_norm": 0.1689453125, "learning_rate": 0.0001109009635442535, "loss": 0.4759, "step": 173430 }, { "epoch": 8.614284295222012, "grad_norm": 0.18359375, "learning_rate": 0.00011086122976060396, "loss": 0.489, "step": 173440 }, { "epoch": 8.614780967517632, "grad_norm": 0.20703125, "learning_rate": 0.00011082149597695441, "loss": 0.5264, "step": 173450 }, { "epoch": 8.615277639813252, "grad_norm": 0.16796875, "learning_rate": 0.00011078176219330487, "loss": 0.4752, "step": 173460 }, { "epoch": 8.61577431210887, "grad_norm": 0.1796875, "learning_rate": 0.0001107420284096553, "loss": 0.508, "step": 173470 }, { "epoch": 8.61627098440449, "grad_norm": 0.1806640625, "learning_rate": 0.00011070229462600576, "loss": 0.4833, "step": 173480 }, { "epoch": 8.616767656700109, "grad_norm": 0.2236328125, "learning_rate": 0.00011066256084235622, "loss": 0.4774, "step": 173490 }, { "epoch": 8.617264328995729, "grad_norm": 0.1630859375, "learning_rate": 0.00011062282705870668, "loss": 0.4662, "step": 173500 }, { "epoch": 8.617761001291347, "grad_norm": 0.1767578125, "learning_rate": 0.00011058309327505712, "loss": 0.5317, "step": 173510 }, { "epoch": 8.618257673586967, "grad_norm": 0.1962890625, "learning_rate": 0.00011054335949140758, "loss": 0.4799, "step": 173520 }, { "epoch": 8.618754345882587, "grad_norm": 0.1826171875, "learning_rate": 0.00011050362570775804, "loss": 0.4866, "step": 173530 }, { "epoch": 8.619251018178206, "grad_norm": 0.1796875, "learning_rate": 0.0001104638919241085, "loss": 0.4851, "step": 173540 }, { "epoch": 8.619747690473826, "grad_norm": 0.171875, "learning_rate": 0.00011042415814045892, "loss": 0.4892, "step": 173550 }, { "epoch": 8.620244362769444, "grad_norm": 0.1669921875, "learning_rate": 0.00011038442435680938, "loss": 0.5096, "step": 173560 }, { "epoch": 8.620741035065064, "grad_norm": 0.2265625, "learning_rate": 0.00011034469057315984, "loss": 0.457, "step": 173570 }, { "epoch": 8.621237707360683, "grad_norm": 0.1640625, "learning_rate": 0.0001103049567895103, "loss": 0.5146, "step": 173580 }, { "epoch": 8.621734379656303, "grad_norm": 0.1796875, "learning_rate": 0.00011026522300586073, "loss": 0.4439, "step": 173590 }, { "epoch": 8.622231051951921, "grad_norm": 0.166015625, "learning_rate": 0.00011022548922221119, "loss": 0.5048, "step": 173600 }, { "epoch": 8.622727724247541, "grad_norm": 0.177734375, "learning_rate": 0.00011018575543856164, "loss": 0.4622, "step": 173610 }, { "epoch": 8.623224396543161, "grad_norm": 0.177734375, "learning_rate": 0.0001101460216549121, "loss": 0.503, "step": 173620 }, { "epoch": 8.62372106883878, "grad_norm": 0.1796875, "learning_rate": 0.00011010628787126253, "loss": 0.5009, "step": 173630 }, { "epoch": 8.6242177411344, "grad_norm": 0.1728515625, "learning_rate": 0.00011006655408761299, "loss": 0.4883, "step": 173640 }, { "epoch": 8.624714413430018, "grad_norm": 0.16796875, "learning_rate": 0.00011002682030396345, "loss": 0.4946, "step": 173650 }, { "epoch": 8.625211085725638, "grad_norm": 0.197265625, "learning_rate": 0.0001099870865203139, "loss": 0.5035, "step": 173660 }, { "epoch": 8.625707758021257, "grad_norm": 0.17578125, "learning_rate": 0.00010994735273666435, "loss": 0.4953, "step": 173670 }, { "epoch": 8.626204430316877, "grad_norm": 0.1708984375, "learning_rate": 0.00010990761895301481, "loss": 0.5011, "step": 173680 }, { "epoch": 8.626701102612497, "grad_norm": 0.162109375, "learning_rate": 0.00010986788516936527, "loss": 0.4745, "step": 173690 }, { "epoch": 8.627197774908115, "grad_norm": 0.1640625, "learning_rate": 0.00010982815138571571, "loss": 0.4817, "step": 173700 }, { "epoch": 8.627694447203735, "grad_norm": 0.1865234375, "learning_rate": 0.00010978841760206615, "loss": 0.5043, "step": 173710 }, { "epoch": 8.628191119499354, "grad_norm": 0.171875, "learning_rate": 0.00010974868381841661, "loss": 0.4751, "step": 173720 }, { "epoch": 8.628687791794974, "grad_norm": 0.1640625, "learning_rate": 0.00010970895003476707, "loss": 0.478, "step": 173730 }, { "epoch": 8.629184464090592, "grad_norm": 0.1943359375, "learning_rate": 0.00010966921625111753, "loss": 0.4573, "step": 173740 }, { "epoch": 8.629681136386212, "grad_norm": 0.1640625, "learning_rate": 0.00010962948246746796, "loss": 0.504, "step": 173750 }, { "epoch": 8.630177808681832, "grad_norm": 0.1533203125, "learning_rate": 0.00010958974868381842, "loss": 0.4997, "step": 173760 }, { "epoch": 8.63067448097745, "grad_norm": 0.1875, "learning_rate": 0.00010955001490016887, "loss": 0.4746, "step": 173770 }, { "epoch": 8.63117115327307, "grad_norm": 0.21484375, "learning_rate": 0.00010951028111651933, "loss": 0.4846, "step": 173780 }, { "epoch": 8.63166782556869, "grad_norm": 0.1845703125, "learning_rate": 0.00010947054733286979, "loss": 0.4894, "step": 173790 }, { "epoch": 8.63216449786431, "grad_norm": 0.201171875, "learning_rate": 0.00010943081354922022, "loss": 0.485, "step": 173800 }, { "epoch": 8.632661170159928, "grad_norm": 0.1708984375, "learning_rate": 0.00010939107976557068, "loss": 0.4527, "step": 173810 }, { "epoch": 8.633157842455548, "grad_norm": 0.18359375, "learning_rate": 0.00010935134598192114, "loss": 0.51, "step": 173820 }, { "epoch": 8.633654514751168, "grad_norm": 0.1787109375, "learning_rate": 0.0001093116121982716, "loss": 0.4759, "step": 173830 }, { "epoch": 8.634151187046786, "grad_norm": 0.1669921875, "learning_rate": 0.00010927187841462202, "loss": 0.4881, "step": 173840 }, { "epoch": 8.634647859342406, "grad_norm": 0.23046875, "learning_rate": 0.00010923214463097248, "loss": 0.5048, "step": 173850 }, { "epoch": 8.635144531638025, "grad_norm": 0.1689453125, "learning_rate": 0.00010919241084732294, "loss": 0.522, "step": 173860 }, { "epoch": 8.635641203933645, "grad_norm": 0.197265625, "learning_rate": 0.0001091526770636734, "loss": 0.4871, "step": 173870 }, { "epoch": 8.636137876229263, "grad_norm": 0.1787109375, "learning_rate": 0.00010911294328002384, "loss": 0.4856, "step": 173880 }, { "epoch": 8.636634548524883, "grad_norm": 0.1630859375, "learning_rate": 0.0001090732094963743, "loss": 0.5151, "step": 173890 }, { "epoch": 8.637131220820503, "grad_norm": 0.1982421875, "learning_rate": 0.00010903347571272476, "loss": 0.4928, "step": 173900 }, { "epoch": 8.637627893116122, "grad_norm": 0.16015625, "learning_rate": 0.00010899374192907522, "loss": 0.4525, "step": 173910 }, { "epoch": 8.638124565411742, "grad_norm": 0.1728515625, "learning_rate": 0.00010895400814542565, "loss": 0.4856, "step": 173920 }, { "epoch": 8.63862123770736, "grad_norm": 0.1640625, "learning_rate": 0.0001089142743617761, "loss": 0.4888, "step": 173930 }, { "epoch": 8.63911791000298, "grad_norm": 0.1708984375, "learning_rate": 0.00010887454057812656, "loss": 0.4846, "step": 173940 }, { "epoch": 8.639614582298599, "grad_norm": 0.1962890625, "learning_rate": 0.00010883480679447702, "loss": 0.4762, "step": 173950 }, { "epoch": 8.640111254594219, "grad_norm": 0.173828125, "learning_rate": 0.00010879507301082745, "loss": 0.4901, "step": 173960 }, { "epoch": 8.640607926889839, "grad_norm": 0.1826171875, "learning_rate": 0.00010875533922717791, "loss": 0.4973, "step": 173970 }, { "epoch": 8.641104599185457, "grad_norm": 0.201171875, "learning_rate": 0.00010871560544352837, "loss": 0.4961, "step": 173980 }, { "epoch": 8.641601271481077, "grad_norm": 0.1640625, "learning_rate": 0.00010867587165987882, "loss": 0.484, "step": 173990 }, { "epoch": 8.642097943776696, "grad_norm": 0.169921875, "learning_rate": 0.00010863613787622925, "loss": 0.4745, "step": 174000 }, { "epoch": 8.642594616072316, "grad_norm": 0.162109375, "learning_rate": 0.00010859640409257971, "loss": 0.4634, "step": 174010 }, { "epoch": 8.643091288367934, "grad_norm": 0.16796875, "learning_rate": 0.00010855667030893017, "loss": 0.4951, "step": 174020 }, { "epoch": 8.643587960663554, "grad_norm": 0.173828125, "learning_rate": 0.00010851693652528063, "loss": 0.4963, "step": 174030 }, { "epoch": 8.644084632959174, "grad_norm": 0.19140625, "learning_rate": 0.00010847720274163107, "loss": 0.4981, "step": 174040 }, { "epoch": 8.644581305254793, "grad_norm": 0.1845703125, "learning_rate": 0.00010843746895798153, "loss": 0.5023, "step": 174050 }, { "epoch": 8.645077977550413, "grad_norm": 0.1650390625, "learning_rate": 0.00010839773517433199, "loss": 0.509, "step": 174060 }, { "epoch": 8.645574649846031, "grad_norm": 0.2001953125, "learning_rate": 0.00010835800139068245, "loss": 0.465, "step": 174070 }, { "epoch": 8.646071322141651, "grad_norm": 0.177734375, "learning_rate": 0.00010831826760703288, "loss": 0.496, "step": 174080 }, { "epoch": 8.64656799443727, "grad_norm": 0.1826171875, "learning_rate": 0.00010827853382338333, "loss": 0.4552, "step": 174090 }, { "epoch": 8.64706466673289, "grad_norm": 0.1748046875, "learning_rate": 0.00010823880003973379, "loss": 0.4964, "step": 174100 }, { "epoch": 8.64756133902851, "grad_norm": 0.193359375, "learning_rate": 0.00010819906625608425, "loss": 0.4778, "step": 174110 }, { "epoch": 8.648058011324128, "grad_norm": 0.20703125, "learning_rate": 0.00010815933247243468, "loss": 0.4903, "step": 174120 }, { "epoch": 8.648554683619748, "grad_norm": 0.19140625, "learning_rate": 0.00010811959868878514, "loss": 0.4899, "step": 174130 }, { "epoch": 8.649051355915367, "grad_norm": 0.1904296875, "learning_rate": 0.0001080798649051356, "loss": 0.5264, "step": 174140 }, { "epoch": 8.649548028210987, "grad_norm": 0.1728515625, "learning_rate": 0.00010804013112148605, "loss": 0.4519, "step": 174150 }, { "epoch": 8.650044700506605, "grad_norm": 0.1806640625, "learning_rate": 0.00010800039733783648, "loss": 0.4579, "step": 174160 }, { "epoch": 8.650541372802225, "grad_norm": 0.1728515625, "learning_rate": 0.00010796066355418694, "loss": 0.4924, "step": 174170 }, { "epoch": 8.651038045097845, "grad_norm": 0.171875, "learning_rate": 0.0001079209297705374, "loss": 0.4829, "step": 174180 }, { "epoch": 8.651534717393464, "grad_norm": 0.1923828125, "learning_rate": 0.00010788119598688786, "loss": 0.495, "step": 174190 }, { "epoch": 8.652031389689084, "grad_norm": 0.19921875, "learning_rate": 0.00010784146220323832, "loss": 0.496, "step": 174200 }, { "epoch": 8.652528061984702, "grad_norm": 0.1826171875, "learning_rate": 0.00010780172841958876, "loss": 0.4862, "step": 174210 }, { "epoch": 8.653024734280322, "grad_norm": 0.171875, "learning_rate": 0.00010776199463593922, "loss": 0.4759, "step": 174220 }, { "epoch": 8.65352140657594, "grad_norm": 0.169921875, "learning_rate": 0.00010772226085228968, "loss": 0.4728, "step": 174230 }, { "epoch": 8.65401807887156, "grad_norm": 0.1611328125, "learning_rate": 0.00010768252706864013, "loss": 0.4325, "step": 174240 }, { "epoch": 8.65451475116718, "grad_norm": 0.18359375, "learning_rate": 0.00010764279328499056, "loss": 0.5091, "step": 174250 }, { "epoch": 8.655011423462799, "grad_norm": 0.169921875, "learning_rate": 0.00010760305950134102, "loss": 0.5027, "step": 174260 }, { "epoch": 8.65550809575842, "grad_norm": 0.1748046875, "learning_rate": 0.00010756332571769148, "loss": 0.4376, "step": 174270 }, { "epoch": 8.656004768054038, "grad_norm": 0.1708984375, "learning_rate": 0.00010752359193404194, "loss": 0.4668, "step": 174280 }, { "epoch": 8.656501440349658, "grad_norm": 0.171875, "learning_rate": 0.00010748385815039237, "loss": 0.4832, "step": 174290 }, { "epoch": 8.656998112645276, "grad_norm": 0.2060546875, "learning_rate": 0.00010744412436674283, "loss": 0.4939, "step": 174300 }, { "epoch": 8.657494784940896, "grad_norm": 0.1904296875, "learning_rate": 0.00010740439058309328, "loss": 0.4924, "step": 174310 }, { "epoch": 8.657991457236516, "grad_norm": 0.1787109375, "learning_rate": 0.00010736465679944374, "loss": 0.4954, "step": 174320 }, { "epoch": 8.658488129532135, "grad_norm": 0.185546875, "learning_rate": 0.00010732492301579417, "loss": 0.4583, "step": 174330 }, { "epoch": 8.658984801827755, "grad_norm": 0.16796875, "learning_rate": 0.00010728518923214463, "loss": 0.4932, "step": 174340 }, { "epoch": 8.659481474123373, "grad_norm": 0.18359375, "learning_rate": 0.00010724545544849509, "loss": 0.4873, "step": 174350 }, { "epoch": 8.659978146418993, "grad_norm": 0.177734375, "learning_rate": 0.00010720572166484555, "loss": 0.4775, "step": 174360 }, { "epoch": 8.660474818714611, "grad_norm": 0.1796875, "learning_rate": 0.00010716598788119599, "loss": 0.4945, "step": 174370 }, { "epoch": 8.660971491010232, "grad_norm": 0.1689453125, "learning_rate": 0.00010712625409754645, "loss": 0.4903, "step": 174380 }, { "epoch": 8.661468163305852, "grad_norm": 0.1640625, "learning_rate": 0.0001070865203138969, "loss": 0.4777, "step": 174390 }, { "epoch": 8.66196483560147, "grad_norm": 0.2060546875, "learning_rate": 0.00010704678653024735, "loss": 0.4987, "step": 174400 }, { "epoch": 8.66246150789709, "grad_norm": 0.173828125, "learning_rate": 0.0001070070527465978, "loss": 0.4975, "step": 174410 }, { "epoch": 8.662958180192708, "grad_norm": 0.1787109375, "learning_rate": 0.00010696731896294825, "loss": 0.4891, "step": 174420 }, { "epoch": 8.663454852488329, "grad_norm": 0.2041015625, "learning_rate": 0.00010692758517929871, "loss": 0.463, "step": 174430 }, { "epoch": 8.663951524783947, "grad_norm": 0.1708984375, "learning_rate": 0.00010688785139564917, "loss": 0.4858, "step": 174440 }, { "epoch": 8.664448197079567, "grad_norm": 0.16796875, "learning_rate": 0.0001068481176119996, "loss": 0.45, "step": 174450 }, { "epoch": 8.664944869375187, "grad_norm": 0.216796875, "learning_rate": 0.00010680838382835006, "loss": 0.4699, "step": 174460 }, { "epoch": 8.665441541670805, "grad_norm": 0.169921875, "learning_rate": 0.00010676865004470051, "loss": 0.4936, "step": 174470 }, { "epoch": 8.665938213966426, "grad_norm": 0.1611328125, "learning_rate": 0.00010672891626105097, "loss": 0.4886, "step": 174480 }, { "epoch": 8.666434886262044, "grad_norm": 0.1767578125, "learning_rate": 0.0001066891824774014, "loss": 0.4716, "step": 174490 }, { "epoch": 8.666931558557664, "grad_norm": 0.1923828125, "learning_rate": 0.00010664944869375186, "loss": 0.4806, "step": 174500 }, { "epoch": 8.667428230853282, "grad_norm": 0.1845703125, "learning_rate": 0.00010660971491010232, "loss": 0.4962, "step": 174510 }, { "epoch": 8.667924903148903, "grad_norm": 0.189453125, "learning_rate": 0.00010656998112645278, "loss": 0.4882, "step": 174520 }, { "epoch": 8.668421575444523, "grad_norm": 0.1787109375, "learning_rate": 0.00010653024734280322, "loss": 0.5009, "step": 174530 }, { "epoch": 8.668918247740141, "grad_norm": 0.1826171875, "learning_rate": 0.00010649051355915368, "loss": 0.5219, "step": 174540 }, { "epoch": 8.669414920035761, "grad_norm": 0.1904296875, "learning_rate": 0.00010645077977550412, "loss": 0.5139, "step": 174550 }, { "epoch": 8.66991159233138, "grad_norm": 0.197265625, "learning_rate": 0.00010641104599185458, "loss": 0.4798, "step": 174560 }, { "epoch": 8.670408264627, "grad_norm": 0.189453125, "learning_rate": 0.00010637131220820502, "loss": 0.481, "step": 174570 }, { "epoch": 8.670904936922618, "grad_norm": 0.177734375, "learning_rate": 0.00010633157842455548, "loss": 0.4926, "step": 174580 }, { "epoch": 8.671401609218238, "grad_norm": 0.173828125, "learning_rate": 0.00010629184464090594, "loss": 0.4913, "step": 174590 }, { "epoch": 8.671898281513856, "grad_norm": 0.1865234375, "learning_rate": 0.0001062521108572564, "loss": 0.4824, "step": 174600 }, { "epoch": 8.672394953809476, "grad_norm": 0.1767578125, "learning_rate": 0.00010621237707360685, "loss": 0.4928, "step": 174610 }, { "epoch": 8.672891626105097, "grad_norm": 0.169921875, "learning_rate": 0.00010617264328995729, "loss": 0.5017, "step": 174620 }, { "epoch": 8.673388298400715, "grad_norm": 0.1689453125, "learning_rate": 0.00010613290950630774, "loss": 0.5175, "step": 174630 }, { "epoch": 8.673884970696335, "grad_norm": 0.169921875, "learning_rate": 0.0001060931757226582, "loss": 0.5026, "step": 174640 }, { "epoch": 8.674381642991953, "grad_norm": 0.17578125, "learning_rate": 0.00010605344193900866, "loss": 0.4927, "step": 174650 }, { "epoch": 8.674878315287573, "grad_norm": 0.171875, "learning_rate": 0.00010601370815535909, "loss": 0.4765, "step": 174660 }, { "epoch": 8.675374987583192, "grad_norm": 0.181640625, "learning_rate": 0.00010597397437170955, "loss": 0.4778, "step": 174670 }, { "epoch": 8.675871659878812, "grad_norm": 0.1953125, "learning_rate": 0.00010593424058806, "loss": 0.4864, "step": 174680 }, { "epoch": 8.676368332174432, "grad_norm": 0.16796875, "learning_rate": 0.00010589450680441046, "loss": 0.5022, "step": 174690 }, { "epoch": 8.67686500447005, "grad_norm": 0.1875, "learning_rate": 0.0001058547730207609, "loss": 0.4873, "step": 174700 }, { "epoch": 8.67736167676567, "grad_norm": 0.1865234375, "learning_rate": 0.00010581503923711135, "loss": 0.4798, "step": 174710 }, { "epoch": 8.677858349061289, "grad_norm": 0.181640625, "learning_rate": 0.00010577530545346181, "loss": 0.4645, "step": 174720 }, { "epoch": 8.678355021356909, "grad_norm": 0.205078125, "learning_rate": 0.00010573557166981227, "loss": 0.498, "step": 174730 }, { "epoch": 8.678851693652527, "grad_norm": 0.1767578125, "learning_rate": 0.00010569583788616271, "loss": 0.474, "step": 174740 }, { "epoch": 8.679348365948147, "grad_norm": 0.1806640625, "learning_rate": 0.00010565610410251317, "loss": 0.4782, "step": 174750 }, { "epoch": 8.679845038243768, "grad_norm": 0.1708984375, "learning_rate": 0.00010561637031886363, "loss": 0.4497, "step": 174760 }, { "epoch": 8.680341710539386, "grad_norm": 0.2001953125, "learning_rate": 0.00010557663653521408, "loss": 0.4833, "step": 174770 }, { "epoch": 8.680838382835006, "grad_norm": 0.185546875, "learning_rate": 0.00010553690275156452, "loss": 0.4923, "step": 174780 }, { "epoch": 8.681335055130624, "grad_norm": 0.1748046875, "learning_rate": 0.00010549716896791497, "loss": 0.4748, "step": 174790 }, { "epoch": 8.681831727426244, "grad_norm": 0.201171875, "learning_rate": 0.00010545743518426543, "loss": 0.4766, "step": 174800 }, { "epoch": 8.682328399721863, "grad_norm": 0.17578125, "learning_rate": 0.00010541770140061589, "loss": 0.5165, "step": 174810 }, { "epoch": 8.682825072017483, "grad_norm": 0.16796875, "learning_rate": 0.00010537796761696632, "loss": 0.4782, "step": 174820 }, { "epoch": 8.683321744313103, "grad_norm": 0.2158203125, "learning_rate": 0.00010533823383331678, "loss": 0.482, "step": 174830 }, { "epoch": 8.683818416608721, "grad_norm": 0.1826171875, "learning_rate": 0.00010529850004966724, "loss": 0.508, "step": 174840 }, { "epoch": 8.684315088904341, "grad_norm": 0.1845703125, "learning_rate": 0.00010525876626601769, "loss": 0.4965, "step": 174850 }, { "epoch": 8.68481176119996, "grad_norm": 0.1767578125, "learning_rate": 0.00010521903248236812, "loss": 0.4568, "step": 174860 }, { "epoch": 8.68530843349558, "grad_norm": 0.1845703125, "learning_rate": 0.00010517929869871858, "loss": 0.4496, "step": 174870 }, { "epoch": 8.685805105791198, "grad_norm": 0.1728515625, "learning_rate": 0.00010513956491506904, "loss": 0.4925, "step": 174880 }, { "epoch": 8.686301778086818, "grad_norm": 0.171875, "learning_rate": 0.0001050998311314195, "loss": 0.4353, "step": 174890 }, { "epoch": 8.686798450382438, "grad_norm": 0.1728515625, "learning_rate": 0.00010506009734776994, "loss": 0.4739, "step": 174900 }, { "epoch": 8.687295122678057, "grad_norm": 0.185546875, "learning_rate": 0.0001050203635641204, "loss": 0.4828, "step": 174910 }, { "epoch": 8.687791794973677, "grad_norm": 0.1669921875, "learning_rate": 0.00010498062978047086, "loss": 0.4693, "step": 174920 }, { "epoch": 8.688288467269295, "grad_norm": 0.18359375, "learning_rate": 0.00010494089599682131, "loss": 0.5162, "step": 174930 }, { "epoch": 8.688785139564915, "grad_norm": 0.1767578125, "learning_rate": 0.00010490116221317175, "loss": 0.5078, "step": 174940 }, { "epoch": 8.689281811860534, "grad_norm": 0.1904296875, "learning_rate": 0.0001048614284295222, "loss": 0.5057, "step": 174950 }, { "epoch": 8.689778484156154, "grad_norm": 0.17578125, "learning_rate": 0.00010482169464587266, "loss": 0.4784, "step": 174960 }, { "epoch": 8.690275156451774, "grad_norm": 0.197265625, "learning_rate": 0.00010478196086222312, "loss": 0.5044, "step": 174970 }, { "epoch": 8.690771828747392, "grad_norm": 0.197265625, "learning_rate": 0.00010474222707857355, "loss": 0.4781, "step": 174980 }, { "epoch": 8.691268501043012, "grad_norm": 0.1845703125, "learning_rate": 0.00010470249329492401, "loss": 0.473, "step": 174990 }, { "epoch": 8.69176517333863, "grad_norm": 0.1689453125, "learning_rate": 0.00010466275951127447, "loss": 0.4808, "step": 175000 }, { "epoch": 8.69226184563425, "grad_norm": 0.171875, "learning_rate": 0.00010462302572762492, "loss": 0.4748, "step": 175010 }, { "epoch": 8.69275851792987, "grad_norm": 0.171875, "learning_rate": 0.00010458329194397538, "loss": 0.4727, "step": 175020 }, { "epoch": 8.69325519022549, "grad_norm": 0.1865234375, "learning_rate": 0.00010454355816032581, "loss": 0.4716, "step": 175030 }, { "epoch": 8.693751862521108, "grad_norm": 0.1826171875, "learning_rate": 0.00010450382437667627, "loss": 0.4809, "step": 175040 }, { "epoch": 8.694248534816728, "grad_norm": 0.16796875, "learning_rate": 0.00010446409059302673, "loss": 0.492, "step": 175050 }, { "epoch": 8.694745207112348, "grad_norm": 0.1650390625, "learning_rate": 0.00010442435680937719, "loss": 0.4875, "step": 175060 }, { "epoch": 8.695241879407966, "grad_norm": 0.208984375, "learning_rate": 0.00010438462302572763, "loss": 0.4855, "step": 175070 }, { "epoch": 8.695738551703586, "grad_norm": 0.19921875, "learning_rate": 0.00010434488924207809, "loss": 0.4954, "step": 175080 }, { "epoch": 8.696235223999205, "grad_norm": 0.16015625, "learning_rate": 0.00010430515545842854, "loss": 0.4874, "step": 175090 }, { "epoch": 8.696731896294825, "grad_norm": 0.177734375, "learning_rate": 0.00010426542167477899, "loss": 0.4979, "step": 175100 }, { "epoch": 8.697228568590443, "grad_norm": 0.1982421875, "learning_rate": 0.00010422568789112943, "loss": 0.5075, "step": 175110 }, { "epoch": 8.697725240886063, "grad_norm": 0.2060546875, "learning_rate": 0.00010418595410747989, "loss": 0.4886, "step": 175120 }, { "epoch": 8.698221913181683, "grad_norm": 0.1748046875, "learning_rate": 0.00010414622032383035, "loss": 0.4603, "step": 175130 }, { "epoch": 8.698718585477302, "grad_norm": 0.193359375, "learning_rate": 0.00010410648654018081, "loss": 0.4983, "step": 175140 }, { "epoch": 8.699215257772922, "grad_norm": 0.1787109375, "learning_rate": 0.00010406675275653124, "loss": 0.4894, "step": 175150 }, { "epoch": 8.69971193006854, "grad_norm": 0.19921875, "learning_rate": 0.0001040270189728817, "loss": 0.4874, "step": 175160 }, { "epoch": 8.70020860236416, "grad_norm": 0.1875, "learning_rate": 0.00010398728518923215, "loss": 0.4918, "step": 175170 }, { "epoch": 8.700705274659779, "grad_norm": 0.1796875, "learning_rate": 0.00010394755140558261, "loss": 0.5007, "step": 175180 }, { "epoch": 8.701201946955399, "grad_norm": 0.203125, "learning_rate": 0.00010390781762193304, "loss": 0.478, "step": 175190 }, { "epoch": 8.701698619251019, "grad_norm": 0.1787109375, "learning_rate": 0.0001038680838382835, "loss": 0.4908, "step": 175200 }, { "epoch": 8.702195291546637, "grad_norm": 0.1650390625, "learning_rate": 0.00010382835005463396, "loss": 0.4689, "step": 175210 }, { "epoch": 8.702691963842257, "grad_norm": 0.1728515625, "learning_rate": 0.00010378861627098442, "loss": 0.4518, "step": 175220 }, { "epoch": 8.703188636137876, "grad_norm": 0.169921875, "learning_rate": 0.00010374888248733486, "loss": 0.4886, "step": 175230 }, { "epoch": 8.703685308433496, "grad_norm": 0.1630859375, "learning_rate": 0.00010370914870368532, "loss": 0.5028, "step": 175240 }, { "epoch": 8.704181980729114, "grad_norm": 0.1708984375, "learning_rate": 0.00010366941492003576, "loss": 0.4998, "step": 175250 }, { "epoch": 8.704678653024734, "grad_norm": 0.1943359375, "learning_rate": 0.00010362968113638622, "loss": 0.4962, "step": 175260 }, { "epoch": 8.705175325320354, "grad_norm": 0.177734375, "learning_rate": 0.00010358994735273666, "loss": 0.4612, "step": 175270 }, { "epoch": 8.705671997615973, "grad_norm": 0.2001953125, "learning_rate": 0.00010355021356908712, "loss": 0.4968, "step": 175280 }, { "epoch": 8.706168669911593, "grad_norm": 0.166015625, "learning_rate": 0.00010351047978543758, "loss": 0.4972, "step": 175290 }, { "epoch": 8.706665342207211, "grad_norm": 0.18359375, "learning_rate": 0.00010347074600178804, "loss": 0.4681, "step": 175300 }, { "epoch": 8.707162014502831, "grad_norm": 0.1748046875, "learning_rate": 0.00010343101221813847, "loss": 0.459, "step": 175310 }, { "epoch": 8.70765868679845, "grad_norm": 0.1865234375, "learning_rate": 0.00010339127843448893, "loss": 0.4841, "step": 175320 }, { "epoch": 8.70815535909407, "grad_norm": 0.185546875, "learning_rate": 0.00010335154465083938, "loss": 0.4949, "step": 175330 }, { "epoch": 8.70865203138969, "grad_norm": 0.1611328125, "learning_rate": 0.00010331181086718984, "loss": 0.4655, "step": 175340 }, { "epoch": 8.709148703685308, "grad_norm": 0.1767578125, "learning_rate": 0.00010327207708354027, "loss": 0.4858, "step": 175350 }, { "epoch": 8.709645375980928, "grad_norm": 0.1943359375, "learning_rate": 0.00010323234329989073, "loss": 0.4825, "step": 175360 }, { "epoch": 8.710142048276547, "grad_norm": 0.1591796875, "learning_rate": 0.00010319260951624119, "loss": 0.4749, "step": 175370 }, { "epoch": 8.710638720572167, "grad_norm": 0.1826171875, "learning_rate": 0.00010315287573259164, "loss": 0.5096, "step": 175380 }, { "epoch": 8.711135392867785, "grad_norm": 0.197265625, "learning_rate": 0.00010311314194894208, "loss": 0.5066, "step": 175390 }, { "epoch": 8.711632065163405, "grad_norm": 0.19921875, "learning_rate": 0.00010307340816529253, "loss": 0.4953, "step": 175400 }, { "epoch": 8.712128737459025, "grad_norm": 0.2041015625, "learning_rate": 0.00010303367438164299, "loss": 0.4979, "step": 175410 }, { "epoch": 8.712625409754644, "grad_norm": 0.1767578125, "learning_rate": 0.00010299394059799345, "loss": 0.5207, "step": 175420 }, { "epoch": 8.713122082050264, "grad_norm": 0.18359375, "learning_rate": 0.0001029542068143439, "loss": 0.4455, "step": 175430 }, { "epoch": 8.713618754345882, "grad_norm": 0.1904296875, "learning_rate": 0.00010291447303069435, "loss": 0.4792, "step": 175440 }, { "epoch": 8.714115426641502, "grad_norm": 0.1796875, "learning_rate": 0.00010287473924704481, "loss": 0.4731, "step": 175450 }, { "epoch": 8.71461209893712, "grad_norm": 0.2158203125, "learning_rate": 0.00010283500546339527, "loss": 0.4873, "step": 175460 }, { "epoch": 8.71510877123274, "grad_norm": 0.193359375, "learning_rate": 0.00010279527167974572, "loss": 0.4752, "step": 175470 }, { "epoch": 8.71560544352836, "grad_norm": 0.21484375, "learning_rate": 0.00010275553789609616, "loss": 0.5031, "step": 175480 }, { "epoch": 8.716102115823979, "grad_norm": 0.1689453125, "learning_rate": 0.00010271580411244661, "loss": 0.458, "step": 175490 }, { "epoch": 8.7165987881196, "grad_norm": 0.2109375, "learning_rate": 0.00010267607032879707, "loss": 0.4615, "step": 175500 }, { "epoch": 8.717095460415218, "grad_norm": 0.18359375, "learning_rate": 0.00010263633654514753, "loss": 0.4954, "step": 175510 }, { "epoch": 8.717592132710838, "grad_norm": 0.1767578125, "learning_rate": 0.00010259660276149796, "loss": 0.4803, "step": 175520 }, { "epoch": 8.718088805006456, "grad_norm": 0.2021484375, "learning_rate": 0.00010255686897784842, "loss": 0.4735, "step": 175530 }, { "epoch": 8.718585477302076, "grad_norm": 0.203125, "learning_rate": 0.00010251713519419887, "loss": 0.4805, "step": 175540 }, { "epoch": 8.719082149597696, "grad_norm": 0.1826171875, "learning_rate": 0.00010247740141054933, "loss": 0.47, "step": 175550 }, { "epoch": 8.719578821893315, "grad_norm": 0.171875, "learning_rate": 0.00010243766762689976, "loss": 0.4884, "step": 175560 }, { "epoch": 8.720075494188935, "grad_norm": 0.1875, "learning_rate": 0.00010239793384325022, "loss": 0.4689, "step": 175570 }, { "epoch": 8.720572166484553, "grad_norm": 0.205078125, "learning_rate": 0.00010235820005960068, "loss": 0.5375, "step": 175580 }, { "epoch": 8.721068838780173, "grad_norm": 0.1875, "learning_rate": 0.00010231846627595114, "loss": 0.4968, "step": 175590 }, { "epoch": 8.721565511075791, "grad_norm": 0.1865234375, "learning_rate": 0.00010227873249230158, "loss": 0.47, "step": 175600 }, { "epoch": 8.722062183371412, "grad_norm": 0.2099609375, "learning_rate": 0.00010223899870865204, "loss": 0.5143, "step": 175610 }, { "epoch": 8.722558855667032, "grad_norm": 0.1748046875, "learning_rate": 0.0001021992649250025, "loss": 0.4678, "step": 175620 }, { "epoch": 8.72305552796265, "grad_norm": 0.212890625, "learning_rate": 0.00010215953114135295, "loss": 0.5128, "step": 175630 }, { "epoch": 8.72355220025827, "grad_norm": 0.1767578125, "learning_rate": 0.00010211979735770339, "loss": 0.5179, "step": 175640 }, { "epoch": 8.724048872553889, "grad_norm": 0.1884765625, "learning_rate": 0.00010208006357405384, "loss": 0.4985, "step": 175650 }, { "epoch": 8.724545544849509, "grad_norm": 0.1845703125, "learning_rate": 0.0001020403297904043, "loss": 0.4765, "step": 175660 }, { "epoch": 8.725042217145127, "grad_norm": 0.171875, "learning_rate": 0.00010200059600675476, "loss": 0.4867, "step": 175670 }, { "epoch": 8.725538889440747, "grad_norm": 0.181640625, "learning_rate": 0.00010196086222310519, "loss": 0.4989, "step": 175680 }, { "epoch": 8.726035561736367, "grad_norm": 0.1826171875, "learning_rate": 0.00010192112843945565, "loss": 0.4549, "step": 175690 }, { "epoch": 8.726532234031986, "grad_norm": 0.1748046875, "learning_rate": 0.0001018813946558061, "loss": 0.4934, "step": 175700 }, { "epoch": 8.727028906327606, "grad_norm": 0.1806640625, "learning_rate": 0.00010184166087215656, "loss": 0.4957, "step": 175710 }, { "epoch": 8.727525578623224, "grad_norm": 0.181640625, "learning_rate": 0.000101801927088507, "loss": 0.4774, "step": 175720 }, { "epoch": 8.728022250918844, "grad_norm": 0.1767578125, "learning_rate": 0.00010176219330485745, "loss": 0.4926, "step": 175730 }, { "epoch": 8.728518923214462, "grad_norm": 0.1748046875, "learning_rate": 0.00010172245952120791, "loss": 0.4866, "step": 175740 }, { "epoch": 8.729015595510083, "grad_norm": 0.1796875, "learning_rate": 0.00010168272573755837, "loss": 0.4714, "step": 175750 }, { "epoch": 8.729512267805703, "grad_norm": 0.173828125, "learning_rate": 0.00010164299195390881, "loss": 0.4632, "step": 175760 }, { "epoch": 8.730008940101321, "grad_norm": 0.197265625, "learning_rate": 0.00010160325817025927, "loss": 0.5331, "step": 175770 }, { "epoch": 8.730505612396941, "grad_norm": 0.1943359375, "learning_rate": 0.00010156352438660973, "loss": 0.5019, "step": 175780 }, { "epoch": 8.73100228469256, "grad_norm": 0.1845703125, "learning_rate": 0.00010152379060296018, "loss": 0.4965, "step": 175790 }, { "epoch": 8.73149895698818, "grad_norm": 0.17578125, "learning_rate": 0.00010148405681931062, "loss": 0.4733, "step": 175800 }, { "epoch": 8.731995629283798, "grad_norm": 0.2001953125, "learning_rate": 0.00010144432303566107, "loss": 0.4864, "step": 175810 }, { "epoch": 8.732492301579418, "grad_norm": 0.1748046875, "learning_rate": 0.00010140458925201153, "loss": 0.5009, "step": 175820 }, { "epoch": 8.732988973875038, "grad_norm": 0.1982421875, "learning_rate": 0.00010136485546836199, "loss": 0.5191, "step": 175830 }, { "epoch": 8.733485646170656, "grad_norm": 0.169921875, "learning_rate": 0.00010132512168471242, "loss": 0.4973, "step": 175840 }, { "epoch": 8.733982318466277, "grad_norm": 0.17578125, "learning_rate": 0.00010128538790106288, "loss": 0.4668, "step": 175850 }, { "epoch": 8.734478990761895, "grad_norm": 0.1689453125, "learning_rate": 0.00010124565411741333, "loss": 0.4487, "step": 175860 }, { "epoch": 8.734975663057515, "grad_norm": 0.203125, "learning_rate": 0.00010120592033376379, "loss": 0.5054, "step": 175870 }, { "epoch": 8.735472335353133, "grad_norm": 0.1953125, "learning_rate": 0.00010116618655011425, "loss": 0.4952, "step": 175880 }, { "epoch": 8.735969007648754, "grad_norm": 0.162109375, "learning_rate": 0.00010112645276646468, "loss": 0.4726, "step": 175890 }, { "epoch": 8.736465679944374, "grad_norm": 0.208984375, "learning_rate": 0.00010108671898281514, "loss": 0.4965, "step": 175900 }, { "epoch": 8.736962352239992, "grad_norm": 0.16796875, "learning_rate": 0.0001010469851991656, "loss": 0.4819, "step": 175910 }, { "epoch": 8.737459024535612, "grad_norm": 0.17578125, "learning_rate": 0.00010100725141551605, "loss": 0.5017, "step": 175920 }, { "epoch": 8.73795569683123, "grad_norm": 0.1728515625, "learning_rate": 0.0001009675176318665, "loss": 0.4949, "step": 175930 }, { "epoch": 8.73845236912685, "grad_norm": 0.201171875, "learning_rate": 0.00010092778384821696, "loss": 0.4852, "step": 175940 }, { "epoch": 8.738949041422469, "grad_norm": 0.1826171875, "learning_rate": 0.0001008880500645674, "loss": 0.4902, "step": 175950 }, { "epoch": 8.739445713718089, "grad_norm": 0.1826171875, "learning_rate": 0.00010084831628091786, "loss": 0.4961, "step": 175960 }, { "epoch": 8.739942386013709, "grad_norm": 0.1787109375, "learning_rate": 0.0001008085824972683, "loss": 0.5117, "step": 175970 }, { "epoch": 8.740439058309327, "grad_norm": 0.2216796875, "learning_rate": 0.00010076884871361876, "loss": 0.4918, "step": 175980 }, { "epoch": 8.740935730604948, "grad_norm": 0.1796875, "learning_rate": 0.00010072911492996922, "loss": 0.4626, "step": 175990 }, { "epoch": 8.741432402900566, "grad_norm": 0.173828125, "learning_rate": 0.00010068938114631968, "loss": 0.4798, "step": 176000 }, { "epoch": 8.741929075196186, "grad_norm": 0.1806640625, "learning_rate": 0.00010064964736267011, "loss": 0.4802, "step": 176010 }, { "epoch": 8.742425747491804, "grad_norm": 0.1865234375, "learning_rate": 0.00010060991357902056, "loss": 0.4873, "step": 176020 }, { "epoch": 8.742922419787424, "grad_norm": 0.18359375, "learning_rate": 0.00010057017979537102, "loss": 0.4846, "step": 176030 }, { "epoch": 8.743419092083043, "grad_norm": 0.169921875, "learning_rate": 0.00010053044601172148, "loss": 0.4633, "step": 176040 }, { "epoch": 8.743915764378663, "grad_norm": 0.1767578125, "learning_rate": 0.00010049071222807191, "loss": 0.4753, "step": 176050 }, { "epoch": 8.744412436674283, "grad_norm": 0.181640625, "learning_rate": 0.00010045097844442237, "loss": 0.49, "step": 176060 }, { "epoch": 8.744909108969901, "grad_norm": 0.1953125, "learning_rate": 0.00010041124466077283, "loss": 0.5021, "step": 176070 }, { "epoch": 8.745405781265521, "grad_norm": 0.19921875, "learning_rate": 0.00010037151087712328, "loss": 0.4969, "step": 176080 }, { "epoch": 8.74590245356114, "grad_norm": 0.169921875, "learning_rate": 0.00010033177709347373, "loss": 0.4906, "step": 176090 }, { "epoch": 8.74639912585676, "grad_norm": 0.171875, "learning_rate": 0.00010029204330982417, "loss": 0.4859, "step": 176100 }, { "epoch": 8.746895798152378, "grad_norm": 0.1611328125, "learning_rate": 0.00010025230952617463, "loss": 0.4801, "step": 176110 }, { "epoch": 8.747392470447998, "grad_norm": 0.17578125, "learning_rate": 0.00010021257574252509, "loss": 0.4762, "step": 176120 }, { "epoch": 8.747889142743619, "grad_norm": 0.1728515625, "learning_rate": 0.00010017284195887553, "loss": 0.4778, "step": 176130 }, { "epoch": 8.748385815039237, "grad_norm": 0.212890625, "learning_rate": 0.00010013310817522599, "loss": 0.5307, "step": 176140 }, { "epoch": 8.748882487334857, "grad_norm": 0.1669921875, "learning_rate": 0.00010009337439157645, "loss": 0.4938, "step": 176150 }, { "epoch": 8.749379159630475, "grad_norm": 0.1953125, "learning_rate": 0.0001000536406079269, "loss": 0.4661, "step": 176160 }, { "epoch": 8.749875831926095, "grad_norm": 0.185546875, "learning_rate": 0.00010001390682427734, "loss": 0.483, "step": 176170 }, { "epoch": 8.750372504221714, "grad_norm": 0.1748046875, "learning_rate": 9.99741730406278e-05, "loss": 0.4883, "step": 176180 }, { "epoch": 8.750869176517334, "grad_norm": 0.1865234375, "learning_rate": 9.993443925697825e-05, "loss": 0.4923, "step": 176190 }, { "epoch": 8.751365848812954, "grad_norm": 0.193359375, "learning_rate": 9.98947054733287e-05, "loss": 0.4881, "step": 176200 }, { "epoch": 8.751862521108572, "grad_norm": 0.1845703125, "learning_rate": 9.985497168967915e-05, "loss": 0.4714, "step": 176210 }, { "epoch": 8.752359193404192, "grad_norm": 0.197265625, "learning_rate": 9.98152379060296e-05, "loss": 0.4527, "step": 176220 }, { "epoch": 8.75285586569981, "grad_norm": 0.18359375, "learning_rate": 9.977550412238006e-05, "loss": 0.4658, "step": 176230 }, { "epoch": 8.753352537995431, "grad_norm": 0.1904296875, "learning_rate": 9.97357703387305e-05, "loss": 0.4686, "step": 176240 }, { "epoch": 8.75384921029105, "grad_norm": 0.201171875, "learning_rate": 9.969603655508096e-05, "loss": 0.4648, "step": 176250 }, { "epoch": 8.75434588258667, "grad_norm": 0.189453125, "learning_rate": 9.96563027714314e-05, "loss": 0.4743, "step": 176260 }, { "epoch": 8.75484255488229, "grad_norm": 0.166015625, "learning_rate": 9.961656898778186e-05, "loss": 0.4739, "step": 176270 }, { "epoch": 8.755339227177908, "grad_norm": 0.1826171875, "learning_rate": 9.957683520413232e-05, "loss": 0.5123, "step": 176280 }, { "epoch": 8.755835899473528, "grad_norm": 0.1865234375, "learning_rate": 9.953710142048278e-05, "loss": 0.4968, "step": 176290 }, { "epoch": 8.756332571769146, "grad_norm": 0.1923828125, "learning_rate": 9.949736763683322e-05, "loss": 0.4837, "step": 176300 }, { "epoch": 8.756829244064766, "grad_norm": 0.212890625, "learning_rate": 9.945763385318368e-05, "loss": 0.5075, "step": 176310 }, { "epoch": 8.757325916360385, "grad_norm": 0.1923828125, "learning_rate": 9.941790006953414e-05, "loss": 0.4982, "step": 176320 }, { "epoch": 8.757822588656005, "grad_norm": 0.169921875, "learning_rate": 9.937816628588458e-05, "loss": 0.4884, "step": 176330 }, { "epoch": 8.758319260951625, "grad_norm": 0.1728515625, "learning_rate": 9.933843250223504e-05, "loss": 0.495, "step": 176340 }, { "epoch": 8.758815933247243, "grad_norm": 0.166015625, "learning_rate": 9.929869871858548e-05, "loss": 0.4847, "step": 176350 }, { "epoch": 8.759312605542863, "grad_norm": 0.1943359375, "learning_rate": 9.925896493493594e-05, "loss": 0.4984, "step": 176360 }, { "epoch": 8.759809277838482, "grad_norm": 0.1787109375, "learning_rate": 9.921923115128638e-05, "loss": 0.4804, "step": 176370 }, { "epoch": 8.760305950134102, "grad_norm": 0.1787109375, "learning_rate": 9.917949736763684e-05, "loss": 0.5331, "step": 176380 }, { "epoch": 8.76080262242972, "grad_norm": 0.193359375, "learning_rate": 9.913976358398729e-05, "loss": 0.5116, "step": 176390 }, { "epoch": 8.76129929472534, "grad_norm": 0.1943359375, "learning_rate": 9.910002980033774e-05, "loss": 0.4954, "step": 176400 }, { "epoch": 8.761795967020959, "grad_norm": 0.1884765625, "learning_rate": 9.906029601668819e-05, "loss": 0.5152, "step": 176410 }, { "epoch": 8.762292639316579, "grad_norm": 0.1845703125, "learning_rate": 9.902056223303865e-05, "loss": 0.4826, "step": 176420 }, { "epoch": 8.762789311612199, "grad_norm": 0.181640625, "learning_rate": 9.898082844938909e-05, "loss": 0.4719, "step": 176430 }, { "epoch": 8.763285983907817, "grad_norm": 0.1787109375, "learning_rate": 9.894109466573955e-05, "loss": 0.4763, "step": 176440 }, { "epoch": 8.763782656203437, "grad_norm": 0.1826171875, "learning_rate": 9.890136088208999e-05, "loss": 0.4884, "step": 176450 }, { "epoch": 8.764279328499056, "grad_norm": 0.1787109375, "learning_rate": 9.886162709844045e-05, "loss": 0.4742, "step": 176460 }, { "epoch": 8.764776000794676, "grad_norm": 0.197265625, "learning_rate": 9.882189331479091e-05, "loss": 0.4734, "step": 176470 }, { "epoch": 8.765272673090294, "grad_norm": 0.171875, "learning_rate": 9.878215953114137e-05, "loss": 0.5066, "step": 176480 }, { "epoch": 8.765769345385914, "grad_norm": 0.169921875, "learning_rate": 9.874242574749181e-05, "loss": 0.4681, "step": 176490 }, { "epoch": 8.766266017681534, "grad_norm": 0.1806640625, "learning_rate": 9.870269196384227e-05, "loss": 0.4684, "step": 176500 }, { "epoch": 8.766762689977153, "grad_norm": 0.1796875, "learning_rate": 9.866295818019271e-05, "loss": 0.4754, "step": 176510 }, { "epoch": 8.767259362272773, "grad_norm": 0.1806640625, "learning_rate": 9.862322439654317e-05, "loss": 0.4733, "step": 176520 }, { "epoch": 8.767756034568391, "grad_norm": 0.177734375, "learning_rate": 9.858349061289361e-05, "loss": 0.4888, "step": 176530 }, { "epoch": 8.768252706864011, "grad_norm": 0.189453125, "learning_rate": 9.854375682924407e-05, "loss": 0.5491, "step": 176540 }, { "epoch": 8.76874937915963, "grad_norm": 0.1708984375, "learning_rate": 9.850402304559452e-05, "loss": 0.48, "step": 176550 }, { "epoch": 8.76924605145525, "grad_norm": 0.1826171875, "learning_rate": 9.846428926194497e-05, "loss": 0.4741, "step": 176560 }, { "epoch": 8.76974272375087, "grad_norm": 0.1953125, "learning_rate": 9.842455547829542e-05, "loss": 0.4764, "step": 176570 }, { "epoch": 8.770239396046488, "grad_norm": 0.2431640625, "learning_rate": 9.838482169464588e-05, "loss": 0.51, "step": 176580 }, { "epoch": 8.770736068342108, "grad_norm": 0.166015625, "learning_rate": 9.834508791099632e-05, "loss": 0.466, "step": 176590 }, { "epoch": 8.771232740637727, "grad_norm": 0.1943359375, "learning_rate": 9.830535412734678e-05, "loss": 0.4746, "step": 176600 }, { "epoch": 8.771729412933347, "grad_norm": 0.1650390625, "learning_rate": 9.826562034369722e-05, "loss": 0.5056, "step": 176610 }, { "epoch": 8.772226085228965, "grad_norm": 0.173828125, "learning_rate": 9.822588656004768e-05, "loss": 0.4754, "step": 176620 }, { "epoch": 8.772722757524585, "grad_norm": 0.2373046875, "learning_rate": 9.818615277639814e-05, "loss": 0.4696, "step": 176630 }, { "epoch": 8.773219429820205, "grad_norm": 0.171875, "learning_rate": 9.81464189927486e-05, "loss": 0.5043, "step": 176640 }, { "epoch": 8.773716102115824, "grad_norm": 0.1611328125, "learning_rate": 9.810668520909904e-05, "loss": 0.4881, "step": 176650 }, { "epoch": 8.774212774411444, "grad_norm": 0.19921875, "learning_rate": 9.80669514254495e-05, "loss": 0.4989, "step": 176660 }, { "epoch": 8.774709446707062, "grad_norm": 0.171875, "learning_rate": 9.802721764179994e-05, "loss": 0.472, "step": 176670 }, { "epoch": 8.775206119002682, "grad_norm": 0.177734375, "learning_rate": 9.79874838581504e-05, "loss": 0.4874, "step": 176680 }, { "epoch": 8.7757027912983, "grad_norm": 0.1640625, "learning_rate": 9.794775007450084e-05, "loss": 0.4736, "step": 176690 }, { "epoch": 8.77619946359392, "grad_norm": 0.166015625, "learning_rate": 9.79080162908513e-05, "loss": 0.481, "step": 176700 }, { "epoch": 8.77669613588954, "grad_norm": 0.16796875, "learning_rate": 9.786828250720176e-05, "loss": 0.4719, "step": 176710 }, { "epoch": 8.77719280818516, "grad_norm": 0.1982421875, "learning_rate": 9.78285487235522e-05, "loss": 0.481, "step": 176720 }, { "epoch": 8.77768948048078, "grad_norm": 0.2060546875, "learning_rate": 9.778881493990266e-05, "loss": 0.4933, "step": 176730 }, { "epoch": 8.778186152776398, "grad_norm": 0.181640625, "learning_rate": 9.77490811562531e-05, "loss": 0.4894, "step": 176740 }, { "epoch": 8.778682825072018, "grad_norm": 0.2001953125, "learning_rate": 9.770934737260356e-05, "loss": 0.4862, "step": 176750 }, { "epoch": 8.779179497367636, "grad_norm": 0.177734375, "learning_rate": 9.766961358895401e-05, "loss": 0.4877, "step": 176760 }, { "epoch": 8.779676169663256, "grad_norm": 0.1865234375, "learning_rate": 9.762987980530447e-05, "loss": 0.5012, "step": 176770 }, { "epoch": 8.780172841958876, "grad_norm": 0.22265625, "learning_rate": 9.759014602165491e-05, "loss": 0.4863, "step": 176780 }, { "epoch": 8.780669514254495, "grad_norm": 0.1884765625, "learning_rate": 9.755041223800537e-05, "loss": 0.4905, "step": 176790 }, { "epoch": 8.781166186550115, "grad_norm": 0.1796875, "learning_rate": 9.751067845435581e-05, "loss": 0.5004, "step": 176800 }, { "epoch": 8.781662858845733, "grad_norm": 0.171875, "learning_rate": 9.747094467070627e-05, "loss": 0.469, "step": 176810 }, { "epoch": 8.782159531141353, "grad_norm": 0.197265625, "learning_rate": 9.743121088705673e-05, "loss": 0.5133, "step": 176820 }, { "epoch": 8.782656203436972, "grad_norm": 0.1884765625, "learning_rate": 9.739147710340719e-05, "loss": 0.4958, "step": 176830 }, { "epoch": 8.783152875732592, "grad_norm": 0.1884765625, "learning_rate": 9.735174331975763e-05, "loss": 0.4747, "step": 176840 }, { "epoch": 8.783649548028212, "grad_norm": 0.1865234375, "learning_rate": 9.731200953610809e-05, "loss": 0.4967, "step": 176850 }, { "epoch": 8.78414622032383, "grad_norm": 0.2236328125, "learning_rate": 9.727227575245853e-05, "loss": 0.5081, "step": 176860 }, { "epoch": 8.78464289261945, "grad_norm": 0.181640625, "learning_rate": 9.723254196880899e-05, "loss": 0.4796, "step": 176870 }, { "epoch": 8.785139564915069, "grad_norm": 0.1728515625, "learning_rate": 9.719280818515943e-05, "loss": 0.4937, "step": 176880 }, { "epoch": 8.785636237210689, "grad_norm": 0.2470703125, "learning_rate": 9.715307440150989e-05, "loss": 0.4887, "step": 176890 }, { "epoch": 8.786132909506307, "grad_norm": 0.1904296875, "learning_rate": 9.711334061786034e-05, "loss": 0.4763, "step": 176900 }, { "epoch": 8.786629581801927, "grad_norm": 0.2001953125, "learning_rate": 9.70736068342108e-05, "loss": 0.475, "step": 176910 }, { "epoch": 8.787126254097547, "grad_norm": 0.1748046875, "learning_rate": 9.703387305056124e-05, "loss": 0.4647, "step": 176920 }, { "epoch": 8.787622926393166, "grad_norm": 0.1611328125, "learning_rate": 9.69941392669117e-05, "loss": 0.4669, "step": 176930 }, { "epoch": 8.788119598688786, "grad_norm": 0.17578125, "learning_rate": 9.695440548326214e-05, "loss": 0.5126, "step": 176940 }, { "epoch": 8.788616270984404, "grad_norm": 0.16796875, "learning_rate": 9.69146716996126e-05, "loss": 0.473, "step": 176950 }, { "epoch": 8.789112943280024, "grad_norm": 0.1767578125, "learning_rate": 9.687493791596304e-05, "loss": 0.4998, "step": 176960 }, { "epoch": 8.789609615575642, "grad_norm": 0.1669921875, "learning_rate": 9.68352041323135e-05, "loss": 0.4981, "step": 176970 }, { "epoch": 8.790106287871263, "grad_norm": 0.203125, "learning_rate": 9.679547034866396e-05, "loss": 0.4983, "step": 176980 }, { "epoch": 8.790602960166883, "grad_norm": 0.21484375, "learning_rate": 9.675573656501442e-05, "loss": 0.4855, "step": 176990 }, { "epoch": 8.791099632462501, "grad_norm": 0.1787109375, "learning_rate": 9.671600278136486e-05, "loss": 0.5087, "step": 177000 }, { "epoch": 8.791596304758121, "grad_norm": 0.1728515625, "learning_rate": 9.667626899771532e-05, "loss": 0.4864, "step": 177010 }, { "epoch": 8.79209297705374, "grad_norm": 0.1923828125, "learning_rate": 9.663653521406576e-05, "loss": 0.5112, "step": 177020 }, { "epoch": 8.79258964934936, "grad_norm": 0.18359375, "learning_rate": 9.659680143041622e-05, "loss": 0.4765, "step": 177030 }, { "epoch": 8.793086321644978, "grad_norm": 0.208984375, "learning_rate": 9.655706764676666e-05, "loss": 0.4844, "step": 177040 }, { "epoch": 8.793582993940598, "grad_norm": 0.2001953125, "learning_rate": 9.651733386311712e-05, "loss": 0.4911, "step": 177050 }, { "epoch": 8.794079666236218, "grad_norm": 0.1845703125, "learning_rate": 9.647760007946757e-05, "loss": 0.499, "step": 177060 }, { "epoch": 8.794576338531837, "grad_norm": 0.177734375, "learning_rate": 9.643786629581802e-05, "loss": 0.4567, "step": 177070 }, { "epoch": 8.795073010827457, "grad_norm": 0.201171875, "learning_rate": 9.639813251216847e-05, "loss": 0.4706, "step": 177080 }, { "epoch": 8.795569683123075, "grad_norm": 0.189453125, "learning_rate": 9.635839872851893e-05, "loss": 0.5288, "step": 177090 }, { "epoch": 8.796066355418695, "grad_norm": 0.181640625, "learning_rate": 9.631866494486937e-05, "loss": 0.503, "step": 177100 }, { "epoch": 8.796563027714313, "grad_norm": 0.2109375, "learning_rate": 9.627893116121983e-05, "loss": 0.514, "step": 177110 }, { "epoch": 8.797059700009934, "grad_norm": 0.169921875, "learning_rate": 9.623919737757027e-05, "loss": 0.5233, "step": 177120 }, { "epoch": 8.797556372305554, "grad_norm": 0.2041015625, "learning_rate": 9.619946359392073e-05, "loss": 0.504, "step": 177130 }, { "epoch": 8.798053044601172, "grad_norm": 0.1875, "learning_rate": 9.615972981027119e-05, "loss": 0.4765, "step": 177140 }, { "epoch": 8.798549716896792, "grad_norm": 0.18359375, "learning_rate": 9.611999602662163e-05, "loss": 0.4937, "step": 177150 }, { "epoch": 8.79904638919241, "grad_norm": 0.19140625, "learning_rate": 9.608026224297209e-05, "loss": 0.4746, "step": 177160 }, { "epoch": 8.79954306148803, "grad_norm": 0.1728515625, "learning_rate": 9.604052845932255e-05, "loss": 0.4774, "step": 177170 }, { "epoch": 8.800039733783649, "grad_norm": 0.193359375, "learning_rate": 9.6000794675673e-05, "loss": 0.4536, "step": 177180 }, { "epoch": 8.800536406079269, "grad_norm": 0.2080078125, "learning_rate": 9.596106089202345e-05, "loss": 0.4693, "step": 177190 }, { "epoch": 8.80103307837489, "grad_norm": 0.181640625, "learning_rate": 9.592132710837391e-05, "loss": 0.4815, "step": 177200 }, { "epoch": 8.801529750670507, "grad_norm": 0.21875, "learning_rate": 9.588159332472435e-05, "loss": 0.4595, "step": 177210 }, { "epoch": 8.802026422966128, "grad_norm": 0.203125, "learning_rate": 9.584185954107481e-05, "loss": 0.4572, "step": 177220 }, { "epoch": 8.802523095261746, "grad_norm": 0.16015625, "learning_rate": 9.580212575742525e-05, "loss": 0.5092, "step": 177230 }, { "epoch": 8.803019767557366, "grad_norm": 0.18359375, "learning_rate": 9.576239197377571e-05, "loss": 0.4826, "step": 177240 }, { "epoch": 8.803516439852984, "grad_norm": 0.1806640625, "learning_rate": 9.572265819012616e-05, "loss": 0.4733, "step": 177250 }, { "epoch": 8.804013112148604, "grad_norm": 0.205078125, "learning_rate": 9.568292440647661e-05, "loss": 0.4817, "step": 177260 }, { "epoch": 8.804509784444225, "grad_norm": 0.181640625, "learning_rate": 9.564319062282706e-05, "loss": 0.5005, "step": 177270 }, { "epoch": 8.805006456739843, "grad_norm": 0.2158203125, "learning_rate": 9.560345683917752e-05, "loss": 0.4755, "step": 177280 }, { "epoch": 8.805503129035463, "grad_norm": 0.166015625, "learning_rate": 9.556372305552796e-05, "loss": 0.486, "step": 177290 }, { "epoch": 8.805999801331081, "grad_norm": 0.1953125, "learning_rate": 9.552398927187842e-05, "loss": 0.4747, "step": 177300 }, { "epoch": 8.806496473626702, "grad_norm": 0.181640625, "learning_rate": 9.548425548822886e-05, "loss": 0.5076, "step": 177310 }, { "epoch": 8.80699314592232, "grad_norm": 0.1806640625, "learning_rate": 9.544452170457932e-05, "loss": 0.5059, "step": 177320 }, { "epoch": 8.80748981821794, "grad_norm": 0.16796875, "learning_rate": 9.540478792092978e-05, "loss": 0.4922, "step": 177330 }, { "epoch": 8.80798649051356, "grad_norm": 0.1923828125, "learning_rate": 9.536505413728024e-05, "loss": 0.4997, "step": 177340 }, { "epoch": 8.808483162809178, "grad_norm": 0.197265625, "learning_rate": 9.532532035363068e-05, "loss": 0.4665, "step": 177350 }, { "epoch": 8.808979835104799, "grad_norm": 0.2138671875, "learning_rate": 9.528558656998114e-05, "loss": 0.4884, "step": 177360 }, { "epoch": 8.809476507400417, "grad_norm": 0.2138671875, "learning_rate": 9.524585278633158e-05, "loss": 0.512, "step": 177370 }, { "epoch": 8.809973179696037, "grad_norm": 0.1904296875, "learning_rate": 9.520611900268204e-05, "loss": 0.5118, "step": 177380 }, { "epoch": 8.810469851991655, "grad_norm": 0.1767578125, "learning_rate": 9.516638521903248e-05, "loss": 0.4607, "step": 177390 }, { "epoch": 8.810966524287275, "grad_norm": 0.220703125, "learning_rate": 9.512665143538294e-05, "loss": 0.5227, "step": 177400 }, { "epoch": 8.811463196582894, "grad_norm": 0.1904296875, "learning_rate": 9.508691765173339e-05, "loss": 0.4998, "step": 177410 }, { "epoch": 8.811959868878514, "grad_norm": 0.18359375, "learning_rate": 9.504718386808384e-05, "loss": 0.4814, "step": 177420 }, { "epoch": 8.812456541174134, "grad_norm": 0.197265625, "learning_rate": 9.500745008443429e-05, "loss": 0.4896, "step": 177430 }, { "epoch": 8.812953213469752, "grad_norm": 0.1796875, "learning_rate": 9.496771630078475e-05, "loss": 0.4792, "step": 177440 }, { "epoch": 8.813449885765372, "grad_norm": 0.185546875, "learning_rate": 9.492798251713519e-05, "loss": 0.4921, "step": 177450 }, { "epoch": 8.81394655806099, "grad_norm": 0.185546875, "learning_rate": 9.488824873348565e-05, "loss": 0.5142, "step": 177460 }, { "epoch": 8.814443230356611, "grad_norm": 0.1689453125, "learning_rate": 9.484851494983609e-05, "loss": 0.4699, "step": 177470 }, { "epoch": 8.81493990265223, "grad_norm": 0.171875, "learning_rate": 9.480878116618655e-05, "loss": 0.4891, "step": 177480 }, { "epoch": 8.81543657494785, "grad_norm": 0.1826171875, "learning_rate": 9.476904738253701e-05, "loss": 0.4795, "step": 177490 }, { "epoch": 8.81593324724347, "grad_norm": 0.1884765625, "learning_rate": 9.472931359888745e-05, "loss": 0.4878, "step": 177500 }, { "epoch": 8.816429919539088, "grad_norm": 0.2197265625, "learning_rate": 9.468957981523791e-05, "loss": 0.4549, "step": 177510 }, { "epoch": 8.816926591834708, "grad_norm": 0.1884765625, "learning_rate": 9.464984603158837e-05, "loss": 0.5013, "step": 177520 }, { "epoch": 8.817423264130326, "grad_norm": 0.1708984375, "learning_rate": 9.461011224793881e-05, "loss": 0.4684, "step": 177530 }, { "epoch": 8.817919936425946, "grad_norm": 0.185546875, "learning_rate": 9.457037846428927e-05, "loss": 0.4496, "step": 177540 }, { "epoch": 8.818416608721565, "grad_norm": 0.177734375, "learning_rate": 9.453064468063973e-05, "loss": 0.4796, "step": 177550 }, { "epoch": 8.818913281017185, "grad_norm": 0.1875, "learning_rate": 9.449091089699017e-05, "loss": 0.4595, "step": 177560 }, { "epoch": 8.819409953312805, "grad_norm": 0.173828125, "learning_rate": 9.445117711334063e-05, "loss": 0.4822, "step": 177570 }, { "epoch": 8.819906625608423, "grad_norm": 0.1748046875, "learning_rate": 9.441144332969107e-05, "loss": 0.4814, "step": 177580 }, { "epoch": 8.820403297904043, "grad_norm": 0.208984375, "learning_rate": 9.437170954604153e-05, "loss": 0.4958, "step": 177590 }, { "epoch": 8.820899970199662, "grad_norm": 0.1650390625, "learning_rate": 9.433197576239198e-05, "loss": 0.4911, "step": 177600 }, { "epoch": 8.821396642495282, "grad_norm": 0.1728515625, "learning_rate": 9.429224197874243e-05, "loss": 0.4948, "step": 177610 }, { "epoch": 8.8218933147909, "grad_norm": 0.1865234375, "learning_rate": 9.425250819509288e-05, "loss": 0.5156, "step": 177620 }, { "epoch": 8.82238998708652, "grad_norm": 0.185546875, "learning_rate": 9.421277441144334e-05, "loss": 0.4728, "step": 177630 }, { "epoch": 8.82288665938214, "grad_norm": 0.193359375, "learning_rate": 9.417304062779378e-05, "loss": 0.4963, "step": 177640 }, { "epoch": 8.823383331677759, "grad_norm": 0.1669921875, "learning_rate": 9.413330684414424e-05, "loss": 0.4614, "step": 177650 }, { "epoch": 8.823880003973379, "grad_norm": 0.20703125, "learning_rate": 9.409357306049468e-05, "loss": 0.4823, "step": 177660 }, { "epoch": 8.824376676268997, "grad_norm": 0.1796875, "learning_rate": 9.405383927684514e-05, "loss": 0.4824, "step": 177670 }, { "epoch": 8.824873348564617, "grad_norm": 0.171875, "learning_rate": 9.40141054931956e-05, "loss": 0.4904, "step": 177680 }, { "epoch": 8.825370020860236, "grad_norm": 0.166015625, "learning_rate": 9.397437170954606e-05, "loss": 0.5075, "step": 177690 }, { "epoch": 8.825866693155856, "grad_norm": 0.220703125, "learning_rate": 9.39346379258965e-05, "loss": 0.5011, "step": 177700 }, { "epoch": 8.826363365451476, "grad_norm": 0.2412109375, "learning_rate": 9.389490414224696e-05, "loss": 0.5239, "step": 177710 }, { "epoch": 8.826860037747094, "grad_norm": 0.197265625, "learning_rate": 9.38551703585974e-05, "loss": 0.4759, "step": 177720 }, { "epoch": 8.827356710042714, "grad_norm": 0.17578125, "learning_rate": 9.381543657494786e-05, "loss": 0.5076, "step": 177730 }, { "epoch": 8.827853382338333, "grad_norm": 0.17578125, "learning_rate": 9.37757027912983e-05, "loss": 0.44, "step": 177740 }, { "epoch": 8.828350054633953, "grad_norm": 0.2060546875, "learning_rate": 9.373596900764876e-05, "loss": 0.5002, "step": 177750 }, { "epoch": 8.828846726929571, "grad_norm": 0.18359375, "learning_rate": 9.36962352239992e-05, "loss": 0.4905, "step": 177760 }, { "epoch": 8.829343399225191, "grad_norm": 0.1943359375, "learning_rate": 9.365650144034966e-05, "loss": 0.4814, "step": 177770 }, { "epoch": 8.82984007152081, "grad_norm": 0.181640625, "learning_rate": 9.361676765670011e-05, "loss": 0.4869, "step": 177780 }, { "epoch": 8.83033674381643, "grad_norm": 0.181640625, "learning_rate": 9.357703387305057e-05, "loss": 0.4872, "step": 177790 }, { "epoch": 8.83083341611205, "grad_norm": 0.166015625, "learning_rate": 9.353730008940101e-05, "loss": 0.4742, "step": 177800 }, { "epoch": 8.831330088407668, "grad_norm": 0.1728515625, "learning_rate": 9.349756630575147e-05, "loss": 0.4995, "step": 177810 }, { "epoch": 8.831826760703288, "grad_norm": 0.1767578125, "learning_rate": 9.345783252210191e-05, "loss": 0.501, "step": 177820 }, { "epoch": 8.832323432998907, "grad_norm": 0.189453125, "learning_rate": 9.341809873845237e-05, "loss": 0.4939, "step": 177830 }, { "epoch": 8.832820105294527, "grad_norm": 0.1787109375, "learning_rate": 9.337836495480283e-05, "loss": 0.514, "step": 177840 }, { "epoch": 8.833316777590145, "grad_norm": 0.1650390625, "learning_rate": 9.333863117115329e-05, "loss": 0.4908, "step": 177850 }, { "epoch": 8.833813449885765, "grad_norm": 0.2021484375, "learning_rate": 9.329889738750373e-05, "loss": 0.4792, "step": 177860 }, { "epoch": 8.834310122181385, "grad_norm": 0.1650390625, "learning_rate": 9.325916360385419e-05, "loss": 0.4731, "step": 177870 }, { "epoch": 8.834806794477004, "grad_norm": 0.173828125, "learning_rate": 9.321942982020463e-05, "loss": 0.4529, "step": 177880 }, { "epoch": 8.835303466772624, "grad_norm": 0.1962890625, "learning_rate": 9.317969603655509e-05, "loss": 0.4739, "step": 177890 }, { "epoch": 8.835800139068242, "grad_norm": 0.185546875, "learning_rate": 9.313996225290553e-05, "loss": 0.4991, "step": 177900 }, { "epoch": 8.836296811363862, "grad_norm": 0.1787109375, "learning_rate": 9.310022846925599e-05, "loss": 0.4774, "step": 177910 }, { "epoch": 8.83679348365948, "grad_norm": 0.1923828125, "learning_rate": 9.306049468560644e-05, "loss": 0.5063, "step": 177920 }, { "epoch": 8.8372901559551, "grad_norm": 0.1806640625, "learning_rate": 9.30207609019569e-05, "loss": 0.5064, "step": 177930 }, { "epoch": 8.83778682825072, "grad_norm": 0.1884765625, "learning_rate": 9.298102711830734e-05, "loss": 0.5055, "step": 177940 }, { "epoch": 8.83828350054634, "grad_norm": 0.1787109375, "learning_rate": 9.29412933346578e-05, "loss": 0.5141, "step": 177950 }, { "epoch": 8.83878017284196, "grad_norm": 0.1748046875, "learning_rate": 9.290155955100824e-05, "loss": 0.4801, "step": 177960 }, { "epoch": 8.839276845137578, "grad_norm": 0.220703125, "learning_rate": 9.28618257673587e-05, "loss": 0.4867, "step": 177970 }, { "epoch": 8.839773517433198, "grad_norm": 0.19140625, "learning_rate": 9.282209198370916e-05, "loss": 0.502, "step": 177980 }, { "epoch": 8.840270189728816, "grad_norm": 0.1806640625, "learning_rate": 9.27823582000596e-05, "loss": 0.4746, "step": 177990 }, { "epoch": 8.840766862024436, "grad_norm": 0.228515625, "learning_rate": 9.274262441641006e-05, "loss": 0.4901, "step": 178000 }, { "epoch": 8.841263534320056, "grad_norm": 0.212890625, "learning_rate": 9.27028906327605e-05, "loss": 0.4745, "step": 178010 }, { "epoch": 8.841760206615675, "grad_norm": 0.1669921875, "learning_rate": 9.266315684911096e-05, "loss": 0.4777, "step": 178020 }, { "epoch": 8.842256878911295, "grad_norm": 0.1806640625, "learning_rate": 9.262342306546142e-05, "loss": 0.4853, "step": 178030 }, { "epoch": 8.842753551206913, "grad_norm": 0.177734375, "learning_rate": 9.258368928181188e-05, "loss": 0.4787, "step": 178040 }, { "epoch": 8.843250223502533, "grad_norm": 0.189453125, "learning_rate": 9.254395549816232e-05, "loss": 0.4891, "step": 178050 }, { "epoch": 8.843746895798152, "grad_norm": 0.2041015625, "learning_rate": 9.250422171451278e-05, "loss": 0.508, "step": 178060 }, { "epoch": 8.844243568093772, "grad_norm": 0.201171875, "learning_rate": 9.246448793086322e-05, "loss": 0.4948, "step": 178070 }, { "epoch": 8.844740240389392, "grad_norm": 0.1767578125, "learning_rate": 9.242475414721368e-05, "loss": 0.4857, "step": 178080 }, { "epoch": 8.84523691268501, "grad_norm": 0.17578125, "learning_rate": 9.238502036356412e-05, "loss": 0.4985, "step": 178090 }, { "epoch": 8.84573358498063, "grad_norm": 0.17578125, "learning_rate": 9.234528657991458e-05, "loss": 0.4708, "step": 178100 }, { "epoch": 8.846230257276249, "grad_norm": 0.2275390625, "learning_rate": 9.230555279626503e-05, "loss": 0.4864, "step": 178110 }, { "epoch": 8.846726929571869, "grad_norm": 0.181640625, "learning_rate": 9.226581901261548e-05, "loss": 0.4788, "step": 178120 }, { "epoch": 8.847223601867487, "grad_norm": 0.1826171875, "learning_rate": 9.222608522896593e-05, "loss": 0.536, "step": 178130 }, { "epoch": 8.847720274163107, "grad_norm": 0.1962890625, "learning_rate": 9.218635144531639e-05, "loss": 0.487, "step": 178140 }, { "epoch": 8.848216946458727, "grad_norm": 0.173828125, "learning_rate": 9.214661766166683e-05, "loss": 0.5071, "step": 178150 }, { "epoch": 8.848713618754346, "grad_norm": 0.1953125, "learning_rate": 9.210688387801729e-05, "loss": 0.4701, "step": 178160 }, { "epoch": 8.849210291049966, "grad_norm": 0.1923828125, "learning_rate": 9.206715009436773e-05, "loss": 0.4669, "step": 178170 }, { "epoch": 8.849706963345584, "grad_norm": 0.17578125, "learning_rate": 9.202741631071819e-05, "loss": 0.4869, "step": 178180 }, { "epoch": 8.850203635641204, "grad_norm": 0.1787109375, "learning_rate": 9.198768252706865e-05, "loss": 0.4847, "step": 178190 }, { "epoch": 8.850700307936822, "grad_norm": 0.19921875, "learning_rate": 9.19479487434191e-05, "loss": 0.5305, "step": 178200 }, { "epoch": 8.851196980232443, "grad_norm": 0.24609375, "learning_rate": 9.190821495976955e-05, "loss": 0.4845, "step": 178210 }, { "epoch": 8.851693652528063, "grad_norm": 0.1943359375, "learning_rate": 9.186848117612001e-05, "loss": 0.4765, "step": 178220 }, { "epoch": 8.852190324823681, "grad_norm": 0.2109375, "learning_rate": 9.182874739247045e-05, "loss": 0.4727, "step": 178230 }, { "epoch": 8.852686997119301, "grad_norm": 0.1826171875, "learning_rate": 9.178901360882091e-05, "loss": 0.4844, "step": 178240 }, { "epoch": 8.85318366941492, "grad_norm": 0.1845703125, "learning_rate": 9.174927982517135e-05, "loss": 0.4649, "step": 178250 }, { "epoch": 8.85368034171054, "grad_norm": 0.1669921875, "learning_rate": 9.170954604152181e-05, "loss": 0.5175, "step": 178260 }, { "epoch": 8.854177014006158, "grad_norm": 0.2001953125, "learning_rate": 9.166981225787226e-05, "loss": 0.5003, "step": 178270 }, { "epoch": 8.854673686301778, "grad_norm": 0.220703125, "learning_rate": 9.163007847422271e-05, "loss": 0.4849, "step": 178280 }, { "epoch": 8.855170358597398, "grad_norm": 0.169921875, "learning_rate": 9.159034469057316e-05, "loss": 0.4885, "step": 178290 }, { "epoch": 8.855667030893017, "grad_norm": 0.193359375, "learning_rate": 9.155061090692362e-05, "loss": 0.4961, "step": 178300 }, { "epoch": 8.856163703188637, "grad_norm": 0.1689453125, "learning_rate": 9.151087712327406e-05, "loss": 0.4878, "step": 178310 }, { "epoch": 8.856660375484255, "grad_norm": 0.1884765625, "learning_rate": 9.147114333962452e-05, "loss": 0.523, "step": 178320 }, { "epoch": 8.857157047779875, "grad_norm": 0.18359375, "learning_rate": 9.143140955597496e-05, "loss": 0.4829, "step": 178330 }, { "epoch": 8.857653720075493, "grad_norm": 0.185546875, "learning_rate": 9.139167577232542e-05, "loss": 0.4897, "step": 178340 }, { "epoch": 8.858150392371114, "grad_norm": 0.19140625, "learning_rate": 9.135194198867586e-05, "loss": 0.4924, "step": 178350 }, { "epoch": 8.858647064666734, "grad_norm": 0.1923828125, "learning_rate": 9.131220820502632e-05, "loss": 0.4894, "step": 178360 }, { "epoch": 8.859143736962352, "grad_norm": 0.2080078125, "learning_rate": 9.127247442137678e-05, "loss": 0.4551, "step": 178370 }, { "epoch": 8.859640409257972, "grad_norm": 0.1904296875, "learning_rate": 9.123274063772724e-05, "loss": 0.4811, "step": 178380 }, { "epoch": 8.86013708155359, "grad_norm": 0.1953125, "learning_rate": 9.11930068540777e-05, "loss": 0.4676, "step": 178390 }, { "epoch": 8.86063375384921, "grad_norm": 0.1845703125, "learning_rate": 9.115327307042814e-05, "loss": 0.4773, "step": 178400 }, { "epoch": 8.861130426144829, "grad_norm": 0.2001953125, "learning_rate": 9.11135392867786e-05, "loss": 0.4545, "step": 178410 }, { "epoch": 8.861627098440449, "grad_norm": 0.185546875, "learning_rate": 9.107380550312904e-05, "loss": 0.4835, "step": 178420 }, { "epoch": 8.86212377073607, "grad_norm": 0.1806640625, "learning_rate": 9.10340717194795e-05, "loss": 0.466, "step": 178430 }, { "epoch": 8.862620443031688, "grad_norm": 0.1728515625, "learning_rate": 9.099433793582994e-05, "loss": 0.4657, "step": 178440 }, { "epoch": 8.863117115327308, "grad_norm": 0.2001953125, "learning_rate": 9.09546041521804e-05, "loss": 0.5077, "step": 178450 }, { "epoch": 8.863613787622926, "grad_norm": 0.251953125, "learning_rate": 9.091487036853085e-05, "loss": 0.4968, "step": 178460 }, { "epoch": 8.864110459918546, "grad_norm": 0.189453125, "learning_rate": 9.08751365848813e-05, "loss": 0.508, "step": 178470 }, { "epoch": 8.864607132214164, "grad_norm": 0.189453125, "learning_rate": 9.083540280123175e-05, "loss": 0.4876, "step": 178480 }, { "epoch": 8.865103804509785, "grad_norm": 0.181640625, "learning_rate": 9.07956690175822e-05, "loss": 0.4909, "step": 178490 }, { "epoch": 8.865600476805405, "grad_norm": 0.2470703125, "learning_rate": 9.075593523393265e-05, "loss": 0.4989, "step": 178500 }, { "epoch": 8.866097149101023, "grad_norm": 0.19921875, "learning_rate": 9.071620145028311e-05, "loss": 0.5079, "step": 178510 }, { "epoch": 8.866593821396643, "grad_norm": 0.1806640625, "learning_rate": 9.067646766663355e-05, "loss": 0.4881, "step": 178520 }, { "epoch": 8.867090493692261, "grad_norm": 0.212890625, "learning_rate": 9.063673388298401e-05, "loss": 0.5281, "step": 178530 }, { "epoch": 8.867587165987882, "grad_norm": 0.1875, "learning_rate": 9.059700009933447e-05, "loss": 0.4902, "step": 178540 }, { "epoch": 8.8680838382835, "grad_norm": 0.171875, "learning_rate": 9.055726631568493e-05, "loss": 0.5051, "step": 178550 }, { "epoch": 8.86858051057912, "grad_norm": 0.1728515625, "learning_rate": 9.051753253203537e-05, "loss": 0.5191, "step": 178560 }, { "epoch": 8.86907718287474, "grad_norm": 0.1826171875, "learning_rate": 9.047779874838583e-05, "loss": 0.5161, "step": 178570 }, { "epoch": 8.869573855170358, "grad_norm": 0.1923828125, "learning_rate": 9.043806496473627e-05, "loss": 0.4767, "step": 178580 }, { "epoch": 8.870070527465979, "grad_norm": 0.1962890625, "learning_rate": 9.039833118108673e-05, "loss": 0.5076, "step": 178590 }, { "epoch": 8.870567199761597, "grad_norm": 0.1826171875, "learning_rate": 9.035859739743717e-05, "loss": 0.4949, "step": 178600 }, { "epoch": 8.871063872057217, "grad_norm": 0.197265625, "learning_rate": 9.031886361378763e-05, "loss": 0.5077, "step": 178610 }, { "epoch": 8.871560544352835, "grad_norm": 0.208984375, "learning_rate": 9.027912983013808e-05, "loss": 0.4732, "step": 178620 }, { "epoch": 8.872057216648455, "grad_norm": 0.16796875, "learning_rate": 9.023939604648853e-05, "loss": 0.4695, "step": 178630 }, { "epoch": 8.872553888944076, "grad_norm": 0.19921875, "learning_rate": 9.019966226283898e-05, "loss": 0.4374, "step": 178640 }, { "epoch": 8.873050561239694, "grad_norm": 0.2333984375, "learning_rate": 9.015992847918944e-05, "loss": 0.4909, "step": 178650 }, { "epoch": 8.873547233535314, "grad_norm": 0.1826171875, "learning_rate": 9.012019469553988e-05, "loss": 0.4758, "step": 178660 }, { "epoch": 8.874043905830932, "grad_norm": 0.197265625, "learning_rate": 9.008046091189034e-05, "loss": 0.4834, "step": 178670 }, { "epoch": 8.874540578126553, "grad_norm": 0.1884765625, "learning_rate": 9.004072712824078e-05, "loss": 0.5022, "step": 178680 }, { "epoch": 8.87503725042217, "grad_norm": 0.18359375, "learning_rate": 9.000099334459124e-05, "loss": 0.4888, "step": 178690 }, { "epoch": 8.875533922717791, "grad_norm": 0.2236328125, "learning_rate": 8.996125956094168e-05, "loss": 0.4553, "step": 178700 }, { "epoch": 8.876030595013411, "grad_norm": 0.1982421875, "learning_rate": 8.992152577729214e-05, "loss": 0.4859, "step": 178710 }, { "epoch": 8.87652726730903, "grad_norm": 0.216796875, "learning_rate": 8.98817919936426e-05, "loss": 0.5082, "step": 178720 }, { "epoch": 8.87702393960465, "grad_norm": 0.1845703125, "learning_rate": 8.984205820999306e-05, "loss": 0.508, "step": 178730 }, { "epoch": 8.877520611900268, "grad_norm": 0.2041015625, "learning_rate": 8.98023244263435e-05, "loss": 0.4816, "step": 178740 }, { "epoch": 8.878017284195888, "grad_norm": 0.1787109375, "learning_rate": 8.976259064269396e-05, "loss": 0.4698, "step": 178750 }, { "epoch": 8.878513956491506, "grad_norm": 0.181640625, "learning_rate": 8.97228568590444e-05, "loss": 0.4767, "step": 178760 }, { "epoch": 8.879010628787126, "grad_norm": 0.205078125, "learning_rate": 8.968312307539486e-05, "loss": 0.4936, "step": 178770 }, { "epoch": 8.879507301082745, "grad_norm": 0.1689453125, "learning_rate": 8.96433892917453e-05, "loss": 0.4932, "step": 178780 }, { "epoch": 8.880003973378365, "grad_norm": 0.216796875, "learning_rate": 8.960365550809576e-05, "loss": 0.5014, "step": 178790 }, { "epoch": 8.880500645673985, "grad_norm": 0.1865234375, "learning_rate": 8.956392172444621e-05, "loss": 0.4737, "step": 178800 }, { "epoch": 8.880997317969603, "grad_norm": 0.1845703125, "learning_rate": 8.952418794079667e-05, "loss": 0.4558, "step": 178810 }, { "epoch": 8.881493990265223, "grad_norm": 0.1865234375, "learning_rate": 8.948445415714712e-05, "loss": 0.511, "step": 178820 }, { "epoch": 8.881990662560842, "grad_norm": 0.2265625, "learning_rate": 8.944472037349757e-05, "loss": 0.4751, "step": 178830 }, { "epoch": 8.882487334856462, "grad_norm": 0.1640625, "learning_rate": 8.940498658984803e-05, "loss": 0.4934, "step": 178840 }, { "epoch": 8.88298400715208, "grad_norm": 0.2021484375, "learning_rate": 8.936525280619847e-05, "loss": 0.5163, "step": 178850 }, { "epoch": 8.8834806794477, "grad_norm": 0.1767578125, "learning_rate": 8.932551902254893e-05, "loss": 0.5046, "step": 178860 }, { "epoch": 8.88397735174332, "grad_norm": 0.1962890625, "learning_rate": 8.928578523889937e-05, "loss": 0.4613, "step": 178870 }, { "epoch": 8.884474024038939, "grad_norm": 0.177734375, "learning_rate": 8.924605145524983e-05, "loss": 0.5034, "step": 178880 }, { "epoch": 8.884970696334559, "grad_norm": 0.19921875, "learning_rate": 8.920631767160029e-05, "loss": 0.5281, "step": 178890 }, { "epoch": 8.885467368630177, "grad_norm": 0.177734375, "learning_rate": 8.916658388795075e-05, "loss": 0.5001, "step": 178900 }, { "epoch": 8.885964040925797, "grad_norm": 0.173828125, "learning_rate": 8.912685010430119e-05, "loss": 0.4436, "step": 178910 }, { "epoch": 8.886460713221416, "grad_norm": 0.2001953125, "learning_rate": 8.908711632065165e-05, "loss": 0.4834, "step": 178920 }, { "epoch": 8.886957385517036, "grad_norm": 0.1884765625, "learning_rate": 8.904738253700209e-05, "loss": 0.4922, "step": 178930 }, { "epoch": 8.887454057812656, "grad_norm": 0.1845703125, "learning_rate": 8.900764875335255e-05, "loss": 0.4558, "step": 178940 }, { "epoch": 8.887950730108274, "grad_norm": 0.19921875, "learning_rate": 8.8967914969703e-05, "loss": 0.5002, "step": 178950 }, { "epoch": 8.888447402403894, "grad_norm": 0.1748046875, "learning_rate": 8.892818118605345e-05, "loss": 0.4759, "step": 178960 }, { "epoch": 8.888944074699513, "grad_norm": 0.1689453125, "learning_rate": 8.88884474024039e-05, "loss": 0.4968, "step": 178970 }, { "epoch": 8.889440746995133, "grad_norm": 0.2041015625, "learning_rate": 8.884871361875435e-05, "loss": 0.4812, "step": 178980 }, { "epoch": 8.889937419290751, "grad_norm": 0.177734375, "learning_rate": 8.88089798351048e-05, "loss": 0.4873, "step": 178990 }, { "epoch": 8.890434091586371, "grad_norm": 0.177734375, "learning_rate": 8.876924605145526e-05, "loss": 0.4781, "step": 179000 }, { "epoch": 8.890930763881991, "grad_norm": 0.1904296875, "learning_rate": 8.87295122678057e-05, "loss": 0.4674, "step": 179010 }, { "epoch": 8.89142743617761, "grad_norm": 0.1904296875, "learning_rate": 8.868977848415616e-05, "loss": 0.4771, "step": 179020 }, { "epoch": 8.89192410847323, "grad_norm": 0.1953125, "learning_rate": 8.86500447005066e-05, "loss": 0.4913, "step": 179030 }, { "epoch": 8.892420780768848, "grad_norm": 0.1962890625, "learning_rate": 8.861031091685706e-05, "loss": 0.5114, "step": 179040 }, { "epoch": 8.892917453064468, "grad_norm": 0.1943359375, "learning_rate": 8.857057713320752e-05, "loss": 0.4954, "step": 179050 }, { "epoch": 8.893414125360087, "grad_norm": 0.17578125, "learning_rate": 8.853084334955796e-05, "loss": 0.5121, "step": 179060 }, { "epoch": 8.893910797655707, "grad_norm": 0.1787109375, "learning_rate": 8.849110956590842e-05, "loss": 0.5159, "step": 179070 }, { "epoch": 8.894407469951327, "grad_norm": 0.181640625, "learning_rate": 8.845137578225888e-05, "loss": 0.4863, "step": 179080 }, { "epoch": 8.894904142246945, "grad_norm": 0.1875, "learning_rate": 8.841164199860932e-05, "loss": 0.4863, "step": 179090 }, { "epoch": 8.895400814542565, "grad_norm": 0.177734375, "learning_rate": 8.837190821495978e-05, "loss": 0.516, "step": 179100 }, { "epoch": 8.895897486838184, "grad_norm": 0.1767578125, "learning_rate": 8.833217443131022e-05, "loss": 0.5159, "step": 179110 }, { "epoch": 8.896394159133804, "grad_norm": 0.201171875, "learning_rate": 8.829244064766068e-05, "loss": 0.4884, "step": 179120 }, { "epoch": 8.896890831429422, "grad_norm": 0.173828125, "learning_rate": 8.825270686401113e-05, "loss": 0.5105, "step": 179130 }, { "epoch": 8.897387503725042, "grad_norm": 0.1728515625, "learning_rate": 8.821297308036158e-05, "loss": 0.5287, "step": 179140 }, { "epoch": 8.897884176020662, "grad_norm": 0.1748046875, "learning_rate": 8.817323929671203e-05, "loss": 0.4891, "step": 179150 }, { "epoch": 8.89838084831628, "grad_norm": 0.1787109375, "learning_rate": 8.813350551306249e-05, "loss": 0.488, "step": 179160 }, { "epoch": 8.8988775206119, "grad_norm": 0.2099609375, "learning_rate": 8.809377172941293e-05, "loss": 0.4921, "step": 179170 }, { "epoch": 8.89937419290752, "grad_norm": 0.1962890625, "learning_rate": 8.805403794576339e-05, "loss": 0.5335, "step": 179180 }, { "epoch": 8.89987086520314, "grad_norm": 0.1796875, "learning_rate": 8.801430416211383e-05, "loss": 0.5144, "step": 179190 }, { "epoch": 8.900367537498758, "grad_norm": 0.17578125, "learning_rate": 8.797457037846429e-05, "loss": 0.5105, "step": 179200 }, { "epoch": 8.900864209794378, "grad_norm": 0.189453125, "learning_rate": 8.793483659481473e-05, "loss": 0.4774, "step": 179210 }, { "epoch": 8.901360882089996, "grad_norm": 0.2021484375, "learning_rate": 8.789510281116519e-05, "loss": 0.5074, "step": 179220 }, { "epoch": 8.901857554385616, "grad_norm": 0.1669921875, "learning_rate": 8.785536902751565e-05, "loss": 0.4918, "step": 179230 }, { "epoch": 8.902354226681236, "grad_norm": 0.177734375, "learning_rate": 8.781563524386611e-05, "loss": 0.4707, "step": 179240 }, { "epoch": 8.902850898976855, "grad_norm": 0.1982421875, "learning_rate": 8.777590146021657e-05, "loss": 0.4949, "step": 179250 }, { "epoch": 8.903347571272475, "grad_norm": 0.2138671875, "learning_rate": 8.773616767656701e-05, "loss": 0.4835, "step": 179260 }, { "epoch": 8.903844243568093, "grad_norm": 0.1962890625, "learning_rate": 8.769643389291747e-05, "loss": 0.4933, "step": 179270 }, { "epoch": 8.904340915863713, "grad_norm": 0.177734375, "learning_rate": 8.765670010926791e-05, "loss": 0.4601, "step": 179280 }, { "epoch": 8.904837588159332, "grad_norm": 0.17578125, "learning_rate": 8.761696632561837e-05, "loss": 0.4871, "step": 179290 }, { "epoch": 8.905334260454952, "grad_norm": 0.1826171875, "learning_rate": 8.757723254196881e-05, "loss": 0.4939, "step": 179300 }, { "epoch": 8.905830932750572, "grad_norm": 0.1904296875, "learning_rate": 8.753749875831927e-05, "loss": 0.4796, "step": 179310 }, { "epoch": 8.90632760504619, "grad_norm": 0.2001953125, "learning_rate": 8.749776497466972e-05, "loss": 0.4798, "step": 179320 }, { "epoch": 8.90682427734181, "grad_norm": 0.20703125, "learning_rate": 8.745803119102017e-05, "loss": 0.4824, "step": 179330 }, { "epoch": 8.907320949637429, "grad_norm": 0.166015625, "learning_rate": 8.741829740737062e-05, "loss": 0.5187, "step": 179340 }, { "epoch": 8.907817621933049, "grad_norm": 0.1748046875, "learning_rate": 8.737856362372108e-05, "loss": 0.5074, "step": 179350 }, { "epoch": 8.908314294228667, "grad_norm": 0.18359375, "learning_rate": 8.733882984007152e-05, "loss": 0.4614, "step": 179360 }, { "epoch": 8.908810966524287, "grad_norm": 0.1796875, "learning_rate": 8.729909605642198e-05, "loss": 0.4654, "step": 179370 }, { "epoch": 8.909307638819907, "grad_norm": 0.2021484375, "learning_rate": 8.725936227277242e-05, "loss": 0.4884, "step": 179380 }, { "epoch": 8.909804311115526, "grad_norm": 0.2314453125, "learning_rate": 8.721962848912288e-05, "loss": 0.5112, "step": 179390 }, { "epoch": 8.910300983411146, "grad_norm": 0.1943359375, "learning_rate": 8.717989470547334e-05, "loss": 0.4858, "step": 179400 }, { "epoch": 8.910797655706764, "grad_norm": 0.189453125, "learning_rate": 8.714016092182378e-05, "loss": 0.5039, "step": 179410 }, { "epoch": 8.911294328002384, "grad_norm": 0.1748046875, "learning_rate": 8.710042713817424e-05, "loss": 0.4878, "step": 179420 }, { "epoch": 8.911791000298003, "grad_norm": 0.2001953125, "learning_rate": 8.70606933545247e-05, "loss": 0.4789, "step": 179430 }, { "epoch": 8.912287672593623, "grad_norm": 0.18359375, "learning_rate": 8.702095957087514e-05, "loss": 0.4978, "step": 179440 }, { "epoch": 8.912784344889243, "grad_norm": 0.1806640625, "learning_rate": 8.69812257872256e-05, "loss": 0.4867, "step": 179450 }, { "epoch": 8.913281017184861, "grad_norm": 0.1884765625, "learning_rate": 8.694149200357604e-05, "loss": 0.4864, "step": 179460 }, { "epoch": 8.913777689480481, "grad_norm": 0.1826171875, "learning_rate": 8.69017582199265e-05, "loss": 0.489, "step": 179470 }, { "epoch": 8.9142743617761, "grad_norm": 0.19921875, "learning_rate": 8.686202443627695e-05, "loss": 0.487, "step": 179480 }, { "epoch": 8.91477103407172, "grad_norm": 0.2001953125, "learning_rate": 8.68222906526274e-05, "loss": 0.4775, "step": 179490 }, { "epoch": 8.915267706367338, "grad_norm": 0.197265625, "learning_rate": 8.678255686897785e-05, "loss": 0.4653, "step": 179500 }, { "epoch": 8.915764378662958, "grad_norm": 0.1767578125, "learning_rate": 8.67428230853283e-05, "loss": 0.4398, "step": 179510 }, { "epoch": 8.916261050958578, "grad_norm": 0.1845703125, "learning_rate": 8.670308930167875e-05, "loss": 0.507, "step": 179520 }, { "epoch": 8.916757723254197, "grad_norm": 0.189453125, "learning_rate": 8.666335551802921e-05, "loss": 0.4761, "step": 179530 }, { "epoch": 8.917254395549817, "grad_norm": 0.19921875, "learning_rate": 8.662362173437965e-05, "loss": 0.4468, "step": 179540 }, { "epoch": 8.917751067845435, "grad_norm": 0.1787109375, "learning_rate": 8.658388795073011e-05, "loss": 0.4619, "step": 179550 }, { "epoch": 8.918247740141055, "grad_norm": 0.203125, "learning_rate": 8.654415416708055e-05, "loss": 0.4887, "step": 179560 }, { "epoch": 8.918744412436673, "grad_norm": 0.1904296875, "learning_rate": 8.650442038343101e-05, "loss": 0.4776, "step": 179570 }, { "epoch": 8.919241084732294, "grad_norm": 0.2021484375, "learning_rate": 8.646468659978147e-05, "loss": 0.5162, "step": 179580 }, { "epoch": 8.919737757027914, "grad_norm": 0.201171875, "learning_rate": 8.642495281613193e-05, "loss": 0.4993, "step": 179590 }, { "epoch": 8.920234429323532, "grad_norm": 0.1953125, "learning_rate": 8.638521903248237e-05, "loss": 0.4697, "step": 179600 }, { "epoch": 8.920731101619152, "grad_norm": 0.2119140625, "learning_rate": 8.634548524883283e-05, "loss": 0.4912, "step": 179610 }, { "epoch": 8.92122777391477, "grad_norm": 0.1865234375, "learning_rate": 8.630575146518327e-05, "loss": 0.4815, "step": 179620 }, { "epoch": 8.92172444621039, "grad_norm": 0.193359375, "learning_rate": 8.626601768153373e-05, "loss": 0.4812, "step": 179630 }, { "epoch": 8.922221118506009, "grad_norm": 0.1708984375, "learning_rate": 8.622628389788418e-05, "loss": 0.4815, "step": 179640 }, { "epoch": 8.922717790801629, "grad_norm": 0.1875, "learning_rate": 8.618655011423463e-05, "loss": 0.5083, "step": 179650 }, { "epoch": 8.92321446309725, "grad_norm": 0.20703125, "learning_rate": 8.614681633058509e-05, "loss": 0.4818, "step": 179660 }, { "epoch": 8.923711135392868, "grad_norm": 0.1767578125, "learning_rate": 8.610708254693554e-05, "loss": 0.4998, "step": 179670 }, { "epoch": 8.924207807688488, "grad_norm": 0.177734375, "learning_rate": 8.606734876328599e-05, "loss": 0.48, "step": 179680 }, { "epoch": 8.924704479984106, "grad_norm": 0.1865234375, "learning_rate": 8.602761497963644e-05, "loss": 0.503, "step": 179690 }, { "epoch": 8.925201152279726, "grad_norm": 0.1787109375, "learning_rate": 8.59878811959869e-05, "loss": 0.4443, "step": 179700 }, { "epoch": 8.925697824575344, "grad_norm": 0.177734375, "learning_rate": 8.594814741233734e-05, "loss": 0.4841, "step": 179710 }, { "epoch": 8.926194496870965, "grad_norm": 0.1826171875, "learning_rate": 8.59084136286878e-05, "loss": 0.5048, "step": 179720 }, { "epoch": 8.926691169166585, "grad_norm": 0.201171875, "learning_rate": 8.586867984503824e-05, "loss": 0.5001, "step": 179730 }, { "epoch": 8.927187841462203, "grad_norm": 0.1708984375, "learning_rate": 8.58289460613887e-05, "loss": 0.4688, "step": 179740 }, { "epoch": 8.927684513757823, "grad_norm": 0.1884765625, "learning_rate": 8.578921227773916e-05, "loss": 0.4602, "step": 179750 }, { "epoch": 8.928181186053441, "grad_norm": 0.236328125, "learning_rate": 8.57494784940896e-05, "loss": 0.505, "step": 179760 }, { "epoch": 8.928677858349062, "grad_norm": 0.1708984375, "learning_rate": 8.570974471044006e-05, "loss": 0.4495, "step": 179770 }, { "epoch": 8.92917453064468, "grad_norm": 0.1962890625, "learning_rate": 8.567001092679052e-05, "loss": 0.4841, "step": 179780 }, { "epoch": 8.9296712029403, "grad_norm": 0.1806640625, "learning_rate": 8.563027714314096e-05, "loss": 0.4862, "step": 179790 }, { "epoch": 8.93016787523592, "grad_norm": 0.181640625, "learning_rate": 8.559054335949142e-05, "loss": 0.5101, "step": 179800 }, { "epoch": 8.930664547531538, "grad_norm": 0.2216796875, "learning_rate": 8.555080957584186e-05, "loss": 0.4651, "step": 179810 }, { "epoch": 8.931161219827159, "grad_norm": 0.20703125, "learning_rate": 8.551107579219232e-05, "loss": 0.4696, "step": 179820 }, { "epoch": 8.931657892122777, "grad_norm": 0.1982421875, "learning_rate": 8.547134200854277e-05, "loss": 0.5091, "step": 179830 }, { "epoch": 8.932154564418397, "grad_norm": 0.19140625, "learning_rate": 8.543160822489322e-05, "loss": 0.4927, "step": 179840 }, { "epoch": 8.932651236714015, "grad_norm": 0.1806640625, "learning_rate": 8.539187444124367e-05, "loss": 0.4911, "step": 179850 }, { "epoch": 8.933147909009636, "grad_norm": 0.1943359375, "learning_rate": 8.535214065759413e-05, "loss": 0.52, "step": 179860 }, { "epoch": 8.933644581305256, "grad_norm": 0.2001953125, "learning_rate": 8.531240687394457e-05, "loss": 0.4781, "step": 179870 }, { "epoch": 8.934141253600874, "grad_norm": 0.1845703125, "learning_rate": 8.527267309029503e-05, "loss": 0.4909, "step": 179880 }, { "epoch": 8.934637925896494, "grad_norm": 0.205078125, "learning_rate": 8.523293930664547e-05, "loss": 0.4733, "step": 179890 }, { "epoch": 8.935134598192112, "grad_norm": 0.203125, "learning_rate": 8.519320552299593e-05, "loss": 0.499, "step": 179900 }, { "epoch": 8.935631270487733, "grad_norm": 0.1708984375, "learning_rate": 8.515347173934637e-05, "loss": 0.4652, "step": 179910 }, { "epoch": 8.93612794278335, "grad_norm": 0.1875, "learning_rate": 8.511373795569683e-05, "loss": 0.4774, "step": 179920 }, { "epoch": 8.936624615078971, "grad_norm": 0.2236328125, "learning_rate": 8.507400417204729e-05, "loss": 0.5377, "step": 179930 }, { "epoch": 8.937121287374591, "grad_norm": 0.1845703125, "learning_rate": 8.503427038839775e-05, "loss": 0.4984, "step": 179940 }, { "epoch": 8.93761795967021, "grad_norm": 0.193359375, "learning_rate": 8.499453660474819e-05, "loss": 0.4954, "step": 179950 }, { "epoch": 8.93811463196583, "grad_norm": 0.1767578125, "learning_rate": 8.495480282109865e-05, "loss": 0.4871, "step": 179960 }, { "epoch": 8.938611304261448, "grad_norm": 0.185546875, "learning_rate": 8.49150690374491e-05, "loss": 0.4923, "step": 179970 }, { "epoch": 8.939107976557068, "grad_norm": 0.232421875, "learning_rate": 8.487533525379955e-05, "loss": 0.4952, "step": 179980 }, { "epoch": 8.939604648852686, "grad_norm": 0.193359375, "learning_rate": 8.483560147015e-05, "loss": 0.5052, "step": 179990 }, { "epoch": 8.940101321148306, "grad_norm": 0.1865234375, "learning_rate": 8.479586768650045e-05, "loss": 0.4791, "step": 180000 }, { "epoch": 8.940597993443927, "grad_norm": 0.1875, "learning_rate": 8.47561339028509e-05, "loss": 0.4606, "step": 180010 }, { "epoch": 8.941094665739545, "grad_norm": 0.1865234375, "learning_rate": 8.471640011920136e-05, "loss": 0.4786, "step": 180020 }, { "epoch": 8.941591338035165, "grad_norm": 0.2060546875, "learning_rate": 8.46766663355518e-05, "loss": 0.468, "step": 180030 }, { "epoch": 8.942088010330783, "grad_norm": 0.1796875, "learning_rate": 8.463693255190226e-05, "loss": 0.4834, "step": 180040 }, { "epoch": 8.942584682626403, "grad_norm": 0.2021484375, "learning_rate": 8.45971987682527e-05, "loss": 0.4883, "step": 180050 }, { "epoch": 8.943081354922022, "grad_norm": 0.19140625, "learning_rate": 8.455746498460316e-05, "loss": 0.4961, "step": 180060 }, { "epoch": 8.943578027217642, "grad_norm": 0.1640625, "learning_rate": 8.451773120095362e-05, "loss": 0.4694, "step": 180070 }, { "epoch": 8.944074699513262, "grad_norm": 0.1865234375, "learning_rate": 8.447799741730406e-05, "loss": 0.4684, "step": 180080 }, { "epoch": 8.94457137180888, "grad_norm": 0.2158203125, "learning_rate": 8.443826363365452e-05, "loss": 0.4963, "step": 180090 }, { "epoch": 8.9450680441045, "grad_norm": 0.2158203125, "learning_rate": 8.439852985000498e-05, "loss": 0.482, "step": 180100 }, { "epoch": 8.945564716400119, "grad_norm": 0.20703125, "learning_rate": 8.435879606635542e-05, "loss": 0.5205, "step": 180110 }, { "epoch": 8.946061388695739, "grad_norm": 0.1787109375, "learning_rate": 8.431906228270588e-05, "loss": 0.4627, "step": 180120 }, { "epoch": 8.946558060991357, "grad_norm": 0.197265625, "learning_rate": 8.427932849905634e-05, "loss": 0.4897, "step": 180130 }, { "epoch": 8.947054733286977, "grad_norm": 0.1826171875, "learning_rate": 8.423959471540678e-05, "loss": 0.4652, "step": 180140 }, { "epoch": 8.947551405582598, "grad_norm": 0.201171875, "learning_rate": 8.419986093175724e-05, "loss": 0.5093, "step": 180150 }, { "epoch": 8.948048077878216, "grad_norm": 0.18359375, "learning_rate": 8.416012714810768e-05, "loss": 0.4991, "step": 180160 }, { "epoch": 8.948544750173836, "grad_norm": 0.1669921875, "learning_rate": 8.412039336445814e-05, "loss": 0.4651, "step": 180170 }, { "epoch": 8.949041422469454, "grad_norm": 0.1767578125, "learning_rate": 8.408065958080859e-05, "loss": 0.4559, "step": 180180 }, { "epoch": 8.949538094765074, "grad_norm": 0.193359375, "learning_rate": 8.404092579715904e-05, "loss": 0.5146, "step": 180190 }, { "epoch": 8.950034767060693, "grad_norm": 0.1953125, "learning_rate": 8.400119201350949e-05, "loss": 0.495, "step": 180200 }, { "epoch": 8.950531439356313, "grad_norm": 0.19921875, "learning_rate": 8.396145822985994e-05, "loss": 0.4924, "step": 180210 }, { "epoch": 8.951028111651931, "grad_norm": 0.1826171875, "learning_rate": 8.392172444621039e-05, "loss": 0.4889, "step": 180220 }, { "epoch": 8.951524783947551, "grad_norm": 0.18359375, "learning_rate": 8.388199066256085e-05, "loss": 0.4817, "step": 180230 }, { "epoch": 8.952021456243171, "grad_norm": 0.1796875, "learning_rate": 8.384225687891129e-05, "loss": 0.4716, "step": 180240 }, { "epoch": 8.95251812853879, "grad_norm": 0.2138671875, "learning_rate": 8.380252309526175e-05, "loss": 0.501, "step": 180250 }, { "epoch": 8.95301480083441, "grad_norm": 0.2021484375, "learning_rate": 8.37627893116122e-05, "loss": 0.4933, "step": 180260 }, { "epoch": 8.953511473130028, "grad_norm": 0.1845703125, "learning_rate": 8.372305552796265e-05, "loss": 0.4752, "step": 180270 }, { "epoch": 8.954008145425648, "grad_norm": 0.18359375, "learning_rate": 8.368332174431311e-05, "loss": 0.4896, "step": 180280 }, { "epoch": 8.954504817721267, "grad_norm": 0.220703125, "learning_rate": 8.364358796066357e-05, "loss": 0.4925, "step": 180290 }, { "epoch": 8.955001490016887, "grad_norm": 0.185546875, "learning_rate": 8.360385417701401e-05, "loss": 0.5158, "step": 180300 }, { "epoch": 8.955498162312507, "grad_norm": 0.2138671875, "learning_rate": 8.356412039336447e-05, "loss": 0.4804, "step": 180310 }, { "epoch": 8.955994834608125, "grad_norm": 0.1943359375, "learning_rate": 8.352438660971491e-05, "loss": 0.4729, "step": 180320 }, { "epoch": 8.956491506903745, "grad_norm": 0.201171875, "learning_rate": 8.348465282606537e-05, "loss": 0.5196, "step": 180330 }, { "epoch": 8.956988179199364, "grad_norm": 0.1748046875, "learning_rate": 8.344491904241582e-05, "loss": 0.4813, "step": 180340 }, { "epoch": 8.957484851494984, "grad_norm": 0.2197265625, "learning_rate": 8.340518525876627e-05, "loss": 0.4854, "step": 180350 }, { "epoch": 8.957981523790602, "grad_norm": 0.1787109375, "learning_rate": 8.336545147511672e-05, "loss": 0.4981, "step": 180360 }, { "epoch": 8.958478196086222, "grad_norm": 0.1953125, "learning_rate": 8.332571769146717e-05, "loss": 0.5089, "step": 180370 }, { "epoch": 8.958974868381842, "grad_norm": 0.2080078125, "learning_rate": 8.328598390781762e-05, "loss": 0.4921, "step": 180380 }, { "epoch": 8.95947154067746, "grad_norm": 0.1767578125, "learning_rate": 8.324625012416808e-05, "loss": 0.534, "step": 180390 }, { "epoch": 8.95996821297308, "grad_norm": 0.2001953125, "learning_rate": 8.320651634051852e-05, "loss": 0.5196, "step": 180400 }, { "epoch": 8.9604648852687, "grad_norm": 0.177734375, "learning_rate": 8.316678255686898e-05, "loss": 0.4891, "step": 180410 }, { "epoch": 8.96096155756432, "grad_norm": 0.2236328125, "learning_rate": 8.312704877321942e-05, "loss": 0.5117, "step": 180420 }, { "epoch": 8.961458229859938, "grad_norm": 0.1875, "learning_rate": 8.308731498956988e-05, "loss": 0.4737, "step": 180430 }, { "epoch": 8.961954902155558, "grad_norm": 0.1845703125, "learning_rate": 8.304758120592034e-05, "loss": 0.4806, "step": 180440 }, { "epoch": 8.962451574451178, "grad_norm": 0.1845703125, "learning_rate": 8.30078474222708e-05, "loss": 0.5053, "step": 180450 }, { "epoch": 8.962948246746796, "grad_norm": 0.19140625, "learning_rate": 8.296811363862124e-05, "loss": 0.5095, "step": 180460 }, { "epoch": 8.963444919042416, "grad_norm": 0.1708984375, "learning_rate": 8.29283798549717e-05, "loss": 0.4786, "step": 180470 }, { "epoch": 8.963941591338035, "grad_norm": 0.1865234375, "learning_rate": 8.288864607132214e-05, "loss": 0.5075, "step": 180480 }, { "epoch": 8.964438263633655, "grad_norm": 0.193359375, "learning_rate": 8.28489122876726e-05, "loss": 0.4938, "step": 180490 }, { "epoch": 8.964934935929273, "grad_norm": 0.1943359375, "learning_rate": 8.280917850402306e-05, "loss": 0.5019, "step": 180500 }, { "epoch": 8.965431608224893, "grad_norm": 0.1845703125, "learning_rate": 8.27694447203735e-05, "loss": 0.475, "step": 180510 }, { "epoch": 8.965928280520513, "grad_norm": 0.19921875, "learning_rate": 8.272971093672396e-05, "loss": 0.4328, "step": 180520 }, { "epoch": 8.966424952816132, "grad_norm": 0.1875, "learning_rate": 8.26899771530744e-05, "loss": 0.5268, "step": 180530 }, { "epoch": 8.966921625111752, "grad_norm": 0.185546875, "learning_rate": 8.265024336942486e-05, "loss": 0.4776, "step": 180540 }, { "epoch": 8.96741829740737, "grad_norm": 0.1826171875, "learning_rate": 8.261050958577531e-05, "loss": 0.4855, "step": 180550 }, { "epoch": 8.96791496970299, "grad_norm": 0.1826171875, "learning_rate": 8.257077580212576e-05, "loss": 0.5095, "step": 180560 }, { "epoch": 8.968411641998609, "grad_norm": 0.1943359375, "learning_rate": 8.253104201847621e-05, "loss": 0.4727, "step": 180570 }, { "epoch": 8.968908314294229, "grad_norm": 0.1865234375, "learning_rate": 8.249130823482667e-05, "loss": 0.5032, "step": 180580 }, { "epoch": 8.969404986589847, "grad_norm": 0.201171875, "learning_rate": 8.245157445117711e-05, "loss": 0.4954, "step": 180590 }, { "epoch": 8.969901658885467, "grad_norm": 0.1904296875, "learning_rate": 8.241184066752757e-05, "loss": 0.4854, "step": 180600 }, { "epoch": 8.970398331181087, "grad_norm": 0.1953125, "learning_rate": 8.237210688387801e-05, "loss": 0.495, "step": 180610 }, { "epoch": 8.970895003476706, "grad_norm": 0.203125, "learning_rate": 8.233237310022847e-05, "loss": 0.4903, "step": 180620 }, { "epoch": 8.971391675772326, "grad_norm": 0.1923828125, "learning_rate": 8.229263931657893e-05, "loss": 0.4724, "step": 180630 }, { "epoch": 8.971888348067944, "grad_norm": 0.19140625, "learning_rate": 8.225290553292939e-05, "loss": 0.5086, "step": 180640 }, { "epoch": 8.972385020363564, "grad_norm": 0.2333984375, "learning_rate": 8.221317174927983e-05, "loss": 0.496, "step": 180650 }, { "epoch": 8.972881692659183, "grad_norm": 0.1904296875, "learning_rate": 8.217343796563029e-05, "loss": 0.4937, "step": 180660 }, { "epoch": 8.973378364954803, "grad_norm": 0.232421875, "learning_rate": 8.213370418198073e-05, "loss": 0.4648, "step": 180670 }, { "epoch": 8.973875037250423, "grad_norm": 0.2001953125, "learning_rate": 8.209397039833119e-05, "loss": 0.4768, "step": 180680 }, { "epoch": 8.974371709546041, "grad_norm": 0.1884765625, "learning_rate": 8.205423661468163e-05, "loss": 0.4783, "step": 180690 }, { "epoch": 8.974868381841661, "grad_norm": 0.2236328125, "learning_rate": 8.201450283103209e-05, "loss": 0.4717, "step": 180700 }, { "epoch": 8.97536505413728, "grad_norm": 0.244140625, "learning_rate": 8.197476904738254e-05, "loss": 0.4951, "step": 180710 }, { "epoch": 8.9758617264329, "grad_norm": 0.1669921875, "learning_rate": 8.1935035263733e-05, "loss": 0.4896, "step": 180720 }, { "epoch": 8.976358398728518, "grad_norm": 0.1904296875, "learning_rate": 8.189530148008344e-05, "loss": 0.4844, "step": 180730 }, { "epoch": 8.976855071024138, "grad_norm": 0.2138671875, "learning_rate": 8.18555676964339e-05, "loss": 0.4644, "step": 180740 }, { "epoch": 8.977351743319758, "grad_norm": 0.1943359375, "learning_rate": 8.181583391278434e-05, "loss": 0.4718, "step": 180750 }, { "epoch": 8.977848415615377, "grad_norm": 0.1826171875, "learning_rate": 8.17761001291348e-05, "loss": 0.4921, "step": 180760 }, { "epoch": 8.978345087910997, "grad_norm": 0.1669921875, "learning_rate": 8.173636634548524e-05, "loss": 0.467, "step": 180770 }, { "epoch": 8.978841760206615, "grad_norm": 0.181640625, "learning_rate": 8.16966325618357e-05, "loss": 0.5066, "step": 180780 }, { "epoch": 8.979338432502235, "grad_norm": 0.18359375, "learning_rate": 8.165689877818616e-05, "loss": 0.5071, "step": 180790 }, { "epoch": 8.979835104797854, "grad_norm": 0.2041015625, "learning_rate": 8.161716499453662e-05, "loss": 0.4895, "step": 180800 }, { "epoch": 8.980331777093474, "grad_norm": 0.1826171875, "learning_rate": 8.157743121088706e-05, "loss": 0.5055, "step": 180810 }, { "epoch": 8.980828449389094, "grad_norm": 0.1865234375, "learning_rate": 8.153769742723752e-05, "loss": 0.4669, "step": 180820 }, { "epoch": 8.981325121684712, "grad_norm": 0.2236328125, "learning_rate": 8.149796364358796e-05, "loss": 0.5243, "step": 180830 }, { "epoch": 8.981821793980332, "grad_norm": 0.189453125, "learning_rate": 8.145822985993842e-05, "loss": 0.4916, "step": 180840 }, { "epoch": 8.98231846627595, "grad_norm": 0.1669921875, "learning_rate": 8.141849607628886e-05, "loss": 0.4769, "step": 180850 }, { "epoch": 8.98281513857157, "grad_norm": 0.1845703125, "learning_rate": 8.137876229263932e-05, "loss": 0.466, "step": 180860 }, { "epoch": 8.983311810867189, "grad_norm": 0.22265625, "learning_rate": 8.133902850898977e-05, "loss": 0.524, "step": 180870 }, { "epoch": 8.983808483162809, "grad_norm": 0.193359375, "learning_rate": 8.129929472534022e-05, "loss": 0.5201, "step": 180880 }, { "epoch": 8.98430515545843, "grad_norm": 0.18359375, "learning_rate": 8.125956094169067e-05, "loss": 0.5019, "step": 180890 }, { "epoch": 8.984801827754048, "grad_norm": 0.173828125, "learning_rate": 8.121982715804113e-05, "loss": 0.4948, "step": 180900 }, { "epoch": 8.985298500049668, "grad_norm": 0.19921875, "learning_rate": 8.118009337439157e-05, "loss": 0.5, "step": 180910 }, { "epoch": 8.985795172345286, "grad_norm": 0.1865234375, "learning_rate": 8.114035959074203e-05, "loss": 0.4749, "step": 180920 }, { "epoch": 8.986291844640906, "grad_norm": 0.201171875, "learning_rate": 8.110062580709249e-05, "loss": 0.47, "step": 180930 }, { "epoch": 8.986788516936524, "grad_norm": 0.197265625, "learning_rate": 8.106089202344293e-05, "loss": 0.4902, "step": 180940 }, { "epoch": 8.987285189232145, "grad_norm": 0.19140625, "learning_rate": 8.102115823979339e-05, "loss": 0.5165, "step": 180950 }, { "epoch": 8.987781861527765, "grad_norm": 0.1865234375, "learning_rate": 8.098142445614383e-05, "loss": 0.4855, "step": 180960 }, { "epoch": 8.988278533823383, "grad_norm": 0.2109375, "learning_rate": 8.094169067249429e-05, "loss": 0.5052, "step": 180970 }, { "epoch": 8.988775206119003, "grad_norm": 0.2021484375, "learning_rate": 8.090195688884475e-05, "loss": 0.4728, "step": 180980 }, { "epoch": 8.989271878414621, "grad_norm": 0.1904296875, "learning_rate": 8.08622231051952e-05, "loss": 0.483, "step": 180990 }, { "epoch": 8.989768550710242, "grad_norm": 0.171875, "learning_rate": 8.082248932154565e-05, "loss": 0.4724, "step": 181000 }, { "epoch": 8.99026522300586, "grad_norm": 0.1806640625, "learning_rate": 8.078275553789611e-05, "loss": 0.4849, "step": 181010 }, { "epoch": 8.99076189530148, "grad_norm": 0.228515625, "learning_rate": 8.074302175424655e-05, "loss": 0.486, "step": 181020 }, { "epoch": 8.9912585675971, "grad_norm": 0.171875, "learning_rate": 8.070328797059701e-05, "loss": 0.5072, "step": 181030 }, { "epoch": 8.991755239892719, "grad_norm": 0.205078125, "learning_rate": 8.066355418694745e-05, "loss": 0.525, "step": 181040 }, { "epoch": 8.992251912188339, "grad_norm": 0.1728515625, "learning_rate": 8.062382040329791e-05, "loss": 0.4892, "step": 181050 }, { "epoch": 8.992748584483957, "grad_norm": 0.177734375, "learning_rate": 8.058408661964836e-05, "loss": 0.4923, "step": 181060 }, { "epoch": 8.993245256779577, "grad_norm": 0.2060546875, "learning_rate": 8.054435283599881e-05, "loss": 0.4851, "step": 181070 }, { "epoch": 8.993741929075195, "grad_norm": 0.1708984375, "learning_rate": 8.050461905234926e-05, "loss": 0.4845, "step": 181080 }, { "epoch": 8.994238601370816, "grad_norm": 0.2021484375, "learning_rate": 8.046488526869972e-05, "loss": 0.4927, "step": 181090 }, { "epoch": 8.994735273666436, "grad_norm": 0.21484375, "learning_rate": 8.042515148505016e-05, "loss": 0.5307, "step": 181100 }, { "epoch": 8.995231945962054, "grad_norm": 0.18359375, "learning_rate": 8.038541770140062e-05, "loss": 0.4963, "step": 181110 }, { "epoch": 8.995728618257674, "grad_norm": 0.1845703125, "learning_rate": 8.034568391775106e-05, "loss": 0.5034, "step": 181120 }, { "epoch": 8.996225290553292, "grad_norm": 0.185546875, "learning_rate": 8.030595013410152e-05, "loss": 0.5045, "step": 181130 }, { "epoch": 8.996721962848913, "grad_norm": 0.1640625, "learning_rate": 8.026621635045198e-05, "loss": 0.4621, "step": 181140 }, { "epoch": 8.997218635144531, "grad_norm": 0.185546875, "learning_rate": 8.022648256680244e-05, "loss": 0.5041, "step": 181150 }, { "epoch": 8.997715307440151, "grad_norm": 0.193359375, "learning_rate": 8.018674878315288e-05, "loss": 0.477, "step": 181160 }, { "epoch": 8.998211979735771, "grad_norm": 0.2158203125, "learning_rate": 8.014701499950334e-05, "loss": 0.5047, "step": 181170 }, { "epoch": 8.99870865203139, "grad_norm": 0.181640625, "learning_rate": 8.010728121585378e-05, "loss": 0.5087, "step": 181180 }, { "epoch": 8.99920532432701, "grad_norm": 0.1875, "learning_rate": 8.006754743220424e-05, "loss": 0.495, "step": 181190 }, { "epoch": 8.999701996622628, "grad_norm": 0.197265625, "learning_rate": 8.002781364855468e-05, "loss": 0.4698, "step": 181200 }, { "epoch": 9.000198668918248, "grad_norm": 0.193359375, "learning_rate": 7.998807986490514e-05, "loss": 0.4873, "step": 181210 }, { "epoch": 9.000695341213866, "grad_norm": 0.19921875, "learning_rate": 7.994834608125559e-05, "loss": 0.4846, "step": 181220 }, { "epoch": 9.001192013509487, "grad_norm": 0.189453125, "learning_rate": 7.990861229760604e-05, "loss": 0.4822, "step": 181230 }, { "epoch": 9.001688685805107, "grad_norm": 0.2177734375, "learning_rate": 7.986887851395649e-05, "loss": 0.4589, "step": 181240 }, { "epoch": 9.002185358100725, "grad_norm": 0.228515625, "learning_rate": 7.982914473030695e-05, "loss": 0.5168, "step": 181250 }, { "epoch": 9.002682030396345, "grad_norm": 0.1767578125, "learning_rate": 7.978941094665739e-05, "loss": 0.5155, "step": 181260 }, { "epoch": 9.003178702691963, "grad_norm": 0.1884765625, "learning_rate": 7.974967716300785e-05, "loss": 0.4748, "step": 181270 }, { "epoch": 9.003675374987584, "grad_norm": 0.189453125, "learning_rate": 7.970994337935829e-05, "loss": 0.4775, "step": 181280 }, { "epoch": 9.004172047283202, "grad_norm": 0.236328125, "learning_rate": 7.967020959570875e-05, "loss": 0.4732, "step": 181290 }, { "epoch": 9.004668719578822, "grad_norm": 0.1923828125, "learning_rate": 7.963047581205921e-05, "loss": 0.4981, "step": 181300 }, { "epoch": 9.005165391874442, "grad_norm": 0.1865234375, "learning_rate": 7.959074202840965e-05, "loss": 0.4724, "step": 181310 }, { "epoch": 9.00566206417006, "grad_norm": 0.181640625, "learning_rate": 7.955100824476011e-05, "loss": 0.5017, "step": 181320 }, { "epoch": 9.00615873646568, "grad_norm": 0.193359375, "learning_rate": 7.951127446111057e-05, "loss": 0.4853, "step": 181330 }, { "epoch": 9.006655408761299, "grad_norm": 0.2109375, "learning_rate": 7.947154067746103e-05, "loss": 0.4691, "step": 181340 }, { "epoch": 9.007152081056919, "grad_norm": 0.181640625, "learning_rate": 7.943180689381147e-05, "loss": 0.5132, "step": 181350 }, { "epoch": 9.007648753352537, "grad_norm": 0.1884765625, "learning_rate": 7.939207311016193e-05, "loss": 0.4841, "step": 181360 }, { "epoch": 9.008145425648157, "grad_norm": 0.1787109375, "learning_rate": 7.935233932651237e-05, "loss": 0.4567, "step": 181370 }, { "epoch": 9.008642097943778, "grad_norm": 0.1845703125, "learning_rate": 7.931260554286283e-05, "loss": 0.4849, "step": 181380 }, { "epoch": 9.009138770239396, "grad_norm": 0.2255859375, "learning_rate": 7.927287175921327e-05, "loss": 0.4844, "step": 181390 }, { "epoch": 9.009635442535016, "grad_norm": 0.1845703125, "learning_rate": 7.923313797556373e-05, "loss": 0.4968, "step": 181400 }, { "epoch": 9.010132114830634, "grad_norm": 0.1806640625, "learning_rate": 7.919340419191418e-05, "loss": 0.4743, "step": 181410 }, { "epoch": 9.010628787126254, "grad_norm": 0.1923828125, "learning_rate": 7.915367040826463e-05, "loss": 0.512, "step": 181420 }, { "epoch": 9.011125459421873, "grad_norm": 0.1943359375, "learning_rate": 7.911393662461508e-05, "loss": 0.4897, "step": 181430 }, { "epoch": 9.011622131717493, "grad_norm": 0.18359375, "learning_rate": 7.907420284096554e-05, "loss": 0.4648, "step": 181440 }, { "epoch": 9.012118804013111, "grad_norm": 0.1767578125, "learning_rate": 7.903446905731598e-05, "loss": 0.5017, "step": 181450 }, { "epoch": 9.012615476308731, "grad_norm": 0.18359375, "learning_rate": 7.899473527366644e-05, "loss": 0.4461, "step": 181460 }, { "epoch": 9.013112148604352, "grad_norm": 0.2109375, "learning_rate": 7.895500149001688e-05, "loss": 0.5197, "step": 181470 }, { "epoch": 9.01360882089997, "grad_norm": 0.19921875, "learning_rate": 7.891526770636734e-05, "loss": 0.5218, "step": 181480 }, { "epoch": 9.01410549319559, "grad_norm": 0.1767578125, "learning_rate": 7.88755339227178e-05, "loss": 0.4734, "step": 181490 }, { "epoch": 9.014602165491208, "grad_norm": 0.1943359375, "learning_rate": 7.883580013906826e-05, "loss": 0.4906, "step": 181500 }, { "epoch": 9.015098837786828, "grad_norm": 0.1923828125, "learning_rate": 7.87960663554187e-05, "loss": 0.4702, "step": 181510 }, { "epoch": 9.015595510082447, "grad_norm": 0.1796875, "learning_rate": 7.875633257176916e-05, "loss": 0.4654, "step": 181520 }, { "epoch": 9.016092182378067, "grad_norm": 0.1953125, "learning_rate": 7.87165987881196e-05, "loss": 0.4803, "step": 181530 }, { "epoch": 9.016588854673687, "grad_norm": 0.1923828125, "learning_rate": 7.867686500447006e-05, "loss": 0.4961, "step": 181540 }, { "epoch": 9.017085526969305, "grad_norm": 0.1904296875, "learning_rate": 7.86371312208205e-05, "loss": 0.4313, "step": 181550 }, { "epoch": 9.017582199264925, "grad_norm": 0.1845703125, "learning_rate": 7.859739743717096e-05, "loss": 0.4801, "step": 181560 }, { "epoch": 9.018078871560544, "grad_norm": 0.189453125, "learning_rate": 7.85576636535214e-05, "loss": 0.4918, "step": 181570 }, { "epoch": 9.018575543856164, "grad_norm": 0.1884765625, "learning_rate": 7.851792986987186e-05, "loss": 0.4472, "step": 181580 }, { "epoch": 9.019072216151782, "grad_norm": 0.189453125, "learning_rate": 7.847819608622231e-05, "loss": 0.455, "step": 181590 }, { "epoch": 9.019568888447402, "grad_norm": 0.173828125, "learning_rate": 7.843846230257277e-05, "loss": 0.5086, "step": 181600 }, { "epoch": 9.020065560743022, "grad_norm": 0.1806640625, "learning_rate": 7.839872851892321e-05, "loss": 0.4726, "step": 181610 }, { "epoch": 9.02056223303864, "grad_norm": 0.1845703125, "learning_rate": 7.835899473527367e-05, "loss": 0.4827, "step": 181620 }, { "epoch": 9.021058905334261, "grad_norm": 0.1875, "learning_rate": 7.831926095162411e-05, "loss": 0.5131, "step": 181630 }, { "epoch": 9.02155557762988, "grad_norm": 0.2216796875, "learning_rate": 7.827952716797457e-05, "loss": 0.4827, "step": 181640 }, { "epoch": 9.0220522499255, "grad_norm": 0.1767578125, "learning_rate": 7.823979338432503e-05, "loss": 0.4884, "step": 181650 }, { "epoch": 9.022548922221118, "grad_norm": 0.1728515625, "learning_rate": 7.820005960067547e-05, "loss": 0.482, "step": 181660 }, { "epoch": 9.023045594516738, "grad_norm": 0.220703125, "learning_rate": 7.816032581702593e-05, "loss": 0.5079, "step": 181670 }, { "epoch": 9.023542266812358, "grad_norm": 0.19140625, "learning_rate": 7.812059203337639e-05, "loss": 0.4843, "step": 181680 }, { "epoch": 9.024038939107976, "grad_norm": 0.181640625, "learning_rate": 7.808085824972683e-05, "loss": 0.4598, "step": 181690 }, { "epoch": 9.024535611403596, "grad_norm": 0.18359375, "learning_rate": 7.804112446607729e-05, "loss": 0.4772, "step": 181700 }, { "epoch": 9.025032283699215, "grad_norm": 0.201171875, "learning_rate": 7.800139068242773e-05, "loss": 0.5073, "step": 181710 }, { "epoch": 9.025528955994835, "grad_norm": 0.2001953125, "learning_rate": 7.796165689877819e-05, "loss": 0.4913, "step": 181720 }, { "epoch": 9.026025628290453, "grad_norm": 0.1962890625, "learning_rate": 7.792192311512864e-05, "loss": 0.4884, "step": 181730 }, { "epoch": 9.026522300586073, "grad_norm": 0.1953125, "learning_rate": 7.78821893314791e-05, "loss": 0.4637, "step": 181740 }, { "epoch": 9.027018972881693, "grad_norm": 0.2109375, "learning_rate": 7.784245554782954e-05, "loss": 0.4977, "step": 181750 }, { "epoch": 9.027515645177312, "grad_norm": 0.1787109375, "learning_rate": 7.780272176418e-05, "loss": 0.479, "step": 181760 }, { "epoch": 9.028012317472932, "grad_norm": 0.1982421875, "learning_rate": 7.776298798053045e-05, "loss": 0.5042, "step": 181770 }, { "epoch": 9.02850898976855, "grad_norm": 0.2021484375, "learning_rate": 7.77232541968809e-05, "loss": 0.5161, "step": 181780 }, { "epoch": 9.02900566206417, "grad_norm": 0.1748046875, "learning_rate": 7.768352041323136e-05, "loss": 0.4433, "step": 181790 }, { "epoch": 9.029502334359789, "grad_norm": 0.177734375, "learning_rate": 7.76437866295818e-05, "loss": 0.4592, "step": 181800 }, { "epoch": 9.029999006655409, "grad_norm": 0.224609375, "learning_rate": 7.760405284593226e-05, "loss": 0.4839, "step": 181810 }, { "epoch": 9.030495678951029, "grad_norm": 0.2099609375, "learning_rate": 7.75643190622827e-05, "loss": 0.5169, "step": 181820 }, { "epoch": 9.030992351246647, "grad_norm": 0.1982421875, "learning_rate": 7.752458527863316e-05, "loss": 0.5034, "step": 181830 }, { "epoch": 9.031489023542267, "grad_norm": 0.255859375, "learning_rate": 7.748485149498362e-05, "loss": 0.4805, "step": 181840 }, { "epoch": 9.031985695837886, "grad_norm": 0.1943359375, "learning_rate": 7.744511771133408e-05, "loss": 0.474, "step": 181850 }, { "epoch": 9.032482368133506, "grad_norm": 0.1806640625, "learning_rate": 7.740538392768452e-05, "loss": 0.4976, "step": 181860 }, { "epoch": 9.032979040429124, "grad_norm": 0.1728515625, "learning_rate": 7.736565014403498e-05, "loss": 0.4408, "step": 181870 }, { "epoch": 9.033475712724744, "grad_norm": 0.1796875, "learning_rate": 7.732591636038542e-05, "loss": 0.5109, "step": 181880 }, { "epoch": 9.033972385020364, "grad_norm": 0.2275390625, "learning_rate": 7.728618257673588e-05, "loss": 0.4535, "step": 181890 }, { "epoch": 9.034469057315983, "grad_norm": 0.185546875, "learning_rate": 7.724644879308632e-05, "loss": 0.4903, "step": 181900 }, { "epoch": 9.034965729611603, "grad_norm": 0.1826171875, "learning_rate": 7.720671500943678e-05, "loss": 0.5, "step": 181910 }, { "epoch": 9.035462401907221, "grad_norm": 0.2236328125, "learning_rate": 7.716698122578723e-05, "loss": 0.5069, "step": 181920 }, { "epoch": 9.035959074202841, "grad_norm": 0.1796875, "learning_rate": 7.712724744213768e-05, "loss": 0.4912, "step": 181930 }, { "epoch": 9.03645574649846, "grad_norm": 0.228515625, "learning_rate": 7.708751365848813e-05, "loss": 0.4778, "step": 181940 }, { "epoch": 9.03695241879408, "grad_norm": 0.1962890625, "learning_rate": 7.704777987483859e-05, "loss": 0.5301, "step": 181950 }, { "epoch": 9.0374490910897, "grad_norm": 0.1796875, "learning_rate": 7.700804609118903e-05, "loss": 0.4735, "step": 181960 }, { "epoch": 9.037945763385318, "grad_norm": 0.2021484375, "learning_rate": 7.696831230753949e-05, "loss": 0.4786, "step": 181970 }, { "epoch": 9.038442435680938, "grad_norm": 0.201171875, "learning_rate": 7.692857852388993e-05, "loss": 0.4826, "step": 181980 }, { "epoch": 9.038939107976557, "grad_norm": 0.1953125, "learning_rate": 7.688884474024039e-05, "loss": 0.4855, "step": 181990 }, { "epoch": 9.039435780272177, "grad_norm": 0.2236328125, "learning_rate": 7.684911095659085e-05, "loss": 0.4959, "step": 182000 }, { "epoch": 9.039932452567795, "grad_norm": 0.20703125, "learning_rate": 7.680937717294129e-05, "loss": 0.4911, "step": 182010 }, { "epoch": 9.040429124863415, "grad_norm": 0.2431640625, "learning_rate": 7.676964338929175e-05, "loss": 0.4749, "step": 182020 }, { "epoch": 9.040925797159035, "grad_norm": 0.177734375, "learning_rate": 7.672990960564221e-05, "loss": 0.5086, "step": 182030 }, { "epoch": 9.041422469454654, "grad_norm": 0.1904296875, "learning_rate": 7.669017582199265e-05, "loss": 0.4991, "step": 182040 }, { "epoch": 9.041919141750274, "grad_norm": 0.19140625, "learning_rate": 7.665044203834311e-05, "loss": 0.4976, "step": 182050 }, { "epoch": 9.042415814045892, "grad_norm": 0.171875, "learning_rate": 7.661070825469355e-05, "loss": 0.4819, "step": 182060 }, { "epoch": 9.042912486341512, "grad_norm": 0.2021484375, "learning_rate": 7.657097447104401e-05, "loss": 0.4784, "step": 182070 }, { "epoch": 9.04340915863713, "grad_norm": 0.1748046875, "learning_rate": 7.653124068739446e-05, "loss": 0.5272, "step": 182080 }, { "epoch": 9.04390583093275, "grad_norm": 0.20703125, "learning_rate": 7.649150690374491e-05, "loss": 0.4936, "step": 182090 }, { "epoch": 9.04440250322837, "grad_norm": 0.19140625, "learning_rate": 7.645177312009536e-05, "loss": 0.479, "step": 182100 }, { "epoch": 9.04489917552399, "grad_norm": 0.2138671875, "learning_rate": 7.641203933644582e-05, "loss": 0.4642, "step": 182110 }, { "epoch": 9.04539584781961, "grad_norm": 0.255859375, "learning_rate": 7.637230555279626e-05, "loss": 0.4864, "step": 182120 }, { "epoch": 9.045892520115228, "grad_norm": 0.1904296875, "learning_rate": 7.633257176914672e-05, "loss": 0.5005, "step": 182130 }, { "epoch": 9.046389192410848, "grad_norm": 0.1806640625, "learning_rate": 7.629283798549716e-05, "loss": 0.4765, "step": 182140 }, { "epoch": 9.046885864706466, "grad_norm": 0.193359375, "learning_rate": 7.625310420184762e-05, "loss": 0.4727, "step": 182150 }, { "epoch": 9.047382537002086, "grad_norm": 0.1875, "learning_rate": 7.621337041819806e-05, "loss": 0.4515, "step": 182160 }, { "epoch": 9.047879209297705, "grad_norm": 0.189453125, "learning_rate": 7.617363663454852e-05, "loss": 0.505, "step": 182170 }, { "epoch": 9.048375881593325, "grad_norm": 0.1943359375, "learning_rate": 7.613390285089898e-05, "loss": 0.4773, "step": 182180 }, { "epoch": 9.048872553888945, "grad_norm": 0.1904296875, "learning_rate": 7.609416906724944e-05, "loss": 0.4876, "step": 182190 }, { "epoch": 9.049369226184563, "grad_norm": 0.2109375, "learning_rate": 7.60544352835999e-05, "loss": 0.4807, "step": 182200 }, { "epoch": 9.049865898480183, "grad_norm": 0.193359375, "learning_rate": 7.601470149995034e-05, "loss": 0.5126, "step": 182210 }, { "epoch": 9.050362570775802, "grad_norm": 0.1923828125, "learning_rate": 7.59749677163008e-05, "loss": 0.5207, "step": 182220 }, { "epoch": 9.050859243071422, "grad_norm": 0.1943359375, "learning_rate": 7.593523393265124e-05, "loss": 0.4763, "step": 182230 }, { "epoch": 9.05135591536704, "grad_norm": 0.2216796875, "learning_rate": 7.58955001490017e-05, "loss": 0.483, "step": 182240 }, { "epoch": 9.05185258766266, "grad_norm": 0.1865234375, "learning_rate": 7.585576636535214e-05, "loss": 0.4933, "step": 182250 }, { "epoch": 9.05234925995828, "grad_norm": 0.19140625, "learning_rate": 7.58160325817026e-05, "loss": 0.4884, "step": 182260 }, { "epoch": 9.052845932253899, "grad_norm": 0.197265625, "learning_rate": 7.577629879805305e-05, "loss": 0.4849, "step": 182270 }, { "epoch": 9.053342604549519, "grad_norm": 0.1826171875, "learning_rate": 7.57365650144035e-05, "loss": 0.5134, "step": 182280 }, { "epoch": 9.053839276845137, "grad_norm": 0.2060546875, "learning_rate": 7.569683123075395e-05, "loss": 0.4907, "step": 182290 }, { "epoch": 9.054335949140757, "grad_norm": 0.19140625, "learning_rate": 7.56570974471044e-05, "loss": 0.4962, "step": 182300 }, { "epoch": 9.054832621436375, "grad_norm": 0.181640625, "learning_rate": 7.561736366345485e-05, "loss": 0.48, "step": 182310 }, { "epoch": 9.055329293731996, "grad_norm": 0.1875, "learning_rate": 7.557762987980531e-05, "loss": 0.4899, "step": 182320 }, { "epoch": 9.055825966027616, "grad_norm": 0.205078125, "learning_rate": 7.553789609615575e-05, "loss": 0.4762, "step": 182330 }, { "epoch": 9.056322638323234, "grad_norm": 0.220703125, "learning_rate": 7.549816231250621e-05, "loss": 0.4973, "step": 182340 }, { "epoch": 9.056819310618854, "grad_norm": 0.1826171875, "learning_rate": 7.545842852885667e-05, "loss": 0.4652, "step": 182350 }, { "epoch": 9.057315982914472, "grad_norm": 0.1728515625, "learning_rate": 7.541869474520713e-05, "loss": 0.4807, "step": 182360 }, { "epoch": 9.057812655210093, "grad_norm": 0.1787109375, "learning_rate": 7.537896096155757e-05, "loss": 0.4666, "step": 182370 }, { "epoch": 9.058309327505711, "grad_norm": 0.2119140625, "learning_rate": 7.533922717790803e-05, "loss": 0.4926, "step": 182380 }, { "epoch": 9.058805999801331, "grad_norm": 0.1875, "learning_rate": 7.529949339425847e-05, "loss": 0.4793, "step": 182390 }, { "epoch": 9.059302672096951, "grad_norm": 0.2021484375, "learning_rate": 7.525975961060893e-05, "loss": 0.4722, "step": 182400 }, { "epoch": 9.05979934439257, "grad_norm": 0.189453125, "learning_rate": 7.522002582695937e-05, "loss": 0.4739, "step": 182410 }, { "epoch": 9.06029601668819, "grad_norm": 0.177734375, "learning_rate": 7.518029204330983e-05, "loss": 0.4611, "step": 182420 }, { "epoch": 9.060792688983808, "grad_norm": 0.181640625, "learning_rate": 7.514055825966028e-05, "loss": 0.4728, "step": 182430 }, { "epoch": 9.061289361279428, "grad_norm": 0.1982421875, "learning_rate": 7.510082447601073e-05, "loss": 0.5058, "step": 182440 }, { "epoch": 9.061786033575046, "grad_norm": 0.20703125, "learning_rate": 7.506109069236118e-05, "loss": 0.5037, "step": 182450 }, { "epoch": 9.062282705870667, "grad_norm": 0.18359375, "learning_rate": 7.502135690871164e-05, "loss": 0.4864, "step": 182460 }, { "epoch": 9.062779378166287, "grad_norm": 0.1875, "learning_rate": 7.498162312506208e-05, "loss": 0.5376, "step": 182470 }, { "epoch": 9.063276050461905, "grad_norm": 0.1865234375, "learning_rate": 7.494188934141254e-05, "loss": 0.4829, "step": 182480 }, { "epoch": 9.063772722757525, "grad_norm": 0.1962890625, "learning_rate": 7.490215555776298e-05, "loss": 0.4903, "step": 182490 }, { "epoch": 9.064269395053143, "grad_norm": 0.16796875, "learning_rate": 7.486242177411344e-05, "loss": 0.4897, "step": 182500 }, { "epoch": 9.064766067348764, "grad_norm": 0.1884765625, "learning_rate": 7.482268799046388e-05, "loss": 0.4932, "step": 182510 }, { "epoch": 9.065262739644382, "grad_norm": 0.2138671875, "learning_rate": 7.478295420681434e-05, "loss": 0.5258, "step": 182520 }, { "epoch": 9.065759411940002, "grad_norm": 0.1875, "learning_rate": 7.47432204231648e-05, "loss": 0.5126, "step": 182530 }, { "epoch": 9.066256084235622, "grad_norm": 0.20703125, "learning_rate": 7.470348663951526e-05, "loss": 0.4997, "step": 182540 }, { "epoch": 9.06675275653124, "grad_norm": 0.189453125, "learning_rate": 7.46637528558657e-05, "loss": 0.4666, "step": 182550 }, { "epoch": 9.06724942882686, "grad_norm": 0.19921875, "learning_rate": 7.462401907221616e-05, "loss": 0.4605, "step": 182560 }, { "epoch": 9.067746101122479, "grad_norm": 0.23046875, "learning_rate": 7.45842852885666e-05, "loss": 0.4722, "step": 182570 }, { "epoch": 9.068242773418099, "grad_norm": 0.1845703125, "learning_rate": 7.454455150491706e-05, "loss": 0.5031, "step": 182580 }, { "epoch": 9.068739445713717, "grad_norm": 0.1865234375, "learning_rate": 7.45048177212675e-05, "loss": 0.4937, "step": 182590 }, { "epoch": 9.069236118009337, "grad_norm": 0.2080078125, "learning_rate": 7.446508393761796e-05, "loss": 0.4644, "step": 182600 }, { "epoch": 9.069732790304958, "grad_norm": 0.1962890625, "learning_rate": 7.442535015396842e-05, "loss": 0.4606, "step": 182610 }, { "epoch": 9.070229462600576, "grad_norm": 0.2158203125, "learning_rate": 7.438561637031887e-05, "loss": 0.5453, "step": 182620 }, { "epoch": 9.070726134896196, "grad_norm": 0.208984375, "learning_rate": 7.434588258666932e-05, "loss": 0.4805, "step": 182630 }, { "epoch": 9.071222807191814, "grad_norm": 0.189453125, "learning_rate": 7.430614880301977e-05, "loss": 0.4781, "step": 182640 }, { "epoch": 9.071719479487435, "grad_norm": 0.1865234375, "learning_rate": 7.426641501937023e-05, "loss": 0.4755, "step": 182650 }, { "epoch": 9.072216151783053, "grad_norm": 0.1962890625, "learning_rate": 7.422668123572067e-05, "loss": 0.487, "step": 182660 }, { "epoch": 9.072712824078673, "grad_norm": 0.197265625, "learning_rate": 7.418694745207113e-05, "loss": 0.5061, "step": 182670 }, { "epoch": 9.073209496374293, "grad_norm": 0.2041015625, "learning_rate": 7.414721366842157e-05, "loss": 0.4973, "step": 182680 }, { "epoch": 9.073706168669911, "grad_norm": 0.1982421875, "learning_rate": 7.410747988477203e-05, "loss": 0.4624, "step": 182690 }, { "epoch": 9.074202840965532, "grad_norm": 0.1884765625, "learning_rate": 7.406774610112249e-05, "loss": 0.4932, "step": 182700 }, { "epoch": 9.07469951326115, "grad_norm": 0.1865234375, "learning_rate": 7.402801231747295e-05, "loss": 0.5181, "step": 182710 }, { "epoch": 9.07519618555677, "grad_norm": 0.2021484375, "learning_rate": 7.398827853382339e-05, "loss": 0.494, "step": 182720 }, { "epoch": 9.075692857852388, "grad_norm": 0.185546875, "learning_rate": 7.394854475017385e-05, "loss": 0.4742, "step": 182730 }, { "epoch": 9.076189530148008, "grad_norm": 0.1826171875, "learning_rate": 7.390881096652429e-05, "loss": 0.4949, "step": 182740 }, { "epoch": 9.076686202443629, "grad_norm": 0.1826171875, "learning_rate": 7.386907718287475e-05, "loss": 0.4609, "step": 182750 }, { "epoch": 9.077182874739247, "grad_norm": 0.1806640625, "learning_rate": 7.38293433992252e-05, "loss": 0.4786, "step": 182760 }, { "epoch": 9.077679547034867, "grad_norm": 0.1953125, "learning_rate": 7.378960961557565e-05, "loss": 0.5006, "step": 182770 }, { "epoch": 9.078176219330485, "grad_norm": 0.1796875, "learning_rate": 7.37498758319261e-05, "loss": 0.478, "step": 182780 }, { "epoch": 9.078672891626105, "grad_norm": 0.2138671875, "learning_rate": 7.371014204827655e-05, "loss": 0.5064, "step": 182790 }, { "epoch": 9.079169563921724, "grad_norm": 0.1904296875, "learning_rate": 7.3670408264627e-05, "loss": 0.482, "step": 182800 }, { "epoch": 9.079666236217344, "grad_norm": 0.2021484375, "learning_rate": 7.363067448097746e-05, "loss": 0.4831, "step": 182810 }, { "epoch": 9.080162908512964, "grad_norm": 0.2138671875, "learning_rate": 7.35909406973279e-05, "loss": 0.4944, "step": 182820 }, { "epoch": 9.080659580808582, "grad_norm": 0.1923828125, "learning_rate": 7.355120691367836e-05, "loss": 0.4607, "step": 182830 }, { "epoch": 9.081156253104202, "grad_norm": 0.203125, "learning_rate": 7.35114731300288e-05, "loss": 0.4474, "step": 182840 }, { "epoch": 9.08165292539982, "grad_norm": 0.21484375, "learning_rate": 7.347173934637926e-05, "loss": 0.4873, "step": 182850 }, { "epoch": 9.082149597695441, "grad_norm": 0.208984375, "learning_rate": 7.34320055627297e-05, "loss": 0.4632, "step": 182860 }, { "epoch": 9.08264626999106, "grad_norm": 0.18359375, "learning_rate": 7.339227177908016e-05, "loss": 0.4593, "step": 182870 }, { "epoch": 9.08314294228668, "grad_norm": 0.1865234375, "learning_rate": 7.335253799543062e-05, "loss": 0.464, "step": 182880 }, { "epoch": 9.083639614582298, "grad_norm": 0.24609375, "learning_rate": 7.331280421178108e-05, "loss": 0.4516, "step": 182890 }, { "epoch": 9.084136286877918, "grad_norm": 0.1865234375, "learning_rate": 7.327307042813152e-05, "loss": 0.4705, "step": 182900 }, { "epoch": 9.084632959173538, "grad_norm": 0.2138671875, "learning_rate": 7.323333664448198e-05, "loss": 0.5121, "step": 182910 }, { "epoch": 9.085129631469156, "grad_norm": 0.2314453125, "learning_rate": 7.319360286083242e-05, "loss": 0.4887, "step": 182920 }, { "epoch": 9.085626303764776, "grad_norm": 0.2099609375, "learning_rate": 7.315386907718288e-05, "loss": 0.4915, "step": 182930 }, { "epoch": 9.086122976060395, "grad_norm": 0.1982421875, "learning_rate": 7.311413529353333e-05, "loss": 0.4873, "step": 182940 }, { "epoch": 9.086619648356015, "grad_norm": 0.185546875, "learning_rate": 7.307440150988378e-05, "loss": 0.4949, "step": 182950 }, { "epoch": 9.087116320651633, "grad_norm": 0.193359375, "learning_rate": 7.303466772623423e-05, "loss": 0.4797, "step": 182960 }, { "epoch": 9.087612992947253, "grad_norm": 0.203125, "learning_rate": 7.299493394258469e-05, "loss": 0.4649, "step": 182970 }, { "epoch": 9.088109665242873, "grad_norm": 0.185546875, "learning_rate": 7.295520015893513e-05, "loss": 0.4682, "step": 182980 }, { "epoch": 9.088606337538492, "grad_norm": 0.197265625, "learning_rate": 7.291546637528559e-05, "loss": 0.5045, "step": 182990 }, { "epoch": 9.089103009834112, "grad_norm": 0.181640625, "learning_rate": 7.287573259163603e-05, "loss": 0.4728, "step": 183000 }, { "epoch": 9.08959968212973, "grad_norm": 0.2255859375, "learning_rate": 7.283599880798649e-05, "loss": 0.4804, "step": 183010 }, { "epoch": 9.09009635442535, "grad_norm": 0.189453125, "learning_rate": 7.279626502433695e-05, "loss": 0.4665, "step": 183020 }, { "epoch": 9.090593026720969, "grad_norm": 0.19140625, "learning_rate": 7.275653124068739e-05, "loss": 0.4453, "step": 183030 }, { "epoch": 9.091089699016589, "grad_norm": 0.18359375, "learning_rate": 7.271679745703785e-05, "loss": 0.5243, "step": 183040 }, { "epoch": 9.091586371312209, "grad_norm": 0.1943359375, "learning_rate": 7.267706367338831e-05, "loss": 0.5243, "step": 183050 }, { "epoch": 9.092083043607827, "grad_norm": 0.1875, "learning_rate": 7.263732988973877e-05, "loss": 0.4825, "step": 183060 }, { "epoch": 9.092579715903447, "grad_norm": 0.203125, "learning_rate": 7.259759610608921e-05, "loss": 0.4764, "step": 183070 }, { "epoch": 9.093076388199066, "grad_norm": 0.201171875, "learning_rate": 7.255786232243967e-05, "loss": 0.4724, "step": 183080 }, { "epoch": 9.093573060494686, "grad_norm": 0.1875, "learning_rate": 7.251812853879011e-05, "loss": 0.4651, "step": 183090 }, { "epoch": 9.094069732790304, "grad_norm": 0.1796875, "learning_rate": 7.247839475514057e-05, "loss": 0.5167, "step": 183100 }, { "epoch": 9.094566405085924, "grad_norm": 0.1904296875, "learning_rate": 7.243866097149101e-05, "loss": 0.4953, "step": 183110 }, { "epoch": 9.095063077381544, "grad_norm": 0.185546875, "learning_rate": 7.239892718784147e-05, "loss": 0.5026, "step": 183120 }, { "epoch": 9.095559749677163, "grad_norm": 0.2138671875, "learning_rate": 7.235919340419192e-05, "loss": 0.4616, "step": 183130 }, { "epoch": 9.096056421972783, "grad_norm": 0.1845703125, "learning_rate": 7.231945962054237e-05, "loss": 0.4756, "step": 183140 }, { "epoch": 9.096553094268401, "grad_norm": 0.2177734375, "learning_rate": 7.227972583689282e-05, "loss": 0.503, "step": 183150 }, { "epoch": 9.097049766564021, "grad_norm": 0.193359375, "learning_rate": 7.223999205324328e-05, "loss": 0.5279, "step": 183160 }, { "epoch": 9.09754643885964, "grad_norm": 0.177734375, "learning_rate": 7.220025826959372e-05, "loss": 0.4922, "step": 183170 }, { "epoch": 9.09804311115526, "grad_norm": 0.2158203125, "learning_rate": 7.216052448594418e-05, "loss": 0.4952, "step": 183180 }, { "epoch": 9.09853978345088, "grad_norm": 0.23828125, "learning_rate": 7.212079070229462e-05, "loss": 0.5045, "step": 183190 }, { "epoch": 9.099036455746498, "grad_norm": 0.25, "learning_rate": 7.208105691864508e-05, "loss": 0.5088, "step": 183200 }, { "epoch": 9.099533128042118, "grad_norm": 0.2109375, "learning_rate": 7.204132313499554e-05, "loss": 0.4668, "step": 183210 }, { "epoch": 9.100029800337737, "grad_norm": 0.1767578125, "learning_rate": 7.200158935134598e-05, "loss": 0.4921, "step": 183220 }, { "epoch": 9.100526472633357, "grad_norm": 0.19140625, "learning_rate": 7.196185556769644e-05, "loss": 0.4922, "step": 183230 }, { "epoch": 9.101023144928975, "grad_norm": 0.181640625, "learning_rate": 7.19221217840469e-05, "loss": 0.4779, "step": 183240 }, { "epoch": 9.101519817224595, "grad_norm": 0.2001953125, "learning_rate": 7.188238800039734e-05, "loss": 0.477, "step": 183250 }, { "epoch": 9.102016489520215, "grad_norm": 0.185546875, "learning_rate": 7.18426542167478e-05, "loss": 0.5114, "step": 183260 }, { "epoch": 9.102513161815834, "grad_norm": 0.189453125, "learning_rate": 7.180292043309824e-05, "loss": 0.4597, "step": 183270 }, { "epoch": 9.103009834111454, "grad_norm": 0.2001953125, "learning_rate": 7.17631866494487e-05, "loss": 0.5096, "step": 183280 }, { "epoch": 9.103506506407072, "grad_norm": 0.1767578125, "learning_rate": 7.172345286579915e-05, "loss": 0.5081, "step": 183290 }, { "epoch": 9.104003178702692, "grad_norm": 0.181640625, "learning_rate": 7.16837190821496e-05, "loss": 0.474, "step": 183300 }, { "epoch": 9.10449985099831, "grad_norm": 0.193359375, "learning_rate": 7.164398529850005e-05, "loss": 0.467, "step": 183310 }, { "epoch": 9.10499652329393, "grad_norm": 0.1845703125, "learning_rate": 7.16042515148505e-05, "loss": 0.4756, "step": 183320 }, { "epoch": 9.10549319558955, "grad_norm": 0.1787109375, "learning_rate": 7.156451773120095e-05, "loss": 0.4683, "step": 183330 }, { "epoch": 9.10598986788517, "grad_norm": 0.205078125, "learning_rate": 7.152478394755141e-05, "loss": 0.4684, "step": 183340 }, { "epoch": 9.10648654018079, "grad_norm": 0.232421875, "learning_rate": 7.148505016390185e-05, "loss": 0.5029, "step": 183350 }, { "epoch": 9.106983212476408, "grad_norm": 0.193359375, "learning_rate": 7.144531638025231e-05, "loss": 0.5172, "step": 183360 }, { "epoch": 9.107479884772028, "grad_norm": 0.1806640625, "learning_rate": 7.140558259660275e-05, "loss": 0.496, "step": 183370 }, { "epoch": 9.107976557067646, "grad_norm": 0.19140625, "learning_rate": 7.136584881295321e-05, "loss": 0.4647, "step": 183380 }, { "epoch": 9.108473229363266, "grad_norm": 0.212890625, "learning_rate": 7.132611502930367e-05, "loss": 0.4909, "step": 183390 }, { "epoch": 9.108969901658886, "grad_norm": 0.212890625, "learning_rate": 7.128638124565413e-05, "loss": 0.5088, "step": 183400 }, { "epoch": 9.109466573954505, "grad_norm": 0.1943359375, "learning_rate": 7.124664746200457e-05, "loss": 0.4997, "step": 183410 }, { "epoch": 9.109963246250125, "grad_norm": 0.1923828125, "learning_rate": 7.120691367835503e-05, "loss": 0.4552, "step": 183420 }, { "epoch": 9.110459918545743, "grad_norm": 0.1787109375, "learning_rate": 7.116717989470547e-05, "loss": 0.4588, "step": 183430 }, { "epoch": 9.110956590841363, "grad_norm": 0.177734375, "learning_rate": 7.112744611105593e-05, "loss": 0.4673, "step": 183440 }, { "epoch": 9.111453263136982, "grad_norm": 0.244140625, "learning_rate": 7.108771232740639e-05, "loss": 0.498, "step": 183450 }, { "epoch": 9.111949935432602, "grad_norm": 0.193359375, "learning_rate": 7.104797854375683e-05, "loss": 0.4668, "step": 183460 }, { "epoch": 9.112446607728222, "grad_norm": 0.21875, "learning_rate": 7.100824476010729e-05, "loss": 0.4676, "step": 183470 }, { "epoch": 9.11294328002384, "grad_norm": 0.2080078125, "learning_rate": 7.096851097645774e-05, "loss": 0.5052, "step": 183480 }, { "epoch": 9.11343995231946, "grad_norm": 0.1923828125, "learning_rate": 7.09287771928082e-05, "loss": 0.4709, "step": 183490 }, { "epoch": 9.113936624615079, "grad_norm": 0.18359375, "learning_rate": 7.088904340915864e-05, "loss": 0.4904, "step": 183500 }, { "epoch": 9.114433296910699, "grad_norm": 0.19140625, "learning_rate": 7.08493096255091e-05, "loss": 0.4782, "step": 183510 }, { "epoch": 9.114929969206317, "grad_norm": 0.18359375, "learning_rate": 7.080957584185954e-05, "loss": 0.4843, "step": 183520 }, { "epoch": 9.115426641501937, "grad_norm": 0.1826171875, "learning_rate": 7.076984205821e-05, "loss": 0.4844, "step": 183530 }, { "epoch": 9.115923313797555, "grad_norm": 0.18359375, "learning_rate": 7.073010827456044e-05, "loss": 0.4814, "step": 183540 }, { "epoch": 9.116419986093176, "grad_norm": 0.2353515625, "learning_rate": 7.06903744909109e-05, "loss": 0.5254, "step": 183550 }, { "epoch": 9.116916658388796, "grad_norm": 0.1923828125, "learning_rate": 7.065064070726136e-05, "loss": 0.495, "step": 183560 }, { "epoch": 9.117413330684414, "grad_norm": 0.19140625, "learning_rate": 7.06109069236118e-05, "loss": 0.4925, "step": 183570 }, { "epoch": 9.117910002980034, "grad_norm": 0.1875, "learning_rate": 7.057117313996226e-05, "loss": 0.5212, "step": 183580 }, { "epoch": 9.118406675275653, "grad_norm": 0.18359375, "learning_rate": 7.053143935631272e-05, "loss": 0.4711, "step": 183590 }, { "epoch": 9.118903347571273, "grad_norm": 0.201171875, "learning_rate": 7.049170557266316e-05, "loss": 0.4479, "step": 183600 }, { "epoch": 9.119400019866891, "grad_norm": 0.19140625, "learning_rate": 7.045197178901362e-05, "loss": 0.4634, "step": 183610 }, { "epoch": 9.119896692162511, "grad_norm": 0.2080078125, "learning_rate": 7.041223800536406e-05, "loss": 0.5072, "step": 183620 }, { "epoch": 9.120393364458131, "grad_norm": 0.19140625, "learning_rate": 7.037250422171452e-05, "loss": 0.4743, "step": 183630 }, { "epoch": 9.12089003675375, "grad_norm": 0.20703125, "learning_rate": 7.033277043806497e-05, "loss": 0.4725, "step": 183640 }, { "epoch": 9.12138670904937, "grad_norm": 0.2001953125, "learning_rate": 7.029303665441542e-05, "loss": 0.483, "step": 183650 }, { "epoch": 9.121883381344988, "grad_norm": 0.1640625, "learning_rate": 7.025330287076587e-05, "loss": 0.4595, "step": 183660 }, { "epoch": 9.122380053640608, "grad_norm": 0.205078125, "learning_rate": 7.021356908711633e-05, "loss": 0.4898, "step": 183670 }, { "epoch": 9.122876725936226, "grad_norm": 0.171875, "learning_rate": 7.017383530346677e-05, "loss": 0.4713, "step": 183680 }, { "epoch": 9.123373398231847, "grad_norm": 0.18359375, "learning_rate": 7.013410151981723e-05, "loss": 0.4793, "step": 183690 }, { "epoch": 9.123870070527467, "grad_norm": 0.205078125, "learning_rate": 7.009436773616767e-05, "loss": 0.4977, "step": 183700 }, { "epoch": 9.124366742823085, "grad_norm": 0.1865234375, "learning_rate": 7.005463395251813e-05, "loss": 0.4537, "step": 183710 }, { "epoch": 9.124863415118705, "grad_norm": 0.197265625, "learning_rate": 7.001490016886857e-05, "loss": 0.5065, "step": 183720 }, { "epoch": 9.125360087414323, "grad_norm": 0.1982421875, "learning_rate": 6.997516638521903e-05, "loss": 0.5102, "step": 183730 }, { "epoch": 9.125856759709944, "grad_norm": 0.2392578125, "learning_rate": 6.993543260156949e-05, "loss": 0.5043, "step": 183740 }, { "epoch": 9.126353432005562, "grad_norm": 0.2021484375, "learning_rate": 6.989569881791995e-05, "loss": 0.5042, "step": 183750 }, { "epoch": 9.126850104301182, "grad_norm": 0.1728515625, "learning_rate": 6.985596503427039e-05, "loss": 0.5023, "step": 183760 }, { "epoch": 9.127346776596802, "grad_norm": 0.193359375, "learning_rate": 6.981623125062085e-05, "loss": 0.4962, "step": 183770 }, { "epoch": 9.12784344889242, "grad_norm": 0.1953125, "learning_rate": 6.97764974669713e-05, "loss": 0.4831, "step": 183780 }, { "epoch": 9.12834012118804, "grad_norm": 0.2236328125, "learning_rate": 6.973676368332175e-05, "loss": 0.5234, "step": 183790 }, { "epoch": 9.128836793483659, "grad_norm": 0.2021484375, "learning_rate": 6.96970298996722e-05, "loss": 0.4678, "step": 183800 }, { "epoch": 9.129333465779279, "grad_norm": 0.2060546875, "learning_rate": 6.965729611602265e-05, "loss": 0.4922, "step": 183810 }, { "epoch": 9.129830138074897, "grad_norm": 0.18359375, "learning_rate": 6.96175623323731e-05, "loss": 0.4929, "step": 183820 }, { "epoch": 9.130326810370518, "grad_norm": 0.189453125, "learning_rate": 6.957782854872356e-05, "loss": 0.4406, "step": 183830 }, { "epoch": 9.130823482666138, "grad_norm": 0.201171875, "learning_rate": 6.9538094765074e-05, "loss": 0.4779, "step": 183840 }, { "epoch": 9.131320154961756, "grad_norm": 0.197265625, "learning_rate": 6.949836098142446e-05, "loss": 0.4758, "step": 183850 }, { "epoch": 9.131816827257376, "grad_norm": 0.2109375, "learning_rate": 6.94586271977749e-05, "loss": 0.4949, "step": 183860 }, { "epoch": 9.132313499552994, "grad_norm": 0.2177734375, "learning_rate": 6.941889341412536e-05, "loss": 0.4358, "step": 183870 }, { "epoch": 9.132810171848615, "grad_norm": 0.1845703125, "learning_rate": 6.937915963047582e-05, "loss": 0.4765, "step": 183880 }, { "epoch": 9.133306844144233, "grad_norm": 0.2109375, "learning_rate": 6.933942584682626e-05, "loss": 0.5001, "step": 183890 }, { "epoch": 9.133803516439853, "grad_norm": 0.1845703125, "learning_rate": 6.929969206317672e-05, "loss": 0.5032, "step": 183900 }, { "epoch": 9.134300188735473, "grad_norm": 0.181640625, "learning_rate": 6.925995827952718e-05, "loss": 0.4726, "step": 183910 }, { "epoch": 9.134796861031091, "grad_norm": 0.2158203125, "learning_rate": 6.922022449587762e-05, "loss": 0.4912, "step": 183920 }, { "epoch": 9.135293533326712, "grad_norm": 0.220703125, "learning_rate": 6.918049071222808e-05, "loss": 0.4891, "step": 183930 }, { "epoch": 9.13579020562233, "grad_norm": 0.224609375, "learning_rate": 6.914075692857854e-05, "loss": 0.4766, "step": 183940 }, { "epoch": 9.13628687791795, "grad_norm": 0.21875, "learning_rate": 6.910102314492898e-05, "loss": 0.4845, "step": 183950 }, { "epoch": 9.136783550213568, "grad_norm": 0.2275390625, "learning_rate": 6.906128936127944e-05, "loss": 0.4646, "step": 183960 }, { "epoch": 9.137280222509188, "grad_norm": 0.208984375, "learning_rate": 6.902155557762988e-05, "loss": 0.4531, "step": 183970 }, { "epoch": 9.137776894804809, "grad_norm": 0.19921875, "learning_rate": 6.898182179398034e-05, "loss": 0.4454, "step": 183980 }, { "epoch": 9.138273567100427, "grad_norm": 0.2021484375, "learning_rate": 6.894208801033079e-05, "loss": 0.5006, "step": 183990 }, { "epoch": 9.138770239396047, "grad_norm": 0.236328125, "learning_rate": 6.890235422668124e-05, "loss": 0.4959, "step": 184000 }, { "epoch": 9.139266911691665, "grad_norm": 0.220703125, "learning_rate": 6.886262044303169e-05, "loss": 0.4727, "step": 184010 }, { "epoch": 9.139763583987286, "grad_norm": 0.1865234375, "learning_rate": 6.882288665938215e-05, "loss": 0.4789, "step": 184020 }, { "epoch": 9.140260256282904, "grad_norm": 0.23046875, "learning_rate": 6.878315287573259e-05, "loss": 0.5327, "step": 184030 }, { "epoch": 9.140756928578524, "grad_norm": 0.1865234375, "learning_rate": 6.874341909208305e-05, "loss": 0.5216, "step": 184040 }, { "epoch": 9.141253600874144, "grad_norm": 0.189453125, "learning_rate": 6.870368530843349e-05, "loss": 0.4922, "step": 184050 }, { "epoch": 9.141750273169762, "grad_norm": 0.1943359375, "learning_rate": 6.866395152478395e-05, "loss": 0.4965, "step": 184060 }, { "epoch": 9.142246945465383, "grad_norm": 0.2158203125, "learning_rate": 6.86242177411344e-05, "loss": 0.495, "step": 184070 }, { "epoch": 9.142743617761, "grad_norm": 0.19921875, "learning_rate": 6.858448395748485e-05, "loss": 0.4731, "step": 184080 }, { "epoch": 9.143240290056621, "grad_norm": 0.1845703125, "learning_rate": 6.854475017383531e-05, "loss": 0.4728, "step": 184090 }, { "epoch": 9.14373696235224, "grad_norm": 0.1845703125, "learning_rate": 6.850501639018577e-05, "loss": 0.4996, "step": 184100 }, { "epoch": 9.14423363464786, "grad_norm": 0.193359375, "learning_rate": 6.846528260653621e-05, "loss": 0.4944, "step": 184110 }, { "epoch": 9.14473030694348, "grad_norm": 0.216796875, "learning_rate": 6.842554882288667e-05, "loss": 0.5061, "step": 184120 }, { "epoch": 9.145226979239098, "grad_norm": 0.2060546875, "learning_rate": 6.838581503923711e-05, "loss": 0.482, "step": 184130 }, { "epoch": 9.145723651534718, "grad_norm": 0.203125, "learning_rate": 6.834608125558757e-05, "loss": 0.4966, "step": 184140 }, { "epoch": 9.146220323830336, "grad_norm": 0.2060546875, "learning_rate": 6.830634747193802e-05, "loss": 0.4577, "step": 184150 }, { "epoch": 9.146716996125956, "grad_norm": 0.193359375, "learning_rate": 6.826661368828847e-05, "loss": 0.468, "step": 184160 }, { "epoch": 9.147213668421575, "grad_norm": 0.1865234375, "learning_rate": 6.822687990463892e-05, "loss": 0.4917, "step": 184170 }, { "epoch": 9.147710340717195, "grad_norm": 0.201171875, "learning_rate": 6.818714612098938e-05, "loss": 0.446, "step": 184180 }, { "epoch": 9.148207013012815, "grad_norm": 0.2001953125, "learning_rate": 6.814741233733982e-05, "loss": 0.4935, "step": 184190 }, { "epoch": 9.148703685308433, "grad_norm": 0.2177734375, "learning_rate": 6.810767855369028e-05, "loss": 0.4845, "step": 184200 }, { "epoch": 9.149200357604053, "grad_norm": 0.234375, "learning_rate": 6.806794477004072e-05, "loss": 0.512, "step": 184210 }, { "epoch": 9.149697029899672, "grad_norm": 0.1796875, "learning_rate": 6.802821098639118e-05, "loss": 0.4937, "step": 184220 }, { "epoch": 9.150193702195292, "grad_norm": 0.19140625, "learning_rate": 6.798847720274162e-05, "loss": 0.4869, "step": 184230 }, { "epoch": 9.15069037449091, "grad_norm": 0.1904296875, "learning_rate": 6.794874341909208e-05, "loss": 0.5046, "step": 184240 }, { "epoch": 9.15118704678653, "grad_norm": 0.1875, "learning_rate": 6.790900963544254e-05, "loss": 0.4638, "step": 184250 }, { "epoch": 9.151683719082149, "grad_norm": 0.208984375, "learning_rate": 6.7869275851793e-05, "loss": 0.4879, "step": 184260 }, { "epoch": 9.152180391377769, "grad_norm": 0.1904296875, "learning_rate": 6.782954206814344e-05, "loss": 0.4779, "step": 184270 }, { "epoch": 9.152677063673389, "grad_norm": 0.1953125, "learning_rate": 6.77898082844939e-05, "loss": 0.4877, "step": 184280 }, { "epoch": 9.153173735969007, "grad_norm": 0.23828125, "learning_rate": 6.775007450084436e-05, "loss": 0.4687, "step": 184290 }, { "epoch": 9.153670408264627, "grad_norm": 0.1904296875, "learning_rate": 6.77103407171948e-05, "loss": 0.474, "step": 184300 }, { "epoch": 9.154167080560246, "grad_norm": 0.1982421875, "learning_rate": 6.767060693354526e-05, "loss": 0.4974, "step": 184310 }, { "epoch": 9.154663752855866, "grad_norm": 0.2041015625, "learning_rate": 6.76308731498957e-05, "loss": 0.4704, "step": 184320 }, { "epoch": 9.155160425151484, "grad_norm": 0.1875, "learning_rate": 6.759113936624616e-05, "loss": 0.494, "step": 184330 }, { "epoch": 9.155657097447104, "grad_norm": 0.2060546875, "learning_rate": 6.75514055825966e-05, "loss": 0.5045, "step": 184340 }, { "epoch": 9.156153769742724, "grad_norm": 0.181640625, "learning_rate": 6.751167179894706e-05, "loss": 0.462, "step": 184350 }, { "epoch": 9.156650442038343, "grad_norm": 0.2177734375, "learning_rate": 6.747193801529751e-05, "loss": 0.4745, "step": 184360 }, { "epoch": 9.157147114333963, "grad_norm": 0.2080078125, "learning_rate": 6.743220423164797e-05, "loss": 0.5212, "step": 184370 }, { "epoch": 9.157643786629581, "grad_norm": 0.1806640625, "learning_rate": 6.739247044799841e-05, "loss": 0.457, "step": 184380 }, { "epoch": 9.158140458925201, "grad_norm": 0.20703125, "learning_rate": 6.735273666434887e-05, "loss": 0.4669, "step": 184390 }, { "epoch": 9.15863713122082, "grad_norm": 0.1826171875, "learning_rate": 6.731300288069931e-05, "loss": 0.4686, "step": 184400 }, { "epoch": 9.15913380351644, "grad_norm": 0.21484375, "learning_rate": 6.727326909704977e-05, "loss": 0.4725, "step": 184410 }, { "epoch": 9.15963047581206, "grad_norm": 0.1982421875, "learning_rate": 6.723353531340021e-05, "loss": 0.4822, "step": 184420 }, { "epoch": 9.160127148107678, "grad_norm": 0.1767578125, "learning_rate": 6.719380152975067e-05, "loss": 0.4776, "step": 184430 }, { "epoch": 9.160623820403298, "grad_norm": 0.1884765625, "learning_rate": 6.715406774610113e-05, "loss": 0.4736, "step": 184440 }, { "epoch": 9.161120492698917, "grad_norm": 0.2255859375, "learning_rate": 6.711433396245159e-05, "loss": 0.4684, "step": 184450 }, { "epoch": 9.161617164994537, "grad_norm": 0.173828125, "learning_rate": 6.707460017880203e-05, "loss": 0.4736, "step": 184460 }, { "epoch": 9.162113837290155, "grad_norm": 0.224609375, "learning_rate": 6.703486639515249e-05, "loss": 0.4846, "step": 184470 }, { "epoch": 9.162610509585775, "grad_norm": 0.189453125, "learning_rate": 6.699513261150293e-05, "loss": 0.4632, "step": 184480 }, { "epoch": 9.163107181881395, "grad_norm": 0.251953125, "learning_rate": 6.695539882785339e-05, "loss": 0.4721, "step": 184490 }, { "epoch": 9.163603854177014, "grad_norm": 0.1923828125, "learning_rate": 6.691566504420384e-05, "loss": 0.5025, "step": 184500 }, { "epoch": 9.164100526472634, "grad_norm": 0.181640625, "learning_rate": 6.687593126055429e-05, "loss": 0.4663, "step": 184510 }, { "epoch": 9.164597198768252, "grad_norm": 0.26171875, "learning_rate": 6.683619747690474e-05, "loss": 0.4928, "step": 184520 }, { "epoch": 9.165093871063872, "grad_norm": 0.2060546875, "learning_rate": 6.67964636932552e-05, "loss": 0.4811, "step": 184530 }, { "epoch": 9.16559054335949, "grad_norm": 0.2080078125, "learning_rate": 6.675672990960564e-05, "loss": 0.4769, "step": 184540 }, { "epoch": 9.16608721565511, "grad_norm": 0.1875, "learning_rate": 6.67169961259561e-05, "loss": 0.4968, "step": 184550 }, { "epoch": 9.16658388795073, "grad_norm": 0.1884765625, "learning_rate": 6.667726234230654e-05, "loss": 0.4564, "step": 184560 }, { "epoch": 9.16708056024635, "grad_norm": 0.1923828125, "learning_rate": 6.6637528558657e-05, "loss": 0.4507, "step": 184570 }, { "epoch": 9.16757723254197, "grad_norm": 0.1943359375, "learning_rate": 6.659779477500744e-05, "loss": 0.5085, "step": 184580 }, { "epoch": 9.168073904837588, "grad_norm": 0.20703125, "learning_rate": 6.65580609913579e-05, "loss": 0.4818, "step": 184590 }, { "epoch": 9.168570577133208, "grad_norm": 0.1796875, "learning_rate": 6.651832720770836e-05, "loss": 0.4917, "step": 184600 }, { "epoch": 9.169067249428826, "grad_norm": 0.1923828125, "learning_rate": 6.647859342405882e-05, "loss": 0.4645, "step": 184610 }, { "epoch": 9.169563921724446, "grad_norm": 0.2138671875, "learning_rate": 6.643885964040926e-05, "loss": 0.4783, "step": 184620 }, { "epoch": 9.170060594020066, "grad_norm": 0.203125, "learning_rate": 6.639912585675972e-05, "loss": 0.4996, "step": 184630 }, { "epoch": 9.170557266315685, "grad_norm": 0.1923828125, "learning_rate": 6.635939207311016e-05, "loss": 0.5024, "step": 184640 }, { "epoch": 9.171053938611305, "grad_norm": 0.1826171875, "learning_rate": 6.631965828946062e-05, "loss": 0.4781, "step": 184650 }, { "epoch": 9.171550610906923, "grad_norm": 0.2021484375, "learning_rate": 6.627992450581107e-05, "loss": 0.4547, "step": 184660 }, { "epoch": 9.172047283202543, "grad_norm": 0.181640625, "learning_rate": 6.624019072216152e-05, "loss": 0.4807, "step": 184670 }, { "epoch": 9.172543955498162, "grad_norm": 0.2060546875, "learning_rate": 6.620045693851197e-05, "loss": 0.504, "step": 184680 }, { "epoch": 9.173040627793782, "grad_norm": 0.2265625, "learning_rate": 6.616072315486243e-05, "loss": 0.4856, "step": 184690 }, { "epoch": 9.173537300089402, "grad_norm": 0.1865234375, "learning_rate": 6.612098937121287e-05, "loss": 0.441, "step": 184700 }, { "epoch": 9.17403397238502, "grad_norm": 0.2060546875, "learning_rate": 6.608125558756333e-05, "loss": 0.4968, "step": 184710 }, { "epoch": 9.17453064468064, "grad_norm": 0.2119140625, "learning_rate": 6.604152180391378e-05, "loss": 0.5115, "step": 184720 }, { "epoch": 9.175027316976259, "grad_norm": 0.1796875, "learning_rate": 6.600178802026423e-05, "loss": 0.4634, "step": 184730 }, { "epoch": 9.175523989271879, "grad_norm": 0.1904296875, "learning_rate": 6.596205423661469e-05, "loss": 0.4893, "step": 184740 }, { "epoch": 9.176020661567497, "grad_norm": 0.1845703125, "learning_rate": 6.592232045296513e-05, "loss": 0.4928, "step": 184750 }, { "epoch": 9.176517333863117, "grad_norm": 0.185546875, "learning_rate": 6.588258666931559e-05, "loss": 0.4845, "step": 184760 }, { "epoch": 9.177014006158737, "grad_norm": 0.1953125, "learning_rate": 6.584285288566603e-05, "loss": 0.4834, "step": 184770 }, { "epoch": 9.177510678454356, "grad_norm": 0.1904296875, "learning_rate": 6.580311910201649e-05, "loss": 0.4973, "step": 184780 }, { "epoch": 9.178007350749976, "grad_norm": 0.2080078125, "learning_rate": 6.576338531836695e-05, "loss": 0.4579, "step": 184790 }, { "epoch": 9.178504023045594, "grad_norm": 0.2021484375, "learning_rate": 6.57236515347174e-05, "loss": 0.5235, "step": 184800 }, { "epoch": 9.179000695341214, "grad_norm": 0.2041015625, "learning_rate": 6.568391775106785e-05, "loss": 0.4999, "step": 184810 }, { "epoch": 9.179497367636833, "grad_norm": 0.203125, "learning_rate": 6.564418396741831e-05, "loss": 0.4872, "step": 184820 }, { "epoch": 9.179994039932453, "grad_norm": 0.234375, "learning_rate": 6.560445018376875e-05, "loss": 0.4969, "step": 184830 }, { "epoch": 9.180490712228073, "grad_norm": 0.2001953125, "learning_rate": 6.556471640011921e-05, "loss": 0.4938, "step": 184840 }, { "epoch": 9.180987384523691, "grad_norm": 0.177734375, "learning_rate": 6.552498261646966e-05, "loss": 0.4687, "step": 184850 }, { "epoch": 9.181484056819311, "grad_norm": 0.193359375, "learning_rate": 6.548524883282011e-05, "loss": 0.5009, "step": 184860 }, { "epoch": 9.18198072911493, "grad_norm": 0.1953125, "learning_rate": 6.544551504917056e-05, "loss": 0.491, "step": 184870 }, { "epoch": 9.18247740141055, "grad_norm": 0.193359375, "learning_rate": 6.540578126552101e-05, "loss": 0.4583, "step": 184880 }, { "epoch": 9.182974073706168, "grad_norm": 0.205078125, "learning_rate": 6.536604748187146e-05, "loss": 0.4648, "step": 184890 }, { "epoch": 9.183470746001788, "grad_norm": 0.240234375, "learning_rate": 6.532631369822192e-05, "loss": 0.5045, "step": 184900 }, { "epoch": 9.183967418297406, "grad_norm": 0.1826171875, "learning_rate": 6.528657991457236e-05, "loss": 0.4865, "step": 184910 }, { "epoch": 9.184464090593027, "grad_norm": 0.1845703125, "learning_rate": 6.524684613092282e-05, "loss": 0.4882, "step": 184920 }, { "epoch": 9.184960762888647, "grad_norm": 0.1865234375, "learning_rate": 6.520711234727326e-05, "loss": 0.4672, "step": 184930 }, { "epoch": 9.185457435184265, "grad_norm": 0.2314453125, "learning_rate": 6.516737856362372e-05, "loss": 0.4775, "step": 184940 }, { "epoch": 9.185954107479885, "grad_norm": 0.2177734375, "learning_rate": 6.512764477997418e-05, "loss": 0.4545, "step": 184950 }, { "epoch": 9.186450779775504, "grad_norm": 0.1962890625, "learning_rate": 6.508791099632464e-05, "loss": 0.4564, "step": 184960 }, { "epoch": 9.186947452071124, "grad_norm": 0.2265625, "learning_rate": 6.504817721267508e-05, "loss": 0.4562, "step": 184970 }, { "epoch": 9.187444124366742, "grad_norm": 0.197265625, "learning_rate": 6.500844342902554e-05, "loss": 0.4695, "step": 184980 }, { "epoch": 9.187940796662362, "grad_norm": 0.205078125, "learning_rate": 6.496870964537598e-05, "loss": 0.4632, "step": 184990 }, { "epoch": 9.188437468957982, "grad_norm": 0.2109375, "learning_rate": 6.492897586172644e-05, "loss": 0.4847, "step": 185000 }, { "epoch": 9.1889341412536, "grad_norm": 0.2333984375, "learning_rate": 6.488924207807689e-05, "loss": 0.5039, "step": 185010 }, { "epoch": 9.18943081354922, "grad_norm": 0.1826171875, "learning_rate": 6.484950829442734e-05, "loss": 0.4899, "step": 185020 }, { "epoch": 9.189927485844839, "grad_norm": 0.1953125, "learning_rate": 6.480977451077779e-05, "loss": 0.4832, "step": 185030 }, { "epoch": 9.190424158140459, "grad_norm": 0.201171875, "learning_rate": 6.477004072712824e-05, "loss": 0.5118, "step": 185040 }, { "epoch": 9.190920830436077, "grad_norm": 0.205078125, "learning_rate": 6.473030694347869e-05, "loss": 0.4991, "step": 185050 }, { "epoch": 9.191417502731698, "grad_norm": 0.2109375, "learning_rate": 6.469057315982915e-05, "loss": 0.4967, "step": 185060 }, { "epoch": 9.191914175027318, "grad_norm": 0.205078125, "learning_rate": 6.465083937617959e-05, "loss": 0.5004, "step": 185070 }, { "epoch": 9.192410847322936, "grad_norm": 0.22265625, "learning_rate": 6.461110559253005e-05, "loss": 0.4704, "step": 185080 }, { "epoch": 9.192907519618556, "grad_norm": 0.2158203125, "learning_rate": 6.45713718088805e-05, "loss": 0.4667, "step": 185090 }, { "epoch": 9.193404191914174, "grad_norm": 0.1904296875, "learning_rate": 6.453163802523095e-05, "loss": 0.4669, "step": 185100 }, { "epoch": 9.193900864209795, "grad_norm": 0.2109375, "learning_rate": 6.449190424158141e-05, "loss": 0.4886, "step": 185110 }, { "epoch": 9.194397536505413, "grad_norm": 0.2255859375, "learning_rate": 6.445217045793185e-05, "loss": 0.4788, "step": 185120 }, { "epoch": 9.194894208801033, "grad_norm": 0.2265625, "learning_rate": 6.441243667428231e-05, "loss": 0.4927, "step": 185130 }, { "epoch": 9.195390881096653, "grad_norm": 0.205078125, "learning_rate": 6.437270289063277e-05, "loss": 0.4908, "step": 185140 }, { "epoch": 9.195887553392271, "grad_norm": 0.189453125, "learning_rate": 6.433296910698323e-05, "loss": 0.4802, "step": 185150 }, { "epoch": 9.196384225687892, "grad_norm": 0.1923828125, "learning_rate": 6.429323532333367e-05, "loss": 0.4572, "step": 185160 }, { "epoch": 9.19688089798351, "grad_norm": 0.181640625, "learning_rate": 6.425350153968413e-05, "loss": 0.4916, "step": 185170 }, { "epoch": 9.19737757027913, "grad_norm": 0.1845703125, "learning_rate": 6.421376775603457e-05, "loss": 0.4472, "step": 185180 }, { "epoch": 9.197874242574748, "grad_norm": 0.2001953125, "learning_rate": 6.417403397238503e-05, "loss": 0.4867, "step": 185190 }, { "epoch": 9.198370914870369, "grad_norm": 0.212890625, "learning_rate": 6.413430018873547e-05, "loss": 0.4517, "step": 185200 }, { "epoch": 9.198867587165989, "grad_norm": 0.189453125, "learning_rate": 6.409456640508593e-05, "loss": 0.4819, "step": 185210 }, { "epoch": 9.199364259461607, "grad_norm": 0.1953125, "learning_rate": 6.405483262143638e-05, "loss": 0.4861, "step": 185220 }, { "epoch": 9.199860931757227, "grad_norm": 0.1953125, "learning_rate": 6.401509883778683e-05, "loss": 0.4791, "step": 185230 }, { "epoch": 9.200357604052845, "grad_norm": 0.1953125, "learning_rate": 6.397536505413728e-05, "loss": 0.5115, "step": 185240 }, { "epoch": 9.200854276348466, "grad_norm": 0.2177734375, "learning_rate": 6.393563127048774e-05, "loss": 0.5009, "step": 185250 }, { "epoch": 9.201350948644084, "grad_norm": 0.1875, "learning_rate": 6.389589748683818e-05, "loss": 0.4814, "step": 185260 }, { "epoch": 9.201847620939704, "grad_norm": 0.1826171875, "learning_rate": 6.385616370318864e-05, "loss": 0.4707, "step": 185270 }, { "epoch": 9.202344293235324, "grad_norm": 0.19140625, "learning_rate": 6.381642991953908e-05, "loss": 0.4635, "step": 185280 }, { "epoch": 9.202840965530942, "grad_norm": 0.2060546875, "learning_rate": 6.377669613588954e-05, "loss": 0.446, "step": 185290 }, { "epoch": 9.203337637826563, "grad_norm": 0.1923828125, "learning_rate": 6.373696235224e-05, "loss": 0.5201, "step": 185300 }, { "epoch": 9.203834310122181, "grad_norm": 0.2041015625, "learning_rate": 6.369722856859046e-05, "loss": 0.5005, "step": 185310 }, { "epoch": 9.204330982417801, "grad_norm": 0.2392578125, "learning_rate": 6.36574947849409e-05, "loss": 0.475, "step": 185320 }, { "epoch": 9.20482765471342, "grad_norm": 0.193359375, "learning_rate": 6.361776100129136e-05, "loss": 0.4586, "step": 185330 }, { "epoch": 9.20532432700904, "grad_norm": 0.201171875, "learning_rate": 6.35780272176418e-05, "loss": 0.4828, "step": 185340 }, { "epoch": 9.20582099930466, "grad_norm": 0.1943359375, "learning_rate": 6.353829343399226e-05, "loss": 0.4793, "step": 185350 }, { "epoch": 9.206317671600278, "grad_norm": 0.173828125, "learning_rate": 6.34985596503427e-05, "loss": 0.477, "step": 185360 }, { "epoch": 9.206814343895898, "grad_norm": 0.2109375, "learning_rate": 6.345882586669316e-05, "loss": 0.4984, "step": 185370 }, { "epoch": 9.207311016191516, "grad_norm": 0.2099609375, "learning_rate": 6.341909208304361e-05, "loss": 0.4755, "step": 185380 }, { "epoch": 9.207807688487136, "grad_norm": 0.208984375, "learning_rate": 6.337935829939406e-05, "loss": 0.5122, "step": 185390 }, { "epoch": 9.208304360782755, "grad_norm": 0.1875, "learning_rate": 6.333962451574451e-05, "loss": 0.4924, "step": 185400 }, { "epoch": 9.208801033078375, "grad_norm": 0.205078125, "learning_rate": 6.329989073209497e-05, "loss": 0.4954, "step": 185410 }, { "epoch": 9.209297705373995, "grad_norm": 0.1953125, "learning_rate": 6.326015694844541e-05, "loss": 0.469, "step": 185420 }, { "epoch": 9.209794377669613, "grad_norm": 0.29296875, "learning_rate": 6.322042316479587e-05, "loss": 0.4755, "step": 185430 }, { "epoch": 9.210291049965234, "grad_norm": 0.19921875, "learning_rate": 6.318068938114631e-05, "loss": 0.4923, "step": 185440 }, { "epoch": 9.210787722260852, "grad_norm": 0.185546875, "learning_rate": 6.314095559749677e-05, "loss": 0.5002, "step": 185450 }, { "epoch": 9.211284394556472, "grad_norm": 0.189453125, "learning_rate": 6.310122181384723e-05, "loss": 0.5146, "step": 185460 }, { "epoch": 9.21178106685209, "grad_norm": 0.1865234375, "learning_rate": 6.306148803019767e-05, "loss": 0.5041, "step": 185470 }, { "epoch": 9.21227773914771, "grad_norm": 0.2138671875, "learning_rate": 6.302175424654813e-05, "loss": 0.4766, "step": 185480 }, { "epoch": 9.21277441144333, "grad_norm": 0.212890625, "learning_rate": 6.298202046289859e-05, "loss": 0.4516, "step": 185490 }, { "epoch": 9.213271083738949, "grad_norm": 0.1884765625, "learning_rate": 6.294228667924903e-05, "loss": 0.4993, "step": 185500 }, { "epoch": 9.213767756034569, "grad_norm": 0.2177734375, "learning_rate": 6.290255289559949e-05, "loss": 0.496, "step": 185510 }, { "epoch": 9.214264428330187, "grad_norm": 0.203125, "learning_rate": 6.286281911194993e-05, "loss": 0.4954, "step": 185520 }, { "epoch": 9.214761100625807, "grad_norm": 0.19140625, "learning_rate": 6.282308532830039e-05, "loss": 0.4816, "step": 185530 }, { "epoch": 9.215257772921426, "grad_norm": 0.21484375, "learning_rate": 6.278335154465084e-05, "loss": 0.4819, "step": 185540 }, { "epoch": 9.215754445217046, "grad_norm": 0.1806640625, "learning_rate": 6.27436177610013e-05, "loss": 0.4628, "step": 185550 }, { "epoch": 9.216251117512666, "grad_norm": 0.2080078125, "learning_rate": 6.270388397735175e-05, "loss": 0.4623, "step": 185560 }, { "epoch": 9.216747789808284, "grad_norm": 0.1796875, "learning_rate": 6.26641501937022e-05, "loss": 0.4967, "step": 185570 }, { "epoch": 9.217244462103904, "grad_norm": 0.185546875, "learning_rate": 6.262441641005265e-05, "loss": 0.4698, "step": 185580 }, { "epoch": 9.217741134399523, "grad_norm": 0.2099609375, "learning_rate": 6.25846826264031e-05, "loss": 0.4738, "step": 185590 }, { "epoch": 9.218237806695143, "grad_norm": 0.2060546875, "learning_rate": 6.254494884275356e-05, "loss": 0.4926, "step": 185600 }, { "epoch": 9.218734478990761, "grad_norm": 0.1982421875, "learning_rate": 6.2505215059104e-05, "loss": 0.496, "step": 185610 }, { "epoch": 9.219231151286381, "grad_norm": 0.193359375, "learning_rate": 6.246548127545446e-05, "loss": 0.4931, "step": 185620 }, { "epoch": 9.219727823582001, "grad_norm": 0.224609375, "learning_rate": 6.24257474918049e-05, "loss": 0.5287, "step": 185630 }, { "epoch": 9.22022449587762, "grad_norm": 0.1923828125, "learning_rate": 6.238601370815536e-05, "loss": 0.5118, "step": 185640 }, { "epoch": 9.22072116817324, "grad_norm": 0.1953125, "learning_rate": 6.234627992450582e-05, "loss": 0.4663, "step": 185650 }, { "epoch": 9.221217840468858, "grad_norm": 0.189453125, "learning_rate": 6.230654614085628e-05, "loss": 0.5234, "step": 185660 }, { "epoch": 9.221714512764478, "grad_norm": 0.2265625, "learning_rate": 6.226681235720672e-05, "loss": 0.4939, "step": 185670 }, { "epoch": 9.222211185060097, "grad_norm": 0.2001953125, "learning_rate": 6.222707857355718e-05, "loss": 0.499, "step": 185680 }, { "epoch": 9.222707857355717, "grad_norm": 0.19921875, "learning_rate": 6.218734478990762e-05, "loss": 0.4977, "step": 185690 }, { "epoch": 9.223204529651335, "grad_norm": 0.2041015625, "learning_rate": 6.214761100625808e-05, "loss": 0.4894, "step": 185700 }, { "epoch": 9.223701201946955, "grad_norm": 0.2041015625, "learning_rate": 6.210787722260852e-05, "loss": 0.4917, "step": 185710 }, { "epoch": 9.224197874242575, "grad_norm": 0.193359375, "learning_rate": 6.206814343895898e-05, "loss": 0.5219, "step": 185720 }, { "epoch": 9.224694546538194, "grad_norm": 0.189453125, "learning_rate": 6.202840965530943e-05, "loss": 0.4857, "step": 185730 }, { "epoch": 9.225191218833814, "grad_norm": 0.212890625, "learning_rate": 6.198867587165988e-05, "loss": 0.4869, "step": 185740 }, { "epoch": 9.225687891129432, "grad_norm": 0.19140625, "learning_rate": 6.194894208801033e-05, "loss": 0.4897, "step": 185750 }, { "epoch": 9.226184563425052, "grad_norm": 0.205078125, "learning_rate": 6.190920830436079e-05, "loss": 0.4721, "step": 185760 }, { "epoch": 9.22668123572067, "grad_norm": 0.1923828125, "learning_rate": 6.186947452071123e-05, "loss": 0.4598, "step": 185770 }, { "epoch": 9.22717790801629, "grad_norm": 0.2080078125, "learning_rate": 6.182974073706169e-05, "loss": 0.5066, "step": 185780 }, { "epoch": 9.227674580311911, "grad_norm": 0.201171875, "learning_rate": 6.179000695341213e-05, "loss": 0.4993, "step": 185790 }, { "epoch": 9.22817125260753, "grad_norm": 0.17578125, "learning_rate": 6.175027316976259e-05, "loss": 0.4807, "step": 185800 }, { "epoch": 9.22866792490315, "grad_norm": 0.201171875, "learning_rate": 6.171053938611305e-05, "loss": 0.4626, "step": 185810 }, { "epoch": 9.229164597198768, "grad_norm": 0.185546875, "learning_rate": 6.167080560246349e-05, "loss": 0.4757, "step": 185820 }, { "epoch": 9.229661269494388, "grad_norm": 0.189453125, "learning_rate": 6.163107181881395e-05, "loss": 0.4612, "step": 185830 }, { "epoch": 9.230157941790006, "grad_norm": 0.2138671875, "learning_rate": 6.159133803516441e-05, "loss": 0.4929, "step": 185840 }, { "epoch": 9.230654614085626, "grad_norm": 0.1875, "learning_rate": 6.155160425151485e-05, "loss": 0.483, "step": 185850 }, { "epoch": 9.231151286381246, "grad_norm": 0.19140625, "learning_rate": 6.151187046786531e-05, "loss": 0.4785, "step": 185860 }, { "epoch": 9.231647958676865, "grad_norm": 0.1953125, "learning_rate": 6.147213668421575e-05, "loss": 0.4799, "step": 185870 }, { "epoch": 9.232144630972485, "grad_norm": 0.1982421875, "learning_rate": 6.143240290056621e-05, "loss": 0.49, "step": 185880 }, { "epoch": 9.232641303268103, "grad_norm": 0.197265625, "learning_rate": 6.139266911691666e-05, "loss": 0.4554, "step": 185890 }, { "epoch": 9.233137975563723, "grad_norm": 0.189453125, "learning_rate": 6.135293533326711e-05, "loss": 0.4884, "step": 185900 }, { "epoch": 9.233634647859342, "grad_norm": 0.19921875, "learning_rate": 6.131320154961756e-05, "loss": 0.4668, "step": 185910 }, { "epoch": 9.234131320154962, "grad_norm": 0.1865234375, "learning_rate": 6.127346776596802e-05, "loss": 0.4803, "step": 185920 }, { "epoch": 9.234627992450582, "grad_norm": 0.1953125, "learning_rate": 6.123373398231846e-05, "loss": 0.5186, "step": 185930 }, { "epoch": 9.2351246647462, "grad_norm": 0.244140625, "learning_rate": 6.119400019866892e-05, "loss": 0.4741, "step": 185940 }, { "epoch": 9.23562133704182, "grad_norm": 0.1923828125, "learning_rate": 6.115426641501936e-05, "loss": 0.4883, "step": 185950 }, { "epoch": 9.236118009337439, "grad_norm": 0.185546875, "learning_rate": 6.111453263136982e-05, "loss": 0.4836, "step": 185960 }, { "epoch": 9.236614681633059, "grad_norm": 0.1904296875, "learning_rate": 6.107479884772028e-05, "loss": 0.4724, "step": 185970 }, { "epoch": 9.237111353928677, "grad_norm": 0.216796875, "learning_rate": 6.103506506407073e-05, "loss": 0.5036, "step": 185980 }, { "epoch": 9.237608026224297, "grad_norm": 0.2099609375, "learning_rate": 6.099533128042119e-05, "loss": 0.4576, "step": 185990 }, { "epoch": 9.238104698519917, "grad_norm": 0.232421875, "learning_rate": 6.095559749677163e-05, "loss": 0.4848, "step": 186000 }, { "epoch": 9.238601370815536, "grad_norm": 0.20703125, "learning_rate": 6.091586371312209e-05, "loss": 0.4708, "step": 186010 }, { "epoch": 9.239098043111156, "grad_norm": 0.193359375, "learning_rate": 6.0876129929472534e-05, "loss": 0.4908, "step": 186020 }, { "epoch": 9.239594715406774, "grad_norm": 0.203125, "learning_rate": 6.083639614582299e-05, "loss": 0.5043, "step": 186030 }, { "epoch": 9.240091387702394, "grad_norm": 0.216796875, "learning_rate": 6.079666236217344e-05, "loss": 0.4702, "step": 186040 }, { "epoch": 9.240588059998013, "grad_norm": 0.1845703125, "learning_rate": 6.07569285785239e-05, "loss": 0.4784, "step": 186050 }, { "epoch": 9.241084732293633, "grad_norm": 0.1962890625, "learning_rate": 6.0717194794874345e-05, "loss": 0.485, "step": 186060 }, { "epoch": 9.241581404589253, "grad_norm": 0.25390625, "learning_rate": 6.06774610112248e-05, "loss": 0.4819, "step": 186070 }, { "epoch": 9.242078076884871, "grad_norm": 0.19140625, "learning_rate": 6.063772722757525e-05, "loss": 0.4849, "step": 186080 }, { "epoch": 9.242574749180491, "grad_norm": 0.1904296875, "learning_rate": 6.0597993443925704e-05, "loss": 0.5043, "step": 186090 }, { "epoch": 9.24307142147611, "grad_norm": 0.1904296875, "learning_rate": 6.055825966027615e-05, "loss": 0.4819, "step": 186100 }, { "epoch": 9.24356809377173, "grad_norm": 0.1943359375, "learning_rate": 6.0518525876626606e-05, "loss": 0.4952, "step": 186110 }, { "epoch": 9.244064766067348, "grad_norm": 0.1884765625, "learning_rate": 6.047879209297706e-05, "loss": 0.4589, "step": 186120 }, { "epoch": 9.244561438362968, "grad_norm": 0.23046875, "learning_rate": 6.0439058309327515e-05, "loss": 0.5142, "step": 186130 }, { "epoch": 9.245058110658588, "grad_norm": 0.2099609375, "learning_rate": 6.039932452567796e-05, "loss": 0.4881, "step": 186140 }, { "epoch": 9.245554782954207, "grad_norm": 0.1923828125, "learning_rate": 6.035959074202842e-05, "loss": 0.4849, "step": 186150 }, { "epoch": 9.246051455249827, "grad_norm": 0.2197265625, "learning_rate": 6.031985695837886e-05, "loss": 0.4902, "step": 186160 }, { "epoch": 9.246548127545445, "grad_norm": 0.177734375, "learning_rate": 6.028012317472932e-05, "loss": 0.5227, "step": 186170 }, { "epoch": 9.247044799841065, "grad_norm": 0.203125, "learning_rate": 6.0240389391079764e-05, "loss": 0.5197, "step": 186180 }, { "epoch": 9.247541472136684, "grad_norm": 0.21875, "learning_rate": 6.020065560743022e-05, "loss": 0.5058, "step": 186190 }, { "epoch": 9.248038144432304, "grad_norm": 0.1845703125, "learning_rate": 6.016092182378067e-05, "loss": 0.4697, "step": 186200 }, { "epoch": 9.248534816727924, "grad_norm": 0.21484375, "learning_rate": 6.0121188040131124e-05, "loss": 0.5106, "step": 186210 }, { "epoch": 9.249031489023542, "grad_norm": 0.2197265625, "learning_rate": 6.0081454256481575e-05, "loss": 0.4796, "step": 186220 }, { "epoch": 9.249528161319162, "grad_norm": 0.2109375, "learning_rate": 6.004172047283203e-05, "loss": 0.4907, "step": 186230 }, { "epoch": 9.25002483361478, "grad_norm": 0.189453125, "learning_rate": 6.0001986689182477e-05, "loss": 0.4646, "step": 186240 }, { "epoch": 9.2505215059104, "grad_norm": 0.177734375, "learning_rate": 5.9962252905532934e-05, "loss": 0.4674, "step": 186250 }, { "epoch": 9.251018178206019, "grad_norm": 0.1865234375, "learning_rate": 5.992251912188338e-05, "loss": 0.4699, "step": 186260 }, { "epoch": 9.25151485050164, "grad_norm": 0.2021484375, "learning_rate": 5.9882785338233836e-05, "loss": 0.4921, "step": 186270 }, { "epoch": 9.252011522797257, "grad_norm": 0.20703125, "learning_rate": 5.984305155458428e-05, "loss": 0.5019, "step": 186280 }, { "epoch": 9.252508195092878, "grad_norm": 0.1845703125, "learning_rate": 5.980331777093474e-05, "loss": 0.4835, "step": 186290 }, { "epoch": 9.253004867388498, "grad_norm": 0.19921875, "learning_rate": 5.976358398728519e-05, "loss": 0.5084, "step": 186300 }, { "epoch": 9.253501539684116, "grad_norm": 0.232421875, "learning_rate": 5.972385020363565e-05, "loss": 0.5069, "step": 186310 }, { "epoch": 9.253998211979736, "grad_norm": 0.2119140625, "learning_rate": 5.968411641998609e-05, "loss": 0.4888, "step": 186320 }, { "epoch": 9.254494884275354, "grad_norm": 0.1904296875, "learning_rate": 5.964438263633655e-05, "loss": 0.4543, "step": 186330 }, { "epoch": 9.254991556570975, "grad_norm": 0.1884765625, "learning_rate": 5.9604648852686994e-05, "loss": 0.4822, "step": 186340 }, { "epoch": 9.255488228866593, "grad_norm": 0.2021484375, "learning_rate": 5.956491506903745e-05, "loss": 0.4528, "step": 186350 }, { "epoch": 9.255984901162213, "grad_norm": 0.2353515625, "learning_rate": 5.9525181285387896e-05, "loss": 0.5025, "step": 186360 }, { "epoch": 9.256481573457833, "grad_norm": 0.18359375, "learning_rate": 5.9485447501738353e-05, "loss": 0.4753, "step": 186370 }, { "epoch": 9.256978245753452, "grad_norm": 0.2099609375, "learning_rate": 5.9445713718088805e-05, "loss": 0.4525, "step": 186380 }, { "epoch": 9.257474918049072, "grad_norm": 0.212890625, "learning_rate": 5.940597993443926e-05, "loss": 0.4734, "step": 186390 }, { "epoch": 9.25797159034469, "grad_norm": 0.18359375, "learning_rate": 5.936624615078972e-05, "loss": 0.467, "step": 186400 }, { "epoch": 9.25846826264031, "grad_norm": 0.23046875, "learning_rate": 5.9326512367140164e-05, "loss": 0.5307, "step": 186410 }, { "epoch": 9.258964934935928, "grad_norm": 0.2001953125, "learning_rate": 5.928677858349062e-05, "loss": 0.492, "step": 186420 }, { "epoch": 9.259461607231549, "grad_norm": 0.20703125, "learning_rate": 5.9247044799841066e-05, "loss": 0.4876, "step": 186430 }, { "epoch": 9.259958279527169, "grad_norm": 0.181640625, "learning_rate": 5.9207311016191524e-05, "loss": 0.4827, "step": 186440 }, { "epoch": 9.260454951822787, "grad_norm": 0.2001953125, "learning_rate": 5.916757723254197e-05, "loss": 0.5002, "step": 186450 }, { "epoch": 9.260951624118407, "grad_norm": 0.1953125, "learning_rate": 5.9127843448892426e-05, "loss": 0.4513, "step": 186460 }, { "epoch": 9.261448296414025, "grad_norm": 0.23046875, "learning_rate": 5.908810966524288e-05, "loss": 0.5047, "step": 186470 }, { "epoch": 9.261944968709646, "grad_norm": 0.1953125, "learning_rate": 5.9048375881593335e-05, "loss": 0.4695, "step": 186480 }, { "epoch": 9.262441641005264, "grad_norm": 0.205078125, "learning_rate": 5.900864209794378e-05, "loss": 0.4897, "step": 186490 }, { "epoch": 9.262938313300884, "grad_norm": 0.1962890625, "learning_rate": 5.896890831429424e-05, "loss": 0.4734, "step": 186500 }, { "epoch": 9.263434985596504, "grad_norm": 0.1826171875, "learning_rate": 5.892917453064468e-05, "loss": 0.4814, "step": 186510 }, { "epoch": 9.263931657892122, "grad_norm": 0.2109375, "learning_rate": 5.888944074699514e-05, "loss": 0.5046, "step": 186520 }, { "epoch": 9.264428330187743, "grad_norm": 0.1865234375, "learning_rate": 5.8849706963345583e-05, "loss": 0.4948, "step": 186530 }, { "epoch": 9.264925002483361, "grad_norm": 0.2060546875, "learning_rate": 5.880997317969604e-05, "loss": 0.5241, "step": 186540 }, { "epoch": 9.265421674778981, "grad_norm": 0.2041015625, "learning_rate": 5.877023939604649e-05, "loss": 0.5078, "step": 186550 }, { "epoch": 9.2659183470746, "grad_norm": 0.212890625, "learning_rate": 5.873050561239694e-05, "loss": 0.4903, "step": 186560 }, { "epoch": 9.26641501937022, "grad_norm": 0.197265625, "learning_rate": 5.8690771828747394e-05, "loss": 0.4595, "step": 186570 }, { "epoch": 9.26691169166584, "grad_norm": 0.1884765625, "learning_rate": 5.865103804509785e-05, "loss": 0.4921, "step": 186580 }, { "epoch": 9.267408363961458, "grad_norm": 0.1982421875, "learning_rate": 5.8611304261448296e-05, "loss": 0.4667, "step": 186590 }, { "epoch": 9.267905036257078, "grad_norm": 0.21484375, "learning_rate": 5.8571570477798754e-05, "loss": 0.4944, "step": 186600 }, { "epoch": 9.268401708552696, "grad_norm": 0.2158203125, "learning_rate": 5.85318366941492e-05, "loss": 0.4866, "step": 186610 }, { "epoch": 9.268898380848317, "grad_norm": 0.23046875, "learning_rate": 5.8492102910499656e-05, "loss": 0.454, "step": 186620 }, { "epoch": 9.269395053143935, "grad_norm": 0.220703125, "learning_rate": 5.84523691268501e-05, "loss": 0.4836, "step": 186630 }, { "epoch": 9.269891725439555, "grad_norm": 0.1865234375, "learning_rate": 5.841263534320056e-05, "loss": 0.4713, "step": 186640 }, { "epoch": 9.270388397735175, "grad_norm": 0.224609375, "learning_rate": 5.837290155955101e-05, "loss": 0.5056, "step": 186650 }, { "epoch": 9.270885070030793, "grad_norm": 0.193359375, "learning_rate": 5.833316777590147e-05, "loss": 0.4788, "step": 186660 }, { "epoch": 9.271381742326414, "grad_norm": 0.216796875, "learning_rate": 5.829343399225191e-05, "loss": 0.4942, "step": 186670 }, { "epoch": 9.271878414622032, "grad_norm": 0.2060546875, "learning_rate": 5.825370020860237e-05, "loss": 0.4962, "step": 186680 }, { "epoch": 9.272375086917652, "grad_norm": 0.259765625, "learning_rate": 5.8213966424952813e-05, "loss": 0.5012, "step": 186690 }, { "epoch": 9.27287175921327, "grad_norm": 0.1884765625, "learning_rate": 5.817423264130327e-05, "loss": 0.4848, "step": 186700 }, { "epoch": 9.27336843150889, "grad_norm": 0.1796875, "learning_rate": 5.8134498857653716e-05, "loss": 0.5006, "step": 186710 }, { "epoch": 9.27386510380451, "grad_norm": 0.1884765625, "learning_rate": 5.809476507400417e-05, "loss": 0.4955, "step": 186720 }, { "epoch": 9.274361776100129, "grad_norm": 0.205078125, "learning_rate": 5.8055031290354624e-05, "loss": 0.4762, "step": 186730 }, { "epoch": 9.274858448395749, "grad_norm": 0.1845703125, "learning_rate": 5.801529750670508e-05, "loss": 0.5008, "step": 186740 }, { "epoch": 9.275355120691367, "grad_norm": 0.224609375, "learning_rate": 5.7975563723055526e-05, "loss": 0.501, "step": 186750 }, { "epoch": 9.275851792986987, "grad_norm": 0.1796875, "learning_rate": 5.7935829939405984e-05, "loss": 0.4476, "step": 186760 }, { "epoch": 9.276348465282606, "grad_norm": 0.2021484375, "learning_rate": 5.789609615575643e-05, "loss": 0.4489, "step": 186770 }, { "epoch": 9.276845137578226, "grad_norm": 0.2197265625, "learning_rate": 5.7856362372106886e-05, "loss": 0.4733, "step": 186780 }, { "epoch": 9.277341809873846, "grad_norm": 0.23046875, "learning_rate": 5.781662858845733e-05, "loss": 0.5189, "step": 186790 }, { "epoch": 9.277838482169464, "grad_norm": 0.2333984375, "learning_rate": 5.777689480480779e-05, "loss": 0.5196, "step": 186800 }, { "epoch": 9.278335154465084, "grad_norm": 0.2109375, "learning_rate": 5.7737161021158246e-05, "loss": 0.4705, "step": 186810 }, { "epoch": 9.278831826760703, "grad_norm": 0.2021484375, "learning_rate": 5.76974272375087e-05, "loss": 0.4956, "step": 186820 }, { "epoch": 9.279328499056323, "grad_norm": 0.1923828125, "learning_rate": 5.7657693453859155e-05, "loss": 0.5072, "step": 186830 }, { "epoch": 9.279825171351941, "grad_norm": 0.1943359375, "learning_rate": 5.76179596702096e-05, "loss": 0.4981, "step": 186840 }, { "epoch": 9.280321843647561, "grad_norm": 0.1875, "learning_rate": 5.757822588656006e-05, "loss": 0.47, "step": 186850 }, { "epoch": 9.280818515943182, "grad_norm": 0.2138671875, "learning_rate": 5.75384921029105e-05, "loss": 0.5055, "step": 186860 }, { "epoch": 9.2813151882388, "grad_norm": 0.19921875, "learning_rate": 5.749875831926096e-05, "loss": 0.4947, "step": 186870 }, { "epoch": 9.28181186053442, "grad_norm": 0.2021484375, "learning_rate": 5.74590245356114e-05, "loss": 0.5082, "step": 186880 }, { "epoch": 9.282308532830038, "grad_norm": 0.2001953125, "learning_rate": 5.741929075196186e-05, "loss": 0.4985, "step": 186890 }, { "epoch": 9.282805205125658, "grad_norm": 0.197265625, "learning_rate": 5.737955696831231e-05, "loss": 0.4571, "step": 186900 }, { "epoch": 9.283301877421277, "grad_norm": 0.1796875, "learning_rate": 5.733982318466276e-05, "loss": 0.4831, "step": 186910 }, { "epoch": 9.283798549716897, "grad_norm": 0.1923828125, "learning_rate": 5.7300089401013214e-05, "loss": 0.4659, "step": 186920 }, { "epoch": 9.284295222012517, "grad_norm": 0.1875, "learning_rate": 5.726035561736367e-05, "loss": 0.4556, "step": 186930 }, { "epoch": 9.284791894308135, "grad_norm": 0.236328125, "learning_rate": 5.7220621833714116e-05, "loss": 0.4494, "step": 186940 }, { "epoch": 9.285288566603755, "grad_norm": 0.1953125, "learning_rate": 5.7180888050064574e-05, "loss": 0.4987, "step": 186950 }, { "epoch": 9.285785238899374, "grad_norm": 0.19921875, "learning_rate": 5.714115426641502e-05, "loss": 0.4772, "step": 186960 }, { "epoch": 9.286281911194994, "grad_norm": 0.1962890625, "learning_rate": 5.7101420482765476e-05, "loss": 0.4859, "step": 186970 }, { "epoch": 9.286778583490612, "grad_norm": 0.20703125, "learning_rate": 5.706168669911593e-05, "loss": 0.5106, "step": 186980 }, { "epoch": 9.287275255786232, "grad_norm": 0.197265625, "learning_rate": 5.702195291546638e-05, "loss": 0.4717, "step": 186990 }, { "epoch": 9.287771928081852, "grad_norm": 0.2236328125, "learning_rate": 5.698221913181683e-05, "loss": 0.4641, "step": 187000 }, { "epoch": 9.28826860037747, "grad_norm": 0.193359375, "learning_rate": 5.694248534816729e-05, "loss": 0.5042, "step": 187010 }, { "epoch": 9.288765272673091, "grad_norm": 0.1982421875, "learning_rate": 5.690275156451773e-05, "loss": 0.4724, "step": 187020 }, { "epoch": 9.28926194496871, "grad_norm": 0.2001953125, "learning_rate": 5.686301778086819e-05, "loss": 0.4697, "step": 187030 }, { "epoch": 9.28975861726433, "grad_norm": 0.2119140625, "learning_rate": 5.682328399721863e-05, "loss": 0.4926, "step": 187040 }, { "epoch": 9.290255289559948, "grad_norm": 0.2060546875, "learning_rate": 5.678355021356909e-05, "loss": 0.4981, "step": 187050 }, { "epoch": 9.290751961855568, "grad_norm": 0.216796875, "learning_rate": 5.6743816429919535e-05, "loss": 0.4962, "step": 187060 }, { "epoch": 9.291248634151188, "grad_norm": 0.2021484375, "learning_rate": 5.670408264626999e-05, "loss": 0.4751, "step": 187070 }, { "epoch": 9.291745306446806, "grad_norm": 0.2099609375, "learning_rate": 5.6664348862620444e-05, "loss": 0.489, "step": 187080 }, { "epoch": 9.292241978742426, "grad_norm": 0.1923828125, "learning_rate": 5.66246150789709e-05, "loss": 0.4979, "step": 187090 }, { "epoch": 9.292738651038045, "grad_norm": 0.19140625, "learning_rate": 5.6584881295321346e-05, "loss": 0.4927, "step": 187100 }, { "epoch": 9.293235323333665, "grad_norm": 0.1943359375, "learning_rate": 5.6545147511671804e-05, "loss": 0.489, "step": 187110 }, { "epoch": 9.293731995629283, "grad_norm": 0.2099609375, "learning_rate": 5.650541372802225e-05, "loss": 0.4744, "step": 187120 }, { "epoch": 9.294228667924903, "grad_norm": 0.2021484375, "learning_rate": 5.6465679944372706e-05, "loss": 0.5103, "step": 187130 }, { "epoch": 9.294725340220522, "grad_norm": 0.2060546875, "learning_rate": 5.642594616072315e-05, "loss": 0.493, "step": 187140 }, { "epoch": 9.295222012516142, "grad_norm": 0.1728515625, "learning_rate": 5.638621237707361e-05, "loss": 0.4323, "step": 187150 }, { "epoch": 9.295718684811762, "grad_norm": 0.2138671875, "learning_rate": 5.634647859342406e-05, "loss": 0.4903, "step": 187160 }, { "epoch": 9.29621535710738, "grad_norm": 0.2275390625, "learning_rate": 5.630674480977452e-05, "loss": 0.4894, "step": 187170 }, { "epoch": 9.296712029403, "grad_norm": 0.208984375, "learning_rate": 5.626701102612496e-05, "loss": 0.5074, "step": 187180 }, { "epoch": 9.297208701698619, "grad_norm": 0.20703125, "learning_rate": 5.622727724247542e-05, "loss": 0.5152, "step": 187190 }, { "epoch": 9.297705373994239, "grad_norm": 0.205078125, "learning_rate": 5.618754345882586e-05, "loss": 0.4853, "step": 187200 }, { "epoch": 9.298202046289857, "grad_norm": 0.2060546875, "learning_rate": 5.614780967517632e-05, "loss": 0.5112, "step": 187210 }, { "epoch": 9.298698718585477, "grad_norm": 0.2060546875, "learning_rate": 5.6108075891526765e-05, "loss": 0.5068, "step": 187220 }, { "epoch": 9.299195390881097, "grad_norm": 0.1982421875, "learning_rate": 5.606834210787722e-05, "loss": 0.4822, "step": 187230 }, { "epoch": 9.299692063176716, "grad_norm": 0.1787109375, "learning_rate": 5.602860832422768e-05, "loss": 0.4783, "step": 187240 }, { "epoch": 9.300188735472336, "grad_norm": 0.18359375, "learning_rate": 5.598887454057813e-05, "loss": 0.4732, "step": 187250 }, { "epoch": 9.300685407767954, "grad_norm": 0.20703125, "learning_rate": 5.594914075692858e-05, "loss": 0.4715, "step": 187260 }, { "epoch": 9.301182080063574, "grad_norm": 0.197265625, "learning_rate": 5.5909406973279034e-05, "loss": 0.4939, "step": 187270 }, { "epoch": 9.301678752359193, "grad_norm": 0.2001953125, "learning_rate": 5.586967318962949e-05, "loss": 0.4863, "step": 187280 }, { "epoch": 9.302175424654813, "grad_norm": 0.1748046875, "learning_rate": 5.5829939405979936e-05, "loss": 0.4837, "step": 187290 }, { "epoch": 9.302672096950433, "grad_norm": 0.2001953125, "learning_rate": 5.5790205622330394e-05, "loss": 0.4811, "step": 187300 }, { "epoch": 9.303168769246051, "grad_norm": 0.185546875, "learning_rate": 5.575047183868084e-05, "loss": 0.4762, "step": 187310 }, { "epoch": 9.303665441541671, "grad_norm": 0.2314453125, "learning_rate": 5.5710738055031296e-05, "loss": 0.511, "step": 187320 }, { "epoch": 9.30416211383729, "grad_norm": 0.1923828125, "learning_rate": 5.567100427138175e-05, "loss": 0.4655, "step": 187330 }, { "epoch": 9.30465878613291, "grad_norm": 0.19921875, "learning_rate": 5.56312704877322e-05, "loss": 0.4847, "step": 187340 }, { "epoch": 9.305155458428528, "grad_norm": 0.2275390625, "learning_rate": 5.559153670408265e-05, "loss": 0.486, "step": 187350 }, { "epoch": 9.305652130724148, "grad_norm": 0.2080078125, "learning_rate": 5.555180292043311e-05, "loss": 0.4938, "step": 187360 }, { "epoch": 9.306148803019768, "grad_norm": 0.1982421875, "learning_rate": 5.551206913678355e-05, "loss": 0.4912, "step": 187370 }, { "epoch": 9.306645475315387, "grad_norm": 0.2119140625, "learning_rate": 5.547233535313401e-05, "loss": 0.5352, "step": 187380 }, { "epoch": 9.307142147611007, "grad_norm": 0.203125, "learning_rate": 5.543260156948445e-05, "loss": 0.4706, "step": 187390 }, { "epoch": 9.307638819906625, "grad_norm": 0.2080078125, "learning_rate": 5.539286778583491e-05, "loss": 0.4798, "step": 187400 }, { "epoch": 9.308135492202245, "grad_norm": 0.2080078125, "learning_rate": 5.5353134002185355e-05, "loss": 0.4745, "step": 187410 }, { "epoch": 9.308632164497864, "grad_norm": 0.1982421875, "learning_rate": 5.531340021853581e-05, "loss": 0.4589, "step": 187420 }, { "epoch": 9.309128836793484, "grad_norm": 0.1962890625, "learning_rate": 5.5273666434886264e-05, "loss": 0.4995, "step": 187430 }, { "epoch": 9.309625509089104, "grad_norm": 0.2177734375, "learning_rate": 5.523393265123672e-05, "loss": 0.5002, "step": 187440 }, { "epoch": 9.310122181384722, "grad_norm": 0.2236328125, "learning_rate": 5.5194198867587166e-05, "loss": 0.4806, "step": 187450 }, { "epoch": 9.310618853680342, "grad_norm": 0.22265625, "learning_rate": 5.5154465083937624e-05, "loss": 0.4971, "step": 187460 }, { "epoch": 9.31111552597596, "grad_norm": 0.2099609375, "learning_rate": 5.511473130028807e-05, "loss": 0.4762, "step": 187470 }, { "epoch": 9.31161219827158, "grad_norm": 0.2041015625, "learning_rate": 5.5074997516638526e-05, "loss": 0.5103, "step": 187480 }, { "epoch": 9.312108870567199, "grad_norm": 0.1884765625, "learning_rate": 5.503526373298897e-05, "loss": 0.498, "step": 187490 }, { "epoch": 9.31260554286282, "grad_norm": 0.22265625, "learning_rate": 5.499552994933943e-05, "loss": 0.4498, "step": 187500 }, { "epoch": 9.31310221515844, "grad_norm": 0.2255859375, "learning_rate": 5.495579616568988e-05, "loss": 0.5271, "step": 187510 }, { "epoch": 9.313598887454058, "grad_norm": 0.22265625, "learning_rate": 5.491606238204034e-05, "loss": 0.523, "step": 187520 }, { "epoch": 9.314095559749678, "grad_norm": 0.1923828125, "learning_rate": 5.487632859839078e-05, "loss": 0.4399, "step": 187530 }, { "epoch": 9.314592232045296, "grad_norm": 0.19140625, "learning_rate": 5.483659481474124e-05, "loss": 0.4728, "step": 187540 }, { "epoch": 9.315088904340916, "grad_norm": 0.232421875, "learning_rate": 5.479686103109168e-05, "loss": 0.4888, "step": 187550 }, { "epoch": 9.315585576636535, "grad_norm": 0.2412109375, "learning_rate": 5.475712724744214e-05, "loss": 0.4777, "step": 187560 }, { "epoch": 9.316082248932155, "grad_norm": 0.234375, "learning_rate": 5.4717393463792585e-05, "loss": 0.5027, "step": 187570 }, { "epoch": 9.316578921227775, "grad_norm": 0.1865234375, "learning_rate": 5.467765968014304e-05, "loss": 0.4625, "step": 187580 }, { "epoch": 9.317075593523393, "grad_norm": 0.197265625, "learning_rate": 5.4637925896493494e-05, "loss": 0.4635, "step": 187590 }, { "epoch": 9.317572265819013, "grad_norm": 0.1875, "learning_rate": 5.459819211284395e-05, "loss": 0.4702, "step": 187600 }, { "epoch": 9.318068938114632, "grad_norm": 0.2177734375, "learning_rate": 5.4558458329194396e-05, "loss": 0.4579, "step": 187610 }, { "epoch": 9.318565610410252, "grad_norm": 0.2431640625, "learning_rate": 5.4518724545544854e-05, "loss": 0.504, "step": 187620 }, { "epoch": 9.31906228270587, "grad_norm": 0.240234375, "learning_rate": 5.44789907618953e-05, "loss": 0.4948, "step": 187630 }, { "epoch": 9.31955895500149, "grad_norm": 0.2099609375, "learning_rate": 5.4439256978245756e-05, "loss": 0.4738, "step": 187640 }, { "epoch": 9.320055627297108, "grad_norm": 0.220703125, "learning_rate": 5.43995231945962e-05, "loss": 0.4768, "step": 187650 }, { "epoch": 9.320552299592729, "grad_norm": 0.2080078125, "learning_rate": 5.435978941094666e-05, "loss": 0.5013, "step": 187660 }, { "epoch": 9.321048971888349, "grad_norm": 0.2138671875, "learning_rate": 5.4320055627297116e-05, "loss": 0.4793, "step": 187670 }, { "epoch": 9.321545644183967, "grad_norm": 0.185546875, "learning_rate": 5.428032184364757e-05, "loss": 0.4856, "step": 187680 }, { "epoch": 9.322042316479587, "grad_norm": 0.2041015625, "learning_rate": 5.424058805999802e-05, "loss": 0.4884, "step": 187690 }, { "epoch": 9.322538988775205, "grad_norm": 0.201171875, "learning_rate": 5.420085427634847e-05, "loss": 0.5033, "step": 187700 }, { "epoch": 9.323035661070826, "grad_norm": 0.20703125, "learning_rate": 5.416112049269893e-05, "loss": 0.4459, "step": 187710 }, { "epoch": 9.323532333366444, "grad_norm": 0.2080078125, "learning_rate": 5.412138670904937e-05, "loss": 0.4816, "step": 187720 }, { "epoch": 9.324029005662064, "grad_norm": 0.2001953125, "learning_rate": 5.408165292539983e-05, "loss": 0.4857, "step": 187730 }, { "epoch": 9.324525677957684, "grad_norm": 0.1904296875, "learning_rate": 5.404191914175027e-05, "loss": 0.4951, "step": 187740 }, { "epoch": 9.325022350253303, "grad_norm": 0.189453125, "learning_rate": 5.400218535810073e-05, "loss": 0.4738, "step": 187750 }, { "epoch": 9.325519022548923, "grad_norm": 0.185546875, "learning_rate": 5.3962451574451175e-05, "loss": 0.4828, "step": 187760 }, { "epoch": 9.326015694844541, "grad_norm": 0.2412109375, "learning_rate": 5.392271779080163e-05, "loss": 0.4806, "step": 187770 }, { "epoch": 9.326512367140161, "grad_norm": 0.19140625, "learning_rate": 5.3882984007152084e-05, "loss": 0.5165, "step": 187780 }, { "epoch": 9.32700903943578, "grad_norm": 0.2373046875, "learning_rate": 5.384325022350254e-05, "loss": 0.5057, "step": 187790 }, { "epoch": 9.3275057117314, "grad_norm": 0.2021484375, "learning_rate": 5.3803516439852986e-05, "loss": 0.4697, "step": 187800 }, { "epoch": 9.32800238402702, "grad_norm": 0.201171875, "learning_rate": 5.3763782656203444e-05, "loss": 0.4986, "step": 187810 }, { "epoch": 9.328499056322638, "grad_norm": 0.1923828125, "learning_rate": 5.372404887255389e-05, "loss": 0.485, "step": 187820 }, { "epoch": 9.328995728618258, "grad_norm": 0.18359375, "learning_rate": 5.3684315088904346e-05, "loss": 0.4799, "step": 187830 }, { "epoch": 9.329492400913876, "grad_norm": 0.2294921875, "learning_rate": 5.364458130525479e-05, "loss": 0.5037, "step": 187840 }, { "epoch": 9.329989073209497, "grad_norm": 0.1943359375, "learning_rate": 5.360484752160525e-05, "loss": 0.4841, "step": 187850 }, { "epoch": 9.330485745505115, "grad_norm": 0.2099609375, "learning_rate": 5.35651137379557e-05, "loss": 0.4872, "step": 187860 }, { "epoch": 9.330982417800735, "grad_norm": 0.2021484375, "learning_rate": 5.3525379954306157e-05, "loss": 0.4738, "step": 187870 }, { "epoch": 9.331479090096355, "grad_norm": 0.2119140625, "learning_rate": 5.34856461706566e-05, "loss": 0.5264, "step": 187880 }, { "epoch": 9.331975762391973, "grad_norm": 0.1982421875, "learning_rate": 5.344591238700706e-05, "loss": 0.4613, "step": 187890 }, { "epoch": 9.332472434687594, "grad_norm": 0.1865234375, "learning_rate": 5.34061786033575e-05, "loss": 0.4727, "step": 187900 }, { "epoch": 9.332969106983212, "grad_norm": 0.2041015625, "learning_rate": 5.336644481970796e-05, "loss": 0.5011, "step": 187910 }, { "epoch": 9.333465779278832, "grad_norm": 0.1943359375, "learning_rate": 5.3326711036058405e-05, "loss": 0.4775, "step": 187920 }, { "epoch": 9.33396245157445, "grad_norm": 0.203125, "learning_rate": 5.328697725240886e-05, "loss": 0.4799, "step": 187930 }, { "epoch": 9.33445912387007, "grad_norm": 0.197265625, "learning_rate": 5.3247243468759314e-05, "loss": 0.4657, "step": 187940 }, { "epoch": 9.33495579616569, "grad_norm": 0.2109375, "learning_rate": 5.320750968510977e-05, "loss": 0.5079, "step": 187950 }, { "epoch": 9.335452468461309, "grad_norm": 0.201171875, "learning_rate": 5.3167775901460216e-05, "loss": 0.5437, "step": 187960 }, { "epoch": 9.335949140756929, "grad_norm": 0.1875, "learning_rate": 5.3128042117810674e-05, "loss": 0.4687, "step": 187970 }, { "epoch": 9.336445813052547, "grad_norm": 0.212890625, "learning_rate": 5.308830833416112e-05, "loss": 0.4709, "step": 187980 }, { "epoch": 9.336942485348168, "grad_norm": 0.224609375, "learning_rate": 5.3048574550511576e-05, "loss": 0.4948, "step": 187990 }, { "epoch": 9.337439157643786, "grad_norm": 0.19921875, "learning_rate": 5.300884076686202e-05, "loss": 0.4843, "step": 188000 }, { "epoch": 9.337935829939406, "grad_norm": 0.1875, "learning_rate": 5.296910698321248e-05, "loss": 0.4385, "step": 188010 }, { "epoch": 9.338432502235026, "grad_norm": 0.2158203125, "learning_rate": 5.292937319956293e-05, "loss": 0.4862, "step": 188020 }, { "epoch": 9.338929174530644, "grad_norm": 0.2109375, "learning_rate": 5.2889639415913387e-05, "loss": 0.4811, "step": 188030 }, { "epoch": 9.339425846826265, "grad_norm": 0.189453125, "learning_rate": 5.284990563226383e-05, "loss": 0.4694, "step": 188040 }, { "epoch": 9.339922519121883, "grad_norm": 0.2021484375, "learning_rate": 5.281017184861429e-05, "loss": 0.5002, "step": 188050 }, { "epoch": 9.340419191417503, "grad_norm": 0.2333984375, "learning_rate": 5.277043806496473e-05, "loss": 0.4877, "step": 188060 }, { "epoch": 9.340915863713121, "grad_norm": 0.1943359375, "learning_rate": 5.273070428131519e-05, "loss": 0.4835, "step": 188070 }, { "epoch": 9.341412536008741, "grad_norm": 0.2099609375, "learning_rate": 5.269097049766565e-05, "loss": 0.5024, "step": 188080 }, { "epoch": 9.341909208304362, "grad_norm": 0.203125, "learning_rate": 5.265123671401609e-05, "loss": 0.4623, "step": 188090 }, { "epoch": 9.34240588059998, "grad_norm": 0.228515625, "learning_rate": 5.261150293036655e-05, "loss": 0.5086, "step": 188100 }, { "epoch": 9.3429025528956, "grad_norm": 0.2001953125, "learning_rate": 5.2571769146716995e-05, "loss": 0.4834, "step": 188110 }, { "epoch": 9.343399225191218, "grad_norm": 0.1923828125, "learning_rate": 5.253203536306745e-05, "loss": 0.4931, "step": 188120 }, { "epoch": 9.343895897486838, "grad_norm": 0.201171875, "learning_rate": 5.2492301579417904e-05, "loss": 0.4896, "step": 188130 }, { "epoch": 9.344392569782457, "grad_norm": 0.1953125, "learning_rate": 5.245256779576836e-05, "loss": 0.4814, "step": 188140 }, { "epoch": 9.344889242078077, "grad_norm": 0.21484375, "learning_rate": 5.2412834012118806e-05, "loss": 0.4714, "step": 188150 }, { "epoch": 9.345385914373697, "grad_norm": 0.1884765625, "learning_rate": 5.2373100228469263e-05, "loss": 0.4631, "step": 188160 }, { "epoch": 9.345882586669315, "grad_norm": 0.21484375, "learning_rate": 5.233336644481971e-05, "loss": 0.4694, "step": 188170 }, { "epoch": 9.346379258964935, "grad_norm": 0.2021484375, "learning_rate": 5.2293632661170166e-05, "loss": 0.5235, "step": 188180 }, { "epoch": 9.346875931260554, "grad_norm": 0.2216796875, "learning_rate": 5.225389887752061e-05, "loss": 0.4933, "step": 188190 }, { "epoch": 9.347372603556174, "grad_norm": 0.2109375, "learning_rate": 5.221416509387107e-05, "loss": 0.4818, "step": 188200 }, { "epoch": 9.347869275851792, "grad_norm": 0.197265625, "learning_rate": 5.217443131022152e-05, "loss": 0.4755, "step": 188210 }, { "epoch": 9.348365948147412, "grad_norm": 0.1982421875, "learning_rate": 5.2134697526571976e-05, "loss": 0.5092, "step": 188220 }, { "epoch": 9.348862620443033, "grad_norm": 0.1748046875, "learning_rate": 5.209496374292242e-05, "loss": 0.5006, "step": 188230 }, { "epoch": 9.34935929273865, "grad_norm": 0.1943359375, "learning_rate": 5.205522995927288e-05, "loss": 0.4579, "step": 188240 }, { "epoch": 9.349855965034271, "grad_norm": 0.1943359375, "learning_rate": 5.201549617562332e-05, "loss": 0.4728, "step": 188250 }, { "epoch": 9.35035263732989, "grad_norm": 0.1865234375, "learning_rate": 5.197576239197378e-05, "loss": 0.4806, "step": 188260 }, { "epoch": 9.35084930962551, "grad_norm": 0.1943359375, "learning_rate": 5.1936028608324225e-05, "loss": 0.4834, "step": 188270 }, { "epoch": 9.351345981921128, "grad_norm": 0.1904296875, "learning_rate": 5.189629482467468e-05, "loss": 0.4668, "step": 188280 }, { "epoch": 9.351842654216748, "grad_norm": 0.1953125, "learning_rate": 5.1856561041025134e-05, "loss": 0.4714, "step": 188290 }, { "epoch": 9.352339326512368, "grad_norm": 0.232421875, "learning_rate": 5.181682725737559e-05, "loss": 0.4732, "step": 188300 }, { "epoch": 9.352835998807986, "grad_norm": 0.216796875, "learning_rate": 5.1777093473726036e-05, "loss": 0.5134, "step": 188310 }, { "epoch": 9.353332671103606, "grad_norm": 0.22265625, "learning_rate": 5.1737359690076493e-05, "loss": 0.4875, "step": 188320 }, { "epoch": 9.353829343399225, "grad_norm": 0.22265625, "learning_rate": 5.169762590642694e-05, "loss": 0.513, "step": 188330 }, { "epoch": 9.354326015694845, "grad_norm": 0.2314453125, "learning_rate": 5.1657892122777396e-05, "loss": 0.4639, "step": 188340 }, { "epoch": 9.354822687990463, "grad_norm": 0.193359375, "learning_rate": 5.161815833912784e-05, "loss": 0.5193, "step": 188350 }, { "epoch": 9.355319360286083, "grad_norm": 0.20703125, "learning_rate": 5.15784245554783e-05, "loss": 0.4933, "step": 188360 }, { "epoch": 9.355816032581703, "grad_norm": 0.197265625, "learning_rate": 5.153869077182875e-05, "loss": 0.4735, "step": 188370 }, { "epoch": 9.356312704877322, "grad_norm": 0.212890625, "learning_rate": 5.1498956988179206e-05, "loss": 0.4596, "step": 188380 }, { "epoch": 9.356809377172942, "grad_norm": 0.1904296875, "learning_rate": 5.145922320452965e-05, "loss": 0.4963, "step": 188390 }, { "epoch": 9.35730604946856, "grad_norm": 0.189453125, "learning_rate": 5.141948942088011e-05, "loss": 0.5084, "step": 188400 }, { "epoch": 9.35780272176418, "grad_norm": 0.19921875, "learning_rate": 5.137975563723055e-05, "loss": 0.4659, "step": 188410 }, { "epoch": 9.358299394059799, "grad_norm": 0.1923828125, "learning_rate": 5.134002185358101e-05, "loss": 0.4687, "step": 188420 }, { "epoch": 9.358796066355419, "grad_norm": 0.205078125, "learning_rate": 5.1300288069931455e-05, "loss": 0.4858, "step": 188430 }, { "epoch": 9.359292738651039, "grad_norm": 0.2080078125, "learning_rate": 5.126055428628191e-05, "loss": 0.5013, "step": 188440 }, { "epoch": 9.359789410946657, "grad_norm": 0.2373046875, "learning_rate": 5.1220820502632364e-05, "loss": 0.5071, "step": 188450 }, { "epoch": 9.360286083242277, "grad_norm": 0.2109375, "learning_rate": 5.118108671898282e-05, "loss": 0.5256, "step": 188460 }, { "epoch": 9.360782755537896, "grad_norm": 0.1962890625, "learning_rate": 5.1141352935333266e-05, "loss": 0.49, "step": 188470 }, { "epoch": 9.361279427833516, "grad_norm": 0.1923828125, "learning_rate": 5.1101619151683723e-05, "loss": 0.4787, "step": 188480 }, { "epoch": 9.361776100129134, "grad_norm": 0.2314453125, "learning_rate": 5.106188536803417e-05, "loss": 0.4634, "step": 188490 }, { "epoch": 9.362272772424754, "grad_norm": 0.21484375, "learning_rate": 5.1022151584384626e-05, "loss": 0.4745, "step": 188500 }, { "epoch": 9.362769444720373, "grad_norm": 0.2041015625, "learning_rate": 5.098241780073508e-05, "loss": 0.4861, "step": 188510 }, { "epoch": 9.363266117015993, "grad_norm": 0.2021484375, "learning_rate": 5.094268401708553e-05, "loss": 0.5009, "step": 188520 }, { "epoch": 9.363762789311613, "grad_norm": 0.2392578125, "learning_rate": 5.0902950233435985e-05, "loss": 0.5162, "step": 188530 }, { "epoch": 9.364259461607231, "grad_norm": 0.212890625, "learning_rate": 5.086321644978643e-05, "loss": 0.4731, "step": 188540 }, { "epoch": 9.364756133902851, "grad_norm": 0.216796875, "learning_rate": 5.082348266613689e-05, "loss": 0.4853, "step": 188550 }, { "epoch": 9.36525280619847, "grad_norm": 0.2080078125, "learning_rate": 5.078374888248734e-05, "loss": 0.4853, "step": 188560 }, { "epoch": 9.36574947849409, "grad_norm": 0.1982421875, "learning_rate": 5.0744015098837796e-05, "loss": 0.519, "step": 188570 }, { "epoch": 9.366246150789708, "grad_norm": 0.193359375, "learning_rate": 5.070428131518824e-05, "loss": 0.4911, "step": 188580 }, { "epoch": 9.366742823085328, "grad_norm": 0.234375, "learning_rate": 5.06645475315387e-05, "loss": 0.4996, "step": 188590 }, { "epoch": 9.367239495380948, "grad_norm": 0.1923828125, "learning_rate": 5.062481374788914e-05, "loss": 0.4864, "step": 188600 }, { "epoch": 9.367736167676567, "grad_norm": 0.203125, "learning_rate": 5.05850799642396e-05, "loss": 0.4807, "step": 188610 }, { "epoch": 9.368232839972187, "grad_norm": 0.1845703125, "learning_rate": 5.0545346180590045e-05, "loss": 0.4658, "step": 188620 }, { "epoch": 9.368729512267805, "grad_norm": 0.19921875, "learning_rate": 5.05056123969405e-05, "loss": 0.4984, "step": 188630 }, { "epoch": 9.369226184563425, "grad_norm": 0.212890625, "learning_rate": 5.0465878613290953e-05, "loss": 0.4678, "step": 188640 }, { "epoch": 9.369722856859044, "grad_norm": 0.1943359375, "learning_rate": 5.042614482964141e-05, "loss": 0.4779, "step": 188650 }, { "epoch": 9.370219529154664, "grad_norm": 0.185546875, "learning_rate": 5.0386411045991855e-05, "loss": 0.4985, "step": 188660 }, { "epoch": 9.370716201450284, "grad_norm": 0.2109375, "learning_rate": 5.034667726234231e-05, "loss": 0.4886, "step": 188670 }, { "epoch": 9.371212873745902, "grad_norm": 0.212890625, "learning_rate": 5.030694347869276e-05, "loss": 0.5163, "step": 188680 }, { "epoch": 9.371709546041522, "grad_norm": 0.197265625, "learning_rate": 5.0267209695043215e-05, "loss": 0.4977, "step": 188690 }, { "epoch": 9.37220621833714, "grad_norm": 0.2431640625, "learning_rate": 5.022747591139366e-05, "loss": 0.5211, "step": 188700 }, { "epoch": 9.37270289063276, "grad_norm": 0.1953125, "learning_rate": 5.018774212774412e-05, "loss": 0.4903, "step": 188710 }, { "epoch": 9.373199562928379, "grad_norm": 0.1875, "learning_rate": 5.014800834409457e-05, "loss": 0.4677, "step": 188720 }, { "epoch": 9.373696235224, "grad_norm": 0.1923828125, "learning_rate": 5.0108274560445026e-05, "loss": 0.4746, "step": 188730 }, { "epoch": 9.37419290751962, "grad_norm": 0.193359375, "learning_rate": 5.006854077679547e-05, "loss": 0.4586, "step": 188740 }, { "epoch": 9.374689579815238, "grad_norm": 0.185546875, "learning_rate": 5.002880699314593e-05, "loss": 0.4692, "step": 188750 }, { "epoch": 9.375186252110858, "grad_norm": 0.208984375, "learning_rate": 4.998907320949638e-05, "loss": 0.4933, "step": 188760 }, { "epoch": 9.375682924406476, "grad_norm": 0.25, "learning_rate": 4.994933942584683e-05, "loss": 0.493, "step": 188770 }, { "epoch": 9.376179596702096, "grad_norm": 0.19921875, "learning_rate": 4.990960564219728e-05, "loss": 0.4864, "step": 188780 }, { "epoch": 9.376676268997715, "grad_norm": 0.21484375, "learning_rate": 4.986987185854773e-05, "loss": 0.4733, "step": 188790 }, { "epoch": 9.377172941293335, "grad_norm": 0.2265625, "learning_rate": 4.9830138074898183e-05, "loss": 0.5183, "step": 188800 }, { "epoch": 9.377669613588955, "grad_norm": 0.19921875, "learning_rate": 4.979040429124864e-05, "loss": 0.4848, "step": 188810 }, { "epoch": 9.378166285884573, "grad_norm": 0.1904296875, "learning_rate": 4.975067050759909e-05, "loss": 0.5142, "step": 188820 }, { "epoch": 9.378662958180193, "grad_norm": 0.21875, "learning_rate": 4.971093672394954e-05, "loss": 0.4965, "step": 188830 }, { "epoch": 9.379159630475812, "grad_norm": 0.2099609375, "learning_rate": 4.9671202940299994e-05, "loss": 0.4981, "step": 188840 }, { "epoch": 9.379656302771432, "grad_norm": 0.17578125, "learning_rate": 4.9631469156650445e-05, "loss": 0.4926, "step": 188850 }, { "epoch": 9.38015297506705, "grad_norm": 0.197265625, "learning_rate": 4.9591735373000896e-05, "loss": 0.5015, "step": 188860 }, { "epoch": 9.38064964736267, "grad_norm": 0.189453125, "learning_rate": 4.955200158935135e-05, "loss": 0.5019, "step": 188870 }, { "epoch": 9.38114631965829, "grad_norm": 0.201171875, "learning_rate": 4.95122678057018e-05, "loss": 0.5043, "step": 188880 }, { "epoch": 9.381642991953909, "grad_norm": 0.1982421875, "learning_rate": 4.947253402205225e-05, "loss": 0.4871, "step": 188890 }, { "epoch": 9.382139664249529, "grad_norm": 0.2001953125, "learning_rate": 4.943280023840271e-05, "loss": 0.499, "step": 188900 }, { "epoch": 9.382636336545147, "grad_norm": 0.1796875, "learning_rate": 4.939306645475316e-05, "loss": 0.4528, "step": 188910 }, { "epoch": 9.383133008840767, "grad_norm": 0.1865234375, "learning_rate": 4.935333267110361e-05, "loss": 0.483, "step": 188920 }, { "epoch": 9.383629681136386, "grad_norm": 0.216796875, "learning_rate": 4.931359888745406e-05, "loss": 0.4871, "step": 188930 }, { "epoch": 9.384126353432006, "grad_norm": 0.1689453125, "learning_rate": 4.927386510380451e-05, "loss": 0.469, "step": 188940 }, { "epoch": 9.384623025727626, "grad_norm": 0.2109375, "learning_rate": 4.923413132015496e-05, "loss": 0.4758, "step": 188950 }, { "epoch": 9.385119698023244, "grad_norm": 0.248046875, "learning_rate": 4.9194397536505413e-05, "loss": 0.4691, "step": 188960 }, { "epoch": 9.385616370318864, "grad_norm": 0.2373046875, "learning_rate": 4.9154663752855864e-05, "loss": 0.4728, "step": 188970 }, { "epoch": 9.386113042614483, "grad_norm": 0.2216796875, "learning_rate": 4.911492996920632e-05, "loss": 0.4822, "step": 188980 }, { "epoch": 9.386609714910103, "grad_norm": 0.193359375, "learning_rate": 4.907519618555677e-05, "loss": 0.4673, "step": 188990 }, { "epoch": 9.387106387205721, "grad_norm": 0.1806640625, "learning_rate": 4.9035462401907224e-05, "loss": 0.4952, "step": 189000 }, { "epoch": 9.387603059501341, "grad_norm": 0.19140625, "learning_rate": 4.8995728618257675e-05, "loss": 0.4755, "step": 189010 }, { "epoch": 9.388099731796961, "grad_norm": 0.2021484375, "learning_rate": 4.8955994834608126e-05, "loss": 0.5159, "step": 189020 }, { "epoch": 9.38859640409258, "grad_norm": 0.205078125, "learning_rate": 4.891626105095858e-05, "loss": 0.4792, "step": 189030 }, { "epoch": 9.3890930763882, "grad_norm": 0.201171875, "learning_rate": 4.887652726730903e-05, "loss": 0.4912, "step": 189040 }, { "epoch": 9.389589748683818, "grad_norm": 0.2216796875, "learning_rate": 4.883679348365948e-05, "loss": 0.4851, "step": 189050 }, { "epoch": 9.390086420979438, "grad_norm": 0.265625, "learning_rate": 4.879705970000994e-05, "loss": 0.5188, "step": 189060 }, { "epoch": 9.390583093275056, "grad_norm": 0.1845703125, "learning_rate": 4.875732591636039e-05, "loss": 0.4958, "step": 189070 }, { "epoch": 9.391079765570677, "grad_norm": 0.2255859375, "learning_rate": 4.871759213271084e-05, "loss": 0.4566, "step": 189080 }, { "epoch": 9.391576437866295, "grad_norm": 0.2041015625, "learning_rate": 4.867785834906129e-05, "loss": 0.485, "step": 189090 }, { "epoch": 9.392073110161915, "grad_norm": 0.208984375, "learning_rate": 4.863812456541174e-05, "loss": 0.4859, "step": 189100 }, { "epoch": 9.392569782457535, "grad_norm": 0.203125, "learning_rate": 4.859839078176219e-05, "loss": 0.4685, "step": 189110 }, { "epoch": 9.393066454753153, "grad_norm": 0.203125, "learning_rate": 4.855865699811264e-05, "loss": 0.4739, "step": 189120 }, { "epoch": 9.393563127048774, "grad_norm": 0.1875, "learning_rate": 4.85189232144631e-05, "loss": 0.464, "step": 189130 }, { "epoch": 9.394059799344392, "grad_norm": 0.220703125, "learning_rate": 4.847918943081355e-05, "loss": 0.5047, "step": 189140 }, { "epoch": 9.394556471640012, "grad_norm": 0.2421875, "learning_rate": 4.8439455647164e-05, "loss": 0.5102, "step": 189150 }, { "epoch": 9.39505314393563, "grad_norm": 0.197265625, "learning_rate": 4.839972186351446e-05, "loss": 0.4791, "step": 189160 }, { "epoch": 9.39554981623125, "grad_norm": 0.2216796875, "learning_rate": 4.835998807986491e-05, "loss": 0.4723, "step": 189170 }, { "epoch": 9.39604648852687, "grad_norm": 0.1875, "learning_rate": 4.832025429621536e-05, "loss": 0.5001, "step": 189180 }, { "epoch": 9.396543160822489, "grad_norm": 0.208984375, "learning_rate": 4.8280520512565814e-05, "loss": 0.4919, "step": 189190 }, { "epoch": 9.397039833118109, "grad_norm": 0.2421875, "learning_rate": 4.8240786728916265e-05, "loss": 0.531, "step": 189200 }, { "epoch": 9.397536505413727, "grad_norm": 0.1943359375, "learning_rate": 4.8201052945266716e-05, "loss": 0.4866, "step": 189210 }, { "epoch": 9.398033177709348, "grad_norm": 0.2236328125, "learning_rate": 4.816131916161717e-05, "loss": 0.4984, "step": 189220 }, { "epoch": 9.398529850004966, "grad_norm": 0.2099609375, "learning_rate": 4.812158537796762e-05, "loss": 0.466, "step": 189230 }, { "epoch": 9.399026522300586, "grad_norm": 0.2041015625, "learning_rate": 4.808185159431807e-05, "loss": 0.5262, "step": 189240 }, { "epoch": 9.399523194596206, "grad_norm": 0.185546875, "learning_rate": 4.804211781066853e-05, "loss": 0.4608, "step": 189250 }, { "epoch": 9.400019866891824, "grad_norm": 0.2109375, "learning_rate": 4.800238402701898e-05, "loss": 0.4591, "step": 189260 }, { "epoch": 9.400516539187445, "grad_norm": 0.208984375, "learning_rate": 4.796265024336943e-05, "loss": 0.4883, "step": 189270 }, { "epoch": 9.401013211483063, "grad_norm": 0.2275390625, "learning_rate": 4.792291645971988e-05, "loss": 0.4759, "step": 189280 }, { "epoch": 9.401509883778683, "grad_norm": 0.201171875, "learning_rate": 4.788318267607033e-05, "loss": 0.4968, "step": 189290 }, { "epoch": 9.402006556074301, "grad_norm": 0.21875, "learning_rate": 4.784344889242078e-05, "loss": 0.4804, "step": 189300 }, { "epoch": 9.402503228369921, "grad_norm": 0.2099609375, "learning_rate": 4.780371510877123e-05, "loss": 0.4754, "step": 189310 }, { "epoch": 9.402999900665542, "grad_norm": 0.212890625, "learning_rate": 4.7763981325121684e-05, "loss": 0.4695, "step": 189320 }, { "epoch": 9.40349657296116, "grad_norm": 0.205078125, "learning_rate": 4.772424754147214e-05, "loss": 0.4796, "step": 189330 }, { "epoch": 9.40399324525678, "grad_norm": 0.208984375, "learning_rate": 4.768451375782259e-05, "loss": 0.4904, "step": 189340 }, { "epoch": 9.404489917552398, "grad_norm": 0.1962890625, "learning_rate": 4.7644779974173044e-05, "loss": 0.4853, "step": 189350 }, { "epoch": 9.404986589848018, "grad_norm": 0.26171875, "learning_rate": 4.7605046190523495e-05, "loss": 0.513, "step": 189360 }, { "epoch": 9.405483262143637, "grad_norm": 0.2138671875, "learning_rate": 4.7565312406873946e-05, "loss": 0.4981, "step": 189370 }, { "epoch": 9.405979934439257, "grad_norm": 0.19921875, "learning_rate": 4.75255786232244e-05, "loss": 0.4926, "step": 189380 }, { "epoch": 9.406476606734877, "grad_norm": 0.216796875, "learning_rate": 4.748584483957485e-05, "loss": 0.4727, "step": 189390 }, { "epoch": 9.406973279030495, "grad_norm": 0.193359375, "learning_rate": 4.74461110559253e-05, "loss": 0.4861, "step": 189400 }, { "epoch": 9.407469951326116, "grad_norm": 0.216796875, "learning_rate": 4.740637727227576e-05, "loss": 0.5027, "step": 189410 }, { "epoch": 9.407966623621734, "grad_norm": 0.189453125, "learning_rate": 4.736664348862621e-05, "loss": 0.5004, "step": 189420 }, { "epoch": 9.408463295917354, "grad_norm": 0.1865234375, "learning_rate": 4.732690970497666e-05, "loss": 0.5357, "step": 189430 }, { "epoch": 9.408959968212972, "grad_norm": 0.19140625, "learning_rate": 4.728717592132711e-05, "loss": 0.4972, "step": 189440 }, { "epoch": 9.409456640508592, "grad_norm": 0.2275390625, "learning_rate": 4.724744213767756e-05, "loss": 0.5064, "step": 189450 }, { "epoch": 9.409953312804213, "grad_norm": 0.19921875, "learning_rate": 4.720770835402801e-05, "loss": 0.4709, "step": 189460 }, { "epoch": 9.41044998509983, "grad_norm": 0.1767578125, "learning_rate": 4.716797457037846e-05, "loss": 0.4683, "step": 189470 }, { "epoch": 9.410946657395451, "grad_norm": 0.271484375, "learning_rate": 4.7128240786728914e-05, "loss": 0.5011, "step": 189480 }, { "epoch": 9.41144332969107, "grad_norm": 0.2109375, "learning_rate": 4.7088507003079365e-05, "loss": 0.5058, "step": 189490 }, { "epoch": 9.41194000198669, "grad_norm": 0.208984375, "learning_rate": 4.704877321942982e-05, "loss": 0.5098, "step": 189500 }, { "epoch": 9.412436674282308, "grad_norm": 0.22265625, "learning_rate": 4.7009039435780274e-05, "loss": 0.4796, "step": 189510 }, { "epoch": 9.412933346577928, "grad_norm": 0.224609375, "learning_rate": 4.6969305652130725e-05, "loss": 0.5047, "step": 189520 }, { "epoch": 9.413430018873548, "grad_norm": 0.1884765625, "learning_rate": 4.6929571868481176e-05, "loss": 0.4659, "step": 189530 }, { "epoch": 9.413926691169166, "grad_norm": 0.197265625, "learning_rate": 4.688983808483163e-05, "loss": 0.471, "step": 189540 }, { "epoch": 9.414423363464786, "grad_norm": 0.1875, "learning_rate": 4.6850104301182085e-05, "loss": 0.4838, "step": 189550 }, { "epoch": 9.414920035760405, "grad_norm": 0.2177734375, "learning_rate": 4.6810370517532536e-05, "loss": 0.4829, "step": 189560 }, { "epoch": 9.415416708056025, "grad_norm": 0.2080078125, "learning_rate": 4.677063673388299e-05, "loss": 0.4788, "step": 189570 }, { "epoch": 9.415913380351643, "grad_norm": 0.2080078125, "learning_rate": 4.673090295023344e-05, "loss": 0.4913, "step": 189580 }, { "epoch": 9.416410052647263, "grad_norm": 0.236328125, "learning_rate": 4.669116916658389e-05, "loss": 0.5042, "step": 189590 }, { "epoch": 9.416906724942883, "grad_norm": 0.1982421875, "learning_rate": 4.665143538293435e-05, "loss": 0.4967, "step": 189600 }, { "epoch": 9.417403397238502, "grad_norm": 0.1923828125, "learning_rate": 4.66117015992848e-05, "loss": 0.4489, "step": 189610 }, { "epoch": 9.417900069534122, "grad_norm": 0.216796875, "learning_rate": 4.657196781563525e-05, "loss": 0.5097, "step": 189620 }, { "epoch": 9.41839674182974, "grad_norm": 0.22265625, "learning_rate": 4.65322340319857e-05, "loss": 0.4542, "step": 189630 }, { "epoch": 9.41889341412536, "grad_norm": 0.2119140625, "learning_rate": 4.649250024833615e-05, "loss": 0.4861, "step": 189640 }, { "epoch": 9.419390086420979, "grad_norm": 0.255859375, "learning_rate": 4.64527664646866e-05, "loss": 0.4727, "step": 189650 }, { "epoch": 9.419886758716599, "grad_norm": 0.1826171875, "learning_rate": 4.641303268103705e-05, "loss": 0.4634, "step": 189660 }, { "epoch": 9.420383431012219, "grad_norm": 0.19921875, "learning_rate": 4.6373298897387504e-05, "loss": 0.4895, "step": 189670 }, { "epoch": 9.420880103307837, "grad_norm": 0.2119140625, "learning_rate": 4.633356511373796e-05, "loss": 0.5143, "step": 189680 }, { "epoch": 9.421376775603457, "grad_norm": 0.193359375, "learning_rate": 4.629383133008841e-05, "loss": 0.4595, "step": 189690 }, { "epoch": 9.421873447899076, "grad_norm": 0.193359375, "learning_rate": 4.6254097546438864e-05, "loss": 0.501, "step": 189700 }, { "epoch": 9.422370120194696, "grad_norm": 0.2158203125, "learning_rate": 4.6214363762789315e-05, "loss": 0.4674, "step": 189710 }, { "epoch": 9.422866792490314, "grad_norm": 0.2177734375, "learning_rate": 4.6174629979139766e-05, "loss": 0.5058, "step": 189720 }, { "epoch": 9.423363464785934, "grad_norm": 0.224609375, "learning_rate": 4.613489619549022e-05, "loss": 0.4846, "step": 189730 }, { "epoch": 9.423860137081554, "grad_norm": 0.1845703125, "learning_rate": 4.609516241184067e-05, "loss": 0.4785, "step": 189740 }, { "epoch": 9.424356809377173, "grad_norm": 0.19921875, "learning_rate": 4.605542862819112e-05, "loss": 0.5165, "step": 189750 }, { "epoch": 9.424853481672793, "grad_norm": 0.19921875, "learning_rate": 4.601569484454158e-05, "loss": 0.4946, "step": 189760 }, { "epoch": 9.425350153968411, "grad_norm": 0.1982421875, "learning_rate": 4.597596106089203e-05, "loss": 0.5008, "step": 189770 }, { "epoch": 9.425846826264031, "grad_norm": 0.1962890625, "learning_rate": 4.593622727724248e-05, "loss": 0.5056, "step": 189780 }, { "epoch": 9.42634349855965, "grad_norm": 0.224609375, "learning_rate": 4.589649349359293e-05, "loss": 0.4819, "step": 189790 }, { "epoch": 9.42684017085527, "grad_norm": 0.185546875, "learning_rate": 4.585675970994338e-05, "loss": 0.4559, "step": 189800 }, { "epoch": 9.42733684315089, "grad_norm": 0.2109375, "learning_rate": 4.581702592629383e-05, "loss": 0.4852, "step": 189810 }, { "epoch": 9.427833515446508, "grad_norm": 0.1923828125, "learning_rate": 4.577729214264428e-05, "loss": 0.4828, "step": 189820 }, { "epoch": 9.428330187742128, "grad_norm": 0.2021484375, "learning_rate": 4.5737558358994734e-05, "loss": 0.5205, "step": 189830 }, { "epoch": 9.428826860037747, "grad_norm": 0.1982421875, "learning_rate": 4.5697824575345185e-05, "loss": 0.4804, "step": 189840 }, { "epoch": 9.429323532333367, "grad_norm": 0.201171875, "learning_rate": 4.565809079169564e-05, "loss": 0.5254, "step": 189850 }, { "epoch": 9.429820204628985, "grad_norm": 0.18359375, "learning_rate": 4.5618357008046094e-05, "loss": 0.49, "step": 189860 }, { "epoch": 9.430316876924605, "grad_norm": 0.1953125, "learning_rate": 4.5578623224396545e-05, "loss": 0.4915, "step": 189870 }, { "epoch": 9.430813549220225, "grad_norm": 0.189453125, "learning_rate": 4.5538889440746996e-05, "loss": 0.4658, "step": 189880 }, { "epoch": 9.431310221515844, "grad_norm": 0.2197265625, "learning_rate": 4.549915565709745e-05, "loss": 0.5017, "step": 189890 }, { "epoch": 9.431806893811464, "grad_norm": 0.212890625, "learning_rate": 4.54594218734479e-05, "loss": 0.5033, "step": 189900 }, { "epoch": 9.432303566107082, "grad_norm": 0.224609375, "learning_rate": 4.541968808979835e-05, "loss": 0.4766, "step": 189910 }, { "epoch": 9.432800238402702, "grad_norm": 0.1826171875, "learning_rate": 4.53799543061488e-05, "loss": 0.4802, "step": 189920 }, { "epoch": 9.43329691069832, "grad_norm": 0.203125, "learning_rate": 4.534022052249926e-05, "loss": 0.4642, "step": 189930 }, { "epoch": 9.43379358299394, "grad_norm": 0.2197265625, "learning_rate": 4.530048673884971e-05, "loss": 0.4749, "step": 189940 }, { "epoch": 9.43429025528956, "grad_norm": 0.2197265625, "learning_rate": 4.526075295520016e-05, "loss": 0.4763, "step": 189950 }, { "epoch": 9.43478692758518, "grad_norm": 0.1953125, "learning_rate": 4.522101917155061e-05, "loss": 0.4624, "step": 189960 }, { "epoch": 9.4352835998808, "grad_norm": 0.208984375, "learning_rate": 4.518128538790107e-05, "loss": 0.4999, "step": 189970 }, { "epoch": 9.435780272176418, "grad_norm": 0.205078125, "learning_rate": 4.514155160425152e-05, "loss": 0.5081, "step": 189980 }, { "epoch": 9.436276944472038, "grad_norm": 0.2080078125, "learning_rate": 4.510181782060197e-05, "loss": 0.4671, "step": 189990 }, { "epoch": 9.436773616767656, "grad_norm": 0.267578125, "learning_rate": 4.506208403695242e-05, "loss": 0.4814, "step": 190000 }, { "epoch": 9.437270289063276, "grad_norm": 0.2099609375, "learning_rate": 4.502235025330287e-05, "loss": 0.481, "step": 190010 }, { "epoch": 9.437766961358895, "grad_norm": 0.1904296875, "learning_rate": 4.4982616469653324e-05, "loss": 0.4711, "step": 190020 }, { "epoch": 9.438263633654515, "grad_norm": 0.1904296875, "learning_rate": 4.494288268600378e-05, "loss": 0.4816, "step": 190030 }, { "epoch": 9.438760305950135, "grad_norm": 0.205078125, "learning_rate": 4.490314890235423e-05, "loss": 0.4984, "step": 190040 }, { "epoch": 9.439256978245753, "grad_norm": 0.234375, "learning_rate": 4.4863415118704684e-05, "loss": 0.4892, "step": 190050 }, { "epoch": 9.439753650541373, "grad_norm": 0.2021484375, "learning_rate": 4.4823681335055135e-05, "loss": 0.4802, "step": 190060 }, { "epoch": 9.440250322836992, "grad_norm": 0.21484375, "learning_rate": 4.4783947551405586e-05, "loss": 0.4801, "step": 190070 }, { "epoch": 9.440746995132612, "grad_norm": 0.2490234375, "learning_rate": 4.474421376775604e-05, "loss": 0.4821, "step": 190080 }, { "epoch": 9.44124366742823, "grad_norm": 0.2421875, "learning_rate": 4.470447998410649e-05, "loss": 0.4888, "step": 190090 }, { "epoch": 9.44174033972385, "grad_norm": 0.1923828125, "learning_rate": 4.466474620045694e-05, "loss": 0.4631, "step": 190100 }, { "epoch": 9.44223701201947, "grad_norm": 0.2421875, "learning_rate": 4.46250124168074e-05, "loss": 0.4987, "step": 190110 }, { "epoch": 9.442733684315089, "grad_norm": 0.2060546875, "learning_rate": 4.458527863315785e-05, "loss": 0.4833, "step": 190120 }, { "epoch": 9.443230356610709, "grad_norm": 0.1953125, "learning_rate": 4.45455448495083e-05, "loss": 0.4911, "step": 190130 }, { "epoch": 9.443727028906327, "grad_norm": 0.2197265625, "learning_rate": 4.450581106585875e-05, "loss": 0.4735, "step": 190140 }, { "epoch": 9.444223701201947, "grad_norm": 0.19140625, "learning_rate": 4.44660772822092e-05, "loss": 0.4937, "step": 190150 }, { "epoch": 9.444720373497566, "grad_norm": 0.189453125, "learning_rate": 4.442634349855965e-05, "loss": 0.47, "step": 190160 }, { "epoch": 9.445217045793186, "grad_norm": 0.1884765625, "learning_rate": 4.43866097149101e-05, "loss": 0.4796, "step": 190170 }, { "epoch": 9.445713718088806, "grad_norm": 0.181640625, "learning_rate": 4.4346875931260554e-05, "loss": 0.4912, "step": 190180 }, { "epoch": 9.446210390384424, "grad_norm": 0.2021484375, "learning_rate": 4.4307142147611005e-05, "loss": 0.5073, "step": 190190 }, { "epoch": 9.446707062680044, "grad_norm": 0.205078125, "learning_rate": 4.426740836396146e-05, "loss": 0.4368, "step": 190200 }, { "epoch": 9.447203734975663, "grad_norm": 0.234375, "learning_rate": 4.4227674580311914e-05, "loss": 0.5063, "step": 190210 }, { "epoch": 9.447700407271283, "grad_norm": 0.1953125, "learning_rate": 4.4187940796662365e-05, "loss": 0.4876, "step": 190220 }, { "epoch": 9.448197079566901, "grad_norm": 0.1953125, "learning_rate": 4.4148207013012816e-05, "loss": 0.4991, "step": 190230 }, { "epoch": 9.448693751862521, "grad_norm": 0.1953125, "learning_rate": 4.410847322936327e-05, "loss": 0.475, "step": 190240 }, { "epoch": 9.449190424158141, "grad_norm": 0.2216796875, "learning_rate": 4.406873944571372e-05, "loss": 0.5007, "step": 190250 }, { "epoch": 9.44968709645376, "grad_norm": 0.208984375, "learning_rate": 4.402900566206417e-05, "loss": 0.4957, "step": 190260 }, { "epoch": 9.45018376874938, "grad_norm": 0.205078125, "learning_rate": 4.398927187841462e-05, "loss": 0.474, "step": 190270 }, { "epoch": 9.450680441044998, "grad_norm": 0.2119140625, "learning_rate": 4.394953809476508e-05, "loss": 0.4743, "step": 190280 }, { "epoch": 9.451177113340618, "grad_norm": 0.2109375, "learning_rate": 4.390980431111553e-05, "loss": 0.4712, "step": 190290 }, { "epoch": 9.451673785636237, "grad_norm": 0.19921875, "learning_rate": 4.387007052746598e-05, "loss": 0.458, "step": 190300 }, { "epoch": 9.452170457931857, "grad_norm": 0.19921875, "learning_rate": 4.383033674381643e-05, "loss": 0.4802, "step": 190310 }, { "epoch": 9.452667130227477, "grad_norm": 0.2001953125, "learning_rate": 4.379060296016688e-05, "loss": 0.4896, "step": 190320 }, { "epoch": 9.453163802523095, "grad_norm": 0.2158203125, "learning_rate": 4.375086917651733e-05, "loss": 0.4979, "step": 190330 }, { "epoch": 9.453660474818715, "grad_norm": 0.2021484375, "learning_rate": 4.3711135392867784e-05, "loss": 0.5046, "step": 190340 }, { "epoch": 9.454157147114334, "grad_norm": 0.216796875, "learning_rate": 4.3671401609218235e-05, "loss": 0.4758, "step": 190350 }, { "epoch": 9.454653819409954, "grad_norm": 0.2294921875, "learning_rate": 4.363166782556869e-05, "loss": 0.4676, "step": 190360 }, { "epoch": 9.455150491705572, "grad_norm": 0.2021484375, "learning_rate": 4.3591934041919144e-05, "loss": 0.4821, "step": 190370 }, { "epoch": 9.455647164001192, "grad_norm": 0.21875, "learning_rate": 4.3552200258269595e-05, "loss": 0.5047, "step": 190380 }, { "epoch": 9.456143836296812, "grad_norm": 0.2001953125, "learning_rate": 4.351246647462005e-05, "loss": 0.5165, "step": 190390 }, { "epoch": 9.45664050859243, "grad_norm": 0.2041015625, "learning_rate": 4.3472732690970504e-05, "loss": 0.4672, "step": 190400 }, { "epoch": 9.45713718088805, "grad_norm": 0.1953125, "learning_rate": 4.3432998907320955e-05, "loss": 0.472, "step": 190410 }, { "epoch": 9.457633853183669, "grad_norm": 0.208984375, "learning_rate": 4.3393265123671406e-05, "loss": 0.4751, "step": 190420 }, { "epoch": 9.45813052547929, "grad_norm": 0.185546875, "learning_rate": 4.335353134002186e-05, "loss": 0.4832, "step": 190430 }, { "epoch": 9.458627197774907, "grad_norm": 0.228515625, "learning_rate": 4.331379755637231e-05, "loss": 0.5197, "step": 190440 }, { "epoch": 9.459123870070528, "grad_norm": 0.21484375, "learning_rate": 4.327406377272276e-05, "loss": 0.5084, "step": 190450 }, { "epoch": 9.459620542366146, "grad_norm": 0.1884765625, "learning_rate": 4.3234329989073217e-05, "loss": 0.4717, "step": 190460 }, { "epoch": 9.460117214661766, "grad_norm": 0.203125, "learning_rate": 4.319459620542367e-05, "loss": 0.5032, "step": 190470 }, { "epoch": 9.460613886957386, "grad_norm": 0.2119140625, "learning_rate": 4.315486242177412e-05, "loss": 0.4943, "step": 190480 }, { "epoch": 9.461110559253004, "grad_norm": 0.240234375, "learning_rate": 4.311512863812457e-05, "loss": 0.4644, "step": 190490 }, { "epoch": 9.461607231548625, "grad_norm": 0.203125, "learning_rate": 4.307539485447502e-05, "loss": 0.4937, "step": 190500 }, { "epoch": 9.462103903844243, "grad_norm": 0.232421875, "learning_rate": 4.303566107082547e-05, "loss": 0.5034, "step": 190510 }, { "epoch": 9.462600576139863, "grad_norm": 0.2041015625, "learning_rate": 4.299592728717592e-05, "loss": 0.4807, "step": 190520 }, { "epoch": 9.463097248435481, "grad_norm": 0.189453125, "learning_rate": 4.2956193503526374e-05, "loss": 0.4897, "step": 190530 }, { "epoch": 9.463593920731102, "grad_norm": 0.197265625, "learning_rate": 4.291645971987683e-05, "loss": 0.4738, "step": 190540 }, { "epoch": 9.464090593026722, "grad_norm": 0.2099609375, "learning_rate": 4.287672593622728e-05, "loss": 0.5169, "step": 190550 }, { "epoch": 9.46458726532234, "grad_norm": 0.205078125, "learning_rate": 4.2836992152577734e-05, "loss": 0.5095, "step": 190560 }, { "epoch": 9.46508393761796, "grad_norm": 0.201171875, "learning_rate": 4.2797258368928185e-05, "loss": 0.4726, "step": 190570 }, { "epoch": 9.465580609913578, "grad_norm": 0.1923828125, "learning_rate": 4.2757524585278636e-05, "loss": 0.4548, "step": 190580 }, { "epoch": 9.466077282209199, "grad_norm": 0.216796875, "learning_rate": 4.271779080162909e-05, "loss": 0.481, "step": 190590 }, { "epoch": 9.466573954504817, "grad_norm": 0.2099609375, "learning_rate": 4.267805701797954e-05, "loss": 0.5057, "step": 190600 }, { "epoch": 9.467070626800437, "grad_norm": 0.19921875, "learning_rate": 4.263832323432999e-05, "loss": 0.4813, "step": 190610 }, { "epoch": 9.467567299096057, "grad_norm": 0.201171875, "learning_rate": 4.259858945068044e-05, "loss": 0.5279, "step": 190620 }, { "epoch": 9.468063971391675, "grad_norm": 0.193359375, "learning_rate": 4.25588556670309e-05, "loss": 0.4943, "step": 190630 }, { "epoch": 9.468560643687296, "grad_norm": 0.19921875, "learning_rate": 4.251912188338135e-05, "loss": 0.4927, "step": 190640 }, { "epoch": 9.469057315982914, "grad_norm": 0.2138671875, "learning_rate": 4.24793880997318e-05, "loss": 0.4796, "step": 190650 }, { "epoch": 9.469553988278534, "grad_norm": 0.2099609375, "learning_rate": 4.243965431608225e-05, "loss": 0.4778, "step": 190660 }, { "epoch": 9.470050660574152, "grad_norm": 0.2158203125, "learning_rate": 4.23999205324327e-05, "loss": 0.5134, "step": 190670 }, { "epoch": 9.470547332869772, "grad_norm": 0.2255859375, "learning_rate": 4.236018674878315e-05, "loss": 0.4747, "step": 190680 }, { "epoch": 9.471044005165393, "grad_norm": 0.1865234375, "learning_rate": 4.2320452965133604e-05, "loss": 0.5005, "step": 190690 }, { "epoch": 9.471540677461011, "grad_norm": 0.2060546875, "learning_rate": 4.2280719181484055e-05, "loss": 0.4956, "step": 190700 }, { "epoch": 9.472037349756631, "grad_norm": 0.228515625, "learning_rate": 4.224098539783451e-05, "loss": 0.5076, "step": 190710 }, { "epoch": 9.47253402205225, "grad_norm": 0.189453125, "learning_rate": 4.2201251614184964e-05, "loss": 0.4921, "step": 190720 }, { "epoch": 9.47303069434787, "grad_norm": 0.208984375, "learning_rate": 4.2161517830535415e-05, "loss": 0.4581, "step": 190730 }, { "epoch": 9.473527366643488, "grad_norm": 0.201171875, "learning_rate": 4.2121784046885866e-05, "loss": 0.4879, "step": 190740 }, { "epoch": 9.474024038939108, "grad_norm": 0.2041015625, "learning_rate": 4.2082050263236317e-05, "loss": 0.4743, "step": 190750 }, { "epoch": 9.474520711234728, "grad_norm": 0.220703125, "learning_rate": 4.204231647958677e-05, "loss": 0.4943, "step": 190760 }, { "epoch": 9.475017383530346, "grad_norm": 0.1943359375, "learning_rate": 4.200258269593722e-05, "loss": 0.4593, "step": 190770 }, { "epoch": 9.475514055825967, "grad_norm": 0.189453125, "learning_rate": 4.196284891228767e-05, "loss": 0.4725, "step": 190780 }, { "epoch": 9.476010728121585, "grad_norm": 0.1982421875, "learning_rate": 4.192311512863812e-05, "loss": 0.4773, "step": 190790 }, { "epoch": 9.476507400417205, "grad_norm": 0.19921875, "learning_rate": 4.188338134498858e-05, "loss": 0.4791, "step": 190800 }, { "epoch": 9.477004072712823, "grad_norm": 0.208984375, "learning_rate": 4.184364756133903e-05, "loss": 0.503, "step": 190810 }, { "epoch": 9.477500745008443, "grad_norm": 0.2001953125, "learning_rate": 4.180391377768949e-05, "loss": 0.5181, "step": 190820 }, { "epoch": 9.477997417304064, "grad_norm": 0.2119140625, "learning_rate": 4.176417999403994e-05, "loss": 0.465, "step": 190830 }, { "epoch": 9.478494089599682, "grad_norm": 0.2109375, "learning_rate": 4.172444621039039e-05, "loss": 0.511, "step": 190840 }, { "epoch": 9.478990761895302, "grad_norm": 0.1826171875, "learning_rate": 4.168471242674084e-05, "loss": 0.4744, "step": 190850 }, { "epoch": 9.47948743419092, "grad_norm": 0.189453125, "learning_rate": 4.164497864309129e-05, "loss": 0.4672, "step": 190860 }, { "epoch": 9.47998410648654, "grad_norm": 0.1904296875, "learning_rate": 4.160524485944174e-05, "loss": 0.472, "step": 190870 }, { "epoch": 9.480480778782159, "grad_norm": 0.19921875, "learning_rate": 4.1565511075792194e-05, "loss": 0.4951, "step": 190880 }, { "epoch": 9.480977451077779, "grad_norm": 0.23046875, "learning_rate": 4.152577729214265e-05, "loss": 0.5035, "step": 190890 }, { "epoch": 9.481474123373399, "grad_norm": 0.2080078125, "learning_rate": 4.14860435084931e-05, "loss": 0.4604, "step": 190900 }, { "epoch": 9.481970795669017, "grad_norm": 0.19140625, "learning_rate": 4.144630972484355e-05, "loss": 0.5188, "step": 190910 }, { "epoch": 9.482467467964637, "grad_norm": 0.212890625, "learning_rate": 4.1406575941194004e-05, "loss": 0.4791, "step": 190920 }, { "epoch": 9.482964140260256, "grad_norm": 0.1923828125, "learning_rate": 4.1366842157544455e-05, "loss": 0.4911, "step": 190930 }, { "epoch": 9.483460812555876, "grad_norm": 0.2021484375, "learning_rate": 4.1327108373894906e-05, "loss": 0.4816, "step": 190940 }, { "epoch": 9.483957484851494, "grad_norm": 0.2099609375, "learning_rate": 4.128737459024536e-05, "loss": 0.4844, "step": 190950 }, { "epoch": 9.484454157147114, "grad_norm": 0.21484375, "learning_rate": 4.124764080659581e-05, "loss": 0.5126, "step": 190960 }, { "epoch": 9.484950829442734, "grad_norm": 0.205078125, "learning_rate": 4.120790702294626e-05, "loss": 0.5173, "step": 190970 }, { "epoch": 9.485447501738353, "grad_norm": 0.2099609375, "learning_rate": 4.116817323929672e-05, "loss": 0.4946, "step": 190980 }, { "epoch": 9.485944174033973, "grad_norm": 0.2265625, "learning_rate": 4.112843945564717e-05, "loss": 0.5297, "step": 190990 }, { "epoch": 9.486440846329591, "grad_norm": 0.201171875, "learning_rate": 4.108870567199762e-05, "loss": 0.4899, "step": 191000 }, { "epoch": 9.486937518625211, "grad_norm": 0.2216796875, "learning_rate": 4.104897188834807e-05, "loss": 0.5052, "step": 191010 }, { "epoch": 9.48743419092083, "grad_norm": 0.21484375, "learning_rate": 4.100923810469852e-05, "loss": 0.5071, "step": 191020 }, { "epoch": 9.48793086321645, "grad_norm": 0.1865234375, "learning_rate": 4.096950432104897e-05, "loss": 0.4538, "step": 191030 }, { "epoch": 9.48842753551207, "grad_norm": 0.2138671875, "learning_rate": 4.0929770537399423e-05, "loss": 0.4967, "step": 191040 }, { "epoch": 9.488924207807688, "grad_norm": 0.1875, "learning_rate": 4.0890036753749875e-05, "loss": 0.5068, "step": 191050 }, { "epoch": 9.489420880103308, "grad_norm": 0.2021484375, "learning_rate": 4.085030297010033e-05, "loss": 0.4787, "step": 191060 }, { "epoch": 9.489917552398927, "grad_norm": 0.1923828125, "learning_rate": 4.081056918645078e-05, "loss": 0.4781, "step": 191070 }, { "epoch": 9.490414224694547, "grad_norm": 0.2041015625, "learning_rate": 4.0770835402801234e-05, "loss": 0.5005, "step": 191080 }, { "epoch": 9.490910896990165, "grad_norm": 0.205078125, "learning_rate": 4.0731101619151685e-05, "loss": 0.4583, "step": 191090 }, { "epoch": 9.491407569285785, "grad_norm": 0.2060546875, "learning_rate": 4.0691367835502136e-05, "loss": 0.4993, "step": 191100 }, { "epoch": 9.491904241581405, "grad_norm": 0.2060546875, "learning_rate": 4.065163405185259e-05, "loss": 0.4824, "step": 191110 }, { "epoch": 9.492400913877024, "grad_norm": 0.19921875, "learning_rate": 4.061190026820304e-05, "loss": 0.4716, "step": 191120 }, { "epoch": 9.492897586172644, "grad_norm": 0.2041015625, "learning_rate": 4.057216648455349e-05, "loss": 0.4972, "step": 191130 }, { "epoch": 9.493394258468262, "grad_norm": 0.2021484375, "learning_rate": 4.053243270090395e-05, "loss": 0.5021, "step": 191140 }, { "epoch": 9.493890930763882, "grad_norm": 0.205078125, "learning_rate": 4.04926989172544e-05, "loss": 0.5069, "step": 191150 }, { "epoch": 9.4943876030595, "grad_norm": 0.234375, "learning_rate": 4.045296513360485e-05, "loss": 0.5028, "step": 191160 }, { "epoch": 9.49488427535512, "grad_norm": 0.212890625, "learning_rate": 4.04132313499553e-05, "loss": 0.4892, "step": 191170 }, { "epoch": 9.495380947650741, "grad_norm": 0.2021484375, "learning_rate": 4.037349756630575e-05, "loss": 0.5041, "step": 191180 }, { "epoch": 9.49587761994636, "grad_norm": 0.2041015625, "learning_rate": 4.03337637826562e-05, "loss": 0.461, "step": 191190 }, { "epoch": 9.49637429224198, "grad_norm": 0.1982421875, "learning_rate": 4.0294029999006653e-05, "loss": 0.5065, "step": 191200 }, { "epoch": 9.496870964537598, "grad_norm": 0.2177734375, "learning_rate": 4.0254296215357105e-05, "loss": 0.4549, "step": 191210 }, { "epoch": 9.497367636833218, "grad_norm": 0.2294921875, "learning_rate": 4.0214562431707556e-05, "loss": 0.5129, "step": 191220 }, { "epoch": 9.497864309128836, "grad_norm": 0.2177734375, "learning_rate": 4.017482864805801e-05, "loss": 0.4957, "step": 191230 }, { "epoch": 9.498360981424456, "grad_norm": 0.18359375, "learning_rate": 4.013509486440847e-05, "loss": 0.4678, "step": 191240 }, { "epoch": 9.498857653720076, "grad_norm": 0.203125, "learning_rate": 4.009536108075892e-05, "loss": 0.4919, "step": 191250 }, { "epoch": 9.499354326015695, "grad_norm": 0.1923828125, "learning_rate": 4.005562729710937e-05, "loss": 0.473, "step": 191260 }, { "epoch": 9.499850998311315, "grad_norm": 0.1943359375, "learning_rate": 4.0015893513459824e-05, "loss": 0.4701, "step": 191270 }, { "epoch": 9.500347670606933, "grad_norm": 0.2060546875, "learning_rate": 3.9976159729810275e-05, "loss": 0.5078, "step": 191280 }, { "epoch": 9.500844342902553, "grad_norm": 0.2021484375, "learning_rate": 3.9936425946160726e-05, "loss": 0.4614, "step": 191290 }, { "epoch": 9.501341015198172, "grad_norm": 0.2158203125, "learning_rate": 3.989669216251118e-05, "loss": 0.5163, "step": 191300 }, { "epoch": 9.501837687493792, "grad_norm": 0.1982421875, "learning_rate": 3.985695837886163e-05, "loss": 0.4596, "step": 191310 }, { "epoch": 9.502334359789412, "grad_norm": 0.2109375, "learning_rate": 3.981722459521208e-05, "loss": 0.4776, "step": 191320 }, { "epoch": 9.50283103208503, "grad_norm": 0.2060546875, "learning_rate": 3.977749081156254e-05, "loss": 0.4652, "step": 191330 }, { "epoch": 9.50332770438065, "grad_norm": 0.216796875, "learning_rate": 3.973775702791299e-05, "loss": 0.4922, "step": 191340 }, { "epoch": 9.503824376676269, "grad_norm": 0.203125, "learning_rate": 3.969802324426344e-05, "loss": 0.4952, "step": 191350 }, { "epoch": 9.504321048971889, "grad_norm": 0.201171875, "learning_rate": 3.965828946061389e-05, "loss": 0.4622, "step": 191360 }, { "epoch": 9.504817721267507, "grad_norm": 0.2080078125, "learning_rate": 3.961855567696434e-05, "loss": 0.4596, "step": 191370 }, { "epoch": 9.505314393563127, "grad_norm": 0.2021484375, "learning_rate": 3.957882189331479e-05, "loss": 0.4928, "step": 191380 }, { "epoch": 9.505811065858746, "grad_norm": 0.2001953125, "learning_rate": 3.953908810966524e-05, "loss": 0.4832, "step": 191390 }, { "epoch": 9.506307738154366, "grad_norm": 0.197265625, "learning_rate": 3.9499354326015694e-05, "loss": 0.4834, "step": 191400 }, { "epoch": 9.506804410449986, "grad_norm": 0.197265625, "learning_rate": 3.945962054236615e-05, "loss": 0.4693, "step": 191410 }, { "epoch": 9.507301082745604, "grad_norm": 0.18359375, "learning_rate": 3.94198867587166e-05, "loss": 0.4934, "step": 191420 }, { "epoch": 9.507797755041224, "grad_norm": 0.1962890625, "learning_rate": 3.9380152975067054e-05, "loss": 0.4893, "step": 191430 }, { "epoch": 9.508294427336843, "grad_norm": 0.1962890625, "learning_rate": 3.9340419191417505e-05, "loss": 0.4647, "step": 191440 }, { "epoch": 9.508791099632463, "grad_norm": 0.2197265625, "learning_rate": 3.9300685407767956e-05, "loss": 0.4931, "step": 191450 }, { "epoch": 9.509287771928081, "grad_norm": 0.2431640625, "learning_rate": 3.926095162411841e-05, "loss": 0.4797, "step": 191460 }, { "epoch": 9.509784444223701, "grad_norm": 0.1875, "learning_rate": 3.922121784046886e-05, "loss": 0.4645, "step": 191470 }, { "epoch": 9.510281116519321, "grad_norm": 0.1953125, "learning_rate": 3.918148405681931e-05, "loss": 0.4774, "step": 191480 }, { "epoch": 9.51077778881494, "grad_norm": 0.2216796875, "learning_rate": 3.914175027316977e-05, "loss": 0.5144, "step": 191490 }, { "epoch": 9.51127446111056, "grad_norm": 0.216796875, "learning_rate": 3.910201648952022e-05, "loss": 0.5031, "step": 191500 }, { "epoch": 9.511771133406178, "grad_norm": 0.23046875, "learning_rate": 3.906228270587067e-05, "loss": 0.4978, "step": 191510 }, { "epoch": 9.512267805701798, "grad_norm": 0.203125, "learning_rate": 3.902254892222112e-05, "loss": 0.4938, "step": 191520 }, { "epoch": 9.512764477997417, "grad_norm": 0.197265625, "learning_rate": 3.898281513857157e-05, "loss": 0.4878, "step": 191530 }, { "epoch": 9.513261150293037, "grad_norm": 0.1962890625, "learning_rate": 3.894308135492202e-05, "loss": 0.4879, "step": 191540 }, { "epoch": 9.513757822588657, "grad_norm": 0.21484375, "learning_rate": 3.890334757127247e-05, "loss": 0.5107, "step": 191550 }, { "epoch": 9.514254494884275, "grad_norm": 0.2109375, "learning_rate": 3.8863613787622924e-05, "loss": 0.457, "step": 191560 }, { "epoch": 9.514751167179895, "grad_norm": 0.2001953125, "learning_rate": 3.8823880003973375e-05, "loss": 0.4798, "step": 191570 }, { "epoch": 9.515247839475514, "grad_norm": 0.197265625, "learning_rate": 3.878414622032383e-05, "loss": 0.4988, "step": 191580 }, { "epoch": 9.515744511771134, "grad_norm": 0.2001953125, "learning_rate": 3.8744412436674284e-05, "loss": 0.4823, "step": 191590 }, { "epoch": 9.516241184066752, "grad_norm": 0.1884765625, "learning_rate": 3.8704678653024735e-05, "loss": 0.4892, "step": 191600 }, { "epoch": 9.516737856362372, "grad_norm": 0.189453125, "learning_rate": 3.8664944869375186e-05, "loss": 0.4959, "step": 191610 }, { "epoch": 9.517234528657992, "grad_norm": 0.1923828125, "learning_rate": 3.862521108572564e-05, "loss": 0.479, "step": 191620 }, { "epoch": 9.51773120095361, "grad_norm": 0.197265625, "learning_rate": 3.858547730207609e-05, "loss": 0.4896, "step": 191630 }, { "epoch": 9.51822787324923, "grad_norm": 0.2138671875, "learning_rate": 3.854574351842654e-05, "loss": 0.4593, "step": 191640 }, { "epoch": 9.518724545544849, "grad_norm": 0.2158203125, "learning_rate": 3.850600973477699e-05, "loss": 0.5126, "step": 191650 }, { "epoch": 9.51922121784047, "grad_norm": 0.2275390625, "learning_rate": 3.846627595112745e-05, "loss": 0.513, "step": 191660 }, { "epoch": 9.519717890136087, "grad_norm": 0.205078125, "learning_rate": 3.84265421674779e-05, "loss": 0.4827, "step": 191670 }, { "epoch": 9.520214562431708, "grad_norm": 0.201171875, "learning_rate": 3.838680838382836e-05, "loss": 0.467, "step": 191680 }, { "epoch": 9.520711234727328, "grad_norm": 0.2001953125, "learning_rate": 3.834707460017881e-05, "loss": 0.4703, "step": 191690 }, { "epoch": 9.521207907022946, "grad_norm": 0.1923828125, "learning_rate": 3.830734081652926e-05, "loss": 0.4589, "step": 191700 }, { "epoch": 9.521704579318566, "grad_norm": 0.197265625, "learning_rate": 3.826760703287971e-05, "loss": 0.4747, "step": 191710 }, { "epoch": 9.522201251614185, "grad_norm": 0.201171875, "learning_rate": 3.822787324923016e-05, "loss": 0.4633, "step": 191720 }, { "epoch": 9.522697923909805, "grad_norm": 0.1962890625, "learning_rate": 3.818813946558061e-05, "loss": 0.49, "step": 191730 }, { "epoch": 9.523194596205423, "grad_norm": 0.2255859375, "learning_rate": 3.814840568193106e-05, "loss": 0.5207, "step": 191740 }, { "epoch": 9.523691268501043, "grad_norm": 0.1953125, "learning_rate": 3.8108671898281514e-05, "loss": 0.4922, "step": 191750 }, { "epoch": 9.524187940796661, "grad_norm": 0.2001953125, "learning_rate": 3.806893811463197e-05, "loss": 0.4768, "step": 191760 }, { "epoch": 9.524684613092282, "grad_norm": 0.1962890625, "learning_rate": 3.802920433098242e-05, "loss": 0.5086, "step": 191770 }, { "epoch": 9.525181285387902, "grad_norm": 0.2041015625, "learning_rate": 3.7989470547332874e-05, "loss": 0.4887, "step": 191780 }, { "epoch": 9.52567795768352, "grad_norm": 0.201171875, "learning_rate": 3.7949736763683325e-05, "loss": 0.4955, "step": 191790 }, { "epoch": 9.52617462997914, "grad_norm": 0.2099609375, "learning_rate": 3.7910002980033776e-05, "loss": 0.4866, "step": 191800 }, { "epoch": 9.526671302274758, "grad_norm": 0.21484375, "learning_rate": 3.787026919638423e-05, "loss": 0.4779, "step": 191810 }, { "epoch": 9.527167974570379, "grad_norm": 0.232421875, "learning_rate": 3.783053541273468e-05, "loss": 0.4713, "step": 191820 }, { "epoch": 9.527664646865997, "grad_norm": 0.20703125, "learning_rate": 3.779080162908513e-05, "loss": 0.5013, "step": 191830 }, { "epoch": 9.528161319161617, "grad_norm": 0.193359375, "learning_rate": 3.775106784543559e-05, "loss": 0.5004, "step": 191840 }, { "epoch": 9.528657991457237, "grad_norm": 0.236328125, "learning_rate": 3.771133406178604e-05, "loss": 0.5178, "step": 191850 }, { "epoch": 9.529154663752855, "grad_norm": 0.1953125, "learning_rate": 3.767160027813649e-05, "loss": 0.4951, "step": 191860 }, { "epoch": 9.529651336048476, "grad_norm": 0.2080078125, "learning_rate": 3.763186649448694e-05, "loss": 0.4878, "step": 191870 }, { "epoch": 9.530148008344094, "grad_norm": 0.1982421875, "learning_rate": 3.759213271083739e-05, "loss": 0.489, "step": 191880 }, { "epoch": 9.530644680639714, "grad_norm": 0.2001953125, "learning_rate": 3.755239892718784e-05, "loss": 0.4764, "step": 191890 }, { "epoch": 9.531141352935332, "grad_norm": 0.2099609375, "learning_rate": 3.751266514353829e-05, "loss": 0.4773, "step": 191900 }, { "epoch": 9.531638025230952, "grad_norm": 0.2060546875, "learning_rate": 3.7472931359888744e-05, "loss": 0.4708, "step": 191910 }, { "epoch": 9.532134697526573, "grad_norm": 0.20703125, "learning_rate": 3.7433197576239195e-05, "loss": 0.5081, "step": 191920 }, { "epoch": 9.532631369822191, "grad_norm": 0.2216796875, "learning_rate": 3.739346379258965e-05, "loss": 0.4885, "step": 191930 }, { "epoch": 9.533128042117811, "grad_norm": 0.2060546875, "learning_rate": 3.7353730008940104e-05, "loss": 0.4689, "step": 191940 }, { "epoch": 9.53362471441343, "grad_norm": 0.203125, "learning_rate": 3.7313996225290555e-05, "loss": 0.4842, "step": 191950 }, { "epoch": 9.53412138670905, "grad_norm": 0.1962890625, "learning_rate": 3.7274262441641006e-05, "loss": 0.4605, "step": 191960 }, { "epoch": 9.534618059004668, "grad_norm": 0.220703125, "learning_rate": 3.723452865799146e-05, "loss": 0.4799, "step": 191970 }, { "epoch": 9.535114731300288, "grad_norm": 0.2177734375, "learning_rate": 3.719479487434191e-05, "loss": 0.5146, "step": 191980 }, { "epoch": 9.535611403595908, "grad_norm": 0.2080078125, "learning_rate": 3.715506109069236e-05, "loss": 0.5379, "step": 191990 }, { "epoch": 9.536108075891526, "grad_norm": 0.2001953125, "learning_rate": 3.711532730704281e-05, "loss": 0.5096, "step": 192000 }, { "epoch": 9.536604748187147, "grad_norm": 0.2041015625, "learning_rate": 3.707559352339327e-05, "loss": 0.5056, "step": 192010 }, { "epoch": 9.537101420482765, "grad_norm": 0.23046875, "learning_rate": 3.703585973974372e-05, "loss": 0.5013, "step": 192020 }, { "epoch": 9.537598092778385, "grad_norm": 0.1806640625, "learning_rate": 3.699612595609417e-05, "loss": 0.4694, "step": 192030 }, { "epoch": 9.538094765074003, "grad_norm": 0.203125, "learning_rate": 3.695639217244462e-05, "loss": 0.4828, "step": 192040 }, { "epoch": 9.538591437369623, "grad_norm": 0.251953125, "learning_rate": 3.691665838879507e-05, "loss": 0.5081, "step": 192050 }, { "epoch": 9.539088109665244, "grad_norm": 0.2138671875, "learning_rate": 3.687692460514552e-05, "loss": 0.5002, "step": 192060 }, { "epoch": 9.539584781960862, "grad_norm": 0.208984375, "learning_rate": 3.6837190821495974e-05, "loss": 0.4973, "step": 192070 }, { "epoch": 9.540081454256482, "grad_norm": 0.220703125, "learning_rate": 3.679745703784643e-05, "loss": 0.4881, "step": 192080 }, { "epoch": 9.5405781265521, "grad_norm": 0.2158203125, "learning_rate": 3.675772325419688e-05, "loss": 0.5215, "step": 192090 }, { "epoch": 9.54107479884772, "grad_norm": 0.203125, "learning_rate": 3.6717989470547334e-05, "loss": 0.4756, "step": 192100 }, { "epoch": 9.541571471143339, "grad_norm": 0.19140625, "learning_rate": 3.667825568689779e-05, "loss": 0.4757, "step": 192110 }, { "epoch": 9.542068143438959, "grad_norm": 0.2119140625, "learning_rate": 3.663852190324824e-05, "loss": 0.4859, "step": 192120 }, { "epoch": 9.542564815734579, "grad_norm": 0.1826171875, "learning_rate": 3.6598788119598694e-05, "loss": 0.4845, "step": 192130 }, { "epoch": 9.543061488030197, "grad_norm": 0.2021484375, "learning_rate": 3.6559054335949145e-05, "loss": 0.4964, "step": 192140 }, { "epoch": 9.543558160325817, "grad_norm": 0.2041015625, "learning_rate": 3.6519320552299596e-05, "loss": 0.4743, "step": 192150 }, { "epoch": 9.544054832621436, "grad_norm": 0.2197265625, "learning_rate": 3.647958676865005e-05, "loss": 0.4867, "step": 192160 }, { "epoch": 9.544551504917056, "grad_norm": 0.201171875, "learning_rate": 3.64398529850005e-05, "loss": 0.49, "step": 192170 }, { "epoch": 9.545048177212674, "grad_norm": 0.201171875, "learning_rate": 3.640011920135095e-05, "loss": 0.5057, "step": 192180 }, { "epoch": 9.545544849508294, "grad_norm": 0.220703125, "learning_rate": 3.636038541770141e-05, "loss": 0.48, "step": 192190 }, { "epoch": 9.546041521803915, "grad_norm": 0.1943359375, "learning_rate": 3.632065163405186e-05, "loss": 0.4865, "step": 192200 }, { "epoch": 9.546538194099533, "grad_norm": 0.1943359375, "learning_rate": 3.628091785040231e-05, "loss": 0.5045, "step": 192210 }, { "epoch": 9.547034866395153, "grad_norm": 0.212890625, "learning_rate": 3.624118406675276e-05, "loss": 0.5066, "step": 192220 }, { "epoch": 9.547531538690771, "grad_norm": 0.2236328125, "learning_rate": 3.620145028310321e-05, "loss": 0.5052, "step": 192230 }, { "epoch": 9.548028210986391, "grad_norm": 0.2001953125, "learning_rate": 3.616171649945366e-05, "loss": 0.5034, "step": 192240 }, { "epoch": 9.54852488328201, "grad_norm": 0.2412109375, "learning_rate": 3.612198271580411e-05, "loss": 0.5156, "step": 192250 }, { "epoch": 9.54902155557763, "grad_norm": 0.251953125, "learning_rate": 3.6082248932154564e-05, "loss": 0.4992, "step": 192260 }, { "epoch": 9.54951822787325, "grad_norm": 0.21484375, "learning_rate": 3.6042515148505015e-05, "loss": 0.5048, "step": 192270 }, { "epoch": 9.550014900168868, "grad_norm": 0.2021484375, "learning_rate": 3.600278136485547e-05, "loss": 0.4819, "step": 192280 }, { "epoch": 9.550511572464488, "grad_norm": 0.2578125, "learning_rate": 3.5963047581205924e-05, "loss": 0.4945, "step": 192290 }, { "epoch": 9.551008244760107, "grad_norm": 0.2041015625, "learning_rate": 3.5923313797556375e-05, "loss": 0.4933, "step": 192300 }, { "epoch": 9.551504917055727, "grad_norm": 0.2275390625, "learning_rate": 3.5883580013906826e-05, "loss": 0.4834, "step": 192310 }, { "epoch": 9.552001589351345, "grad_norm": 0.1943359375, "learning_rate": 3.584384623025728e-05, "loss": 0.5017, "step": 192320 }, { "epoch": 9.552498261646965, "grad_norm": 0.203125, "learning_rate": 3.580411244660773e-05, "loss": 0.4988, "step": 192330 }, { "epoch": 9.552994933942585, "grad_norm": 0.2236328125, "learning_rate": 3.576437866295818e-05, "loss": 0.4934, "step": 192340 }, { "epoch": 9.553491606238204, "grad_norm": 0.1962890625, "learning_rate": 3.572464487930863e-05, "loss": 0.5099, "step": 192350 }, { "epoch": 9.553988278533824, "grad_norm": 0.2080078125, "learning_rate": 3.568491109565909e-05, "loss": 0.5079, "step": 192360 }, { "epoch": 9.554484950829442, "grad_norm": 0.1943359375, "learning_rate": 3.564517731200954e-05, "loss": 0.4867, "step": 192370 }, { "epoch": 9.554981623125062, "grad_norm": 0.20703125, "learning_rate": 3.560544352835999e-05, "loss": 0.4577, "step": 192380 }, { "epoch": 9.55547829542068, "grad_norm": 0.1904296875, "learning_rate": 3.556570974471044e-05, "loss": 0.4575, "step": 192390 }, { "epoch": 9.5559749677163, "grad_norm": 0.2060546875, "learning_rate": 3.552597596106089e-05, "loss": 0.4862, "step": 192400 }, { "epoch": 9.556471640011921, "grad_norm": 0.2431640625, "learning_rate": 3.548624217741134e-05, "loss": 0.4689, "step": 192410 }, { "epoch": 9.55696831230754, "grad_norm": 0.2109375, "learning_rate": 3.5446508393761794e-05, "loss": 0.4932, "step": 192420 }, { "epoch": 9.55746498460316, "grad_norm": 0.205078125, "learning_rate": 3.5406774610112245e-05, "loss": 0.4783, "step": 192430 }, { "epoch": 9.557961656898778, "grad_norm": 0.21875, "learning_rate": 3.53670408264627e-05, "loss": 0.4805, "step": 192440 }, { "epoch": 9.558458329194398, "grad_norm": 0.2021484375, "learning_rate": 3.5327307042813154e-05, "loss": 0.5025, "step": 192450 }, { "epoch": 9.558955001490016, "grad_norm": 0.189453125, "learning_rate": 3.5287573259163605e-05, "loss": 0.4831, "step": 192460 }, { "epoch": 9.559451673785636, "grad_norm": 0.2021484375, "learning_rate": 3.5247839475514056e-05, "loss": 0.4607, "step": 192470 }, { "epoch": 9.559948346081256, "grad_norm": 0.2021484375, "learning_rate": 3.520810569186451e-05, "loss": 0.4851, "step": 192480 }, { "epoch": 9.560445018376875, "grad_norm": 0.1875, "learning_rate": 3.516837190821496e-05, "loss": 0.4971, "step": 192490 }, { "epoch": 9.560941690672495, "grad_norm": 0.2275390625, "learning_rate": 3.5128638124565416e-05, "loss": 0.4889, "step": 192500 }, { "epoch": 9.561438362968113, "grad_norm": 0.2197265625, "learning_rate": 3.508890434091587e-05, "loss": 0.4994, "step": 192510 }, { "epoch": 9.561935035263733, "grad_norm": 0.1923828125, "learning_rate": 3.504917055726632e-05, "loss": 0.4843, "step": 192520 }, { "epoch": 9.562431707559352, "grad_norm": 0.1923828125, "learning_rate": 3.500943677361677e-05, "loss": 0.4535, "step": 192530 }, { "epoch": 9.562928379854972, "grad_norm": 0.2197265625, "learning_rate": 3.4969702989967227e-05, "loss": 0.469, "step": 192540 }, { "epoch": 9.563425052150592, "grad_norm": 0.212890625, "learning_rate": 3.492996920631768e-05, "loss": 0.4811, "step": 192550 }, { "epoch": 9.56392172444621, "grad_norm": 0.2060546875, "learning_rate": 3.489023542266813e-05, "loss": 0.4826, "step": 192560 }, { "epoch": 9.56441839674183, "grad_norm": 0.2119140625, "learning_rate": 3.485050163901858e-05, "loss": 0.4918, "step": 192570 }, { "epoch": 9.564915069037449, "grad_norm": 0.212890625, "learning_rate": 3.481076785536903e-05, "loss": 0.4913, "step": 192580 }, { "epoch": 9.565411741333069, "grad_norm": 0.19921875, "learning_rate": 3.477103407171948e-05, "loss": 0.4949, "step": 192590 }, { "epoch": 9.565908413628687, "grad_norm": 0.201171875, "learning_rate": 3.473130028806993e-05, "loss": 0.5122, "step": 192600 }, { "epoch": 9.566405085924307, "grad_norm": 0.2373046875, "learning_rate": 3.4691566504420384e-05, "loss": 0.4734, "step": 192610 }, { "epoch": 9.566901758219927, "grad_norm": 0.2099609375, "learning_rate": 3.4651832720770835e-05, "loss": 0.4896, "step": 192620 }, { "epoch": 9.567398430515546, "grad_norm": 0.1953125, "learning_rate": 3.461209893712129e-05, "loss": 0.5158, "step": 192630 }, { "epoch": 9.567895102811166, "grad_norm": 0.2236328125, "learning_rate": 3.4572365153471744e-05, "loss": 0.5142, "step": 192640 }, { "epoch": 9.568391775106784, "grad_norm": 0.220703125, "learning_rate": 3.4532631369822195e-05, "loss": 0.4838, "step": 192650 }, { "epoch": 9.568888447402404, "grad_norm": 0.205078125, "learning_rate": 3.4492897586172646e-05, "loss": 0.4967, "step": 192660 }, { "epoch": 9.569385119698023, "grad_norm": 0.23046875, "learning_rate": 3.44531638025231e-05, "loss": 0.4832, "step": 192670 }, { "epoch": 9.569881791993643, "grad_norm": 0.2431640625, "learning_rate": 3.441343001887355e-05, "loss": 0.4955, "step": 192680 }, { "epoch": 9.570378464289263, "grad_norm": 0.19140625, "learning_rate": 3.4373696235224e-05, "loss": 0.4461, "step": 192690 }, { "epoch": 9.570875136584881, "grad_norm": 0.234375, "learning_rate": 3.433396245157445e-05, "loss": 0.4708, "step": 192700 }, { "epoch": 9.571371808880501, "grad_norm": 0.1943359375, "learning_rate": 3.429422866792491e-05, "loss": 0.468, "step": 192710 }, { "epoch": 9.57186848117612, "grad_norm": 0.201171875, "learning_rate": 3.425449488427536e-05, "loss": 0.5008, "step": 192720 }, { "epoch": 9.57236515347174, "grad_norm": 0.197265625, "learning_rate": 3.421476110062581e-05, "loss": 0.4814, "step": 192730 }, { "epoch": 9.572861825767358, "grad_norm": 0.2109375, "learning_rate": 3.417502731697626e-05, "loss": 0.4937, "step": 192740 }, { "epoch": 9.573358498062978, "grad_norm": 0.2109375, "learning_rate": 3.413529353332671e-05, "loss": 0.4934, "step": 192750 }, { "epoch": 9.573855170358597, "grad_norm": 0.2041015625, "learning_rate": 3.409555974967716e-05, "loss": 0.4747, "step": 192760 }, { "epoch": 9.574351842654217, "grad_norm": 0.2578125, "learning_rate": 3.4055825966027614e-05, "loss": 0.5158, "step": 192770 }, { "epoch": 9.574848514949837, "grad_norm": 0.19140625, "learning_rate": 3.4016092182378065e-05, "loss": 0.4607, "step": 192780 }, { "epoch": 9.575345187245455, "grad_norm": 0.2158203125, "learning_rate": 3.397635839872852e-05, "loss": 0.507, "step": 192790 }, { "epoch": 9.575841859541075, "grad_norm": 0.2275390625, "learning_rate": 3.3936624615078974e-05, "loss": 0.5017, "step": 192800 }, { "epoch": 9.576338531836694, "grad_norm": 0.205078125, "learning_rate": 3.3896890831429425e-05, "loss": 0.5241, "step": 192810 }, { "epoch": 9.576835204132314, "grad_norm": 0.2119140625, "learning_rate": 3.3857157047779876e-05, "loss": 0.4668, "step": 192820 }, { "epoch": 9.577331876427932, "grad_norm": 0.1943359375, "learning_rate": 3.381742326413033e-05, "loss": 0.5134, "step": 192830 }, { "epoch": 9.577828548723552, "grad_norm": 0.2021484375, "learning_rate": 3.377768948048078e-05, "loss": 0.4805, "step": 192840 }, { "epoch": 9.578325221019172, "grad_norm": 0.216796875, "learning_rate": 3.373795569683123e-05, "loss": 0.4943, "step": 192850 }, { "epoch": 9.57882189331479, "grad_norm": 0.2041015625, "learning_rate": 3.369822191318168e-05, "loss": 0.4839, "step": 192860 }, { "epoch": 9.57931856561041, "grad_norm": 0.201171875, "learning_rate": 3.365848812953213e-05, "loss": 0.4885, "step": 192870 }, { "epoch": 9.579815237906029, "grad_norm": 0.1826171875, "learning_rate": 3.361875434588259e-05, "loss": 0.4661, "step": 192880 }, { "epoch": 9.58031191020165, "grad_norm": 0.201171875, "learning_rate": 3.357902056223304e-05, "loss": 0.4859, "step": 192890 }, { "epoch": 9.580808582497268, "grad_norm": 0.2060546875, "learning_rate": 3.353928677858349e-05, "loss": 0.4815, "step": 192900 }, { "epoch": 9.581305254792888, "grad_norm": 0.1865234375, "learning_rate": 3.349955299493394e-05, "loss": 0.5095, "step": 192910 }, { "epoch": 9.581801927088508, "grad_norm": 0.1982421875, "learning_rate": 3.34598192112844e-05, "loss": 0.4854, "step": 192920 }, { "epoch": 9.582298599384126, "grad_norm": 0.236328125, "learning_rate": 3.342008542763485e-05, "loss": 0.5088, "step": 192930 }, { "epoch": 9.582795271679746, "grad_norm": 0.2021484375, "learning_rate": 3.33803516439853e-05, "loss": 0.4978, "step": 192940 }, { "epoch": 9.583291943975365, "grad_norm": 0.2021484375, "learning_rate": 3.334061786033575e-05, "loss": 0.5023, "step": 192950 }, { "epoch": 9.583788616270985, "grad_norm": 0.1904296875, "learning_rate": 3.3300884076686204e-05, "loss": 0.4632, "step": 192960 }, { "epoch": 9.584285288566603, "grad_norm": 0.212890625, "learning_rate": 3.326115029303666e-05, "loss": 0.4744, "step": 192970 }, { "epoch": 9.584781960862223, "grad_norm": 0.2001953125, "learning_rate": 3.322141650938711e-05, "loss": 0.4646, "step": 192980 }, { "epoch": 9.585278633157843, "grad_norm": 0.23046875, "learning_rate": 3.3181682725737563e-05, "loss": 0.4881, "step": 192990 }, { "epoch": 9.585775305453462, "grad_norm": 0.23828125, "learning_rate": 3.3141948942088014e-05, "loss": 0.4947, "step": 193000 }, { "epoch": 9.586271977749082, "grad_norm": 0.2138671875, "learning_rate": 3.3102215158438466e-05, "loss": 0.4865, "step": 193010 }, { "epoch": 9.5867686500447, "grad_norm": 0.1982421875, "learning_rate": 3.3062481374788917e-05, "loss": 0.4791, "step": 193020 }, { "epoch": 9.58726532234032, "grad_norm": 0.224609375, "learning_rate": 3.302274759113937e-05, "loss": 0.4772, "step": 193030 }, { "epoch": 9.587761994635938, "grad_norm": 0.26171875, "learning_rate": 3.298301380748982e-05, "loss": 0.5063, "step": 193040 }, { "epoch": 9.588258666931559, "grad_norm": 0.2216796875, "learning_rate": 3.294328002384027e-05, "loss": 0.4819, "step": 193050 }, { "epoch": 9.588755339227179, "grad_norm": 0.1953125, "learning_rate": 3.290354624019073e-05, "loss": 0.5097, "step": 193060 }, { "epoch": 9.589252011522797, "grad_norm": 0.21484375, "learning_rate": 3.286381245654118e-05, "loss": 0.4994, "step": 193070 }, { "epoch": 9.589748683818417, "grad_norm": 0.251953125, "learning_rate": 3.282407867289163e-05, "loss": 0.4718, "step": 193080 }, { "epoch": 9.590245356114036, "grad_norm": 0.24609375, "learning_rate": 3.278434488924208e-05, "loss": 0.5238, "step": 193090 }, { "epoch": 9.590742028409656, "grad_norm": 0.205078125, "learning_rate": 3.274461110559253e-05, "loss": 0.4581, "step": 193100 }, { "epoch": 9.591238700705274, "grad_norm": 0.2392578125, "learning_rate": 3.270487732194298e-05, "loss": 0.454, "step": 193110 }, { "epoch": 9.591735373000894, "grad_norm": 0.2041015625, "learning_rate": 3.2665143538293434e-05, "loss": 0.4849, "step": 193120 }, { "epoch": 9.592232045296514, "grad_norm": 0.197265625, "learning_rate": 3.2625409754643885e-05, "loss": 0.5066, "step": 193130 }, { "epoch": 9.592728717592133, "grad_norm": 0.1923828125, "learning_rate": 3.258567597099434e-05, "loss": 0.4867, "step": 193140 }, { "epoch": 9.593225389887753, "grad_norm": 0.1962890625, "learning_rate": 3.2545942187344793e-05, "loss": 0.4706, "step": 193150 }, { "epoch": 9.593722062183371, "grad_norm": 0.1982421875, "learning_rate": 3.2506208403695244e-05, "loss": 0.4917, "step": 193160 }, { "epoch": 9.594218734478991, "grad_norm": 0.212890625, "learning_rate": 3.2466474620045696e-05, "loss": 0.4563, "step": 193170 }, { "epoch": 9.59471540677461, "grad_norm": 0.2119140625, "learning_rate": 3.2426740836396147e-05, "loss": 0.4949, "step": 193180 }, { "epoch": 9.59521207907023, "grad_norm": 0.2119140625, "learning_rate": 3.23870070527466e-05, "loss": 0.5061, "step": 193190 }, { "epoch": 9.595708751365848, "grad_norm": 0.205078125, "learning_rate": 3.234727326909705e-05, "loss": 0.4821, "step": 193200 }, { "epoch": 9.596205423661468, "grad_norm": 0.19140625, "learning_rate": 3.23075394854475e-05, "loss": 0.4701, "step": 193210 }, { "epoch": 9.596702095957088, "grad_norm": 0.224609375, "learning_rate": 3.226780570179796e-05, "loss": 0.5114, "step": 193220 }, { "epoch": 9.597198768252706, "grad_norm": 0.205078125, "learning_rate": 3.222807191814841e-05, "loss": 0.4598, "step": 193230 }, { "epoch": 9.597695440548327, "grad_norm": 0.2373046875, "learning_rate": 3.218833813449886e-05, "loss": 0.4873, "step": 193240 }, { "epoch": 9.598192112843945, "grad_norm": 0.21484375, "learning_rate": 3.214860435084931e-05, "loss": 0.4576, "step": 193250 }, { "epoch": 9.598688785139565, "grad_norm": 0.236328125, "learning_rate": 3.210887056719976e-05, "loss": 0.5117, "step": 193260 }, { "epoch": 9.599185457435183, "grad_norm": 0.20703125, "learning_rate": 3.206913678355021e-05, "loss": 0.4732, "step": 193270 }, { "epoch": 9.599682129730803, "grad_norm": 0.2255859375, "learning_rate": 3.2029402999900664e-05, "loss": 0.4554, "step": 193280 }, { "epoch": 9.600178802026424, "grad_norm": 0.2265625, "learning_rate": 3.1989669216251115e-05, "loss": 0.4936, "step": 193290 }, { "epoch": 9.600675474322042, "grad_norm": 0.197265625, "learning_rate": 3.1949935432601566e-05, "loss": 0.4698, "step": 193300 }, { "epoch": 9.601172146617662, "grad_norm": 0.2080078125, "learning_rate": 3.1910201648952023e-05, "loss": 0.4765, "step": 193310 }, { "epoch": 9.60166881891328, "grad_norm": 0.2109375, "learning_rate": 3.1870467865302474e-05, "loss": 0.487, "step": 193320 }, { "epoch": 9.6021654912089, "grad_norm": 0.2197265625, "learning_rate": 3.1830734081652925e-05, "loss": 0.501, "step": 193330 }, { "epoch": 9.602662163504519, "grad_norm": 0.2080078125, "learning_rate": 3.179100029800338e-05, "loss": 0.4816, "step": 193340 }, { "epoch": 9.603158835800139, "grad_norm": 0.23046875, "learning_rate": 3.1751266514353834e-05, "loss": 0.4811, "step": 193350 }, { "epoch": 9.603655508095759, "grad_norm": 0.19921875, "learning_rate": 3.1711532730704285e-05, "loss": 0.4869, "step": 193360 }, { "epoch": 9.604152180391377, "grad_norm": 0.2333984375, "learning_rate": 3.1671798947054736e-05, "loss": 0.4844, "step": 193370 }, { "epoch": 9.604648852686998, "grad_norm": 0.228515625, "learning_rate": 3.163206516340519e-05, "loss": 0.5069, "step": 193380 }, { "epoch": 9.605145524982616, "grad_norm": 0.2060546875, "learning_rate": 3.159233137975564e-05, "loss": 0.5005, "step": 193390 }, { "epoch": 9.605642197278236, "grad_norm": 0.2080078125, "learning_rate": 3.155259759610609e-05, "loss": 0.5013, "step": 193400 }, { "epoch": 9.606138869573854, "grad_norm": 0.2109375, "learning_rate": 3.151286381245655e-05, "loss": 0.5013, "step": 193410 }, { "epoch": 9.606635541869474, "grad_norm": 0.2412109375, "learning_rate": 3.1473130028807e-05, "loss": 0.505, "step": 193420 }, { "epoch": 9.607132214165095, "grad_norm": 0.1875, "learning_rate": 3.143339624515745e-05, "loss": 0.4849, "step": 193430 }, { "epoch": 9.607628886460713, "grad_norm": 0.20703125, "learning_rate": 3.13936624615079e-05, "loss": 0.5046, "step": 193440 }, { "epoch": 9.608125558756333, "grad_norm": 0.203125, "learning_rate": 3.135392867785835e-05, "loss": 0.4999, "step": 193450 }, { "epoch": 9.608622231051951, "grad_norm": 0.21875, "learning_rate": 3.13141948942088e-05, "loss": 0.4982, "step": 193460 }, { "epoch": 9.609118903347571, "grad_norm": 0.1962890625, "learning_rate": 3.1274461110559253e-05, "loss": 0.5025, "step": 193470 }, { "epoch": 9.60961557564319, "grad_norm": 0.2060546875, "learning_rate": 3.1234727326909704e-05, "loss": 0.4829, "step": 193480 }, { "epoch": 9.61011224793881, "grad_norm": 0.234375, "learning_rate": 3.119499354326016e-05, "loss": 0.4738, "step": 193490 }, { "epoch": 9.61060892023443, "grad_norm": 0.197265625, "learning_rate": 3.115525975961061e-05, "loss": 0.4664, "step": 193500 }, { "epoch": 9.611105592530048, "grad_norm": 0.2080078125, "learning_rate": 3.1115525975961064e-05, "loss": 0.4688, "step": 193510 }, { "epoch": 9.611602264825668, "grad_norm": 0.2041015625, "learning_rate": 3.1075792192311515e-05, "loss": 0.4797, "step": 193520 }, { "epoch": 9.612098937121287, "grad_norm": 0.23828125, "learning_rate": 3.1036058408661966e-05, "loss": 0.4989, "step": 193530 }, { "epoch": 9.612595609416907, "grad_norm": 0.2158203125, "learning_rate": 3.099632462501242e-05, "loss": 0.5014, "step": 193540 }, { "epoch": 9.613092281712525, "grad_norm": 0.2109375, "learning_rate": 3.095659084136287e-05, "loss": 0.4888, "step": 193550 }, { "epoch": 9.613588954008145, "grad_norm": 0.2119140625, "learning_rate": 3.091685705771332e-05, "loss": 0.5036, "step": 193560 }, { "epoch": 9.614085626303766, "grad_norm": 0.201171875, "learning_rate": 3.087712327406378e-05, "loss": 0.4386, "step": 193570 }, { "epoch": 9.614582298599384, "grad_norm": 0.2197265625, "learning_rate": 3.083738949041423e-05, "loss": 0.5126, "step": 193580 }, { "epoch": 9.615078970895004, "grad_norm": 0.2373046875, "learning_rate": 3.079765570676468e-05, "loss": 0.4903, "step": 193590 }, { "epoch": 9.615575643190622, "grad_norm": 0.22265625, "learning_rate": 3.075792192311513e-05, "loss": 0.4977, "step": 193600 }, { "epoch": 9.616072315486242, "grad_norm": 0.2119140625, "learning_rate": 3.071818813946558e-05, "loss": 0.479, "step": 193610 }, { "epoch": 9.61656898778186, "grad_norm": 0.212890625, "learning_rate": 3.067845435581603e-05, "loss": 0.5186, "step": 193620 }, { "epoch": 9.61706566007748, "grad_norm": 0.2041015625, "learning_rate": 3.0638720572166483e-05, "loss": 0.52, "step": 193630 }, { "epoch": 9.617562332373101, "grad_norm": 0.208984375, "learning_rate": 3.0598986788516934e-05, "loss": 0.4909, "step": 193640 }, { "epoch": 9.61805900466872, "grad_norm": 0.2119140625, "learning_rate": 3.0559253004867385e-05, "loss": 0.4971, "step": 193650 }, { "epoch": 9.61855567696434, "grad_norm": 0.224609375, "learning_rate": 3.051951922121784e-05, "loss": 0.492, "step": 193660 }, { "epoch": 9.619052349259958, "grad_norm": 0.212890625, "learning_rate": 3.047978543756829e-05, "loss": 0.4944, "step": 193670 }, { "epoch": 9.619549021555578, "grad_norm": 0.275390625, "learning_rate": 3.0440051653918745e-05, "loss": 0.5009, "step": 193680 }, { "epoch": 9.620045693851196, "grad_norm": 0.2060546875, "learning_rate": 3.0400317870269196e-05, "loss": 0.5048, "step": 193690 }, { "epoch": 9.620542366146816, "grad_norm": 0.1962890625, "learning_rate": 3.0360584086619647e-05, "loss": 0.47, "step": 193700 }, { "epoch": 9.621039038442436, "grad_norm": 0.189453125, "learning_rate": 3.03208503029701e-05, "loss": 0.5024, "step": 193710 }, { "epoch": 9.621535710738055, "grad_norm": 0.1923828125, "learning_rate": 3.0281116519320553e-05, "loss": 0.4578, "step": 193720 }, { "epoch": 9.622032383033675, "grad_norm": 0.2001953125, "learning_rate": 3.0241382735671004e-05, "loss": 0.5051, "step": 193730 }, { "epoch": 9.622529055329293, "grad_norm": 0.2109375, "learning_rate": 3.0201648952021455e-05, "loss": 0.4967, "step": 193740 }, { "epoch": 9.623025727624913, "grad_norm": 0.1806640625, "learning_rate": 3.0161915168371906e-05, "loss": 0.4644, "step": 193750 }, { "epoch": 9.623522399920532, "grad_norm": 0.2451171875, "learning_rate": 3.0122181384722364e-05, "loss": 0.468, "step": 193760 }, { "epoch": 9.624019072216152, "grad_norm": 0.2060546875, "learning_rate": 3.0082447601072815e-05, "loss": 0.4674, "step": 193770 }, { "epoch": 9.624515744511772, "grad_norm": 0.2119140625, "learning_rate": 3.004271381742327e-05, "loss": 0.4686, "step": 193780 }, { "epoch": 9.62501241680739, "grad_norm": 0.2021484375, "learning_rate": 3.000298003377372e-05, "loss": 0.5239, "step": 193790 }, { "epoch": 9.62550908910301, "grad_norm": 0.2138671875, "learning_rate": 2.996324625012417e-05, "loss": 0.5107, "step": 193800 }, { "epoch": 9.626005761398629, "grad_norm": 0.2021484375, "learning_rate": 2.9923512466474622e-05, "loss": 0.5074, "step": 193810 }, { "epoch": 9.626502433694249, "grad_norm": 0.2119140625, "learning_rate": 2.9883778682825077e-05, "loss": 0.4984, "step": 193820 }, { "epoch": 9.626999105989867, "grad_norm": 0.2216796875, "learning_rate": 2.9844044899175528e-05, "loss": 0.4873, "step": 193830 }, { "epoch": 9.627495778285487, "grad_norm": 0.2080078125, "learning_rate": 2.980431111552598e-05, "loss": 0.5125, "step": 193840 }, { "epoch": 9.627992450581107, "grad_norm": 0.1943359375, "learning_rate": 2.976457733187643e-05, "loss": 0.4712, "step": 193850 }, { "epoch": 9.628489122876726, "grad_norm": 0.2197265625, "learning_rate": 2.9724843548226884e-05, "loss": 0.4807, "step": 193860 }, { "epoch": 9.628985795172346, "grad_norm": 0.2275390625, "learning_rate": 2.9685109764577335e-05, "loss": 0.4664, "step": 193870 }, { "epoch": 9.629482467467964, "grad_norm": 0.251953125, "learning_rate": 2.9645375980927786e-05, "loss": 0.515, "step": 193880 }, { "epoch": 9.629979139763584, "grad_norm": 0.208984375, "learning_rate": 2.9605642197278237e-05, "loss": 0.4993, "step": 193890 }, { "epoch": 9.630475812059203, "grad_norm": 0.2294921875, "learning_rate": 2.956590841362869e-05, "loss": 0.5217, "step": 193900 }, { "epoch": 9.630972484354823, "grad_norm": 0.2294921875, "learning_rate": 2.9526174629979143e-05, "loss": 0.4946, "step": 193910 }, { "epoch": 9.631469156650443, "grad_norm": 0.2080078125, "learning_rate": 2.9486440846329594e-05, "loss": 0.4654, "step": 193920 }, { "epoch": 9.631965828946061, "grad_norm": 0.189453125, "learning_rate": 2.9446707062680045e-05, "loss": 0.4723, "step": 193930 }, { "epoch": 9.632462501241681, "grad_norm": 0.2001953125, "learning_rate": 2.94069732790305e-05, "loss": 0.5216, "step": 193940 }, { "epoch": 9.6329591735373, "grad_norm": 0.2099609375, "learning_rate": 2.936723949538095e-05, "loss": 0.4662, "step": 193950 }, { "epoch": 9.63345584583292, "grad_norm": 0.1923828125, "learning_rate": 2.93275057117314e-05, "loss": 0.4802, "step": 193960 }, { "epoch": 9.633952518128538, "grad_norm": 0.212890625, "learning_rate": 2.9287771928081852e-05, "loss": 0.4854, "step": 193970 }, { "epoch": 9.634449190424158, "grad_norm": 0.2060546875, "learning_rate": 2.9248038144432303e-05, "loss": 0.5109, "step": 193980 }, { "epoch": 9.634945862719778, "grad_norm": 0.19921875, "learning_rate": 2.9208304360782758e-05, "loss": 0.4891, "step": 193990 }, { "epoch": 9.635442535015397, "grad_norm": 0.2216796875, "learning_rate": 2.916857057713321e-05, "loss": 0.4895, "step": 194000 }, { "epoch": 9.635939207311017, "grad_norm": 0.2021484375, "learning_rate": 2.912883679348366e-05, "loss": 0.5063, "step": 194010 }, { "epoch": 9.636435879606635, "grad_norm": 0.2041015625, "learning_rate": 2.908910300983411e-05, "loss": 0.473, "step": 194020 }, { "epoch": 9.636932551902255, "grad_norm": 0.203125, "learning_rate": 2.9049369226184565e-05, "loss": 0.5293, "step": 194030 }, { "epoch": 9.637429224197874, "grad_norm": 0.19921875, "learning_rate": 2.9009635442535016e-05, "loss": 0.478, "step": 194040 }, { "epoch": 9.637925896493494, "grad_norm": 0.236328125, "learning_rate": 2.8969901658885467e-05, "loss": 0.4794, "step": 194050 }, { "epoch": 9.638422568789114, "grad_norm": 0.203125, "learning_rate": 2.8930167875235918e-05, "loss": 0.5039, "step": 194060 }, { "epoch": 9.638919241084732, "grad_norm": 0.2001953125, "learning_rate": 2.8890434091586373e-05, "loss": 0.4825, "step": 194070 }, { "epoch": 9.639415913380352, "grad_norm": 0.2373046875, "learning_rate": 2.8850700307936824e-05, "loss": 0.4791, "step": 194080 }, { "epoch": 9.63991258567597, "grad_norm": 0.1962890625, "learning_rate": 2.8810966524287275e-05, "loss": 0.4545, "step": 194090 }, { "epoch": 9.64040925797159, "grad_norm": 0.2099609375, "learning_rate": 2.8771232740637726e-05, "loss": 0.5037, "step": 194100 }, { "epoch": 9.640905930267209, "grad_norm": 0.26953125, "learning_rate": 2.873149895698818e-05, "loss": 0.4948, "step": 194110 }, { "epoch": 9.64140260256283, "grad_norm": 0.2001953125, "learning_rate": 2.869176517333863e-05, "loss": 0.4682, "step": 194120 }, { "epoch": 9.64189927485845, "grad_norm": 0.197265625, "learning_rate": 2.8652031389689082e-05, "loss": 0.4699, "step": 194130 }, { "epoch": 9.642395947154068, "grad_norm": 0.193359375, "learning_rate": 2.8612297606039533e-05, "loss": 0.5437, "step": 194140 }, { "epoch": 9.642892619449688, "grad_norm": 0.2236328125, "learning_rate": 2.8572563822389988e-05, "loss": 0.4852, "step": 194150 }, { "epoch": 9.643389291745306, "grad_norm": 0.2138671875, "learning_rate": 2.853283003874044e-05, "loss": 0.5102, "step": 194160 }, { "epoch": 9.643885964040926, "grad_norm": 0.232421875, "learning_rate": 2.849309625509089e-05, "loss": 0.5137, "step": 194170 }, { "epoch": 9.644382636336545, "grad_norm": 0.228515625, "learning_rate": 2.845336247144134e-05, "loss": 0.4675, "step": 194180 }, { "epoch": 9.644879308632165, "grad_norm": 0.1943359375, "learning_rate": 2.84136286877918e-05, "loss": 0.4924, "step": 194190 }, { "epoch": 9.645375980927783, "grad_norm": 0.212890625, "learning_rate": 2.837389490414225e-05, "loss": 0.5043, "step": 194200 }, { "epoch": 9.645872653223403, "grad_norm": 0.22265625, "learning_rate": 2.8334161120492704e-05, "loss": 0.4471, "step": 194210 }, { "epoch": 9.646369325519023, "grad_norm": 0.2314453125, "learning_rate": 2.8294427336843155e-05, "loss": 0.4832, "step": 194220 }, { "epoch": 9.646865997814642, "grad_norm": 0.21484375, "learning_rate": 2.8254693553193606e-05, "loss": 0.487, "step": 194230 }, { "epoch": 9.647362670110262, "grad_norm": 0.2119140625, "learning_rate": 2.8214959769544057e-05, "loss": 0.4722, "step": 194240 }, { "epoch": 9.64785934240588, "grad_norm": 0.1953125, "learning_rate": 2.817522598589451e-05, "loss": 0.4637, "step": 194250 }, { "epoch": 9.6483560147015, "grad_norm": 0.208984375, "learning_rate": 2.8135492202244962e-05, "loss": 0.4972, "step": 194260 }, { "epoch": 9.648852686997119, "grad_norm": 0.2392578125, "learning_rate": 2.8095758418595413e-05, "loss": 0.4992, "step": 194270 }, { "epoch": 9.649349359292739, "grad_norm": 0.1953125, "learning_rate": 2.8056024634945865e-05, "loss": 0.4637, "step": 194280 }, { "epoch": 9.649846031588359, "grad_norm": 0.22265625, "learning_rate": 2.801629085129632e-05, "loss": 0.4976, "step": 194290 }, { "epoch": 9.650342703883977, "grad_norm": 0.19921875, "learning_rate": 2.797655706764677e-05, "loss": 0.4965, "step": 194300 }, { "epoch": 9.650839376179597, "grad_norm": 0.2001953125, "learning_rate": 2.793682328399722e-05, "loss": 0.4809, "step": 194310 }, { "epoch": 9.651336048475216, "grad_norm": 0.21875, "learning_rate": 2.7897089500347672e-05, "loss": 0.5083, "step": 194320 }, { "epoch": 9.651832720770836, "grad_norm": 0.1962890625, "learning_rate": 2.7857355716698123e-05, "loss": 0.4955, "step": 194330 }, { "epoch": 9.652329393066454, "grad_norm": 0.2041015625, "learning_rate": 2.7817621933048577e-05, "loss": 0.4847, "step": 194340 }, { "epoch": 9.652826065362074, "grad_norm": 0.220703125, "learning_rate": 2.777788814939903e-05, "loss": 0.4941, "step": 194350 }, { "epoch": 9.653322737657694, "grad_norm": 0.22265625, "learning_rate": 2.773815436574948e-05, "loss": 0.5079, "step": 194360 }, { "epoch": 9.653819409953313, "grad_norm": 0.23828125, "learning_rate": 2.769842058209993e-05, "loss": 0.5025, "step": 194370 }, { "epoch": 9.654316082248933, "grad_norm": 0.21875, "learning_rate": 2.7658686798450385e-05, "loss": 0.4983, "step": 194380 }, { "epoch": 9.654812754544551, "grad_norm": 0.2060546875, "learning_rate": 2.7618953014800836e-05, "loss": 0.4958, "step": 194390 }, { "epoch": 9.655309426840171, "grad_norm": 0.205078125, "learning_rate": 2.7579219231151287e-05, "loss": 0.4882, "step": 194400 }, { "epoch": 9.65580609913579, "grad_norm": 0.2294921875, "learning_rate": 2.7539485447501738e-05, "loss": 0.4729, "step": 194410 }, { "epoch": 9.65630277143141, "grad_norm": 0.2021484375, "learning_rate": 2.7499751663852192e-05, "loss": 0.4555, "step": 194420 }, { "epoch": 9.65679944372703, "grad_norm": 0.2158203125, "learning_rate": 2.7460017880202643e-05, "loss": 0.4827, "step": 194430 }, { "epoch": 9.657296116022648, "grad_norm": 0.220703125, "learning_rate": 2.7420284096553094e-05, "loss": 0.5014, "step": 194440 }, { "epoch": 9.657792788318268, "grad_norm": 0.197265625, "learning_rate": 2.7380550312903546e-05, "loss": 0.4615, "step": 194450 }, { "epoch": 9.658289460613886, "grad_norm": 0.2314453125, "learning_rate": 2.7340816529254e-05, "loss": 0.4873, "step": 194460 }, { "epoch": 9.658786132909507, "grad_norm": 0.20703125, "learning_rate": 2.730108274560445e-05, "loss": 0.4867, "step": 194470 }, { "epoch": 9.659282805205125, "grad_norm": 0.208984375, "learning_rate": 2.7261348961954902e-05, "loss": 0.5168, "step": 194480 }, { "epoch": 9.659779477500745, "grad_norm": 0.197265625, "learning_rate": 2.7221615178305353e-05, "loss": 0.4792, "step": 194490 }, { "epoch": 9.660276149796365, "grad_norm": 0.2041015625, "learning_rate": 2.7181881394655807e-05, "loss": 0.4787, "step": 194500 }, { "epoch": 9.660772822091984, "grad_norm": 0.2138671875, "learning_rate": 2.714214761100626e-05, "loss": 0.5178, "step": 194510 }, { "epoch": 9.661269494387604, "grad_norm": 0.2333984375, "learning_rate": 2.710241382735671e-05, "loss": 0.4603, "step": 194520 }, { "epoch": 9.661766166683222, "grad_norm": 0.22265625, "learning_rate": 2.706268004370716e-05, "loss": 0.5243, "step": 194530 }, { "epoch": 9.662262838978842, "grad_norm": 0.2197265625, "learning_rate": 2.7022946260057615e-05, "loss": 0.4726, "step": 194540 }, { "epoch": 9.66275951127446, "grad_norm": 0.21484375, "learning_rate": 2.6983212476408066e-05, "loss": 0.488, "step": 194550 }, { "epoch": 9.66325618357008, "grad_norm": 0.2373046875, "learning_rate": 2.6943478692758517e-05, "loss": 0.5003, "step": 194560 }, { "epoch": 9.663752855865699, "grad_norm": 0.2177734375, "learning_rate": 2.6903744909108968e-05, "loss": 0.4723, "step": 194570 }, { "epoch": 9.664249528161319, "grad_norm": 0.1923828125, "learning_rate": 2.686401112545942e-05, "loss": 0.4933, "step": 194580 }, { "epoch": 9.664746200456939, "grad_norm": 0.1865234375, "learning_rate": 2.6824277341809873e-05, "loss": 0.4836, "step": 194590 }, { "epoch": 9.665242872752557, "grad_norm": 0.1923828125, "learning_rate": 2.6784543558160324e-05, "loss": 0.4715, "step": 194600 }, { "epoch": 9.665739545048178, "grad_norm": 0.2138671875, "learning_rate": 2.6744809774510782e-05, "loss": 0.4682, "step": 194610 }, { "epoch": 9.666236217343796, "grad_norm": 0.2265625, "learning_rate": 2.6705075990861233e-05, "loss": 0.5242, "step": 194620 }, { "epoch": 9.666732889639416, "grad_norm": 0.2021484375, "learning_rate": 2.6665342207211684e-05, "loss": 0.5015, "step": 194630 }, { "epoch": 9.667229561935034, "grad_norm": 0.2001953125, "learning_rate": 2.662560842356214e-05, "loss": 0.504, "step": 194640 }, { "epoch": 9.667726234230654, "grad_norm": 0.2021484375, "learning_rate": 2.658587463991259e-05, "loss": 0.467, "step": 194650 }, { "epoch": 9.668222906526275, "grad_norm": 0.20703125, "learning_rate": 2.654614085626304e-05, "loss": 0.4887, "step": 194660 }, { "epoch": 9.668719578821893, "grad_norm": 0.1962890625, "learning_rate": 2.6506407072613492e-05, "loss": 0.4907, "step": 194670 }, { "epoch": 9.669216251117513, "grad_norm": 0.234375, "learning_rate": 2.6466673288963946e-05, "loss": 0.5036, "step": 194680 }, { "epoch": 9.669712923413131, "grad_norm": 0.25390625, "learning_rate": 2.6426939505314397e-05, "loss": 0.4953, "step": 194690 }, { "epoch": 9.670209595708751, "grad_norm": 0.2099609375, "learning_rate": 2.6387205721664848e-05, "loss": 0.5137, "step": 194700 }, { "epoch": 9.67070626800437, "grad_norm": 0.1865234375, "learning_rate": 2.63474719380153e-05, "loss": 0.4861, "step": 194710 }, { "epoch": 9.67120294029999, "grad_norm": 0.205078125, "learning_rate": 2.630773815436575e-05, "loss": 0.4877, "step": 194720 }, { "epoch": 9.67169961259561, "grad_norm": 0.208984375, "learning_rate": 2.6268004370716205e-05, "loss": 0.486, "step": 194730 }, { "epoch": 9.672196284891228, "grad_norm": 0.1943359375, "learning_rate": 2.6228270587066656e-05, "loss": 0.4809, "step": 194740 }, { "epoch": 9.672692957186849, "grad_norm": 0.201171875, "learning_rate": 2.6188536803417107e-05, "loss": 0.4748, "step": 194750 }, { "epoch": 9.673189629482467, "grad_norm": 0.18359375, "learning_rate": 2.6148803019767558e-05, "loss": 0.4607, "step": 194760 }, { "epoch": 9.673686301778087, "grad_norm": 0.2197265625, "learning_rate": 2.6109069236118012e-05, "loss": 0.4913, "step": 194770 }, { "epoch": 9.674182974073705, "grad_norm": 0.22265625, "learning_rate": 2.6069335452468463e-05, "loss": 0.4711, "step": 194780 }, { "epoch": 9.674679646369325, "grad_norm": 0.1962890625, "learning_rate": 2.6029601668818914e-05, "loss": 0.4707, "step": 194790 }, { "epoch": 9.675176318664946, "grad_norm": 0.25, "learning_rate": 2.5989867885169365e-05, "loss": 0.48, "step": 194800 }, { "epoch": 9.675672990960564, "grad_norm": 0.22265625, "learning_rate": 2.595013410151982e-05, "loss": 0.4536, "step": 194810 }, { "epoch": 9.676169663256184, "grad_norm": 0.1982421875, "learning_rate": 2.591040031787027e-05, "loss": 0.4831, "step": 194820 }, { "epoch": 9.676666335551802, "grad_norm": 0.20703125, "learning_rate": 2.5870666534220722e-05, "loss": 0.4773, "step": 194830 }, { "epoch": 9.677163007847422, "grad_norm": 0.2080078125, "learning_rate": 2.5830932750571173e-05, "loss": 0.502, "step": 194840 }, { "epoch": 9.67765968014304, "grad_norm": 0.23046875, "learning_rate": 2.5791198966921627e-05, "loss": 0.4914, "step": 194850 }, { "epoch": 9.678156352438661, "grad_norm": 0.21484375, "learning_rate": 2.5751465183272078e-05, "loss": 0.4692, "step": 194860 }, { "epoch": 9.678653024734281, "grad_norm": 0.21484375, "learning_rate": 2.571173139962253e-05, "loss": 0.476, "step": 194870 }, { "epoch": 9.6791496970299, "grad_norm": 0.208984375, "learning_rate": 2.567199761597298e-05, "loss": 0.4852, "step": 194880 }, { "epoch": 9.67964636932552, "grad_norm": 0.2119140625, "learning_rate": 2.5632263832323435e-05, "loss": 0.5134, "step": 194890 }, { "epoch": 9.680143041621138, "grad_norm": 0.1943359375, "learning_rate": 2.5592530048673886e-05, "loss": 0.4981, "step": 194900 }, { "epoch": 9.680639713916758, "grad_norm": 0.2236328125, "learning_rate": 2.5552796265024337e-05, "loss": 0.4656, "step": 194910 }, { "epoch": 9.681136386212376, "grad_norm": 0.2421875, "learning_rate": 2.5513062481374788e-05, "loss": 0.4665, "step": 194920 }, { "epoch": 9.681633058507996, "grad_norm": 0.2138671875, "learning_rate": 2.5473328697725242e-05, "loss": 0.5203, "step": 194930 }, { "epoch": 9.682129730803616, "grad_norm": 0.205078125, "learning_rate": 2.5433594914075693e-05, "loss": 0.526, "step": 194940 }, { "epoch": 9.682626403099235, "grad_norm": 0.2119140625, "learning_rate": 2.5393861130426144e-05, "loss": 0.5292, "step": 194950 }, { "epoch": 9.683123075394855, "grad_norm": 0.23828125, "learning_rate": 2.5354127346776595e-05, "loss": 0.5179, "step": 194960 }, { "epoch": 9.683619747690473, "grad_norm": 0.205078125, "learning_rate": 2.5314393563127046e-05, "loss": 0.4797, "step": 194970 }, { "epoch": 9.684116419986093, "grad_norm": 0.21484375, "learning_rate": 2.52746597794775e-05, "loss": 0.4866, "step": 194980 }, { "epoch": 9.684613092281712, "grad_norm": 0.2197265625, "learning_rate": 2.5234925995827952e-05, "loss": 0.5057, "step": 194990 }, { "epoch": 9.685109764577332, "grad_norm": 0.2158203125, "learning_rate": 2.5195192212178403e-05, "loss": 0.5067, "step": 195000 }, { "epoch": 9.685606436872952, "grad_norm": 0.2060546875, "learning_rate": 2.5155458428528854e-05, "loss": 0.4849, "step": 195010 }, { "epoch": 9.68610310916857, "grad_norm": 0.2314453125, "learning_rate": 2.5115724644879308e-05, "loss": 0.4757, "step": 195020 }, { "epoch": 9.68659978146419, "grad_norm": 0.203125, "learning_rate": 2.5075990861229766e-05, "loss": 0.4971, "step": 195030 }, { "epoch": 9.687096453759809, "grad_norm": 0.1982421875, "learning_rate": 2.5036257077580217e-05, "loss": 0.5103, "step": 195040 }, { "epoch": 9.687593126055429, "grad_norm": 0.216796875, "learning_rate": 2.4996523293930665e-05, "loss": 0.4784, "step": 195050 }, { "epoch": 9.688089798351047, "grad_norm": 0.197265625, "learning_rate": 2.4956789510281116e-05, "loss": 0.501, "step": 195060 }, { "epoch": 9.688586470646667, "grad_norm": 0.2041015625, "learning_rate": 2.491705572663157e-05, "loss": 0.46, "step": 195070 }, { "epoch": 9.689083142942287, "grad_norm": 0.2109375, "learning_rate": 2.487732194298202e-05, "loss": 0.5057, "step": 195080 }, { "epoch": 9.689579815237906, "grad_norm": 0.2294921875, "learning_rate": 2.4837588159332472e-05, "loss": 0.5015, "step": 195090 }, { "epoch": 9.690076487533526, "grad_norm": 0.2119140625, "learning_rate": 2.4797854375682923e-05, "loss": 0.4884, "step": 195100 }, { "epoch": 9.690573159829144, "grad_norm": 0.193359375, "learning_rate": 2.4758120592033378e-05, "loss": 0.5, "step": 195110 }, { "epoch": 9.691069832124764, "grad_norm": 0.19921875, "learning_rate": 2.471838680838383e-05, "loss": 0.496, "step": 195120 }, { "epoch": 9.691566504420383, "grad_norm": 0.19921875, "learning_rate": 2.4678653024734283e-05, "loss": 0.5178, "step": 195130 }, { "epoch": 9.692063176716003, "grad_norm": 0.2578125, "learning_rate": 2.4638919241084734e-05, "loss": 0.4895, "step": 195140 }, { "epoch": 9.692559849011623, "grad_norm": 0.201171875, "learning_rate": 2.4599185457435185e-05, "loss": 0.4871, "step": 195150 }, { "epoch": 9.693056521307241, "grad_norm": 0.1982421875, "learning_rate": 2.455945167378564e-05, "loss": 0.5003, "step": 195160 }, { "epoch": 9.693553193602861, "grad_norm": 0.201171875, "learning_rate": 2.451971789013609e-05, "loss": 0.4816, "step": 195170 }, { "epoch": 9.69404986589848, "grad_norm": 0.19921875, "learning_rate": 2.447998410648654e-05, "loss": 0.4768, "step": 195180 }, { "epoch": 9.6945465381941, "grad_norm": 0.177734375, "learning_rate": 2.4440250322836993e-05, "loss": 0.4494, "step": 195190 }, { "epoch": 9.695043210489718, "grad_norm": 0.1962890625, "learning_rate": 2.4400516539187447e-05, "loss": 0.4537, "step": 195200 }, { "epoch": 9.695539882785338, "grad_norm": 0.1943359375, "learning_rate": 2.4360782755537898e-05, "loss": 0.4809, "step": 195210 }, { "epoch": 9.696036555080958, "grad_norm": 0.2041015625, "learning_rate": 2.432104897188835e-05, "loss": 0.46, "step": 195220 }, { "epoch": 9.696533227376577, "grad_norm": 0.197265625, "learning_rate": 2.42813151882388e-05, "loss": 0.4753, "step": 195230 }, { "epoch": 9.697029899672197, "grad_norm": 0.2080078125, "learning_rate": 2.4241581404589255e-05, "loss": 0.4959, "step": 195240 }, { "epoch": 9.697526571967815, "grad_norm": 0.205078125, "learning_rate": 2.4201847620939706e-05, "loss": 0.4751, "step": 195250 }, { "epoch": 9.698023244263435, "grad_norm": 0.212890625, "learning_rate": 2.4162113837290157e-05, "loss": 0.5048, "step": 195260 }, { "epoch": 9.698519916559054, "grad_norm": 0.197265625, "learning_rate": 2.4122380053640608e-05, "loss": 0.4776, "step": 195270 }, { "epoch": 9.699016588854674, "grad_norm": 0.1953125, "learning_rate": 2.4082646269991062e-05, "loss": 0.4882, "step": 195280 }, { "epoch": 9.699513261150294, "grad_norm": 0.208984375, "learning_rate": 2.4042912486341513e-05, "loss": 0.4751, "step": 195290 }, { "epoch": 9.700009933445912, "grad_norm": 0.208984375, "learning_rate": 2.4003178702691964e-05, "loss": 0.4944, "step": 195300 }, { "epoch": 9.700506605741532, "grad_norm": 0.2412109375, "learning_rate": 2.3963444919042415e-05, "loss": 0.5295, "step": 195310 }, { "epoch": 9.70100327803715, "grad_norm": 0.220703125, "learning_rate": 2.3923711135392866e-05, "loss": 0.5129, "step": 195320 }, { "epoch": 9.70149995033277, "grad_norm": 0.216796875, "learning_rate": 2.388397735174332e-05, "loss": 0.481, "step": 195330 }, { "epoch": 9.70199662262839, "grad_norm": 0.1962890625, "learning_rate": 2.3844243568093775e-05, "loss": 0.4714, "step": 195340 }, { "epoch": 9.70249329492401, "grad_norm": 0.212890625, "learning_rate": 2.3804509784444226e-05, "loss": 0.5126, "step": 195350 }, { "epoch": 9.70298996721963, "grad_norm": 0.2265625, "learning_rate": 2.3764776000794677e-05, "loss": 0.4717, "step": 195360 }, { "epoch": 9.703486639515248, "grad_norm": 0.2138671875, "learning_rate": 2.3725042217145128e-05, "loss": 0.5067, "step": 195370 }, { "epoch": 9.703983311810868, "grad_norm": 0.1962890625, "learning_rate": 2.3685308433495582e-05, "loss": 0.4728, "step": 195380 }, { "epoch": 9.704479984106486, "grad_norm": 0.2158203125, "learning_rate": 2.3645574649846034e-05, "loss": 0.4708, "step": 195390 }, { "epoch": 9.704976656402106, "grad_norm": 0.2119140625, "learning_rate": 2.3605840866196485e-05, "loss": 0.4978, "step": 195400 }, { "epoch": 9.705473328697725, "grad_norm": 0.19921875, "learning_rate": 2.3566107082546936e-05, "loss": 0.491, "step": 195410 }, { "epoch": 9.705970000993345, "grad_norm": 0.1953125, "learning_rate": 2.352637329889739e-05, "loss": 0.5006, "step": 195420 }, { "epoch": 9.706466673288965, "grad_norm": 0.197265625, "learning_rate": 2.348663951524784e-05, "loss": 0.4791, "step": 195430 }, { "epoch": 9.706963345584583, "grad_norm": 0.22265625, "learning_rate": 2.3446905731598292e-05, "loss": 0.5047, "step": 195440 }, { "epoch": 9.707460017880203, "grad_norm": 0.20703125, "learning_rate": 2.3407171947948743e-05, "loss": 0.4874, "step": 195450 }, { "epoch": 9.707956690175822, "grad_norm": 0.2119140625, "learning_rate": 2.3367438164299197e-05, "loss": 0.4617, "step": 195460 }, { "epoch": 9.708453362471442, "grad_norm": 0.20703125, "learning_rate": 2.332770438064965e-05, "loss": 0.4805, "step": 195470 }, { "epoch": 9.70895003476706, "grad_norm": 0.2578125, "learning_rate": 2.32879705970001e-05, "loss": 0.5012, "step": 195480 }, { "epoch": 9.70944670706268, "grad_norm": 0.19140625, "learning_rate": 2.324823681335055e-05, "loss": 0.4576, "step": 195490 }, { "epoch": 9.7099433793583, "grad_norm": 0.2080078125, "learning_rate": 2.3208503029701005e-05, "loss": 0.4666, "step": 195500 }, { "epoch": 9.710440051653919, "grad_norm": 0.1943359375, "learning_rate": 2.3168769246051456e-05, "loss": 0.4796, "step": 195510 }, { "epoch": 9.710936723949539, "grad_norm": 0.2021484375, "learning_rate": 2.3129035462401907e-05, "loss": 0.5225, "step": 195520 }, { "epoch": 9.711433396245157, "grad_norm": 0.2119140625, "learning_rate": 2.3089301678752358e-05, "loss": 0.5004, "step": 195530 }, { "epoch": 9.711930068540777, "grad_norm": 0.20703125, "learning_rate": 2.3049567895102812e-05, "loss": 0.4764, "step": 195540 }, { "epoch": 9.712426740836396, "grad_norm": 0.18359375, "learning_rate": 2.3009834111453267e-05, "loss": 0.496, "step": 195550 }, { "epoch": 9.712923413132016, "grad_norm": 0.232421875, "learning_rate": 2.2970100327803718e-05, "loss": 0.4956, "step": 195560 }, { "epoch": 9.713420085427634, "grad_norm": 0.2197265625, "learning_rate": 2.293036654415417e-05, "loss": 0.4903, "step": 195570 }, { "epoch": 9.713916757723254, "grad_norm": 0.2138671875, "learning_rate": 2.289063276050462e-05, "loss": 0.4937, "step": 195580 }, { "epoch": 9.714413430018874, "grad_norm": 0.259765625, "learning_rate": 2.2850898976855074e-05, "loss": 0.5052, "step": 195590 }, { "epoch": 9.714910102314493, "grad_norm": 0.2119140625, "learning_rate": 2.2811165193205525e-05, "loss": 0.514, "step": 195600 }, { "epoch": 9.715406774610113, "grad_norm": 0.2392578125, "learning_rate": 2.2771431409555976e-05, "loss": 0.5074, "step": 195610 }, { "epoch": 9.715903446905731, "grad_norm": 0.23046875, "learning_rate": 2.2731697625906427e-05, "loss": 0.4891, "step": 195620 }, { "epoch": 9.716400119201351, "grad_norm": 0.240234375, "learning_rate": 2.2691963842256882e-05, "loss": 0.4955, "step": 195630 }, { "epoch": 9.71689679149697, "grad_norm": 0.2060546875, "learning_rate": 2.2652230058607333e-05, "loss": 0.5105, "step": 195640 }, { "epoch": 9.71739346379259, "grad_norm": 0.2236328125, "learning_rate": 2.2612496274957784e-05, "loss": 0.4582, "step": 195650 }, { "epoch": 9.71789013608821, "grad_norm": 0.19140625, "learning_rate": 2.2572762491308235e-05, "loss": 0.4924, "step": 195660 }, { "epoch": 9.718386808383828, "grad_norm": 0.22265625, "learning_rate": 2.2533028707658686e-05, "loss": 0.4607, "step": 195670 }, { "epoch": 9.718883480679448, "grad_norm": 0.19921875, "learning_rate": 2.249329492400914e-05, "loss": 0.5146, "step": 195680 }, { "epoch": 9.719380152975067, "grad_norm": 0.2080078125, "learning_rate": 2.245356114035959e-05, "loss": 0.4757, "step": 195690 }, { "epoch": 9.719876825270687, "grad_norm": 0.2041015625, "learning_rate": 2.2413827356710042e-05, "loss": 0.469, "step": 195700 }, { "epoch": 9.720373497566305, "grad_norm": 0.2099609375, "learning_rate": 2.2374093573060493e-05, "loss": 0.4821, "step": 195710 }, { "epoch": 9.720870169861925, "grad_norm": 0.2197265625, "learning_rate": 2.2334359789410948e-05, "loss": 0.5017, "step": 195720 }, { "epoch": 9.721366842157545, "grad_norm": 0.212890625, "learning_rate": 2.22946260057614e-05, "loss": 0.5143, "step": 195730 }, { "epoch": 9.721863514453164, "grad_norm": 0.19921875, "learning_rate": 2.225489222211185e-05, "loss": 0.5029, "step": 195740 }, { "epoch": 9.722360186748784, "grad_norm": 0.2109375, "learning_rate": 2.22151584384623e-05, "loss": 0.4939, "step": 195750 }, { "epoch": 9.722856859044402, "grad_norm": 0.2099609375, "learning_rate": 2.2175424654812755e-05, "loss": 0.4829, "step": 195760 }, { "epoch": 9.723353531340022, "grad_norm": 0.19921875, "learning_rate": 2.213569087116321e-05, "loss": 0.4811, "step": 195770 }, { "epoch": 9.72385020363564, "grad_norm": 0.2041015625, "learning_rate": 2.209595708751366e-05, "loss": 0.4619, "step": 195780 }, { "epoch": 9.72434687593126, "grad_norm": 0.1884765625, "learning_rate": 2.2056223303864112e-05, "loss": 0.4848, "step": 195790 }, { "epoch": 9.72484354822688, "grad_norm": 0.1953125, "learning_rate": 2.2016489520214563e-05, "loss": 0.4892, "step": 195800 }, { "epoch": 9.725340220522499, "grad_norm": 0.2109375, "learning_rate": 2.1976755736565017e-05, "loss": 0.4849, "step": 195810 }, { "epoch": 9.72583689281812, "grad_norm": 0.1923828125, "learning_rate": 2.193702195291547e-05, "loss": 0.4736, "step": 195820 }, { "epoch": 9.726333565113737, "grad_norm": 0.21875, "learning_rate": 2.189728816926592e-05, "loss": 0.5, "step": 195830 }, { "epoch": 9.726830237409358, "grad_norm": 0.2265625, "learning_rate": 2.185755438561637e-05, "loss": 0.5078, "step": 195840 }, { "epoch": 9.727326909704976, "grad_norm": 0.279296875, "learning_rate": 2.1817820601966825e-05, "loss": 0.4833, "step": 195850 }, { "epoch": 9.727823582000596, "grad_norm": 0.2177734375, "learning_rate": 2.1778086818317276e-05, "loss": 0.5043, "step": 195860 }, { "epoch": 9.728320254296216, "grad_norm": 0.2421875, "learning_rate": 2.1738353034667727e-05, "loss": 0.4861, "step": 195870 }, { "epoch": 9.728816926591835, "grad_norm": 0.2216796875, "learning_rate": 2.1698619251018178e-05, "loss": 0.4772, "step": 195880 }, { "epoch": 9.729313598887455, "grad_norm": 0.212890625, "learning_rate": 2.1658885467368632e-05, "loss": 0.5058, "step": 195890 }, { "epoch": 9.729810271183073, "grad_norm": 0.193359375, "learning_rate": 2.1619151683719083e-05, "loss": 0.507, "step": 195900 }, { "epoch": 9.730306943478693, "grad_norm": 0.2294921875, "learning_rate": 2.1579417900069534e-05, "loss": 0.4756, "step": 195910 }, { "epoch": 9.730803615774311, "grad_norm": 0.2177734375, "learning_rate": 2.1539684116419985e-05, "loss": 0.4806, "step": 195920 }, { "epoch": 9.731300288069932, "grad_norm": 0.205078125, "learning_rate": 2.149995033277044e-05, "loss": 0.4856, "step": 195930 }, { "epoch": 9.731796960365552, "grad_norm": 0.18359375, "learning_rate": 2.146021654912089e-05, "loss": 0.4852, "step": 195940 }, { "epoch": 9.73229363266117, "grad_norm": 0.197265625, "learning_rate": 2.1420482765471342e-05, "loss": 0.5089, "step": 195950 }, { "epoch": 9.73279030495679, "grad_norm": 0.21484375, "learning_rate": 2.1380748981821793e-05, "loss": 0.4961, "step": 195960 }, { "epoch": 9.733286977252408, "grad_norm": 0.2099609375, "learning_rate": 2.1341015198172244e-05, "loss": 0.4767, "step": 195970 }, { "epoch": 9.733783649548029, "grad_norm": 0.1806640625, "learning_rate": 2.1301281414522702e-05, "loss": 0.4586, "step": 195980 }, { "epoch": 9.734280321843647, "grad_norm": 0.2265625, "learning_rate": 2.1261547630873153e-05, "loss": 0.4875, "step": 195990 }, { "epoch": 9.734776994139267, "grad_norm": 0.251953125, "learning_rate": 2.1221813847223604e-05, "loss": 0.5347, "step": 196000 }, { "epoch": 9.735273666434885, "grad_norm": 0.2265625, "learning_rate": 2.1182080063574055e-05, "loss": 0.4966, "step": 196010 }, { "epoch": 9.735770338730505, "grad_norm": 0.18359375, "learning_rate": 2.114234627992451e-05, "loss": 0.4585, "step": 196020 }, { "epoch": 9.736267011026126, "grad_norm": 0.25, "learning_rate": 2.110261249627496e-05, "loss": 0.4704, "step": 196030 }, { "epoch": 9.736763683321744, "grad_norm": 0.2255859375, "learning_rate": 2.106287871262541e-05, "loss": 0.4908, "step": 196040 }, { "epoch": 9.737260355617364, "grad_norm": 0.2109375, "learning_rate": 2.1023144928975862e-05, "loss": 0.4916, "step": 196050 }, { "epoch": 9.737757027912982, "grad_norm": 0.2177734375, "learning_rate": 2.0983411145326313e-05, "loss": 0.5012, "step": 196060 }, { "epoch": 9.738253700208602, "grad_norm": 0.224609375, "learning_rate": 2.0943677361676768e-05, "loss": 0.4638, "step": 196070 }, { "epoch": 9.73875037250422, "grad_norm": 0.1982421875, "learning_rate": 2.090394357802722e-05, "loss": 0.4841, "step": 196080 }, { "epoch": 9.739247044799841, "grad_norm": 0.19921875, "learning_rate": 2.086420979437767e-05, "loss": 0.4794, "step": 196090 }, { "epoch": 9.739743717095461, "grad_norm": 0.26171875, "learning_rate": 2.082447601072812e-05, "loss": 0.5137, "step": 196100 }, { "epoch": 9.74024038939108, "grad_norm": 0.1943359375, "learning_rate": 2.0784742227078575e-05, "loss": 0.4988, "step": 196110 }, { "epoch": 9.7407370616867, "grad_norm": 0.2255859375, "learning_rate": 2.0745008443429026e-05, "loss": 0.4938, "step": 196120 }, { "epoch": 9.741233733982318, "grad_norm": 0.310546875, "learning_rate": 2.0705274659779477e-05, "loss": 0.4864, "step": 196130 }, { "epoch": 9.741730406277938, "grad_norm": 0.20703125, "learning_rate": 2.0665540876129928e-05, "loss": 0.4936, "step": 196140 }, { "epoch": 9.742227078573556, "grad_norm": 0.2099609375, "learning_rate": 2.0625807092480383e-05, "loss": 0.4587, "step": 196150 }, { "epoch": 9.742723750869176, "grad_norm": 0.197265625, "learning_rate": 2.0586073308830834e-05, "loss": 0.4832, "step": 196160 }, { "epoch": 9.743220423164797, "grad_norm": 0.203125, "learning_rate": 2.0546339525181285e-05, "loss": 0.5016, "step": 196170 }, { "epoch": 9.743717095460415, "grad_norm": 0.2119140625, "learning_rate": 2.0506605741531736e-05, "loss": 0.4711, "step": 196180 }, { "epoch": 9.744213767756035, "grad_norm": 0.1943359375, "learning_rate": 2.046687195788219e-05, "loss": 0.48, "step": 196190 }, { "epoch": 9.744710440051653, "grad_norm": 0.2314453125, "learning_rate": 2.0427138174232645e-05, "loss": 0.4777, "step": 196200 }, { "epoch": 9.745207112347273, "grad_norm": 0.1982421875, "learning_rate": 2.0387404390583096e-05, "loss": 0.4614, "step": 196210 }, { "epoch": 9.745703784642892, "grad_norm": 0.216796875, "learning_rate": 2.0347670606933547e-05, "loss": 0.5297, "step": 196220 }, { "epoch": 9.746200456938512, "grad_norm": 0.21875, "learning_rate": 2.0307936823283998e-05, "loss": 0.5064, "step": 196230 }, { "epoch": 9.746697129234132, "grad_norm": 0.203125, "learning_rate": 2.0268203039634452e-05, "loss": 0.4925, "step": 196240 }, { "epoch": 9.74719380152975, "grad_norm": 0.212890625, "learning_rate": 2.0228469255984903e-05, "loss": 0.4921, "step": 196250 }, { "epoch": 9.74769047382537, "grad_norm": 0.2236328125, "learning_rate": 2.0188735472335354e-05, "loss": 0.4602, "step": 196260 }, { "epoch": 9.748187146120989, "grad_norm": 0.2177734375, "learning_rate": 2.0149001688685805e-05, "loss": 0.4966, "step": 196270 }, { "epoch": 9.748683818416609, "grad_norm": 0.2021484375, "learning_rate": 2.010926790503626e-05, "loss": 0.4766, "step": 196280 }, { "epoch": 9.749180490712227, "grad_norm": 0.2099609375, "learning_rate": 2.006953412138671e-05, "loss": 0.4918, "step": 196290 }, { "epoch": 9.749677163007847, "grad_norm": 0.1982421875, "learning_rate": 2.002980033773716e-05, "loss": 0.5133, "step": 196300 }, { "epoch": 9.750173835303467, "grad_norm": 0.2021484375, "learning_rate": 1.9990066554087613e-05, "loss": 0.4892, "step": 196310 }, { "epoch": 9.750670507599086, "grad_norm": 0.208984375, "learning_rate": 1.9950332770438067e-05, "loss": 0.4952, "step": 196320 }, { "epoch": 9.751167179894706, "grad_norm": 0.193359375, "learning_rate": 1.9910598986788518e-05, "loss": 0.5125, "step": 196330 }, { "epoch": 9.751663852190324, "grad_norm": 0.2216796875, "learning_rate": 1.987086520313897e-05, "loss": 0.5006, "step": 196340 }, { "epoch": 9.752160524485944, "grad_norm": 0.1943359375, "learning_rate": 1.983113141948942e-05, "loss": 0.5008, "step": 196350 }, { "epoch": 9.752657196781563, "grad_norm": 0.2041015625, "learning_rate": 1.979139763583987e-05, "loss": 0.4845, "step": 196360 }, { "epoch": 9.753153869077183, "grad_norm": 0.2197265625, "learning_rate": 1.9751663852190326e-05, "loss": 0.4881, "step": 196370 }, { "epoch": 9.753650541372803, "grad_norm": 0.2197265625, "learning_rate": 1.9711930068540777e-05, "loss": 0.5026, "step": 196380 }, { "epoch": 9.754147213668421, "grad_norm": 0.1982421875, "learning_rate": 1.9672196284891228e-05, "loss": 0.4911, "step": 196390 }, { "epoch": 9.754643885964041, "grad_norm": 0.1904296875, "learning_rate": 1.9632462501241682e-05, "loss": 0.467, "step": 196400 }, { "epoch": 9.75514055825966, "grad_norm": 0.20703125, "learning_rate": 1.9592728717592133e-05, "loss": 0.47, "step": 196410 }, { "epoch": 9.75563723055528, "grad_norm": 0.2177734375, "learning_rate": 1.9552994933942588e-05, "loss": 0.4833, "step": 196420 }, { "epoch": 9.756133902850898, "grad_norm": 0.197265625, "learning_rate": 1.951326115029304e-05, "loss": 0.4716, "step": 196430 }, { "epoch": 9.756630575146518, "grad_norm": 0.234375, "learning_rate": 1.947352736664349e-05, "loss": 0.5068, "step": 196440 }, { "epoch": 9.757127247442138, "grad_norm": 0.212890625, "learning_rate": 1.943379358299394e-05, "loss": 0.4756, "step": 196450 }, { "epoch": 9.757623919737757, "grad_norm": 0.236328125, "learning_rate": 1.9394059799344395e-05, "loss": 0.4787, "step": 196460 }, { "epoch": 9.758120592033377, "grad_norm": 0.19921875, "learning_rate": 1.9354326015694846e-05, "loss": 0.5143, "step": 196470 }, { "epoch": 9.758617264328995, "grad_norm": 0.203125, "learning_rate": 1.9314592232045297e-05, "loss": 0.4796, "step": 196480 }, { "epoch": 9.759113936624615, "grad_norm": 0.208984375, "learning_rate": 1.9274858448395748e-05, "loss": 0.4623, "step": 196490 }, { "epoch": 9.759610608920234, "grad_norm": 0.2490234375, "learning_rate": 1.9235124664746203e-05, "loss": 0.5225, "step": 196500 }, { "epoch": 9.760107281215854, "grad_norm": 0.2021484375, "learning_rate": 1.9195390881096654e-05, "loss": 0.5526, "step": 196510 }, { "epoch": 9.760603953511474, "grad_norm": 0.21875, "learning_rate": 1.9155657097447105e-05, "loss": 0.4987, "step": 196520 }, { "epoch": 9.761100625807092, "grad_norm": 0.2021484375, "learning_rate": 1.9115923313797556e-05, "loss": 0.4952, "step": 196530 }, { "epoch": 9.761597298102712, "grad_norm": 0.25390625, "learning_rate": 1.907618953014801e-05, "loss": 0.5011, "step": 196540 }, { "epoch": 9.76209397039833, "grad_norm": 0.236328125, "learning_rate": 1.903645574649846e-05, "loss": 0.4748, "step": 196550 }, { "epoch": 9.76259064269395, "grad_norm": 0.2451171875, "learning_rate": 1.8996721962848912e-05, "loss": 0.5095, "step": 196560 }, { "epoch": 9.76308731498957, "grad_norm": 0.205078125, "learning_rate": 1.8956988179199363e-05, "loss": 0.5107, "step": 196570 }, { "epoch": 9.76358398728519, "grad_norm": 0.2001953125, "learning_rate": 1.8917254395549818e-05, "loss": 0.5026, "step": 196580 }, { "epoch": 9.76408065958081, "grad_norm": 0.216796875, "learning_rate": 1.887752061190027e-05, "loss": 0.512, "step": 196590 }, { "epoch": 9.764577331876428, "grad_norm": 0.2431640625, "learning_rate": 1.883778682825072e-05, "loss": 0.4859, "step": 196600 }, { "epoch": 9.765074004172048, "grad_norm": 0.19921875, "learning_rate": 1.8798053044601174e-05, "loss": 0.4648, "step": 196610 }, { "epoch": 9.765570676467666, "grad_norm": 0.21875, "learning_rate": 1.8758319260951625e-05, "loss": 0.4673, "step": 196620 }, { "epoch": 9.766067348763286, "grad_norm": 0.1884765625, "learning_rate": 1.871858547730208e-05, "loss": 0.4805, "step": 196630 }, { "epoch": 9.766564021058905, "grad_norm": 0.2041015625, "learning_rate": 1.867885169365253e-05, "loss": 0.4962, "step": 196640 }, { "epoch": 9.767060693354525, "grad_norm": 0.2109375, "learning_rate": 1.863911791000298e-05, "loss": 0.4537, "step": 196650 }, { "epoch": 9.767557365650145, "grad_norm": 0.203125, "learning_rate": 1.8599384126353433e-05, "loss": 0.484, "step": 196660 }, { "epoch": 9.768054037945763, "grad_norm": 0.2060546875, "learning_rate": 1.8559650342703887e-05, "loss": 0.4855, "step": 196670 }, { "epoch": 9.768550710241383, "grad_norm": 0.2216796875, "learning_rate": 1.8519916559054338e-05, "loss": 0.4983, "step": 196680 }, { "epoch": 9.769047382537002, "grad_norm": 0.216796875, "learning_rate": 1.848018277540479e-05, "loss": 0.4894, "step": 196690 }, { "epoch": 9.769544054832622, "grad_norm": 0.2109375, "learning_rate": 1.844044899175524e-05, "loss": 0.4924, "step": 196700 }, { "epoch": 9.77004072712824, "grad_norm": 0.1953125, "learning_rate": 1.840071520810569e-05, "loss": 0.496, "step": 196710 }, { "epoch": 9.77053739942386, "grad_norm": 0.2041015625, "learning_rate": 1.8360981424456145e-05, "loss": 0.4968, "step": 196720 }, { "epoch": 9.77103407171948, "grad_norm": 0.1943359375, "learning_rate": 1.8321247640806596e-05, "loss": 0.4638, "step": 196730 }, { "epoch": 9.771530744015099, "grad_norm": 0.2294921875, "learning_rate": 1.8281513857157048e-05, "loss": 0.5139, "step": 196740 }, { "epoch": 9.772027416310719, "grad_norm": 0.2109375, "learning_rate": 1.82417800735075e-05, "loss": 0.4681, "step": 196750 }, { "epoch": 9.772524088606337, "grad_norm": 0.232421875, "learning_rate": 1.8202046289857953e-05, "loss": 0.48, "step": 196760 }, { "epoch": 9.773020760901957, "grad_norm": 0.279296875, "learning_rate": 1.8162312506208404e-05, "loss": 0.4901, "step": 196770 }, { "epoch": 9.773517433197576, "grad_norm": 0.2177734375, "learning_rate": 1.8122578722558855e-05, "loss": 0.5155, "step": 196780 }, { "epoch": 9.774014105493196, "grad_norm": 0.2294921875, "learning_rate": 1.8082844938909306e-05, "loss": 0.4819, "step": 196790 }, { "epoch": 9.774510777788816, "grad_norm": 0.203125, "learning_rate": 1.804311115525976e-05, "loss": 0.4388, "step": 196800 }, { "epoch": 9.775007450084434, "grad_norm": 0.197265625, "learning_rate": 1.800337737161021e-05, "loss": 0.5005, "step": 196810 }, { "epoch": 9.775504122380054, "grad_norm": 0.2421875, "learning_rate": 1.7963643587960666e-05, "loss": 0.4617, "step": 196820 }, { "epoch": 9.776000794675673, "grad_norm": 0.208984375, "learning_rate": 1.7923909804311117e-05, "loss": 0.468, "step": 196830 }, { "epoch": 9.776497466971293, "grad_norm": 0.201171875, "learning_rate": 1.7884176020661568e-05, "loss": 0.5165, "step": 196840 }, { "epoch": 9.776994139266911, "grad_norm": 0.2236328125, "learning_rate": 1.7844442237012022e-05, "loss": 0.4809, "step": 196850 }, { "epoch": 9.777490811562531, "grad_norm": 0.2099609375, "learning_rate": 1.7804708453362473e-05, "loss": 0.4636, "step": 196860 }, { "epoch": 9.777987483858151, "grad_norm": 0.212890625, "learning_rate": 1.7764974669712924e-05, "loss": 0.482, "step": 196870 }, { "epoch": 9.77848415615377, "grad_norm": 0.1943359375, "learning_rate": 1.7725240886063375e-05, "loss": 0.4925, "step": 196880 }, { "epoch": 9.77898082844939, "grad_norm": 0.2041015625, "learning_rate": 1.768550710241383e-05, "loss": 0.4965, "step": 196890 }, { "epoch": 9.779477500745008, "grad_norm": 0.193359375, "learning_rate": 1.764577331876428e-05, "loss": 0.4706, "step": 196900 }, { "epoch": 9.779974173040628, "grad_norm": 0.23828125, "learning_rate": 1.7606039535114732e-05, "loss": 0.492, "step": 196910 }, { "epoch": 9.780470845336247, "grad_norm": 0.2392578125, "learning_rate": 1.7566305751465183e-05, "loss": 0.4736, "step": 196920 }, { "epoch": 9.780967517631867, "grad_norm": 0.2041015625, "learning_rate": 1.7526571967815637e-05, "loss": 0.5131, "step": 196930 }, { "epoch": 9.781464189927487, "grad_norm": 0.2099609375, "learning_rate": 1.748683818416609e-05, "loss": 0.464, "step": 196940 }, { "epoch": 9.781960862223105, "grad_norm": 0.19921875, "learning_rate": 1.744710440051654e-05, "loss": 0.4476, "step": 196950 }, { "epoch": 9.782457534518725, "grad_norm": 0.203125, "learning_rate": 1.740737061686699e-05, "loss": 0.496, "step": 196960 }, { "epoch": 9.782954206814344, "grad_norm": 0.203125, "learning_rate": 1.7367636833217445e-05, "loss": 0.472, "step": 196970 }, { "epoch": 9.783450879109964, "grad_norm": 0.2490234375, "learning_rate": 1.7327903049567896e-05, "loss": 0.4679, "step": 196980 }, { "epoch": 9.783947551405582, "grad_norm": 0.2041015625, "learning_rate": 1.7288169265918347e-05, "loss": 0.4861, "step": 196990 }, { "epoch": 9.784444223701202, "grad_norm": 0.189453125, "learning_rate": 1.7248435482268798e-05, "loss": 0.4758, "step": 197000 }, { "epoch": 9.78494089599682, "grad_norm": 0.2080078125, "learning_rate": 1.720870169861925e-05, "loss": 0.4768, "step": 197010 }, { "epoch": 9.78543756829244, "grad_norm": 0.220703125, "learning_rate": 1.7168967914969703e-05, "loss": 0.4772, "step": 197020 }, { "epoch": 9.78593424058806, "grad_norm": 0.216796875, "learning_rate": 1.7129234131320158e-05, "loss": 0.4638, "step": 197030 }, { "epoch": 9.786430912883679, "grad_norm": 0.2353515625, "learning_rate": 1.708950034767061e-05, "loss": 0.4954, "step": 197040 }, { "epoch": 9.7869275851793, "grad_norm": 0.22265625, "learning_rate": 1.704976656402106e-05, "loss": 0.4877, "step": 197050 }, { "epoch": 9.787424257474918, "grad_norm": 0.212890625, "learning_rate": 1.7010032780371514e-05, "loss": 0.4782, "step": 197060 }, { "epoch": 9.787920929770538, "grad_norm": 0.2138671875, "learning_rate": 1.6970298996721965e-05, "loss": 0.4893, "step": 197070 }, { "epoch": 9.788417602066156, "grad_norm": 0.234375, "learning_rate": 1.6930565213072416e-05, "loss": 0.4931, "step": 197080 }, { "epoch": 9.788914274361776, "grad_norm": 0.20703125, "learning_rate": 1.6890831429422867e-05, "loss": 0.4815, "step": 197090 }, { "epoch": 9.789410946657396, "grad_norm": 0.224609375, "learning_rate": 1.685109764577332e-05, "loss": 0.4825, "step": 197100 }, { "epoch": 9.789907618953015, "grad_norm": 0.265625, "learning_rate": 1.6811363862123773e-05, "loss": 0.4978, "step": 197110 }, { "epoch": 9.790404291248635, "grad_norm": 0.2080078125, "learning_rate": 1.6771630078474224e-05, "loss": 0.4736, "step": 197120 }, { "epoch": 9.790900963544253, "grad_norm": 0.2451171875, "learning_rate": 1.6731896294824675e-05, "loss": 0.4855, "step": 197130 }, { "epoch": 9.791397635839873, "grad_norm": 0.2109375, "learning_rate": 1.6692162511175126e-05, "loss": 0.4818, "step": 197140 }, { "epoch": 9.791894308135491, "grad_norm": 0.244140625, "learning_rate": 1.665242872752558e-05, "loss": 0.5244, "step": 197150 }, { "epoch": 9.792390980431112, "grad_norm": 0.19140625, "learning_rate": 1.661269494387603e-05, "loss": 0.4878, "step": 197160 }, { "epoch": 9.792887652726732, "grad_norm": 0.232421875, "learning_rate": 1.6572961160226482e-05, "loss": 0.5117, "step": 197170 }, { "epoch": 9.79338432502235, "grad_norm": 0.224609375, "learning_rate": 1.6533227376576933e-05, "loss": 0.4566, "step": 197180 }, { "epoch": 9.79388099731797, "grad_norm": 0.2138671875, "learning_rate": 1.6493493592927388e-05, "loss": 0.468, "step": 197190 }, { "epoch": 9.794377669613588, "grad_norm": 0.2109375, "learning_rate": 1.645375980927784e-05, "loss": 0.4971, "step": 197200 }, { "epoch": 9.794874341909209, "grad_norm": 0.1943359375, "learning_rate": 1.641402602562829e-05, "loss": 0.4705, "step": 197210 }, { "epoch": 9.795371014204827, "grad_norm": 0.2216796875, "learning_rate": 1.637429224197874e-05, "loss": 0.466, "step": 197220 }, { "epoch": 9.795867686500447, "grad_norm": 0.2021484375, "learning_rate": 1.6334558458329195e-05, "loss": 0.5081, "step": 197230 }, { "epoch": 9.796364358796067, "grad_norm": 0.2333984375, "learning_rate": 1.629482467467965e-05, "loss": 0.5119, "step": 197240 }, { "epoch": 9.796861031091685, "grad_norm": 0.2177734375, "learning_rate": 1.62550908910301e-05, "loss": 0.5001, "step": 197250 }, { "epoch": 9.797357703387306, "grad_norm": 0.21875, "learning_rate": 1.6215357107380552e-05, "loss": 0.5009, "step": 197260 }, { "epoch": 9.797854375682924, "grad_norm": 0.2080078125, "learning_rate": 1.6175623323731003e-05, "loss": 0.4913, "step": 197270 }, { "epoch": 9.798351047978544, "grad_norm": 0.244140625, "learning_rate": 1.6135889540081457e-05, "loss": 0.5293, "step": 197280 }, { "epoch": 9.798847720274162, "grad_norm": 0.2021484375, "learning_rate": 1.6096155756431908e-05, "loss": 0.4953, "step": 197290 }, { "epoch": 9.799344392569783, "grad_norm": 0.1962890625, "learning_rate": 1.605642197278236e-05, "loss": 0.4845, "step": 197300 }, { "epoch": 9.799841064865403, "grad_norm": 0.2001953125, "learning_rate": 1.601668818913281e-05, "loss": 0.5252, "step": 197310 }, { "epoch": 9.800337737161021, "grad_norm": 0.220703125, "learning_rate": 1.5976954405483265e-05, "loss": 0.4906, "step": 197320 }, { "epoch": 9.800834409456641, "grad_norm": 0.23828125, "learning_rate": 1.5937220621833716e-05, "loss": 0.4903, "step": 197330 }, { "epoch": 9.80133108175226, "grad_norm": 0.2119140625, "learning_rate": 1.5897486838184167e-05, "loss": 0.4786, "step": 197340 }, { "epoch": 9.80182775404788, "grad_norm": 0.240234375, "learning_rate": 1.5857753054534618e-05, "loss": 0.4979, "step": 197350 }, { "epoch": 9.802324426343498, "grad_norm": 0.20703125, "learning_rate": 1.5818019270885072e-05, "loss": 0.4802, "step": 197360 }, { "epoch": 9.802821098639118, "grad_norm": 0.2060546875, "learning_rate": 1.5778285487235523e-05, "loss": 0.4789, "step": 197370 }, { "epoch": 9.803317770934736, "grad_norm": 0.2001953125, "learning_rate": 1.5738551703585974e-05, "loss": 0.4494, "step": 197380 }, { "epoch": 9.803814443230356, "grad_norm": 0.2060546875, "learning_rate": 1.5698817919936425e-05, "loss": 0.4801, "step": 197390 }, { "epoch": 9.804311115525977, "grad_norm": 0.240234375, "learning_rate": 1.5659084136286876e-05, "loss": 0.5141, "step": 197400 }, { "epoch": 9.804807787821595, "grad_norm": 0.263671875, "learning_rate": 1.561935035263733e-05, "loss": 0.5059, "step": 197410 }, { "epoch": 9.805304460117215, "grad_norm": 0.2177734375, "learning_rate": 1.5579616568987782e-05, "loss": 0.4943, "step": 197420 }, { "epoch": 9.805801132412833, "grad_norm": 0.208984375, "learning_rate": 1.5539882785338233e-05, "loss": 0.4801, "step": 197430 }, { "epoch": 9.806297804708453, "grad_norm": 0.1943359375, "learning_rate": 1.5500149001688684e-05, "loss": 0.4785, "step": 197440 }, { "epoch": 9.806794477004072, "grad_norm": 0.2294921875, "learning_rate": 1.5460415218039138e-05, "loss": 0.49, "step": 197450 }, { "epoch": 9.807291149299692, "grad_norm": 0.1943359375, "learning_rate": 1.5420681434389593e-05, "loss": 0.4955, "step": 197460 }, { "epoch": 9.807787821595312, "grad_norm": 0.2265625, "learning_rate": 1.5380947650740044e-05, "loss": 0.4877, "step": 197470 }, { "epoch": 9.80828449389093, "grad_norm": 0.21484375, "learning_rate": 1.5341213867090495e-05, "loss": 0.4834, "step": 197480 }, { "epoch": 9.80878116618655, "grad_norm": 0.2216796875, "learning_rate": 1.5301480083440946e-05, "loss": 0.4689, "step": 197490 }, { "epoch": 9.809277838482169, "grad_norm": 0.1982421875, "learning_rate": 1.52617462997914e-05, "loss": 0.4608, "step": 197500 }, { "epoch": 9.809774510777789, "grad_norm": 0.21875, "learning_rate": 1.5222012516141851e-05, "loss": 0.528, "step": 197510 }, { "epoch": 9.810271183073407, "grad_norm": 0.2021484375, "learning_rate": 1.5182278732492302e-05, "loss": 0.5089, "step": 197520 }, { "epoch": 9.810767855369027, "grad_norm": 0.193359375, "learning_rate": 1.5142544948842755e-05, "loss": 0.487, "step": 197530 }, { "epoch": 9.811264527664648, "grad_norm": 0.208984375, "learning_rate": 1.5102811165193206e-05, "loss": 0.4944, "step": 197540 }, { "epoch": 9.811761199960266, "grad_norm": 0.1962890625, "learning_rate": 1.5063077381543659e-05, "loss": 0.51, "step": 197550 }, { "epoch": 9.812257872255886, "grad_norm": 0.2119140625, "learning_rate": 1.502334359789411e-05, "loss": 0.486, "step": 197560 }, { "epoch": 9.812754544551504, "grad_norm": 0.1953125, "learning_rate": 1.4983609814244562e-05, "loss": 0.501, "step": 197570 }, { "epoch": 9.813251216847124, "grad_norm": 0.2421875, "learning_rate": 1.4943876030595013e-05, "loss": 0.4839, "step": 197580 }, { "epoch": 9.813747889142743, "grad_norm": 0.21875, "learning_rate": 1.4904142246945466e-05, "loss": 0.4591, "step": 197590 }, { "epoch": 9.814244561438363, "grad_norm": 0.2138671875, "learning_rate": 1.4864408463295917e-05, "loss": 0.484, "step": 197600 }, { "epoch": 9.814741233733983, "grad_norm": 0.22265625, "learning_rate": 1.482467467964637e-05, "loss": 0.5014, "step": 197610 }, { "epoch": 9.815237906029601, "grad_norm": 0.1953125, "learning_rate": 1.4784940895996821e-05, "loss": 0.454, "step": 197620 }, { "epoch": 9.815734578325221, "grad_norm": 0.228515625, "learning_rate": 1.4745207112347274e-05, "loss": 0.526, "step": 197630 }, { "epoch": 9.81623125062084, "grad_norm": 0.2216796875, "learning_rate": 1.4705473328697725e-05, "loss": 0.495, "step": 197640 }, { "epoch": 9.81672792291646, "grad_norm": 0.2001953125, "learning_rate": 1.4665739545048177e-05, "loss": 0.4979, "step": 197650 }, { "epoch": 9.817224595212078, "grad_norm": 0.1845703125, "learning_rate": 1.4626005761398632e-05, "loss": 0.5173, "step": 197660 }, { "epoch": 9.817721267507698, "grad_norm": 0.21875, "learning_rate": 1.4586271977749083e-05, "loss": 0.5001, "step": 197670 }, { "epoch": 9.818217939803318, "grad_norm": 0.2138671875, "learning_rate": 1.4546538194099536e-05, "loss": 0.4927, "step": 197680 }, { "epoch": 9.818714612098937, "grad_norm": 0.2158203125, "learning_rate": 1.4506804410449987e-05, "loss": 0.4713, "step": 197690 }, { "epoch": 9.819211284394557, "grad_norm": 0.1943359375, "learning_rate": 1.446707062680044e-05, "loss": 0.4837, "step": 197700 }, { "epoch": 9.819707956690175, "grad_norm": 0.2021484375, "learning_rate": 1.442733684315089e-05, "loss": 0.4783, "step": 197710 }, { "epoch": 9.820204628985795, "grad_norm": 0.275390625, "learning_rate": 1.4387603059501343e-05, "loss": 0.5004, "step": 197720 }, { "epoch": 9.820701301281414, "grad_norm": 0.224609375, "learning_rate": 1.4347869275851794e-05, "loss": 0.516, "step": 197730 }, { "epoch": 9.821197973577034, "grad_norm": 0.23046875, "learning_rate": 1.4308135492202247e-05, "loss": 0.4959, "step": 197740 }, { "epoch": 9.821694645872654, "grad_norm": 0.212890625, "learning_rate": 1.4268401708552698e-05, "loss": 0.5084, "step": 197750 }, { "epoch": 9.822191318168272, "grad_norm": 0.2138671875, "learning_rate": 1.4228667924903149e-05, "loss": 0.4627, "step": 197760 }, { "epoch": 9.822687990463892, "grad_norm": 0.2265625, "learning_rate": 1.4188934141253602e-05, "loss": 0.4914, "step": 197770 }, { "epoch": 9.82318466275951, "grad_norm": 0.1962890625, "learning_rate": 1.4149200357604053e-05, "loss": 0.4794, "step": 197780 }, { "epoch": 9.82368133505513, "grad_norm": 0.2119140625, "learning_rate": 1.4109466573954505e-05, "loss": 0.4824, "step": 197790 }, { "epoch": 9.82417800735075, "grad_norm": 0.1923828125, "learning_rate": 1.4069732790304956e-05, "loss": 0.4791, "step": 197800 }, { "epoch": 9.82467467964637, "grad_norm": 0.212890625, "learning_rate": 1.4029999006655409e-05, "loss": 0.4911, "step": 197810 }, { "epoch": 9.82517135194199, "grad_norm": 0.197265625, "learning_rate": 1.399026522300586e-05, "loss": 0.4759, "step": 197820 }, { "epoch": 9.825668024237608, "grad_norm": 0.203125, "learning_rate": 1.3950531439356313e-05, "loss": 0.481, "step": 197830 }, { "epoch": 9.826164696533228, "grad_norm": 0.203125, "learning_rate": 1.3910797655706764e-05, "loss": 0.5043, "step": 197840 }, { "epoch": 9.826661368828846, "grad_norm": 0.2041015625, "learning_rate": 1.3871063872057217e-05, "loss": 0.4735, "step": 197850 }, { "epoch": 9.827158041124466, "grad_norm": 0.208984375, "learning_rate": 1.3831330088407668e-05, "loss": 0.5065, "step": 197860 }, { "epoch": 9.827654713420085, "grad_norm": 0.20703125, "learning_rate": 1.3791596304758122e-05, "loss": 0.5063, "step": 197870 }, { "epoch": 9.828151385715705, "grad_norm": 0.189453125, "learning_rate": 1.3751862521108575e-05, "loss": 0.4593, "step": 197880 }, { "epoch": 9.828648058011325, "grad_norm": 0.2294921875, "learning_rate": 1.3712128737459026e-05, "loss": 0.4911, "step": 197890 }, { "epoch": 9.829144730306943, "grad_norm": 0.208984375, "learning_rate": 1.3672394953809478e-05, "loss": 0.4695, "step": 197900 }, { "epoch": 9.829641402602563, "grad_norm": 0.2021484375, "learning_rate": 1.363266117015993e-05, "loss": 0.4862, "step": 197910 }, { "epoch": 9.830138074898182, "grad_norm": 0.234375, "learning_rate": 1.3592927386510382e-05, "loss": 0.464, "step": 197920 }, { "epoch": 9.830634747193802, "grad_norm": 0.2236328125, "learning_rate": 1.3553193602860833e-05, "loss": 0.4768, "step": 197930 }, { "epoch": 9.83113141948942, "grad_norm": 0.220703125, "learning_rate": 1.3513459819211286e-05, "loss": 0.4961, "step": 197940 }, { "epoch": 9.83162809178504, "grad_norm": 0.251953125, "learning_rate": 1.3473726035561737e-05, "loss": 0.4947, "step": 197950 }, { "epoch": 9.83212476408066, "grad_norm": 0.205078125, "learning_rate": 1.343399225191219e-05, "loss": 0.4792, "step": 197960 }, { "epoch": 9.832621436376279, "grad_norm": 0.2041015625, "learning_rate": 1.339425846826264e-05, "loss": 0.4931, "step": 197970 }, { "epoch": 9.833118108671899, "grad_norm": 0.20703125, "learning_rate": 1.3354524684613093e-05, "loss": 0.497, "step": 197980 }, { "epoch": 9.833614780967517, "grad_norm": 0.1923828125, "learning_rate": 1.3314790900963544e-05, "loss": 0.4727, "step": 197990 }, { "epoch": 9.834111453263137, "grad_norm": 0.205078125, "learning_rate": 1.3275057117313997e-05, "loss": 0.4828, "step": 198000 }, { "epoch": 9.834608125558756, "grad_norm": 0.1875, "learning_rate": 1.3235323333664448e-05, "loss": 0.4798, "step": 198010 }, { "epoch": 9.835104797854376, "grad_norm": 0.193359375, "learning_rate": 1.3195589550014901e-05, "loss": 0.4992, "step": 198020 }, { "epoch": 9.835601470149996, "grad_norm": 0.203125, "learning_rate": 1.3155855766365352e-05, "loss": 0.4932, "step": 198030 }, { "epoch": 9.836098142445614, "grad_norm": 0.2060546875, "learning_rate": 1.3116121982715805e-05, "loss": 0.4665, "step": 198040 }, { "epoch": 9.836594814741234, "grad_norm": 0.2236328125, "learning_rate": 1.3076388199066256e-05, "loss": 0.4977, "step": 198050 }, { "epoch": 9.837091487036853, "grad_norm": 0.2177734375, "learning_rate": 1.3036654415416707e-05, "loss": 0.4823, "step": 198060 }, { "epoch": 9.837588159332473, "grad_norm": 0.203125, "learning_rate": 1.299692063176716e-05, "loss": 0.5245, "step": 198070 }, { "epoch": 9.838084831628091, "grad_norm": 0.23046875, "learning_rate": 1.2957186848117614e-05, "loss": 0.5101, "step": 198080 }, { "epoch": 9.838581503923711, "grad_norm": 0.21484375, "learning_rate": 1.2917453064468067e-05, "loss": 0.5216, "step": 198090 }, { "epoch": 9.839078176219331, "grad_norm": 0.197265625, "learning_rate": 1.2877719280818518e-05, "loss": 0.4817, "step": 198100 }, { "epoch": 9.83957484851495, "grad_norm": 0.212890625, "learning_rate": 1.2837985497168969e-05, "loss": 0.4895, "step": 198110 }, { "epoch": 9.84007152081057, "grad_norm": 0.197265625, "learning_rate": 1.2798251713519421e-05, "loss": 0.5008, "step": 198120 }, { "epoch": 9.840568193106188, "grad_norm": 0.216796875, "learning_rate": 1.2758517929869872e-05, "loss": 0.4854, "step": 198130 }, { "epoch": 9.841064865401808, "grad_norm": 0.2158203125, "learning_rate": 1.2718784146220325e-05, "loss": 0.4617, "step": 198140 }, { "epoch": 9.841561537697427, "grad_norm": 0.212890625, "learning_rate": 1.2679050362570776e-05, "loss": 0.5255, "step": 198150 }, { "epoch": 9.842058209993047, "grad_norm": 0.22265625, "learning_rate": 1.2639316578921229e-05, "loss": 0.4817, "step": 198160 }, { "epoch": 9.842554882288667, "grad_norm": 0.2021484375, "learning_rate": 1.259958279527168e-05, "loss": 0.4856, "step": 198170 }, { "epoch": 9.843051554584285, "grad_norm": 0.1923828125, "learning_rate": 1.2559849011622133e-05, "loss": 0.4613, "step": 198180 }, { "epoch": 9.843548226879905, "grad_norm": 0.2197265625, "learning_rate": 1.2520115227972584e-05, "loss": 0.5174, "step": 198190 }, { "epoch": 9.844044899175524, "grad_norm": 0.228515625, "learning_rate": 1.2480381444323036e-05, "loss": 0.5079, "step": 198200 }, { "epoch": 9.844541571471144, "grad_norm": 0.2001953125, "learning_rate": 1.2440647660673487e-05, "loss": 0.4683, "step": 198210 }, { "epoch": 9.845038243766762, "grad_norm": 0.2421875, "learning_rate": 1.240091387702394e-05, "loss": 0.5125, "step": 198220 }, { "epoch": 9.845534916062382, "grad_norm": 0.203125, "learning_rate": 1.2361180093374391e-05, "loss": 0.4804, "step": 198230 }, { "epoch": 9.846031588358002, "grad_norm": 0.21484375, "learning_rate": 1.2321446309724846e-05, "loss": 0.477, "step": 198240 }, { "epoch": 9.84652826065362, "grad_norm": 0.2060546875, "learning_rate": 1.2281712526075297e-05, "loss": 0.4946, "step": 198250 }, { "epoch": 9.84702493294924, "grad_norm": 0.2265625, "learning_rate": 1.224197874242575e-05, "loss": 0.4718, "step": 198260 }, { "epoch": 9.847521605244859, "grad_norm": 0.20703125, "learning_rate": 1.22022449587762e-05, "loss": 0.4889, "step": 198270 }, { "epoch": 9.84801827754048, "grad_norm": 0.2158203125, "learning_rate": 1.2162511175126651e-05, "loss": 0.4428, "step": 198280 }, { "epoch": 9.848514949836098, "grad_norm": 0.1982421875, "learning_rate": 1.2122777391477104e-05, "loss": 0.4802, "step": 198290 }, { "epoch": 9.849011622131718, "grad_norm": 0.20703125, "learning_rate": 1.2083043607827555e-05, "loss": 0.478, "step": 198300 }, { "epoch": 9.849508294427338, "grad_norm": 0.2119140625, "learning_rate": 1.2043309824178008e-05, "loss": 0.4929, "step": 198310 }, { "epoch": 9.850004966722956, "grad_norm": 0.205078125, "learning_rate": 1.2003576040528459e-05, "loss": 0.4814, "step": 198320 }, { "epoch": 9.850501639018576, "grad_norm": 0.21875, "learning_rate": 1.1963842256878912e-05, "loss": 0.498, "step": 198330 }, { "epoch": 9.850998311314195, "grad_norm": 0.1943359375, "learning_rate": 1.1924108473229364e-05, "loss": 0.4992, "step": 198340 }, { "epoch": 9.851494983609815, "grad_norm": 0.2109375, "learning_rate": 1.1884374689579817e-05, "loss": 0.5012, "step": 198350 }, { "epoch": 9.851991655905433, "grad_norm": 0.1982421875, "learning_rate": 1.1844640905930268e-05, "loss": 0.4566, "step": 198360 }, { "epoch": 9.852488328201053, "grad_norm": 0.20703125, "learning_rate": 1.180490712228072e-05, "loss": 0.4975, "step": 198370 }, { "epoch": 9.852985000496671, "grad_norm": 0.208984375, "learning_rate": 1.1765173338631172e-05, "loss": 0.4822, "step": 198380 }, { "epoch": 9.853481672792292, "grad_norm": 0.296875, "learning_rate": 1.1725439554981624e-05, "loss": 0.4713, "step": 198390 }, { "epoch": 9.853978345087912, "grad_norm": 0.2109375, "learning_rate": 1.1685705771332076e-05, "loss": 0.5041, "step": 198400 }, { "epoch": 9.85447501738353, "grad_norm": 0.2138671875, "learning_rate": 1.1645971987682528e-05, "loss": 0.4846, "step": 198410 }, { "epoch": 9.85497168967915, "grad_norm": 0.201171875, "learning_rate": 1.160623820403298e-05, "loss": 0.4568, "step": 198420 }, { "epoch": 9.855468361974768, "grad_norm": 0.220703125, "learning_rate": 1.156650442038343e-05, "loss": 0.5135, "step": 198430 }, { "epoch": 9.855965034270389, "grad_norm": 0.255859375, "learning_rate": 1.1526770636733883e-05, "loss": 0.5184, "step": 198440 }, { "epoch": 9.856461706566007, "grad_norm": 0.224609375, "learning_rate": 1.1487036853084336e-05, "loss": 0.4928, "step": 198450 }, { "epoch": 9.856958378861627, "grad_norm": 0.2060546875, "learning_rate": 1.1447303069434788e-05, "loss": 0.4681, "step": 198460 }, { "epoch": 9.857455051157247, "grad_norm": 0.216796875, "learning_rate": 1.140756928578524e-05, "loss": 0.469, "step": 198470 }, { "epoch": 9.857951723452866, "grad_norm": 0.21875, "learning_rate": 1.1367835502135692e-05, "loss": 0.4632, "step": 198480 }, { "epoch": 9.858448395748486, "grad_norm": 0.1884765625, "learning_rate": 1.1328101718486143e-05, "loss": 0.4931, "step": 198490 }, { "epoch": 9.858945068044104, "grad_norm": 0.232421875, "learning_rate": 1.1288367934836596e-05, "loss": 0.4856, "step": 198500 }, { "epoch": 9.859441740339724, "grad_norm": 0.2578125, "learning_rate": 1.1248634151187047e-05, "loss": 0.4895, "step": 198510 }, { "epoch": 9.859938412635342, "grad_norm": 0.2041015625, "learning_rate": 1.12089003675375e-05, "loss": 0.4701, "step": 198520 }, { "epoch": 9.860435084930963, "grad_norm": 0.240234375, "learning_rate": 1.116916658388795e-05, "loss": 0.4943, "step": 198530 }, { "epoch": 9.860931757226583, "grad_norm": 0.2470703125, "learning_rate": 1.1129432800238403e-05, "loss": 0.482, "step": 198540 }, { "epoch": 9.861428429522201, "grad_norm": 0.2080078125, "learning_rate": 1.1089699016588854e-05, "loss": 0.4901, "step": 198550 }, { "epoch": 9.861925101817821, "grad_norm": 0.244140625, "learning_rate": 1.1049965232939307e-05, "loss": 0.4929, "step": 198560 }, { "epoch": 9.86242177411344, "grad_norm": 0.208984375, "learning_rate": 1.101023144928976e-05, "loss": 0.5104, "step": 198570 }, { "epoch": 9.86291844640906, "grad_norm": 0.2236328125, "learning_rate": 1.0970497665640211e-05, "loss": 0.4708, "step": 198580 }, { "epoch": 9.863415118704678, "grad_norm": 0.2138671875, "learning_rate": 1.0930763881990664e-05, "loss": 0.4916, "step": 198590 }, { "epoch": 9.863911791000298, "grad_norm": 0.2021484375, "learning_rate": 1.0891030098341115e-05, "loss": 0.4666, "step": 198600 }, { "epoch": 9.864408463295918, "grad_norm": 0.28515625, "learning_rate": 1.0851296314691567e-05, "loss": 0.5095, "step": 198610 }, { "epoch": 9.864905135591536, "grad_norm": 0.2080078125, "learning_rate": 1.0811562531042018e-05, "loss": 0.47, "step": 198620 }, { "epoch": 9.865401807887157, "grad_norm": 0.2021484375, "learning_rate": 1.0771828747392471e-05, "loss": 0.4777, "step": 198630 }, { "epoch": 9.865898480182775, "grad_norm": 0.2138671875, "learning_rate": 1.0732094963742922e-05, "loss": 0.4784, "step": 198640 }, { "epoch": 9.866395152478395, "grad_norm": 0.197265625, "learning_rate": 1.0692361180093375e-05, "loss": 0.4772, "step": 198650 }, { "epoch": 9.866891824774013, "grad_norm": 0.212890625, "learning_rate": 1.0652627396443828e-05, "loss": 0.4804, "step": 198660 }, { "epoch": 9.867388497069634, "grad_norm": 0.1962890625, "learning_rate": 1.0612893612794279e-05, "loss": 0.4588, "step": 198670 }, { "epoch": 9.867885169365254, "grad_norm": 0.21484375, "learning_rate": 1.0573159829144731e-05, "loss": 0.4805, "step": 198680 }, { "epoch": 9.868381841660872, "grad_norm": 0.21875, "learning_rate": 1.0533426045495182e-05, "loss": 0.4908, "step": 198690 }, { "epoch": 9.868878513956492, "grad_norm": 0.236328125, "learning_rate": 1.0493692261845635e-05, "loss": 0.4779, "step": 198700 }, { "epoch": 9.86937518625211, "grad_norm": 0.2119140625, "learning_rate": 1.0453958478196086e-05, "loss": 0.4911, "step": 198710 }, { "epoch": 9.86987185854773, "grad_norm": 0.2158203125, "learning_rate": 1.0414224694546539e-05, "loss": 0.49, "step": 198720 }, { "epoch": 9.870368530843349, "grad_norm": 0.1943359375, "learning_rate": 1.037449091089699e-05, "loss": 0.467, "step": 198730 }, { "epoch": 9.870865203138969, "grad_norm": 0.244140625, "learning_rate": 1.0334757127247443e-05, "loss": 0.5067, "step": 198740 }, { "epoch": 9.871361875434587, "grad_norm": 0.1982421875, "learning_rate": 1.0295023343597894e-05, "loss": 0.4727, "step": 198750 }, { "epoch": 9.871858547730207, "grad_norm": 0.208984375, "learning_rate": 1.0255289559948346e-05, "loss": 0.468, "step": 198760 }, { "epoch": 9.872355220025828, "grad_norm": 0.2177734375, "learning_rate": 1.0215555776298799e-05, "loss": 0.488, "step": 198770 }, { "epoch": 9.872851892321446, "grad_norm": 0.21484375, "learning_rate": 1.017582199264925e-05, "loss": 0.4788, "step": 198780 }, { "epoch": 9.873348564617066, "grad_norm": 0.2236328125, "learning_rate": 1.0136088208999703e-05, "loss": 0.5071, "step": 198790 }, { "epoch": 9.873845236912684, "grad_norm": 0.2412109375, "learning_rate": 1.0096354425350154e-05, "loss": 0.4792, "step": 198800 }, { "epoch": 9.874341909208304, "grad_norm": 0.2431640625, "learning_rate": 1.0056620641700607e-05, "loss": 0.5005, "step": 198810 }, { "epoch": 9.874838581503923, "grad_norm": 0.24609375, "learning_rate": 1.0016886858051058e-05, "loss": 0.5227, "step": 198820 }, { "epoch": 9.875335253799543, "grad_norm": 0.201171875, "learning_rate": 9.97715307440151e-06, "loss": 0.4724, "step": 198830 }, { "epoch": 9.875831926095163, "grad_norm": 0.2158203125, "learning_rate": 9.937419290751961e-06, "loss": 0.4943, "step": 198840 }, { "epoch": 9.876328598390781, "grad_norm": 0.2041015625, "learning_rate": 9.897685507102414e-06, "loss": 0.5062, "step": 198850 }, { "epoch": 9.876825270686401, "grad_norm": 0.2138671875, "learning_rate": 9.857951723452865e-06, "loss": 0.5059, "step": 198860 }, { "epoch": 9.87732194298202, "grad_norm": 0.23828125, "learning_rate": 9.81821793980332e-06, "loss": 0.5141, "step": 198870 }, { "epoch": 9.87781861527764, "grad_norm": 0.1953125, "learning_rate": 9.77848415615377e-06, "loss": 0.5031, "step": 198880 }, { "epoch": 9.878315287573258, "grad_norm": 0.1904296875, "learning_rate": 9.738750372504223e-06, "loss": 0.4937, "step": 198890 }, { "epoch": 9.878811959868878, "grad_norm": 0.21484375, "learning_rate": 9.699016588854674e-06, "loss": 0.4737, "step": 198900 }, { "epoch": 9.879308632164499, "grad_norm": 0.205078125, "learning_rate": 9.659282805205127e-06, "loss": 0.475, "step": 198910 }, { "epoch": 9.879805304460117, "grad_norm": 0.2001953125, "learning_rate": 9.619549021555578e-06, "loss": 0.5084, "step": 198920 }, { "epoch": 9.880301976755737, "grad_norm": 0.21484375, "learning_rate": 9.57981523790603e-06, "loss": 0.5012, "step": 198930 }, { "epoch": 9.880798649051355, "grad_norm": 0.2255859375, "learning_rate": 9.540081454256482e-06, "loss": 0.483, "step": 198940 }, { "epoch": 9.881295321346975, "grad_norm": 0.236328125, "learning_rate": 9.500347670606933e-06, "loss": 0.5229, "step": 198950 }, { "epoch": 9.881791993642594, "grad_norm": 0.2421875, "learning_rate": 9.460613886957386e-06, "loss": 0.524, "step": 198960 }, { "epoch": 9.882288665938214, "grad_norm": 0.208984375, "learning_rate": 9.420880103307837e-06, "loss": 0.4776, "step": 198970 }, { "epoch": 9.882785338233834, "grad_norm": 0.21484375, "learning_rate": 9.381146319658291e-06, "loss": 0.4885, "step": 198980 }, { "epoch": 9.883282010529452, "grad_norm": 0.2080078125, "learning_rate": 9.341412536008742e-06, "loss": 0.4874, "step": 198990 }, { "epoch": 9.883778682825072, "grad_norm": 0.201171875, "learning_rate": 9.301678752359195e-06, "loss": 0.5069, "step": 199000 }, { "epoch": 9.88427535512069, "grad_norm": 0.193359375, "learning_rate": 9.261944968709646e-06, "loss": 0.4734, "step": 199010 }, { "epoch": 9.884772027416311, "grad_norm": 0.228515625, "learning_rate": 9.222211185060098e-06, "loss": 0.4917, "step": 199020 }, { "epoch": 9.88526869971193, "grad_norm": 0.208984375, "learning_rate": 9.18247740141055e-06, "loss": 0.4996, "step": 199030 }, { "epoch": 9.88576537200755, "grad_norm": 0.2109375, "learning_rate": 9.142743617761002e-06, "loss": 0.5089, "step": 199040 }, { "epoch": 9.88626204430317, "grad_norm": 0.212890625, "learning_rate": 9.103009834111453e-06, "loss": 0.5113, "step": 199050 }, { "epoch": 9.886758716598788, "grad_norm": 0.205078125, "learning_rate": 9.063276050461906e-06, "loss": 0.483, "step": 199060 }, { "epoch": 9.887255388894408, "grad_norm": 0.220703125, "learning_rate": 9.023542266812357e-06, "loss": 0.5336, "step": 199070 }, { "epoch": 9.887752061190026, "grad_norm": 0.21484375, "learning_rate": 8.98380848316281e-06, "loss": 0.4434, "step": 199080 }, { "epoch": 9.888248733485646, "grad_norm": 0.19921875, "learning_rate": 8.944074699513262e-06, "loss": 0.494, "step": 199090 }, { "epoch": 9.888745405781265, "grad_norm": 0.22265625, "learning_rate": 8.904340915863713e-06, "loss": 0.5094, "step": 199100 }, { "epoch": 9.889242078076885, "grad_norm": 0.2197265625, "learning_rate": 8.864607132214166e-06, "loss": 0.4903, "step": 199110 }, { "epoch": 9.889738750372505, "grad_norm": 0.2177734375, "learning_rate": 8.824873348564617e-06, "loss": 0.4492, "step": 199120 }, { "epoch": 9.890235422668123, "grad_norm": 0.228515625, "learning_rate": 8.78513956491507e-06, "loss": 0.5074, "step": 199130 }, { "epoch": 9.890732094963743, "grad_norm": 0.2353515625, "learning_rate": 8.745405781265521e-06, "loss": 0.4771, "step": 199140 }, { "epoch": 9.891228767259362, "grad_norm": 0.2080078125, "learning_rate": 8.705671997615974e-06, "loss": 0.5127, "step": 199150 }, { "epoch": 9.891725439554982, "grad_norm": 0.2158203125, "learning_rate": 8.665938213966425e-06, "loss": 0.4915, "step": 199160 }, { "epoch": 9.8922221118506, "grad_norm": 0.216796875, "learning_rate": 8.626204430316877e-06, "loss": 0.4868, "step": 199170 }, { "epoch": 9.89271878414622, "grad_norm": 0.279296875, "learning_rate": 8.586470646667328e-06, "loss": 0.5011, "step": 199180 }, { "epoch": 9.89321545644184, "grad_norm": 0.220703125, "learning_rate": 8.546736863017781e-06, "loss": 0.4708, "step": 199190 }, { "epoch": 9.893712128737459, "grad_norm": 0.2080078125, "learning_rate": 8.507003079368234e-06, "loss": 0.4672, "step": 199200 }, { "epoch": 9.894208801033079, "grad_norm": 0.2197265625, "learning_rate": 8.467269295718685e-06, "loss": 0.496, "step": 199210 }, { "epoch": 9.894705473328697, "grad_norm": 0.25390625, "learning_rate": 8.427535512069138e-06, "loss": 0.4965, "step": 199220 }, { "epoch": 9.895202145624317, "grad_norm": 0.22265625, "learning_rate": 8.387801728419589e-06, "loss": 0.4911, "step": 199230 }, { "epoch": 9.895698817919936, "grad_norm": 0.201171875, "learning_rate": 8.348067944770041e-06, "loss": 0.4907, "step": 199240 }, { "epoch": 9.896195490215556, "grad_norm": 0.236328125, "learning_rate": 8.308334161120492e-06, "loss": 0.5264, "step": 199250 }, { "epoch": 9.896692162511176, "grad_norm": 0.220703125, "learning_rate": 8.268600377470945e-06, "loss": 0.4876, "step": 199260 }, { "epoch": 9.897188834806794, "grad_norm": 0.197265625, "learning_rate": 8.228866593821396e-06, "loss": 0.5253, "step": 199270 }, { "epoch": 9.897685507102414, "grad_norm": 0.2021484375, "learning_rate": 8.189132810171849e-06, "loss": 0.4679, "step": 199280 }, { "epoch": 9.898182179398033, "grad_norm": 0.2119140625, "learning_rate": 8.149399026522302e-06, "loss": 0.512, "step": 199290 }, { "epoch": 9.898678851693653, "grad_norm": 0.2314453125, "learning_rate": 8.109665242872753e-06, "loss": 0.4922, "step": 199300 }, { "epoch": 9.899175523989271, "grad_norm": 0.216796875, "learning_rate": 8.069931459223205e-06, "loss": 0.4891, "step": 199310 }, { "epoch": 9.899672196284891, "grad_norm": 0.291015625, "learning_rate": 8.030197675573656e-06, "loss": 0.5064, "step": 199320 }, { "epoch": 9.900168868580511, "grad_norm": 0.1923828125, "learning_rate": 7.990463891924109e-06, "loss": 0.4944, "step": 199330 }, { "epoch": 9.90066554087613, "grad_norm": 0.1923828125, "learning_rate": 7.95073010827456e-06, "loss": 0.4719, "step": 199340 }, { "epoch": 9.90116221317175, "grad_norm": 0.2119140625, "learning_rate": 7.910996324625013e-06, "loss": 0.4804, "step": 199350 }, { "epoch": 9.901658885467368, "grad_norm": 0.197265625, "learning_rate": 7.871262540975464e-06, "loss": 0.5012, "step": 199360 }, { "epoch": 9.902155557762988, "grad_norm": 0.2451171875, "learning_rate": 7.831528757325917e-06, "loss": 0.5368, "step": 199370 }, { "epoch": 9.902652230058607, "grad_norm": 0.212890625, "learning_rate": 7.791794973676368e-06, "loss": 0.5028, "step": 199380 }, { "epoch": 9.903148902354227, "grad_norm": 0.19921875, "learning_rate": 7.75206119002682e-06, "loss": 0.4807, "step": 199390 }, { "epoch": 9.903645574649847, "grad_norm": 0.2060546875, "learning_rate": 7.712327406377273e-06, "loss": 0.4862, "step": 199400 }, { "epoch": 9.904142246945465, "grad_norm": 0.21484375, "learning_rate": 7.672593622727726e-06, "loss": 0.5135, "step": 199410 }, { "epoch": 9.904638919241085, "grad_norm": 0.2060546875, "learning_rate": 7.632859839078177e-06, "loss": 0.4843, "step": 199420 }, { "epoch": 9.905135591536704, "grad_norm": 0.1865234375, "learning_rate": 7.593126055428629e-06, "loss": 0.5021, "step": 199430 }, { "epoch": 9.905632263832324, "grad_norm": 0.216796875, "learning_rate": 7.5533922717790806e-06, "loss": 0.4686, "step": 199440 }, { "epoch": 9.906128936127942, "grad_norm": 0.2255859375, "learning_rate": 7.5136584881295324e-06, "loss": 0.4893, "step": 199450 }, { "epoch": 9.906625608423562, "grad_norm": 0.216796875, "learning_rate": 7.473924704479984e-06, "loss": 0.4977, "step": 199460 }, { "epoch": 9.907122280719182, "grad_norm": 0.23046875, "learning_rate": 7.434190920830436e-06, "loss": 0.517, "step": 199470 }, { "epoch": 9.9076189530148, "grad_norm": 0.2275390625, "learning_rate": 7.394457137180888e-06, "loss": 0.4733, "step": 199480 }, { "epoch": 9.90811562531042, "grad_norm": 0.21484375, "learning_rate": 7.35472335353134e-06, "loss": 0.4651, "step": 199490 }, { "epoch": 9.90861229760604, "grad_norm": 0.2099609375, "learning_rate": 7.314989569881793e-06, "loss": 0.4776, "step": 199500 }, { "epoch": 9.90910896990166, "grad_norm": 0.2265625, "learning_rate": 7.2752557862322445e-06, "loss": 0.4696, "step": 199510 }, { "epoch": 9.909605642197278, "grad_norm": 0.2060546875, "learning_rate": 7.235522002582696e-06, "loss": 0.4766, "step": 199520 }, { "epoch": 9.910102314492898, "grad_norm": 0.20703125, "learning_rate": 7.195788218933148e-06, "loss": 0.4903, "step": 199530 }, { "epoch": 9.910598986788518, "grad_norm": 0.2001953125, "learning_rate": 7.1560544352836e-06, "loss": 0.4591, "step": 199540 }, { "epoch": 9.911095659084136, "grad_norm": 0.22265625, "learning_rate": 7.116320651634052e-06, "loss": 0.4726, "step": 199550 }, { "epoch": 9.911592331379756, "grad_norm": 0.189453125, "learning_rate": 7.076586867984504e-06, "loss": 0.5133, "step": 199560 }, { "epoch": 9.912089003675375, "grad_norm": 0.2333984375, "learning_rate": 7.036853084334956e-06, "loss": 0.4847, "step": 199570 }, { "epoch": 9.912585675970995, "grad_norm": 0.216796875, "learning_rate": 6.997119300685408e-06, "loss": 0.5002, "step": 199580 }, { "epoch": 9.913082348266613, "grad_norm": 0.220703125, "learning_rate": 6.9573855170358595e-06, "loss": 0.4941, "step": 199590 }, { "epoch": 9.913579020562233, "grad_norm": 0.220703125, "learning_rate": 6.917651733386311e-06, "loss": 0.5123, "step": 199600 }, { "epoch": 9.914075692857853, "grad_norm": 0.2119140625, "learning_rate": 6.877917949736765e-06, "loss": 0.5071, "step": 199610 }, { "epoch": 9.914572365153472, "grad_norm": 0.2236328125, "learning_rate": 6.838184166087217e-06, "loss": 0.4905, "step": 199620 }, { "epoch": 9.915069037449092, "grad_norm": 0.2080078125, "learning_rate": 6.798450382437669e-06, "loss": 0.4938, "step": 199630 }, { "epoch": 9.91556570974471, "grad_norm": 0.232421875, "learning_rate": 6.75871659878812e-06, "loss": 0.4943, "step": 199640 }, { "epoch": 9.91606238204033, "grad_norm": 0.2021484375, "learning_rate": 6.718982815138572e-06, "loss": 0.4932, "step": 199650 }, { "epoch": 9.916559054335949, "grad_norm": 0.29296875, "learning_rate": 6.6792490314890235e-06, "loss": 0.4677, "step": 199660 }, { "epoch": 9.917055726631569, "grad_norm": 0.21875, "learning_rate": 6.639515247839475e-06, "loss": 0.4805, "step": 199670 }, { "epoch": 9.917552398927189, "grad_norm": 0.25390625, "learning_rate": 6.599781464189927e-06, "loss": 0.5073, "step": 199680 }, { "epoch": 9.918049071222807, "grad_norm": 0.1982421875, "learning_rate": 6.560047680540379e-06, "loss": 0.4645, "step": 199690 }, { "epoch": 9.918545743518427, "grad_norm": 0.203125, "learning_rate": 6.520313896890831e-06, "loss": 0.458, "step": 199700 }, { "epoch": 9.919042415814046, "grad_norm": 0.20703125, "learning_rate": 6.4805801132412845e-06, "loss": 0.4685, "step": 199710 }, { "epoch": 9.919539088109666, "grad_norm": 0.2041015625, "learning_rate": 6.440846329591736e-06, "loss": 0.4949, "step": 199720 }, { "epoch": 9.920035760405284, "grad_norm": 0.203125, "learning_rate": 6.401112545942188e-06, "loss": 0.4867, "step": 199730 }, { "epoch": 9.920532432700904, "grad_norm": 0.2080078125, "learning_rate": 6.36137876229264e-06, "loss": 0.4741, "step": 199740 }, { "epoch": 9.921029104996522, "grad_norm": 0.271484375, "learning_rate": 6.321644978643092e-06, "loss": 0.4931, "step": 199750 }, { "epoch": 9.921525777292143, "grad_norm": 0.2060546875, "learning_rate": 6.281911194993544e-06, "loss": 0.4854, "step": 199760 }, { "epoch": 9.922022449587763, "grad_norm": 0.23046875, "learning_rate": 6.242177411343996e-06, "loss": 0.4866, "step": 199770 }, { "epoch": 9.922519121883381, "grad_norm": 0.2119140625, "learning_rate": 6.202443627694448e-06, "loss": 0.4936, "step": 199780 }, { "epoch": 9.923015794179001, "grad_norm": 0.2138671875, "learning_rate": 6.1627098440448995e-06, "loss": 0.4695, "step": 199790 }, { "epoch": 9.92351246647462, "grad_norm": 0.205078125, "learning_rate": 6.122976060395351e-06, "loss": 0.5042, "step": 199800 }, { "epoch": 9.92400913877024, "grad_norm": 0.208984375, "learning_rate": 6.083242276745803e-06, "loss": 0.4995, "step": 199810 }, { "epoch": 9.924505811065858, "grad_norm": 0.2158203125, "learning_rate": 6.043508493096255e-06, "loss": 0.4638, "step": 199820 }, { "epoch": 9.925002483361478, "grad_norm": 0.224609375, "learning_rate": 6.003774709446707e-06, "loss": 0.4836, "step": 199830 }, { "epoch": 9.925499155657098, "grad_norm": 0.1943359375, "learning_rate": 5.964040925797159e-06, "loss": 0.4783, "step": 199840 }, { "epoch": 9.925995827952717, "grad_norm": 0.185546875, "learning_rate": 5.924307142147612e-06, "loss": 0.4765, "step": 199850 }, { "epoch": 9.926492500248337, "grad_norm": 0.2109375, "learning_rate": 5.8845733584980635e-06, "loss": 0.5236, "step": 199860 }, { "epoch": 9.926989172543955, "grad_norm": 0.2431640625, "learning_rate": 5.844839574848515e-06, "loss": 0.5028, "step": 199870 }, { "epoch": 9.927485844839575, "grad_norm": 0.208984375, "learning_rate": 5.805105791198967e-06, "loss": 0.4942, "step": 199880 }, { "epoch": 9.927982517135193, "grad_norm": 0.21875, "learning_rate": 5.765372007549419e-06, "loss": 0.5034, "step": 199890 }, { "epoch": 9.928479189430814, "grad_norm": 0.205078125, "learning_rate": 5.725638223899871e-06, "loss": 0.4904, "step": 199900 }, { "epoch": 9.928975861726434, "grad_norm": 0.2109375, "learning_rate": 5.685904440250323e-06, "loss": 0.5059, "step": 199910 }, { "epoch": 9.929472534022052, "grad_norm": 0.2119140625, "learning_rate": 5.646170656600775e-06, "loss": 0.4754, "step": 199920 }, { "epoch": 9.929969206317672, "grad_norm": 0.212890625, "learning_rate": 5.606436872951227e-06, "loss": 0.5001, "step": 199930 }, { "epoch": 9.93046587861329, "grad_norm": 0.205078125, "learning_rate": 5.5667030893016785e-06, "loss": 0.4922, "step": 199940 }, { "epoch": 9.93096255090891, "grad_norm": 0.21875, "learning_rate": 5.526969305652131e-06, "loss": 0.4963, "step": 199950 }, { "epoch": 9.931459223204529, "grad_norm": 0.2451171875, "learning_rate": 5.487235522002583e-06, "loss": 0.5201, "step": 199960 }, { "epoch": 9.931955895500149, "grad_norm": 0.2119140625, "learning_rate": 5.447501738353035e-06, "loss": 0.4807, "step": 199970 }, { "epoch": 9.93245256779577, "grad_norm": 0.208984375, "learning_rate": 5.407767954703487e-06, "loss": 0.4466, "step": 199980 }, { "epoch": 9.932949240091387, "grad_norm": 0.2333984375, "learning_rate": 5.368034171053939e-06, "loss": 0.4951, "step": 199990 }, { "epoch": 9.933445912387008, "grad_norm": 0.201171875, "learning_rate": 5.3283003874043914e-06, "loss": 0.4759, "step": 200000 }, { "epoch": 9.933942584682626, "grad_norm": 0.20703125, "learning_rate": 5.288566603754843e-06, "loss": 0.4694, "step": 200010 }, { "epoch": 9.934439256978246, "grad_norm": 0.189453125, "learning_rate": 5.248832820105295e-06, "loss": 0.4585, "step": 200020 }, { "epoch": 9.934935929273864, "grad_norm": 0.216796875, "learning_rate": 5.209099036455747e-06, "loss": 0.4969, "step": 200030 }, { "epoch": 9.935432601569484, "grad_norm": 0.201171875, "learning_rate": 5.169365252806199e-06, "loss": 0.4757, "step": 200040 }, { "epoch": 9.935929273865105, "grad_norm": 0.2158203125, "learning_rate": 5.12963146915665e-06, "loss": 0.4838, "step": 200050 }, { "epoch": 9.936425946160723, "grad_norm": 0.2041015625, "learning_rate": 5.089897685507103e-06, "loss": 0.4847, "step": 200060 }, { "epoch": 9.936922618456343, "grad_norm": 0.2109375, "learning_rate": 5.0501639018575545e-06, "loss": 0.4847, "step": 200070 }, { "epoch": 9.937419290751961, "grad_norm": 0.2060546875, "learning_rate": 5.010430118208006e-06, "loss": 0.4708, "step": 200080 }, { "epoch": 9.937915963047582, "grad_norm": 0.18359375, "learning_rate": 4.970696334558458e-06, "loss": 0.4802, "step": 200090 }, { "epoch": 9.9384126353432, "grad_norm": 0.197265625, "learning_rate": 4.93096255090891e-06, "loss": 0.4859, "step": 200100 }, { "epoch": 9.93890930763882, "grad_norm": 0.185546875, "learning_rate": 4.891228767259363e-06, "loss": 0.4903, "step": 200110 }, { "epoch": 9.93940597993444, "grad_norm": 0.2314453125, "learning_rate": 4.851494983609815e-06, "loss": 0.5034, "step": 200120 }, { "epoch": 9.939902652230058, "grad_norm": 0.236328125, "learning_rate": 4.811761199960267e-06, "loss": 0.4839, "step": 200130 }, { "epoch": 9.940399324525679, "grad_norm": 0.201171875, "learning_rate": 4.7720274163107185e-06, "loss": 0.4631, "step": 200140 }, { "epoch": 9.940895996821297, "grad_norm": 0.205078125, "learning_rate": 4.73229363266117e-06, "loss": 0.4924, "step": 200150 }, { "epoch": 9.941392669116917, "grad_norm": 0.2001953125, "learning_rate": 4.692559849011622e-06, "loss": 0.4681, "step": 200160 }, { "epoch": 9.941889341412535, "grad_norm": 0.201171875, "learning_rate": 4.652826065362074e-06, "loss": 0.4945, "step": 200170 }, { "epoch": 9.942386013708155, "grad_norm": 0.2216796875, "learning_rate": 4.613092281712526e-06, "loss": 0.4998, "step": 200180 }, { "epoch": 9.942882686003774, "grad_norm": 0.236328125, "learning_rate": 4.573358498062978e-06, "loss": 0.5073, "step": 200190 }, { "epoch": 9.943379358299394, "grad_norm": 0.20703125, "learning_rate": 4.53362471441343e-06, "loss": 0.5035, "step": 200200 }, { "epoch": 9.943876030595014, "grad_norm": 0.2373046875, "learning_rate": 4.4938909307638825e-06, "loss": 0.5029, "step": 200210 }, { "epoch": 9.944372702890632, "grad_norm": 0.21484375, "learning_rate": 4.454157147114334e-06, "loss": 0.511, "step": 200220 }, { "epoch": 9.944869375186252, "grad_norm": 0.216796875, "learning_rate": 4.414423363464786e-06, "loss": 0.5102, "step": 200230 }, { "epoch": 9.94536604748187, "grad_norm": 0.2333984375, "learning_rate": 4.374689579815238e-06, "loss": 0.5198, "step": 200240 }, { "epoch": 9.945862719777491, "grad_norm": 0.21484375, "learning_rate": 4.33495579616569e-06, "loss": 0.482, "step": 200250 }, { "epoch": 9.94635939207311, "grad_norm": 0.2109375, "learning_rate": 4.295222012516142e-06, "loss": 0.5265, "step": 200260 }, { "epoch": 9.94685606436873, "grad_norm": 0.21484375, "learning_rate": 4.2554882288665946e-06, "loss": 0.5336, "step": 200270 }, { "epoch": 9.94735273666435, "grad_norm": 0.25390625, "learning_rate": 4.2157544452170464e-06, "loss": 0.5028, "step": 200280 }, { "epoch": 9.947849408959968, "grad_norm": 0.228515625, "learning_rate": 4.176020661567498e-06, "loss": 0.4794, "step": 200290 }, { "epoch": 9.948346081255588, "grad_norm": 0.234375, "learning_rate": 4.13628687791795e-06, "loss": 0.5099, "step": 200300 }, { "epoch": 9.948842753551206, "grad_norm": 0.1943359375, "learning_rate": 4.096553094268401e-06, "loss": 0.4611, "step": 200310 }, { "epoch": 9.949339425846826, "grad_norm": 0.197265625, "learning_rate": 4.056819310618854e-06, "loss": 0.466, "step": 200320 }, { "epoch": 9.949836098142445, "grad_norm": 0.2314453125, "learning_rate": 4.017085526969306e-06, "loss": 0.482, "step": 200330 }, { "epoch": 9.950332770438065, "grad_norm": 0.2119140625, "learning_rate": 3.977351743319758e-06, "loss": 0.4685, "step": 200340 }, { "epoch": 9.950829442733685, "grad_norm": 0.2109375, "learning_rate": 3.9376179596702096e-06, "loss": 0.4698, "step": 200350 }, { "epoch": 9.951326115029303, "grad_norm": 0.1767578125, "learning_rate": 3.8978841760206614e-06, "loss": 0.4589, "step": 200360 }, { "epoch": 9.951822787324923, "grad_norm": 0.205078125, "learning_rate": 3.858150392371114e-06, "loss": 0.4901, "step": 200370 }, { "epoch": 9.952319459620542, "grad_norm": 0.212890625, "learning_rate": 3.818416608721566e-06, "loss": 0.4826, "step": 200380 }, { "epoch": 9.952816131916162, "grad_norm": 0.2177734375, "learning_rate": 3.778682825072018e-06, "loss": 0.4923, "step": 200390 }, { "epoch": 9.95331280421178, "grad_norm": 0.2001953125, "learning_rate": 3.7389490414224694e-06, "loss": 0.4776, "step": 200400 }, { "epoch": 9.9538094765074, "grad_norm": 0.236328125, "learning_rate": 3.6992152577729212e-06, "loss": 0.4819, "step": 200410 }, { "epoch": 9.95430614880302, "grad_norm": 0.201171875, "learning_rate": 3.659481474123374e-06, "loss": 0.4963, "step": 200420 }, { "epoch": 9.954802821098639, "grad_norm": 0.2197265625, "learning_rate": 3.619747690473826e-06, "loss": 0.4885, "step": 200430 }, { "epoch": 9.955299493394259, "grad_norm": 0.2060546875, "learning_rate": 3.5800139068242777e-06, "loss": 0.466, "step": 200440 }, { "epoch": 9.955796165689877, "grad_norm": 0.201171875, "learning_rate": 3.5402801231747296e-06, "loss": 0.4784, "step": 200450 }, { "epoch": 9.956292837985497, "grad_norm": 0.2216796875, "learning_rate": 3.5005463395251814e-06, "loss": 0.5032, "step": 200460 }, { "epoch": 9.956789510281116, "grad_norm": 0.2001953125, "learning_rate": 3.4608125558756333e-06, "loss": 0.509, "step": 200470 }, { "epoch": 9.957286182576736, "grad_norm": 0.19140625, "learning_rate": 3.4210787722260856e-06, "loss": 0.4765, "step": 200480 }, { "epoch": 9.957782854872356, "grad_norm": 0.21484375, "learning_rate": 3.3813449885765375e-06, "loss": 0.492, "step": 200490 }, { "epoch": 9.958279527167974, "grad_norm": 0.2060546875, "learning_rate": 3.3416112049269894e-06, "loss": 0.5059, "step": 200500 }, { "epoch": 9.958776199463594, "grad_norm": 0.2021484375, "learning_rate": 3.3018774212774412e-06, "loss": 0.4879, "step": 200510 }, { "epoch": 9.959272871759213, "grad_norm": 0.2080078125, "learning_rate": 3.262143637627893e-06, "loss": 0.4838, "step": 200520 }, { "epoch": 9.959769544054833, "grad_norm": 0.2021484375, "learning_rate": 3.2224098539783454e-06, "loss": 0.4625, "step": 200530 }, { "epoch": 9.960266216350451, "grad_norm": 0.2275390625, "learning_rate": 3.1826760703287973e-06, "loss": 0.5267, "step": 200540 }, { "epoch": 9.960762888646071, "grad_norm": 0.1875, "learning_rate": 3.142942286679249e-06, "loss": 0.4634, "step": 200550 }, { "epoch": 9.961259560941691, "grad_norm": 0.2119140625, "learning_rate": 3.103208503029701e-06, "loss": 0.4889, "step": 200560 }, { "epoch": 9.96175623323731, "grad_norm": 0.251953125, "learning_rate": 3.0634747193801533e-06, "loss": 0.4917, "step": 200570 }, { "epoch": 9.96225290553293, "grad_norm": 0.205078125, "learning_rate": 3.023740935730605e-06, "loss": 0.496, "step": 200580 }, { "epoch": 9.962749577828548, "grad_norm": 0.2080078125, "learning_rate": 2.984007152081057e-06, "loss": 0.4951, "step": 200590 }, { "epoch": 9.963246250124168, "grad_norm": 0.20703125, "learning_rate": 2.944273368431509e-06, "loss": 0.5108, "step": 200600 }, { "epoch": 9.963742922419787, "grad_norm": 0.236328125, "learning_rate": 2.904539584781961e-06, "loss": 0.4966, "step": 200610 }, { "epoch": 9.964239594715407, "grad_norm": 0.232421875, "learning_rate": 2.864805801132413e-06, "loss": 0.517, "step": 200620 }, { "epoch": 9.964736267011027, "grad_norm": 0.2109375, "learning_rate": 2.825072017482865e-06, "loss": 0.4991, "step": 200630 }, { "epoch": 9.965232939306645, "grad_norm": 0.2001953125, "learning_rate": 2.785338233833317e-06, "loss": 0.4462, "step": 200640 }, { "epoch": 9.965729611602265, "grad_norm": 0.2099609375, "learning_rate": 2.745604450183769e-06, "loss": 0.5046, "step": 200650 }, { "epoch": 9.966226283897884, "grad_norm": 0.208984375, "learning_rate": 2.7058706665342206e-06, "loss": 0.4744, "step": 200660 }, { "epoch": 9.966722956193504, "grad_norm": 0.2060546875, "learning_rate": 2.666136882884673e-06, "loss": 0.498, "step": 200670 }, { "epoch": 9.967219628489122, "grad_norm": 0.2177734375, "learning_rate": 2.6264030992351248e-06, "loss": 0.5009, "step": 200680 }, { "epoch": 9.967716300784742, "grad_norm": 0.2197265625, "learning_rate": 2.5866693155855767e-06, "loss": 0.4931, "step": 200690 }, { "epoch": 9.968212973080362, "grad_norm": 0.197265625, "learning_rate": 2.546935531936029e-06, "loss": 0.4721, "step": 200700 }, { "epoch": 9.96870964537598, "grad_norm": 0.193359375, "learning_rate": 2.507201748286481e-06, "loss": 0.4798, "step": 200710 }, { "epoch": 9.9692063176716, "grad_norm": 0.2109375, "learning_rate": 2.4674679646369327e-06, "loss": 0.4699, "step": 200720 }, { "epoch": 9.96970298996722, "grad_norm": 0.1982421875, "learning_rate": 2.4277341809873846e-06, "loss": 0.5015, "step": 200730 }, { "epoch": 9.97019966226284, "grad_norm": 0.2001953125, "learning_rate": 2.3880003973378365e-06, "loss": 0.4943, "step": 200740 }, { "epoch": 9.970696334558458, "grad_norm": 0.2236328125, "learning_rate": 2.3482666136882887e-06, "loss": 0.4559, "step": 200750 }, { "epoch": 9.971193006854078, "grad_norm": 0.234375, "learning_rate": 2.3085328300387406e-06, "loss": 0.503, "step": 200760 }, { "epoch": 9.971689679149698, "grad_norm": 0.2021484375, "learning_rate": 2.2687990463891925e-06, "loss": 0.5, "step": 200770 }, { "epoch": 9.972186351445316, "grad_norm": 0.2333984375, "learning_rate": 2.229065262739645e-06, "loss": 0.4934, "step": 200780 }, { "epoch": 9.972683023740936, "grad_norm": 0.2099609375, "learning_rate": 2.1893314790900962e-06, "loss": 0.5046, "step": 200790 }, { "epoch": 9.973179696036555, "grad_norm": 0.263671875, "learning_rate": 2.149597695440548e-06, "loss": 0.5024, "step": 200800 }, { "epoch": 9.973676368332175, "grad_norm": 0.220703125, "learning_rate": 2.1098639117910004e-06, "loss": 0.4862, "step": 200810 }, { "epoch": 9.974173040627793, "grad_norm": 0.1943359375, "learning_rate": 2.0701301281414523e-06, "loss": 0.4961, "step": 200820 }, { "epoch": 9.974669712923413, "grad_norm": 0.205078125, "learning_rate": 2.0303963444919046e-06, "loss": 0.4749, "step": 200830 }, { "epoch": 9.975166385219033, "grad_norm": 0.21484375, "learning_rate": 1.9906625608423565e-06, "loss": 0.4709, "step": 200840 }, { "epoch": 9.975663057514652, "grad_norm": 0.2578125, "learning_rate": 1.9509287771928083e-06, "loss": 0.5292, "step": 200850 }, { "epoch": 9.976159729810272, "grad_norm": 0.2060546875, "learning_rate": 1.91119499354326e-06, "loss": 0.4629, "step": 200860 }, { "epoch": 9.97665640210589, "grad_norm": 0.2109375, "learning_rate": 1.871461209893712e-06, "loss": 0.4717, "step": 200870 }, { "epoch": 9.97715307440151, "grad_norm": 0.23828125, "learning_rate": 1.831727426244164e-06, "loss": 0.4822, "step": 200880 }, { "epoch": 9.977649746697129, "grad_norm": 0.205078125, "learning_rate": 1.7919936425946163e-06, "loss": 0.5102, "step": 200890 }, { "epoch": 9.978146418992749, "grad_norm": 0.2099609375, "learning_rate": 1.7522598589450681e-06, "loss": 0.4861, "step": 200900 }, { "epoch": 9.978643091288369, "grad_norm": 0.2294921875, "learning_rate": 1.7125260752955202e-06, "loss": 0.4787, "step": 200910 }, { "epoch": 9.979139763583987, "grad_norm": 0.21484375, "learning_rate": 1.672792291645972e-06, "loss": 0.4888, "step": 200920 }, { "epoch": 9.979636435879607, "grad_norm": 0.21875, "learning_rate": 1.633058507996424e-06, "loss": 0.4792, "step": 200930 }, { "epoch": 9.980133108175226, "grad_norm": 0.2080078125, "learning_rate": 1.593324724346876e-06, "loss": 0.5013, "step": 200940 }, { "epoch": 9.980629780470846, "grad_norm": 0.181640625, "learning_rate": 1.553590940697328e-06, "loss": 0.4623, "step": 200950 }, { "epoch": 9.981126452766464, "grad_norm": 0.2080078125, "learning_rate": 1.51385715704778e-06, "loss": 0.4763, "step": 200960 }, { "epoch": 9.981623125062084, "grad_norm": 0.2216796875, "learning_rate": 1.4741233733982319e-06, "loss": 0.5116, "step": 200970 }, { "epoch": 9.982119797357704, "grad_norm": 0.2119140625, "learning_rate": 1.434389589748684e-06, "loss": 0.484, "step": 200980 }, { "epoch": 9.982616469653323, "grad_norm": 0.193359375, "learning_rate": 1.3946558060991358e-06, "loss": 0.4718, "step": 200990 }, { "epoch": 9.983113141948943, "grad_norm": 0.2177734375, "learning_rate": 1.3549220224495877e-06, "loss": 0.4857, "step": 201000 }, { "epoch": 9.983609814244561, "grad_norm": 0.19921875, "learning_rate": 1.3151882388000398e-06, "loss": 0.4885, "step": 201010 }, { "epoch": 9.984106486540181, "grad_norm": 0.220703125, "learning_rate": 1.2754544551504919e-06, "loss": 0.4994, "step": 201020 }, { "epoch": 9.9846031588358, "grad_norm": 0.1923828125, "learning_rate": 1.2357206715009438e-06, "loss": 0.5013, "step": 201030 }, { "epoch": 9.98509983113142, "grad_norm": 0.2099609375, "learning_rate": 1.1959868878513956e-06, "loss": 0.4846, "step": 201040 }, { "epoch": 9.98559650342704, "grad_norm": 0.197265625, "learning_rate": 1.1562531042018477e-06, "loss": 0.4439, "step": 201050 }, { "epoch": 9.986093175722658, "grad_norm": 0.22265625, "learning_rate": 1.1165193205522998e-06, "loss": 0.4679, "step": 201060 }, { "epoch": 9.986589848018278, "grad_norm": 0.2099609375, "learning_rate": 1.0767855369027515e-06, "loss": 0.4808, "step": 201070 }, { "epoch": 9.987086520313897, "grad_norm": 0.1982421875, "learning_rate": 1.0370517532532036e-06, "loss": 0.5098, "step": 201080 }, { "epoch": 9.987583192609517, "grad_norm": 0.2041015625, "learning_rate": 9.973179696036556e-07, "loss": 0.4858, "step": 201090 }, { "epoch": 9.988079864905135, "grad_norm": 0.197265625, "learning_rate": 9.575841859541075e-07, "loss": 0.4841, "step": 201100 }, { "epoch": 9.988576537200755, "grad_norm": 0.220703125, "learning_rate": 9.178504023045594e-07, "loss": 0.4603, "step": 201110 }, { "epoch": 9.989073209496375, "grad_norm": 0.2177734375, "learning_rate": 8.781166186550115e-07, "loss": 0.4741, "step": 201120 }, { "epoch": 9.989569881791994, "grad_norm": 0.1953125, "learning_rate": 8.383828350054634e-07, "loss": 0.4818, "step": 201130 }, { "epoch": 9.990066554087614, "grad_norm": 0.216796875, "learning_rate": 7.986490513559154e-07, "loss": 0.4931, "step": 201140 }, { "epoch": 9.990563226383232, "grad_norm": 0.212890625, "learning_rate": 7.589152677063674e-07, "loss": 0.51, "step": 201150 }, { "epoch": 9.991059898678852, "grad_norm": 0.1865234375, "learning_rate": 7.191814840568193e-07, "loss": 0.4486, "step": 201160 }, { "epoch": 9.99155657097447, "grad_norm": 0.2216796875, "learning_rate": 6.794477004072713e-07, "loss": 0.5048, "step": 201170 }, { "epoch": 9.99205324327009, "grad_norm": 0.205078125, "learning_rate": 6.397139167577233e-07, "loss": 0.5031, "step": 201180 }, { "epoch": 9.992549915565709, "grad_norm": 0.1943359375, "learning_rate": 5.999801331081752e-07, "loss": 0.5323, "step": 201190 }, { "epoch": 9.993046587861329, "grad_norm": 0.2177734375, "learning_rate": 5.602463494586273e-07, "loss": 0.4506, "step": 201200 }, { "epoch": 9.99354326015695, "grad_norm": 0.2109375, "learning_rate": 5.205125658090792e-07, "loss": 0.4974, "step": 201210 }, { "epoch": 9.994039932452567, "grad_norm": 0.25, "learning_rate": 4.807787821595312e-07, "loss": 0.5025, "step": 201220 }, { "epoch": 9.994536604748188, "grad_norm": 0.2294921875, "learning_rate": 4.4104499850998314e-07, "loss": 0.5047, "step": 201230 }, { "epoch": 9.995033277043806, "grad_norm": 0.2353515625, "learning_rate": 4.013112148604351e-07, "loss": 0.4562, "step": 201240 }, { "epoch": 9.995529949339426, "grad_norm": 0.2109375, "learning_rate": 3.6157743121088705e-07, "loss": 0.4832, "step": 201250 }, { "epoch": 9.996026621635044, "grad_norm": 0.1923828125, "learning_rate": 3.2184364756133903e-07, "loss": 0.5173, "step": 201260 }, { "epoch": 9.996523293930665, "grad_norm": 0.2197265625, "learning_rate": 2.82109863911791e-07, "loss": 0.4914, "step": 201270 }, { "epoch": 9.997019966226285, "grad_norm": 0.2138671875, "learning_rate": 2.42376080262243e-07, "loss": 0.5044, "step": 201280 }, { "epoch": 9.997516638521903, "grad_norm": 0.1904296875, "learning_rate": 2.0264229661269497e-07, "loss": 0.4748, "step": 201290 }, { "epoch": 9.998013310817523, "grad_norm": 0.2158203125, "learning_rate": 1.6290851296314692e-07, "loss": 0.4938, "step": 201300 }, { "epoch": 9.998509983113141, "grad_norm": 0.2001953125, "learning_rate": 1.231747293135989e-07, "loss": 0.5312, "step": 201310 }, { "epoch": 9.999006655408762, "grad_norm": 0.1875, "learning_rate": 8.344094566405085e-08, "loss": 0.4645, "step": 201320 }, { "epoch": 9.99950332770438, "grad_norm": 0.2421875, "learning_rate": 4.370716201450283e-08, "loss": 0.4832, "step": 201330 }, { "epoch": 10.0, "grad_norm": 0.203125, "learning_rate": 3.9733783649548035e-09, "loss": 0.4821, "step": 201340 }, { "epoch": 10.0, "step": 201340, "total_flos": 1.6346195493598003e+18, "train_loss": 0.5436877673617467, "train_runtime": 108051.857, "train_samples_per_second": 7.453, "train_steps_per_second": 1.863 } ], "logging_steps": 10, "max_steps": 201340, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6346195493598003e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }