{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 20134, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.966722956193504e-05, "grad_norm": 3.921875, "learning_rate": 0.0008, "loss": 3.0588, "step": 1 }, { "epoch": 0.0004966722956193504, "grad_norm": 1.3125, "learning_rate": 0.0007999642395947154, "loss": 2.3153, "step": 10 }, { "epoch": 0.0009933445912387007, "grad_norm": 0.53125, "learning_rate": 0.0007999245058110659, "loss": 1.8684, "step": 20 }, { "epoch": 0.001490016886858051, "grad_norm": 0.53515625, "learning_rate": 0.0007998847720274163, "loss": 1.8397, "step": 30 }, { "epoch": 0.0019866891824774015, "grad_norm": 0.609375, "learning_rate": 0.0007998450382437668, "loss": 1.7934, "step": 40 }, { "epoch": 0.0024833614780967518, "grad_norm": 1.09375, "learning_rate": 0.0007998053044601173, "loss": 1.7761, "step": 50 }, { "epoch": 0.002980033773716102, "grad_norm": 0.419921875, "learning_rate": 0.0007997655706764677, "loss": 1.7596, "step": 60 }, { "epoch": 0.0034767060693354523, "grad_norm": 0.8671875, "learning_rate": 0.0007997258368928181, "loss": 1.7166, "step": 70 }, { "epoch": 0.003973378364954803, "grad_norm": 0.482421875, "learning_rate": 0.0007996861031091686, "loss": 1.6598, "step": 80 }, { "epoch": 0.004470050660574153, "grad_norm": 0.5234375, "learning_rate": 0.0007996463693255191, "loss": 1.6145, "step": 90 }, { "epoch": 0.0049667229561935035, "grad_norm": 0.53125, "learning_rate": 0.0007996066355418695, "loss": 1.5857, "step": 100 }, { "epoch": 0.005463395251812854, "grad_norm": 0.40625, "learning_rate": 0.00079956690175822, "loss": 1.5669, "step": 110 }, { "epoch": 0.005960067547432204, "grad_norm": 0.50390625, "learning_rate": 0.0007995271679745704, "loss": 1.5253, "step": 120 }, { "epoch": 0.006456739843051554, "grad_norm": 0.396484375, "learning_rate": 0.0007994874341909208, "loss": 1.5324, "step": 130 }, { "epoch": 0.006953412138670905, "grad_norm": 0.60546875, "learning_rate": 0.0007994477004072714, "loss": 1.4808, "step": 140 }, { "epoch": 0.007450084434290256, "grad_norm": 0.48046875, "learning_rate": 0.0007994079666236218, "loss": 1.4696, "step": 150 }, { "epoch": 0.007946756729909606, "grad_norm": 0.3984375, "learning_rate": 0.0007993682328399723, "loss": 1.45, "step": 160 }, { "epoch": 0.008443429025528956, "grad_norm": 0.408203125, "learning_rate": 0.0007993284990563226, "loss": 1.4663, "step": 170 }, { "epoch": 0.008940101321148307, "grad_norm": 0.44921875, "learning_rate": 0.0007992887652726731, "loss": 1.4412, "step": 180 }, { "epoch": 0.009436773616767657, "grad_norm": 0.427734375, "learning_rate": 0.0007992490314890237, "loss": 1.3908, "step": 190 }, { "epoch": 0.009933445912387007, "grad_norm": 0.51953125, "learning_rate": 0.000799209297705374, "loss": 1.3938, "step": 200 }, { "epoch": 0.010430118208006357, "grad_norm": 0.43359375, "learning_rate": 0.0007991695639217245, "loss": 1.3474, "step": 210 }, { "epoch": 0.010926790503625708, "grad_norm": 0.36328125, "learning_rate": 0.0007991298301380749, "loss": 1.3319, "step": 220 }, { "epoch": 0.011423462799245058, "grad_norm": 0.35546875, "learning_rate": 0.0007990900963544253, "loss": 1.3428, "step": 230 }, { "epoch": 0.011920135094864408, "grad_norm": 0.431640625, "learning_rate": 0.0007990503625707759, "loss": 1.3505, "step": 240 }, { "epoch": 0.012416807390483758, "grad_norm": 0.53125, "learning_rate": 0.0007990106287871263, "loss": 1.3445, "step": 250 }, { "epoch": 0.012913479686103109, "grad_norm": 0.345703125, "learning_rate": 0.0007989708950034767, "loss": 1.3059, "step": 260 }, { "epoch": 0.013410151981722459, "grad_norm": 0.318359375, "learning_rate": 0.0007989311612198272, "loss": 1.3184, "step": 270 }, { "epoch": 0.01390682427734181, "grad_norm": 0.359375, "learning_rate": 0.0007988914274361776, "loss": 1.2753, "step": 280 }, { "epoch": 0.01440349657296116, "grad_norm": 0.4140625, "learning_rate": 0.0007988516936525282, "loss": 1.2823, "step": 290 }, { "epoch": 0.014900168868580511, "grad_norm": 0.451171875, "learning_rate": 0.0007988119598688786, "loss": 1.3007, "step": 300 }, { "epoch": 0.015396841164199862, "grad_norm": 0.4296875, "learning_rate": 0.000798772226085229, "loss": 1.2959, "step": 310 }, { "epoch": 0.015893513459819212, "grad_norm": 0.3828125, "learning_rate": 0.0007987324923015795, "loss": 1.2481, "step": 320 }, { "epoch": 0.01639018575543856, "grad_norm": 0.53515625, "learning_rate": 0.0007986927585179299, "loss": 1.2824, "step": 330 }, { "epoch": 0.016886858051057912, "grad_norm": 0.412109375, "learning_rate": 0.0007986530247342804, "loss": 1.242, "step": 340 }, { "epoch": 0.01738353034667726, "grad_norm": 0.42578125, "learning_rate": 0.0007986132909506309, "loss": 1.2588, "step": 350 }, { "epoch": 0.017880202642296613, "grad_norm": 0.35546875, "learning_rate": 0.0007985735571669812, "loss": 1.2736, "step": 360 }, { "epoch": 0.01837687493791596, "grad_norm": 0.4765625, "learning_rate": 0.0007985338233833317, "loss": 1.2348, "step": 370 }, { "epoch": 0.018873547233535314, "grad_norm": 0.283203125, "learning_rate": 0.0007984940895996822, "loss": 1.2293, "step": 380 }, { "epoch": 0.019370219529154662, "grad_norm": 0.361328125, "learning_rate": 0.0007984543558160325, "loss": 1.1832, "step": 390 }, { "epoch": 0.019866891824774014, "grad_norm": 0.326171875, "learning_rate": 0.0007984146220323831, "loss": 1.2007, "step": 400 }, { "epoch": 0.020363564120393366, "grad_norm": 0.314453125, "learning_rate": 0.0007983748882487336, "loss": 1.2085, "step": 410 }, { "epoch": 0.020860236416012715, "grad_norm": 0.333984375, "learning_rate": 0.0007983351544650839, "loss": 1.2127, "step": 420 }, { "epoch": 0.021356908711632067, "grad_norm": 0.388671875, "learning_rate": 0.0007982954206814344, "loss": 1.1983, "step": 430 }, { "epoch": 0.021853581007251415, "grad_norm": 0.408203125, "learning_rate": 0.0007982556868977848, "loss": 1.2332, "step": 440 }, { "epoch": 0.022350253302870767, "grad_norm": 0.349609375, "learning_rate": 0.0007982159531141354, "loss": 1.1835, "step": 450 }, { "epoch": 0.022846925598490116, "grad_norm": 0.345703125, "learning_rate": 0.0007981762193304858, "loss": 1.21, "step": 460 }, { "epoch": 0.023343597894109468, "grad_norm": 0.3515625, "learning_rate": 0.0007981364855468362, "loss": 1.1797, "step": 470 }, { "epoch": 0.023840270189728816, "grad_norm": 0.34765625, "learning_rate": 0.0007980967517631867, "loss": 1.1599, "step": 480 }, { "epoch": 0.024336942485348168, "grad_norm": 0.318359375, "learning_rate": 0.0007980570179795371, "loss": 1.1898, "step": 490 }, { "epoch": 0.024833614780967517, "grad_norm": 0.291015625, "learning_rate": 0.0007980172841958876, "loss": 1.1622, "step": 500 }, { "epoch": 0.02533028707658687, "grad_norm": 0.373046875, "learning_rate": 0.0007979775504122381, "loss": 1.182, "step": 510 }, { "epoch": 0.025826959372206217, "grad_norm": 0.3203125, "learning_rate": 0.0007979378166285885, "loss": 1.1502, "step": 520 }, { "epoch": 0.02632363166782557, "grad_norm": 0.365234375, "learning_rate": 0.0007978980828449389, "loss": 1.1264, "step": 530 }, { "epoch": 0.026820303963444918, "grad_norm": 0.37890625, "learning_rate": 0.0007978583490612895, "loss": 1.1711, "step": 540 }, { "epoch": 0.02731697625906427, "grad_norm": 0.333984375, "learning_rate": 0.0007978186152776398, "loss": 1.1233, "step": 550 }, { "epoch": 0.02781364855468362, "grad_norm": 0.396484375, "learning_rate": 0.0007977788814939903, "loss": 1.1435, "step": 560 }, { "epoch": 0.02831032085030297, "grad_norm": 0.330078125, "learning_rate": 0.0007977391477103408, "loss": 1.1424, "step": 570 }, { "epoch": 0.02880699314592232, "grad_norm": 0.353515625, "learning_rate": 0.0007976994139266911, "loss": 1.1642, "step": 580 }, { "epoch": 0.02930366544154167, "grad_norm": 0.337890625, "learning_rate": 0.0007976596801430416, "loss": 1.11, "step": 590 }, { "epoch": 0.029800337737161023, "grad_norm": 0.33984375, "learning_rate": 0.0007976199463593922, "loss": 1.152, "step": 600 }, { "epoch": 0.03029701003278037, "grad_norm": 0.3203125, "learning_rate": 0.0007975802125757426, "loss": 1.1054, "step": 610 }, { "epoch": 0.030793682328399723, "grad_norm": 0.369140625, "learning_rate": 0.000797540478792093, "loss": 1.1078, "step": 620 }, { "epoch": 0.03129035462401907, "grad_norm": 0.333984375, "learning_rate": 0.0007975007450084434, "loss": 1.1475, "step": 630 }, { "epoch": 0.031787026919638424, "grad_norm": 0.296875, "learning_rate": 0.000797461011224794, "loss": 1.1248, "step": 640 }, { "epoch": 0.032283699215257776, "grad_norm": 0.349609375, "learning_rate": 0.0007974212774411444, "loss": 1.1052, "step": 650 }, { "epoch": 0.03278037151087712, "grad_norm": 0.3515625, "learning_rate": 0.0007973815436574948, "loss": 1.1349, "step": 660 }, { "epoch": 0.03327704380649647, "grad_norm": 0.337890625, "learning_rate": 0.0007973418098738453, "loss": 1.1056, "step": 670 }, { "epoch": 0.033773716102115825, "grad_norm": 0.396484375, "learning_rate": 0.0007973020760901957, "loss": 1.1263, "step": 680 }, { "epoch": 0.03427038839773518, "grad_norm": 0.30078125, "learning_rate": 0.0007972623423065461, "loss": 1.1334, "step": 690 }, { "epoch": 0.03476706069335452, "grad_norm": 0.3203125, "learning_rate": 0.0007972226085228967, "loss": 1.1171, "step": 700 }, { "epoch": 0.035263732988973874, "grad_norm": 0.33984375, "learning_rate": 0.0007971828747392471, "loss": 1.1007, "step": 710 }, { "epoch": 0.035760405284593226, "grad_norm": 0.326171875, "learning_rate": 0.0007971431409555975, "loss": 1.1092, "step": 720 }, { "epoch": 0.03625707758021258, "grad_norm": 0.35546875, "learning_rate": 0.000797103407171948, "loss": 1.1299, "step": 730 }, { "epoch": 0.03675374987583192, "grad_norm": 0.314453125, "learning_rate": 0.0007970636733882984, "loss": 1.1215, "step": 740 }, { "epoch": 0.037250422171451275, "grad_norm": 0.294921875, "learning_rate": 0.0007970239396046489, "loss": 1.0959, "step": 750 }, { "epoch": 0.03774709446707063, "grad_norm": 0.34765625, "learning_rate": 0.0007969842058209994, "loss": 1.0947, "step": 760 }, { "epoch": 0.03824376676268998, "grad_norm": 0.3984375, "learning_rate": 0.0007969444720373498, "loss": 1.0743, "step": 770 }, { "epoch": 0.038740439058309324, "grad_norm": 0.3125, "learning_rate": 0.0007969047382537002, "loss": 1.0936, "step": 780 }, { "epoch": 0.039237111353928676, "grad_norm": 0.306640625, "learning_rate": 0.0007968650044700507, "loss": 1.0967, "step": 790 }, { "epoch": 0.03973378364954803, "grad_norm": 0.314453125, "learning_rate": 0.0007968252706864012, "loss": 1.0531, "step": 800 }, { "epoch": 0.04023045594516738, "grad_norm": 0.2734375, "learning_rate": 0.0007967855369027516, "loss": 1.0668, "step": 810 }, { "epoch": 0.04072712824078673, "grad_norm": 0.294921875, "learning_rate": 0.000796745803119102, "loss": 1.0908, "step": 820 }, { "epoch": 0.04122380053640608, "grad_norm": 0.265625, "learning_rate": 0.0007967060693354525, "loss": 1.0599, "step": 830 }, { "epoch": 0.04172047283202543, "grad_norm": 0.29296875, "learning_rate": 0.0007966663355518029, "loss": 1.0601, "step": 840 }, { "epoch": 0.04221714512764478, "grad_norm": 0.28515625, "learning_rate": 0.0007966266017681534, "loss": 1.0303, "step": 850 }, { "epoch": 0.04271381742326413, "grad_norm": 0.271484375, "learning_rate": 0.0007965868679845039, "loss": 1.0721, "step": 860 }, { "epoch": 0.04321048971888348, "grad_norm": 0.322265625, "learning_rate": 0.0007965471342008543, "loss": 1.0544, "step": 870 }, { "epoch": 0.04370716201450283, "grad_norm": 0.32421875, "learning_rate": 0.0007965074004172047, "loss": 1.0499, "step": 880 }, { "epoch": 0.04420383431012218, "grad_norm": 0.326171875, "learning_rate": 0.0007964676666335552, "loss": 1.0578, "step": 890 }, { "epoch": 0.044700506605741534, "grad_norm": 0.3046875, "learning_rate": 0.0007964279328499057, "loss": 1.103, "step": 900 }, { "epoch": 0.04519717890136088, "grad_norm": 0.2734375, "learning_rate": 0.0007963881990662561, "loss": 1.0236, "step": 910 }, { "epoch": 0.04569385119698023, "grad_norm": 0.31640625, "learning_rate": 0.0007963484652826066, "loss": 1.0282, "step": 920 }, { "epoch": 0.04619052349259958, "grad_norm": 0.33984375, "learning_rate": 0.000796308731498957, "loss": 1.0686, "step": 930 }, { "epoch": 0.046687195788218935, "grad_norm": 0.291015625, "learning_rate": 0.0007962689977153074, "loss": 1.0468, "step": 940 }, { "epoch": 0.04718386808383828, "grad_norm": 0.31640625, "learning_rate": 0.000796229263931658, "loss": 1.0379, "step": 950 }, { "epoch": 0.04768054037945763, "grad_norm": 0.349609375, "learning_rate": 0.0007961895301480084, "loss": 1.0553, "step": 960 }, { "epoch": 0.048177212675076984, "grad_norm": 0.2890625, "learning_rate": 0.0007961497963643588, "loss": 1.0768, "step": 970 }, { "epoch": 0.048673884970696336, "grad_norm": 0.34765625, "learning_rate": 0.0007961100625807093, "loss": 1.0261, "step": 980 }, { "epoch": 0.04917055726631568, "grad_norm": 0.255859375, "learning_rate": 0.0007960703287970597, "loss": 1.0311, "step": 990 }, { "epoch": 0.04966722956193503, "grad_norm": 0.326171875, "learning_rate": 0.0007960305950134102, "loss": 1.0556, "step": 1000 }, { "epoch": 0.050163901857554385, "grad_norm": 0.28125, "learning_rate": 0.0007959908612297607, "loss": 1.034, "step": 1010 }, { "epoch": 0.05066057415317374, "grad_norm": 0.330078125, "learning_rate": 0.0007959511274461111, "loss": 1.0009, "step": 1020 }, { "epoch": 0.05115724644879309, "grad_norm": 0.310546875, "learning_rate": 0.0007959113936624615, "loss": 1.0109, "step": 1030 }, { "epoch": 0.051653918744412435, "grad_norm": 0.27734375, "learning_rate": 0.0007958716598788119, "loss": 1.0085, "step": 1040 }, { "epoch": 0.052150591040031787, "grad_norm": 0.27734375, "learning_rate": 0.0007958319260951625, "loss": 1.0898, "step": 1050 }, { "epoch": 0.05264726333565114, "grad_norm": 0.34765625, "learning_rate": 0.000795792192311513, "loss": 1.0379, "step": 1060 }, { "epoch": 0.05314393563127049, "grad_norm": 0.291015625, "learning_rate": 0.0007957524585278633, "loss": 0.9888, "step": 1070 }, { "epoch": 0.053640607926889836, "grad_norm": 0.275390625, "learning_rate": 0.0007957127247442138, "loss": 0.9989, "step": 1080 }, { "epoch": 0.05413728022250919, "grad_norm": 0.333984375, "learning_rate": 0.0007956729909605642, "loss": 1.046, "step": 1090 }, { "epoch": 0.05463395251812854, "grad_norm": 0.306640625, "learning_rate": 0.0007956332571769147, "loss": 0.9776, "step": 1100 }, { "epoch": 0.05513062481374789, "grad_norm": 0.3125, "learning_rate": 0.0007955935233932652, "loss": 1.0245, "step": 1110 }, { "epoch": 0.05562729710936724, "grad_norm": 0.302734375, "learning_rate": 0.0007955537896096156, "loss": 1.0557, "step": 1120 }, { "epoch": 0.05612396940498659, "grad_norm": 0.2734375, "learning_rate": 0.000795514055825966, "loss": 1.0043, "step": 1130 }, { "epoch": 0.05662064170060594, "grad_norm": 0.279296875, "learning_rate": 0.0007954743220423165, "loss": 0.9955, "step": 1140 }, { "epoch": 0.05711731399622529, "grad_norm": 0.328125, "learning_rate": 0.000795434588258667, "loss": 1.0221, "step": 1150 }, { "epoch": 0.05761398629184464, "grad_norm": 0.279296875, "learning_rate": 0.0007953948544750174, "loss": 0.9994, "step": 1160 }, { "epoch": 0.05811065858746399, "grad_norm": 0.275390625, "learning_rate": 0.0007953551206913679, "loss": 1.031, "step": 1170 }, { "epoch": 0.05860733088308334, "grad_norm": 0.279296875, "learning_rate": 0.0007953153869077183, "loss": 0.9968, "step": 1180 }, { "epoch": 0.059104003178702694, "grad_norm": 0.2734375, "learning_rate": 0.0007952756531240688, "loss": 1.0061, "step": 1190 }, { "epoch": 0.059600675474322046, "grad_norm": 0.298828125, "learning_rate": 0.0007952359193404193, "loss": 1.0162, "step": 1200 }, { "epoch": 0.06009734776994139, "grad_norm": 0.29296875, "learning_rate": 0.0007951961855567697, "loss": 0.9925, "step": 1210 }, { "epoch": 0.06059402006556074, "grad_norm": 0.271484375, "learning_rate": 0.0007951564517731202, "loss": 1.0197, "step": 1220 }, { "epoch": 0.061090692361180095, "grad_norm": 0.265625, "learning_rate": 0.0007951167179894705, "loss": 1.0029, "step": 1230 }, { "epoch": 0.06158736465679945, "grad_norm": 0.27734375, "learning_rate": 0.000795076984205821, "loss": 0.9652, "step": 1240 }, { "epoch": 0.06208403695241879, "grad_norm": 0.361328125, "learning_rate": 0.0007950372504221716, "loss": 1.0034, "step": 1250 }, { "epoch": 0.06258070924803814, "grad_norm": 0.2734375, "learning_rate": 0.0007949975166385219, "loss": 1.0125, "step": 1260 }, { "epoch": 0.0630773815436575, "grad_norm": 0.3203125, "learning_rate": 0.0007949577828548724, "loss": 0.999, "step": 1270 }, { "epoch": 0.06357405383927685, "grad_norm": 0.30078125, "learning_rate": 0.0007949180490712229, "loss": 0.9831, "step": 1280 }, { "epoch": 0.0640707261348962, "grad_norm": 0.30859375, "learning_rate": 0.0007948783152875732, "loss": 0.9889, "step": 1290 }, { "epoch": 0.06456739843051555, "grad_norm": 0.259765625, "learning_rate": 0.0007948385815039238, "loss": 0.9954, "step": 1300 }, { "epoch": 0.06506407072613489, "grad_norm": 0.40234375, "learning_rate": 0.0007947988477202742, "loss": 1.0029, "step": 1310 }, { "epoch": 0.06556074302175424, "grad_norm": 0.279296875, "learning_rate": 0.0007947591139366246, "loss": 0.9666, "step": 1320 }, { "epoch": 0.0660574153173736, "grad_norm": 0.31640625, "learning_rate": 0.0007947193801529751, "loss": 0.9921, "step": 1330 }, { "epoch": 0.06655408761299295, "grad_norm": 0.302734375, "learning_rate": 0.0007946796463693255, "loss": 0.9799, "step": 1340 }, { "epoch": 0.0670507599086123, "grad_norm": 0.279296875, "learning_rate": 0.0007946399125856761, "loss": 1.0026, "step": 1350 }, { "epoch": 0.06754743220423165, "grad_norm": 0.26171875, "learning_rate": 0.0007946001788020265, "loss": 0.9782, "step": 1360 }, { "epoch": 0.068044104499851, "grad_norm": 0.283203125, "learning_rate": 0.0007945604450183769, "loss": 1.0086, "step": 1370 }, { "epoch": 0.06854077679547035, "grad_norm": 0.271484375, "learning_rate": 0.0007945207112347274, "loss": 0.9763, "step": 1380 }, { "epoch": 0.0690374490910897, "grad_norm": 0.30078125, "learning_rate": 0.0007944809774510778, "loss": 1.0011, "step": 1390 }, { "epoch": 0.06953412138670904, "grad_norm": 0.267578125, "learning_rate": 0.0007944412436674283, "loss": 0.9751, "step": 1400 }, { "epoch": 0.0700307936823284, "grad_norm": 0.29296875, "learning_rate": 0.0007944015098837788, "loss": 1.0073, "step": 1410 }, { "epoch": 0.07052746597794775, "grad_norm": 0.287109375, "learning_rate": 0.0007943617761001291, "loss": 0.9551, "step": 1420 }, { "epoch": 0.0710241382735671, "grad_norm": 0.271484375, "learning_rate": 0.0007943220423164796, "loss": 0.9414, "step": 1430 }, { "epoch": 0.07152081056918645, "grad_norm": 0.3125, "learning_rate": 0.0007942823085328301, "loss": 0.9908, "step": 1440 }, { "epoch": 0.0720174828648058, "grad_norm": 0.294921875, "learning_rate": 0.0007942425747491804, "loss": 0.9451, "step": 1450 }, { "epoch": 0.07251415516042516, "grad_norm": 0.234375, "learning_rate": 0.000794202840965531, "loss": 0.9761, "step": 1460 }, { "epoch": 0.07301082745604451, "grad_norm": 0.322265625, "learning_rate": 0.0007941631071818815, "loss": 0.9389, "step": 1470 }, { "epoch": 0.07350749975166385, "grad_norm": 0.267578125, "learning_rate": 0.0007941233733982319, "loss": 0.9704, "step": 1480 }, { "epoch": 0.0740041720472832, "grad_norm": 0.283203125, "learning_rate": 0.0007940836396145823, "loss": 0.9748, "step": 1490 }, { "epoch": 0.07450084434290255, "grad_norm": 0.3046875, "learning_rate": 0.0007940439058309328, "loss": 0.982, "step": 1500 }, { "epoch": 0.0749975166385219, "grad_norm": 0.26953125, "learning_rate": 0.0007940041720472833, "loss": 0.9661, "step": 1510 }, { "epoch": 0.07549418893414125, "grad_norm": 0.2578125, "learning_rate": 0.0007939644382636337, "loss": 0.9424, "step": 1520 }, { "epoch": 0.0759908612297606, "grad_norm": 0.29296875, "learning_rate": 0.0007939247044799841, "loss": 0.9927, "step": 1530 }, { "epoch": 0.07648753352537996, "grad_norm": 0.2451171875, "learning_rate": 0.0007938849706963346, "loss": 0.9794, "step": 1540 }, { "epoch": 0.07698420582099931, "grad_norm": 0.333984375, "learning_rate": 0.000793845236912685, "loss": 0.9505, "step": 1550 }, { "epoch": 0.07748087811661865, "grad_norm": 0.296875, "learning_rate": 0.0007938055031290355, "loss": 0.9565, "step": 1560 }, { "epoch": 0.077977550412238, "grad_norm": 0.3046875, "learning_rate": 0.000793765769345386, "loss": 0.9998, "step": 1570 }, { "epoch": 0.07847422270785735, "grad_norm": 0.287109375, "learning_rate": 0.0007937260355617364, "loss": 0.9816, "step": 1580 }, { "epoch": 0.0789708950034767, "grad_norm": 0.30078125, "learning_rate": 0.0007936863017780868, "loss": 0.9523, "step": 1590 }, { "epoch": 0.07946756729909606, "grad_norm": 0.255859375, "learning_rate": 0.0007936465679944374, "loss": 0.9512, "step": 1600 }, { "epoch": 0.07996423959471541, "grad_norm": 0.26171875, "learning_rate": 0.0007936068342107878, "loss": 0.9186, "step": 1610 }, { "epoch": 0.08046091189033476, "grad_norm": 0.296875, "learning_rate": 0.0007935671004271382, "loss": 0.9518, "step": 1620 }, { "epoch": 0.08095758418595411, "grad_norm": 0.33203125, "learning_rate": 0.0007935273666434887, "loss": 0.9269, "step": 1630 }, { "epoch": 0.08145425648157346, "grad_norm": 0.25390625, "learning_rate": 0.0007934876328598391, "loss": 0.9667, "step": 1640 }, { "epoch": 0.0819509287771928, "grad_norm": 0.2578125, "learning_rate": 0.0007934478990761896, "loss": 0.947, "step": 1650 }, { "epoch": 0.08244760107281215, "grad_norm": 0.30859375, "learning_rate": 0.0007934081652925401, "loss": 0.9563, "step": 1660 }, { "epoch": 0.0829442733684315, "grad_norm": 0.28515625, "learning_rate": 0.0007933684315088905, "loss": 0.9397, "step": 1670 }, { "epoch": 0.08344094566405086, "grad_norm": 0.275390625, "learning_rate": 0.0007933286977252409, "loss": 0.965, "step": 1680 }, { "epoch": 0.08393761795967021, "grad_norm": 0.25, "learning_rate": 0.0007932889639415913, "loss": 0.9461, "step": 1690 }, { "epoch": 0.08443429025528956, "grad_norm": 0.26171875, "learning_rate": 0.0007932492301579419, "loss": 0.9537, "step": 1700 }, { "epoch": 0.08493096255090891, "grad_norm": 0.2392578125, "learning_rate": 0.0007932094963742923, "loss": 0.9146, "step": 1710 }, { "epoch": 0.08542763484652827, "grad_norm": 0.259765625, "learning_rate": 0.0007931697625906427, "loss": 0.9558, "step": 1720 }, { "epoch": 0.0859243071421476, "grad_norm": 0.263671875, "learning_rate": 0.0007931300288069932, "loss": 0.9614, "step": 1730 }, { "epoch": 0.08642097943776696, "grad_norm": 0.2412109375, "learning_rate": 0.0007930902950233436, "loss": 0.9227, "step": 1740 }, { "epoch": 0.08691765173338631, "grad_norm": 0.279296875, "learning_rate": 0.000793050561239694, "loss": 0.9533, "step": 1750 }, { "epoch": 0.08741432402900566, "grad_norm": 0.2373046875, "learning_rate": 0.0007930108274560446, "loss": 0.9488, "step": 1760 }, { "epoch": 0.08791099632462501, "grad_norm": 0.25390625, "learning_rate": 0.000792971093672395, "loss": 0.9396, "step": 1770 }, { "epoch": 0.08840766862024436, "grad_norm": 0.294921875, "learning_rate": 0.0007929313598887454, "loss": 0.9199, "step": 1780 }, { "epoch": 0.08890434091586372, "grad_norm": 0.267578125, "learning_rate": 0.0007928916261050959, "loss": 0.9593, "step": 1790 }, { "epoch": 0.08940101321148307, "grad_norm": 0.2490234375, "learning_rate": 0.0007928518923214463, "loss": 0.9639, "step": 1800 }, { "epoch": 0.08989768550710242, "grad_norm": 0.2578125, "learning_rate": 0.0007928121585377968, "loss": 0.9267, "step": 1810 }, { "epoch": 0.09039435780272176, "grad_norm": 0.287109375, "learning_rate": 0.0007927724247541473, "loss": 0.9629, "step": 1820 }, { "epoch": 0.09089103009834111, "grad_norm": 0.248046875, "learning_rate": 0.0007927326909704977, "loss": 0.9051, "step": 1830 }, { "epoch": 0.09138770239396046, "grad_norm": 0.25390625, "learning_rate": 0.0007926929571868481, "loss": 0.9445, "step": 1840 }, { "epoch": 0.09188437468957981, "grad_norm": 0.2490234375, "learning_rate": 0.0007926532234031987, "loss": 0.9441, "step": 1850 }, { "epoch": 0.09238104698519917, "grad_norm": 0.28515625, "learning_rate": 0.0007926134896195491, "loss": 0.9459, "step": 1860 }, { "epoch": 0.09287771928081852, "grad_norm": 0.263671875, "learning_rate": 0.0007925737558358995, "loss": 0.944, "step": 1870 }, { "epoch": 0.09337439157643787, "grad_norm": 0.2431640625, "learning_rate": 0.00079253402205225, "loss": 0.9664, "step": 1880 }, { "epoch": 0.09387106387205722, "grad_norm": 0.25390625, "learning_rate": 0.0007924942882686004, "loss": 0.9384, "step": 1890 }, { "epoch": 0.09436773616767656, "grad_norm": 0.25390625, "learning_rate": 0.0007924545544849508, "loss": 0.936, "step": 1900 }, { "epoch": 0.09486440846329591, "grad_norm": 0.259765625, "learning_rate": 0.0007924148207013013, "loss": 0.9484, "step": 1910 }, { "epoch": 0.09536108075891526, "grad_norm": 0.28125, "learning_rate": 0.0007923750869176518, "loss": 0.8862, "step": 1920 }, { "epoch": 0.09585775305453462, "grad_norm": 0.2412109375, "learning_rate": 0.0007923353531340023, "loss": 0.933, "step": 1930 }, { "epoch": 0.09635442535015397, "grad_norm": 0.220703125, "learning_rate": 0.0007922956193503526, "loss": 0.9179, "step": 1940 }, { "epoch": 0.09685109764577332, "grad_norm": 0.302734375, "learning_rate": 0.0007922558855667031, "loss": 0.9129, "step": 1950 }, { "epoch": 0.09734776994139267, "grad_norm": 0.259765625, "learning_rate": 0.0007922161517830536, "loss": 0.9578, "step": 1960 }, { "epoch": 0.09784444223701202, "grad_norm": 0.314453125, "learning_rate": 0.000792176417999404, "loss": 0.8939, "step": 1970 }, { "epoch": 0.09834111453263136, "grad_norm": 0.28515625, "learning_rate": 0.0007921366842157545, "loss": 0.9061, "step": 1980 }, { "epoch": 0.09883778682825071, "grad_norm": 0.271484375, "learning_rate": 0.0007920969504321049, "loss": 0.9174, "step": 1990 }, { "epoch": 0.09933445912387007, "grad_norm": 0.2373046875, "learning_rate": 0.0007920572166484553, "loss": 0.9491, "step": 2000 }, { "epoch": 0.09983113141948942, "grad_norm": 0.265625, "learning_rate": 0.0007920174828648059, "loss": 0.9257, "step": 2010 }, { "epoch": 0.10032780371510877, "grad_norm": 0.2578125, "learning_rate": 0.0007919777490811563, "loss": 0.8943, "step": 2020 }, { "epoch": 0.10082447601072812, "grad_norm": 0.25390625, "learning_rate": 0.0007919380152975067, "loss": 0.9324, "step": 2030 }, { "epoch": 0.10132114830634747, "grad_norm": 0.2373046875, "learning_rate": 0.0007918982815138572, "loss": 0.9209, "step": 2040 }, { "epoch": 0.10181782060196683, "grad_norm": 0.302734375, "learning_rate": 0.0007918585477302076, "loss": 0.9143, "step": 2050 }, { "epoch": 0.10231449289758618, "grad_norm": 0.28125, "learning_rate": 0.0007918188139465581, "loss": 0.8881, "step": 2060 }, { "epoch": 0.10281116519320552, "grad_norm": 0.255859375, "learning_rate": 0.0007917790801629086, "loss": 0.9361, "step": 2070 }, { "epoch": 0.10330783748882487, "grad_norm": 0.23828125, "learning_rate": 0.000791739346379259, "loss": 0.8759, "step": 2080 }, { "epoch": 0.10380450978444422, "grad_norm": 0.2421875, "learning_rate": 0.0007916996125956095, "loss": 0.9115, "step": 2090 }, { "epoch": 0.10430118208006357, "grad_norm": 0.25, "learning_rate": 0.0007916598788119598, "loss": 0.9272, "step": 2100 }, { "epoch": 0.10479785437568293, "grad_norm": 0.294921875, "learning_rate": 0.0007916201450283104, "loss": 0.9501, "step": 2110 }, { "epoch": 0.10529452667130228, "grad_norm": 0.2490234375, "learning_rate": 0.0007915804112446609, "loss": 0.8966, "step": 2120 }, { "epoch": 0.10579119896692163, "grad_norm": 0.2490234375, "learning_rate": 0.0007915406774610112, "loss": 0.9069, "step": 2130 }, { "epoch": 0.10628787126254098, "grad_norm": 0.267578125, "learning_rate": 0.0007915009436773617, "loss": 0.9063, "step": 2140 }, { "epoch": 0.10678454355816032, "grad_norm": 0.26171875, "learning_rate": 0.0007914612098937123, "loss": 0.9479, "step": 2150 }, { "epoch": 0.10728121585377967, "grad_norm": 0.23828125, "learning_rate": 0.0007914214761100626, "loss": 0.8811, "step": 2160 }, { "epoch": 0.10777788814939902, "grad_norm": 0.275390625, "learning_rate": 0.0007913817423264131, "loss": 0.9181, "step": 2170 }, { "epoch": 0.10827456044501838, "grad_norm": 0.275390625, "learning_rate": 0.0007913420085427635, "loss": 0.8811, "step": 2180 }, { "epoch": 0.10877123274063773, "grad_norm": 0.248046875, "learning_rate": 0.0007913022747591139, "loss": 0.8769, "step": 2190 }, { "epoch": 0.10926790503625708, "grad_norm": 0.275390625, "learning_rate": 0.0007912625409754644, "loss": 0.9298, "step": 2200 }, { "epoch": 0.10976457733187643, "grad_norm": 0.25390625, "learning_rate": 0.0007912228071918149, "loss": 0.8929, "step": 2210 }, { "epoch": 0.11026124962749578, "grad_norm": 0.296875, "learning_rate": 0.0007911830734081653, "loss": 0.9101, "step": 2220 }, { "epoch": 0.11075792192311514, "grad_norm": 0.234375, "learning_rate": 0.0007911433396245158, "loss": 0.9398, "step": 2230 }, { "epoch": 0.11125459421873447, "grad_norm": 0.2333984375, "learning_rate": 0.0007911036058408662, "loss": 0.9077, "step": 2240 }, { "epoch": 0.11175126651435383, "grad_norm": 0.294921875, "learning_rate": 0.0007910638720572167, "loss": 0.9285, "step": 2250 }, { "epoch": 0.11224793880997318, "grad_norm": 0.26953125, "learning_rate": 0.0007910241382735672, "loss": 0.8824, "step": 2260 }, { "epoch": 0.11274461110559253, "grad_norm": 0.2734375, "learning_rate": 0.0007909844044899176, "loss": 0.8869, "step": 2270 }, { "epoch": 0.11324128340121188, "grad_norm": 0.302734375, "learning_rate": 0.0007909446707062681, "loss": 0.9341, "step": 2280 }, { "epoch": 0.11373795569683123, "grad_norm": 0.240234375, "learning_rate": 0.0007909049369226184, "loss": 0.9054, "step": 2290 }, { "epoch": 0.11423462799245059, "grad_norm": 0.2578125, "learning_rate": 0.0007908652031389689, "loss": 0.91, "step": 2300 }, { "epoch": 0.11473130028806994, "grad_norm": 0.2353515625, "learning_rate": 0.0007908254693553195, "loss": 0.9317, "step": 2310 }, { "epoch": 0.11522797258368928, "grad_norm": 0.267578125, "learning_rate": 0.0007907857355716698, "loss": 0.9031, "step": 2320 }, { "epoch": 0.11572464487930863, "grad_norm": 0.2734375, "learning_rate": 0.0007907460017880203, "loss": 0.9089, "step": 2330 }, { "epoch": 0.11622131717492798, "grad_norm": 0.2578125, "learning_rate": 0.0007907062680043708, "loss": 0.9035, "step": 2340 }, { "epoch": 0.11671798947054733, "grad_norm": 0.29296875, "learning_rate": 0.0007906665342207211, "loss": 0.8807, "step": 2350 }, { "epoch": 0.11721466176616668, "grad_norm": 0.24609375, "learning_rate": 0.0007906268004370717, "loss": 0.8563, "step": 2360 }, { "epoch": 0.11771133406178604, "grad_norm": 0.2353515625, "learning_rate": 0.0007905870666534221, "loss": 0.8702, "step": 2370 }, { "epoch": 0.11820800635740539, "grad_norm": 0.2109375, "learning_rate": 0.0007905473328697726, "loss": 0.9268, "step": 2380 }, { "epoch": 0.11870467865302474, "grad_norm": 0.240234375, "learning_rate": 0.000790507599086123, "loss": 0.902, "step": 2390 }, { "epoch": 0.11920135094864409, "grad_norm": 0.26171875, "learning_rate": 0.0007904678653024734, "loss": 0.9036, "step": 2400 }, { "epoch": 0.11969802324426343, "grad_norm": 0.263671875, "learning_rate": 0.000790428131518824, "loss": 0.9041, "step": 2410 }, { "epoch": 0.12019469553988278, "grad_norm": 0.25390625, "learning_rate": 0.0007903883977351744, "loss": 0.8601, "step": 2420 }, { "epoch": 0.12069136783550213, "grad_norm": 0.27734375, "learning_rate": 0.0007903486639515248, "loss": 0.8791, "step": 2430 }, { "epoch": 0.12118804013112149, "grad_norm": 0.263671875, "learning_rate": 0.0007903089301678753, "loss": 0.9051, "step": 2440 }, { "epoch": 0.12168471242674084, "grad_norm": 0.255859375, "learning_rate": 0.0007902691963842257, "loss": 0.8775, "step": 2450 }, { "epoch": 0.12218138472236019, "grad_norm": 0.2255859375, "learning_rate": 0.0007902294626005762, "loss": 0.8495, "step": 2460 }, { "epoch": 0.12267805701797954, "grad_norm": 0.25, "learning_rate": 0.0007901897288169267, "loss": 0.9162, "step": 2470 }, { "epoch": 0.1231747293135989, "grad_norm": 0.259765625, "learning_rate": 0.000790149995033277, "loss": 0.8772, "step": 2480 }, { "epoch": 0.12367140160921823, "grad_norm": 0.2490234375, "learning_rate": 0.0007901102612496275, "loss": 0.8825, "step": 2490 }, { "epoch": 0.12416807390483758, "grad_norm": 0.251953125, "learning_rate": 0.000790070527465978, "loss": 0.9092, "step": 2500 }, { "epoch": 0.12466474620045694, "grad_norm": 0.263671875, "learning_rate": 0.0007900307936823284, "loss": 0.8917, "step": 2510 }, { "epoch": 0.1251614184960763, "grad_norm": 0.3125, "learning_rate": 0.0007899910598986789, "loss": 0.8767, "step": 2520 }, { "epoch": 0.12565809079169563, "grad_norm": 0.2353515625, "learning_rate": 0.0007899513261150294, "loss": 0.8803, "step": 2530 }, { "epoch": 0.126154763087315, "grad_norm": 0.2412109375, "learning_rate": 0.0007899115923313798, "loss": 0.895, "step": 2540 }, { "epoch": 0.12665143538293433, "grad_norm": 0.259765625, "learning_rate": 0.0007898718585477302, "loss": 0.9083, "step": 2550 }, { "epoch": 0.1271481076785537, "grad_norm": 0.240234375, "learning_rate": 0.0007898321247640807, "loss": 0.8967, "step": 2560 }, { "epoch": 0.12764477997417303, "grad_norm": 0.2373046875, "learning_rate": 0.0007897923909804312, "loss": 0.8881, "step": 2570 }, { "epoch": 0.1281414522697924, "grad_norm": 0.220703125, "learning_rate": 0.0007897526571967816, "loss": 0.9021, "step": 2580 }, { "epoch": 0.12863812456541174, "grad_norm": 0.306640625, "learning_rate": 0.000789712923413132, "loss": 0.8832, "step": 2590 }, { "epoch": 0.1291347968610311, "grad_norm": 0.255859375, "learning_rate": 0.0007896731896294825, "loss": 0.9154, "step": 2600 }, { "epoch": 0.12963146915665044, "grad_norm": 0.2255859375, "learning_rate": 0.000789633455845833, "loss": 0.8943, "step": 2610 }, { "epoch": 0.13012814145226978, "grad_norm": 0.248046875, "learning_rate": 0.0007895937220621834, "loss": 0.8884, "step": 2620 }, { "epoch": 0.13062481374788915, "grad_norm": 0.248046875, "learning_rate": 0.0007895539882785339, "loss": 0.8645, "step": 2630 }, { "epoch": 0.13112148604350848, "grad_norm": 0.26171875, "learning_rate": 0.0007895142544948843, "loss": 0.8769, "step": 2640 }, { "epoch": 0.13161815833912785, "grad_norm": 0.271484375, "learning_rate": 0.0007894745207112347, "loss": 0.9108, "step": 2650 }, { "epoch": 0.1321148306347472, "grad_norm": 0.267578125, "learning_rate": 0.0007894347869275853, "loss": 0.8442, "step": 2660 }, { "epoch": 0.13261150293036655, "grad_norm": 0.25, "learning_rate": 0.0007893950531439357, "loss": 0.8799, "step": 2670 }, { "epoch": 0.1331081752259859, "grad_norm": 0.2265625, "learning_rate": 0.0007893553193602861, "loss": 0.9038, "step": 2680 }, { "epoch": 0.13360484752160526, "grad_norm": 0.224609375, "learning_rate": 0.0007893155855766366, "loss": 0.8603, "step": 2690 }, { "epoch": 0.1341015198172246, "grad_norm": 0.22265625, "learning_rate": 0.000789275851792987, "loss": 0.8633, "step": 2700 }, { "epoch": 0.13459819211284393, "grad_norm": 0.234375, "learning_rate": 0.0007892361180093375, "loss": 0.8866, "step": 2710 }, { "epoch": 0.1350948644084633, "grad_norm": 0.25, "learning_rate": 0.000789196384225688, "loss": 0.873, "step": 2720 }, { "epoch": 0.13559153670408264, "grad_norm": 0.248046875, "learning_rate": 0.0007891566504420384, "loss": 0.85, "step": 2730 }, { "epoch": 0.136088208999702, "grad_norm": 0.23046875, "learning_rate": 0.0007891169166583888, "loss": 0.8394, "step": 2740 }, { "epoch": 0.13658488129532134, "grad_norm": 0.251953125, "learning_rate": 0.0007890771828747392, "loss": 0.8567, "step": 2750 }, { "epoch": 0.1370815535909407, "grad_norm": 0.265625, "learning_rate": 0.0007890374490910898, "loss": 0.9067, "step": 2760 }, { "epoch": 0.13757822588656005, "grad_norm": 0.25, "learning_rate": 0.0007889977153074402, "loss": 0.8816, "step": 2770 }, { "epoch": 0.1380748981821794, "grad_norm": 0.2265625, "learning_rate": 0.0007889579815237906, "loss": 0.8963, "step": 2780 }, { "epoch": 0.13857157047779875, "grad_norm": 0.251953125, "learning_rate": 0.0007889182477401411, "loss": 0.838, "step": 2790 }, { "epoch": 0.1390682427734181, "grad_norm": 0.212890625, "learning_rate": 0.0007888785139564915, "loss": 0.8586, "step": 2800 }, { "epoch": 0.13956491506903745, "grad_norm": 0.23046875, "learning_rate": 0.000788838780172842, "loss": 0.8659, "step": 2810 }, { "epoch": 0.1400615873646568, "grad_norm": 0.2421875, "learning_rate": 0.0007887990463891925, "loss": 0.9058, "step": 2820 }, { "epoch": 0.14055825966027616, "grad_norm": 0.228515625, "learning_rate": 0.0007887593126055429, "loss": 0.8714, "step": 2830 }, { "epoch": 0.1410549319558955, "grad_norm": 0.2294921875, "learning_rate": 0.0007887195788218933, "loss": 0.8725, "step": 2840 }, { "epoch": 0.14155160425151486, "grad_norm": 0.2353515625, "learning_rate": 0.0007886798450382438, "loss": 0.8797, "step": 2850 }, { "epoch": 0.1420482765471342, "grad_norm": 0.2294921875, "learning_rate": 0.0007886401112545943, "loss": 0.866, "step": 2860 }, { "epoch": 0.14254494884275354, "grad_norm": 0.2470703125, "learning_rate": 0.0007886003774709447, "loss": 0.8559, "step": 2870 }, { "epoch": 0.1430416211383729, "grad_norm": 0.25, "learning_rate": 0.0007885606436872952, "loss": 0.8683, "step": 2880 }, { "epoch": 0.14353829343399224, "grad_norm": 0.216796875, "learning_rate": 0.0007885209099036456, "loss": 0.8658, "step": 2890 }, { "epoch": 0.1440349657296116, "grad_norm": 0.234375, "learning_rate": 0.000788481176119996, "loss": 0.8778, "step": 2900 }, { "epoch": 0.14453163802523095, "grad_norm": 0.2255859375, "learning_rate": 0.0007884414423363466, "loss": 0.8682, "step": 2910 }, { "epoch": 0.1450283103208503, "grad_norm": 0.25390625, "learning_rate": 0.000788401708552697, "loss": 0.8524, "step": 2920 }, { "epoch": 0.14552498261646965, "grad_norm": 0.251953125, "learning_rate": 0.0007883619747690474, "loss": 0.8535, "step": 2930 }, { "epoch": 0.14602165491208902, "grad_norm": 0.216796875, "learning_rate": 0.0007883222409853979, "loss": 0.8414, "step": 2940 }, { "epoch": 0.14651832720770835, "grad_norm": 0.271484375, "learning_rate": 0.0007882825072017483, "loss": 0.8294, "step": 2950 }, { "epoch": 0.1470149995033277, "grad_norm": 0.22265625, "learning_rate": 0.0007882427734180987, "loss": 0.8701, "step": 2960 }, { "epoch": 0.14751167179894706, "grad_norm": 0.259765625, "learning_rate": 0.0007882030396344492, "loss": 0.8428, "step": 2970 }, { "epoch": 0.1480083440945664, "grad_norm": 0.2265625, "learning_rate": 0.0007881633058507997, "loss": 0.8341, "step": 2980 }, { "epoch": 0.14850501639018576, "grad_norm": 0.2578125, "learning_rate": 0.0007881235720671502, "loss": 0.839, "step": 2990 }, { "epoch": 0.1490016886858051, "grad_norm": 0.2333984375, "learning_rate": 0.0007880838382835005, "loss": 0.8262, "step": 3000 }, { "epoch": 0.14949836098142447, "grad_norm": 0.2265625, "learning_rate": 0.000788044104499851, "loss": 0.8626, "step": 3010 }, { "epoch": 0.1499950332770438, "grad_norm": 0.240234375, "learning_rate": 0.0007880043707162015, "loss": 0.8461, "step": 3020 }, { "epoch": 0.15049170557266317, "grad_norm": 0.208984375, "learning_rate": 0.0007879646369325519, "loss": 0.8441, "step": 3030 }, { "epoch": 0.1509883778682825, "grad_norm": 0.216796875, "learning_rate": 0.0007879249031489024, "loss": 0.8595, "step": 3040 }, { "epoch": 0.15148505016390185, "grad_norm": 0.265625, "learning_rate": 0.0007878851693652528, "loss": 0.877, "step": 3050 }, { "epoch": 0.1519817224595212, "grad_norm": 0.21875, "learning_rate": 0.0007878454355816032, "loss": 0.8823, "step": 3060 }, { "epoch": 0.15247839475514055, "grad_norm": 0.25390625, "learning_rate": 0.0007878057017979538, "loss": 0.8481, "step": 3070 }, { "epoch": 0.15297506705075992, "grad_norm": 0.23828125, "learning_rate": 0.0007877659680143042, "loss": 0.8838, "step": 3080 }, { "epoch": 0.15347173934637925, "grad_norm": 0.2392578125, "learning_rate": 0.0007877262342306546, "loss": 0.8341, "step": 3090 }, { "epoch": 0.15396841164199862, "grad_norm": 0.2412109375, "learning_rate": 0.0007876865004470051, "loss": 0.8893, "step": 3100 }, { "epoch": 0.15446508393761796, "grad_norm": 0.2412109375, "learning_rate": 0.0007876467666633555, "loss": 0.8442, "step": 3110 }, { "epoch": 0.1549617562332373, "grad_norm": 0.26171875, "learning_rate": 0.000787607032879706, "loss": 0.837, "step": 3120 }, { "epoch": 0.15545842852885666, "grad_norm": 0.2197265625, "learning_rate": 0.0007875672990960565, "loss": 0.8601, "step": 3130 }, { "epoch": 0.155955100824476, "grad_norm": 0.216796875, "learning_rate": 0.0007875275653124069, "loss": 0.8514, "step": 3140 }, { "epoch": 0.15645177312009537, "grad_norm": 0.2451171875, "learning_rate": 0.0007874878315287574, "loss": 0.886, "step": 3150 }, { "epoch": 0.1569484454157147, "grad_norm": 0.212890625, "learning_rate": 0.0007874480977451077, "loss": 0.8572, "step": 3160 }, { "epoch": 0.15744511771133407, "grad_norm": 0.2294921875, "learning_rate": 0.0007874083639614583, "loss": 0.862, "step": 3170 }, { "epoch": 0.1579417900069534, "grad_norm": 0.2138671875, "learning_rate": 0.0007873686301778088, "loss": 0.8112, "step": 3180 }, { "epoch": 0.15843846230257277, "grad_norm": 0.2470703125, "learning_rate": 0.0007873288963941591, "loss": 0.8726, "step": 3190 }, { "epoch": 0.1589351345981921, "grad_norm": 0.2197265625, "learning_rate": 0.0007872891626105096, "loss": 0.8618, "step": 3200 }, { "epoch": 0.15943180689381145, "grad_norm": 0.228515625, "learning_rate": 0.0007872494288268602, "loss": 0.8528, "step": 3210 }, { "epoch": 0.15992847918943082, "grad_norm": 0.24609375, "learning_rate": 0.0007872096950432105, "loss": 0.8395, "step": 3220 }, { "epoch": 0.16042515148505015, "grad_norm": 0.25, "learning_rate": 0.000787169961259561, "loss": 0.827, "step": 3230 }, { "epoch": 0.16092182378066952, "grad_norm": 0.232421875, "learning_rate": 0.0007871302274759114, "loss": 0.8478, "step": 3240 }, { "epoch": 0.16141849607628886, "grad_norm": 0.236328125, "learning_rate": 0.0007870904936922618, "loss": 0.8247, "step": 3250 }, { "epoch": 0.16191516837190822, "grad_norm": 0.2275390625, "learning_rate": 0.0007870507599086123, "loss": 0.8557, "step": 3260 }, { "epoch": 0.16241184066752756, "grad_norm": 0.2265625, "learning_rate": 0.0007870110261249628, "loss": 0.8532, "step": 3270 }, { "epoch": 0.16290851296314693, "grad_norm": 0.23828125, "learning_rate": 0.0007869712923413133, "loss": 0.8545, "step": 3280 }, { "epoch": 0.16340518525876627, "grad_norm": 0.2578125, "learning_rate": 0.0007869315585576637, "loss": 0.8675, "step": 3290 }, { "epoch": 0.1639018575543856, "grad_norm": 0.203125, "learning_rate": 0.0007868918247740141, "loss": 0.8209, "step": 3300 }, { "epoch": 0.16439852985000497, "grad_norm": 0.2265625, "learning_rate": 0.0007868520909903647, "loss": 0.8332, "step": 3310 }, { "epoch": 0.1648952021456243, "grad_norm": 0.212890625, "learning_rate": 0.0007868123572067151, "loss": 0.8373, "step": 3320 }, { "epoch": 0.16539187444124367, "grad_norm": 0.2333984375, "learning_rate": 0.0007867726234230655, "loss": 0.862, "step": 3330 }, { "epoch": 0.165888546736863, "grad_norm": 0.2060546875, "learning_rate": 0.000786732889639416, "loss": 0.8397, "step": 3340 }, { "epoch": 0.16638521903248238, "grad_norm": 0.248046875, "learning_rate": 0.0007866931558557663, "loss": 0.8448, "step": 3350 }, { "epoch": 0.16688189132810172, "grad_norm": 0.2294921875, "learning_rate": 0.0007866534220721168, "loss": 0.8382, "step": 3360 }, { "epoch": 0.16737856362372106, "grad_norm": 0.23046875, "learning_rate": 0.0007866136882884674, "loss": 0.837, "step": 3370 }, { "epoch": 0.16787523591934042, "grad_norm": 0.2294921875, "learning_rate": 0.0007865739545048177, "loss": 0.8321, "step": 3380 }, { "epoch": 0.16837190821495976, "grad_norm": 0.2255859375, "learning_rate": 0.0007865342207211682, "loss": 0.8347, "step": 3390 }, { "epoch": 0.16886858051057912, "grad_norm": 0.26953125, "learning_rate": 0.0007864944869375187, "loss": 0.879, "step": 3400 }, { "epoch": 0.16936525280619846, "grad_norm": 0.2021484375, "learning_rate": 0.000786454753153869, "loss": 0.8377, "step": 3410 }, { "epoch": 0.16986192510181783, "grad_norm": 0.2138671875, "learning_rate": 0.0007864150193702196, "loss": 0.7883, "step": 3420 }, { "epoch": 0.17035859739743717, "grad_norm": 0.228515625, "learning_rate": 0.00078637528558657, "loss": 0.8456, "step": 3430 }, { "epoch": 0.17085526969305653, "grad_norm": 0.26171875, "learning_rate": 0.0007863355518029205, "loss": 0.8309, "step": 3440 }, { "epoch": 0.17135194198867587, "grad_norm": 0.2314453125, "learning_rate": 0.0007862958180192709, "loss": 0.8715, "step": 3450 }, { "epoch": 0.1718486142842952, "grad_norm": 0.2197265625, "learning_rate": 0.0007862560842356213, "loss": 0.8108, "step": 3460 }, { "epoch": 0.17234528657991458, "grad_norm": 0.2216796875, "learning_rate": 0.0007862163504519719, "loss": 0.8348, "step": 3470 }, { "epoch": 0.1728419588755339, "grad_norm": 0.2138671875, "learning_rate": 0.0007861766166683223, "loss": 0.8579, "step": 3480 }, { "epoch": 0.17333863117115328, "grad_norm": 0.201171875, "learning_rate": 0.0007861368828846727, "loss": 0.8149, "step": 3490 }, { "epoch": 0.17383530346677262, "grad_norm": 0.2412109375, "learning_rate": 0.0007860971491010232, "loss": 0.8031, "step": 3500 }, { "epoch": 0.17433197576239198, "grad_norm": 0.2421875, "learning_rate": 0.0007860574153173736, "loss": 0.8666, "step": 3510 }, { "epoch": 0.17482864805801132, "grad_norm": 0.2060546875, "learning_rate": 0.0007860176815337241, "loss": 0.8271, "step": 3520 }, { "epoch": 0.1753253203536307, "grad_norm": 0.220703125, "learning_rate": 0.0007859779477500746, "loss": 0.8059, "step": 3530 }, { "epoch": 0.17582199264925003, "grad_norm": 0.2001953125, "learning_rate": 0.000785938213966425, "loss": 0.8332, "step": 3540 }, { "epoch": 0.17631866494486936, "grad_norm": 0.2412109375, "learning_rate": 0.0007858984801827754, "loss": 0.827, "step": 3550 }, { "epoch": 0.17681533724048873, "grad_norm": 0.23046875, "learning_rate": 0.000785858746399126, "loss": 0.8217, "step": 3560 }, { "epoch": 0.17731200953610807, "grad_norm": 0.2265625, "learning_rate": 0.0007858190126154763, "loss": 0.8426, "step": 3570 }, { "epoch": 0.17780868183172743, "grad_norm": 0.2294921875, "learning_rate": 0.0007857792788318268, "loss": 0.8383, "step": 3580 }, { "epoch": 0.17830535412734677, "grad_norm": 0.2060546875, "learning_rate": 0.0007857395450481773, "loss": 0.8361, "step": 3590 }, { "epoch": 0.17880202642296614, "grad_norm": 0.2060546875, "learning_rate": 0.0007856998112645277, "loss": 0.815, "step": 3600 }, { "epoch": 0.17929869871858548, "grad_norm": 0.2099609375, "learning_rate": 0.0007856600774808781, "loss": 0.822, "step": 3610 }, { "epoch": 0.17979537101420484, "grad_norm": 0.25390625, "learning_rate": 0.0007856203436972286, "loss": 0.8272, "step": 3620 }, { "epoch": 0.18029204330982418, "grad_norm": 0.251953125, "learning_rate": 0.0007855806099135791, "loss": 0.8239, "step": 3630 }, { "epoch": 0.18078871560544352, "grad_norm": 0.2138671875, "learning_rate": 0.0007855408761299295, "loss": 0.8254, "step": 3640 }, { "epoch": 0.18128538790106288, "grad_norm": 0.236328125, "learning_rate": 0.0007855011423462799, "loss": 0.8422, "step": 3650 }, { "epoch": 0.18178206019668222, "grad_norm": 0.26171875, "learning_rate": 0.0007854614085626304, "loss": 0.8412, "step": 3660 }, { "epoch": 0.1822787324923016, "grad_norm": 0.2373046875, "learning_rate": 0.0007854216747789809, "loss": 0.8247, "step": 3670 }, { "epoch": 0.18277540478792093, "grad_norm": 0.20703125, "learning_rate": 0.0007853819409953313, "loss": 0.8094, "step": 3680 }, { "epoch": 0.1832720770835403, "grad_norm": 0.267578125, "learning_rate": 0.0007853422072116818, "loss": 0.8558, "step": 3690 }, { "epoch": 0.18376874937915963, "grad_norm": 0.2333984375, "learning_rate": 0.0007853024734280322, "loss": 0.8463, "step": 3700 }, { "epoch": 0.18426542167477897, "grad_norm": 0.2412109375, "learning_rate": 0.0007852627396443826, "loss": 0.834, "step": 3710 }, { "epoch": 0.18476209397039833, "grad_norm": 0.2373046875, "learning_rate": 0.0007852230058607332, "loss": 0.8033, "step": 3720 }, { "epoch": 0.18525876626601767, "grad_norm": 0.1982421875, "learning_rate": 0.0007851832720770836, "loss": 0.8285, "step": 3730 }, { "epoch": 0.18575543856163704, "grad_norm": 0.2353515625, "learning_rate": 0.000785143538293434, "loss": 0.7968, "step": 3740 }, { "epoch": 0.18625211085725638, "grad_norm": 0.2451171875, "learning_rate": 0.0007851038045097845, "loss": 0.823, "step": 3750 }, { "epoch": 0.18674878315287574, "grad_norm": 0.2470703125, "learning_rate": 0.0007850640707261349, "loss": 0.8429, "step": 3760 }, { "epoch": 0.18724545544849508, "grad_norm": 0.2373046875, "learning_rate": 0.0007850243369424854, "loss": 0.8029, "step": 3770 }, { "epoch": 0.18774212774411445, "grad_norm": 0.216796875, "learning_rate": 0.0007849846031588359, "loss": 0.7974, "step": 3780 }, { "epoch": 0.18823880003973378, "grad_norm": 0.2119140625, "learning_rate": 0.0007849448693751863, "loss": 0.8167, "step": 3790 }, { "epoch": 0.18873547233535312, "grad_norm": 0.216796875, "learning_rate": 0.0007849051355915367, "loss": 0.8322, "step": 3800 }, { "epoch": 0.1892321446309725, "grad_norm": 0.23046875, "learning_rate": 0.0007848654018078872, "loss": 0.8332, "step": 3810 }, { "epoch": 0.18972881692659183, "grad_norm": 0.2041015625, "learning_rate": 0.0007848256680242377, "loss": 0.8058, "step": 3820 }, { "epoch": 0.1902254892222112, "grad_norm": 0.2197265625, "learning_rate": 0.0007847859342405881, "loss": 0.8551, "step": 3830 }, { "epoch": 0.19072216151783053, "grad_norm": 0.2099609375, "learning_rate": 0.0007847462004569385, "loss": 0.8265, "step": 3840 }, { "epoch": 0.1912188338134499, "grad_norm": 0.2421875, "learning_rate": 0.000784706466673289, "loss": 0.8347, "step": 3850 }, { "epoch": 0.19171550610906923, "grad_norm": 0.216796875, "learning_rate": 0.0007846667328896394, "loss": 0.803, "step": 3860 }, { "epoch": 0.1922121784046886, "grad_norm": 0.2275390625, "learning_rate": 0.0007846269991059899, "loss": 0.8361, "step": 3870 }, { "epoch": 0.19270885070030794, "grad_norm": 0.2392578125, "learning_rate": 0.0007845872653223404, "loss": 0.8125, "step": 3880 }, { "epoch": 0.19320552299592728, "grad_norm": 0.197265625, "learning_rate": 0.0007845475315386908, "loss": 0.8293, "step": 3890 }, { "epoch": 0.19370219529154664, "grad_norm": 0.205078125, "learning_rate": 0.0007845077977550412, "loss": 0.8149, "step": 3900 }, { "epoch": 0.19419886758716598, "grad_norm": 0.2265625, "learning_rate": 0.0007844680639713917, "loss": 0.8187, "step": 3910 }, { "epoch": 0.19469553988278535, "grad_norm": 0.251953125, "learning_rate": 0.0007844283301877422, "loss": 0.8111, "step": 3920 }, { "epoch": 0.19519221217840468, "grad_norm": 0.21875, "learning_rate": 0.0007843885964040926, "loss": 0.8085, "step": 3930 }, { "epoch": 0.19568888447402405, "grad_norm": 0.2294921875, "learning_rate": 0.0007843488626204431, "loss": 0.8489, "step": 3940 }, { "epoch": 0.1961855567696434, "grad_norm": 0.2041015625, "learning_rate": 0.0007843091288367935, "loss": 0.8231, "step": 3950 }, { "epoch": 0.19668222906526273, "grad_norm": 0.2109375, "learning_rate": 0.0007842693950531439, "loss": 0.8353, "step": 3960 }, { "epoch": 0.1971789013608821, "grad_norm": 0.2314453125, "learning_rate": 0.0007842296612694945, "loss": 0.8116, "step": 3970 }, { "epoch": 0.19767557365650143, "grad_norm": 0.2451171875, "learning_rate": 0.0007841899274858449, "loss": 0.8392, "step": 3980 }, { "epoch": 0.1981722459521208, "grad_norm": 0.21484375, "learning_rate": 0.0007841501937021953, "loss": 0.8145, "step": 3990 }, { "epoch": 0.19866891824774013, "grad_norm": 0.1953125, "learning_rate": 0.0007841104599185458, "loss": 0.8646, "step": 4000 }, { "epoch": 0.1991655905433595, "grad_norm": 0.2216796875, "learning_rate": 0.0007840707261348962, "loss": 0.792, "step": 4010 }, { "epoch": 0.19966226283897884, "grad_norm": 0.19921875, "learning_rate": 0.0007840309923512468, "loss": 0.7947, "step": 4020 }, { "epoch": 0.2001589351345982, "grad_norm": 0.216796875, "learning_rate": 0.0007839912585675971, "loss": 0.7848, "step": 4030 }, { "epoch": 0.20065560743021754, "grad_norm": 0.21875, "learning_rate": 0.0007839515247839476, "loss": 0.8112, "step": 4040 }, { "epoch": 0.20115227972583688, "grad_norm": 0.23046875, "learning_rate": 0.0007839117910002981, "loss": 0.8311, "step": 4050 }, { "epoch": 0.20164895202145625, "grad_norm": 0.2021484375, "learning_rate": 0.0007838720572166484, "loss": 0.7896, "step": 4060 }, { "epoch": 0.20214562431707558, "grad_norm": 0.21875, "learning_rate": 0.000783832323432999, "loss": 0.7951, "step": 4070 }, { "epoch": 0.20264229661269495, "grad_norm": 0.228515625, "learning_rate": 0.0007837925896493495, "loss": 0.757, "step": 4080 }, { "epoch": 0.2031389689083143, "grad_norm": 0.2158203125, "learning_rate": 0.0007837528558656998, "loss": 0.7988, "step": 4090 }, { "epoch": 0.20363564120393365, "grad_norm": 0.19140625, "learning_rate": 0.0007837131220820503, "loss": 0.8171, "step": 4100 }, { "epoch": 0.204132313499553, "grad_norm": 0.2275390625, "learning_rate": 0.0007836733882984007, "loss": 0.806, "step": 4110 }, { "epoch": 0.20462898579517236, "grad_norm": 0.197265625, "learning_rate": 0.0007836336545147512, "loss": 0.8052, "step": 4120 }, { "epoch": 0.2051256580907917, "grad_norm": 0.2080078125, "learning_rate": 0.0007835939207311017, "loss": 0.814, "step": 4130 }, { "epoch": 0.20562233038641103, "grad_norm": 0.2099609375, "learning_rate": 0.0007835541869474521, "loss": 0.8323, "step": 4140 }, { "epoch": 0.2061190026820304, "grad_norm": 0.1953125, "learning_rate": 0.0007835144531638025, "loss": 0.7967, "step": 4150 }, { "epoch": 0.20661567497764974, "grad_norm": 0.197265625, "learning_rate": 0.000783474719380153, "loss": 0.7966, "step": 4160 }, { "epoch": 0.2071123472732691, "grad_norm": 0.205078125, "learning_rate": 0.0007834349855965035, "loss": 0.8208, "step": 4170 }, { "epoch": 0.20760901956888844, "grad_norm": 0.23046875, "learning_rate": 0.000783395251812854, "loss": 0.8258, "step": 4180 }, { "epoch": 0.2081056918645078, "grad_norm": 0.251953125, "learning_rate": 0.0007833555180292044, "loss": 0.8061, "step": 4190 }, { "epoch": 0.20860236416012715, "grad_norm": 0.1953125, "learning_rate": 0.0007833157842455548, "loss": 0.824, "step": 4200 }, { "epoch": 0.2090990364557465, "grad_norm": 0.2236328125, "learning_rate": 0.0007832760504619053, "loss": 0.8106, "step": 4210 }, { "epoch": 0.20959570875136585, "grad_norm": 0.23046875, "learning_rate": 0.0007832363166782556, "loss": 0.8123, "step": 4220 }, { "epoch": 0.2100923810469852, "grad_norm": 0.2138671875, "learning_rate": 0.0007831965828946062, "loss": 0.863, "step": 4230 }, { "epoch": 0.21058905334260455, "grad_norm": 0.25, "learning_rate": 0.0007831568491109567, "loss": 0.8205, "step": 4240 }, { "epoch": 0.2110857256382239, "grad_norm": 0.2236328125, "learning_rate": 0.000783117115327307, "loss": 0.8325, "step": 4250 }, { "epoch": 0.21158239793384326, "grad_norm": 0.2412109375, "learning_rate": 0.0007830773815436575, "loss": 0.8257, "step": 4260 }, { "epoch": 0.2120790702294626, "grad_norm": 0.2041015625, "learning_rate": 0.0007830376477600081, "loss": 0.8101, "step": 4270 }, { "epoch": 0.21257574252508196, "grad_norm": 0.2197265625, "learning_rate": 0.0007829979139763584, "loss": 0.8094, "step": 4280 }, { "epoch": 0.2130724148207013, "grad_norm": 0.19921875, "learning_rate": 0.0007829581801927089, "loss": 0.8194, "step": 4290 }, { "epoch": 0.21356908711632064, "grad_norm": 0.234375, "learning_rate": 0.0007829184464090593, "loss": 0.8438, "step": 4300 }, { "epoch": 0.21406575941194, "grad_norm": 0.248046875, "learning_rate": 0.0007828787126254097, "loss": 0.8385, "step": 4310 }, { "epoch": 0.21456243170755934, "grad_norm": 0.1953125, "learning_rate": 0.0007828389788417603, "loss": 0.8205, "step": 4320 }, { "epoch": 0.2150591040031787, "grad_norm": 0.2099609375, "learning_rate": 0.0007827992450581107, "loss": 0.7999, "step": 4330 }, { "epoch": 0.21555577629879805, "grad_norm": 0.2099609375, "learning_rate": 0.0007827595112744612, "loss": 0.7847, "step": 4340 }, { "epoch": 0.2160524485944174, "grad_norm": 0.2001953125, "learning_rate": 0.0007827197774908116, "loss": 0.7887, "step": 4350 }, { "epoch": 0.21654912089003675, "grad_norm": 0.232421875, "learning_rate": 0.000782680043707162, "loss": 0.8135, "step": 4360 }, { "epoch": 0.21704579318565612, "grad_norm": 0.255859375, "learning_rate": 0.0007826403099235126, "loss": 0.8124, "step": 4370 }, { "epoch": 0.21754246548127545, "grad_norm": 0.208984375, "learning_rate": 0.000782600576139863, "loss": 0.7823, "step": 4380 }, { "epoch": 0.2180391377768948, "grad_norm": 0.1953125, "learning_rate": 0.0007825608423562134, "loss": 0.8115, "step": 4390 }, { "epoch": 0.21853581007251416, "grad_norm": 0.2001953125, "learning_rate": 0.0007825211085725639, "loss": 0.8017, "step": 4400 }, { "epoch": 0.2190324823681335, "grad_norm": 0.2236328125, "learning_rate": 0.0007824813747889143, "loss": 0.7821, "step": 4410 }, { "epoch": 0.21952915466375286, "grad_norm": 0.2041015625, "learning_rate": 0.0007824416410052647, "loss": 0.8001, "step": 4420 }, { "epoch": 0.2200258269593722, "grad_norm": 0.2158203125, "learning_rate": 0.0007824019072216153, "loss": 0.7993, "step": 4430 }, { "epoch": 0.22052249925499157, "grad_norm": 0.2099609375, "learning_rate": 0.0007823621734379656, "loss": 0.8269, "step": 4440 }, { "epoch": 0.2210191715506109, "grad_norm": 0.2451171875, "learning_rate": 0.0007823224396543161, "loss": 0.8173, "step": 4450 }, { "epoch": 0.22151584384623027, "grad_norm": 0.2177734375, "learning_rate": 0.0007822827058706666, "loss": 0.7898, "step": 4460 }, { "epoch": 0.2220125161418496, "grad_norm": 0.236328125, "learning_rate": 0.000782242972087017, "loss": 0.8034, "step": 4470 }, { "epoch": 0.22250918843746895, "grad_norm": 0.228515625, "learning_rate": 0.0007822032383033675, "loss": 0.7929, "step": 4480 }, { "epoch": 0.2230058607330883, "grad_norm": 0.2265625, "learning_rate": 0.0007821635045197179, "loss": 0.7875, "step": 4490 }, { "epoch": 0.22350253302870765, "grad_norm": 0.212890625, "learning_rate": 0.0007821237707360684, "loss": 0.7699, "step": 4500 }, { "epoch": 0.22399920532432702, "grad_norm": 0.2080078125, "learning_rate": 0.0007820840369524188, "loss": 0.815, "step": 4510 }, { "epoch": 0.22449587761994635, "grad_norm": 0.2060546875, "learning_rate": 0.0007820443031687692, "loss": 0.7915, "step": 4520 }, { "epoch": 0.22499254991556572, "grad_norm": 0.197265625, "learning_rate": 0.0007820045693851198, "loss": 0.8077, "step": 4530 }, { "epoch": 0.22548922221118506, "grad_norm": 0.2216796875, "learning_rate": 0.0007819648356014702, "loss": 0.7716, "step": 4540 }, { "epoch": 0.2259858945068044, "grad_norm": 0.2001953125, "learning_rate": 0.0007819251018178206, "loss": 0.7838, "step": 4550 }, { "epoch": 0.22648256680242376, "grad_norm": 0.2197265625, "learning_rate": 0.0007818853680341711, "loss": 0.8343, "step": 4560 }, { "epoch": 0.2269792390980431, "grad_norm": 0.2373046875, "learning_rate": 0.0007818456342505215, "loss": 0.7671, "step": 4570 }, { "epoch": 0.22747591139366247, "grad_norm": 0.2060546875, "learning_rate": 0.000781805900466872, "loss": 0.7987, "step": 4580 }, { "epoch": 0.2279725836892818, "grad_norm": 0.212890625, "learning_rate": 0.0007817661666832225, "loss": 0.7885, "step": 4590 }, { "epoch": 0.22846925598490117, "grad_norm": 0.208984375, "learning_rate": 0.0007817264328995729, "loss": 0.7943, "step": 4600 }, { "epoch": 0.2289659282805205, "grad_norm": 0.20703125, "learning_rate": 0.0007816866991159233, "loss": 0.8023, "step": 4610 }, { "epoch": 0.22946260057613987, "grad_norm": 0.2373046875, "learning_rate": 0.0007816469653322739, "loss": 0.7773, "step": 4620 }, { "epoch": 0.2299592728717592, "grad_norm": 0.232421875, "learning_rate": 0.0007816072315486243, "loss": 0.8065, "step": 4630 }, { "epoch": 0.23045594516737855, "grad_norm": 0.19921875, "learning_rate": 0.0007815674977649747, "loss": 0.7635, "step": 4640 }, { "epoch": 0.23095261746299792, "grad_norm": 0.1845703125, "learning_rate": 0.0007815277639813252, "loss": 0.7777, "step": 4650 }, { "epoch": 0.23144928975861725, "grad_norm": 0.201171875, "learning_rate": 0.0007814880301976756, "loss": 0.7775, "step": 4660 }, { "epoch": 0.23194596205423662, "grad_norm": 0.2265625, "learning_rate": 0.000781448296414026, "loss": 0.8366, "step": 4670 }, { "epoch": 0.23244263434985596, "grad_norm": 0.2060546875, "learning_rate": 0.0007814085626303766, "loss": 0.7759, "step": 4680 }, { "epoch": 0.23293930664547532, "grad_norm": 0.2060546875, "learning_rate": 0.000781368828846727, "loss": 0.7545, "step": 4690 }, { "epoch": 0.23343597894109466, "grad_norm": 0.205078125, "learning_rate": 0.0007813290950630774, "loss": 0.8055, "step": 4700 }, { "epoch": 0.23393265123671403, "grad_norm": 0.2021484375, "learning_rate": 0.0007812893612794278, "loss": 0.7886, "step": 4710 }, { "epoch": 0.23442932353233337, "grad_norm": 0.189453125, "learning_rate": 0.0007812496274957783, "loss": 0.8074, "step": 4720 }, { "epoch": 0.2349259958279527, "grad_norm": 0.2119140625, "learning_rate": 0.0007812098937121288, "loss": 0.8098, "step": 4730 }, { "epoch": 0.23542266812357207, "grad_norm": 0.1845703125, "learning_rate": 0.0007811701599284792, "loss": 0.8287, "step": 4740 }, { "epoch": 0.2359193404191914, "grad_norm": 0.2158203125, "learning_rate": 0.0007811304261448297, "loss": 0.7923, "step": 4750 }, { "epoch": 0.23641601271481077, "grad_norm": 0.216796875, "learning_rate": 0.0007810906923611801, "loss": 0.8065, "step": 4760 }, { "epoch": 0.2369126850104301, "grad_norm": 0.20703125, "learning_rate": 0.0007810509585775305, "loss": 0.7706, "step": 4770 }, { "epoch": 0.23740935730604948, "grad_norm": 0.208984375, "learning_rate": 0.0007810112247938811, "loss": 0.8026, "step": 4780 }, { "epoch": 0.23790602960166882, "grad_norm": 0.2294921875, "learning_rate": 0.0007809714910102315, "loss": 0.8084, "step": 4790 }, { "epoch": 0.23840270189728818, "grad_norm": 0.21875, "learning_rate": 0.0007809317572265819, "loss": 0.7945, "step": 4800 }, { "epoch": 0.23889937419290752, "grad_norm": 0.2138671875, "learning_rate": 0.0007808920234429324, "loss": 0.8042, "step": 4810 }, { "epoch": 0.23939604648852686, "grad_norm": 0.1953125, "learning_rate": 0.0007808522896592828, "loss": 0.7626, "step": 4820 }, { "epoch": 0.23989271878414622, "grad_norm": 0.193359375, "learning_rate": 0.0007808125558756333, "loss": 0.7616, "step": 4830 }, { "epoch": 0.24038939107976556, "grad_norm": 0.1962890625, "learning_rate": 0.0007807728220919838, "loss": 0.7974, "step": 4840 }, { "epoch": 0.24088606337538493, "grad_norm": 0.2060546875, "learning_rate": 0.0007807330883083342, "loss": 0.8034, "step": 4850 }, { "epoch": 0.24138273567100427, "grad_norm": 0.2216796875, "learning_rate": 0.0007806933545246846, "loss": 0.8325, "step": 4860 }, { "epoch": 0.24187940796662363, "grad_norm": 0.1962890625, "learning_rate": 0.0007806536207410351, "loss": 0.8055, "step": 4870 }, { "epoch": 0.24237608026224297, "grad_norm": 0.20703125, "learning_rate": 0.0007806138869573856, "loss": 0.7872, "step": 4880 }, { "epoch": 0.2428727525578623, "grad_norm": 0.2275390625, "learning_rate": 0.000780574153173736, "loss": 0.8107, "step": 4890 }, { "epoch": 0.24336942485348168, "grad_norm": 0.197265625, "learning_rate": 0.0007805344193900864, "loss": 0.7817, "step": 4900 }, { "epoch": 0.243866097149101, "grad_norm": 0.2021484375, "learning_rate": 0.0007804946856064369, "loss": 0.8126, "step": 4910 }, { "epoch": 0.24436276944472038, "grad_norm": 0.1875, "learning_rate": 0.0007804549518227874, "loss": 0.7746, "step": 4920 }, { "epoch": 0.24485944174033972, "grad_norm": 0.2119140625, "learning_rate": 0.0007804152180391378, "loss": 0.7759, "step": 4930 }, { "epoch": 0.24535611403595908, "grad_norm": 0.203125, "learning_rate": 0.0007803754842554883, "loss": 0.77, "step": 4940 }, { "epoch": 0.24585278633157842, "grad_norm": 0.181640625, "learning_rate": 0.0007803357504718388, "loss": 0.7883, "step": 4950 }, { "epoch": 0.2463494586271978, "grad_norm": 0.19921875, "learning_rate": 0.0007802960166881891, "loss": 0.7896, "step": 4960 }, { "epoch": 0.24684613092281713, "grad_norm": 0.208984375, "learning_rate": 0.0007802562829045396, "loss": 0.796, "step": 4970 }, { "epoch": 0.24734280321843646, "grad_norm": 0.1962890625, "learning_rate": 0.0007802165491208901, "loss": 0.8069, "step": 4980 }, { "epoch": 0.24783947551405583, "grad_norm": 0.220703125, "learning_rate": 0.0007801768153372405, "loss": 0.8263, "step": 4990 }, { "epoch": 0.24833614780967517, "grad_norm": 0.2275390625, "learning_rate": 0.000780137081553591, "loss": 0.7691, "step": 5000 }, { "epoch": 0.24883282010529453, "grad_norm": 0.18359375, "learning_rate": 0.0007800973477699414, "loss": 0.7552, "step": 5010 }, { "epoch": 0.24932949240091387, "grad_norm": 0.205078125, "learning_rate": 0.0007800576139862918, "loss": 0.7764, "step": 5020 }, { "epoch": 0.24982616469653324, "grad_norm": 0.197265625, "learning_rate": 0.0007800178802026424, "loss": 0.7935, "step": 5030 }, { "epoch": 0.2503228369921526, "grad_norm": 0.2255859375, "learning_rate": 0.0007799781464189928, "loss": 0.7723, "step": 5040 }, { "epoch": 0.2508195092877719, "grad_norm": 0.1982421875, "learning_rate": 0.0007799384126353432, "loss": 0.7674, "step": 5050 }, { "epoch": 0.25131618158339125, "grad_norm": 0.1962890625, "learning_rate": 0.0007798986788516937, "loss": 0.7987, "step": 5060 }, { "epoch": 0.25181285387901065, "grad_norm": 0.19921875, "learning_rate": 0.0007798589450680441, "loss": 0.8071, "step": 5070 }, { "epoch": 0.25230952617463, "grad_norm": 0.203125, "learning_rate": 0.0007798192112843947, "loss": 0.8022, "step": 5080 }, { "epoch": 0.2528061984702493, "grad_norm": 0.2255859375, "learning_rate": 0.000779779477500745, "loss": 0.7887, "step": 5090 }, { "epoch": 0.25330287076586866, "grad_norm": 0.20703125, "learning_rate": 0.0007797397437170955, "loss": 0.7665, "step": 5100 }, { "epoch": 0.25379954306148805, "grad_norm": 0.1982421875, "learning_rate": 0.000779700009933446, "loss": 0.8016, "step": 5110 }, { "epoch": 0.2542962153571074, "grad_norm": 0.21484375, "learning_rate": 0.0007796602761497963, "loss": 0.7871, "step": 5120 }, { "epoch": 0.25479288765272673, "grad_norm": 0.2021484375, "learning_rate": 0.0007796205423661469, "loss": 0.8054, "step": 5130 }, { "epoch": 0.25528955994834607, "grad_norm": 0.208984375, "learning_rate": 0.0007795808085824974, "loss": 0.7859, "step": 5140 }, { "epoch": 0.2557862322439654, "grad_norm": 0.1953125, "learning_rate": 0.0007795410747988477, "loss": 0.7812, "step": 5150 }, { "epoch": 0.2562829045395848, "grad_norm": 0.181640625, "learning_rate": 0.0007795013410151982, "loss": 0.7794, "step": 5160 }, { "epoch": 0.25677957683520414, "grad_norm": 0.23046875, "learning_rate": 0.0007794616072315486, "loss": 0.7664, "step": 5170 }, { "epoch": 0.2572762491308235, "grad_norm": 0.1904296875, "learning_rate": 0.000779421873447899, "loss": 0.7864, "step": 5180 }, { "epoch": 0.2577729214264428, "grad_norm": 0.255859375, "learning_rate": 0.0007793821396642496, "loss": 0.8203, "step": 5190 }, { "epoch": 0.2582695937220622, "grad_norm": 0.1796875, "learning_rate": 0.0007793424058806, "loss": 0.7712, "step": 5200 }, { "epoch": 0.25876626601768155, "grad_norm": 0.259765625, "learning_rate": 0.0007793026720969504, "loss": 0.7664, "step": 5210 }, { "epoch": 0.2592629383133009, "grad_norm": 0.1953125, "learning_rate": 0.0007792629383133009, "loss": 0.7916, "step": 5220 }, { "epoch": 0.2597596106089202, "grad_norm": 0.2197265625, "learning_rate": 0.0007792232045296514, "loss": 0.7596, "step": 5230 }, { "epoch": 0.26025628290453956, "grad_norm": 0.2255859375, "learning_rate": 0.0007791834707460019, "loss": 0.7672, "step": 5240 }, { "epoch": 0.26075295520015895, "grad_norm": 0.1982421875, "learning_rate": 0.0007791437369623523, "loss": 0.7835, "step": 5250 }, { "epoch": 0.2612496274957783, "grad_norm": 0.208984375, "learning_rate": 0.0007791040031787027, "loss": 0.7874, "step": 5260 }, { "epoch": 0.26174629979139763, "grad_norm": 0.21875, "learning_rate": 0.0007790642693950532, "loss": 0.7597, "step": 5270 }, { "epoch": 0.26224297208701697, "grad_norm": 0.185546875, "learning_rate": 0.0007790245356114037, "loss": 0.7825, "step": 5280 }, { "epoch": 0.26273964438263636, "grad_norm": 0.2177734375, "learning_rate": 0.0007789848018277541, "loss": 0.8065, "step": 5290 }, { "epoch": 0.2632363166782557, "grad_norm": 0.21484375, "learning_rate": 0.0007789450680441046, "loss": 0.7338, "step": 5300 }, { "epoch": 0.26373298897387504, "grad_norm": 0.1923828125, "learning_rate": 0.0007789053342604549, "loss": 0.769, "step": 5310 }, { "epoch": 0.2642296612694944, "grad_norm": 0.193359375, "learning_rate": 0.0007788656004768054, "loss": 0.776, "step": 5320 }, { "epoch": 0.2647263335651137, "grad_norm": 0.189453125, "learning_rate": 0.000778825866693156, "loss": 0.8244, "step": 5330 }, { "epoch": 0.2652230058607331, "grad_norm": 0.1962890625, "learning_rate": 0.0007787861329095063, "loss": 0.7883, "step": 5340 }, { "epoch": 0.26571967815635245, "grad_norm": 0.212890625, "learning_rate": 0.0007787463991258568, "loss": 0.7951, "step": 5350 }, { "epoch": 0.2662163504519718, "grad_norm": 0.2060546875, "learning_rate": 0.0007787066653422072, "loss": 0.8077, "step": 5360 }, { "epoch": 0.2667130227475911, "grad_norm": 0.197265625, "learning_rate": 0.0007786669315585577, "loss": 0.7914, "step": 5370 }, { "epoch": 0.2672096950432105, "grad_norm": 0.19140625, "learning_rate": 0.0007786271977749082, "loss": 0.7563, "step": 5380 }, { "epoch": 0.26770636733882985, "grad_norm": 0.224609375, "learning_rate": 0.0007785874639912586, "loss": 0.7683, "step": 5390 }, { "epoch": 0.2682030396344492, "grad_norm": 0.1845703125, "learning_rate": 0.0007785477302076091, "loss": 0.7949, "step": 5400 }, { "epoch": 0.26869971193006853, "grad_norm": 0.18359375, "learning_rate": 0.0007785079964239595, "loss": 0.8195, "step": 5410 }, { "epoch": 0.26919638422568787, "grad_norm": 0.1962890625, "learning_rate": 0.0007784682626403099, "loss": 0.7995, "step": 5420 }, { "epoch": 0.26969305652130726, "grad_norm": 0.173828125, "learning_rate": 0.0007784285288566605, "loss": 0.7776, "step": 5430 }, { "epoch": 0.2701897288169266, "grad_norm": 0.19921875, "learning_rate": 0.0007783887950730109, "loss": 0.7899, "step": 5440 }, { "epoch": 0.27068640111254594, "grad_norm": 0.2080078125, "learning_rate": 0.0007783490612893613, "loss": 0.8031, "step": 5450 }, { "epoch": 0.2711830734081653, "grad_norm": 0.1962890625, "learning_rate": 0.0007783093275057118, "loss": 0.749, "step": 5460 }, { "epoch": 0.27167974570378467, "grad_norm": 0.224609375, "learning_rate": 0.0007782695937220622, "loss": 0.7685, "step": 5470 }, { "epoch": 0.272176417999404, "grad_norm": 0.216796875, "learning_rate": 0.0007782298599384127, "loss": 0.765, "step": 5480 }, { "epoch": 0.27267309029502335, "grad_norm": 0.2001953125, "learning_rate": 0.0007781901261547632, "loss": 0.7843, "step": 5490 }, { "epoch": 0.2731697625906427, "grad_norm": 0.2080078125, "learning_rate": 0.0007781503923711135, "loss": 0.7801, "step": 5500 }, { "epoch": 0.273666434886262, "grad_norm": 0.1865234375, "learning_rate": 0.000778110658587464, "loss": 0.7614, "step": 5510 }, { "epoch": 0.2741631071818814, "grad_norm": 0.2119140625, "learning_rate": 0.0007780709248038145, "loss": 0.8162, "step": 5520 }, { "epoch": 0.27465977947750075, "grad_norm": 0.1875, "learning_rate": 0.000778031191020165, "loss": 0.7625, "step": 5530 }, { "epoch": 0.2751564517731201, "grad_norm": 0.1943359375, "learning_rate": 0.0007779914572365154, "loss": 0.7837, "step": 5540 }, { "epoch": 0.27565312406873943, "grad_norm": 0.1953125, "learning_rate": 0.0007779517234528659, "loss": 0.7613, "step": 5550 }, { "epoch": 0.2761497963643588, "grad_norm": 0.244140625, "learning_rate": 0.0007779119896692163, "loss": 0.7512, "step": 5560 }, { "epoch": 0.27664646865997816, "grad_norm": 0.1865234375, "learning_rate": 0.0007778722558855667, "loss": 0.7763, "step": 5570 }, { "epoch": 0.2771431409555975, "grad_norm": 0.1982421875, "learning_rate": 0.0007778325221019171, "loss": 0.7752, "step": 5580 }, { "epoch": 0.27763981325121684, "grad_norm": 0.177734375, "learning_rate": 0.0007777927883182677, "loss": 0.8011, "step": 5590 }, { "epoch": 0.2781364855468362, "grad_norm": 0.224609375, "learning_rate": 0.0007777530545346181, "loss": 0.7834, "step": 5600 }, { "epoch": 0.27863315784245557, "grad_norm": 0.2119140625, "learning_rate": 0.0007777133207509685, "loss": 0.8148, "step": 5610 }, { "epoch": 0.2791298301380749, "grad_norm": 0.1953125, "learning_rate": 0.000777673586967319, "loss": 0.7647, "step": 5620 }, { "epoch": 0.27962650243369425, "grad_norm": 0.193359375, "learning_rate": 0.0007776338531836695, "loss": 0.7446, "step": 5630 }, { "epoch": 0.2801231747293136, "grad_norm": 0.2001953125, "learning_rate": 0.0007775941194000199, "loss": 0.7964, "step": 5640 }, { "epoch": 0.2806198470249329, "grad_norm": 0.197265625, "learning_rate": 0.0007775543856163704, "loss": 0.7563, "step": 5650 }, { "epoch": 0.2811165193205523, "grad_norm": 0.1845703125, "learning_rate": 0.0007775146518327208, "loss": 0.7848, "step": 5660 }, { "epoch": 0.28161319161617165, "grad_norm": 0.244140625, "learning_rate": 0.0007774749180490712, "loss": 0.7805, "step": 5670 }, { "epoch": 0.282109863911791, "grad_norm": 0.205078125, "learning_rate": 0.0007774351842654218, "loss": 0.7383, "step": 5680 }, { "epoch": 0.28260653620741033, "grad_norm": 0.2021484375, "learning_rate": 0.0007773954504817722, "loss": 0.7549, "step": 5690 }, { "epoch": 0.2831032085030297, "grad_norm": 0.2177734375, "learning_rate": 0.0007773557166981226, "loss": 0.7672, "step": 5700 }, { "epoch": 0.28359988079864906, "grad_norm": 0.21484375, "learning_rate": 0.0007773159829144731, "loss": 0.7659, "step": 5710 }, { "epoch": 0.2840965530942684, "grad_norm": 0.1982421875, "learning_rate": 0.0007772762491308235, "loss": 0.7696, "step": 5720 }, { "epoch": 0.28459322538988774, "grad_norm": 0.220703125, "learning_rate": 0.000777236515347174, "loss": 0.7976, "step": 5730 }, { "epoch": 0.2850898976855071, "grad_norm": 0.1826171875, "learning_rate": 0.0007771967815635245, "loss": 0.7625, "step": 5740 }, { "epoch": 0.28558656998112647, "grad_norm": 0.2021484375, "learning_rate": 0.0007771570477798749, "loss": 0.8234, "step": 5750 }, { "epoch": 0.2860832422767458, "grad_norm": 0.244140625, "learning_rate": 0.0007771173139962253, "loss": 0.7991, "step": 5760 }, { "epoch": 0.28657991457236515, "grad_norm": 0.1767578125, "learning_rate": 0.0007770775802125757, "loss": 0.7791, "step": 5770 }, { "epoch": 0.2870765868679845, "grad_norm": 0.2119140625, "learning_rate": 0.0007770378464289263, "loss": 0.8082, "step": 5780 }, { "epoch": 0.2875732591636039, "grad_norm": 0.193359375, "learning_rate": 0.0007769981126452767, "loss": 0.7729, "step": 5790 }, { "epoch": 0.2880699314592232, "grad_norm": 0.2041015625, "learning_rate": 0.0007769583788616271, "loss": 0.7724, "step": 5800 }, { "epoch": 0.28856660375484255, "grad_norm": 0.19140625, "learning_rate": 0.0007769186450779776, "loss": 0.7698, "step": 5810 }, { "epoch": 0.2890632760504619, "grad_norm": 0.20703125, "learning_rate": 0.000776878911294328, "loss": 0.7587, "step": 5820 }, { "epoch": 0.28955994834608123, "grad_norm": 0.1982421875, "learning_rate": 0.0007768391775106784, "loss": 0.7634, "step": 5830 }, { "epoch": 0.2900566206417006, "grad_norm": 0.2080078125, "learning_rate": 0.000776799443727029, "loss": 0.7557, "step": 5840 }, { "epoch": 0.29055329293731996, "grad_norm": 0.177734375, "learning_rate": 0.0007767597099433794, "loss": 0.7405, "step": 5850 }, { "epoch": 0.2910499652329393, "grad_norm": 0.18359375, "learning_rate": 0.0007767199761597298, "loss": 0.7826, "step": 5860 }, { "epoch": 0.29154663752855864, "grad_norm": 0.1884765625, "learning_rate": 0.0007766802423760803, "loss": 0.7608, "step": 5870 }, { "epoch": 0.29204330982417803, "grad_norm": 0.2236328125, "learning_rate": 0.0007766405085924307, "loss": 0.739, "step": 5880 }, { "epoch": 0.29253998211979737, "grad_norm": 0.19921875, "learning_rate": 0.0007766007748087812, "loss": 0.7819, "step": 5890 }, { "epoch": 0.2930366544154167, "grad_norm": 0.19921875, "learning_rate": 0.0007765610410251317, "loss": 0.7584, "step": 5900 }, { "epoch": 0.29353332671103605, "grad_norm": 0.24609375, "learning_rate": 0.0007765213072414821, "loss": 0.7396, "step": 5910 }, { "epoch": 0.2940299990066554, "grad_norm": 0.205078125, "learning_rate": 0.0007764815734578325, "loss": 0.7617, "step": 5920 }, { "epoch": 0.2945266713022748, "grad_norm": 0.2080078125, "learning_rate": 0.000776441839674183, "loss": 0.7464, "step": 5930 }, { "epoch": 0.2950233435978941, "grad_norm": 0.1962890625, "learning_rate": 0.0007764021058905335, "loss": 0.757, "step": 5940 }, { "epoch": 0.29552001589351345, "grad_norm": 0.1767578125, "learning_rate": 0.0007763623721068839, "loss": 0.7461, "step": 5950 }, { "epoch": 0.2960166881891328, "grad_norm": 0.1923828125, "learning_rate": 0.0007763226383232343, "loss": 0.77, "step": 5960 }, { "epoch": 0.2965133604847522, "grad_norm": 0.181640625, "learning_rate": 0.0007762829045395848, "loss": 0.7571, "step": 5970 }, { "epoch": 0.2970100327803715, "grad_norm": 0.2119140625, "learning_rate": 0.0007762431707559354, "loss": 0.7764, "step": 5980 }, { "epoch": 0.29750670507599086, "grad_norm": 0.208984375, "learning_rate": 0.0007762034369722857, "loss": 0.7419, "step": 5990 }, { "epoch": 0.2980033773716102, "grad_norm": 0.1962890625, "learning_rate": 0.0007761637031886362, "loss": 0.7873, "step": 6000 }, { "epoch": 0.29850004966722954, "grad_norm": 0.2001953125, "learning_rate": 0.0007761239694049867, "loss": 0.7525, "step": 6010 }, { "epoch": 0.29899672196284893, "grad_norm": 0.1953125, "learning_rate": 0.000776084235621337, "loss": 0.7782, "step": 6020 }, { "epoch": 0.29949339425846827, "grad_norm": 0.2216796875, "learning_rate": 0.0007760445018376875, "loss": 0.7807, "step": 6030 }, { "epoch": 0.2999900665540876, "grad_norm": 0.1943359375, "learning_rate": 0.000776004768054038, "loss": 0.7586, "step": 6040 }, { "epoch": 0.30048673884970695, "grad_norm": 0.169921875, "learning_rate": 0.0007759650342703884, "loss": 0.7722, "step": 6050 }, { "epoch": 0.30098341114532634, "grad_norm": 0.181640625, "learning_rate": 0.0007759253004867389, "loss": 0.7659, "step": 6060 }, { "epoch": 0.3014800834409457, "grad_norm": 0.1787109375, "learning_rate": 0.0007758855667030893, "loss": 0.7636, "step": 6070 }, { "epoch": 0.301976755736565, "grad_norm": 0.1953125, "learning_rate": 0.0007758458329194397, "loss": 0.7604, "step": 6080 }, { "epoch": 0.30247342803218435, "grad_norm": 0.177734375, "learning_rate": 0.0007758060991357903, "loss": 0.7528, "step": 6090 }, { "epoch": 0.3029701003278037, "grad_norm": 0.189453125, "learning_rate": 0.0007757663653521407, "loss": 0.7645, "step": 6100 }, { "epoch": 0.3034667726234231, "grad_norm": 0.201171875, "learning_rate": 0.0007757266315684912, "loss": 0.7779, "step": 6110 }, { "epoch": 0.3039634449190424, "grad_norm": 0.2265625, "learning_rate": 0.0007756868977848416, "loss": 0.758, "step": 6120 }, { "epoch": 0.30446011721466176, "grad_norm": 0.189453125, "learning_rate": 0.000775647164001192, "loss": 0.7451, "step": 6130 }, { "epoch": 0.3049567895102811, "grad_norm": 0.1875, "learning_rate": 0.0007756074302175426, "loss": 0.7465, "step": 6140 }, { "epoch": 0.30545346180590044, "grad_norm": 0.1796875, "learning_rate": 0.0007755676964338929, "loss": 0.7755, "step": 6150 }, { "epoch": 0.30595013410151983, "grad_norm": 0.234375, "learning_rate": 0.0007755279626502434, "loss": 0.7573, "step": 6160 }, { "epoch": 0.30644680639713917, "grad_norm": 0.1728515625, "learning_rate": 0.0007754882288665939, "loss": 0.7536, "step": 6170 }, { "epoch": 0.3069434786927585, "grad_norm": 0.19140625, "learning_rate": 0.0007754484950829442, "loss": 0.7855, "step": 6180 }, { "epoch": 0.30744015098837785, "grad_norm": 0.1865234375, "learning_rate": 0.0007754087612992948, "loss": 0.756, "step": 6190 }, { "epoch": 0.30793682328399724, "grad_norm": 0.197265625, "learning_rate": 0.0007753690275156453, "loss": 0.7682, "step": 6200 }, { "epoch": 0.3084334955796166, "grad_norm": 0.1962890625, "learning_rate": 0.0007753292937319956, "loss": 0.7463, "step": 6210 }, { "epoch": 0.3089301678752359, "grad_norm": 0.2080078125, "learning_rate": 0.0007752895599483461, "loss": 0.7839, "step": 6220 }, { "epoch": 0.30942684017085526, "grad_norm": 0.2080078125, "learning_rate": 0.0007752498261646965, "loss": 0.7485, "step": 6230 }, { "epoch": 0.3099235124664746, "grad_norm": 0.1826171875, "learning_rate": 0.000775210092381047, "loss": 0.7452, "step": 6240 }, { "epoch": 0.310420184762094, "grad_norm": 0.19140625, "learning_rate": 0.0007751703585973975, "loss": 0.7546, "step": 6250 }, { "epoch": 0.3109168570577133, "grad_norm": 0.1875, "learning_rate": 0.0007751306248137479, "loss": 0.7653, "step": 6260 }, { "epoch": 0.31141352935333266, "grad_norm": 0.185546875, "learning_rate": 0.0007750908910300984, "loss": 0.7725, "step": 6270 }, { "epoch": 0.311910201648952, "grad_norm": 0.1806640625, "learning_rate": 0.0007750511572464488, "loss": 0.7692, "step": 6280 }, { "epoch": 0.3124068739445714, "grad_norm": 0.29296875, "learning_rate": 0.0007750114234627993, "loss": 0.7469, "step": 6290 }, { "epoch": 0.31290354624019073, "grad_norm": 0.2001953125, "learning_rate": 0.0007749716896791498, "loss": 0.7579, "step": 6300 }, { "epoch": 0.31340021853581007, "grad_norm": 0.197265625, "learning_rate": 0.0007749319558955002, "loss": 0.7455, "step": 6310 }, { "epoch": 0.3138968908314294, "grad_norm": 0.2158203125, "learning_rate": 0.0007748922221118506, "loss": 0.7563, "step": 6320 }, { "epoch": 0.31439356312704875, "grad_norm": 0.1875, "learning_rate": 0.0007748524883282011, "loss": 0.7535, "step": 6330 }, { "epoch": 0.31489023542266814, "grad_norm": 0.1962890625, "learning_rate": 0.0007748127545445516, "loss": 0.7547, "step": 6340 }, { "epoch": 0.3153869077182875, "grad_norm": 0.1826171875, "learning_rate": 0.000774773020760902, "loss": 0.7392, "step": 6350 }, { "epoch": 0.3158835800139068, "grad_norm": 0.189453125, "learning_rate": 0.0007747332869772525, "loss": 0.7651, "step": 6360 }, { "epoch": 0.31638025230952616, "grad_norm": 0.1796875, "learning_rate": 0.0007746935531936028, "loss": 0.7575, "step": 6370 }, { "epoch": 0.31687692460514555, "grad_norm": 0.21484375, "learning_rate": 0.0007746538194099533, "loss": 0.7446, "step": 6380 }, { "epoch": 0.3173735969007649, "grad_norm": 0.166015625, "learning_rate": 0.0007746140856263039, "loss": 0.7425, "step": 6390 }, { "epoch": 0.3178702691963842, "grad_norm": 0.1748046875, "learning_rate": 0.0007745743518426542, "loss": 0.7696, "step": 6400 }, { "epoch": 0.31836694149200356, "grad_norm": 0.216796875, "learning_rate": 0.0007745346180590047, "loss": 0.7632, "step": 6410 }, { "epoch": 0.3188636137876229, "grad_norm": 0.2138671875, "learning_rate": 0.0007744948842753551, "loss": 0.7838, "step": 6420 }, { "epoch": 0.3193602860832423, "grad_norm": 0.1943359375, "learning_rate": 0.0007744551504917056, "loss": 0.7685, "step": 6430 }, { "epoch": 0.31985695837886163, "grad_norm": 0.1689453125, "learning_rate": 0.0007744154167080561, "loss": 0.7652, "step": 6440 }, { "epoch": 0.32035363067448097, "grad_norm": 0.1806640625, "learning_rate": 0.0007743756829244065, "loss": 0.755, "step": 6450 }, { "epoch": 0.3208503029701003, "grad_norm": 0.197265625, "learning_rate": 0.000774335949140757, "loss": 0.7277, "step": 6460 }, { "epoch": 0.3213469752657197, "grad_norm": 0.19140625, "learning_rate": 0.0007742962153571074, "loss": 0.7414, "step": 6470 }, { "epoch": 0.32184364756133904, "grad_norm": 0.201171875, "learning_rate": 0.0007742564815734578, "loss": 0.7466, "step": 6480 }, { "epoch": 0.3223403198569584, "grad_norm": 0.1962890625, "learning_rate": 0.0007742167477898084, "loss": 0.7707, "step": 6490 }, { "epoch": 0.3228369921525777, "grad_norm": 0.166015625, "learning_rate": 0.0007741770140061588, "loss": 0.7345, "step": 6500 }, { "epoch": 0.32333366444819706, "grad_norm": 0.1923828125, "learning_rate": 0.0007741372802225092, "loss": 0.7477, "step": 6510 }, { "epoch": 0.32383033674381645, "grad_norm": 0.1923828125, "learning_rate": 0.0007740975464388597, "loss": 0.7523, "step": 6520 }, { "epoch": 0.3243270090394358, "grad_norm": 0.169921875, "learning_rate": 0.0007740578126552101, "loss": 0.7233, "step": 6530 }, { "epoch": 0.3248236813350551, "grad_norm": 0.17578125, "learning_rate": 0.0007740180788715606, "loss": 0.7454, "step": 6540 }, { "epoch": 0.32532035363067446, "grad_norm": 0.20703125, "learning_rate": 0.0007739783450879111, "loss": 0.8268, "step": 6550 }, { "epoch": 0.32581702592629386, "grad_norm": 0.181640625, "learning_rate": 0.0007739386113042615, "loss": 0.7481, "step": 6560 }, { "epoch": 0.3263136982219132, "grad_norm": 0.1826171875, "learning_rate": 0.0007738988775206119, "loss": 0.742, "step": 6570 }, { "epoch": 0.32681037051753253, "grad_norm": 0.2001953125, "learning_rate": 0.0007738591437369624, "loss": 0.7556, "step": 6580 }, { "epoch": 0.32730704281315187, "grad_norm": 0.19140625, "learning_rate": 0.0007738194099533129, "loss": 0.7327, "step": 6590 }, { "epoch": 0.3278037151087712, "grad_norm": 0.1748046875, "learning_rate": 0.0007737796761696633, "loss": 0.7419, "step": 6600 }, { "epoch": 0.3283003874043906, "grad_norm": 0.181640625, "learning_rate": 0.0007737399423860138, "loss": 0.7433, "step": 6610 }, { "epoch": 0.32879705970000994, "grad_norm": 0.1748046875, "learning_rate": 0.0007737002086023642, "loss": 0.7658, "step": 6620 }, { "epoch": 0.3292937319956293, "grad_norm": 0.1748046875, "learning_rate": 0.0007736604748187146, "loss": 0.7514, "step": 6630 }, { "epoch": 0.3297904042912486, "grad_norm": 0.1796875, "learning_rate": 0.000773620741035065, "loss": 0.7481, "step": 6640 }, { "epoch": 0.330287076586868, "grad_norm": 0.193359375, "learning_rate": 0.0007735810072514156, "loss": 0.741, "step": 6650 }, { "epoch": 0.33078374888248735, "grad_norm": 0.2177734375, "learning_rate": 0.000773541273467766, "loss": 0.7469, "step": 6660 }, { "epoch": 0.3312804211781067, "grad_norm": 0.1806640625, "learning_rate": 0.0007735015396841164, "loss": 0.7719, "step": 6670 }, { "epoch": 0.331777093473726, "grad_norm": 0.1767578125, "learning_rate": 0.0007734618059004669, "loss": 0.7762, "step": 6680 }, { "epoch": 0.33227376576934536, "grad_norm": 0.169921875, "learning_rate": 0.0007734220721168174, "loss": 0.7379, "step": 6690 }, { "epoch": 0.33277043806496476, "grad_norm": 0.19140625, "learning_rate": 0.0007733823383331678, "loss": 0.7259, "step": 6700 }, { "epoch": 0.3332671103605841, "grad_norm": 0.205078125, "learning_rate": 0.0007733426045495183, "loss": 0.7645, "step": 6710 }, { "epoch": 0.33376378265620343, "grad_norm": 0.19140625, "learning_rate": 0.0007733028707658687, "loss": 0.7616, "step": 6720 }, { "epoch": 0.33426045495182277, "grad_norm": 0.1669921875, "learning_rate": 0.0007732631369822191, "loss": 0.7393, "step": 6730 }, { "epoch": 0.3347571272474421, "grad_norm": 0.1904296875, "learning_rate": 0.0007732234031985697, "loss": 0.7931, "step": 6740 }, { "epoch": 0.3352537995430615, "grad_norm": 0.2158203125, "learning_rate": 0.0007731836694149201, "loss": 0.7229, "step": 6750 }, { "epoch": 0.33575047183868084, "grad_norm": 0.1787109375, "learning_rate": 0.0007731439356312705, "loss": 0.7394, "step": 6760 }, { "epoch": 0.3362471441343002, "grad_norm": 0.2119140625, "learning_rate": 0.000773104201847621, "loss": 0.7355, "step": 6770 }, { "epoch": 0.3367438164299195, "grad_norm": 0.2080078125, "learning_rate": 0.0007730644680639714, "loss": 0.7552, "step": 6780 }, { "epoch": 0.3372404887255389, "grad_norm": 0.1591796875, "learning_rate": 0.0007730247342803219, "loss": 0.7536, "step": 6790 }, { "epoch": 0.33773716102115825, "grad_norm": 0.203125, "learning_rate": 0.0007729850004966724, "loss": 0.7641, "step": 6800 }, { "epoch": 0.3382338333167776, "grad_norm": 0.224609375, "learning_rate": 0.0007729452667130228, "loss": 0.7434, "step": 6810 }, { "epoch": 0.3387305056123969, "grad_norm": 0.1962890625, "learning_rate": 0.0007729055329293732, "loss": 0.7578, "step": 6820 }, { "epoch": 0.33922717790801626, "grad_norm": 0.1787109375, "learning_rate": 0.0007728657991457236, "loss": 0.7551, "step": 6830 }, { "epoch": 0.33972385020363566, "grad_norm": 0.1650390625, "learning_rate": 0.0007728260653620742, "loss": 0.7743, "step": 6840 }, { "epoch": 0.340220522499255, "grad_norm": 0.1826171875, "learning_rate": 0.0007727863315784246, "loss": 0.7424, "step": 6850 }, { "epoch": 0.34071719479487433, "grad_norm": 0.1884765625, "learning_rate": 0.000772746597794775, "loss": 0.7685, "step": 6860 }, { "epoch": 0.34121386709049367, "grad_norm": 0.15625, "learning_rate": 0.0007727068640111255, "loss": 0.7318, "step": 6870 }, { "epoch": 0.34171053938611307, "grad_norm": 0.169921875, "learning_rate": 0.000772667130227476, "loss": 0.7452, "step": 6880 }, { "epoch": 0.3422072116817324, "grad_norm": 0.1953125, "learning_rate": 0.0007726273964438263, "loss": 0.7754, "step": 6890 }, { "epoch": 0.34270388397735174, "grad_norm": 0.1787109375, "learning_rate": 0.0007725876626601769, "loss": 0.763, "step": 6900 }, { "epoch": 0.3432005562729711, "grad_norm": 0.1962890625, "learning_rate": 0.0007725479288765273, "loss": 0.7346, "step": 6910 }, { "epoch": 0.3436972285685904, "grad_norm": 0.177734375, "learning_rate": 0.0007725081950928777, "loss": 0.7633, "step": 6920 }, { "epoch": 0.3441939008642098, "grad_norm": 0.185546875, "learning_rate": 0.0007724684613092282, "loss": 0.7258, "step": 6930 }, { "epoch": 0.34469057315982915, "grad_norm": 0.1806640625, "learning_rate": 0.0007724287275255787, "loss": 0.7323, "step": 6940 }, { "epoch": 0.3451872454554485, "grad_norm": 0.166015625, "learning_rate": 0.0007723889937419291, "loss": 0.7417, "step": 6950 }, { "epoch": 0.3456839177510678, "grad_norm": 0.18359375, "learning_rate": 0.0007723492599582796, "loss": 0.7511, "step": 6960 }, { "epoch": 0.3461805900466872, "grad_norm": 0.2060546875, "learning_rate": 0.00077230952617463, "loss": 0.703, "step": 6970 }, { "epoch": 0.34667726234230656, "grad_norm": 0.1943359375, "learning_rate": 0.0007722697923909804, "loss": 0.7316, "step": 6980 }, { "epoch": 0.3471739346379259, "grad_norm": 0.185546875, "learning_rate": 0.000772230058607331, "loss": 0.7663, "step": 6990 }, { "epoch": 0.34767060693354523, "grad_norm": 0.1806640625, "learning_rate": 0.0007721903248236814, "loss": 0.7679, "step": 7000 }, { "epoch": 0.3481672792291646, "grad_norm": 0.1865234375, "learning_rate": 0.0007721505910400319, "loss": 0.7432, "step": 7010 }, { "epoch": 0.34866395152478397, "grad_norm": 0.19921875, "learning_rate": 0.0007721108572563822, "loss": 0.7137, "step": 7020 }, { "epoch": 0.3491606238204033, "grad_norm": 0.1513671875, "learning_rate": 0.0007720711234727327, "loss": 0.7484, "step": 7030 }, { "epoch": 0.34965729611602264, "grad_norm": 0.193359375, "learning_rate": 0.0007720313896890833, "loss": 0.8066, "step": 7040 }, { "epoch": 0.350153968411642, "grad_norm": 0.1904296875, "learning_rate": 0.0007719916559054336, "loss": 0.7591, "step": 7050 }, { "epoch": 0.3506506407072614, "grad_norm": 0.17578125, "learning_rate": 0.0007719519221217841, "loss": 0.7204, "step": 7060 }, { "epoch": 0.3511473130028807, "grad_norm": 0.18359375, "learning_rate": 0.0007719121883381346, "loss": 0.7619, "step": 7070 }, { "epoch": 0.35164398529850005, "grad_norm": 0.171875, "learning_rate": 0.0007718724545544849, "loss": 0.7494, "step": 7080 }, { "epoch": 0.3521406575941194, "grad_norm": 0.16796875, "learning_rate": 0.0007718327207708355, "loss": 0.7119, "step": 7090 }, { "epoch": 0.3526373298897387, "grad_norm": 0.173828125, "learning_rate": 0.0007717929869871859, "loss": 0.74, "step": 7100 }, { "epoch": 0.3531340021853581, "grad_norm": 0.1728515625, "learning_rate": 0.0007717532532035363, "loss": 0.7364, "step": 7110 }, { "epoch": 0.35363067448097746, "grad_norm": 0.18359375, "learning_rate": 0.0007717135194198868, "loss": 0.723, "step": 7120 }, { "epoch": 0.3541273467765968, "grad_norm": 0.1669921875, "learning_rate": 0.0007716737856362372, "loss": 0.7895, "step": 7130 }, { "epoch": 0.35462401907221613, "grad_norm": 0.1787109375, "learning_rate": 0.0007716340518525876, "loss": 0.7586, "step": 7140 }, { "epoch": 0.35512069136783553, "grad_norm": 0.18359375, "learning_rate": 0.0007715943180689382, "loss": 0.7063, "step": 7150 }, { "epoch": 0.35561736366345487, "grad_norm": 0.19921875, "learning_rate": 0.0007715545842852886, "loss": 0.7826, "step": 7160 }, { "epoch": 0.3561140359590742, "grad_norm": 0.1669921875, "learning_rate": 0.0007715148505016391, "loss": 0.7246, "step": 7170 }, { "epoch": 0.35661070825469354, "grad_norm": 0.181640625, "learning_rate": 0.0007714751167179895, "loss": 0.7159, "step": 7180 }, { "epoch": 0.3571073805503129, "grad_norm": 0.1787109375, "learning_rate": 0.00077143538293434, "loss": 0.7204, "step": 7190 }, { "epoch": 0.3576040528459323, "grad_norm": 0.1904296875, "learning_rate": 0.0007713956491506905, "loss": 0.7538, "step": 7200 }, { "epoch": 0.3581007251415516, "grad_norm": 0.1767578125, "learning_rate": 0.0007713559153670409, "loss": 0.7469, "step": 7210 }, { "epoch": 0.35859739743717095, "grad_norm": 0.181640625, "learning_rate": 0.0007713161815833913, "loss": 0.7408, "step": 7220 }, { "epoch": 0.3590940697327903, "grad_norm": 0.1748046875, "learning_rate": 0.0007712764477997418, "loss": 0.7432, "step": 7230 }, { "epoch": 0.3595907420284097, "grad_norm": 0.1796875, "learning_rate": 0.0007712367140160921, "loss": 0.7366, "step": 7240 }, { "epoch": 0.360087414324029, "grad_norm": 0.1806640625, "learning_rate": 0.0007711969802324427, "loss": 0.728, "step": 7250 }, { "epoch": 0.36058408661964836, "grad_norm": 0.1806640625, "learning_rate": 0.0007711572464487932, "loss": 0.7584, "step": 7260 }, { "epoch": 0.3610807589152677, "grad_norm": 0.158203125, "learning_rate": 0.0007711175126651435, "loss": 0.6997, "step": 7270 }, { "epoch": 0.36157743121088703, "grad_norm": 0.1806640625, "learning_rate": 0.000771077778881494, "loss": 0.7144, "step": 7280 }, { "epoch": 0.36207410350650643, "grad_norm": 0.19921875, "learning_rate": 0.0007710380450978444, "loss": 0.728, "step": 7290 }, { "epoch": 0.36257077580212577, "grad_norm": 0.1650390625, "learning_rate": 0.0007709983113141949, "loss": 0.7011, "step": 7300 }, { "epoch": 0.3630674480977451, "grad_norm": 0.1962890625, "learning_rate": 0.0007709585775305454, "loss": 0.7655, "step": 7310 }, { "epoch": 0.36356412039336444, "grad_norm": 0.1875, "learning_rate": 0.0007709188437468958, "loss": 0.7247, "step": 7320 }, { "epoch": 0.3640607926889838, "grad_norm": 0.15625, "learning_rate": 0.0007708791099632463, "loss": 0.7441, "step": 7330 }, { "epoch": 0.3645574649846032, "grad_norm": 0.21484375, "learning_rate": 0.0007708393761795967, "loss": 0.7515, "step": 7340 }, { "epoch": 0.3650541372802225, "grad_norm": 0.177734375, "learning_rate": 0.0007707996423959472, "loss": 0.7171, "step": 7350 }, { "epoch": 0.36555080957584185, "grad_norm": 0.1689453125, "learning_rate": 0.0007707599086122977, "loss": 0.6862, "step": 7360 }, { "epoch": 0.3660474818714612, "grad_norm": 0.1630859375, "learning_rate": 0.0007707201748286481, "loss": 0.706, "step": 7370 }, { "epoch": 0.3665441541670806, "grad_norm": 0.216796875, "learning_rate": 0.0007706804410449985, "loss": 0.7725, "step": 7380 }, { "epoch": 0.3670408264626999, "grad_norm": 0.15625, "learning_rate": 0.000770640707261349, "loss": 0.7314, "step": 7390 }, { "epoch": 0.36753749875831926, "grad_norm": 0.1796875, "learning_rate": 0.0007706009734776995, "loss": 0.7398, "step": 7400 }, { "epoch": 0.3680341710539386, "grad_norm": 0.1806640625, "learning_rate": 0.0007705612396940499, "loss": 0.7266, "step": 7410 }, { "epoch": 0.36853084334955793, "grad_norm": 0.173828125, "learning_rate": 0.0007705215059104004, "loss": 0.7625, "step": 7420 }, { "epoch": 0.36902751564517733, "grad_norm": 0.1708984375, "learning_rate": 0.0007704817721267507, "loss": 0.716, "step": 7430 }, { "epoch": 0.36952418794079667, "grad_norm": 0.1708984375, "learning_rate": 0.0007704420383431012, "loss": 0.7418, "step": 7440 }, { "epoch": 0.370020860236416, "grad_norm": 0.17578125, "learning_rate": 0.0007704023045594518, "loss": 0.7125, "step": 7450 }, { "epoch": 0.37051753253203534, "grad_norm": 0.1875, "learning_rate": 0.0007703625707758022, "loss": 0.7312, "step": 7460 }, { "epoch": 0.37101420482765474, "grad_norm": 0.1708984375, "learning_rate": 0.0007703228369921526, "loss": 0.7424, "step": 7470 }, { "epoch": 0.3715108771232741, "grad_norm": 0.1904296875, "learning_rate": 0.0007702831032085031, "loss": 0.7156, "step": 7480 }, { "epoch": 0.3720075494188934, "grad_norm": 0.193359375, "learning_rate": 0.0007702433694248535, "loss": 0.7348, "step": 7490 }, { "epoch": 0.37250422171451275, "grad_norm": 0.2021484375, "learning_rate": 0.000770203635641204, "loss": 0.6993, "step": 7500 }, { "epoch": 0.3730008940101321, "grad_norm": 0.171875, "learning_rate": 0.0007701639018575544, "loss": 0.7259, "step": 7510 }, { "epoch": 0.3734975663057515, "grad_norm": 0.1572265625, "learning_rate": 0.0007701241680739049, "loss": 0.681, "step": 7520 }, { "epoch": 0.3739942386013708, "grad_norm": 0.189453125, "learning_rate": 0.0007700844342902553, "loss": 0.734, "step": 7530 }, { "epoch": 0.37449091089699016, "grad_norm": 0.169921875, "learning_rate": 0.0007700447005066057, "loss": 0.7473, "step": 7540 }, { "epoch": 0.3749875831926095, "grad_norm": 0.1875, "learning_rate": 0.0007700049667229563, "loss": 0.7197, "step": 7550 }, { "epoch": 0.3754842554882289, "grad_norm": 0.1845703125, "learning_rate": 0.0007699652329393067, "loss": 0.7397, "step": 7560 }, { "epoch": 0.37598092778384823, "grad_norm": 0.1650390625, "learning_rate": 0.0007699254991556571, "loss": 0.749, "step": 7570 }, { "epoch": 0.37647760007946757, "grad_norm": 0.166015625, "learning_rate": 0.0007698857653720076, "loss": 0.7399, "step": 7580 }, { "epoch": 0.3769742723750869, "grad_norm": 0.181640625, "learning_rate": 0.000769846031588358, "loss": 0.7477, "step": 7590 }, { "epoch": 0.37747094467070624, "grad_norm": 0.1748046875, "learning_rate": 0.0007698062978047085, "loss": 0.7321, "step": 7600 }, { "epoch": 0.37796761696632564, "grad_norm": 0.1533203125, "learning_rate": 0.000769766564021059, "loss": 0.7328, "step": 7610 }, { "epoch": 0.378464289261945, "grad_norm": 0.1591796875, "learning_rate": 0.0007697268302374094, "loss": 0.7521, "step": 7620 }, { "epoch": 0.3789609615575643, "grad_norm": 0.2041015625, "learning_rate": 0.0007696870964537598, "loss": 0.762, "step": 7630 }, { "epoch": 0.37945763385318365, "grad_norm": 0.169921875, "learning_rate": 0.0007696473626701103, "loss": 0.7221, "step": 7640 }, { "epoch": 0.37995430614880304, "grad_norm": 0.1572265625, "learning_rate": 0.0007696076288864608, "loss": 0.7237, "step": 7650 }, { "epoch": 0.3804509784444224, "grad_norm": 0.1728515625, "learning_rate": 0.0007695678951028112, "loss": 0.7303, "step": 7660 }, { "epoch": 0.3809476507400417, "grad_norm": 0.220703125, "learning_rate": 0.0007695281613191617, "loss": 0.7715, "step": 7670 }, { "epoch": 0.38144432303566106, "grad_norm": 0.181640625, "learning_rate": 0.0007694884275355121, "loss": 0.7481, "step": 7680 }, { "epoch": 0.3819409953312804, "grad_norm": 0.1953125, "learning_rate": 0.0007694486937518625, "loss": 0.7587, "step": 7690 }, { "epoch": 0.3824376676268998, "grad_norm": 0.166015625, "learning_rate": 0.000769408959968213, "loss": 0.7155, "step": 7700 }, { "epoch": 0.38293433992251913, "grad_norm": 0.171875, "learning_rate": 0.0007693692261845635, "loss": 0.7326, "step": 7710 }, { "epoch": 0.38343101221813847, "grad_norm": 0.1767578125, "learning_rate": 0.0007693294924009139, "loss": 0.7087, "step": 7720 }, { "epoch": 0.3839276845137578, "grad_norm": 0.2060546875, "learning_rate": 0.0007692897586172643, "loss": 0.7131, "step": 7730 }, { "epoch": 0.3844243568093772, "grad_norm": 0.1689453125, "learning_rate": 0.0007692500248336148, "loss": 0.7382, "step": 7740 }, { "epoch": 0.38492102910499654, "grad_norm": 0.1611328125, "learning_rate": 0.0007692102910499653, "loss": 0.7186, "step": 7750 }, { "epoch": 0.3854177014006159, "grad_norm": 0.1796875, "learning_rate": 0.0007691705572663157, "loss": 0.7351, "step": 7760 }, { "epoch": 0.3859143736962352, "grad_norm": 0.17578125, "learning_rate": 0.0007691308234826662, "loss": 0.7372, "step": 7770 }, { "epoch": 0.38641104599185455, "grad_norm": 0.1650390625, "learning_rate": 0.0007690910896990166, "loss": 0.751, "step": 7780 }, { "epoch": 0.38690771828747395, "grad_norm": 0.1669921875, "learning_rate": 0.000769051355915367, "loss": 0.7708, "step": 7790 }, { "epoch": 0.3874043905830933, "grad_norm": 0.1669921875, "learning_rate": 0.0007690116221317176, "loss": 0.7545, "step": 7800 }, { "epoch": 0.3879010628787126, "grad_norm": 0.1806640625, "learning_rate": 0.000768971888348068, "loss": 0.7645, "step": 7810 }, { "epoch": 0.38839773517433196, "grad_norm": 0.1650390625, "learning_rate": 0.0007689321545644184, "loss": 0.7348, "step": 7820 }, { "epoch": 0.38889440746995135, "grad_norm": 0.162109375, "learning_rate": 0.0007688924207807689, "loss": 0.7003, "step": 7830 }, { "epoch": 0.3893910797655707, "grad_norm": 0.1552734375, "learning_rate": 0.0007688526869971193, "loss": 0.7449, "step": 7840 }, { "epoch": 0.38988775206119003, "grad_norm": 0.16015625, "learning_rate": 0.0007688129532134698, "loss": 0.7048, "step": 7850 }, { "epoch": 0.39038442435680937, "grad_norm": 0.1767578125, "learning_rate": 0.0007687732194298203, "loss": 0.6932, "step": 7860 }, { "epoch": 0.3908810966524287, "grad_norm": 0.1533203125, "learning_rate": 0.0007687334856461707, "loss": 0.6957, "step": 7870 }, { "epoch": 0.3913777689480481, "grad_norm": 0.1796875, "learning_rate": 0.0007686937518625211, "loss": 0.7196, "step": 7880 }, { "epoch": 0.39187444124366744, "grad_norm": 0.162109375, "learning_rate": 0.0007686540180788715, "loss": 0.7493, "step": 7890 }, { "epoch": 0.3923711135392868, "grad_norm": 0.171875, "learning_rate": 0.0007686142842952221, "loss": 0.7667, "step": 7900 }, { "epoch": 0.3928677858349061, "grad_norm": 0.1708984375, "learning_rate": 0.0007685745505115726, "loss": 0.7573, "step": 7910 }, { "epoch": 0.39336445813052545, "grad_norm": 0.2060546875, "learning_rate": 0.0007685348167279229, "loss": 0.7438, "step": 7920 }, { "epoch": 0.39386113042614485, "grad_norm": 0.177734375, "learning_rate": 0.0007684950829442734, "loss": 0.7369, "step": 7930 }, { "epoch": 0.3943578027217642, "grad_norm": 0.15625, "learning_rate": 0.0007684553491606239, "loss": 0.7336, "step": 7940 }, { "epoch": 0.3948544750173835, "grad_norm": 0.162109375, "learning_rate": 0.0007684156153769743, "loss": 0.7205, "step": 7950 }, { "epoch": 0.39535114731300286, "grad_norm": 0.173828125, "learning_rate": 0.0007683758815933248, "loss": 0.699, "step": 7960 }, { "epoch": 0.39584781960862225, "grad_norm": 0.189453125, "learning_rate": 0.0007683361478096752, "loss": 0.7284, "step": 7970 }, { "epoch": 0.3963444919042416, "grad_norm": 0.1591796875, "learning_rate": 0.0007682964140260256, "loss": 0.727, "step": 7980 }, { "epoch": 0.39684116419986093, "grad_norm": 0.1787109375, "learning_rate": 0.0007682566802423761, "loss": 0.7521, "step": 7990 }, { "epoch": 0.39733783649548027, "grad_norm": 0.193359375, "learning_rate": 0.0007682169464587266, "loss": 0.7427, "step": 8000 }, { "epoch": 0.3978345087910996, "grad_norm": 0.16796875, "learning_rate": 0.000768177212675077, "loss": 0.6822, "step": 8010 }, { "epoch": 0.398331181086719, "grad_norm": 0.1669921875, "learning_rate": 0.0007681374788914275, "loss": 0.718, "step": 8020 }, { "epoch": 0.39882785338233834, "grad_norm": 0.16015625, "learning_rate": 0.0007680977451077779, "loss": 0.7153, "step": 8030 }, { "epoch": 0.3993245256779577, "grad_norm": 0.15625, "learning_rate": 0.0007680580113241283, "loss": 0.7443, "step": 8040 }, { "epoch": 0.399821197973577, "grad_norm": 0.1552734375, "learning_rate": 0.0007680182775404789, "loss": 0.7849, "step": 8050 }, { "epoch": 0.4003178702691964, "grad_norm": 0.1748046875, "learning_rate": 0.0007679785437568293, "loss": 0.7288, "step": 8060 }, { "epoch": 0.40081454256481575, "grad_norm": 0.1640625, "learning_rate": 0.0007679388099731798, "loss": 0.7566, "step": 8070 }, { "epoch": 0.4013112148604351, "grad_norm": 0.158203125, "learning_rate": 0.0007678990761895302, "loss": 0.7008, "step": 8080 }, { "epoch": 0.4018078871560544, "grad_norm": 0.150390625, "learning_rate": 0.0007678593424058806, "loss": 0.754, "step": 8090 }, { "epoch": 0.40230455945167376, "grad_norm": 0.15625, "learning_rate": 0.0007678196086222312, "loss": 0.7161, "step": 8100 }, { "epoch": 0.40280123174729315, "grad_norm": 0.1806640625, "learning_rate": 0.0007677798748385815, "loss": 0.7508, "step": 8110 }, { "epoch": 0.4032979040429125, "grad_norm": 0.158203125, "learning_rate": 0.000767740141054932, "loss": 0.7183, "step": 8120 }, { "epoch": 0.40379457633853183, "grad_norm": 0.1806640625, "learning_rate": 0.0007677004072712825, "loss": 0.7561, "step": 8130 }, { "epoch": 0.40429124863415117, "grad_norm": 0.16015625, "learning_rate": 0.0007676606734876328, "loss": 0.7123, "step": 8140 }, { "epoch": 0.40478792092977056, "grad_norm": 0.1806640625, "learning_rate": 0.0007676209397039834, "loss": 0.7231, "step": 8150 }, { "epoch": 0.4052845932253899, "grad_norm": 0.197265625, "learning_rate": 0.0007675812059203338, "loss": 0.7267, "step": 8160 }, { "epoch": 0.40578126552100924, "grad_norm": 0.171875, "learning_rate": 0.0007675414721366842, "loss": 0.7173, "step": 8170 }, { "epoch": 0.4062779378166286, "grad_norm": 0.16796875, "learning_rate": 0.0007675017383530347, "loss": 0.7031, "step": 8180 }, { "epoch": 0.4067746101122479, "grad_norm": 0.15234375, "learning_rate": 0.0007674620045693851, "loss": 0.7208, "step": 8190 }, { "epoch": 0.4072712824078673, "grad_norm": 0.2021484375, "learning_rate": 0.0007674222707857355, "loss": 0.7613, "step": 8200 }, { "epoch": 0.40776795470348665, "grad_norm": 0.181640625, "learning_rate": 0.0007673825370020861, "loss": 0.7161, "step": 8210 }, { "epoch": 0.408264626999106, "grad_norm": 0.1640625, "learning_rate": 0.0007673428032184365, "loss": 0.7485, "step": 8220 }, { "epoch": 0.4087612992947253, "grad_norm": 0.1767578125, "learning_rate": 0.000767303069434787, "loss": 0.7091, "step": 8230 }, { "epoch": 0.4092579715903447, "grad_norm": 0.16015625, "learning_rate": 0.0007672633356511374, "loss": 0.6908, "step": 8240 }, { "epoch": 0.40975464388596405, "grad_norm": 0.166015625, "learning_rate": 0.0007672236018674879, "loss": 0.7208, "step": 8250 }, { "epoch": 0.4102513161815834, "grad_norm": 0.171875, "learning_rate": 0.0007671838680838384, "loss": 0.735, "step": 8260 }, { "epoch": 0.41074798847720273, "grad_norm": 0.1552734375, "learning_rate": 0.0007671441343001888, "loss": 0.7397, "step": 8270 }, { "epoch": 0.41124466077282207, "grad_norm": 0.16796875, "learning_rate": 0.0007671044005165392, "loss": 0.7125, "step": 8280 }, { "epoch": 0.41174133306844146, "grad_norm": 0.1962890625, "learning_rate": 0.0007670646667328897, "loss": 0.7101, "step": 8290 }, { "epoch": 0.4122380053640608, "grad_norm": 0.173828125, "learning_rate": 0.00076702493294924, "loss": 0.74, "step": 8300 }, { "epoch": 0.41273467765968014, "grad_norm": 0.1708984375, "learning_rate": 0.0007669851991655906, "loss": 0.7184, "step": 8310 }, { "epoch": 0.4132313499552995, "grad_norm": 0.15234375, "learning_rate": 0.0007669454653819411, "loss": 0.7223, "step": 8320 }, { "epoch": 0.41372802225091887, "grad_norm": 0.1572265625, "learning_rate": 0.0007669057315982914, "loss": 0.7655, "step": 8330 }, { "epoch": 0.4142246945465382, "grad_norm": 0.1591796875, "learning_rate": 0.0007668659978146419, "loss": 0.7088, "step": 8340 }, { "epoch": 0.41472136684215755, "grad_norm": 0.1728515625, "learning_rate": 0.0007668262640309925, "loss": 0.7325, "step": 8350 }, { "epoch": 0.4152180391377769, "grad_norm": 0.169921875, "learning_rate": 0.0007667865302473429, "loss": 0.7189, "step": 8360 }, { "epoch": 0.4157147114333962, "grad_norm": 0.1650390625, "learning_rate": 0.0007667467964636933, "loss": 0.7222, "step": 8370 }, { "epoch": 0.4162113837290156, "grad_norm": 0.16796875, "learning_rate": 0.0007667070626800437, "loss": 0.7308, "step": 8380 }, { "epoch": 0.41670805602463495, "grad_norm": 0.1728515625, "learning_rate": 0.0007666673288963942, "loss": 0.6973, "step": 8390 }, { "epoch": 0.4172047283202543, "grad_norm": 0.166015625, "learning_rate": 0.0007666275951127447, "loss": 0.6914, "step": 8400 }, { "epoch": 0.41770140061587363, "grad_norm": 0.162109375, "learning_rate": 0.0007665878613290951, "loss": 0.72, "step": 8410 }, { "epoch": 0.418198072911493, "grad_norm": 0.15234375, "learning_rate": 0.0007665481275454456, "loss": 0.6967, "step": 8420 }, { "epoch": 0.41869474520711236, "grad_norm": 0.15234375, "learning_rate": 0.000766508393761796, "loss": 0.7139, "step": 8430 }, { "epoch": 0.4191914175027317, "grad_norm": 0.158203125, "learning_rate": 0.0007664686599781464, "loss": 0.7003, "step": 8440 }, { "epoch": 0.41968808979835104, "grad_norm": 0.150390625, "learning_rate": 0.000766428926194497, "loss": 0.7177, "step": 8450 }, { "epoch": 0.4201847620939704, "grad_norm": 0.16015625, "learning_rate": 0.0007663891924108474, "loss": 0.7192, "step": 8460 }, { "epoch": 0.42068143438958977, "grad_norm": 0.1640625, "learning_rate": 0.0007663494586271978, "loss": 0.7029, "step": 8470 }, { "epoch": 0.4211781066852091, "grad_norm": 0.16015625, "learning_rate": 0.0007663097248435483, "loss": 0.6977, "step": 8480 }, { "epoch": 0.42167477898082845, "grad_norm": 0.15625, "learning_rate": 0.0007662699910598986, "loss": 0.7426, "step": 8490 }, { "epoch": 0.4221714512764478, "grad_norm": 0.16015625, "learning_rate": 0.0007662302572762491, "loss": 0.7655, "step": 8500 }, { "epoch": 0.4226681235720671, "grad_norm": 0.1787109375, "learning_rate": 0.0007661905234925997, "loss": 0.7211, "step": 8510 }, { "epoch": 0.4231647958676865, "grad_norm": 0.1650390625, "learning_rate": 0.0007661507897089501, "loss": 0.7069, "step": 8520 }, { "epoch": 0.42366146816330585, "grad_norm": 0.158203125, "learning_rate": 0.0007661110559253005, "loss": 0.7189, "step": 8530 }, { "epoch": 0.4241581404589252, "grad_norm": 0.1875, "learning_rate": 0.000766071322141651, "loss": 0.7167, "step": 8540 }, { "epoch": 0.42465481275454453, "grad_norm": 0.1845703125, "learning_rate": 0.0007660315883580014, "loss": 0.7307, "step": 8550 }, { "epoch": 0.4251514850501639, "grad_norm": 0.173828125, "learning_rate": 0.0007659918545743519, "loss": 0.7289, "step": 8560 }, { "epoch": 0.42564815734578326, "grad_norm": 0.1806640625, "learning_rate": 0.0007659521207907023, "loss": 0.7157, "step": 8570 }, { "epoch": 0.4261448296414026, "grad_norm": 0.1708984375, "learning_rate": 0.0007659123870070528, "loss": 0.7354, "step": 8580 }, { "epoch": 0.42664150193702194, "grad_norm": 0.181640625, "learning_rate": 0.0007658726532234032, "loss": 0.7431, "step": 8590 }, { "epoch": 0.4271381742326413, "grad_norm": 0.1611328125, "learning_rate": 0.0007658329194397536, "loss": 0.7369, "step": 8600 }, { "epoch": 0.42763484652826067, "grad_norm": 0.1865234375, "learning_rate": 0.0007657931856561042, "loss": 0.7348, "step": 8610 }, { "epoch": 0.42813151882388, "grad_norm": 0.1826171875, "learning_rate": 0.0007657534518724546, "loss": 0.7231, "step": 8620 }, { "epoch": 0.42862819111949935, "grad_norm": 0.1640625, "learning_rate": 0.000765713718088805, "loss": 0.7217, "step": 8630 }, { "epoch": 0.4291248634151187, "grad_norm": 0.1728515625, "learning_rate": 0.0007656739843051555, "loss": 0.7138, "step": 8640 }, { "epoch": 0.4296215357107381, "grad_norm": 0.1455078125, "learning_rate": 0.000765634250521506, "loss": 0.7112, "step": 8650 }, { "epoch": 0.4301182080063574, "grad_norm": 0.1591796875, "learning_rate": 0.0007655945167378564, "loss": 0.7388, "step": 8660 }, { "epoch": 0.43061488030197675, "grad_norm": 0.189453125, "learning_rate": 0.0007655547829542069, "loss": 0.7088, "step": 8670 }, { "epoch": 0.4311115525975961, "grad_norm": 0.1650390625, "learning_rate": 0.0007655150491705573, "loss": 0.7148, "step": 8680 }, { "epoch": 0.43160822489321543, "grad_norm": 0.1572265625, "learning_rate": 0.0007654753153869077, "loss": 0.7137, "step": 8690 }, { "epoch": 0.4321048971888348, "grad_norm": 0.1484375, "learning_rate": 0.0007654355816032582, "loss": 0.6875, "step": 8700 }, { "epoch": 0.43260156948445416, "grad_norm": 0.158203125, "learning_rate": 0.0007653958478196087, "loss": 0.6891, "step": 8710 }, { "epoch": 0.4330982417800735, "grad_norm": 0.1484375, "learning_rate": 0.0007653561140359591, "loss": 0.7064, "step": 8720 }, { "epoch": 0.43359491407569284, "grad_norm": 0.146484375, "learning_rate": 0.0007653163802523096, "loss": 0.7231, "step": 8730 }, { "epoch": 0.43409158637131223, "grad_norm": 0.1494140625, "learning_rate": 0.00076527664646866, "loss": 0.7066, "step": 8740 }, { "epoch": 0.43458825866693157, "grad_norm": 0.1669921875, "learning_rate": 0.0007652369126850104, "loss": 0.7, "step": 8750 }, { "epoch": 0.4350849309625509, "grad_norm": 0.1611328125, "learning_rate": 0.0007651971789013609, "loss": 0.7378, "step": 8760 }, { "epoch": 0.43558160325817025, "grad_norm": 0.169921875, "learning_rate": 0.0007651574451177114, "loss": 0.7336, "step": 8770 }, { "epoch": 0.4360782755537896, "grad_norm": 0.158203125, "learning_rate": 0.0007651177113340618, "loss": 0.7575, "step": 8780 }, { "epoch": 0.436574947849409, "grad_norm": 0.150390625, "learning_rate": 0.0007650779775504122, "loss": 0.7354, "step": 8790 }, { "epoch": 0.4370716201450283, "grad_norm": 0.15625, "learning_rate": 0.0007650382437667627, "loss": 0.7177, "step": 8800 }, { "epoch": 0.43756829244064765, "grad_norm": 0.1630859375, "learning_rate": 0.0007649985099831133, "loss": 0.7363, "step": 8810 }, { "epoch": 0.438064964736267, "grad_norm": 0.177734375, "learning_rate": 0.0007649587761994636, "loss": 0.7019, "step": 8820 }, { "epoch": 0.4385616370318864, "grad_norm": 0.15234375, "learning_rate": 0.0007649190424158141, "loss": 0.6998, "step": 8830 }, { "epoch": 0.4390583093275057, "grad_norm": 0.1640625, "learning_rate": 0.0007648793086321645, "loss": 0.7236, "step": 8840 }, { "epoch": 0.43955498162312506, "grad_norm": 0.189453125, "learning_rate": 0.0007648395748485149, "loss": 0.7097, "step": 8850 }, { "epoch": 0.4400516539187444, "grad_norm": 0.15625, "learning_rate": 0.0007647998410648655, "loss": 0.6748, "step": 8860 }, { "epoch": 0.44054832621436374, "grad_norm": 0.2041015625, "learning_rate": 0.0007647601072812159, "loss": 0.6921, "step": 8870 }, { "epoch": 0.44104499850998313, "grad_norm": 0.15234375, "learning_rate": 0.0007647203734975663, "loss": 0.7079, "step": 8880 }, { "epoch": 0.44154167080560247, "grad_norm": 0.1455078125, "learning_rate": 0.0007646806397139168, "loss": 0.7137, "step": 8890 }, { "epoch": 0.4420383431012218, "grad_norm": 0.1494140625, "learning_rate": 0.0007646409059302672, "loss": 0.7328, "step": 8900 }, { "epoch": 0.44253501539684115, "grad_norm": 0.1748046875, "learning_rate": 0.0007646011721466177, "loss": 0.7409, "step": 8910 }, { "epoch": 0.44303168769246054, "grad_norm": 0.1767578125, "learning_rate": 0.0007645614383629682, "loss": 0.7505, "step": 8920 }, { "epoch": 0.4435283599880799, "grad_norm": 0.16796875, "learning_rate": 0.0007645217045793186, "loss": 0.7348, "step": 8930 }, { "epoch": 0.4440250322836992, "grad_norm": 0.1591796875, "learning_rate": 0.000764481970795669, "loss": 0.7477, "step": 8940 }, { "epoch": 0.44452170457931856, "grad_norm": 0.1640625, "learning_rate": 0.0007644422370120194, "loss": 0.71, "step": 8950 }, { "epoch": 0.4450183768749379, "grad_norm": 0.1630859375, "learning_rate": 0.00076440250322837, "loss": 0.7137, "step": 8960 }, { "epoch": 0.4455150491705573, "grad_norm": 0.1650390625, "learning_rate": 0.0007643627694447205, "loss": 0.7641, "step": 8970 }, { "epoch": 0.4460117214661766, "grad_norm": 0.203125, "learning_rate": 0.0007643230356610708, "loss": 0.699, "step": 8980 }, { "epoch": 0.44650839376179596, "grad_norm": 0.1806640625, "learning_rate": 0.0007642833018774213, "loss": 0.6906, "step": 8990 }, { "epoch": 0.4470050660574153, "grad_norm": 0.18359375, "learning_rate": 0.0007642435680937718, "loss": 0.7058, "step": 9000 }, { "epoch": 0.4475017383530347, "grad_norm": 0.166015625, "learning_rate": 0.0007642038343101222, "loss": 0.7377, "step": 9010 }, { "epoch": 0.44799841064865403, "grad_norm": 0.1650390625, "learning_rate": 0.0007641641005264727, "loss": 0.6808, "step": 9020 }, { "epoch": 0.44849508294427337, "grad_norm": 0.185546875, "learning_rate": 0.0007641243667428231, "loss": 0.7154, "step": 9030 }, { "epoch": 0.4489917552398927, "grad_norm": 0.15625, "learning_rate": 0.0007640846329591735, "loss": 0.7077, "step": 9040 }, { "epoch": 0.44948842753551205, "grad_norm": 0.162109375, "learning_rate": 0.000764044899175524, "loss": 0.6977, "step": 9050 }, { "epoch": 0.44998509983113144, "grad_norm": 0.16796875, "learning_rate": 0.0007640051653918745, "loss": 0.7314, "step": 9060 }, { "epoch": 0.4504817721267508, "grad_norm": 0.177734375, "learning_rate": 0.0007639654316082249, "loss": 0.7332, "step": 9070 }, { "epoch": 0.4509784444223701, "grad_norm": 0.1591796875, "learning_rate": 0.0007639256978245754, "loss": 0.7256, "step": 9080 }, { "epoch": 0.45147511671798946, "grad_norm": 0.18359375, "learning_rate": 0.0007638859640409258, "loss": 0.6945, "step": 9090 }, { "epoch": 0.4519717890136088, "grad_norm": 0.1513671875, "learning_rate": 0.0007638462302572763, "loss": 0.7337, "step": 9100 }, { "epoch": 0.4524684613092282, "grad_norm": 0.15234375, "learning_rate": 0.0007638064964736268, "loss": 0.7143, "step": 9110 }, { "epoch": 0.4529651336048475, "grad_norm": 0.162109375, "learning_rate": 0.0007637667626899772, "loss": 0.7248, "step": 9120 }, { "epoch": 0.45346180590046686, "grad_norm": 0.15625, "learning_rate": 0.0007637270289063277, "loss": 0.7323, "step": 9130 }, { "epoch": 0.4539584781960862, "grad_norm": 0.181640625, "learning_rate": 0.0007636872951226781, "loss": 0.7023, "step": 9140 }, { "epoch": 0.4544551504917056, "grad_norm": 0.1533203125, "learning_rate": 0.0007636475613390285, "loss": 0.7105, "step": 9150 }, { "epoch": 0.45495182278732493, "grad_norm": 0.158203125, "learning_rate": 0.0007636078275553791, "loss": 0.7187, "step": 9160 }, { "epoch": 0.45544849508294427, "grad_norm": 0.189453125, "learning_rate": 0.0007635680937717294, "loss": 0.7122, "step": 9170 }, { "epoch": 0.4559451673785636, "grad_norm": 0.15625, "learning_rate": 0.0007635283599880799, "loss": 0.7229, "step": 9180 }, { "epoch": 0.45644183967418295, "grad_norm": 0.1708984375, "learning_rate": 0.0007634886262044304, "loss": 0.7008, "step": 9190 }, { "epoch": 0.45693851196980234, "grad_norm": 0.1591796875, "learning_rate": 0.0007634488924207807, "loss": 0.6972, "step": 9200 }, { "epoch": 0.4574351842654217, "grad_norm": 0.140625, "learning_rate": 0.0007634091586371313, "loss": 0.6998, "step": 9210 }, { "epoch": 0.457931856561041, "grad_norm": 0.177734375, "learning_rate": 0.0007633694248534817, "loss": 0.7409, "step": 9220 }, { "epoch": 0.45842852885666036, "grad_norm": 0.1796875, "learning_rate": 0.0007633296910698321, "loss": 0.7144, "step": 9230 }, { "epoch": 0.45892520115227975, "grad_norm": 0.158203125, "learning_rate": 0.0007632899572861826, "loss": 0.7273, "step": 9240 }, { "epoch": 0.4594218734478991, "grad_norm": 0.171875, "learning_rate": 0.000763250223502533, "loss": 0.7022, "step": 9250 }, { "epoch": 0.4599185457435184, "grad_norm": 0.162109375, "learning_rate": 0.0007632104897188836, "loss": 0.7172, "step": 9260 }, { "epoch": 0.46041521803913776, "grad_norm": 0.1591796875, "learning_rate": 0.000763170755935234, "loss": 0.7561, "step": 9270 }, { "epoch": 0.4609118903347571, "grad_norm": 0.1796875, "learning_rate": 0.0007631310221515844, "loss": 0.7032, "step": 9280 }, { "epoch": 0.4614085626303765, "grad_norm": 0.197265625, "learning_rate": 0.0007630912883679349, "loss": 0.7463, "step": 9290 }, { "epoch": 0.46190523492599583, "grad_norm": 0.15234375, "learning_rate": 0.0007630515545842853, "loss": 0.7138, "step": 9300 }, { "epoch": 0.46240190722161517, "grad_norm": 0.1591796875, "learning_rate": 0.0007630118208006358, "loss": 0.7258, "step": 9310 }, { "epoch": 0.4628985795172345, "grad_norm": 0.18359375, "learning_rate": 0.0007629720870169863, "loss": 0.7383, "step": 9320 }, { "epoch": 0.4633952518128539, "grad_norm": 0.146484375, "learning_rate": 0.0007629323532333367, "loss": 0.7449, "step": 9330 }, { "epoch": 0.46389192410847324, "grad_norm": 0.16015625, "learning_rate": 0.0007628926194496871, "loss": 0.7348, "step": 9340 }, { "epoch": 0.4643885964040926, "grad_norm": 0.16015625, "learning_rate": 0.0007628528856660376, "loss": 0.7234, "step": 9350 }, { "epoch": 0.4648852686997119, "grad_norm": 0.146484375, "learning_rate": 0.000762813151882388, "loss": 0.6861, "step": 9360 }, { "epoch": 0.46538194099533126, "grad_norm": 0.1611328125, "learning_rate": 0.0007627734180987385, "loss": 0.6995, "step": 9370 }, { "epoch": 0.46587861329095065, "grad_norm": 0.169921875, "learning_rate": 0.000762733684315089, "loss": 0.6967, "step": 9380 }, { "epoch": 0.46637528558657, "grad_norm": 0.16015625, "learning_rate": 0.0007626939505314393, "loss": 0.6964, "step": 9390 }, { "epoch": 0.4668719578821893, "grad_norm": 0.162109375, "learning_rate": 0.0007626542167477898, "loss": 0.7155, "step": 9400 }, { "epoch": 0.46736863017780866, "grad_norm": 0.1416015625, "learning_rate": 0.0007626144829641404, "loss": 0.7281, "step": 9410 }, { "epoch": 0.46786530247342806, "grad_norm": 0.1640625, "learning_rate": 0.0007625747491804908, "loss": 0.7683, "step": 9420 }, { "epoch": 0.4683619747690474, "grad_norm": 0.146484375, "learning_rate": 0.0007625350153968412, "loss": 0.6822, "step": 9430 }, { "epoch": 0.46885864706466673, "grad_norm": 0.1494140625, "learning_rate": 0.0007624952816131916, "loss": 0.7145, "step": 9440 }, { "epoch": 0.46935531936028607, "grad_norm": 0.14453125, "learning_rate": 0.0007624555478295421, "loss": 0.7266, "step": 9450 }, { "epoch": 0.4698519916559054, "grad_norm": 0.1767578125, "learning_rate": 0.0007624158140458926, "loss": 0.7053, "step": 9460 }, { "epoch": 0.4703486639515248, "grad_norm": 0.154296875, "learning_rate": 0.000762376080262243, "loss": 0.6892, "step": 9470 }, { "epoch": 0.47084533624714414, "grad_norm": 0.1591796875, "learning_rate": 0.0007623363464785935, "loss": 0.7253, "step": 9480 }, { "epoch": 0.4713420085427635, "grad_norm": 0.150390625, "learning_rate": 0.0007622966126949439, "loss": 0.7233, "step": 9490 }, { "epoch": 0.4718386808383828, "grad_norm": 0.173828125, "learning_rate": 0.0007622568789112943, "loss": 0.7241, "step": 9500 }, { "epoch": 0.4723353531340022, "grad_norm": 0.1552734375, "learning_rate": 0.0007622171451276449, "loss": 0.7205, "step": 9510 }, { "epoch": 0.47283202542962155, "grad_norm": 0.1767578125, "learning_rate": 0.0007621774113439953, "loss": 0.7085, "step": 9520 }, { "epoch": 0.4733286977252409, "grad_norm": 0.150390625, "learning_rate": 0.0007621376775603457, "loss": 0.6921, "step": 9530 }, { "epoch": 0.4738253700208602, "grad_norm": 0.13671875, "learning_rate": 0.0007620979437766962, "loss": 0.7269, "step": 9540 }, { "epoch": 0.47432204231647956, "grad_norm": 0.1572265625, "learning_rate": 0.0007620582099930466, "loss": 0.7659, "step": 9550 }, { "epoch": 0.47481871461209896, "grad_norm": 0.1611328125, "learning_rate": 0.000762018476209397, "loss": 0.7237, "step": 9560 }, { "epoch": 0.4753153869077183, "grad_norm": 0.14453125, "learning_rate": 0.0007619787424257476, "loss": 0.7036, "step": 9570 }, { "epoch": 0.47581205920333763, "grad_norm": 0.189453125, "learning_rate": 0.000761939008642098, "loss": 0.7232, "step": 9580 }, { "epoch": 0.47630873149895697, "grad_norm": 0.154296875, "learning_rate": 0.0007618992748584484, "loss": 0.6916, "step": 9590 }, { "epoch": 0.47680540379457637, "grad_norm": 0.1669921875, "learning_rate": 0.0007618595410747989, "loss": 0.6853, "step": 9600 }, { "epoch": 0.4773020760901957, "grad_norm": 0.1689453125, "learning_rate": 0.0007618198072911494, "loss": 0.6997, "step": 9610 }, { "epoch": 0.47779874838581504, "grad_norm": 0.1552734375, "learning_rate": 0.0007617800735074998, "loss": 0.7772, "step": 9620 }, { "epoch": 0.4782954206814344, "grad_norm": 0.15625, "learning_rate": 0.0007617403397238502, "loss": 0.7037, "step": 9630 }, { "epoch": 0.4787920929770537, "grad_norm": 0.16015625, "learning_rate": 0.0007617006059402007, "loss": 0.7249, "step": 9640 }, { "epoch": 0.4792887652726731, "grad_norm": 0.1435546875, "learning_rate": 0.0007616608721565511, "loss": 0.7063, "step": 9650 }, { "epoch": 0.47978543756829245, "grad_norm": 0.1533203125, "learning_rate": 0.0007616211383729015, "loss": 0.6947, "step": 9660 }, { "epoch": 0.4802821098639118, "grad_norm": 0.1572265625, "learning_rate": 0.0007615814045892521, "loss": 0.6985, "step": 9670 }, { "epoch": 0.4807787821595311, "grad_norm": 0.16796875, "learning_rate": 0.0007615416708056025, "loss": 0.7313, "step": 9680 }, { "epoch": 0.48127545445515046, "grad_norm": 0.140625, "learning_rate": 0.0007615019370219529, "loss": 0.708, "step": 9690 }, { "epoch": 0.48177212675076986, "grad_norm": 0.142578125, "learning_rate": 0.0007614622032383034, "loss": 0.7357, "step": 9700 }, { "epoch": 0.4822687990463892, "grad_norm": 0.1474609375, "learning_rate": 0.0007614224694546539, "loss": 0.7357, "step": 9710 }, { "epoch": 0.48276547134200853, "grad_norm": 0.1513671875, "learning_rate": 0.0007613827356710043, "loss": 0.6737, "step": 9720 }, { "epoch": 0.4832621436376279, "grad_norm": 0.166015625, "learning_rate": 0.0007613430018873548, "loss": 0.6925, "step": 9730 }, { "epoch": 0.48375881593324727, "grad_norm": 0.1484375, "learning_rate": 0.0007613032681037052, "loss": 0.6754, "step": 9740 }, { "epoch": 0.4842554882288666, "grad_norm": 0.1484375, "learning_rate": 0.0007612635343200556, "loss": 0.7491, "step": 9750 }, { "epoch": 0.48475216052448594, "grad_norm": 0.1591796875, "learning_rate": 0.0007612238005364062, "loss": 0.7246, "step": 9760 }, { "epoch": 0.4852488328201053, "grad_norm": 0.158203125, "learning_rate": 0.0007611840667527566, "loss": 0.6921, "step": 9770 }, { "epoch": 0.4857455051157246, "grad_norm": 0.1572265625, "learning_rate": 0.000761144332969107, "loss": 0.6853, "step": 9780 }, { "epoch": 0.486242177411344, "grad_norm": 0.1474609375, "learning_rate": 0.0007611045991854575, "loss": 0.6736, "step": 9790 }, { "epoch": 0.48673884970696335, "grad_norm": 0.138671875, "learning_rate": 0.0007610648654018079, "loss": 0.7246, "step": 9800 }, { "epoch": 0.4872355220025827, "grad_norm": 0.1455078125, "learning_rate": 0.0007610251316181583, "loss": 0.7048, "step": 9810 }, { "epoch": 0.487732194298202, "grad_norm": 0.1630859375, "learning_rate": 0.0007609853978345088, "loss": 0.688, "step": 9820 }, { "epoch": 0.4882288665938214, "grad_norm": 0.1435546875, "learning_rate": 0.0007609456640508593, "loss": 0.7032, "step": 9830 }, { "epoch": 0.48872553888944076, "grad_norm": 0.150390625, "learning_rate": 0.0007609059302672097, "loss": 0.6997, "step": 9840 }, { "epoch": 0.4892222111850601, "grad_norm": 0.1591796875, "learning_rate": 0.0007608661964835601, "loss": 0.6674, "step": 9850 }, { "epoch": 0.48971888348067943, "grad_norm": 0.1416015625, "learning_rate": 0.0007608264626999106, "loss": 0.7512, "step": 9860 }, { "epoch": 0.4902155557762988, "grad_norm": 0.150390625, "learning_rate": 0.0007607867289162612, "loss": 0.7171, "step": 9870 }, { "epoch": 0.49071222807191817, "grad_norm": 0.158203125, "learning_rate": 0.0007607469951326115, "loss": 0.7149, "step": 9880 }, { "epoch": 0.4912089003675375, "grad_norm": 0.140625, "learning_rate": 0.000760707261348962, "loss": 0.6987, "step": 9890 }, { "epoch": 0.49170557266315684, "grad_norm": 0.154296875, "learning_rate": 0.0007606675275653124, "loss": 0.728, "step": 9900 }, { "epoch": 0.4922022449587762, "grad_norm": 0.13671875, "learning_rate": 0.0007606277937816628, "loss": 0.669, "step": 9910 }, { "epoch": 0.4926989172543956, "grad_norm": 0.1591796875, "learning_rate": 0.0007605880599980134, "loss": 0.6901, "step": 9920 }, { "epoch": 0.4931955895500149, "grad_norm": 0.1533203125, "learning_rate": 0.0007605483262143638, "loss": 0.7053, "step": 9930 }, { "epoch": 0.49369226184563425, "grad_norm": 0.171875, "learning_rate": 0.0007605085924307142, "loss": 0.7132, "step": 9940 }, { "epoch": 0.4941889341412536, "grad_norm": 0.11962890625, "learning_rate": 0.0007604688586470647, "loss": 0.6702, "step": 9950 }, { "epoch": 0.4946856064368729, "grad_norm": 0.126953125, "learning_rate": 0.0007604291248634151, "loss": 0.7449, "step": 9960 }, { "epoch": 0.4951822787324923, "grad_norm": 0.1826171875, "learning_rate": 0.0007603893910797656, "loss": 0.6888, "step": 9970 }, { "epoch": 0.49567895102811166, "grad_norm": 0.142578125, "learning_rate": 0.0007603496572961161, "loss": 0.7041, "step": 9980 }, { "epoch": 0.496175623323731, "grad_norm": 0.1533203125, "learning_rate": 0.0007603099235124665, "loss": 0.7158, "step": 9990 }, { "epoch": 0.49667229561935033, "grad_norm": 0.15234375, "learning_rate": 0.000760270189728817, "loss": 0.7205, "step": 10000 }, { "epoch": 0.49716896791496973, "grad_norm": 0.130859375, "learning_rate": 0.0007602304559451674, "loss": 0.7384, "step": 10010 }, { "epoch": 0.49766564021058907, "grad_norm": 0.1376953125, "learning_rate": 0.0007601907221615179, "loss": 0.7441, "step": 10020 }, { "epoch": 0.4981623125062084, "grad_norm": 0.1416015625, "learning_rate": 0.0007601509883778684, "loss": 0.6916, "step": 10030 }, { "epoch": 0.49865898480182774, "grad_norm": 0.15234375, "learning_rate": 0.0007601112545942187, "loss": 0.7187, "step": 10040 }, { "epoch": 0.4991556570974471, "grad_norm": 0.1259765625, "learning_rate": 0.0007600715208105692, "loss": 0.7011, "step": 10050 }, { "epoch": 0.4996523293930665, "grad_norm": 0.13671875, "learning_rate": 0.0007600317870269198, "loss": 0.6988, "step": 10060 }, { "epoch": 0.5001490016886858, "grad_norm": 0.1328125, "learning_rate": 0.0007599920532432701, "loss": 0.6829, "step": 10070 }, { "epoch": 0.5006456739843052, "grad_norm": 0.1318359375, "learning_rate": 0.0007599523194596206, "loss": 0.7256, "step": 10080 }, { "epoch": 0.5011423462799245, "grad_norm": 0.134765625, "learning_rate": 0.000759912585675971, "loss": 0.741, "step": 10090 }, { "epoch": 0.5016390185755438, "grad_norm": 0.126953125, "learning_rate": 0.0007598728518923214, "loss": 0.6929, "step": 10100 }, { "epoch": 0.5021356908711632, "grad_norm": 0.13671875, "learning_rate": 0.0007598331181086719, "loss": 0.7239, "step": 10110 }, { "epoch": 0.5026323631667825, "grad_norm": 0.1572265625, "learning_rate": 0.0007597933843250224, "loss": 0.7027, "step": 10120 }, { "epoch": 0.503129035462402, "grad_norm": 0.1259765625, "learning_rate": 0.0007597536505413728, "loss": 0.6563, "step": 10130 }, { "epoch": 0.5036257077580213, "grad_norm": 0.140625, "learning_rate": 0.0007597139167577233, "loss": 0.727, "step": 10140 }, { "epoch": 0.5041223800536406, "grad_norm": 0.234375, "learning_rate": 0.0007596741829740737, "loss": 0.7071, "step": 10150 }, { "epoch": 0.50461905234926, "grad_norm": 0.1376953125, "learning_rate": 0.0007596344491904242, "loss": 0.7103, "step": 10160 }, { "epoch": 0.5051157246448793, "grad_norm": 0.154296875, "learning_rate": 0.0007595947154067747, "loss": 0.6813, "step": 10170 }, { "epoch": 0.5056123969404986, "grad_norm": 0.1416015625, "learning_rate": 0.0007595549816231251, "loss": 0.7, "step": 10180 }, { "epoch": 0.506109069236118, "grad_norm": 0.146484375, "learning_rate": 0.0007595152478394756, "loss": 0.7448, "step": 10190 }, { "epoch": 0.5066057415317373, "grad_norm": 0.181640625, "learning_rate": 0.000759475514055826, "loss": 0.6848, "step": 10200 }, { "epoch": 0.5071024138273567, "grad_norm": 0.150390625, "learning_rate": 0.0007594357802721764, "loss": 0.7022, "step": 10210 }, { "epoch": 0.5075990861229761, "grad_norm": 0.1455078125, "learning_rate": 0.000759396046488527, "loss": 0.6825, "step": 10220 }, { "epoch": 0.5080957584185954, "grad_norm": 0.1435546875, "learning_rate": 0.0007593563127048773, "loss": 0.6914, "step": 10230 }, { "epoch": 0.5085924307142148, "grad_norm": 0.134765625, "learning_rate": 0.0007593165789212278, "loss": 0.737, "step": 10240 }, { "epoch": 0.5090891030098341, "grad_norm": 0.140625, "learning_rate": 0.0007592768451375783, "loss": 0.6901, "step": 10250 }, { "epoch": 0.5095857753054535, "grad_norm": 0.1474609375, "learning_rate": 0.0007592371113539286, "loss": 0.7328, "step": 10260 }, { "epoch": 0.5100824476010728, "grad_norm": 0.123046875, "learning_rate": 0.0007591973775702792, "loss": 0.7049, "step": 10270 }, { "epoch": 0.5105791198966921, "grad_norm": 0.1484375, "learning_rate": 0.0007591576437866297, "loss": 0.6783, "step": 10280 }, { "epoch": 0.5110757921923115, "grad_norm": 0.14453125, "learning_rate": 0.00075911791000298, "loss": 0.7185, "step": 10290 }, { "epoch": 0.5115724644879308, "grad_norm": 0.1435546875, "learning_rate": 0.0007590781762193305, "loss": 0.7185, "step": 10300 }, { "epoch": 0.5120691367835503, "grad_norm": 0.1357421875, "learning_rate": 0.0007590384424356809, "loss": 0.7252, "step": 10310 }, { "epoch": 0.5125658090791696, "grad_norm": 0.140625, "learning_rate": 0.0007589987086520315, "loss": 0.7113, "step": 10320 }, { "epoch": 0.5130624813747889, "grad_norm": 0.1259765625, "learning_rate": 0.0007589589748683819, "loss": 0.6794, "step": 10330 }, { "epoch": 0.5135591536704083, "grad_norm": 0.146484375, "learning_rate": 0.0007589192410847323, "loss": 0.7233, "step": 10340 }, { "epoch": 0.5140558259660276, "grad_norm": 0.1279296875, "learning_rate": 0.0007588795073010828, "loss": 0.6812, "step": 10350 }, { "epoch": 0.514552498261647, "grad_norm": 0.130859375, "learning_rate": 0.0007588397735174332, "loss": 0.7091, "step": 10360 }, { "epoch": 0.5150491705572663, "grad_norm": 0.13671875, "learning_rate": 0.0007588000397337837, "loss": 0.734, "step": 10370 }, { "epoch": 0.5155458428528856, "grad_norm": 0.1259765625, "learning_rate": 0.0007587603059501342, "loss": 0.7, "step": 10380 }, { "epoch": 0.516042515148505, "grad_norm": 0.140625, "learning_rate": 0.0007587205721664846, "loss": 0.6981, "step": 10390 }, { "epoch": 0.5165391874441244, "grad_norm": 0.1611328125, "learning_rate": 0.000758680838382835, "loss": 0.7074, "step": 10400 }, { "epoch": 0.5170358597397438, "grad_norm": 0.1357421875, "learning_rate": 0.0007586411045991855, "loss": 0.722, "step": 10410 }, { "epoch": 0.5175325320353631, "grad_norm": 0.126953125, "learning_rate": 0.0007586013708155359, "loss": 0.7269, "step": 10420 }, { "epoch": 0.5180292043309824, "grad_norm": 0.1279296875, "learning_rate": 0.0007585616370318864, "loss": 0.7256, "step": 10430 }, { "epoch": 0.5185258766266018, "grad_norm": 0.1572265625, "learning_rate": 0.0007585219032482369, "loss": 0.711, "step": 10440 }, { "epoch": 0.5190225489222211, "grad_norm": 0.1328125, "learning_rate": 0.0007584821694645873, "loss": 0.6722, "step": 10450 }, { "epoch": 0.5195192212178404, "grad_norm": 0.130859375, "learning_rate": 0.0007584424356809377, "loss": 0.6736, "step": 10460 }, { "epoch": 0.5200158935134598, "grad_norm": 0.1357421875, "learning_rate": 0.0007584027018972883, "loss": 0.6942, "step": 10470 }, { "epoch": 0.5205125658090791, "grad_norm": 0.1240234375, "learning_rate": 0.0007583629681136387, "loss": 0.6936, "step": 10480 }, { "epoch": 0.5210092381046986, "grad_norm": 0.125, "learning_rate": 0.0007583232343299891, "loss": 0.7083, "step": 10490 }, { "epoch": 0.5215059104003179, "grad_norm": 0.134765625, "learning_rate": 0.0007582835005463395, "loss": 0.7019, "step": 10500 }, { "epoch": 0.5220025826959372, "grad_norm": 0.126953125, "learning_rate": 0.00075824376676269, "loss": 0.7379, "step": 10510 }, { "epoch": 0.5224992549915566, "grad_norm": 0.142578125, "learning_rate": 0.0007582040329790405, "loss": 0.6855, "step": 10520 }, { "epoch": 0.5229959272871759, "grad_norm": 0.1376953125, "learning_rate": 0.0007581642991953909, "loss": 0.7362, "step": 10530 }, { "epoch": 0.5234925995827953, "grad_norm": 0.1474609375, "learning_rate": 0.0007581245654117414, "loss": 0.6975, "step": 10540 }, { "epoch": 0.5239892718784146, "grad_norm": 0.1591796875, "learning_rate": 0.0007580848316280918, "loss": 0.668, "step": 10550 }, { "epoch": 0.5244859441740339, "grad_norm": 0.1259765625, "learning_rate": 0.0007580450978444422, "loss": 0.7077, "step": 10560 }, { "epoch": 0.5249826164696533, "grad_norm": 0.1533203125, "learning_rate": 0.0007580053640607928, "loss": 0.7183, "step": 10570 }, { "epoch": 0.5254792887652727, "grad_norm": 0.1494140625, "learning_rate": 0.0007579656302771432, "loss": 0.6935, "step": 10580 }, { "epoch": 0.5259759610608921, "grad_norm": 0.134765625, "learning_rate": 0.0007579258964934936, "loss": 0.7131, "step": 10590 }, { "epoch": 0.5264726333565114, "grad_norm": 0.1240234375, "learning_rate": 0.0007578861627098441, "loss": 0.6672, "step": 10600 }, { "epoch": 0.5269693056521307, "grad_norm": 0.12890625, "learning_rate": 0.0007578464289261945, "loss": 0.6699, "step": 10610 }, { "epoch": 0.5274659779477501, "grad_norm": 0.1279296875, "learning_rate": 0.000757806695142545, "loss": 0.6993, "step": 10620 }, { "epoch": 0.5279626502433694, "grad_norm": 0.12353515625, "learning_rate": 0.0007577669613588955, "loss": 0.6946, "step": 10630 }, { "epoch": 0.5284593225389888, "grad_norm": 0.1298828125, "learning_rate": 0.0007577272275752459, "loss": 0.6683, "step": 10640 }, { "epoch": 0.5289559948346081, "grad_norm": 0.1279296875, "learning_rate": 0.0007576874937915963, "loss": 0.7298, "step": 10650 }, { "epoch": 0.5294526671302274, "grad_norm": 0.1552734375, "learning_rate": 0.0007576477600079468, "loss": 0.6767, "step": 10660 }, { "epoch": 0.5299493394258469, "grad_norm": 0.166015625, "learning_rate": 0.0007576080262242973, "loss": 0.7152, "step": 10670 }, { "epoch": 0.5304460117214662, "grad_norm": 0.1279296875, "learning_rate": 0.0007575682924406477, "loss": 0.7154, "step": 10680 }, { "epoch": 0.5309426840170856, "grad_norm": 0.126953125, "learning_rate": 0.0007575285586569981, "loss": 0.6851, "step": 10690 }, { "epoch": 0.5314393563127049, "grad_norm": 0.1298828125, "learning_rate": 0.0007574888248733486, "loss": 0.7228, "step": 10700 }, { "epoch": 0.5319360286083242, "grad_norm": 0.13671875, "learning_rate": 0.000757449091089699, "loss": 0.6955, "step": 10710 }, { "epoch": 0.5324327009039436, "grad_norm": 0.11767578125, "learning_rate": 0.0007574093573060495, "loss": 0.6805, "step": 10720 }, { "epoch": 0.5329293731995629, "grad_norm": 0.1220703125, "learning_rate": 0.0007573696235224, "loss": 0.6945, "step": 10730 }, { "epoch": 0.5334260454951822, "grad_norm": 0.1474609375, "learning_rate": 0.0007573298897387504, "loss": 0.6913, "step": 10740 }, { "epoch": 0.5339227177908016, "grad_norm": 0.12451171875, "learning_rate": 0.0007572901559551008, "loss": 0.6735, "step": 10750 }, { "epoch": 0.534419390086421, "grad_norm": 0.13671875, "learning_rate": 0.0007572504221714513, "loss": 0.7134, "step": 10760 }, { "epoch": 0.5349160623820404, "grad_norm": 0.140625, "learning_rate": 0.0007572106883878018, "loss": 0.6831, "step": 10770 }, { "epoch": 0.5354127346776597, "grad_norm": 0.1533203125, "learning_rate": 0.0007571709546041522, "loss": 0.7085, "step": 10780 }, { "epoch": 0.535909406973279, "grad_norm": 0.12353515625, "learning_rate": 0.0007571312208205027, "loss": 0.6748, "step": 10790 }, { "epoch": 0.5364060792688984, "grad_norm": 0.12353515625, "learning_rate": 0.0007570914870368531, "loss": 0.7158, "step": 10800 }, { "epoch": 0.5369027515645177, "grad_norm": 0.1298828125, "learning_rate": 0.0007570517532532035, "loss": 0.7052, "step": 10810 }, { "epoch": 0.5373994238601371, "grad_norm": 0.130859375, "learning_rate": 0.0007570120194695541, "loss": 0.7261, "step": 10820 }, { "epoch": 0.5378960961557564, "grad_norm": 0.1259765625, "learning_rate": 0.0007569722856859045, "loss": 0.7455, "step": 10830 }, { "epoch": 0.5383927684513757, "grad_norm": 0.1240234375, "learning_rate": 0.0007569325519022549, "loss": 0.6688, "step": 10840 }, { "epoch": 0.5388894407469952, "grad_norm": 0.1318359375, "learning_rate": 0.0007568928181186054, "loss": 0.6693, "step": 10850 }, { "epoch": 0.5393861130426145, "grad_norm": 0.1162109375, "learning_rate": 0.0007568530843349558, "loss": 0.7121, "step": 10860 }, { "epoch": 0.5398827853382339, "grad_norm": 0.1181640625, "learning_rate": 0.0007568133505513063, "loss": 0.6963, "step": 10870 }, { "epoch": 0.5403794576338532, "grad_norm": 0.1376953125, "learning_rate": 0.0007567736167676568, "loss": 0.7068, "step": 10880 }, { "epoch": 0.5408761299294725, "grad_norm": 0.12255859375, "learning_rate": 0.0007567338829840072, "loss": 0.6676, "step": 10890 }, { "epoch": 0.5413728022250919, "grad_norm": 0.125, "learning_rate": 0.0007566941492003577, "loss": 0.6815, "step": 10900 }, { "epoch": 0.5418694745207112, "grad_norm": 0.1357421875, "learning_rate": 0.000756654415416708, "loss": 0.7308, "step": 10910 }, { "epoch": 0.5423661468163306, "grad_norm": 0.1318359375, "learning_rate": 0.0007566146816330586, "loss": 0.6691, "step": 10920 }, { "epoch": 0.5428628191119499, "grad_norm": 0.12451171875, "learning_rate": 0.0007565749478494091, "loss": 0.7001, "step": 10930 }, { "epoch": 0.5433594914075693, "grad_norm": 0.1396484375, "learning_rate": 0.0007565352140657594, "loss": 0.6703, "step": 10940 }, { "epoch": 0.5438561637031887, "grad_norm": 0.1318359375, "learning_rate": 0.0007564954802821099, "loss": 0.6909, "step": 10950 }, { "epoch": 0.544352835998808, "grad_norm": 0.1318359375, "learning_rate": 0.0007564557464984603, "loss": 0.6848, "step": 10960 }, { "epoch": 0.5448495082944274, "grad_norm": 0.1357421875, "learning_rate": 0.0007564160127148107, "loss": 0.6682, "step": 10970 }, { "epoch": 0.5453461805900467, "grad_norm": 0.15625, "learning_rate": 0.0007563762789311613, "loss": 0.7165, "step": 10980 }, { "epoch": 0.545842852885666, "grad_norm": 0.12890625, "learning_rate": 0.0007563365451475117, "loss": 0.6916, "step": 10990 }, { "epoch": 0.5463395251812854, "grad_norm": 0.13671875, "learning_rate": 0.0007562968113638621, "loss": 0.7004, "step": 11000 }, { "epoch": 0.5468361974769047, "grad_norm": 0.1650390625, "learning_rate": 0.0007562570775802126, "loss": 0.7482, "step": 11010 }, { "epoch": 0.547332869772524, "grad_norm": 0.115234375, "learning_rate": 0.000756217343796563, "loss": 0.6794, "step": 11020 }, { "epoch": 0.5478295420681435, "grad_norm": 0.1357421875, "learning_rate": 0.0007561776100129135, "loss": 0.7104, "step": 11030 }, { "epoch": 0.5483262143637628, "grad_norm": 0.11083984375, "learning_rate": 0.000756137876229264, "loss": 0.681, "step": 11040 }, { "epoch": 0.5488228866593822, "grad_norm": 0.12255859375, "learning_rate": 0.0007560981424456144, "loss": 0.6743, "step": 11050 }, { "epoch": 0.5493195589550015, "grad_norm": 0.12158203125, "learning_rate": 0.0007560584086619649, "loss": 0.7129, "step": 11060 }, { "epoch": 0.5498162312506208, "grad_norm": 0.1513671875, "learning_rate": 0.0007560186748783154, "loss": 0.7309, "step": 11070 }, { "epoch": 0.5503129035462402, "grad_norm": 0.1376953125, "learning_rate": 0.0007559789410946658, "loss": 0.6973, "step": 11080 }, { "epoch": 0.5508095758418595, "grad_norm": 0.134765625, "learning_rate": 0.0007559392073110163, "loss": 0.6736, "step": 11090 }, { "epoch": 0.5513062481374789, "grad_norm": 0.12158203125, "learning_rate": 0.0007558994735273666, "loss": 0.6728, "step": 11100 }, { "epoch": 0.5518029204330982, "grad_norm": 0.11865234375, "learning_rate": 0.0007558597397437171, "loss": 0.6845, "step": 11110 }, { "epoch": 0.5522995927287176, "grad_norm": 0.12158203125, "learning_rate": 0.0007558200059600677, "loss": 0.6922, "step": 11120 }, { "epoch": 0.552796265024337, "grad_norm": 0.1142578125, "learning_rate": 0.000755780272176418, "loss": 0.6936, "step": 11130 }, { "epoch": 0.5532929373199563, "grad_norm": 0.11669921875, "learning_rate": 0.0007557405383927685, "loss": 0.6968, "step": 11140 }, { "epoch": 0.5537896096155757, "grad_norm": 0.12109375, "learning_rate": 0.000755700804609119, "loss": 0.6866, "step": 11150 }, { "epoch": 0.554286281911195, "grad_norm": 0.1279296875, "learning_rate": 0.0007556610708254693, "loss": 0.6958, "step": 11160 }, { "epoch": 0.5547829542068143, "grad_norm": 0.130859375, "learning_rate": 0.0007556213370418198, "loss": 0.6782, "step": 11170 }, { "epoch": 0.5552796265024337, "grad_norm": 0.130859375, "learning_rate": 0.0007555816032581703, "loss": 0.6872, "step": 11180 }, { "epoch": 0.555776298798053, "grad_norm": 0.1279296875, "learning_rate": 0.0007555418694745208, "loss": 0.6499, "step": 11190 }, { "epoch": 0.5562729710936724, "grad_norm": 0.1298828125, "learning_rate": 0.0007555021356908712, "loss": 0.6933, "step": 11200 }, { "epoch": 0.5567696433892917, "grad_norm": 0.12451171875, "learning_rate": 0.0007554624019072216, "loss": 0.7007, "step": 11210 }, { "epoch": 0.5572663156849111, "grad_norm": 0.1279296875, "learning_rate": 0.0007554226681235722, "loss": 0.7212, "step": 11220 }, { "epoch": 0.5577629879805305, "grad_norm": 0.1240234375, "learning_rate": 0.0007553829343399226, "loss": 0.6869, "step": 11230 }, { "epoch": 0.5582596602761498, "grad_norm": 0.12353515625, "learning_rate": 0.000755343200556273, "loss": 0.7108, "step": 11240 }, { "epoch": 0.5587563325717692, "grad_norm": 0.126953125, "learning_rate": 0.0007553034667726235, "loss": 0.7133, "step": 11250 }, { "epoch": 0.5592530048673885, "grad_norm": 0.125, "learning_rate": 0.0007552637329889739, "loss": 0.6634, "step": 11260 }, { "epoch": 0.5597496771630078, "grad_norm": 0.1201171875, "learning_rate": 0.0007552239992053243, "loss": 0.7046, "step": 11270 }, { "epoch": 0.5602463494586272, "grad_norm": 0.126953125, "learning_rate": 0.0007551842654216749, "loss": 0.6988, "step": 11280 }, { "epoch": 0.5607430217542465, "grad_norm": 0.1279296875, "learning_rate": 0.0007551445316380252, "loss": 0.681, "step": 11290 }, { "epoch": 0.5612396940498658, "grad_norm": 0.12890625, "learning_rate": 0.0007551047978543757, "loss": 0.7067, "step": 11300 }, { "epoch": 0.5617363663454853, "grad_norm": 0.11328125, "learning_rate": 0.0007550650640707262, "loss": 0.6846, "step": 11310 }, { "epoch": 0.5622330386411046, "grad_norm": 0.11083984375, "learning_rate": 0.0007550253302870765, "loss": 0.6974, "step": 11320 }, { "epoch": 0.562729710936724, "grad_norm": 0.1337890625, "learning_rate": 0.0007549855965034271, "loss": 0.6877, "step": 11330 }, { "epoch": 0.5632263832323433, "grad_norm": 0.10498046875, "learning_rate": 0.0007549458627197776, "loss": 0.6674, "step": 11340 }, { "epoch": 0.5637230555279626, "grad_norm": 0.12255859375, "learning_rate": 0.000754906128936128, "loss": 0.6544, "step": 11350 }, { "epoch": 0.564219727823582, "grad_norm": 0.123046875, "learning_rate": 0.0007548663951524784, "loss": 0.6775, "step": 11360 }, { "epoch": 0.5647164001192013, "grad_norm": 0.1201171875, "learning_rate": 0.0007548266613688288, "loss": 0.6838, "step": 11370 }, { "epoch": 0.5652130724148207, "grad_norm": 0.14453125, "learning_rate": 0.0007547869275851794, "loss": 0.6775, "step": 11380 }, { "epoch": 0.56570974471044, "grad_norm": 0.11181640625, "learning_rate": 0.0007547471938015298, "loss": 0.6907, "step": 11390 }, { "epoch": 0.5662064170060594, "grad_norm": 0.119140625, "learning_rate": 0.0007547074600178802, "loss": 0.669, "step": 11400 }, { "epoch": 0.5667030893016788, "grad_norm": 0.11865234375, "learning_rate": 0.0007546677262342307, "loss": 0.681, "step": 11410 }, { "epoch": 0.5671997615972981, "grad_norm": 0.11474609375, "learning_rate": 0.0007546279924505811, "loss": 0.685, "step": 11420 }, { "epoch": 0.5676964338929175, "grad_norm": 0.1259765625, "learning_rate": 0.0007545882586669316, "loss": 0.7101, "step": 11430 }, { "epoch": 0.5681931061885368, "grad_norm": 0.11376953125, "learning_rate": 0.0007545485248832821, "loss": 0.7297, "step": 11440 }, { "epoch": 0.5686897784841561, "grad_norm": 0.10986328125, "learning_rate": 0.0007545087910996325, "loss": 0.7116, "step": 11450 }, { "epoch": 0.5691864507797755, "grad_norm": 0.123046875, "learning_rate": 0.0007544690573159829, "loss": 0.6823, "step": 11460 }, { "epoch": 0.5696831230753948, "grad_norm": 0.1201171875, "learning_rate": 0.0007544293235323334, "loss": 0.7039, "step": 11470 }, { "epoch": 0.5701797953710142, "grad_norm": 0.111328125, "learning_rate": 0.0007543895897486839, "loss": 0.6877, "step": 11480 }, { "epoch": 0.5706764676666336, "grad_norm": 0.1279296875, "learning_rate": 0.0007543498559650343, "loss": 0.7188, "step": 11490 }, { "epoch": 0.5711731399622529, "grad_norm": 0.1162109375, "learning_rate": 0.0007543101221813848, "loss": 0.6998, "step": 11500 }, { "epoch": 0.5716698122578723, "grad_norm": 0.1689453125, "learning_rate": 0.0007542703883977352, "loss": 0.6943, "step": 11510 }, { "epoch": 0.5721664845534916, "grad_norm": 0.12353515625, "learning_rate": 0.0007542306546140856, "loss": 0.6909, "step": 11520 }, { "epoch": 0.572663156849111, "grad_norm": 0.11669921875, "learning_rate": 0.0007541909208304362, "loss": 0.6551, "step": 11530 }, { "epoch": 0.5731598291447303, "grad_norm": 0.15234375, "learning_rate": 0.0007541511870467866, "loss": 0.6907, "step": 11540 }, { "epoch": 0.5736565014403496, "grad_norm": 0.12353515625, "learning_rate": 0.000754111453263137, "loss": 0.6967, "step": 11550 }, { "epoch": 0.574153173735969, "grad_norm": 0.11865234375, "learning_rate": 0.0007540717194794874, "loss": 0.6866, "step": 11560 }, { "epoch": 0.5746498460315883, "grad_norm": 0.1162109375, "learning_rate": 0.0007540319856958379, "loss": 0.6793, "step": 11570 }, { "epoch": 0.5751465183272078, "grad_norm": 0.1240234375, "learning_rate": 0.0007539922519121884, "loss": 0.71, "step": 11580 }, { "epoch": 0.5756431906228271, "grad_norm": 0.12158203125, "learning_rate": 0.0007539525181285388, "loss": 0.6951, "step": 11590 }, { "epoch": 0.5761398629184464, "grad_norm": 0.1201171875, "learning_rate": 0.0007539127843448893, "loss": 0.6879, "step": 11600 }, { "epoch": 0.5766365352140658, "grad_norm": 0.11962890625, "learning_rate": 0.0007538730505612397, "loss": 0.6827, "step": 11610 }, { "epoch": 0.5771332075096851, "grad_norm": 0.11474609375, "learning_rate": 0.0007538333167775901, "loss": 0.666, "step": 11620 }, { "epoch": 0.5776298798053044, "grad_norm": 0.11572265625, "learning_rate": 0.0007537935829939407, "loss": 0.6948, "step": 11630 }, { "epoch": 0.5781265521009238, "grad_norm": 0.10986328125, "learning_rate": 0.0007537538492102911, "loss": 0.6813, "step": 11640 }, { "epoch": 0.5786232243965431, "grad_norm": 0.1181640625, "learning_rate": 0.0007537141154266415, "loss": 0.6757, "step": 11650 }, { "epoch": 0.5791198966921625, "grad_norm": 0.11865234375, "learning_rate": 0.000753674381642992, "loss": 0.7123, "step": 11660 }, { "epoch": 0.5796165689877819, "grad_norm": 0.1328125, "learning_rate": 0.0007536346478593424, "loss": 0.7048, "step": 11670 }, { "epoch": 0.5801132412834012, "grad_norm": 0.11181640625, "learning_rate": 0.0007535949140756929, "loss": 0.6927, "step": 11680 }, { "epoch": 0.5806099135790206, "grad_norm": 0.119140625, "learning_rate": 0.0007535551802920434, "loss": 0.6679, "step": 11690 }, { "epoch": 0.5811065858746399, "grad_norm": 0.1201171875, "learning_rate": 0.0007535154465083938, "loss": 0.729, "step": 11700 }, { "epoch": 0.5816032581702593, "grad_norm": 0.111328125, "learning_rate": 0.0007534757127247442, "loss": 0.7099, "step": 11710 }, { "epoch": 0.5820999304658786, "grad_norm": 0.1240234375, "learning_rate": 0.0007534359789410947, "loss": 0.6524, "step": 11720 }, { "epoch": 0.5825966027614979, "grad_norm": 0.1064453125, "learning_rate": 0.0007533962451574452, "loss": 0.7253, "step": 11730 }, { "epoch": 0.5830932750571173, "grad_norm": 0.11669921875, "learning_rate": 0.0007533565113737956, "loss": 0.6756, "step": 11740 }, { "epoch": 0.5835899473527366, "grad_norm": 0.1298828125, "learning_rate": 0.0007533167775901461, "loss": 0.6726, "step": 11750 }, { "epoch": 0.5840866196483561, "grad_norm": 0.1142578125, "learning_rate": 0.0007532770438064965, "loss": 0.6854, "step": 11760 }, { "epoch": 0.5845832919439754, "grad_norm": 0.1181640625, "learning_rate": 0.0007532373100228469, "loss": 0.6881, "step": 11770 }, { "epoch": 0.5850799642395947, "grad_norm": 0.1201171875, "learning_rate": 0.0007531975762391974, "loss": 0.7094, "step": 11780 }, { "epoch": 0.5855766365352141, "grad_norm": 0.1123046875, "learning_rate": 0.0007531578424555479, "loss": 0.6962, "step": 11790 }, { "epoch": 0.5860733088308334, "grad_norm": 0.10595703125, "learning_rate": 0.0007531181086718984, "loss": 0.7148, "step": 11800 }, { "epoch": 0.5865699811264528, "grad_norm": 0.1259765625, "learning_rate": 0.0007530783748882487, "loss": 0.6397, "step": 11810 }, { "epoch": 0.5870666534220721, "grad_norm": 0.154296875, "learning_rate": 0.0007530386411045992, "loss": 0.6775, "step": 11820 }, { "epoch": 0.5875633257176914, "grad_norm": 0.1220703125, "learning_rate": 0.0007529989073209497, "loss": 0.7079, "step": 11830 }, { "epoch": 0.5880599980133108, "grad_norm": 0.134765625, "learning_rate": 0.0007529591735373001, "loss": 0.7033, "step": 11840 }, { "epoch": 0.5885566703089302, "grad_norm": 0.14453125, "learning_rate": 0.0007529194397536506, "loss": 0.6891, "step": 11850 }, { "epoch": 0.5890533426045496, "grad_norm": 0.1328125, "learning_rate": 0.000752879705970001, "loss": 0.6991, "step": 11860 }, { "epoch": 0.5895500149001689, "grad_norm": 0.126953125, "learning_rate": 0.0007528399721863514, "loss": 0.7138, "step": 11870 }, { "epoch": 0.5900466871957882, "grad_norm": 0.12353515625, "learning_rate": 0.000752800238402702, "loss": 0.694, "step": 11880 }, { "epoch": 0.5905433594914076, "grad_norm": 0.115234375, "learning_rate": 0.0007527605046190524, "loss": 0.6693, "step": 11890 }, { "epoch": 0.5910400317870269, "grad_norm": 0.12451171875, "learning_rate": 0.0007527207708354028, "loss": 0.6626, "step": 11900 }, { "epoch": 0.5915367040826462, "grad_norm": 0.1123046875, "learning_rate": 0.0007526810370517533, "loss": 0.6696, "step": 11910 }, { "epoch": 0.5920333763782656, "grad_norm": 0.138671875, "learning_rate": 0.0007526413032681037, "loss": 0.7056, "step": 11920 }, { "epoch": 0.5925300486738849, "grad_norm": 0.15234375, "learning_rate": 0.0007526015694844542, "loss": 0.7229, "step": 11930 }, { "epoch": 0.5930267209695044, "grad_norm": 0.130859375, "learning_rate": 0.0007525618357008047, "loss": 0.6533, "step": 11940 }, { "epoch": 0.5935233932651237, "grad_norm": 0.119140625, "learning_rate": 0.0007525221019171551, "loss": 0.6976, "step": 11950 }, { "epoch": 0.594020065560743, "grad_norm": 0.12353515625, "learning_rate": 0.0007524823681335056, "loss": 0.6907, "step": 11960 }, { "epoch": 0.5945167378563624, "grad_norm": 0.12109375, "learning_rate": 0.0007524426343498559, "loss": 0.6898, "step": 11970 }, { "epoch": 0.5950134101519817, "grad_norm": 0.12158203125, "learning_rate": 0.0007524029005662065, "loss": 0.6755, "step": 11980 }, { "epoch": 0.5955100824476011, "grad_norm": 0.1162109375, "learning_rate": 0.000752363166782557, "loss": 0.7117, "step": 11990 }, { "epoch": 0.5960067547432204, "grad_norm": 0.12109375, "learning_rate": 0.0007523234329989073, "loss": 0.6984, "step": 12000 }, { "epoch": 0.5965034270388397, "grad_norm": 0.15234375, "learning_rate": 0.0007522836992152578, "loss": 0.693, "step": 12010 }, { "epoch": 0.5970000993344591, "grad_norm": 0.11572265625, "learning_rate": 0.0007522439654316083, "loss": 0.6968, "step": 12020 }, { "epoch": 0.5974967716300785, "grad_norm": 0.11181640625, "learning_rate": 0.0007522042316479587, "loss": 0.726, "step": 12030 }, { "epoch": 0.5979934439256979, "grad_norm": 0.109375, "learning_rate": 0.0007521644978643092, "loss": 0.6597, "step": 12040 }, { "epoch": 0.5984901162213172, "grad_norm": 0.1396484375, "learning_rate": 0.0007521247640806596, "loss": 0.6991, "step": 12050 }, { "epoch": 0.5989867885169365, "grad_norm": 0.11767578125, "learning_rate": 0.00075208503029701, "loss": 0.69, "step": 12060 }, { "epoch": 0.5994834608125559, "grad_norm": 0.1357421875, "learning_rate": 0.0007520452965133605, "loss": 0.6338, "step": 12070 }, { "epoch": 0.5999801331081752, "grad_norm": 0.123046875, "learning_rate": 0.000752005562729711, "loss": 0.6749, "step": 12080 }, { "epoch": 0.6004768054037946, "grad_norm": 0.109375, "learning_rate": 0.0007519658289460615, "loss": 0.6509, "step": 12090 }, { "epoch": 0.6009734776994139, "grad_norm": 0.1279296875, "learning_rate": 0.0007519260951624119, "loss": 0.6835, "step": 12100 }, { "epoch": 0.6014701499950332, "grad_norm": 0.11083984375, "learning_rate": 0.0007518863613787623, "loss": 0.6922, "step": 12110 }, { "epoch": 0.6019668222906527, "grad_norm": 0.1220703125, "learning_rate": 0.0007518466275951128, "loss": 0.6965, "step": 12120 }, { "epoch": 0.602463494586272, "grad_norm": 0.11865234375, "learning_rate": 0.0007518068938114633, "loss": 0.6965, "step": 12130 }, { "epoch": 0.6029601668818914, "grad_norm": 0.1435546875, "learning_rate": 0.0007517671600278137, "loss": 0.6884, "step": 12140 }, { "epoch": 0.6034568391775107, "grad_norm": 0.11572265625, "learning_rate": 0.0007517274262441642, "loss": 0.6822, "step": 12150 }, { "epoch": 0.60395351147313, "grad_norm": 0.11669921875, "learning_rate": 0.0007516876924605145, "loss": 0.6637, "step": 12160 }, { "epoch": 0.6044501837687494, "grad_norm": 0.11962890625, "learning_rate": 0.000751647958676865, "loss": 0.6784, "step": 12170 }, { "epoch": 0.6049468560643687, "grad_norm": 0.1181640625, "learning_rate": 0.0007516082248932156, "loss": 0.6741, "step": 12180 }, { "epoch": 0.605443528359988, "grad_norm": 0.11962890625, "learning_rate": 0.0007515684911095659, "loss": 0.6529, "step": 12190 }, { "epoch": 0.6059402006556074, "grad_norm": 0.11279296875, "learning_rate": 0.0007515287573259164, "loss": 0.6806, "step": 12200 }, { "epoch": 0.6064368729512268, "grad_norm": 0.111328125, "learning_rate": 0.0007514890235422669, "loss": 0.6758, "step": 12210 }, { "epoch": 0.6069335452468462, "grad_norm": 0.1357421875, "learning_rate": 0.0007514492897586172, "loss": 0.7012, "step": 12220 }, { "epoch": 0.6074302175424655, "grad_norm": 0.1259765625, "learning_rate": 0.0007514095559749678, "loss": 0.7264, "step": 12230 }, { "epoch": 0.6079268898380848, "grad_norm": 0.1240234375, "learning_rate": 0.0007513698221913182, "loss": 0.6513, "step": 12240 }, { "epoch": 0.6084235621337042, "grad_norm": 0.11279296875, "learning_rate": 0.0007513300884076687, "loss": 0.6646, "step": 12250 }, { "epoch": 0.6089202344293235, "grad_norm": 0.11865234375, "learning_rate": 0.0007512903546240191, "loss": 0.6786, "step": 12260 }, { "epoch": 0.6094169067249429, "grad_norm": 0.1259765625, "learning_rate": 0.0007512506208403695, "loss": 0.6541, "step": 12270 }, { "epoch": 0.6099135790205622, "grad_norm": 0.11767578125, "learning_rate": 0.0007512108870567201, "loss": 0.6807, "step": 12280 }, { "epoch": 0.6104102513161815, "grad_norm": 0.12890625, "learning_rate": 0.0007511711532730705, "loss": 0.6737, "step": 12290 }, { "epoch": 0.6109069236118009, "grad_norm": 0.11572265625, "learning_rate": 0.0007511314194894209, "loss": 0.6765, "step": 12300 }, { "epoch": 0.6114035959074203, "grad_norm": 0.10888671875, "learning_rate": 0.0007510916857057714, "loss": 0.7203, "step": 12310 }, { "epoch": 0.6119002682030397, "grad_norm": 0.12255859375, "learning_rate": 0.0007510519519221218, "loss": 0.665, "step": 12320 }, { "epoch": 0.612396940498659, "grad_norm": 0.1181640625, "learning_rate": 0.0007510122181384722, "loss": 0.6752, "step": 12330 }, { "epoch": 0.6128936127942783, "grad_norm": 0.140625, "learning_rate": 0.0007509724843548228, "loss": 0.6806, "step": 12340 }, { "epoch": 0.6133902850898977, "grad_norm": 0.11181640625, "learning_rate": 0.0007509327505711731, "loss": 0.6697, "step": 12350 }, { "epoch": 0.613886957385517, "grad_norm": 0.11767578125, "learning_rate": 0.0007508930167875236, "loss": 0.6807, "step": 12360 }, { "epoch": 0.6143836296811364, "grad_norm": 0.1103515625, "learning_rate": 0.0007508532830038741, "loss": 0.6599, "step": 12370 }, { "epoch": 0.6148803019767557, "grad_norm": 0.125, "learning_rate": 0.0007508135492202244, "loss": 0.6782, "step": 12380 }, { "epoch": 0.615376974272375, "grad_norm": 0.126953125, "learning_rate": 0.000750773815436575, "loss": 0.6866, "step": 12390 }, { "epoch": 0.6158736465679945, "grad_norm": 0.10986328125, "learning_rate": 0.0007507340816529255, "loss": 0.6832, "step": 12400 }, { "epoch": 0.6163703188636138, "grad_norm": 0.10791015625, "learning_rate": 0.0007506943478692759, "loss": 0.6741, "step": 12410 }, { "epoch": 0.6168669911592332, "grad_norm": 0.1025390625, "learning_rate": 0.0007506546140856263, "loss": 0.6638, "step": 12420 }, { "epoch": 0.6173636634548525, "grad_norm": 0.134765625, "learning_rate": 0.0007506148803019767, "loss": 0.6734, "step": 12430 }, { "epoch": 0.6178603357504718, "grad_norm": 0.1162109375, "learning_rate": 0.0007505751465183273, "loss": 0.6718, "step": 12440 }, { "epoch": 0.6183570080460912, "grad_norm": 0.1279296875, "learning_rate": 0.0007505354127346777, "loss": 0.7069, "step": 12450 }, { "epoch": 0.6188536803417105, "grad_norm": 0.10986328125, "learning_rate": 0.0007504956789510281, "loss": 0.7089, "step": 12460 }, { "epoch": 0.6193503526373298, "grad_norm": 0.11767578125, "learning_rate": 0.0007504559451673786, "loss": 0.6918, "step": 12470 }, { "epoch": 0.6198470249329492, "grad_norm": 0.1591796875, "learning_rate": 0.000750416211383729, "loss": 0.695, "step": 12480 }, { "epoch": 0.6203436972285686, "grad_norm": 0.134765625, "learning_rate": 0.0007503764776000795, "loss": 0.6542, "step": 12490 }, { "epoch": 0.620840369524188, "grad_norm": 0.14453125, "learning_rate": 0.00075033674381643, "loss": 0.7126, "step": 12500 }, { "epoch": 0.6213370418198073, "grad_norm": 0.1171875, "learning_rate": 0.0007502970100327804, "loss": 0.6595, "step": 12510 }, { "epoch": 0.6218337141154267, "grad_norm": 0.11279296875, "learning_rate": 0.0007502572762491308, "loss": 0.6853, "step": 12520 }, { "epoch": 0.622330386411046, "grad_norm": 0.11376953125, "learning_rate": 0.0007502175424654814, "loss": 0.6536, "step": 12530 }, { "epoch": 0.6228270587066653, "grad_norm": 0.1123046875, "learning_rate": 0.0007501778086818318, "loss": 0.6682, "step": 12540 }, { "epoch": 0.6233237310022847, "grad_norm": 0.11181640625, "learning_rate": 0.0007501380748981822, "loss": 0.6557, "step": 12550 }, { "epoch": 0.623820403297904, "grad_norm": 0.10791015625, "learning_rate": 0.0007500983411145327, "loss": 0.6871, "step": 12560 }, { "epoch": 0.6243170755935233, "grad_norm": 0.12451171875, "learning_rate": 0.0007500586073308831, "loss": 0.6744, "step": 12570 }, { "epoch": 0.6248137478891428, "grad_norm": 0.10498046875, "learning_rate": 0.0007500188735472335, "loss": 0.6408, "step": 12580 }, { "epoch": 0.6253104201847621, "grad_norm": 0.1064453125, "learning_rate": 0.0007499791397635841, "loss": 0.6815, "step": 12590 }, { "epoch": 0.6258070924803815, "grad_norm": 0.10693359375, "learning_rate": 0.0007499394059799345, "loss": 0.6647, "step": 12600 }, { "epoch": 0.6263037647760008, "grad_norm": 0.12255859375, "learning_rate": 0.0007498996721962849, "loss": 0.666, "step": 12610 }, { "epoch": 0.6268004370716201, "grad_norm": 0.10400390625, "learning_rate": 0.0007498599384126353, "loss": 0.6886, "step": 12620 }, { "epoch": 0.6272971093672395, "grad_norm": 0.1083984375, "learning_rate": 0.0007498202046289858, "loss": 0.7055, "step": 12630 }, { "epoch": 0.6277937816628588, "grad_norm": 0.12109375, "learning_rate": 0.0007497804708453363, "loss": 0.6681, "step": 12640 }, { "epoch": 0.6282904539584782, "grad_norm": 0.11767578125, "learning_rate": 0.0007497407370616867, "loss": 0.6827, "step": 12650 }, { "epoch": 0.6287871262540975, "grad_norm": 0.1220703125, "learning_rate": 0.0007497010032780372, "loss": 0.6637, "step": 12660 }, { "epoch": 0.6292837985497169, "grad_norm": 0.11083984375, "learning_rate": 0.0007496612694943876, "loss": 0.6761, "step": 12670 }, { "epoch": 0.6297804708453363, "grad_norm": 0.1162109375, "learning_rate": 0.000749621535710738, "loss": 0.6713, "step": 12680 }, { "epoch": 0.6302771431409556, "grad_norm": 0.1103515625, "learning_rate": 0.0007495818019270886, "loss": 0.6797, "step": 12690 }, { "epoch": 0.630773815436575, "grad_norm": 0.11474609375, "learning_rate": 0.000749542068143439, "loss": 0.6841, "step": 12700 }, { "epoch": 0.6312704877321943, "grad_norm": 0.1064453125, "learning_rate": 0.0007495023343597894, "loss": 0.6869, "step": 12710 }, { "epoch": 0.6317671600278136, "grad_norm": 0.140625, "learning_rate": 0.0007494626005761399, "loss": 0.6889, "step": 12720 }, { "epoch": 0.632263832323433, "grad_norm": 0.10498046875, "learning_rate": 0.0007494228667924903, "loss": 0.7024, "step": 12730 }, { "epoch": 0.6327605046190523, "grad_norm": 0.11279296875, "learning_rate": 0.0007493831330088408, "loss": 0.6904, "step": 12740 }, { "epoch": 0.6332571769146716, "grad_norm": 0.1337890625, "learning_rate": 0.0007493433992251913, "loss": 0.6811, "step": 12750 }, { "epoch": 0.6337538492102911, "grad_norm": 0.11083984375, "learning_rate": 0.0007493036654415417, "loss": 0.6713, "step": 12760 }, { "epoch": 0.6342505215059104, "grad_norm": 0.11376953125, "learning_rate": 0.0007492639316578921, "loss": 0.6732, "step": 12770 }, { "epoch": 0.6347471938015298, "grad_norm": 0.1298828125, "learning_rate": 0.0007492241978742426, "loss": 0.6961, "step": 12780 }, { "epoch": 0.6352438660971491, "grad_norm": 0.1103515625, "learning_rate": 0.0007491844640905931, "loss": 0.672, "step": 12790 }, { "epoch": 0.6357405383927685, "grad_norm": 0.1171875, "learning_rate": 0.0007491447303069435, "loss": 0.7121, "step": 12800 }, { "epoch": 0.6362372106883878, "grad_norm": 0.10400390625, "learning_rate": 0.000749104996523294, "loss": 0.6957, "step": 12810 }, { "epoch": 0.6367338829840071, "grad_norm": 0.11767578125, "learning_rate": 0.0007490652627396444, "loss": 0.6893, "step": 12820 }, { "epoch": 0.6372305552796265, "grad_norm": 0.11474609375, "learning_rate": 0.0007490255289559948, "loss": 0.6501, "step": 12830 }, { "epoch": 0.6377272275752458, "grad_norm": 0.10986328125, "learning_rate": 0.0007489857951723453, "loss": 0.6912, "step": 12840 }, { "epoch": 0.6382238998708653, "grad_norm": 0.1181640625, "learning_rate": 0.0007489460613886958, "loss": 0.6945, "step": 12850 }, { "epoch": 0.6387205721664846, "grad_norm": 0.130859375, "learning_rate": 0.0007489063276050463, "loss": 0.66, "step": 12860 }, { "epoch": 0.6392172444621039, "grad_norm": 0.1171875, "learning_rate": 0.0007488665938213966, "loss": 0.7147, "step": 12870 }, { "epoch": 0.6397139167577233, "grad_norm": 0.1123046875, "learning_rate": 0.0007488268600377471, "loss": 0.668, "step": 12880 }, { "epoch": 0.6402105890533426, "grad_norm": 0.10791015625, "learning_rate": 0.0007487871262540976, "loss": 0.6798, "step": 12890 }, { "epoch": 0.6407072613489619, "grad_norm": 0.11767578125, "learning_rate": 0.000748747392470448, "loss": 0.6702, "step": 12900 }, { "epoch": 0.6412039336445813, "grad_norm": 0.11669921875, "learning_rate": 0.0007487076586867985, "loss": 0.661, "step": 12910 }, { "epoch": 0.6417006059402006, "grad_norm": 0.1220703125, "learning_rate": 0.0007486679249031489, "loss": 0.6679, "step": 12920 }, { "epoch": 0.64219727823582, "grad_norm": 0.1044921875, "learning_rate": 0.0007486281911194993, "loss": 0.659, "step": 12930 }, { "epoch": 0.6426939505314394, "grad_norm": 0.10205078125, "learning_rate": 0.0007485884573358499, "loss": 0.6966, "step": 12940 }, { "epoch": 0.6431906228270587, "grad_norm": 0.1328125, "learning_rate": 0.0007485487235522003, "loss": 0.7063, "step": 12950 }, { "epoch": 0.6436872951226781, "grad_norm": 0.111328125, "learning_rate": 0.0007485089897685507, "loss": 0.6778, "step": 12960 }, { "epoch": 0.6441839674182974, "grad_norm": 0.1044921875, "learning_rate": 0.0007484692559849012, "loss": 0.7112, "step": 12970 }, { "epoch": 0.6446806397139168, "grad_norm": 0.1064453125, "learning_rate": 0.0007484295222012516, "loss": 0.7005, "step": 12980 }, { "epoch": 0.6451773120095361, "grad_norm": 0.10986328125, "learning_rate": 0.0007483897884176022, "loss": 0.657, "step": 12990 }, { "epoch": 0.6456739843051554, "grad_norm": 0.140625, "learning_rate": 0.0007483500546339526, "loss": 0.6676, "step": 13000 }, { "epoch": 0.6461706566007748, "grad_norm": 0.1123046875, "learning_rate": 0.000748310320850303, "loss": 0.6943, "step": 13010 }, { "epoch": 0.6466673288963941, "grad_norm": 0.1103515625, "learning_rate": 0.0007482705870666535, "loss": 0.6727, "step": 13020 }, { "epoch": 0.6471640011920136, "grad_norm": 0.1220703125, "learning_rate": 0.0007482308532830038, "loss": 0.6818, "step": 13030 }, { "epoch": 0.6476606734876329, "grad_norm": 0.11083984375, "learning_rate": 0.0007481911194993544, "loss": 0.6715, "step": 13040 }, { "epoch": 0.6481573457832522, "grad_norm": 0.107421875, "learning_rate": 0.0007481513857157049, "loss": 0.6993, "step": 13050 }, { "epoch": 0.6486540180788716, "grad_norm": 0.11474609375, "learning_rate": 0.0007481116519320552, "loss": 0.6996, "step": 13060 }, { "epoch": 0.6491506903744909, "grad_norm": 0.154296875, "learning_rate": 0.0007480719181484057, "loss": 0.6714, "step": 13070 }, { "epoch": 0.6496473626701103, "grad_norm": 0.11279296875, "learning_rate": 0.0007480321843647562, "loss": 0.702, "step": 13080 }, { "epoch": 0.6501440349657296, "grad_norm": 0.11328125, "learning_rate": 0.0007479924505811066, "loss": 0.6975, "step": 13090 }, { "epoch": 0.6506407072613489, "grad_norm": 0.12255859375, "learning_rate": 0.0007479527167974571, "loss": 0.7115, "step": 13100 }, { "epoch": 0.6511373795569683, "grad_norm": 0.107421875, "learning_rate": 0.0007479129830138075, "loss": 0.7013, "step": 13110 }, { "epoch": 0.6516340518525877, "grad_norm": 0.1162109375, "learning_rate": 0.0007478732492301579, "loss": 0.6469, "step": 13120 }, { "epoch": 0.652130724148207, "grad_norm": 0.1484375, "learning_rate": 0.0007478335154465084, "loss": 0.7094, "step": 13130 }, { "epoch": 0.6526273964438264, "grad_norm": 0.134765625, "learning_rate": 0.0007477937816628589, "loss": 0.6609, "step": 13140 }, { "epoch": 0.6531240687394457, "grad_norm": 0.11328125, "learning_rate": 0.0007477540478792094, "loss": 0.684, "step": 13150 }, { "epoch": 0.6536207410350651, "grad_norm": 0.10888671875, "learning_rate": 0.0007477143140955598, "loss": 0.6833, "step": 13160 }, { "epoch": 0.6541174133306844, "grad_norm": 0.11669921875, "learning_rate": 0.0007476745803119102, "loss": 0.6783, "step": 13170 }, { "epoch": 0.6546140856263037, "grad_norm": 0.11083984375, "learning_rate": 0.0007476348465282607, "loss": 0.6597, "step": 13180 }, { "epoch": 0.6551107579219231, "grad_norm": 0.123046875, "learning_rate": 0.0007475951127446112, "loss": 0.6951, "step": 13190 }, { "epoch": 0.6556074302175424, "grad_norm": 0.1240234375, "learning_rate": 0.0007475553789609616, "loss": 0.6596, "step": 13200 }, { "epoch": 0.6561041025131619, "grad_norm": 0.12353515625, "learning_rate": 0.0007475156451773121, "loss": 0.6579, "step": 13210 }, { "epoch": 0.6566007748087812, "grad_norm": 0.1142578125, "learning_rate": 0.0007474759113936624, "loss": 0.669, "step": 13220 }, { "epoch": 0.6570974471044005, "grad_norm": 0.1181640625, "learning_rate": 0.0007474361776100129, "loss": 0.6524, "step": 13230 }, { "epoch": 0.6575941194000199, "grad_norm": 0.10986328125, "learning_rate": 0.0007473964438263635, "loss": 0.6781, "step": 13240 }, { "epoch": 0.6580907916956392, "grad_norm": 0.111328125, "learning_rate": 0.0007473567100427138, "loss": 0.6868, "step": 13250 }, { "epoch": 0.6585874639912586, "grad_norm": 0.1123046875, "learning_rate": 0.0007473169762590643, "loss": 0.6969, "step": 13260 }, { "epoch": 0.6590841362868779, "grad_norm": 0.10986328125, "learning_rate": 0.0007472772424754148, "loss": 0.7009, "step": 13270 }, { "epoch": 0.6595808085824972, "grad_norm": 0.109375, "learning_rate": 0.0007472375086917652, "loss": 0.681, "step": 13280 }, { "epoch": 0.6600774808781166, "grad_norm": 0.1015625, "learning_rate": 0.0007471977749081157, "loss": 0.6342, "step": 13290 }, { "epoch": 0.660574153173736, "grad_norm": 0.111328125, "learning_rate": 0.0007471580411244661, "loss": 0.6394, "step": 13300 }, { "epoch": 0.6610708254693554, "grad_norm": 0.11279296875, "learning_rate": 0.0007471183073408166, "loss": 0.7037, "step": 13310 }, { "epoch": 0.6615674977649747, "grad_norm": 0.12109375, "learning_rate": 0.000747078573557167, "loss": 0.6478, "step": 13320 }, { "epoch": 0.662064170060594, "grad_norm": 0.11865234375, "learning_rate": 0.0007470388397735174, "loss": 0.6727, "step": 13330 }, { "epoch": 0.6625608423562134, "grad_norm": 0.123046875, "learning_rate": 0.000746999105989868, "loss": 0.6829, "step": 13340 }, { "epoch": 0.6630575146518327, "grad_norm": 0.125, "learning_rate": 0.0007469593722062184, "loss": 0.6971, "step": 13350 }, { "epoch": 0.663554186947452, "grad_norm": 0.10791015625, "learning_rate": 0.0007469196384225688, "loss": 0.6622, "step": 13360 }, { "epoch": 0.6640508592430714, "grad_norm": 0.10546875, "learning_rate": 0.0007468799046389193, "loss": 0.6789, "step": 13370 }, { "epoch": 0.6645475315386907, "grad_norm": 0.1162109375, "learning_rate": 0.0007468401708552697, "loss": 0.6986, "step": 13380 }, { "epoch": 0.6650442038343102, "grad_norm": 0.1142578125, "learning_rate": 0.0007468004370716202, "loss": 0.7204, "step": 13390 }, { "epoch": 0.6655408761299295, "grad_norm": 0.111328125, "learning_rate": 0.0007467607032879707, "loss": 0.6522, "step": 13400 }, { "epoch": 0.6660375484255489, "grad_norm": 0.1083984375, "learning_rate": 0.0007467209695043211, "loss": 0.6546, "step": 13410 }, { "epoch": 0.6665342207211682, "grad_norm": 0.10693359375, "learning_rate": 0.0007466812357206715, "loss": 0.6535, "step": 13420 }, { "epoch": 0.6670308930167875, "grad_norm": 0.1123046875, "learning_rate": 0.000746641501937022, "loss": 0.6729, "step": 13430 }, { "epoch": 0.6675275653124069, "grad_norm": 0.11279296875, "learning_rate": 0.0007466017681533725, "loss": 0.6886, "step": 13440 }, { "epoch": 0.6680242376080262, "grad_norm": 0.134765625, "learning_rate": 0.0007465620343697229, "loss": 0.6564, "step": 13450 }, { "epoch": 0.6685209099036455, "grad_norm": 0.1259765625, "learning_rate": 0.0007465223005860734, "loss": 0.6938, "step": 13460 }, { "epoch": 0.6690175821992649, "grad_norm": 0.1357421875, "learning_rate": 0.0007464825668024238, "loss": 0.6715, "step": 13470 }, { "epoch": 0.6695142544948842, "grad_norm": 0.11865234375, "learning_rate": 0.0007464428330187742, "loss": 0.6789, "step": 13480 }, { "epoch": 0.6700109267905037, "grad_norm": 0.1337890625, "learning_rate": 0.0007464030992351247, "loss": 0.6537, "step": 13490 }, { "epoch": 0.670507599086123, "grad_norm": 0.109375, "learning_rate": 0.0007463633654514752, "loss": 0.6739, "step": 13500 }, { "epoch": 0.6710042713817423, "grad_norm": 0.103515625, "learning_rate": 0.0007463236316678256, "loss": 0.6662, "step": 13510 }, { "epoch": 0.6715009436773617, "grad_norm": 0.10888671875, "learning_rate": 0.000746283897884176, "loss": 0.6451, "step": 13520 }, { "epoch": 0.671997615972981, "grad_norm": 0.12158203125, "learning_rate": 0.0007462441641005265, "loss": 0.6754, "step": 13530 }, { "epoch": 0.6724942882686004, "grad_norm": 0.12255859375, "learning_rate": 0.000746204430316877, "loss": 0.6632, "step": 13540 }, { "epoch": 0.6729909605642197, "grad_norm": 0.119140625, "learning_rate": 0.0007461646965332274, "loss": 0.7027, "step": 13550 }, { "epoch": 0.673487632859839, "grad_norm": 0.11669921875, "learning_rate": 0.0007461249627495779, "loss": 0.6755, "step": 13560 }, { "epoch": 0.6739843051554584, "grad_norm": 0.109375, "learning_rate": 0.0007460852289659283, "loss": 0.688, "step": 13570 }, { "epoch": 0.6744809774510778, "grad_norm": 0.11865234375, "learning_rate": 0.0007460454951822787, "loss": 0.69, "step": 13580 }, { "epoch": 0.6749776497466972, "grad_norm": 0.11279296875, "learning_rate": 0.0007460057613986293, "loss": 0.7026, "step": 13590 }, { "epoch": 0.6754743220423165, "grad_norm": 0.1123046875, "learning_rate": 0.0007459660276149797, "loss": 0.6592, "step": 13600 }, { "epoch": 0.6759709943379358, "grad_norm": 0.1259765625, "learning_rate": 0.0007459262938313301, "loss": 0.6609, "step": 13610 }, { "epoch": 0.6764676666335552, "grad_norm": 0.125, "learning_rate": 0.0007458865600476806, "loss": 0.6837, "step": 13620 }, { "epoch": 0.6769643389291745, "grad_norm": 0.11181640625, "learning_rate": 0.000745846826264031, "loss": 0.6865, "step": 13630 }, { "epoch": 0.6774610112247939, "grad_norm": 0.10107421875, "learning_rate": 0.0007458070924803814, "loss": 0.6354, "step": 13640 }, { "epoch": 0.6779576835204132, "grad_norm": 0.12060546875, "learning_rate": 0.000745767358696732, "loss": 0.7152, "step": 13650 }, { "epoch": 0.6784543558160325, "grad_norm": 0.12890625, "learning_rate": 0.0007457276249130824, "loss": 0.6739, "step": 13660 }, { "epoch": 0.678951028111652, "grad_norm": 0.1220703125, "learning_rate": 0.0007456878911294328, "loss": 0.6947, "step": 13670 }, { "epoch": 0.6794477004072713, "grad_norm": 0.11376953125, "learning_rate": 0.0007456481573457833, "loss": 0.6693, "step": 13680 }, { "epoch": 0.6799443727028907, "grad_norm": 0.13671875, "learning_rate": 0.0007456084235621338, "loss": 0.6785, "step": 13690 }, { "epoch": 0.68044104499851, "grad_norm": 0.1162109375, "learning_rate": 0.0007455686897784842, "loss": 0.6698, "step": 13700 }, { "epoch": 0.6809377172941293, "grad_norm": 0.1083984375, "learning_rate": 0.0007455289559948346, "loss": 0.6377, "step": 13710 }, { "epoch": 0.6814343895897487, "grad_norm": 0.130859375, "learning_rate": 0.0007454892222111851, "loss": 0.6667, "step": 13720 }, { "epoch": 0.681931061885368, "grad_norm": 0.1103515625, "learning_rate": 0.0007454494884275356, "loss": 0.7018, "step": 13730 }, { "epoch": 0.6824277341809873, "grad_norm": 0.10888671875, "learning_rate": 0.000745409754643886, "loss": 0.6803, "step": 13740 }, { "epoch": 0.6829244064766067, "grad_norm": 0.1044921875, "learning_rate": 0.0007453700208602365, "loss": 0.6521, "step": 13750 }, { "epoch": 0.6834210787722261, "grad_norm": 0.11279296875, "learning_rate": 0.0007453302870765869, "loss": 0.696, "step": 13760 }, { "epoch": 0.6839177510678455, "grad_norm": 0.1025390625, "learning_rate": 0.0007452905532929373, "loss": 0.6783, "step": 13770 }, { "epoch": 0.6844144233634648, "grad_norm": 0.10693359375, "learning_rate": 0.0007452508195092878, "loss": 0.6695, "step": 13780 }, { "epoch": 0.6849110956590841, "grad_norm": 0.111328125, "learning_rate": 0.0007452110857256382, "loss": 0.694, "step": 13790 }, { "epoch": 0.6854077679547035, "grad_norm": 0.12158203125, "learning_rate": 0.0007451713519419887, "loss": 0.6664, "step": 13800 }, { "epoch": 0.6859044402503228, "grad_norm": 0.1044921875, "learning_rate": 0.0007451316181583392, "loss": 0.6734, "step": 13810 }, { "epoch": 0.6864011125459422, "grad_norm": 0.111328125, "learning_rate": 0.0007450918843746896, "loss": 0.6455, "step": 13820 }, { "epoch": 0.6868977848415615, "grad_norm": 0.10546875, "learning_rate": 0.00074505215059104, "loss": 0.6509, "step": 13830 }, { "epoch": 0.6873944571371808, "grad_norm": 0.1103515625, "learning_rate": 0.0007450124168073906, "loss": 0.6705, "step": 13840 }, { "epoch": 0.6878911294328003, "grad_norm": 0.1171875, "learning_rate": 0.000744972683023741, "loss": 0.6952, "step": 13850 }, { "epoch": 0.6883878017284196, "grad_norm": 0.1142578125, "learning_rate": 0.0007449329492400914, "loss": 0.6787, "step": 13860 }, { "epoch": 0.688884474024039, "grad_norm": 0.1259765625, "learning_rate": 0.0007448932154564419, "loss": 0.6715, "step": 13870 }, { "epoch": 0.6893811463196583, "grad_norm": 0.123046875, "learning_rate": 0.0007448534816727923, "loss": 0.6722, "step": 13880 }, { "epoch": 0.6898778186152776, "grad_norm": 0.1669921875, "learning_rate": 0.0007448137478891429, "loss": 0.6656, "step": 13890 }, { "epoch": 0.690374490910897, "grad_norm": 0.11279296875, "learning_rate": 0.0007447740141054932, "loss": 0.6341, "step": 13900 }, { "epoch": 0.6908711632065163, "grad_norm": 0.123046875, "learning_rate": 0.0007447342803218437, "loss": 0.687, "step": 13910 }, { "epoch": 0.6913678355021357, "grad_norm": 0.1083984375, "learning_rate": 0.0007446945465381942, "loss": 0.7099, "step": 13920 }, { "epoch": 0.691864507797755, "grad_norm": 0.1337890625, "learning_rate": 0.0007446548127545445, "loss": 0.6738, "step": 13930 }, { "epoch": 0.6923611800933744, "grad_norm": 0.111328125, "learning_rate": 0.000744615078970895, "loss": 0.6398, "step": 13940 }, { "epoch": 0.6928578523889938, "grad_norm": 0.1142578125, "learning_rate": 0.0007445753451872456, "loss": 0.66, "step": 13950 }, { "epoch": 0.6933545246846131, "grad_norm": 0.1142578125, "learning_rate": 0.0007445356114035959, "loss": 0.6886, "step": 13960 }, { "epoch": 0.6938511969802325, "grad_norm": 0.10986328125, "learning_rate": 0.0007444958776199464, "loss": 0.6649, "step": 13970 }, { "epoch": 0.6943478692758518, "grad_norm": 0.11767578125, "learning_rate": 0.0007444561438362968, "loss": 0.6863, "step": 13980 }, { "epoch": 0.6948445415714711, "grad_norm": 0.126953125, "learning_rate": 0.0007444164100526472, "loss": 0.6929, "step": 13990 }, { "epoch": 0.6953412138670905, "grad_norm": 0.107421875, "learning_rate": 0.0007443766762689978, "loss": 0.6631, "step": 14000 }, { "epoch": 0.6958378861627098, "grad_norm": 0.134765625, "learning_rate": 0.0007443369424853482, "loss": 0.6998, "step": 14010 }, { "epoch": 0.6963345584583291, "grad_norm": 0.1474609375, "learning_rate": 0.0007442972087016986, "loss": 0.6604, "step": 14020 }, { "epoch": 0.6968312307539486, "grad_norm": 0.11279296875, "learning_rate": 0.0007442574749180491, "loss": 0.6781, "step": 14030 }, { "epoch": 0.6973279030495679, "grad_norm": 0.11279296875, "learning_rate": 0.0007442177411343995, "loss": 0.653, "step": 14040 }, { "epoch": 0.6978245753451873, "grad_norm": 0.11376953125, "learning_rate": 0.0007441780073507501, "loss": 0.6524, "step": 14050 }, { "epoch": 0.6983212476408066, "grad_norm": 0.1083984375, "learning_rate": 0.0007441382735671005, "loss": 0.6796, "step": 14060 }, { "epoch": 0.698817919936426, "grad_norm": 0.1064453125, "learning_rate": 0.0007440985397834509, "loss": 0.6614, "step": 14070 }, { "epoch": 0.6993145922320453, "grad_norm": 0.1259765625, "learning_rate": 0.0007440588059998014, "loss": 0.6559, "step": 14080 }, { "epoch": 0.6998112645276646, "grad_norm": 0.10791015625, "learning_rate": 0.0007440190722161517, "loss": 0.6349, "step": 14090 }, { "epoch": 0.700307936823284, "grad_norm": 0.1171875, "learning_rate": 0.0007439793384325023, "loss": 0.6741, "step": 14100 }, { "epoch": 0.7008046091189033, "grad_norm": 0.1474609375, "learning_rate": 0.0007439396046488528, "loss": 0.6455, "step": 14110 }, { "epoch": 0.7013012814145227, "grad_norm": 0.11669921875, "learning_rate": 0.0007438998708652031, "loss": 0.6997, "step": 14120 }, { "epoch": 0.7017979537101421, "grad_norm": 0.109375, "learning_rate": 0.0007438601370815536, "loss": 0.661, "step": 14130 }, { "epoch": 0.7022946260057614, "grad_norm": 0.11962890625, "learning_rate": 0.0007438204032979041, "loss": 0.6776, "step": 14140 }, { "epoch": 0.7027912983013808, "grad_norm": 0.123046875, "learning_rate": 0.0007437806695142545, "loss": 0.6656, "step": 14150 }, { "epoch": 0.7032879705970001, "grad_norm": 0.1064453125, "learning_rate": 0.000743740935730605, "loss": 0.6608, "step": 14160 }, { "epoch": 0.7037846428926194, "grad_norm": 0.10693359375, "learning_rate": 0.0007437012019469554, "loss": 0.6501, "step": 14170 }, { "epoch": 0.7042813151882388, "grad_norm": 0.1005859375, "learning_rate": 0.0007436614681633059, "loss": 0.6619, "step": 14180 }, { "epoch": 0.7047779874838581, "grad_norm": 0.125, "learning_rate": 0.0007436217343796563, "loss": 0.6832, "step": 14190 }, { "epoch": 0.7052746597794775, "grad_norm": 0.111328125, "learning_rate": 0.0007435820005960068, "loss": 0.66, "step": 14200 }, { "epoch": 0.7057713320750969, "grad_norm": 0.12109375, "learning_rate": 0.0007435422668123573, "loss": 0.6392, "step": 14210 }, { "epoch": 0.7062680043707162, "grad_norm": 0.1171875, "learning_rate": 0.0007435025330287077, "loss": 0.6357, "step": 14220 }, { "epoch": 0.7067646766663356, "grad_norm": 0.1171875, "learning_rate": 0.0007434627992450581, "loss": 0.6774, "step": 14230 }, { "epoch": 0.7072613489619549, "grad_norm": 0.1083984375, "learning_rate": 0.0007434230654614086, "loss": 0.63, "step": 14240 }, { "epoch": 0.7077580212575743, "grad_norm": 0.1220703125, "learning_rate": 0.0007433833316777591, "loss": 0.6643, "step": 14250 }, { "epoch": 0.7082546935531936, "grad_norm": 0.10009765625, "learning_rate": 0.0007433435978941095, "loss": 0.6845, "step": 14260 }, { "epoch": 0.7087513658488129, "grad_norm": 0.1044921875, "learning_rate": 0.00074330386411046, "loss": 0.6639, "step": 14270 }, { "epoch": 0.7092480381444323, "grad_norm": 0.1044921875, "learning_rate": 0.0007432641303268104, "loss": 0.7032, "step": 14280 }, { "epoch": 0.7097447104400516, "grad_norm": 0.10986328125, "learning_rate": 0.0007432243965431608, "loss": 0.6655, "step": 14290 }, { "epoch": 0.7102413827356711, "grad_norm": 0.158203125, "learning_rate": 0.0007431846627595114, "loss": 0.6785, "step": 14300 }, { "epoch": 0.7107380550312904, "grad_norm": 0.11767578125, "learning_rate": 0.0007431449289758617, "loss": 0.673, "step": 14310 }, { "epoch": 0.7112347273269097, "grad_norm": 0.12255859375, "learning_rate": 0.0007431051951922122, "loss": 0.6534, "step": 14320 }, { "epoch": 0.7117313996225291, "grad_norm": 0.109375, "learning_rate": 0.0007430654614085627, "loss": 0.6963, "step": 14330 }, { "epoch": 0.7122280719181484, "grad_norm": 0.111328125, "learning_rate": 0.0007430257276249131, "loss": 0.6584, "step": 14340 }, { "epoch": 0.7127247442137677, "grad_norm": 0.11279296875, "learning_rate": 0.0007429859938412636, "loss": 0.6742, "step": 14350 }, { "epoch": 0.7132214165093871, "grad_norm": 0.1025390625, "learning_rate": 0.000742946260057614, "loss": 0.6555, "step": 14360 }, { "epoch": 0.7137180888050064, "grad_norm": 0.1376953125, "learning_rate": 0.0007429065262739645, "loss": 0.6705, "step": 14370 }, { "epoch": 0.7142147611006258, "grad_norm": 0.10693359375, "learning_rate": 0.0007428667924903149, "loss": 0.661, "step": 14380 }, { "epoch": 0.7147114333962452, "grad_norm": 0.11328125, "learning_rate": 0.0007428270587066653, "loss": 0.6886, "step": 14390 }, { "epoch": 0.7152081056918645, "grad_norm": 0.10205078125, "learning_rate": 0.0007427873249230159, "loss": 0.6865, "step": 14400 }, { "epoch": 0.7157047779874839, "grad_norm": 0.12353515625, "learning_rate": 0.0007427475911393663, "loss": 0.6566, "step": 14410 }, { "epoch": 0.7162014502831032, "grad_norm": 0.1171875, "learning_rate": 0.0007427078573557167, "loss": 0.6595, "step": 14420 }, { "epoch": 0.7166981225787226, "grad_norm": 0.1201171875, "learning_rate": 0.0007426681235720672, "loss": 0.6846, "step": 14430 }, { "epoch": 0.7171947948743419, "grad_norm": 0.10888671875, "learning_rate": 0.0007426283897884176, "loss": 0.6815, "step": 14440 }, { "epoch": 0.7176914671699612, "grad_norm": 0.1064453125, "learning_rate": 0.0007425886560047681, "loss": 0.6479, "step": 14450 }, { "epoch": 0.7181881394655806, "grad_norm": 0.1181640625, "learning_rate": 0.0007425489222211186, "loss": 0.6771, "step": 14460 }, { "epoch": 0.7186848117611999, "grad_norm": 0.1064453125, "learning_rate": 0.000742509188437469, "loss": 0.6768, "step": 14470 }, { "epoch": 0.7191814840568194, "grad_norm": 0.12451171875, "learning_rate": 0.0007424694546538194, "loss": 0.6603, "step": 14480 }, { "epoch": 0.7196781563524387, "grad_norm": 0.11767578125, "learning_rate": 0.0007424297208701699, "loss": 0.6645, "step": 14490 }, { "epoch": 0.720174828648058, "grad_norm": 0.10546875, "learning_rate": 0.0007423899870865204, "loss": 0.6885, "step": 14500 }, { "epoch": 0.7206715009436774, "grad_norm": 0.1181640625, "learning_rate": 0.0007423502533028708, "loss": 0.6573, "step": 14510 }, { "epoch": 0.7211681732392967, "grad_norm": 0.11669921875, "learning_rate": 0.0007423105195192213, "loss": 0.6914, "step": 14520 }, { "epoch": 0.721664845534916, "grad_norm": 0.10595703125, "learning_rate": 0.0007422707857355717, "loss": 0.6392, "step": 14530 }, { "epoch": 0.7221615178305354, "grad_norm": 0.111328125, "learning_rate": 0.0007422310519519221, "loss": 0.6606, "step": 14540 }, { "epoch": 0.7226581901261547, "grad_norm": 0.1279296875, "learning_rate": 0.0007421913181682727, "loss": 0.7015, "step": 14550 }, { "epoch": 0.7231548624217741, "grad_norm": 0.11767578125, "learning_rate": 0.0007421515843846231, "loss": 0.6693, "step": 14560 }, { "epoch": 0.7236515347173935, "grad_norm": 0.1259765625, "learning_rate": 0.0007421118506009735, "loss": 0.6702, "step": 14570 }, { "epoch": 0.7241482070130129, "grad_norm": 0.109375, "learning_rate": 0.0007420721168173239, "loss": 0.7158, "step": 14580 }, { "epoch": 0.7246448793086322, "grad_norm": 0.1259765625, "learning_rate": 0.0007420323830336744, "loss": 0.6439, "step": 14590 }, { "epoch": 0.7251415516042515, "grad_norm": 0.1259765625, "learning_rate": 0.0007419926492500249, "loss": 0.6677, "step": 14600 }, { "epoch": 0.7256382238998709, "grad_norm": 0.11181640625, "learning_rate": 0.0007419529154663753, "loss": 0.6892, "step": 14610 }, { "epoch": 0.7261348961954902, "grad_norm": 0.10888671875, "learning_rate": 0.0007419131816827258, "loss": 0.6827, "step": 14620 }, { "epoch": 0.7266315684911095, "grad_norm": 0.099609375, "learning_rate": 0.0007418734478990762, "loss": 0.6628, "step": 14630 }, { "epoch": 0.7271282407867289, "grad_norm": 0.11328125, "learning_rate": 0.0007418337141154266, "loss": 0.6387, "step": 14640 }, { "epoch": 0.7276249130823482, "grad_norm": 0.12060546875, "learning_rate": 0.0007417939803317772, "loss": 0.6581, "step": 14650 }, { "epoch": 0.7281215853779676, "grad_norm": 0.11962890625, "learning_rate": 0.0007417542465481276, "loss": 0.6713, "step": 14660 }, { "epoch": 0.728618257673587, "grad_norm": 0.10107421875, "learning_rate": 0.000741714512764478, "loss": 0.6754, "step": 14670 }, { "epoch": 0.7291149299692063, "grad_norm": 0.1044921875, "learning_rate": 0.0007416747789808285, "loss": 0.6784, "step": 14680 }, { "epoch": 0.7296116022648257, "grad_norm": 0.10107421875, "learning_rate": 0.0007416350451971789, "loss": 0.6548, "step": 14690 }, { "epoch": 0.730108274560445, "grad_norm": 0.1064453125, "learning_rate": 0.0007415953114135294, "loss": 0.6828, "step": 14700 }, { "epoch": 0.7306049468560644, "grad_norm": 0.1025390625, "learning_rate": 0.0007415555776298799, "loss": 0.6802, "step": 14710 }, { "epoch": 0.7311016191516837, "grad_norm": 0.10888671875, "learning_rate": 0.0007415158438462303, "loss": 0.657, "step": 14720 }, { "epoch": 0.731598291447303, "grad_norm": 0.10986328125, "learning_rate": 0.0007414761100625807, "loss": 0.6715, "step": 14730 }, { "epoch": 0.7320949637429224, "grad_norm": 0.1162109375, "learning_rate": 0.0007414363762789312, "loss": 0.633, "step": 14740 }, { "epoch": 0.7325916360385417, "grad_norm": 0.12255859375, "learning_rate": 0.0007413966424952817, "loss": 0.643, "step": 14750 }, { "epoch": 0.7330883083341612, "grad_norm": 0.09228515625, "learning_rate": 0.0007413569087116321, "loss": 0.6803, "step": 14760 }, { "epoch": 0.7335849806297805, "grad_norm": 0.13671875, "learning_rate": 0.0007413171749279825, "loss": 0.6491, "step": 14770 }, { "epoch": 0.7340816529253998, "grad_norm": 0.10498046875, "learning_rate": 0.000741277441144333, "loss": 0.6822, "step": 14780 }, { "epoch": 0.7345783252210192, "grad_norm": 0.123046875, "learning_rate": 0.0007412377073606835, "loss": 0.6758, "step": 14790 }, { "epoch": 0.7350749975166385, "grad_norm": 0.11376953125, "learning_rate": 0.0007411979735770338, "loss": 0.6977, "step": 14800 }, { "epoch": 0.7355716698122579, "grad_norm": 0.1318359375, "learning_rate": 0.0007411582397933844, "loss": 0.6655, "step": 14810 }, { "epoch": 0.7360683421078772, "grad_norm": 0.10498046875, "learning_rate": 0.0007411185060097349, "loss": 0.6554, "step": 14820 }, { "epoch": 0.7365650144034965, "grad_norm": 0.1650390625, "learning_rate": 0.0007410787722260852, "loss": 0.623, "step": 14830 }, { "epoch": 0.7370616866991159, "grad_norm": 0.10546875, "learning_rate": 0.0007410390384424357, "loss": 0.6476, "step": 14840 }, { "epoch": 0.7375583589947353, "grad_norm": 0.126953125, "learning_rate": 0.0007409993046587862, "loss": 0.6634, "step": 14850 }, { "epoch": 0.7380550312903547, "grad_norm": 0.10595703125, "learning_rate": 0.0007409595708751366, "loss": 0.6542, "step": 14860 }, { "epoch": 0.738551703585974, "grad_norm": 0.1220703125, "learning_rate": 0.0007409198370914871, "loss": 0.6161, "step": 14870 }, { "epoch": 0.7390483758815933, "grad_norm": 0.1279296875, "learning_rate": 0.0007408801033078375, "loss": 0.6548, "step": 14880 }, { "epoch": 0.7395450481772127, "grad_norm": 0.1083984375, "learning_rate": 0.0007408403695241879, "loss": 0.6417, "step": 14890 }, { "epoch": 0.740041720472832, "grad_norm": 0.12109375, "learning_rate": 0.0007408006357405385, "loss": 0.6701, "step": 14900 }, { "epoch": 0.7405383927684513, "grad_norm": 0.11474609375, "learning_rate": 0.0007407609019568889, "loss": 0.6672, "step": 14910 }, { "epoch": 0.7410350650640707, "grad_norm": 0.1298828125, "learning_rate": 0.0007407211681732393, "loss": 0.6359, "step": 14920 }, { "epoch": 0.74153173735969, "grad_norm": 0.11279296875, "learning_rate": 0.0007406814343895898, "loss": 0.668, "step": 14930 }, { "epoch": 0.7420284096553095, "grad_norm": 0.10546875, "learning_rate": 0.0007406417006059402, "loss": 0.6952, "step": 14940 }, { "epoch": 0.7425250819509288, "grad_norm": 0.12890625, "learning_rate": 0.0007406019668222908, "loss": 0.6725, "step": 14950 }, { "epoch": 0.7430217542465481, "grad_norm": 0.1123046875, "learning_rate": 0.0007405622330386411, "loss": 0.6643, "step": 14960 }, { "epoch": 0.7435184265421675, "grad_norm": 0.115234375, "learning_rate": 0.0007405224992549916, "loss": 0.6796, "step": 14970 }, { "epoch": 0.7440150988377868, "grad_norm": 0.11767578125, "learning_rate": 0.0007404827654713421, "loss": 0.6462, "step": 14980 }, { "epoch": 0.7445117711334062, "grad_norm": 0.12060546875, "learning_rate": 0.0007404430316876924, "loss": 0.6689, "step": 14990 }, { "epoch": 0.7450084434290255, "grad_norm": 0.1162109375, "learning_rate": 0.000740403297904043, "loss": 0.7093, "step": 15000 }, { "epoch": 0.7455051157246448, "grad_norm": 0.1181640625, "learning_rate": 0.0007403635641203935, "loss": 0.6595, "step": 15010 }, { "epoch": 0.7460017880202642, "grad_norm": 0.1259765625, "learning_rate": 0.0007403238303367438, "loss": 0.7084, "step": 15020 }, { "epoch": 0.7464984603158836, "grad_norm": 0.1171875, "learning_rate": 0.0007402840965530943, "loss": 0.6587, "step": 15030 }, { "epoch": 0.746995132611503, "grad_norm": 0.10302734375, "learning_rate": 0.0007402443627694447, "loss": 0.6821, "step": 15040 }, { "epoch": 0.7474918049071223, "grad_norm": 0.109375, "learning_rate": 0.0007402046289857951, "loss": 0.6574, "step": 15050 }, { "epoch": 0.7479884772027416, "grad_norm": 0.11083984375, "learning_rate": 0.0007401648952021457, "loss": 0.6512, "step": 15060 }, { "epoch": 0.748485149498361, "grad_norm": 0.1298828125, "learning_rate": 0.0007401251614184961, "loss": 0.6612, "step": 15070 }, { "epoch": 0.7489818217939803, "grad_norm": 0.11376953125, "learning_rate": 0.0007400854276348466, "loss": 0.6923, "step": 15080 }, { "epoch": 0.7494784940895997, "grad_norm": 0.1162109375, "learning_rate": 0.000740045693851197, "loss": 0.6737, "step": 15090 }, { "epoch": 0.749975166385219, "grad_norm": 0.12060546875, "learning_rate": 0.0007400059600675474, "loss": 0.6633, "step": 15100 }, { "epoch": 0.7504718386808383, "grad_norm": 0.10791015625, "learning_rate": 0.000739966226283898, "loss": 0.6599, "step": 15110 }, { "epoch": 0.7509685109764578, "grad_norm": 0.1279296875, "learning_rate": 0.0007399264925002484, "loss": 0.6532, "step": 15120 }, { "epoch": 0.7514651832720771, "grad_norm": 0.10888671875, "learning_rate": 0.0007398867587165988, "loss": 0.6475, "step": 15130 }, { "epoch": 0.7519618555676965, "grad_norm": 0.10302734375, "learning_rate": 0.0007398470249329493, "loss": 0.627, "step": 15140 }, { "epoch": 0.7524585278633158, "grad_norm": 0.11083984375, "learning_rate": 0.0007398072911492998, "loss": 0.6591, "step": 15150 }, { "epoch": 0.7529552001589351, "grad_norm": 0.1318359375, "learning_rate": 0.0007397675573656502, "loss": 0.678, "step": 15160 }, { "epoch": 0.7534518724545545, "grad_norm": 0.1259765625, "learning_rate": 0.0007397278235820007, "loss": 0.6714, "step": 15170 }, { "epoch": 0.7539485447501738, "grad_norm": 0.10205078125, "learning_rate": 0.000739688089798351, "loss": 0.6759, "step": 15180 }, { "epoch": 0.7544452170457931, "grad_norm": 0.15234375, "learning_rate": 0.0007396483560147015, "loss": 0.7192, "step": 15190 }, { "epoch": 0.7549418893414125, "grad_norm": 0.138671875, "learning_rate": 0.000739608622231052, "loss": 0.6535, "step": 15200 }, { "epoch": 0.7554385616370319, "grad_norm": 0.10791015625, "learning_rate": 0.0007395688884474024, "loss": 0.6535, "step": 15210 }, { "epoch": 0.7559352339326513, "grad_norm": 0.119140625, "learning_rate": 0.0007395291546637529, "loss": 0.6493, "step": 15220 }, { "epoch": 0.7564319062282706, "grad_norm": 0.123046875, "learning_rate": 0.0007394894208801033, "loss": 0.6432, "step": 15230 }, { "epoch": 0.75692857852389, "grad_norm": 0.1318359375, "learning_rate": 0.0007394496870964538, "loss": 0.6387, "step": 15240 }, { "epoch": 0.7574252508195093, "grad_norm": 0.10009765625, "learning_rate": 0.0007394099533128042, "loss": 0.6892, "step": 15250 }, { "epoch": 0.7579219231151286, "grad_norm": 0.099609375, "learning_rate": 0.0007393702195291547, "loss": 0.6835, "step": 15260 }, { "epoch": 0.758418595410748, "grad_norm": 0.109375, "learning_rate": 0.0007393304857455052, "loss": 0.6625, "step": 15270 }, { "epoch": 0.7589152677063673, "grad_norm": 0.1123046875, "learning_rate": 0.0007392907519618556, "loss": 0.687, "step": 15280 }, { "epoch": 0.7594119400019866, "grad_norm": 0.111328125, "learning_rate": 0.000739251018178206, "loss": 0.63, "step": 15290 }, { "epoch": 0.7599086122976061, "grad_norm": 0.1142578125, "learning_rate": 0.0007392112843945566, "loss": 0.6379, "step": 15300 }, { "epoch": 0.7604052845932254, "grad_norm": 0.1015625, "learning_rate": 0.000739171550610907, "loss": 0.6866, "step": 15310 }, { "epoch": 0.7609019568888448, "grad_norm": 0.1044921875, "learning_rate": 0.0007391318168272574, "loss": 0.651, "step": 15320 }, { "epoch": 0.7613986291844641, "grad_norm": 0.10546875, "learning_rate": 0.0007390920830436079, "loss": 0.6841, "step": 15330 }, { "epoch": 0.7618953014800834, "grad_norm": 0.126953125, "learning_rate": 0.0007390523492599583, "loss": 0.6586, "step": 15340 }, { "epoch": 0.7623919737757028, "grad_norm": 0.11669921875, "learning_rate": 0.0007390126154763087, "loss": 0.6854, "step": 15350 }, { "epoch": 0.7628886460713221, "grad_norm": 0.10107421875, "learning_rate": 0.0007389728816926593, "loss": 0.668, "step": 15360 }, { "epoch": 0.7633853183669415, "grad_norm": 0.1376953125, "learning_rate": 0.0007389331479090096, "loss": 0.6417, "step": 15370 }, { "epoch": 0.7638819906625608, "grad_norm": 0.1142578125, "learning_rate": 0.0007388934141253601, "loss": 0.6731, "step": 15380 }, { "epoch": 0.7643786629581802, "grad_norm": 0.11669921875, "learning_rate": 0.0007388536803417106, "loss": 0.6381, "step": 15390 }, { "epoch": 0.7648753352537996, "grad_norm": 0.10791015625, "learning_rate": 0.000738813946558061, "loss": 0.6457, "step": 15400 }, { "epoch": 0.7653720075494189, "grad_norm": 0.11083984375, "learning_rate": 0.0007387742127744115, "loss": 0.6486, "step": 15410 }, { "epoch": 0.7658686798450383, "grad_norm": 0.1259765625, "learning_rate": 0.000738734478990762, "loss": 0.6659, "step": 15420 }, { "epoch": 0.7663653521406576, "grad_norm": 0.10595703125, "learning_rate": 0.0007386947452071124, "loss": 0.6527, "step": 15430 }, { "epoch": 0.7668620244362769, "grad_norm": 0.09765625, "learning_rate": 0.0007386550114234628, "loss": 0.6608, "step": 15440 }, { "epoch": 0.7673586967318963, "grad_norm": 0.09765625, "learning_rate": 0.0007386152776398132, "loss": 0.6618, "step": 15450 }, { "epoch": 0.7678553690275156, "grad_norm": 0.10888671875, "learning_rate": 0.0007385755438561638, "loss": 0.6524, "step": 15460 }, { "epoch": 0.768352041323135, "grad_norm": 0.11181640625, "learning_rate": 0.0007385358100725142, "loss": 0.6715, "step": 15470 }, { "epoch": 0.7688487136187544, "grad_norm": 0.11767578125, "learning_rate": 0.0007384960762888646, "loss": 0.6431, "step": 15480 }, { "epoch": 0.7693453859143737, "grad_norm": 0.11279296875, "learning_rate": 0.0007384563425052151, "loss": 0.6363, "step": 15490 }, { "epoch": 0.7698420582099931, "grad_norm": 0.142578125, "learning_rate": 0.0007384166087215655, "loss": 0.6678, "step": 15500 }, { "epoch": 0.7703387305056124, "grad_norm": 0.1123046875, "learning_rate": 0.000738376874937916, "loss": 0.6862, "step": 15510 }, { "epoch": 0.7708354028012318, "grad_norm": 0.10009765625, "learning_rate": 0.0007383371411542665, "loss": 0.6488, "step": 15520 }, { "epoch": 0.7713320750968511, "grad_norm": 0.10498046875, "learning_rate": 0.0007382974073706169, "loss": 0.6608, "step": 15530 }, { "epoch": 0.7718287473924704, "grad_norm": 0.12255859375, "learning_rate": 0.0007382576735869673, "loss": 0.6887, "step": 15540 }, { "epoch": 0.7723254196880898, "grad_norm": 0.11474609375, "learning_rate": 0.0007382179398033178, "loss": 0.65, "step": 15550 }, { "epoch": 0.7728220919837091, "grad_norm": 0.1083984375, "learning_rate": 0.0007381782060196683, "loss": 0.6542, "step": 15560 }, { "epoch": 0.7733187642793286, "grad_norm": 0.09765625, "learning_rate": 0.0007381384722360187, "loss": 0.6324, "step": 15570 }, { "epoch": 0.7738154365749479, "grad_norm": 0.10498046875, "learning_rate": 0.0007380987384523692, "loss": 0.6682, "step": 15580 }, { "epoch": 0.7743121088705672, "grad_norm": 0.1259765625, "learning_rate": 0.0007380590046687196, "loss": 0.6871, "step": 15590 }, { "epoch": 0.7748087811661866, "grad_norm": 0.125, "learning_rate": 0.00073801927088507, "loss": 0.6528, "step": 15600 }, { "epoch": 0.7753054534618059, "grad_norm": 0.1337890625, "learning_rate": 0.0007379795371014206, "loss": 0.6642, "step": 15610 }, { "epoch": 0.7758021257574252, "grad_norm": 0.1259765625, "learning_rate": 0.000737939803317771, "loss": 0.657, "step": 15620 }, { "epoch": 0.7762987980530446, "grad_norm": 0.11083984375, "learning_rate": 0.0007379000695341214, "loss": 0.672, "step": 15630 }, { "epoch": 0.7767954703486639, "grad_norm": 0.10595703125, "learning_rate": 0.0007378603357504718, "loss": 0.6719, "step": 15640 }, { "epoch": 0.7772921426442833, "grad_norm": 0.10302734375, "learning_rate": 0.0007378206019668223, "loss": 0.6735, "step": 15650 }, { "epoch": 0.7777888149399027, "grad_norm": 0.1103515625, "learning_rate": 0.0007377808681831728, "loss": 0.6648, "step": 15660 }, { "epoch": 0.778285487235522, "grad_norm": 0.11865234375, "learning_rate": 0.0007377411343995232, "loss": 0.6676, "step": 15670 }, { "epoch": 0.7787821595311414, "grad_norm": 0.11279296875, "learning_rate": 0.0007377014006158737, "loss": 0.685, "step": 15680 }, { "epoch": 0.7792788318267607, "grad_norm": 0.1142578125, "learning_rate": 0.0007376616668322241, "loss": 0.6626, "step": 15690 }, { "epoch": 0.7797755041223801, "grad_norm": 0.107421875, "learning_rate": 0.0007376219330485745, "loss": 0.6375, "step": 15700 }, { "epoch": 0.7802721764179994, "grad_norm": 0.140625, "learning_rate": 0.0007375821992649251, "loss": 0.6498, "step": 15710 }, { "epoch": 0.7807688487136187, "grad_norm": 0.13671875, "learning_rate": 0.0007375424654812755, "loss": 0.6627, "step": 15720 }, { "epoch": 0.7812655210092381, "grad_norm": 0.1298828125, "learning_rate": 0.0007375027316976259, "loss": 0.6476, "step": 15730 }, { "epoch": 0.7817621933048574, "grad_norm": 0.12255859375, "learning_rate": 0.0007374629979139764, "loss": 0.6621, "step": 15740 }, { "epoch": 0.7822588656004769, "grad_norm": 0.1083984375, "learning_rate": 0.0007374232641303268, "loss": 0.6777, "step": 15750 }, { "epoch": 0.7827555378960962, "grad_norm": 0.10546875, "learning_rate": 0.0007373835303466773, "loss": 0.6634, "step": 15760 }, { "epoch": 0.7832522101917155, "grad_norm": 0.099609375, "learning_rate": 0.0007373437965630278, "loss": 0.6365, "step": 15770 }, { "epoch": 0.7837488824873349, "grad_norm": 0.1259765625, "learning_rate": 0.0007373040627793782, "loss": 0.6644, "step": 15780 }, { "epoch": 0.7842455547829542, "grad_norm": 0.12890625, "learning_rate": 0.0007372643289957286, "loss": 0.6768, "step": 15790 }, { "epoch": 0.7847422270785736, "grad_norm": 0.10888671875, "learning_rate": 0.0007372245952120791, "loss": 0.6666, "step": 15800 }, { "epoch": 0.7852388993741929, "grad_norm": 0.1279296875, "learning_rate": 0.0007371848614284296, "loss": 0.6616, "step": 15810 }, { "epoch": 0.7857355716698122, "grad_norm": 0.11572265625, "learning_rate": 0.0007371451276447801, "loss": 0.6727, "step": 15820 }, { "epoch": 0.7862322439654316, "grad_norm": 0.10546875, "learning_rate": 0.0007371053938611304, "loss": 0.6297, "step": 15830 }, { "epoch": 0.7867289162610509, "grad_norm": 0.11376953125, "learning_rate": 0.0007370656600774809, "loss": 0.661, "step": 15840 }, { "epoch": 0.7872255885566704, "grad_norm": 0.0927734375, "learning_rate": 0.0007370259262938314, "loss": 0.6471, "step": 15850 }, { "epoch": 0.7877222608522897, "grad_norm": 0.10693359375, "learning_rate": 0.0007369861925101818, "loss": 0.6412, "step": 15860 }, { "epoch": 0.788218933147909, "grad_norm": 0.1083984375, "learning_rate": 0.0007369464587265323, "loss": 0.647, "step": 15870 }, { "epoch": 0.7887156054435284, "grad_norm": 0.10107421875, "learning_rate": 0.0007369067249428828, "loss": 0.6365, "step": 15880 }, { "epoch": 0.7892122777391477, "grad_norm": 0.130859375, "learning_rate": 0.0007368669911592331, "loss": 0.6588, "step": 15890 }, { "epoch": 0.789708950034767, "grad_norm": 0.130859375, "learning_rate": 0.0007368272573755836, "loss": 0.6499, "step": 15900 }, { "epoch": 0.7902056223303864, "grad_norm": 0.10498046875, "learning_rate": 0.0007367875235919341, "loss": 0.6606, "step": 15910 }, { "epoch": 0.7907022946260057, "grad_norm": 0.109375, "learning_rate": 0.0007367477898082845, "loss": 0.6713, "step": 15920 }, { "epoch": 0.7911989669216251, "grad_norm": 0.11474609375, "learning_rate": 0.000736708056024635, "loss": 0.633, "step": 15930 }, { "epoch": 0.7916956392172445, "grad_norm": 0.10888671875, "learning_rate": 0.0007366683222409854, "loss": 0.6714, "step": 15940 }, { "epoch": 0.7921923115128638, "grad_norm": 0.1162109375, "learning_rate": 0.0007366285884573358, "loss": 0.6649, "step": 15950 }, { "epoch": 0.7926889838084832, "grad_norm": 0.111328125, "learning_rate": 0.0007365888546736864, "loss": 0.68, "step": 15960 }, { "epoch": 0.7931856561041025, "grad_norm": 0.10888671875, "learning_rate": 0.0007365491208900368, "loss": 0.6256, "step": 15970 }, { "epoch": 0.7936823283997219, "grad_norm": 0.11474609375, "learning_rate": 0.0007365093871063873, "loss": 0.6611, "step": 15980 }, { "epoch": 0.7941790006953412, "grad_norm": 0.109375, "learning_rate": 0.0007364696533227377, "loss": 0.6605, "step": 15990 }, { "epoch": 0.7946756729909605, "grad_norm": 0.1044921875, "learning_rate": 0.0007364299195390881, "loss": 0.6494, "step": 16000 }, { "epoch": 0.7951723452865799, "grad_norm": 0.09912109375, "learning_rate": 0.0007363901857554387, "loss": 0.6674, "step": 16010 }, { "epoch": 0.7956690175821992, "grad_norm": 0.12353515625, "learning_rate": 0.000736350451971789, "loss": 0.6483, "step": 16020 }, { "epoch": 0.7961656898778187, "grad_norm": 0.11962890625, "learning_rate": 0.0007363107181881395, "loss": 0.6441, "step": 16030 }, { "epoch": 0.796662362173438, "grad_norm": 0.1533203125, "learning_rate": 0.00073627098440449, "loss": 0.6642, "step": 16040 }, { "epoch": 0.7971590344690573, "grad_norm": 0.10205078125, "learning_rate": 0.0007362312506208403, "loss": 0.6259, "step": 16050 }, { "epoch": 0.7976557067646767, "grad_norm": 0.09912109375, "learning_rate": 0.0007361915168371909, "loss": 0.6641, "step": 16060 }, { "epoch": 0.798152379060296, "grad_norm": 0.1064453125, "learning_rate": 0.0007361517830535414, "loss": 0.6431, "step": 16070 }, { "epoch": 0.7986490513559154, "grad_norm": 0.1083984375, "learning_rate": 0.0007361120492698917, "loss": 0.6418, "step": 16080 }, { "epoch": 0.7991457236515347, "grad_norm": 0.109375, "learning_rate": 0.0007360723154862422, "loss": 0.6362, "step": 16090 }, { "epoch": 0.799642395947154, "grad_norm": 0.10400390625, "learning_rate": 0.0007360325817025926, "loss": 0.6457, "step": 16100 }, { "epoch": 0.8001390682427734, "grad_norm": 0.10888671875, "learning_rate": 0.000735992847918943, "loss": 0.6353, "step": 16110 }, { "epoch": 0.8006357405383928, "grad_norm": 0.10791015625, "learning_rate": 0.0007359531141352936, "loss": 0.6309, "step": 16120 }, { "epoch": 0.8011324128340122, "grad_norm": 0.11572265625, "learning_rate": 0.000735913380351644, "loss": 0.6537, "step": 16130 }, { "epoch": 0.8016290851296315, "grad_norm": 0.099609375, "learning_rate": 0.0007358736465679945, "loss": 0.6506, "step": 16140 }, { "epoch": 0.8021257574252508, "grad_norm": 0.1279296875, "learning_rate": 0.0007358339127843449, "loss": 0.6716, "step": 16150 }, { "epoch": 0.8026224297208702, "grad_norm": 0.10009765625, "learning_rate": 0.0007357941790006954, "loss": 0.6404, "step": 16160 }, { "epoch": 0.8031191020164895, "grad_norm": 0.103515625, "learning_rate": 0.0007357544452170459, "loss": 0.6493, "step": 16170 }, { "epoch": 0.8036157743121088, "grad_norm": 0.11083984375, "learning_rate": 0.0007357147114333963, "loss": 0.6635, "step": 16180 }, { "epoch": 0.8041124466077282, "grad_norm": 0.1064453125, "learning_rate": 0.0007356749776497467, "loss": 0.619, "step": 16190 }, { "epoch": 0.8046091189033475, "grad_norm": 0.11474609375, "learning_rate": 0.0007356352438660972, "loss": 0.6638, "step": 16200 }, { "epoch": 0.805105791198967, "grad_norm": 0.10546875, "learning_rate": 0.0007355955100824477, "loss": 0.665, "step": 16210 }, { "epoch": 0.8056024634945863, "grad_norm": 0.1474609375, "learning_rate": 0.0007355557762987981, "loss": 0.6213, "step": 16220 }, { "epoch": 0.8060991357902056, "grad_norm": 0.12353515625, "learning_rate": 0.0007355160425151486, "loss": 0.6451, "step": 16230 }, { "epoch": 0.806595808085825, "grad_norm": 0.115234375, "learning_rate": 0.0007354763087314989, "loss": 0.6398, "step": 16240 }, { "epoch": 0.8070924803814443, "grad_norm": 0.10302734375, "learning_rate": 0.0007354365749478494, "loss": 0.6553, "step": 16250 }, { "epoch": 0.8075891526770637, "grad_norm": 0.1103515625, "learning_rate": 0.0007353968411642, "loss": 0.691, "step": 16260 }, { "epoch": 0.808085824972683, "grad_norm": 0.10888671875, "learning_rate": 0.0007353571073805504, "loss": 0.6825, "step": 16270 }, { "epoch": 0.8085824972683023, "grad_norm": 0.11474609375, "learning_rate": 0.0007353173735969008, "loss": 0.6685, "step": 16280 }, { "epoch": 0.8090791695639217, "grad_norm": 0.1181640625, "learning_rate": 0.0007352776398132512, "loss": 0.6356, "step": 16290 }, { "epoch": 0.8095758418595411, "grad_norm": 0.12158203125, "learning_rate": 0.0007352379060296017, "loss": 0.6552, "step": 16300 }, { "epoch": 0.8100725141551605, "grad_norm": 0.10791015625, "learning_rate": 0.0007351981722459522, "loss": 0.6721, "step": 16310 }, { "epoch": 0.8105691864507798, "grad_norm": 0.11669921875, "learning_rate": 0.0007351584384623026, "loss": 0.6464, "step": 16320 }, { "epoch": 0.8110658587463991, "grad_norm": 0.1123046875, "learning_rate": 0.0007351187046786531, "loss": 0.6584, "step": 16330 }, { "epoch": 0.8115625310420185, "grad_norm": 0.1103515625, "learning_rate": 0.0007350789708950035, "loss": 0.6583, "step": 16340 }, { "epoch": 0.8120592033376378, "grad_norm": 0.1171875, "learning_rate": 0.0007350392371113539, "loss": 0.6785, "step": 16350 }, { "epoch": 0.8125558756332572, "grad_norm": 0.10888671875, "learning_rate": 0.0007349995033277045, "loss": 0.6511, "step": 16360 }, { "epoch": 0.8130525479288765, "grad_norm": 0.1474609375, "learning_rate": 0.0007349597695440549, "loss": 0.6634, "step": 16370 }, { "epoch": 0.8135492202244958, "grad_norm": 0.111328125, "learning_rate": 0.0007349200357604053, "loss": 0.7017, "step": 16380 }, { "epoch": 0.8140458925201153, "grad_norm": 0.115234375, "learning_rate": 0.0007348803019767558, "loss": 0.6716, "step": 16390 }, { "epoch": 0.8145425648157346, "grad_norm": 0.11083984375, "learning_rate": 0.0007348405681931062, "loss": 0.6769, "step": 16400 }, { "epoch": 0.815039237111354, "grad_norm": 0.11376953125, "learning_rate": 0.0007348008344094566, "loss": 0.6457, "step": 16410 }, { "epoch": 0.8155359094069733, "grad_norm": 0.09912109375, "learning_rate": 0.0007347611006258072, "loss": 0.6589, "step": 16420 }, { "epoch": 0.8160325817025926, "grad_norm": 0.119140625, "learning_rate": 0.0007347213668421576, "loss": 0.6282, "step": 16430 }, { "epoch": 0.816529253998212, "grad_norm": 0.1328125, "learning_rate": 0.000734681633058508, "loss": 0.6914, "step": 16440 }, { "epoch": 0.8170259262938313, "grad_norm": 0.10302734375, "learning_rate": 0.0007346418992748585, "loss": 0.6693, "step": 16450 }, { "epoch": 0.8175225985894506, "grad_norm": 0.11376953125, "learning_rate": 0.000734602165491209, "loss": 0.6435, "step": 16460 }, { "epoch": 0.81801927088507, "grad_norm": 0.1142578125, "learning_rate": 0.0007345624317075594, "loss": 0.6676, "step": 16470 }, { "epoch": 0.8185159431806894, "grad_norm": 0.109375, "learning_rate": 0.0007345226979239099, "loss": 0.6642, "step": 16480 }, { "epoch": 0.8190126154763088, "grad_norm": 0.11083984375, "learning_rate": 0.0007344829641402603, "loss": 0.677, "step": 16490 }, { "epoch": 0.8195092877719281, "grad_norm": 0.123046875, "learning_rate": 0.0007344432303566107, "loss": 0.6594, "step": 16500 }, { "epoch": 0.8200059600675474, "grad_norm": 0.1162109375, "learning_rate": 0.0007344034965729611, "loss": 0.6555, "step": 16510 }, { "epoch": 0.8205026323631668, "grad_norm": 0.115234375, "learning_rate": 0.0007343637627893117, "loss": 0.6422, "step": 16520 }, { "epoch": 0.8209993046587861, "grad_norm": 0.11474609375, "learning_rate": 0.0007343240290056621, "loss": 0.6481, "step": 16530 }, { "epoch": 0.8214959769544055, "grad_norm": 0.10791015625, "learning_rate": 0.0007342842952220125, "loss": 0.678, "step": 16540 }, { "epoch": 0.8219926492500248, "grad_norm": 0.1044921875, "learning_rate": 0.000734244561438363, "loss": 0.638, "step": 16550 }, { "epoch": 0.8224893215456441, "grad_norm": 0.12060546875, "learning_rate": 0.0007342048276547134, "loss": 0.6303, "step": 16560 }, { "epoch": 0.8229859938412636, "grad_norm": 0.119140625, "learning_rate": 0.0007341650938710639, "loss": 0.6615, "step": 16570 }, { "epoch": 0.8234826661368829, "grad_norm": 0.1015625, "learning_rate": 0.0007341253600874144, "loss": 0.6814, "step": 16580 }, { "epoch": 0.8239793384325023, "grad_norm": 0.1064453125, "learning_rate": 0.0007340856263037648, "loss": 0.6632, "step": 16590 }, { "epoch": 0.8244760107281216, "grad_norm": 0.11474609375, "learning_rate": 0.0007340458925201152, "loss": 0.6387, "step": 16600 }, { "epoch": 0.8249726830237409, "grad_norm": 0.115234375, "learning_rate": 0.0007340061587364658, "loss": 0.6394, "step": 16610 }, { "epoch": 0.8254693553193603, "grad_norm": 0.11328125, "learning_rate": 0.0007339664249528162, "loss": 0.6571, "step": 16620 }, { "epoch": 0.8259660276149796, "grad_norm": 0.11328125, "learning_rate": 0.0007339266911691666, "loss": 0.662, "step": 16630 }, { "epoch": 0.826462699910599, "grad_norm": 0.109375, "learning_rate": 0.0007338869573855171, "loss": 0.6662, "step": 16640 }, { "epoch": 0.8269593722062183, "grad_norm": 0.11279296875, "learning_rate": 0.0007338472236018675, "loss": 0.6464, "step": 16650 }, { "epoch": 0.8274560445018377, "grad_norm": 0.1005859375, "learning_rate": 0.0007338074898182179, "loss": 0.6808, "step": 16660 }, { "epoch": 0.8279527167974571, "grad_norm": 0.10400390625, "learning_rate": 0.0007337677560345685, "loss": 0.6563, "step": 16670 }, { "epoch": 0.8284493890930764, "grad_norm": 0.1259765625, "learning_rate": 0.0007337280222509189, "loss": 0.6661, "step": 16680 }, { "epoch": 0.8289460613886958, "grad_norm": 0.11669921875, "learning_rate": 0.0007336882884672693, "loss": 0.6458, "step": 16690 }, { "epoch": 0.8294427336843151, "grad_norm": 0.10986328125, "learning_rate": 0.0007336485546836197, "loss": 0.6569, "step": 16700 }, { "epoch": 0.8299394059799344, "grad_norm": 0.109375, "learning_rate": 0.0007336088208999702, "loss": 0.6661, "step": 16710 }, { "epoch": 0.8304360782755538, "grad_norm": 0.1123046875, "learning_rate": 0.0007335690871163208, "loss": 0.6804, "step": 16720 }, { "epoch": 0.8309327505711731, "grad_norm": 0.11279296875, "learning_rate": 0.0007335293533326711, "loss": 0.6887, "step": 16730 }, { "epoch": 0.8314294228667924, "grad_norm": 0.1259765625, "learning_rate": 0.0007334896195490216, "loss": 0.6213, "step": 16740 }, { "epoch": 0.8319260951624119, "grad_norm": 0.107421875, "learning_rate": 0.0007334498857653721, "loss": 0.649, "step": 16750 }, { "epoch": 0.8324227674580312, "grad_norm": 0.1103515625, "learning_rate": 0.0007334101519817224, "loss": 0.6543, "step": 16760 }, { "epoch": 0.8329194397536506, "grad_norm": 0.1103515625, "learning_rate": 0.000733370418198073, "loss": 0.6634, "step": 16770 }, { "epoch": 0.8334161120492699, "grad_norm": 0.1025390625, "learning_rate": 0.0007333306844144234, "loss": 0.6675, "step": 16780 }, { "epoch": 0.8339127843448892, "grad_norm": 0.1064453125, "learning_rate": 0.0007332909506307738, "loss": 0.6521, "step": 16790 }, { "epoch": 0.8344094566405086, "grad_norm": 0.1298828125, "learning_rate": 0.0007332512168471243, "loss": 0.6378, "step": 16800 }, { "epoch": 0.8349061289361279, "grad_norm": 0.11181640625, "learning_rate": 0.0007332114830634747, "loss": 0.6496, "step": 16810 }, { "epoch": 0.8354028012317473, "grad_norm": 0.10009765625, "learning_rate": 0.0007331717492798252, "loss": 0.6638, "step": 16820 }, { "epoch": 0.8358994735273666, "grad_norm": 0.10546875, "learning_rate": 0.0007331320154961757, "loss": 0.6344, "step": 16830 }, { "epoch": 0.836396145822986, "grad_norm": 0.185546875, "learning_rate": 0.0007330922817125261, "loss": 0.6616, "step": 16840 }, { "epoch": 0.8368928181186054, "grad_norm": 0.11376953125, "learning_rate": 0.0007330525479288765, "loss": 0.6469, "step": 16850 }, { "epoch": 0.8373894904142247, "grad_norm": 0.107421875, "learning_rate": 0.000733012814145227, "loss": 0.6803, "step": 16860 }, { "epoch": 0.8378861627098441, "grad_norm": 0.12255859375, "learning_rate": 0.0007329730803615775, "loss": 0.6452, "step": 16870 }, { "epoch": 0.8383828350054634, "grad_norm": 0.09423828125, "learning_rate": 0.000732933346577928, "loss": 0.6699, "step": 16880 }, { "epoch": 0.8388795073010827, "grad_norm": 0.1015625, "learning_rate": 0.0007328936127942783, "loss": 0.6362, "step": 16890 }, { "epoch": 0.8393761795967021, "grad_norm": 0.107421875, "learning_rate": 0.0007328538790106288, "loss": 0.6622, "step": 16900 }, { "epoch": 0.8398728518923214, "grad_norm": 0.099609375, "learning_rate": 0.0007328141452269793, "loss": 0.6484, "step": 16910 }, { "epoch": 0.8403695241879408, "grad_norm": 0.15234375, "learning_rate": 0.0007327744114433297, "loss": 0.6497, "step": 16920 }, { "epoch": 0.8408661964835602, "grad_norm": 0.12890625, "learning_rate": 0.0007327346776596802, "loss": 0.6783, "step": 16930 }, { "epoch": 0.8413628687791795, "grad_norm": 0.1552734375, "learning_rate": 0.0007326949438760307, "loss": 0.6665, "step": 16940 }, { "epoch": 0.8418595410747989, "grad_norm": 0.1142578125, "learning_rate": 0.000732655210092381, "loss": 0.6902, "step": 16950 }, { "epoch": 0.8423562133704182, "grad_norm": 0.1220703125, "learning_rate": 0.0007326154763087315, "loss": 0.6522, "step": 16960 }, { "epoch": 0.8428528856660376, "grad_norm": 0.10986328125, "learning_rate": 0.000732575742525082, "loss": 0.6571, "step": 16970 }, { "epoch": 0.8433495579616569, "grad_norm": 0.1396484375, "learning_rate": 0.0007325360087414324, "loss": 0.657, "step": 16980 }, { "epoch": 0.8438462302572762, "grad_norm": 0.10595703125, "learning_rate": 0.0007324962749577829, "loss": 0.648, "step": 16990 }, { "epoch": 0.8443429025528956, "grad_norm": 0.11279296875, "learning_rate": 0.0007324565411741333, "loss": 0.6394, "step": 17000 }, { "epoch": 0.8448395748485149, "grad_norm": 0.119140625, "learning_rate": 0.0007324168073904837, "loss": 0.6504, "step": 17010 }, { "epoch": 0.8453362471441342, "grad_norm": 0.1181640625, "learning_rate": 0.0007323770736068343, "loss": 0.6483, "step": 17020 }, { "epoch": 0.8458329194397537, "grad_norm": 0.115234375, "learning_rate": 0.0007323373398231847, "loss": 0.6492, "step": 17030 }, { "epoch": 0.846329591735373, "grad_norm": 0.11669921875, "learning_rate": 0.0007322976060395352, "loss": 0.6662, "step": 17040 }, { "epoch": 0.8468262640309924, "grad_norm": 0.14453125, "learning_rate": 0.0007322578722558856, "loss": 0.6364, "step": 17050 }, { "epoch": 0.8473229363266117, "grad_norm": 0.10302734375, "learning_rate": 0.000732218138472236, "loss": 0.6674, "step": 17060 }, { "epoch": 0.847819608622231, "grad_norm": 0.109375, "learning_rate": 0.0007321784046885866, "loss": 0.6588, "step": 17070 }, { "epoch": 0.8483162809178504, "grad_norm": 0.11181640625, "learning_rate": 0.000732138670904937, "loss": 0.6432, "step": 17080 }, { "epoch": 0.8488129532134697, "grad_norm": 0.162109375, "learning_rate": 0.0007320989371212874, "loss": 0.6654, "step": 17090 }, { "epoch": 0.8493096255090891, "grad_norm": 0.177734375, "learning_rate": 0.0007320592033376379, "loss": 0.6632, "step": 17100 }, { "epoch": 0.8498062978047084, "grad_norm": 0.125, "learning_rate": 0.0007320194695539882, "loss": 0.6595, "step": 17110 }, { "epoch": 0.8503029701003278, "grad_norm": 0.1142578125, "learning_rate": 0.0007319797357703388, "loss": 0.6614, "step": 17120 }, { "epoch": 0.8507996423959472, "grad_norm": 0.1201171875, "learning_rate": 0.0007319400019866893, "loss": 0.6409, "step": 17130 }, { "epoch": 0.8512963146915665, "grad_norm": 0.103515625, "learning_rate": 0.0007319002682030396, "loss": 0.6245, "step": 17140 }, { "epoch": 0.8517929869871859, "grad_norm": 0.126953125, "learning_rate": 0.0007318605344193901, "loss": 0.6708, "step": 17150 }, { "epoch": 0.8522896592828052, "grad_norm": 0.1103515625, "learning_rate": 0.0007318208006357405, "loss": 0.6608, "step": 17160 }, { "epoch": 0.8527863315784245, "grad_norm": 0.1220703125, "learning_rate": 0.0007317810668520911, "loss": 0.6411, "step": 17170 }, { "epoch": 0.8532830038740439, "grad_norm": 0.140625, "learning_rate": 0.0007317413330684415, "loss": 0.6536, "step": 17180 }, { "epoch": 0.8537796761696632, "grad_norm": 0.11328125, "learning_rate": 0.0007317015992847919, "loss": 0.6427, "step": 17190 }, { "epoch": 0.8542763484652826, "grad_norm": 0.12451171875, "learning_rate": 0.0007316618655011424, "loss": 0.6913, "step": 17200 }, { "epoch": 0.854773020760902, "grad_norm": 0.10595703125, "learning_rate": 0.0007316221317174928, "loss": 0.6436, "step": 17210 }, { "epoch": 0.8552696930565213, "grad_norm": 0.1083984375, "learning_rate": 0.0007315823979338433, "loss": 0.6381, "step": 17220 }, { "epoch": 0.8557663653521407, "grad_norm": 0.1357421875, "learning_rate": 0.0007315426641501938, "loss": 0.66, "step": 17230 }, { "epoch": 0.85626303764776, "grad_norm": 0.14453125, "learning_rate": 0.0007315029303665442, "loss": 0.6569, "step": 17240 }, { "epoch": 0.8567597099433794, "grad_norm": 0.115234375, "learning_rate": 0.0007314631965828946, "loss": 0.6972, "step": 17250 }, { "epoch": 0.8572563822389987, "grad_norm": 0.126953125, "learning_rate": 0.0007314234627992451, "loss": 0.6668, "step": 17260 }, { "epoch": 0.857753054534618, "grad_norm": 0.1328125, "learning_rate": 0.0007313837290155956, "loss": 0.6675, "step": 17270 }, { "epoch": 0.8582497268302374, "grad_norm": 0.12255859375, "learning_rate": 0.000731343995231946, "loss": 0.6405, "step": 17280 }, { "epoch": 0.8587463991258567, "grad_norm": 0.09765625, "learning_rate": 0.0007313042614482965, "loss": 0.657, "step": 17290 }, { "epoch": 0.8592430714214762, "grad_norm": 0.11767578125, "learning_rate": 0.0007312645276646468, "loss": 0.6507, "step": 17300 }, { "epoch": 0.8597397437170955, "grad_norm": 0.1376953125, "learning_rate": 0.0007312247938809973, "loss": 0.6625, "step": 17310 }, { "epoch": 0.8602364160127148, "grad_norm": 0.109375, "learning_rate": 0.0007311850600973479, "loss": 0.6755, "step": 17320 }, { "epoch": 0.8607330883083342, "grad_norm": 0.103515625, "learning_rate": 0.0007311453263136983, "loss": 0.6671, "step": 17330 }, { "epoch": 0.8612297606039535, "grad_norm": 0.10009765625, "learning_rate": 0.0007311055925300487, "loss": 0.6828, "step": 17340 }, { "epoch": 0.8617264328995728, "grad_norm": 0.09130859375, "learning_rate": 0.0007310658587463992, "loss": 0.6169, "step": 17350 }, { "epoch": 0.8622231051951922, "grad_norm": 0.109375, "learning_rate": 0.0007310261249627496, "loss": 0.6387, "step": 17360 }, { "epoch": 0.8627197774908115, "grad_norm": 0.10791015625, "learning_rate": 0.0007309863911791001, "loss": 0.7025, "step": 17370 }, { "epoch": 0.8632164497864309, "grad_norm": 0.095703125, "learning_rate": 0.0007309466573954505, "loss": 0.6599, "step": 17380 }, { "epoch": 0.8637131220820503, "grad_norm": 0.1005859375, "learning_rate": 0.000730906923611801, "loss": 0.6288, "step": 17390 }, { "epoch": 0.8642097943776696, "grad_norm": 0.130859375, "learning_rate": 0.0007308671898281514, "loss": 0.6718, "step": 17400 }, { "epoch": 0.864706466673289, "grad_norm": 0.10498046875, "learning_rate": 0.0007308274560445018, "loss": 0.6607, "step": 17410 }, { "epoch": 0.8652031389689083, "grad_norm": 0.09912109375, "learning_rate": 0.0007307877222608524, "loss": 0.6593, "step": 17420 }, { "epoch": 0.8656998112645277, "grad_norm": 0.109375, "learning_rate": 0.0007307479884772028, "loss": 0.6401, "step": 17430 }, { "epoch": 0.866196483560147, "grad_norm": 0.10498046875, "learning_rate": 0.0007307082546935532, "loss": 0.6316, "step": 17440 }, { "epoch": 0.8666931558557663, "grad_norm": 0.0986328125, "learning_rate": 0.0007306685209099037, "loss": 0.6319, "step": 17450 }, { "epoch": 0.8671898281513857, "grad_norm": 0.11669921875, "learning_rate": 0.0007306287871262541, "loss": 0.6675, "step": 17460 }, { "epoch": 0.867686500447005, "grad_norm": 0.12109375, "learning_rate": 0.0007305890533426046, "loss": 0.6245, "step": 17470 }, { "epoch": 0.8681831727426245, "grad_norm": 0.1259765625, "learning_rate": 0.0007305493195589551, "loss": 0.6518, "step": 17480 }, { "epoch": 0.8686798450382438, "grad_norm": 0.1376953125, "learning_rate": 0.0007305095857753055, "loss": 0.6574, "step": 17490 }, { "epoch": 0.8691765173338631, "grad_norm": 0.10888671875, "learning_rate": 0.0007304698519916559, "loss": 0.6386, "step": 17500 }, { "epoch": 0.8696731896294825, "grad_norm": 0.1357421875, "learning_rate": 0.0007304301182080064, "loss": 0.6485, "step": 17510 }, { "epoch": 0.8701698619251018, "grad_norm": 0.11767578125, "learning_rate": 0.0007303903844243569, "loss": 0.653, "step": 17520 }, { "epoch": 0.8706665342207212, "grad_norm": 0.1162109375, "learning_rate": 0.0007303506506407073, "loss": 0.6777, "step": 17530 }, { "epoch": 0.8711632065163405, "grad_norm": 0.150390625, "learning_rate": 0.0007303109168570578, "loss": 0.6541, "step": 17540 }, { "epoch": 0.8716598788119598, "grad_norm": 0.12890625, "learning_rate": 0.0007302711830734082, "loss": 0.6746, "step": 17550 }, { "epoch": 0.8721565511075792, "grad_norm": 0.11474609375, "learning_rate": 0.0007302314492897586, "loss": 0.6732, "step": 17560 }, { "epoch": 0.8726532234031986, "grad_norm": 0.1171875, "learning_rate": 0.000730191715506109, "loss": 0.6948, "step": 17570 }, { "epoch": 0.873149895698818, "grad_norm": 0.11279296875, "learning_rate": 0.0007301519817224596, "loss": 0.6498, "step": 17580 }, { "epoch": 0.8736465679944373, "grad_norm": 0.115234375, "learning_rate": 0.00073011224793881, "loss": 0.6505, "step": 17590 }, { "epoch": 0.8741432402900566, "grad_norm": 0.099609375, "learning_rate": 0.0007300725141551604, "loss": 0.6503, "step": 17600 }, { "epoch": 0.874639912585676, "grad_norm": 0.1005859375, "learning_rate": 0.0007300327803715109, "loss": 0.6586, "step": 17610 }, { "epoch": 0.8751365848812953, "grad_norm": 0.138671875, "learning_rate": 0.0007299930465878615, "loss": 0.6732, "step": 17620 }, { "epoch": 0.8756332571769146, "grad_norm": 0.10693359375, "learning_rate": 0.0007299533128042118, "loss": 0.6564, "step": 17630 }, { "epoch": 0.876129929472534, "grad_norm": 0.1103515625, "learning_rate": 0.0007299135790205623, "loss": 0.6678, "step": 17640 }, { "epoch": 0.8766266017681533, "grad_norm": 0.10986328125, "learning_rate": 0.0007298738452369127, "loss": 0.6443, "step": 17650 }, { "epoch": 0.8771232740637728, "grad_norm": 0.1123046875, "learning_rate": 0.0007298341114532631, "loss": 0.643, "step": 17660 }, { "epoch": 0.8776199463593921, "grad_norm": 0.10400390625, "learning_rate": 0.0007297943776696137, "loss": 0.6385, "step": 17670 }, { "epoch": 0.8781166186550114, "grad_norm": 0.09423828125, "learning_rate": 0.0007297546438859641, "loss": 0.6425, "step": 17680 }, { "epoch": 0.8786132909506308, "grad_norm": 0.09716796875, "learning_rate": 0.0007297149101023145, "loss": 0.6334, "step": 17690 }, { "epoch": 0.8791099632462501, "grad_norm": 0.134765625, "learning_rate": 0.000729675176318665, "loss": 0.6628, "step": 17700 }, { "epoch": 0.8796066355418695, "grad_norm": 0.134765625, "learning_rate": 0.0007296354425350154, "loss": 0.674, "step": 17710 }, { "epoch": 0.8801033078374888, "grad_norm": 0.107421875, "learning_rate": 0.0007295957087513658, "loss": 0.6799, "step": 17720 }, { "epoch": 0.8805999801331081, "grad_norm": 0.11328125, "learning_rate": 0.0007295559749677164, "loss": 0.6566, "step": 17730 }, { "epoch": 0.8810966524287275, "grad_norm": 0.10107421875, "learning_rate": 0.0007295162411840668, "loss": 0.6363, "step": 17740 }, { "epoch": 0.8815933247243469, "grad_norm": 0.10595703125, "learning_rate": 0.0007294765074004172, "loss": 0.681, "step": 17750 }, { "epoch": 0.8820899970199663, "grad_norm": 0.1259765625, "learning_rate": 0.0007294367736167676, "loss": 0.6795, "step": 17760 }, { "epoch": 0.8825866693155856, "grad_norm": 0.10400390625, "learning_rate": 0.0007293970398331182, "loss": 0.668, "step": 17770 }, { "epoch": 0.8830833416112049, "grad_norm": 0.12060546875, "learning_rate": 0.0007293573060494687, "loss": 0.6346, "step": 17780 }, { "epoch": 0.8835800139068243, "grad_norm": 0.11328125, "learning_rate": 0.000729317572265819, "loss": 0.6488, "step": 17790 }, { "epoch": 0.8840766862024436, "grad_norm": 0.11279296875, "learning_rate": 0.0007292778384821695, "loss": 0.6297, "step": 17800 }, { "epoch": 0.884573358498063, "grad_norm": 0.099609375, "learning_rate": 0.00072923810469852, "loss": 0.6498, "step": 17810 }, { "epoch": 0.8850700307936823, "grad_norm": 0.1064453125, "learning_rate": 0.0007291983709148703, "loss": 0.6379, "step": 17820 }, { "epoch": 0.8855667030893016, "grad_norm": 0.12890625, "learning_rate": 0.0007291586371312209, "loss": 0.6485, "step": 17830 }, { "epoch": 0.8860633753849211, "grad_norm": 0.119140625, "learning_rate": 0.0007291189033475713, "loss": 0.6255, "step": 17840 }, { "epoch": 0.8865600476805404, "grad_norm": 0.11376953125, "learning_rate": 0.0007290791695639217, "loss": 0.6269, "step": 17850 }, { "epoch": 0.8870567199761598, "grad_norm": 0.1279296875, "learning_rate": 0.0007290394357802722, "loss": 0.658, "step": 17860 }, { "epoch": 0.8875533922717791, "grad_norm": 0.12060546875, "learning_rate": 0.0007289997019966226, "loss": 0.6499, "step": 17870 }, { "epoch": 0.8880500645673984, "grad_norm": 0.12060546875, "learning_rate": 0.0007289599682129731, "loss": 0.6215, "step": 17880 }, { "epoch": 0.8885467368630178, "grad_norm": 0.103515625, "learning_rate": 0.0007289202344293236, "loss": 0.638, "step": 17890 }, { "epoch": 0.8890434091586371, "grad_norm": 0.12109375, "learning_rate": 0.000728880500645674, "loss": 0.6508, "step": 17900 }, { "epoch": 0.8895400814542564, "grad_norm": 0.197265625, "learning_rate": 0.0007288407668620245, "loss": 0.6515, "step": 17910 }, { "epoch": 0.8900367537498758, "grad_norm": 0.12158203125, "learning_rate": 0.000728801033078375, "loss": 0.6798, "step": 17920 }, { "epoch": 0.8905334260454952, "grad_norm": 0.1220703125, "learning_rate": 0.0007287612992947254, "loss": 0.6558, "step": 17930 }, { "epoch": 0.8910300983411146, "grad_norm": 0.1005859375, "learning_rate": 0.0007287215655110759, "loss": 0.6627, "step": 17940 }, { "epoch": 0.8915267706367339, "grad_norm": 0.1357421875, "learning_rate": 0.0007286818317274263, "loss": 0.6434, "step": 17950 }, { "epoch": 0.8920234429323532, "grad_norm": 0.111328125, "learning_rate": 0.0007286420979437767, "loss": 0.636, "step": 17960 }, { "epoch": 0.8925201152279726, "grad_norm": 0.10888671875, "learning_rate": 0.0007286023641601273, "loss": 0.6488, "step": 17970 }, { "epoch": 0.8930167875235919, "grad_norm": 0.119140625, "learning_rate": 0.0007285626303764776, "loss": 0.6529, "step": 17980 }, { "epoch": 0.8935134598192113, "grad_norm": 0.12158203125, "learning_rate": 0.0007285228965928281, "loss": 0.6544, "step": 17990 }, { "epoch": 0.8940101321148306, "grad_norm": 0.111328125, "learning_rate": 0.0007284831628091786, "loss": 0.6757, "step": 18000 }, { "epoch": 0.8945068044104499, "grad_norm": 0.10009765625, "learning_rate": 0.0007284434290255289, "loss": 0.6446, "step": 18010 }, { "epoch": 0.8950034767060694, "grad_norm": 0.10546875, "learning_rate": 0.0007284036952418794, "loss": 0.6686, "step": 18020 }, { "epoch": 0.8955001490016887, "grad_norm": 0.146484375, "learning_rate": 0.0007283639614582299, "loss": 0.6257, "step": 18030 }, { "epoch": 0.8959968212973081, "grad_norm": 0.10888671875, "learning_rate": 0.0007283242276745803, "loss": 0.684, "step": 18040 }, { "epoch": 0.8964934935929274, "grad_norm": 0.1162109375, "learning_rate": 0.0007282844938909308, "loss": 0.6261, "step": 18050 }, { "epoch": 0.8969901658885467, "grad_norm": 0.123046875, "learning_rate": 0.0007282447601072812, "loss": 0.6511, "step": 18060 }, { "epoch": 0.8974868381841661, "grad_norm": 0.1416015625, "learning_rate": 0.0007282050263236317, "loss": 0.669, "step": 18070 }, { "epoch": 0.8979835104797854, "grad_norm": 0.11669921875, "learning_rate": 0.0007281652925399822, "loss": 0.6212, "step": 18080 }, { "epoch": 0.8984801827754048, "grad_norm": 0.1025390625, "learning_rate": 0.0007281255587563326, "loss": 0.6908, "step": 18090 }, { "epoch": 0.8989768550710241, "grad_norm": 0.10009765625, "learning_rate": 0.0007280858249726831, "loss": 0.6437, "step": 18100 }, { "epoch": 0.8994735273666435, "grad_norm": 0.203125, "learning_rate": 0.0007280460911890335, "loss": 0.6976, "step": 18110 }, { "epoch": 0.8999701996622629, "grad_norm": 0.119140625, "learning_rate": 0.0007280063574053839, "loss": 0.6431, "step": 18120 }, { "epoch": 0.9004668719578822, "grad_norm": 0.1474609375, "learning_rate": 0.0007279666236217345, "loss": 0.6197, "step": 18130 }, { "epoch": 0.9009635442535016, "grad_norm": 0.1259765625, "learning_rate": 0.0007279268898380849, "loss": 0.6198, "step": 18140 }, { "epoch": 0.9014602165491209, "grad_norm": 0.10595703125, "learning_rate": 0.0007278871560544353, "loss": 0.6532, "step": 18150 }, { "epoch": 0.9019568888447402, "grad_norm": 0.10400390625, "learning_rate": 0.0007278474222707858, "loss": 0.6735, "step": 18160 }, { "epoch": 0.9024535611403596, "grad_norm": 0.1044921875, "learning_rate": 0.0007278076884871361, "loss": 0.6289, "step": 18170 }, { "epoch": 0.9029502334359789, "grad_norm": 0.1064453125, "learning_rate": 0.0007277679547034867, "loss": 0.6466, "step": 18180 }, { "epoch": 0.9034469057315982, "grad_norm": 0.109375, "learning_rate": 0.0007277282209198372, "loss": 0.6393, "step": 18190 }, { "epoch": 0.9039435780272176, "grad_norm": 0.0986328125, "learning_rate": 0.0007276884871361875, "loss": 0.6556, "step": 18200 }, { "epoch": 0.904440250322837, "grad_norm": 0.1005859375, "learning_rate": 0.000727648753352538, "loss": 0.6643, "step": 18210 }, { "epoch": 0.9049369226184564, "grad_norm": 0.1513671875, "learning_rate": 0.0007276090195688885, "loss": 0.6401, "step": 18220 }, { "epoch": 0.9054335949140757, "grad_norm": 0.1455078125, "learning_rate": 0.000727569285785239, "loss": 0.6655, "step": 18230 }, { "epoch": 0.905930267209695, "grad_norm": 0.11328125, "learning_rate": 0.0007275295520015894, "loss": 0.6161, "step": 18240 }, { "epoch": 0.9064269395053144, "grad_norm": 0.10791015625, "learning_rate": 0.0007274898182179398, "loss": 0.6177, "step": 18250 }, { "epoch": 0.9069236118009337, "grad_norm": 0.1484375, "learning_rate": 0.0007274500844342903, "loss": 0.6307, "step": 18260 }, { "epoch": 0.9074202840965531, "grad_norm": 0.11181640625, "learning_rate": 0.0007274103506506407, "loss": 0.6363, "step": 18270 }, { "epoch": 0.9079169563921724, "grad_norm": 0.1533203125, "learning_rate": 0.0007273706168669912, "loss": 0.6561, "step": 18280 }, { "epoch": 0.9084136286877917, "grad_norm": 0.10498046875, "learning_rate": 0.0007273308830833417, "loss": 0.6347, "step": 18290 }, { "epoch": 0.9089103009834112, "grad_norm": 0.10400390625, "learning_rate": 0.0007272911492996921, "loss": 0.6362, "step": 18300 }, { "epoch": 0.9094069732790305, "grad_norm": 0.10546875, "learning_rate": 0.0007272514155160425, "loss": 0.6456, "step": 18310 }, { "epoch": 0.9099036455746499, "grad_norm": 0.1025390625, "learning_rate": 0.000727211681732393, "loss": 0.6526, "step": 18320 }, { "epoch": 0.9104003178702692, "grad_norm": 0.11962890625, "learning_rate": 0.0007271719479487435, "loss": 0.6516, "step": 18330 }, { "epoch": 0.9108969901658885, "grad_norm": 0.1005859375, "learning_rate": 0.0007271322141650939, "loss": 0.6862, "step": 18340 }, { "epoch": 0.9113936624615079, "grad_norm": 0.1044921875, "learning_rate": 0.0007270924803814444, "loss": 0.6343, "step": 18350 }, { "epoch": 0.9118903347571272, "grad_norm": 0.1015625, "learning_rate": 0.0007270527465977948, "loss": 0.6607, "step": 18360 }, { "epoch": 0.9123870070527466, "grad_norm": 0.10888671875, "learning_rate": 0.0007270130128141452, "loss": 0.6355, "step": 18370 }, { "epoch": 0.9128836793483659, "grad_norm": 0.10888671875, "learning_rate": 0.0007269732790304958, "loss": 0.6391, "step": 18380 }, { "epoch": 0.9133803516439853, "grad_norm": 0.10205078125, "learning_rate": 0.0007269335452468462, "loss": 0.638, "step": 18390 }, { "epoch": 0.9138770239396047, "grad_norm": 0.1123046875, "learning_rate": 0.0007268938114631966, "loss": 0.6353, "step": 18400 }, { "epoch": 0.914373696235224, "grad_norm": 0.10791015625, "learning_rate": 0.0007268540776795471, "loss": 0.6596, "step": 18410 }, { "epoch": 0.9148703685308434, "grad_norm": 0.115234375, "learning_rate": 0.0007268143438958975, "loss": 0.6434, "step": 18420 }, { "epoch": 0.9153670408264627, "grad_norm": 0.126953125, "learning_rate": 0.000726774610112248, "loss": 0.6595, "step": 18430 }, { "epoch": 0.915863713122082, "grad_norm": 0.11474609375, "learning_rate": 0.0007267348763285984, "loss": 0.6257, "step": 18440 }, { "epoch": 0.9163603854177014, "grad_norm": 0.11474609375, "learning_rate": 0.0007266951425449489, "loss": 0.6293, "step": 18450 }, { "epoch": 0.9168570577133207, "grad_norm": 0.10888671875, "learning_rate": 0.0007266554087612993, "loss": 0.6326, "step": 18460 }, { "epoch": 0.91735373000894, "grad_norm": 0.1259765625, "learning_rate": 0.0007266156749776497, "loss": 0.6738, "step": 18470 }, { "epoch": 0.9178504023045595, "grad_norm": 0.1015625, "learning_rate": 0.0007265759411940003, "loss": 0.64, "step": 18480 }, { "epoch": 0.9183470746001788, "grad_norm": 0.10791015625, "learning_rate": 0.0007265362074103507, "loss": 0.6349, "step": 18490 }, { "epoch": 0.9188437468957982, "grad_norm": 0.1142578125, "learning_rate": 0.0007264964736267011, "loss": 0.6403, "step": 18500 }, { "epoch": 0.9193404191914175, "grad_norm": 0.10107421875, "learning_rate": 0.0007264567398430516, "loss": 0.666, "step": 18510 }, { "epoch": 0.9198370914870369, "grad_norm": 0.11767578125, "learning_rate": 0.000726417006059402, "loss": 0.642, "step": 18520 }, { "epoch": 0.9203337637826562, "grad_norm": 0.10302734375, "learning_rate": 0.0007263772722757525, "loss": 0.6183, "step": 18530 }, { "epoch": 0.9208304360782755, "grad_norm": 0.0986328125, "learning_rate": 0.000726337538492103, "loss": 0.6488, "step": 18540 }, { "epoch": 0.9213271083738949, "grad_norm": 0.11572265625, "learning_rate": 0.0007262978047084534, "loss": 0.6425, "step": 18550 }, { "epoch": 0.9218237806695142, "grad_norm": 0.10888671875, "learning_rate": 0.0007262580709248038, "loss": 0.6517, "step": 18560 }, { "epoch": 0.9223204529651337, "grad_norm": 0.146484375, "learning_rate": 0.0007262183371411543, "loss": 0.6648, "step": 18570 }, { "epoch": 0.922817125260753, "grad_norm": 0.10595703125, "learning_rate": 0.0007261786033575048, "loss": 0.6293, "step": 18580 }, { "epoch": 0.9233137975563723, "grad_norm": 0.138671875, "learning_rate": 0.0007261388695738552, "loss": 0.6352, "step": 18590 }, { "epoch": 0.9238104698519917, "grad_norm": 0.103515625, "learning_rate": 0.0007260991357902057, "loss": 0.6556, "step": 18600 }, { "epoch": 0.924307142147611, "grad_norm": 0.09423828125, "learning_rate": 0.0007260594020065561, "loss": 0.6334, "step": 18610 }, { "epoch": 0.9248038144432303, "grad_norm": 0.1025390625, "learning_rate": 0.0007260196682229065, "loss": 0.6108, "step": 18620 }, { "epoch": 0.9253004867388497, "grad_norm": 0.10205078125, "learning_rate": 0.000725979934439257, "loss": 0.6333, "step": 18630 }, { "epoch": 0.925797159034469, "grad_norm": 0.11328125, "learning_rate": 0.0007259402006556075, "loss": 0.6463, "step": 18640 }, { "epoch": 0.9262938313300884, "grad_norm": 0.10400390625, "learning_rate": 0.0007259004668719579, "loss": 0.6333, "step": 18650 }, { "epoch": 0.9267905036257078, "grad_norm": 0.08837890625, "learning_rate": 0.0007258607330883083, "loss": 0.676, "step": 18660 }, { "epoch": 0.9272871759213271, "grad_norm": 0.10888671875, "learning_rate": 0.0007258209993046588, "loss": 0.6571, "step": 18670 }, { "epoch": 0.9277838482169465, "grad_norm": 0.11279296875, "learning_rate": 0.0007257812655210094, "loss": 0.6544, "step": 18680 }, { "epoch": 0.9282805205125658, "grad_norm": 0.10693359375, "learning_rate": 0.0007257415317373597, "loss": 0.6293, "step": 18690 }, { "epoch": 0.9287771928081852, "grad_norm": 0.1015625, "learning_rate": 0.0007257017979537102, "loss": 0.6482, "step": 18700 }, { "epoch": 0.9292738651038045, "grad_norm": 0.1220703125, "learning_rate": 0.0007256620641700606, "loss": 0.6875, "step": 18710 }, { "epoch": 0.9297705373994238, "grad_norm": 0.11181640625, "learning_rate": 0.000725622330386411, "loss": 0.6797, "step": 18720 }, { "epoch": 0.9302672096950432, "grad_norm": 0.09130859375, "learning_rate": 0.0007255825966027616, "loss": 0.6506, "step": 18730 }, { "epoch": 0.9307638819906625, "grad_norm": 0.11279296875, "learning_rate": 0.000725542862819112, "loss": 0.6089, "step": 18740 }, { "epoch": 0.931260554286282, "grad_norm": 0.1708984375, "learning_rate": 0.0007255031290354624, "loss": 0.6626, "step": 18750 }, { "epoch": 0.9317572265819013, "grad_norm": 0.10498046875, "learning_rate": 0.0007254633952518129, "loss": 0.6508, "step": 18760 }, { "epoch": 0.9322538988775206, "grad_norm": 0.11962890625, "learning_rate": 0.0007254236614681633, "loss": 0.6255, "step": 18770 }, { "epoch": 0.93275057117314, "grad_norm": 0.10205078125, "learning_rate": 0.0007253839276845138, "loss": 0.6643, "step": 18780 }, { "epoch": 0.9332472434687593, "grad_norm": 0.10498046875, "learning_rate": 0.0007253441939008643, "loss": 0.6427, "step": 18790 }, { "epoch": 0.9337439157643787, "grad_norm": 0.1416015625, "learning_rate": 0.0007253044601172147, "loss": 0.641, "step": 18800 }, { "epoch": 0.934240588059998, "grad_norm": 0.09912109375, "learning_rate": 0.0007252647263335652, "loss": 0.6462, "step": 18810 }, { "epoch": 0.9347372603556173, "grad_norm": 0.09765625, "learning_rate": 0.0007252249925499155, "loss": 0.6797, "step": 18820 }, { "epoch": 0.9352339326512367, "grad_norm": 0.12451171875, "learning_rate": 0.000725185258766266, "loss": 0.6372, "step": 18830 }, { "epoch": 0.9357306049468561, "grad_norm": 0.1005859375, "learning_rate": 0.0007251455249826166, "loss": 0.6683, "step": 18840 }, { "epoch": 0.9362272772424755, "grad_norm": 0.1162109375, "learning_rate": 0.0007251057911989669, "loss": 0.6337, "step": 18850 }, { "epoch": 0.9367239495380948, "grad_norm": 0.103515625, "learning_rate": 0.0007250660574153174, "loss": 0.6449, "step": 18860 }, { "epoch": 0.9372206218337141, "grad_norm": 0.10791015625, "learning_rate": 0.0007250263236316679, "loss": 0.6515, "step": 18870 }, { "epoch": 0.9377172941293335, "grad_norm": 0.1044921875, "learning_rate": 0.0007249865898480182, "loss": 0.6232, "step": 18880 }, { "epoch": 0.9382139664249528, "grad_norm": 0.11328125, "learning_rate": 0.0007249468560643688, "loss": 0.6598, "step": 18890 }, { "epoch": 0.9387106387205721, "grad_norm": 0.10888671875, "learning_rate": 0.0007249071222807192, "loss": 0.6842, "step": 18900 }, { "epoch": 0.9392073110161915, "grad_norm": 0.111328125, "learning_rate": 0.0007248673884970696, "loss": 0.6436, "step": 18910 }, { "epoch": 0.9397039833118108, "grad_norm": 0.12353515625, "learning_rate": 0.0007248276547134201, "loss": 0.6466, "step": 18920 }, { "epoch": 0.9402006556074303, "grad_norm": 0.10693359375, "learning_rate": 0.0007247879209297706, "loss": 0.6312, "step": 18930 }, { "epoch": 0.9406973279030496, "grad_norm": 0.10302734375, "learning_rate": 0.000724748187146121, "loss": 0.676, "step": 18940 }, { "epoch": 0.941194000198669, "grad_norm": 0.1044921875, "learning_rate": 0.0007247084533624715, "loss": 0.6399, "step": 18950 }, { "epoch": 0.9416906724942883, "grad_norm": 0.10546875, "learning_rate": 0.0007246687195788219, "loss": 0.6462, "step": 18960 }, { "epoch": 0.9421873447899076, "grad_norm": 0.1328125, "learning_rate": 0.0007246289857951724, "loss": 0.6322, "step": 18970 }, { "epoch": 0.942684017085527, "grad_norm": 0.095703125, "learning_rate": 0.0007245892520115229, "loss": 0.6487, "step": 18980 }, { "epoch": 0.9431806893811463, "grad_norm": 0.10400390625, "learning_rate": 0.0007245495182278733, "loss": 0.6321, "step": 18990 }, { "epoch": 0.9436773616767656, "grad_norm": 0.1220703125, "learning_rate": 0.0007245097844442238, "loss": 0.6716, "step": 19000 }, { "epoch": 0.944174033972385, "grad_norm": 0.1005859375, "learning_rate": 0.0007244700506605742, "loss": 0.6537, "step": 19010 }, { "epoch": 0.9446707062680044, "grad_norm": 0.1171875, "learning_rate": 0.0007244303168769246, "loss": 0.6177, "step": 19020 }, { "epoch": 0.9451673785636238, "grad_norm": 0.146484375, "learning_rate": 0.0007243905830932752, "loss": 0.6269, "step": 19030 }, { "epoch": 0.9456640508592431, "grad_norm": 0.10009765625, "learning_rate": 0.0007243508493096255, "loss": 0.6405, "step": 19040 }, { "epoch": 0.9461607231548624, "grad_norm": 0.10791015625, "learning_rate": 0.000724311115525976, "loss": 0.6169, "step": 19050 }, { "epoch": 0.9466573954504818, "grad_norm": 0.11474609375, "learning_rate": 0.0007242713817423265, "loss": 0.6676, "step": 19060 }, { "epoch": 0.9471540677461011, "grad_norm": 0.09619140625, "learning_rate": 0.0007242316479586768, "loss": 0.6352, "step": 19070 }, { "epoch": 0.9476507400417205, "grad_norm": 0.11474609375, "learning_rate": 0.0007241919141750274, "loss": 0.6393, "step": 19080 }, { "epoch": 0.9481474123373398, "grad_norm": 0.09423828125, "learning_rate": 0.0007241521803913778, "loss": 0.6591, "step": 19090 }, { "epoch": 0.9486440846329591, "grad_norm": 0.10498046875, "learning_rate": 0.0007241124466077282, "loss": 0.6649, "step": 19100 }, { "epoch": 0.9491407569285786, "grad_norm": 0.123046875, "learning_rate": 0.0007240727128240787, "loss": 0.6774, "step": 19110 }, { "epoch": 0.9496374292241979, "grad_norm": 0.09619140625, "learning_rate": 0.0007240329790404291, "loss": 0.6421, "step": 19120 }, { "epoch": 0.9501341015198173, "grad_norm": 0.1630859375, "learning_rate": 0.0007239932452567797, "loss": 0.6432, "step": 19130 }, { "epoch": 0.9506307738154366, "grad_norm": 0.150390625, "learning_rate": 0.0007239535114731301, "loss": 0.6313, "step": 19140 }, { "epoch": 0.9511274461110559, "grad_norm": 0.11181640625, "learning_rate": 0.0007239137776894805, "loss": 0.6938, "step": 19150 }, { "epoch": 0.9516241184066753, "grad_norm": 0.1572265625, "learning_rate": 0.000723874043905831, "loss": 0.6474, "step": 19160 }, { "epoch": 0.9521207907022946, "grad_norm": 0.125, "learning_rate": 0.0007238343101221814, "loss": 0.66, "step": 19170 }, { "epoch": 0.9526174629979139, "grad_norm": 0.1357421875, "learning_rate": 0.0007237945763385318, "loss": 0.6707, "step": 19180 }, { "epoch": 0.9531141352935333, "grad_norm": 0.12353515625, "learning_rate": 0.0007237548425548824, "loss": 0.6322, "step": 19190 }, { "epoch": 0.9536108075891527, "grad_norm": 0.09130859375, "learning_rate": 0.0007237151087712328, "loss": 0.6423, "step": 19200 }, { "epoch": 0.9541074798847721, "grad_norm": 0.11962890625, "learning_rate": 0.0007236753749875832, "loss": 0.6438, "step": 19210 }, { "epoch": 0.9546041521803914, "grad_norm": 0.154296875, "learning_rate": 0.0007236356412039337, "loss": 0.65, "step": 19220 }, { "epoch": 0.9551008244760107, "grad_norm": 0.099609375, "learning_rate": 0.000723595907420284, "loss": 0.6024, "step": 19230 }, { "epoch": 0.9555974967716301, "grad_norm": 0.11279296875, "learning_rate": 0.0007235561736366346, "loss": 0.6478, "step": 19240 }, { "epoch": 0.9560941690672494, "grad_norm": 0.107421875, "learning_rate": 0.0007235164398529851, "loss": 0.6551, "step": 19250 }, { "epoch": 0.9565908413628688, "grad_norm": 0.10498046875, "learning_rate": 0.0007234767060693355, "loss": 0.6244, "step": 19260 }, { "epoch": 0.9570875136584881, "grad_norm": 0.10498046875, "learning_rate": 0.0007234369722856859, "loss": 0.6126, "step": 19270 }, { "epoch": 0.9575841859541074, "grad_norm": 0.10791015625, "learning_rate": 0.0007233972385020365, "loss": 0.6673, "step": 19280 }, { "epoch": 0.9580808582497269, "grad_norm": 0.10302734375, "learning_rate": 0.0007233575047183869, "loss": 0.6338, "step": 19290 }, { "epoch": 0.9585775305453462, "grad_norm": 0.140625, "learning_rate": 0.0007233177709347373, "loss": 0.6413, "step": 19300 }, { "epoch": 0.9590742028409656, "grad_norm": 0.1015625, "learning_rate": 0.0007232780371510877, "loss": 0.6325, "step": 19310 }, { "epoch": 0.9595708751365849, "grad_norm": 0.1025390625, "learning_rate": 0.0007232383033674382, "loss": 0.6368, "step": 19320 }, { "epoch": 0.9600675474322042, "grad_norm": 0.1015625, "learning_rate": 0.0007231985695837886, "loss": 0.6868, "step": 19330 }, { "epoch": 0.9605642197278236, "grad_norm": 0.11376953125, "learning_rate": 0.0007231588358001391, "loss": 0.6554, "step": 19340 }, { "epoch": 0.9610608920234429, "grad_norm": 0.109375, "learning_rate": 0.0007231191020164896, "loss": 0.6511, "step": 19350 }, { "epoch": 0.9615575643190623, "grad_norm": 0.1015625, "learning_rate": 0.00072307936823284, "loss": 0.601, "step": 19360 }, { "epoch": 0.9620542366146816, "grad_norm": 0.0986328125, "learning_rate": 0.0007230396344491904, "loss": 0.6424, "step": 19370 }, { "epoch": 0.9625509089103009, "grad_norm": 0.126953125, "learning_rate": 0.000722999900665541, "loss": 0.6213, "step": 19380 }, { "epoch": 0.9630475812059204, "grad_norm": 0.10205078125, "learning_rate": 0.0007229601668818914, "loss": 0.6322, "step": 19390 }, { "epoch": 0.9635442535015397, "grad_norm": 0.10888671875, "learning_rate": 0.0007229204330982418, "loss": 0.639, "step": 19400 }, { "epoch": 0.964040925797159, "grad_norm": 0.09814453125, "learning_rate": 0.0007228806993145923, "loss": 0.6287, "step": 19410 }, { "epoch": 0.9645375980927784, "grad_norm": 0.12060546875, "learning_rate": 0.0007228409655309427, "loss": 0.6506, "step": 19420 }, { "epoch": 0.9650342703883977, "grad_norm": 0.11083984375, "learning_rate": 0.0007228012317472931, "loss": 0.6512, "step": 19430 }, { "epoch": 0.9655309426840171, "grad_norm": 0.1103515625, "learning_rate": 0.0007227614979636437, "loss": 0.6371, "step": 19440 }, { "epoch": 0.9660276149796364, "grad_norm": 0.1240234375, "learning_rate": 0.0007227217641799941, "loss": 0.6314, "step": 19450 }, { "epoch": 0.9665242872752557, "grad_norm": 0.125, "learning_rate": 0.0007226820303963445, "loss": 0.633, "step": 19460 }, { "epoch": 0.9670209595708751, "grad_norm": 0.09716796875, "learning_rate": 0.000722642296612695, "loss": 0.6049, "step": 19470 }, { "epoch": 0.9675176318664945, "grad_norm": 0.123046875, "learning_rate": 0.0007226025628290454, "loss": 0.6461, "step": 19480 }, { "epoch": 0.9680143041621139, "grad_norm": 0.11572265625, "learning_rate": 0.0007225628290453959, "loss": 0.6392, "step": 19490 }, { "epoch": 0.9685109764577332, "grad_norm": 0.109375, "learning_rate": 0.0007225230952617463, "loss": 0.6055, "step": 19500 }, { "epoch": 0.9690076487533525, "grad_norm": 0.11669921875, "learning_rate": 0.0007224833614780968, "loss": 0.6556, "step": 19510 }, { "epoch": 0.9695043210489719, "grad_norm": 0.1005859375, "learning_rate": 0.0007224436276944472, "loss": 0.6339, "step": 19520 }, { "epoch": 0.9700009933445912, "grad_norm": 0.10400390625, "learning_rate": 0.0007224038939107976, "loss": 0.655, "step": 19530 }, { "epoch": 0.9704976656402106, "grad_norm": 0.1708984375, "learning_rate": 0.0007223641601271482, "loss": 0.6511, "step": 19540 }, { "epoch": 0.9709943379358299, "grad_norm": 0.1064453125, "learning_rate": 0.0007223244263434986, "loss": 0.6243, "step": 19550 }, { "epoch": 0.9714910102314492, "grad_norm": 0.10302734375, "learning_rate": 0.000722284692559849, "loss": 0.6578, "step": 19560 }, { "epoch": 0.9719876825270687, "grad_norm": 0.103515625, "learning_rate": 0.0007222449587761995, "loss": 0.6603, "step": 19570 }, { "epoch": 0.972484354822688, "grad_norm": 0.1171875, "learning_rate": 0.0007222052249925499, "loss": 0.6494, "step": 19580 }, { "epoch": 0.9729810271183074, "grad_norm": 0.11669921875, "learning_rate": 0.0007221654912089004, "loss": 0.6439, "step": 19590 }, { "epoch": 0.9734776994139267, "grad_norm": 0.10302734375, "learning_rate": 0.0007221257574252509, "loss": 0.6378, "step": 19600 }, { "epoch": 0.973974371709546, "grad_norm": 0.1025390625, "learning_rate": 0.0007220860236416013, "loss": 0.6525, "step": 19610 }, { "epoch": 0.9744710440051654, "grad_norm": 0.1318359375, "learning_rate": 0.0007220462898579517, "loss": 0.6577, "step": 19620 }, { "epoch": 0.9749677163007847, "grad_norm": 0.11083984375, "learning_rate": 0.0007220065560743022, "loss": 0.6399, "step": 19630 }, { "epoch": 0.975464388596404, "grad_norm": 0.12890625, "learning_rate": 0.0007219668222906527, "loss": 0.6335, "step": 19640 }, { "epoch": 0.9759610608920234, "grad_norm": 0.1044921875, "learning_rate": 0.0007219270885070031, "loss": 0.6269, "step": 19650 }, { "epoch": 0.9764577331876428, "grad_norm": 0.11669921875, "learning_rate": 0.0007218873547233536, "loss": 0.653, "step": 19660 }, { "epoch": 0.9769544054832622, "grad_norm": 0.107421875, "learning_rate": 0.000721847620939704, "loss": 0.6416, "step": 19670 }, { "epoch": 0.9774510777788815, "grad_norm": 0.1083984375, "learning_rate": 0.0007218078871560544, "loss": 0.6566, "step": 19680 }, { "epoch": 0.9779477500745009, "grad_norm": 0.1416015625, "learning_rate": 0.0007217681533724049, "loss": 0.6215, "step": 19690 }, { "epoch": 0.9784444223701202, "grad_norm": 0.11865234375, "learning_rate": 0.0007217284195887554, "loss": 0.6143, "step": 19700 }, { "epoch": 0.9789410946657395, "grad_norm": 0.11181640625, "learning_rate": 0.0007216886858051059, "loss": 0.6546, "step": 19710 }, { "epoch": 0.9794377669613589, "grad_norm": 0.1064453125, "learning_rate": 0.0007216489520214562, "loss": 0.6309, "step": 19720 }, { "epoch": 0.9799344392569782, "grad_norm": 0.1181640625, "learning_rate": 0.0007216092182378067, "loss": 0.6613, "step": 19730 }, { "epoch": 0.9804311115525975, "grad_norm": 0.11767578125, "learning_rate": 0.0007215694844541573, "loss": 0.634, "step": 19740 }, { "epoch": 0.980927783848217, "grad_norm": 0.1240234375, "learning_rate": 0.0007215297506705076, "loss": 0.6508, "step": 19750 }, { "epoch": 0.9814244561438363, "grad_norm": 0.12060546875, "learning_rate": 0.0007214900168868581, "loss": 0.6455, "step": 19760 }, { "epoch": 0.9819211284394557, "grad_norm": 0.1162109375, "learning_rate": 0.0007214502831032085, "loss": 0.6486, "step": 19770 }, { "epoch": 0.982417800735075, "grad_norm": 0.12060546875, "learning_rate": 0.0007214105493195589, "loss": 0.6511, "step": 19780 }, { "epoch": 0.9829144730306943, "grad_norm": 0.10498046875, "learning_rate": 0.0007213708155359095, "loss": 0.6744, "step": 19790 }, { "epoch": 0.9834111453263137, "grad_norm": 0.10888671875, "learning_rate": 0.0007213310817522599, "loss": 0.6283, "step": 19800 }, { "epoch": 0.983907817621933, "grad_norm": 0.15625, "learning_rate": 0.0007212913479686103, "loss": 0.666, "step": 19810 }, { "epoch": 0.9844044899175524, "grad_norm": 0.12890625, "learning_rate": 0.0007212516141849608, "loss": 0.6695, "step": 19820 }, { "epoch": 0.9849011622131717, "grad_norm": 0.109375, "learning_rate": 0.0007212118804013112, "loss": 0.6422, "step": 19830 }, { "epoch": 0.9853978345087911, "grad_norm": 0.1083984375, "learning_rate": 0.0007211721466176617, "loss": 0.6549, "step": 19840 }, { "epoch": 0.9858945068044105, "grad_norm": 0.1025390625, "learning_rate": 0.0007211324128340122, "loss": 0.6556, "step": 19850 }, { "epoch": 0.9863911791000298, "grad_norm": 0.1171875, "learning_rate": 0.0007210926790503626, "loss": 0.645, "step": 19860 }, { "epoch": 0.9868878513956492, "grad_norm": 0.130859375, "learning_rate": 0.0007210529452667131, "loss": 0.621, "step": 19870 }, { "epoch": 0.9873845236912685, "grad_norm": 0.09716796875, "learning_rate": 0.0007210132114830635, "loss": 0.6638, "step": 19880 }, { "epoch": 0.9878811959868878, "grad_norm": 0.130859375, "learning_rate": 0.000720973477699414, "loss": 0.6561, "step": 19890 }, { "epoch": 0.9883778682825072, "grad_norm": 0.10400390625, "learning_rate": 0.0007209337439157645, "loss": 0.6335, "step": 19900 }, { "epoch": 0.9888745405781265, "grad_norm": 0.0986328125, "learning_rate": 0.0007208940101321148, "loss": 0.6598, "step": 19910 }, { "epoch": 0.9893712128737459, "grad_norm": 0.11962890625, "learning_rate": 0.0007208542763484653, "loss": 0.6381, "step": 19920 }, { "epoch": 0.9898678851693653, "grad_norm": 0.1396484375, "learning_rate": 0.0007208145425648158, "loss": 0.6276, "step": 19930 }, { "epoch": 0.9903645574649846, "grad_norm": 0.1171875, "learning_rate": 0.0007207748087811662, "loss": 0.6665, "step": 19940 }, { "epoch": 0.990861229760604, "grad_norm": 0.1064453125, "learning_rate": 0.0007207350749975167, "loss": 0.6354, "step": 19950 }, { "epoch": 0.9913579020562233, "grad_norm": 0.10888671875, "learning_rate": 0.0007206953412138671, "loss": 0.6357, "step": 19960 }, { "epoch": 0.9918545743518427, "grad_norm": 0.1064453125, "learning_rate": 0.0007206556074302175, "loss": 0.6177, "step": 19970 }, { "epoch": 0.992351246647462, "grad_norm": 0.107421875, "learning_rate": 0.000720615873646568, "loss": 0.6346, "step": 19980 }, { "epoch": 0.9928479189430813, "grad_norm": 0.12255859375, "learning_rate": 0.0007205761398629185, "loss": 0.6313, "step": 19990 }, { "epoch": 0.9933445912387007, "grad_norm": 0.10986328125, "learning_rate": 0.0007205364060792689, "loss": 0.6217, "step": 20000 }, { "epoch": 0.99384126353432, "grad_norm": 0.134765625, "learning_rate": 0.0007204966722956194, "loss": 0.6423, "step": 20010 }, { "epoch": 0.9943379358299395, "grad_norm": 0.10205078125, "learning_rate": 0.0007204569385119698, "loss": 0.6366, "step": 20020 }, { "epoch": 0.9948346081255588, "grad_norm": 0.1376953125, "learning_rate": 0.0007204172047283203, "loss": 0.6465, "step": 20030 }, { "epoch": 0.9953312804211781, "grad_norm": 0.09912109375, "learning_rate": 0.0007203774709446708, "loss": 0.6485, "step": 20040 }, { "epoch": 0.9958279527167975, "grad_norm": 0.09619140625, "learning_rate": 0.0007203377371610212, "loss": 0.6753, "step": 20050 }, { "epoch": 0.9963246250124168, "grad_norm": 0.09326171875, "learning_rate": 0.0007202980033773717, "loss": 0.6464, "step": 20060 }, { "epoch": 0.9968212973080361, "grad_norm": 0.1103515625, "learning_rate": 0.0007202582695937221, "loss": 0.6352, "step": 20070 }, { "epoch": 0.9973179696036555, "grad_norm": 0.125, "learning_rate": 0.0007202185358100725, "loss": 0.6392, "step": 20080 }, { "epoch": 0.9978146418992748, "grad_norm": 0.095703125, "learning_rate": 0.0007201788020264231, "loss": 0.6418, "step": 20090 }, { "epoch": 0.9983113141948942, "grad_norm": 0.095703125, "learning_rate": 0.0007201390682427734, "loss": 0.6289, "step": 20100 }, { "epoch": 0.9988079864905136, "grad_norm": 0.09814453125, "learning_rate": 0.0007200993344591239, "loss": 0.6458, "step": 20110 }, { "epoch": 0.999304658786133, "grad_norm": 0.109375, "learning_rate": 0.0007200596006754744, "loss": 0.6511, "step": 20120 }, { "epoch": 0.9998013310817523, "grad_norm": 0.11328125, "learning_rate": 0.0007200198668918247, "loss": 0.6132, "step": 20130 } ], "logging_steps": 10, "max_steps": 201340, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6346195493598003e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }