{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9341374700054297, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023353436750135742, "grad_norm": 8.6875, "learning_rate": 3.8910505836575877e-07, "loss": 7.0785, "step": 10 }, { "epoch": 0.00046706873500271484, "grad_norm": 9.625, "learning_rate": 7.782101167315175e-07, "loss": 7.161, "step": 20 }, { "epoch": 0.0007006031025040722, "grad_norm": 8.0625, "learning_rate": 1.1673151750972764e-06, "loss": 7.0894, "step": 30 }, { "epoch": 0.0009341374700054297, "grad_norm": 6.8125, "learning_rate": 1.556420233463035e-06, "loss": 7.1359, "step": 40 }, { "epoch": 0.0011676718375067871, "grad_norm": 7.71875, "learning_rate": 1.945525291828794e-06, "loss": 7.1046, "step": 50 }, { "epoch": 0.0014012062050081445, "grad_norm": 9.125, "learning_rate": 2.3346303501945527e-06, "loss": 7.1506, "step": 60 }, { "epoch": 0.001634740572509502, "grad_norm": 7.40625, "learning_rate": 2.7237354085603114e-06, "loss": 7.1303, "step": 70 }, { "epoch": 0.0018682749400108594, "grad_norm": 6.03125, "learning_rate": 3.11284046692607e-06, "loss": 7.0496, "step": 80 }, { "epoch": 0.0021018093075122167, "grad_norm": 5.59375, "learning_rate": 3.501945525291829e-06, "loss": 7.1325, "step": 90 }, { "epoch": 0.0023353436750135743, "grad_norm": 5.46875, "learning_rate": 3.891050583657588e-06, "loss": 7.0251, "step": 100 }, { "epoch": 0.002568878042514932, "grad_norm": 5.75, "learning_rate": 4.280155642023347e-06, "loss": 7.0905, "step": 110 }, { "epoch": 0.002802412410016289, "grad_norm": 4.84375, "learning_rate": 4.669260700389105e-06, "loss": 7.0434, "step": 120 }, { "epoch": 0.0030359467775176465, "grad_norm": 4.53125, "learning_rate": 5.058365758754864e-06, "loss": 7.0124, "step": 130 }, { "epoch": 0.003269481145019004, "grad_norm": 4.6875, "learning_rate": 5.447470817120623e-06, "loss": 7.0268, "step": 140 }, { "epoch": 0.003503015512520361, "grad_norm": 3.765625, "learning_rate": 5.8365758754863816e-06, "loss": 6.983, "step": 150 }, { "epoch": 0.0037365498800217187, "grad_norm": 4.6875, "learning_rate": 6.22568093385214e-06, "loss": 6.971, "step": 160 }, { "epoch": 0.003970084247523076, "grad_norm": 3.75, "learning_rate": 6.614785992217899e-06, "loss": 6.9719, "step": 170 }, { "epoch": 0.004203618615024433, "grad_norm": 4.5, "learning_rate": 7.003891050583658e-06, "loss": 7.0427, "step": 180 }, { "epoch": 0.0044371529825257905, "grad_norm": 5.28125, "learning_rate": 7.392996108949416e-06, "loss": 7.0273, "step": 190 }, { "epoch": 0.0046706873500271485, "grad_norm": 4.375, "learning_rate": 7.782101167315176e-06, "loss": 6.943, "step": 200 }, { "epoch": 0.004904221717528506, "grad_norm": 3.59375, "learning_rate": 8.171206225680935e-06, "loss": 6.9912, "step": 210 }, { "epoch": 0.005137756085029864, "grad_norm": 4.125, "learning_rate": 8.560311284046693e-06, "loss": 7.0319, "step": 220 }, { "epoch": 0.005371290452531221, "grad_norm": 3.65625, "learning_rate": 8.949416342412452e-06, "loss": 7.0107, "step": 230 }, { "epoch": 0.005604824820032578, "grad_norm": 4.6875, "learning_rate": 9.33852140077821e-06, "loss": 7.0195, "step": 240 }, { "epoch": 0.005838359187533936, "grad_norm": 3.9375, "learning_rate": 9.72762645914397e-06, "loss": 6.9619, "step": 250 }, { "epoch": 0.006071893555035293, "grad_norm": 3.859375, "learning_rate": 1.0116731517509728e-05, "loss": 7.0111, "step": 260 }, { "epoch": 0.00630542792253665, "grad_norm": 5.0625, "learning_rate": 1.0505836575875487e-05, "loss": 6.9581, "step": 270 }, { "epoch": 0.006538962290038008, "grad_norm": 4.5625, "learning_rate": 1.0894941634241246e-05, "loss": 6.9873, "step": 280 }, { "epoch": 0.006772496657539365, "grad_norm": 4.375, "learning_rate": 1.1284046692607004e-05, "loss": 6.9152, "step": 290 }, { "epoch": 0.007006031025040722, "grad_norm": 4.0625, "learning_rate": 1.1673151750972763e-05, "loss": 6.9381, "step": 300 }, { "epoch": 0.00723956539254208, "grad_norm": 3.90625, "learning_rate": 1.2062256809338522e-05, "loss": 6.9842, "step": 310 }, { "epoch": 0.0074730997600434374, "grad_norm": 4.5, "learning_rate": 1.245136186770428e-05, "loss": 6.9774, "step": 320 }, { "epoch": 0.007706634127544795, "grad_norm": 3.9375, "learning_rate": 1.2840466926070038e-05, "loss": 6.9472, "step": 330 }, { "epoch": 0.007940168495046153, "grad_norm": 3.984375, "learning_rate": 1.3229571984435798e-05, "loss": 6.9117, "step": 340 }, { "epoch": 0.008173702862547509, "grad_norm": 4.3125, "learning_rate": 1.3618677042801557e-05, "loss": 6.9609, "step": 350 }, { "epoch": 0.008407237230048867, "grad_norm": 3.671875, "learning_rate": 1.4007782101167315e-05, "loss": 7.0054, "step": 360 }, { "epoch": 0.008640771597550225, "grad_norm": 4.625, "learning_rate": 1.4396887159533074e-05, "loss": 6.9233, "step": 370 }, { "epoch": 0.008874305965051581, "grad_norm": 4.53125, "learning_rate": 1.4785992217898833e-05, "loss": 6.976, "step": 380 }, { "epoch": 0.009107840332552939, "grad_norm": 3.640625, "learning_rate": 1.5175097276264592e-05, "loss": 6.9388, "step": 390 }, { "epoch": 0.009341374700054297, "grad_norm": 3.890625, "learning_rate": 1.5564202334630352e-05, "loss": 6.9473, "step": 400 }, { "epoch": 0.009574909067555655, "grad_norm": 4.875, "learning_rate": 1.595330739299611e-05, "loss": 6.9005, "step": 410 }, { "epoch": 0.009808443435057011, "grad_norm": 3.984375, "learning_rate": 1.634241245136187e-05, "loss": 6.9449, "step": 420 }, { "epoch": 0.01004197780255837, "grad_norm": 3.953125, "learning_rate": 1.6731517509727626e-05, "loss": 6.9816, "step": 430 }, { "epoch": 0.010275512170059727, "grad_norm": 4.4375, "learning_rate": 1.7120622568093387e-05, "loss": 7.0071, "step": 440 }, { "epoch": 0.010509046537561084, "grad_norm": 4.65625, "learning_rate": 1.7509727626459144e-05, "loss": 6.9804, "step": 450 }, { "epoch": 0.010742580905062441, "grad_norm": 4.09375, "learning_rate": 1.7898832684824904e-05, "loss": 6.974, "step": 460 }, { "epoch": 0.0109761152725638, "grad_norm": 4.25, "learning_rate": 1.828793774319066e-05, "loss": 6.9413, "step": 470 }, { "epoch": 0.011209649640065156, "grad_norm": 4.375, "learning_rate": 1.867704280155642e-05, "loss": 6.9143, "step": 480 }, { "epoch": 0.011443184007566514, "grad_norm": 4.03125, "learning_rate": 1.906614785992218e-05, "loss": 6.9795, "step": 490 }, { "epoch": 0.011676718375067872, "grad_norm": 4.0625, "learning_rate": 1.945525291828794e-05, "loss": 6.9466, "step": 500 }, { "epoch": 0.011676718375067872, "eval_loss": 6.972438335418701, "eval_runtime": 79.3421, "eval_samples_per_second": 12.604, "eval_steps_per_second": 12.604, "step": 500 }, { "epoch": 0.011910252742569228, "grad_norm": 3.6875, "learning_rate": 1.9844357976653696e-05, "loss": 6.8699, "step": 510 }, { "epoch": 0.012143787110070586, "grad_norm": 4.25, "learning_rate": 2.0233463035019457e-05, "loss": 6.9017, "step": 520 }, { "epoch": 0.012377321477571944, "grad_norm": 4.65625, "learning_rate": 2.0622568093385214e-05, "loss": 6.9472, "step": 530 }, { "epoch": 0.0126108558450733, "grad_norm": 4.6875, "learning_rate": 2.1011673151750974e-05, "loss": 6.9198, "step": 540 }, { "epoch": 0.012844390212574658, "grad_norm": 4.1875, "learning_rate": 2.140077821011673e-05, "loss": 6.964, "step": 550 }, { "epoch": 0.013077924580076016, "grad_norm": 4.1875, "learning_rate": 2.178988326848249e-05, "loss": 6.987, "step": 560 }, { "epoch": 0.013311458947577372, "grad_norm": 3.765625, "learning_rate": 2.217898832684825e-05, "loss": 6.9446, "step": 570 }, { "epoch": 0.01354499331507873, "grad_norm": 4.34375, "learning_rate": 2.256809338521401e-05, "loss": 6.9402, "step": 580 }, { "epoch": 0.013778527682580088, "grad_norm": 3.890625, "learning_rate": 2.2957198443579766e-05, "loss": 6.9287, "step": 590 }, { "epoch": 0.014012062050081445, "grad_norm": 4.5625, "learning_rate": 2.3346303501945526e-05, "loss": 6.9169, "step": 600 }, { "epoch": 0.014245596417582803, "grad_norm": 4.84375, "learning_rate": 2.3735408560311283e-05, "loss": 6.9085, "step": 610 }, { "epoch": 0.01447913078508416, "grad_norm": 4.125, "learning_rate": 2.4124513618677044e-05, "loss": 6.925, "step": 620 }, { "epoch": 0.014712665152585517, "grad_norm": 4.28125, "learning_rate": 2.45136186770428e-05, "loss": 6.9443, "step": 630 }, { "epoch": 0.014946199520086875, "grad_norm": 4.09375, "learning_rate": 2.490272373540856e-05, "loss": 6.9313, "step": 640 }, { "epoch": 0.015179733887588233, "grad_norm": 4.4375, "learning_rate": 2.5291828793774318e-05, "loss": 6.9198, "step": 650 }, { "epoch": 0.01541326825508959, "grad_norm": 3.578125, "learning_rate": 2.5680933852140075e-05, "loss": 6.9543, "step": 660 }, { "epoch": 0.015646802622590945, "grad_norm": 3.84375, "learning_rate": 2.607003891050584e-05, "loss": 6.9409, "step": 670 }, { "epoch": 0.015880336990092305, "grad_norm": 3.4375, "learning_rate": 2.6459143968871596e-05, "loss": 6.9213, "step": 680 }, { "epoch": 0.01611387135759366, "grad_norm": 4.84375, "learning_rate": 2.6848249027237353e-05, "loss": 6.9207, "step": 690 }, { "epoch": 0.016347405725095018, "grad_norm": 4.4375, "learning_rate": 2.7237354085603113e-05, "loss": 6.9082, "step": 700 }, { "epoch": 0.016580940092596377, "grad_norm": 4.21875, "learning_rate": 2.7626459143968874e-05, "loss": 6.9194, "step": 710 }, { "epoch": 0.016814474460097734, "grad_norm": 4.90625, "learning_rate": 2.801556420233463e-05, "loss": 6.9503, "step": 720 }, { "epoch": 0.01704800882759909, "grad_norm": 4.71875, "learning_rate": 2.8404669260700388e-05, "loss": 6.9325, "step": 730 }, { "epoch": 0.01728154319510045, "grad_norm": 4.0625, "learning_rate": 2.8793774319066148e-05, "loss": 6.9109, "step": 740 }, { "epoch": 0.017515077562601806, "grad_norm": 4.03125, "learning_rate": 2.918287937743191e-05, "loss": 6.9134, "step": 750 }, { "epoch": 0.017748611930103162, "grad_norm": 4.28125, "learning_rate": 2.9571984435797666e-05, "loss": 6.9575, "step": 760 }, { "epoch": 0.017982146297604522, "grad_norm": 4.71875, "learning_rate": 2.9961089494163426e-05, "loss": 6.9697, "step": 770 }, { "epoch": 0.018215680665105878, "grad_norm": 4.46875, "learning_rate": 3.0350194552529183e-05, "loss": 6.9045, "step": 780 }, { "epoch": 0.018449215032607234, "grad_norm": 4.1875, "learning_rate": 3.0739299610894944e-05, "loss": 7.0036, "step": 790 }, { "epoch": 0.018682749400108594, "grad_norm": 3.546875, "learning_rate": 3.1128404669260704e-05, "loss": 6.8647, "step": 800 }, { "epoch": 0.01891628376760995, "grad_norm": 4.625, "learning_rate": 3.151750972762646e-05, "loss": 6.8838, "step": 810 }, { "epoch": 0.01914981813511131, "grad_norm": 4.15625, "learning_rate": 3.190661478599222e-05, "loss": 6.9228, "step": 820 }, { "epoch": 0.019383352502612666, "grad_norm": 5.34375, "learning_rate": 3.229571984435798e-05, "loss": 6.9498, "step": 830 }, { "epoch": 0.019616886870114023, "grad_norm": 3.859375, "learning_rate": 3.268482490272374e-05, "loss": 6.9753, "step": 840 }, { "epoch": 0.019850421237615382, "grad_norm": 4.875, "learning_rate": 3.307392996108949e-05, "loss": 6.9631, "step": 850 }, { "epoch": 0.02008395560511674, "grad_norm": 4.21875, "learning_rate": 3.346303501945525e-05, "loss": 6.947, "step": 860 }, { "epoch": 0.020317489972618095, "grad_norm": 5.1875, "learning_rate": 3.385214007782101e-05, "loss": 6.9468, "step": 870 }, { "epoch": 0.020551024340119455, "grad_norm": 4.71875, "learning_rate": 3.4241245136186774e-05, "loss": 6.9413, "step": 880 }, { "epoch": 0.02078455870762081, "grad_norm": 3.6875, "learning_rate": 3.463035019455253e-05, "loss": 6.9587, "step": 890 }, { "epoch": 0.021018093075122167, "grad_norm": 3.671875, "learning_rate": 3.501945525291829e-05, "loss": 6.9773, "step": 900 }, { "epoch": 0.021251627442623527, "grad_norm": 4.5625, "learning_rate": 3.540856031128405e-05, "loss": 6.9367, "step": 910 }, { "epoch": 0.021485161810124883, "grad_norm": 3.796875, "learning_rate": 3.579766536964981e-05, "loss": 6.973, "step": 920 }, { "epoch": 0.02171869617762624, "grad_norm": 4.6875, "learning_rate": 3.618677042801556e-05, "loss": 6.9684, "step": 930 }, { "epoch": 0.0219522305451276, "grad_norm": 4.34375, "learning_rate": 3.657587548638132e-05, "loss": 6.8902, "step": 940 }, { "epoch": 0.022185764912628955, "grad_norm": 4.34375, "learning_rate": 3.696498054474708e-05, "loss": 6.932, "step": 950 }, { "epoch": 0.02241929928013031, "grad_norm": 4.21875, "learning_rate": 3.735408560311284e-05, "loss": 6.9274, "step": 960 }, { "epoch": 0.02265283364763167, "grad_norm": 4.09375, "learning_rate": 3.77431906614786e-05, "loss": 6.9041, "step": 970 }, { "epoch": 0.022886368015133027, "grad_norm": 5.0625, "learning_rate": 3.813229571984436e-05, "loss": 6.9196, "step": 980 }, { "epoch": 0.023119902382634384, "grad_norm": 4.125, "learning_rate": 3.852140077821012e-05, "loss": 6.9706, "step": 990 }, { "epoch": 0.023353436750135743, "grad_norm": 4.46875, "learning_rate": 3.891050583657588e-05, "loss": 6.936, "step": 1000 }, { "epoch": 0.023353436750135743, "eval_loss": 6.978423595428467, "eval_runtime": 78.8865, "eval_samples_per_second": 12.676, "eval_steps_per_second": 12.676, "step": 1000 }, { "epoch": 0.0235869711176371, "grad_norm": 3.953125, "learning_rate": 3.929961089494164e-05, "loss": 6.9135, "step": 1010 }, { "epoch": 0.023820505485138456, "grad_norm": 4.46875, "learning_rate": 3.968871595330739e-05, "loss": 6.9226, "step": 1020 }, { "epoch": 0.024054039852639816, "grad_norm": 3.96875, "learning_rate": 4.007782101167315e-05, "loss": 6.9098, "step": 1030 }, { "epoch": 0.024287574220141172, "grad_norm": 4.75, "learning_rate": 4.046692607003891e-05, "loss": 6.8875, "step": 1040 }, { "epoch": 0.024521108587642528, "grad_norm": 5.21875, "learning_rate": 4.0856031128404673e-05, "loss": 6.9923, "step": 1050 }, { "epoch": 0.024754642955143888, "grad_norm": 3.828125, "learning_rate": 4.124513618677043e-05, "loss": 6.9697, "step": 1060 }, { "epoch": 0.024988177322645244, "grad_norm": 4.625, "learning_rate": 4.163424124513619e-05, "loss": 6.9451, "step": 1070 }, { "epoch": 0.0252217116901466, "grad_norm": 4.71875, "learning_rate": 4.202334630350195e-05, "loss": 6.9351, "step": 1080 }, { "epoch": 0.02545524605764796, "grad_norm": 4.28125, "learning_rate": 4.241245136186771e-05, "loss": 6.9391, "step": 1090 }, { "epoch": 0.025688780425149316, "grad_norm": 4.40625, "learning_rate": 4.280155642023346e-05, "loss": 6.9308, "step": 1100 }, { "epoch": 0.025922314792650673, "grad_norm": 4.25, "learning_rate": 4.319066147859923e-05, "loss": 6.9454, "step": 1110 }, { "epoch": 0.026155849160152032, "grad_norm": 4.25, "learning_rate": 4.357976653696498e-05, "loss": 6.9398, "step": 1120 }, { "epoch": 0.02638938352765339, "grad_norm": 4.1875, "learning_rate": 4.396887159533074e-05, "loss": 6.9513, "step": 1130 }, { "epoch": 0.026622917895154745, "grad_norm": 3.65625, "learning_rate": 4.43579766536965e-05, "loss": 6.9684, "step": 1140 }, { "epoch": 0.026856452262656105, "grad_norm": 4.28125, "learning_rate": 4.4747081712062264e-05, "loss": 6.9495, "step": 1150 }, { "epoch": 0.02708998663015746, "grad_norm": 4.71875, "learning_rate": 4.513618677042802e-05, "loss": 6.972, "step": 1160 }, { "epoch": 0.027323520997658817, "grad_norm": 4.53125, "learning_rate": 4.552529182879378e-05, "loss": 6.9265, "step": 1170 }, { "epoch": 0.027557055365160177, "grad_norm": 5.28125, "learning_rate": 4.591439688715953e-05, "loss": 6.9761, "step": 1180 }, { "epoch": 0.027790589732661533, "grad_norm": 4.1875, "learning_rate": 4.63035019455253e-05, "loss": 7.0168, "step": 1190 }, { "epoch": 0.02802412410016289, "grad_norm": 4.375, "learning_rate": 4.669260700389105e-05, "loss": 6.9891, "step": 1200 }, { "epoch": 0.02825765846766425, "grad_norm": 3.578125, "learning_rate": 4.708171206225681e-05, "loss": 6.9463, "step": 1210 }, { "epoch": 0.028491192835165605, "grad_norm": 4.1875, "learning_rate": 4.7470817120622567e-05, "loss": 6.9701, "step": 1220 }, { "epoch": 0.02872472720266696, "grad_norm": 3.9375, "learning_rate": 4.7859922178988334e-05, "loss": 7.0322, "step": 1230 }, { "epoch": 0.02895826157016832, "grad_norm": 4.5, "learning_rate": 4.824902723735409e-05, "loss": 6.9378, "step": 1240 }, { "epoch": 0.029191795937669678, "grad_norm": 4.1875, "learning_rate": 4.863813229571985e-05, "loss": 6.9701, "step": 1250 }, { "epoch": 0.029425330305171034, "grad_norm": 4.46875, "learning_rate": 4.90272373540856e-05, "loss": 6.9927, "step": 1260 }, { "epoch": 0.029658864672672394, "grad_norm": 4.34375, "learning_rate": 4.941634241245137e-05, "loss": 6.9273, "step": 1270 }, { "epoch": 0.02989239904017375, "grad_norm": 4.0, "learning_rate": 4.980544747081712e-05, "loss": 7.028, "step": 1280 }, { "epoch": 0.030125933407675106, "grad_norm": 4.1875, "learning_rate": 4.9999998212190126e-05, "loss": 6.9852, "step": 1290 }, { "epoch": 0.030359467775176466, "grad_norm": 4.46875, "learning_rate": 4.999998390971264e-05, "loss": 7.0149, "step": 1300 }, { "epoch": 0.030593002142677822, "grad_norm": 5.8125, "learning_rate": 4.999995530476586e-05, "loss": 6.9807, "step": 1310 }, { "epoch": 0.03082653651017918, "grad_norm": 4.28125, "learning_rate": 4.999991239736614e-05, "loss": 6.9642, "step": 1320 }, { "epoch": 0.031060070877680538, "grad_norm": 3.9375, "learning_rate": 4.9999855187538033e-05, "loss": 6.982, "step": 1330 }, { "epoch": 0.03129360524518189, "grad_norm": 3.8125, "learning_rate": 4.999978367531427e-05, "loss": 6.9386, "step": 1340 }, { "epoch": 0.031527139612683254, "grad_norm": 4.28125, "learning_rate": 4.999969786073576e-05, "loss": 6.9928, "step": 1350 }, { "epoch": 0.03176067398018461, "grad_norm": 4.09375, "learning_rate": 4.99995977438516e-05, "loss": 7.0252, "step": 1360 }, { "epoch": 0.031994208347685966, "grad_norm": 3.8125, "learning_rate": 4.999948332471907e-05, "loss": 7.003, "step": 1370 }, { "epoch": 0.03222774271518732, "grad_norm": 4.6875, "learning_rate": 4.9999354603403614e-05, "loss": 6.9878, "step": 1380 }, { "epoch": 0.03246127708268868, "grad_norm": 4.34375, "learning_rate": 4.9999211579978886e-05, "loss": 7.0122, "step": 1390 }, { "epoch": 0.032694811450190035, "grad_norm": 4.46875, "learning_rate": 4.999905425452671e-05, "loss": 7.0136, "step": 1400 }, { "epoch": 0.0329283458176914, "grad_norm": 4.6875, "learning_rate": 4.9998882627137086e-05, "loss": 6.9745, "step": 1410 }, { "epoch": 0.033161880185192755, "grad_norm": 3.734375, "learning_rate": 4.99986966979082e-05, "loss": 6.9978, "step": 1420 }, { "epoch": 0.03339541455269411, "grad_norm": 4.59375, "learning_rate": 4.999849646694643e-05, "loss": 6.999, "step": 1430 }, { "epoch": 0.03362894892019547, "grad_norm": 4.375, "learning_rate": 4.9998281934366334e-05, "loss": 6.9924, "step": 1440 }, { "epoch": 0.033862483287696823, "grad_norm": 4.46875, "learning_rate": 4.999805310029063e-05, "loss": 6.9906, "step": 1450 }, { "epoch": 0.03409601765519818, "grad_norm": 3.75, "learning_rate": 4.999780996485024e-05, "loss": 7.0348, "step": 1460 }, { "epoch": 0.03432955202269954, "grad_norm": 4.625, "learning_rate": 4.999755252818425e-05, "loss": 6.9828, "step": 1470 }, { "epoch": 0.0345630863902009, "grad_norm": 4.65625, "learning_rate": 4.9997280790439974e-05, "loss": 7.0094, "step": 1480 }, { "epoch": 0.034796620757702255, "grad_norm": 4.375, "learning_rate": 4.9996994751772836e-05, "loss": 6.9882, "step": 1490 }, { "epoch": 0.03503015512520361, "grad_norm": 3.46875, "learning_rate": 4.999669441234649e-05, "loss": 6.9596, "step": 1500 }, { "epoch": 0.03503015512520361, "eval_loss": 7.0334248542785645, "eval_runtime": 79.3057, "eval_samples_per_second": 12.609, "eval_steps_per_second": 12.609, "step": 1500 }, { "epoch": 0.03526368949270497, "grad_norm": 3.8125, "learning_rate": 4.999637977233278e-05, "loss": 6.9792, "step": 1510 }, { "epoch": 0.035497223860206324, "grad_norm": 3.90625, "learning_rate": 4.999605083191168e-05, "loss": 7.0335, "step": 1520 }, { "epoch": 0.03573075822770769, "grad_norm": 4.34375, "learning_rate": 4.999570759127139e-05, "loss": 7.0191, "step": 1530 }, { "epoch": 0.035964292595209044, "grad_norm": 4.34375, "learning_rate": 4.999535005060828e-05, "loss": 7.0541, "step": 1540 }, { "epoch": 0.0361978269627104, "grad_norm": 4.3125, "learning_rate": 4.99949782101269e-05, "loss": 6.9913, "step": 1550 }, { "epoch": 0.036431361330211756, "grad_norm": 3.8125, "learning_rate": 4.999459207003997e-05, "loss": 7.0031, "step": 1560 }, { "epoch": 0.03666489569771311, "grad_norm": 3.53125, "learning_rate": 4.999419163056841e-05, "loss": 6.9404, "step": 1570 }, { "epoch": 0.03689843006521447, "grad_norm": 3.96875, "learning_rate": 4.99937768919413e-05, "loss": 7.0174, "step": 1580 }, { "epoch": 0.03713196443271583, "grad_norm": 3.609375, "learning_rate": 4.999334785439593e-05, "loss": 6.9988, "step": 1590 }, { "epoch": 0.03736549880021719, "grad_norm": 4.03125, "learning_rate": 4.999290451817774e-05, "loss": 6.9711, "step": 1600 }, { "epoch": 0.037599033167718544, "grad_norm": 4.90625, "learning_rate": 4.9992446883540364e-05, "loss": 7.0158, "step": 1610 }, { "epoch": 0.0378325675352199, "grad_norm": 4.0625, "learning_rate": 4.9991974950745614e-05, "loss": 7.0548, "step": 1620 }, { "epoch": 0.03806610190272126, "grad_norm": 4.8125, "learning_rate": 4.9991488720063486e-05, "loss": 6.9584, "step": 1630 }, { "epoch": 0.03829963627022262, "grad_norm": 4.28125, "learning_rate": 4.999098819177214e-05, "loss": 7.003, "step": 1640 }, { "epoch": 0.038533170637723976, "grad_norm": 4.6875, "learning_rate": 4.9990473366157945e-05, "loss": 7.0175, "step": 1650 }, { "epoch": 0.03876670500522533, "grad_norm": 4.125, "learning_rate": 4.9989944243515414e-05, "loss": 7.0214, "step": 1660 }, { "epoch": 0.03900023937272669, "grad_norm": 4.59375, "learning_rate": 4.998940082414727e-05, "loss": 7.0185, "step": 1670 }, { "epoch": 0.039233773740228045, "grad_norm": 4.15625, "learning_rate": 4.9988843108364404e-05, "loss": 7.038, "step": 1680 }, { "epoch": 0.0394673081077294, "grad_norm": 3.90625, "learning_rate": 4.998827109648588e-05, "loss": 7.0504, "step": 1690 }, { "epoch": 0.039700842475230765, "grad_norm": 4.21875, "learning_rate": 4.998768478883894e-05, "loss": 6.9616, "step": 1700 }, { "epoch": 0.03993437684273212, "grad_norm": 5.875, "learning_rate": 4.9987084185759025e-05, "loss": 7.008, "step": 1710 }, { "epoch": 0.04016791121023348, "grad_norm": 4.65625, "learning_rate": 4.9986469287589724e-05, "loss": 7.0038, "step": 1720 }, { "epoch": 0.04040144557773483, "grad_norm": 4.40625, "learning_rate": 4.9985840094682835e-05, "loss": 6.9903, "step": 1730 }, { "epoch": 0.04063497994523619, "grad_norm": 4.96875, "learning_rate": 4.99851966073983e-05, "loss": 7.0673, "step": 1740 }, { "epoch": 0.040868514312737546, "grad_norm": 4.46875, "learning_rate": 4.9984538826104274e-05, "loss": 6.9663, "step": 1750 }, { "epoch": 0.04110204868023891, "grad_norm": 3.609375, "learning_rate": 4.998386675117707e-05, "loss": 7.02, "step": 1760 }, { "epoch": 0.041335583047740265, "grad_norm": 3.90625, "learning_rate": 4.998318038300117e-05, "loss": 7.0489, "step": 1770 }, { "epoch": 0.04156911741524162, "grad_norm": 4.46875, "learning_rate": 4.998247972196926e-05, "loss": 7.0968, "step": 1780 }, { "epoch": 0.04180265178274298, "grad_norm": 4.0, "learning_rate": 4.998176476848218e-05, "loss": 6.9276, "step": 1790 }, { "epoch": 0.042036186150244334, "grad_norm": 4.59375, "learning_rate": 4.998103552294896e-05, "loss": 7.0356, "step": 1800 }, { "epoch": 0.04226972051774569, "grad_norm": 4.34375, "learning_rate": 4.9980291985786785e-05, "loss": 7.0365, "step": 1810 }, { "epoch": 0.042503254885247053, "grad_norm": 4.0, "learning_rate": 4.997953415742105e-05, "loss": 7.0399, "step": 1820 }, { "epoch": 0.04273678925274841, "grad_norm": 4.21875, "learning_rate": 4.997876203828529e-05, "loss": 7.0193, "step": 1830 }, { "epoch": 0.042970323620249766, "grad_norm": 4.0625, "learning_rate": 4.997797562882125e-05, "loss": 7.0644, "step": 1840 }, { "epoch": 0.04320385798775112, "grad_norm": 4.03125, "learning_rate": 4.9977174929478833e-05, "loss": 6.981, "step": 1850 }, { "epoch": 0.04343739235525248, "grad_norm": 3.90625, "learning_rate": 4.997635994071611e-05, "loss": 7.0201, "step": 1860 }, { "epoch": 0.043670926722753835, "grad_norm": 4.28125, "learning_rate": 4.9975530662999344e-05, "loss": 7.037, "step": 1870 }, { "epoch": 0.0439044610902552, "grad_norm": 3.6875, "learning_rate": 4.997468709680295e-05, "loss": 6.9987, "step": 1880 }, { "epoch": 0.044137995457756554, "grad_norm": 4.09375, "learning_rate": 4.997382924260955e-05, "loss": 7.0213, "step": 1890 }, { "epoch": 0.04437152982525791, "grad_norm": 4.5625, "learning_rate": 4.9972957100909915e-05, "loss": 7.0184, "step": 1900 }, { "epoch": 0.04460506419275927, "grad_norm": 4.40625, "learning_rate": 4.997207067220299e-05, "loss": 6.9747, "step": 1910 }, { "epoch": 0.04483859856026062, "grad_norm": 3.671875, "learning_rate": 4.997116995699589e-05, "loss": 6.9875, "step": 1920 }, { "epoch": 0.04507213292776198, "grad_norm": 4.0625, "learning_rate": 4.9970254955803944e-05, "loss": 7.0311, "step": 1930 }, { "epoch": 0.04530566729526334, "grad_norm": 4.375, "learning_rate": 4.9969325669150605e-05, "loss": 7.0345, "step": 1940 }, { "epoch": 0.0455392016627647, "grad_norm": 3.96875, "learning_rate": 4.9968382097567514e-05, "loss": 6.9868, "step": 1950 }, { "epoch": 0.045772736030266055, "grad_norm": 4.125, "learning_rate": 4.9967424241594496e-05, "loss": 7.0247, "step": 1960 }, { "epoch": 0.04600627039776741, "grad_norm": 4.28125, "learning_rate": 4.996645210177954e-05, "loss": 6.9874, "step": 1970 }, { "epoch": 0.04623980476526877, "grad_norm": 4.28125, "learning_rate": 4.996546567867879e-05, "loss": 7.058, "step": 1980 }, { "epoch": 0.046473339132770124, "grad_norm": 3.6875, "learning_rate": 4.99644649728566e-05, "loss": 7.0143, "step": 1990 }, { "epoch": 0.04670687350027149, "grad_norm": 3.9375, "learning_rate": 4.9963449984885456e-05, "loss": 7.0278, "step": 2000 }, { "epoch": 0.04670687350027149, "eval_loss": 7.0420684814453125, "eval_runtime": 79.0165, "eval_samples_per_second": 12.656, "eval_steps_per_second": 12.656, "step": 2000 }, { "epoch": 0.04694040786777284, "grad_norm": 3.84375, "learning_rate": 4.9962420715346045e-05, "loss": 7.0198, "step": 2010 }, { "epoch": 0.0471739422352742, "grad_norm": 4.21875, "learning_rate": 4.996137716482721e-05, "loss": 7.0392, "step": 2020 }, { "epoch": 0.047407476602775556, "grad_norm": 4.40625, "learning_rate": 4.996031933392596e-05, "loss": 7.047, "step": 2030 }, { "epoch": 0.04764101097027691, "grad_norm": 4.71875, "learning_rate": 4.995924722324746e-05, "loss": 6.9771, "step": 2040 }, { "epoch": 0.04787454533777827, "grad_norm": 5.1875, "learning_rate": 4.99581608334051e-05, "loss": 6.9976, "step": 2050 }, { "epoch": 0.04810807970527963, "grad_norm": 4.09375, "learning_rate": 4.995706016502039e-05, "loss": 7.0315, "step": 2060 }, { "epoch": 0.04834161407278099, "grad_norm": 3.984375, "learning_rate": 4.9955945218723006e-05, "loss": 7.0325, "step": 2070 }, { "epoch": 0.048575148440282344, "grad_norm": 4.0625, "learning_rate": 4.995481599515082e-05, "loss": 6.9876, "step": 2080 }, { "epoch": 0.0488086828077837, "grad_norm": 3.6875, "learning_rate": 4.9953672494949865e-05, "loss": 7.0594, "step": 2090 }, { "epoch": 0.049042217175285056, "grad_norm": 4.53125, "learning_rate": 4.995251471877433e-05, "loss": 7.0131, "step": 2100 }, { "epoch": 0.04927575154278641, "grad_norm": 4.375, "learning_rate": 4.995134266728657e-05, "loss": 7.0104, "step": 2110 }, { "epoch": 0.049509285910287776, "grad_norm": 4.1875, "learning_rate": 4.995015634115714e-05, "loss": 7.0046, "step": 2120 }, { "epoch": 0.04974282027778913, "grad_norm": 4.5, "learning_rate": 4.99489557410647e-05, "loss": 7.0044, "step": 2130 }, { "epoch": 0.04997635464529049, "grad_norm": 4.28125, "learning_rate": 4.994774086769614e-05, "loss": 7.1132, "step": 2140 }, { "epoch": 0.050209889012791845, "grad_norm": 4.0, "learning_rate": 4.994651172174648e-05, "loss": 6.9692, "step": 2150 }, { "epoch": 0.0504434233802932, "grad_norm": 4.6875, "learning_rate": 4.9945268303918915e-05, "loss": 7.0545, "step": 2160 }, { "epoch": 0.05067695774779456, "grad_norm": 3.828125, "learning_rate": 4.9944010614924794e-05, "loss": 7.0084, "step": 2170 }, { "epoch": 0.05091049211529592, "grad_norm": 4.34375, "learning_rate": 4.994273865548365e-05, "loss": 6.9846, "step": 2180 }, { "epoch": 0.05114402648279728, "grad_norm": 4.125, "learning_rate": 4.9941452426323166e-05, "loss": 7.0423, "step": 2190 }, { "epoch": 0.05137756085029863, "grad_norm": 3.890625, "learning_rate": 4.99401519281792e-05, "loss": 6.9695, "step": 2200 }, { "epoch": 0.05161109521779999, "grad_norm": 4.0, "learning_rate": 4.9938837161795746e-05, "loss": 6.9975, "step": 2210 }, { "epoch": 0.051844629585301345, "grad_norm": 4.4375, "learning_rate": 4.9937508127925004e-05, "loss": 7.0201, "step": 2220 }, { "epoch": 0.0520781639528027, "grad_norm": 3.84375, "learning_rate": 4.99361648273273e-05, "loss": 7.0364, "step": 2230 }, { "epoch": 0.052311698320304065, "grad_norm": 4.0, "learning_rate": 4.9934807260771135e-05, "loss": 7.0096, "step": 2240 }, { "epoch": 0.05254523268780542, "grad_norm": 3.859375, "learning_rate": 4.993343542903318e-05, "loss": 7.0564, "step": 2250 }, { "epoch": 0.05277876705530678, "grad_norm": 4.40625, "learning_rate": 4.993204933289825e-05, "loss": 7.0614, "step": 2260 }, { "epoch": 0.053012301422808134, "grad_norm": 4.90625, "learning_rate": 4.9930648973159335e-05, "loss": 6.957, "step": 2270 }, { "epoch": 0.05324583579030949, "grad_norm": 3.796875, "learning_rate": 4.9929234350617575e-05, "loss": 7.0216, "step": 2280 }, { "epoch": 0.053479370157810846, "grad_norm": 3.765625, "learning_rate": 4.992780546608228e-05, "loss": 7.0569, "step": 2290 }, { "epoch": 0.05371290452531221, "grad_norm": 4.03125, "learning_rate": 4.992636232037091e-05, "loss": 7.0066, "step": 2300 }, { "epoch": 0.053946438892813565, "grad_norm": 4.59375, "learning_rate": 4.992490491430909e-05, "loss": 7.0511, "step": 2310 }, { "epoch": 0.05417997326031492, "grad_norm": 3.9375, "learning_rate": 4.992343324873059e-05, "loss": 7.022, "step": 2320 }, { "epoch": 0.05441350762781628, "grad_norm": 4.53125, "learning_rate": 4.9921947324477366e-05, "loss": 6.9907, "step": 2330 }, { "epoch": 0.054647041995317634, "grad_norm": 4.40625, "learning_rate": 4.99204471423995e-05, "loss": 7.0108, "step": 2340 }, { "epoch": 0.05488057636281899, "grad_norm": 4.0, "learning_rate": 4.9918932703355256e-05, "loss": 7.05, "step": 2350 }, { "epoch": 0.055114110730320354, "grad_norm": 3.75, "learning_rate": 4.991740400821103e-05, "loss": 6.9698, "step": 2360 }, { "epoch": 0.05534764509782171, "grad_norm": 4.25, "learning_rate": 4.9915861057841394e-05, "loss": 7.0125, "step": 2370 }, { "epoch": 0.055581179465323066, "grad_norm": 4.3125, "learning_rate": 4.9914303853129076e-05, "loss": 7.0049, "step": 2380 }, { "epoch": 0.05581471383282442, "grad_norm": 3.8125, "learning_rate": 4.9912732394964946e-05, "loss": 7.0314, "step": 2390 }, { "epoch": 0.05604824820032578, "grad_norm": 4.15625, "learning_rate": 4.9911146684248024e-05, "loss": 7.0557, "step": 2400 }, { "epoch": 0.056281782567827135, "grad_norm": 4.375, "learning_rate": 4.99095467218855e-05, "loss": 7.0319, "step": 2410 }, { "epoch": 0.0565153169353285, "grad_norm": 4.0, "learning_rate": 4.9907932508792726e-05, "loss": 7.0092, "step": 2420 }, { "epoch": 0.056748851302829854, "grad_norm": 3.734375, "learning_rate": 4.990630404589317e-05, "loss": 7.0457, "step": 2430 }, { "epoch": 0.05698238567033121, "grad_norm": 4.65625, "learning_rate": 4.990466133411849e-05, "loss": 7.1198, "step": 2440 }, { "epoch": 0.05721592003783257, "grad_norm": 4.78125, "learning_rate": 4.990300437440847e-05, "loss": 7.0397, "step": 2450 }, { "epoch": 0.05744945440533392, "grad_norm": 3.34375, "learning_rate": 4.990133316771106e-05, "loss": 7.0217, "step": 2460 }, { "epoch": 0.05768298877283528, "grad_norm": 3.921875, "learning_rate": 4.989964771498235e-05, "loss": 7.0213, "step": 2470 }, { "epoch": 0.05791652314033664, "grad_norm": 4.65625, "learning_rate": 4.98979480171866e-05, "loss": 7.0585, "step": 2480 }, { "epoch": 0.058150057507838, "grad_norm": 4.25, "learning_rate": 4.9896234075296186e-05, "loss": 7.0777, "step": 2490 }, { "epoch": 0.058383591875339355, "grad_norm": 3.4375, "learning_rate": 4.989450589029167e-05, "loss": 7.0801, "step": 2500 }, { "epoch": 0.058383591875339355, "eval_loss": 7.051682949066162, "eval_runtime": 79.2856, "eval_samples_per_second": 12.613, "eval_steps_per_second": 12.613, "step": 2500 }, { "epoch": 0.05861712624284071, "grad_norm": 4.40625, "learning_rate": 4.989276346316173e-05, "loss": 7.0522, "step": 2510 }, { "epoch": 0.05885066061034207, "grad_norm": 4.3125, "learning_rate": 4.989100679490322e-05, "loss": 7.0271, "step": 2520 }, { "epoch": 0.059084194977843424, "grad_norm": 3.796875, "learning_rate": 4.988923588652112e-05, "loss": 7.0517, "step": 2530 }, { "epoch": 0.05931772934534479, "grad_norm": 3.8125, "learning_rate": 4.988745073902858e-05, "loss": 6.9586, "step": 2540 }, { "epoch": 0.05955126371284614, "grad_norm": 4.40625, "learning_rate": 4.988565135344686e-05, "loss": 6.9786, "step": 2550 }, { "epoch": 0.0597847980803475, "grad_norm": 4.28125, "learning_rate": 4.98838377308054e-05, "loss": 7.0774, "step": 2560 }, { "epoch": 0.060018332447848856, "grad_norm": 4.125, "learning_rate": 4.9882009872141774e-05, "loss": 7.0683, "step": 2570 }, { "epoch": 0.06025186681535021, "grad_norm": 3.703125, "learning_rate": 4.988016777850169e-05, "loss": 7.0419, "step": 2580 }, { "epoch": 0.06048540118285157, "grad_norm": 4.21875, "learning_rate": 4.9878311450939006e-05, "loss": 7.0097, "step": 2590 }, { "epoch": 0.06071893555035293, "grad_norm": 3.71875, "learning_rate": 4.9876440890515734e-05, "loss": 6.9938, "step": 2600 }, { "epoch": 0.06095246991785429, "grad_norm": 3.859375, "learning_rate": 4.9874556098302014e-05, "loss": 7.0289, "step": 2610 }, { "epoch": 0.061186004285355644, "grad_norm": 3.84375, "learning_rate": 4.9872657075376144e-05, "loss": 7.0244, "step": 2620 }, { "epoch": 0.061419538652857, "grad_norm": 5.0, "learning_rate": 4.9870743822824543e-05, "loss": 7.0124, "step": 2630 }, { "epoch": 0.06165307302035836, "grad_norm": 3.921875, "learning_rate": 4.986881634174179e-05, "loss": 7.0099, "step": 2640 }, { "epoch": 0.06188660738785971, "grad_norm": 3.53125, "learning_rate": 4.986687463323058e-05, "loss": 7.0479, "step": 2650 }, { "epoch": 0.062120141755361076, "grad_norm": 4.40625, "learning_rate": 4.986491869840177e-05, "loss": 7.0349, "step": 2660 }, { "epoch": 0.06235367612286243, "grad_norm": 4.125, "learning_rate": 4.986294853837436e-05, "loss": 7.0174, "step": 2670 }, { "epoch": 0.06258721049036378, "grad_norm": 4.65625, "learning_rate": 4.986096415427547e-05, "loss": 7.0466, "step": 2680 }, { "epoch": 0.06282074485786515, "grad_norm": 4.6875, "learning_rate": 4.985896554724036e-05, "loss": 7.0003, "step": 2690 }, { "epoch": 0.06305427922536651, "grad_norm": 4.15625, "learning_rate": 4.9856952718412433e-05, "loss": 7.0433, "step": 2700 }, { "epoch": 0.06328781359286786, "grad_norm": 4.1875, "learning_rate": 4.985492566894323e-05, "loss": 7.0354, "step": 2710 }, { "epoch": 0.06352134796036922, "grad_norm": 4.75, "learning_rate": 4.985288439999242e-05, "loss": 7.0, "step": 2720 }, { "epoch": 0.06375488232787058, "grad_norm": 4.8125, "learning_rate": 4.985082891272781e-05, "loss": 7.068, "step": 2730 }, { "epoch": 0.06398841669537193, "grad_norm": 4.0625, "learning_rate": 4.9848759208325345e-05, "loss": 6.9777, "step": 2740 }, { "epoch": 0.06422195106287329, "grad_norm": 4.28125, "learning_rate": 4.98466752879691e-05, "loss": 6.998, "step": 2750 }, { "epoch": 0.06445548543037465, "grad_norm": 4.1875, "learning_rate": 4.984457715285129e-05, "loss": 6.9829, "step": 2760 }, { "epoch": 0.064689019797876, "grad_norm": 4.46875, "learning_rate": 4.984246480417225e-05, "loss": 7.0212, "step": 2770 }, { "epoch": 0.06492255416537736, "grad_norm": 4.15625, "learning_rate": 4.9840338243140464e-05, "loss": 7.098, "step": 2780 }, { "epoch": 0.06515608853287871, "grad_norm": 3.53125, "learning_rate": 4.9838197470972516e-05, "loss": 7.0733, "step": 2790 }, { "epoch": 0.06538962290038007, "grad_norm": 4.21875, "learning_rate": 4.983604248889315e-05, "loss": 7.0289, "step": 2800 }, { "epoch": 0.06562315726788144, "grad_norm": 4.15625, "learning_rate": 4.983387329813524e-05, "loss": 7.0881, "step": 2810 }, { "epoch": 0.0658566916353828, "grad_norm": 4.0625, "learning_rate": 4.983168989993975e-05, "loss": 7.0012, "step": 2820 }, { "epoch": 0.06609022600288415, "grad_norm": 4.375, "learning_rate": 4.9829492295555836e-05, "loss": 7.0399, "step": 2830 }, { "epoch": 0.06632376037038551, "grad_norm": 4.3125, "learning_rate": 4.9827280486240715e-05, "loss": 7.0905, "step": 2840 }, { "epoch": 0.06655729473788687, "grad_norm": 4.40625, "learning_rate": 4.982505447325978e-05, "loss": 7.0767, "step": 2850 }, { "epoch": 0.06679082910538822, "grad_norm": 4.5, "learning_rate": 4.982281425788653e-05, "loss": 7.0378, "step": 2860 }, { "epoch": 0.06702436347288958, "grad_norm": 3.71875, "learning_rate": 4.982055984140258e-05, "loss": 7.024, "step": 2870 }, { "epoch": 0.06725789784039093, "grad_norm": 4.40625, "learning_rate": 4.981829122509768e-05, "loss": 7.0179, "step": 2880 }, { "epoch": 0.06749143220789229, "grad_norm": 3.359375, "learning_rate": 4.981600841026971e-05, "loss": 7.0584, "step": 2890 }, { "epoch": 0.06772496657539365, "grad_norm": 4.1875, "learning_rate": 4.981371139822467e-05, "loss": 7.0616, "step": 2900 }, { "epoch": 0.067958500942895, "grad_norm": 4.0625, "learning_rate": 4.981140019027668e-05, "loss": 7.0386, "step": 2910 }, { "epoch": 0.06819203531039636, "grad_norm": 3.984375, "learning_rate": 4.9809074787747965e-05, "loss": 7.0602, "step": 2920 }, { "epoch": 0.06842556967789773, "grad_norm": 4.21875, "learning_rate": 4.980673519196889e-05, "loss": 7.0342, "step": 2930 }, { "epoch": 0.06865910404539909, "grad_norm": 3.75, "learning_rate": 4.980438140427795e-05, "loss": 7.0106, "step": 2940 }, { "epoch": 0.06889263841290044, "grad_norm": 3.921875, "learning_rate": 4.980201342602173e-05, "loss": 7.0852, "step": 2950 }, { "epoch": 0.0691261727804018, "grad_norm": 4.40625, "learning_rate": 4.979963125855496e-05, "loss": 7.0312, "step": 2960 }, { "epoch": 0.06935970714790315, "grad_norm": 3.8125, "learning_rate": 4.979723490324046e-05, "loss": 7.0526, "step": 2970 }, { "epoch": 0.06959324151540451, "grad_norm": 4.25, "learning_rate": 4.979482436144919e-05, "loss": 7.0324, "step": 2980 }, { "epoch": 0.06982677588290587, "grad_norm": 4.65625, "learning_rate": 4.9792399634560226e-05, "loss": 7.1233, "step": 2990 }, { "epoch": 0.07006031025040722, "grad_norm": 4.09375, "learning_rate": 4.978996072396075e-05, "loss": 7.0493, "step": 3000 }, { "epoch": 0.07006031025040722, "eval_loss": 7.064417362213135, "eval_runtime": 78.9295, "eval_samples_per_second": 12.67, "eval_steps_per_second": 12.67, "step": 3000 }, { "epoch": 0.07029384461790858, "grad_norm": 3.84375, "learning_rate": 4.978750763104606e-05, "loss": 7.0929, "step": 3010 }, { "epoch": 0.07052737898540994, "grad_norm": 4.09375, "learning_rate": 4.978504035721956e-05, "loss": 7.0392, "step": 3020 }, { "epoch": 0.07076091335291129, "grad_norm": 3.890625, "learning_rate": 4.9782558903892783e-05, "loss": 7.0657, "step": 3030 }, { "epoch": 0.07099444772041265, "grad_norm": 3.6875, "learning_rate": 4.978006327248537e-05, "loss": 7.0599, "step": 3040 }, { "epoch": 0.07122798208791402, "grad_norm": 4.3125, "learning_rate": 4.9777553464425055e-05, "loss": 7.1517, "step": 3050 }, { "epoch": 0.07146151645541537, "grad_norm": 4.71875, "learning_rate": 4.977502948114772e-05, "loss": 7.0279, "step": 3060 }, { "epoch": 0.07169505082291673, "grad_norm": 4.3125, "learning_rate": 4.97724913240973e-05, "loss": 7.039, "step": 3070 }, { "epoch": 0.07192858519041809, "grad_norm": 3.640625, "learning_rate": 4.9769938994725905e-05, "loss": 7.0327, "step": 3080 }, { "epoch": 0.07216211955791944, "grad_norm": 3.671875, "learning_rate": 4.976737249449371e-05, "loss": 7.0898, "step": 3090 }, { "epoch": 0.0723956539254208, "grad_norm": 3.65625, "learning_rate": 4.9764791824869006e-05, "loss": 7.0576, "step": 3100 }, { "epoch": 0.07262918829292216, "grad_norm": 4.53125, "learning_rate": 4.9762196987328185e-05, "loss": 7.0528, "step": 3110 }, { "epoch": 0.07286272266042351, "grad_norm": 4.5625, "learning_rate": 4.975958798335576e-05, "loss": 7.0664, "step": 3120 }, { "epoch": 0.07309625702792487, "grad_norm": 3.734375, "learning_rate": 4.975696481444434e-05, "loss": 7.0753, "step": 3130 }, { "epoch": 0.07332979139542622, "grad_norm": 4.125, "learning_rate": 4.975432748209463e-05, "loss": 7.0682, "step": 3140 }, { "epoch": 0.07356332576292758, "grad_norm": 4.65625, "learning_rate": 4.975167598781546e-05, "loss": 7.0777, "step": 3150 }, { "epoch": 0.07379686013042894, "grad_norm": 3.703125, "learning_rate": 4.9749010333123735e-05, "loss": 7.0716, "step": 3160 }, { "epoch": 0.07403039449793031, "grad_norm": 3.71875, "learning_rate": 4.9746330519544474e-05, "loss": 7.0513, "step": 3170 }, { "epoch": 0.07426392886543166, "grad_norm": 4.03125, "learning_rate": 4.97436365486108e-05, "loss": 7.0777, "step": 3180 }, { "epoch": 0.07449746323293302, "grad_norm": 4.1875, "learning_rate": 4.974092842186393e-05, "loss": 7.056, "step": 3190 }, { "epoch": 0.07473099760043438, "grad_norm": 4.46875, "learning_rate": 4.9738206140853184e-05, "loss": 7.0839, "step": 3200 }, { "epoch": 0.07496453196793573, "grad_norm": 4.8125, "learning_rate": 4.9735469707135976e-05, "loss": 7.0326, "step": 3210 }, { "epoch": 0.07519806633543709, "grad_norm": 5.28125, "learning_rate": 4.9732719122277816e-05, "loss": 6.9695, "step": 3220 }, { "epoch": 0.07543160070293844, "grad_norm": 4.5625, "learning_rate": 4.972995438785231e-05, "loss": 7.0442, "step": 3230 }, { "epoch": 0.0756651350704398, "grad_norm": 4.09375, "learning_rate": 4.9727175505441153e-05, "loss": 7.0369, "step": 3240 }, { "epoch": 0.07589866943794116, "grad_norm": 4.0, "learning_rate": 4.972438247663416e-05, "loss": 7.0933, "step": 3250 }, { "epoch": 0.07613220380544251, "grad_norm": 4.78125, "learning_rate": 4.972157530302921e-05, "loss": 7.1505, "step": 3260 }, { "epoch": 0.07636573817294387, "grad_norm": 4.5625, "learning_rate": 4.9718753986232276e-05, "loss": 7.0769, "step": 3270 }, { "epoch": 0.07659927254044524, "grad_norm": 4.5, "learning_rate": 4.971591852785744e-05, "loss": 6.9359, "step": 3280 }, { "epoch": 0.0768328069079466, "grad_norm": 3.84375, "learning_rate": 4.971306892952687e-05, "loss": 7.04, "step": 3290 }, { "epoch": 0.07706634127544795, "grad_norm": 3.6875, "learning_rate": 4.971020519287081e-05, "loss": 7.0642, "step": 3300 }, { "epoch": 0.07729987564294931, "grad_norm": 4.4375, "learning_rate": 4.9707327319527606e-05, "loss": 7.0394, "step": 3310 }, { "epoch": 0.07753341001045067, "grad_norm": 4.3125, "learning_rate": 4.9704435311143696e-05, "loss": 7.078, "step": 3320 }, { "epoch": 0.07776694437795202, "grad_norm": 3.796875, "learning_rate": 4.970152916937357e-05, "loss": 7.0919, "step": 3330 }, { "epoch": 0.07800047874545338, "grad_norm": 4.0, "learning_rate": 4.969860889587985e-05, "loss": 7.0563, "step": 3340 }, { "epoch": 0.07823401311295473, "grad_norm": 4.1875, "learning_rate": 4.969567449233321e-05, "loss": 7.0321, "step": 3350 }, { "epoch": 0.07846754748045609, "grad_norm": 4.0, "learning_rate": 4.9692725960412435e-05, "loss": 7.1017, "step": 3360 }, { "epoch": 0.07870108184795745, "grad_norm": 4.6875, "learning_rate": 4.968976330180437e-05, "loss": 7.0427, "step": 3370 }, { "epoch": 0.0789346162154588, "grad_norm": 4.21875, "learning_rate": 4.9686786518203944e-05, "loss": 7.1385, "step": 3380 }, { "epoch": 0.07916815058296016, "grad_norm": 4.15625, "learning_rate": 4.9683795611314174e-05, "loss": 7.0892, "step": 3390 }, { "epoch": 0.07940168495046153, "grad_norm": 5.125, "learning_rate": 4.968079058284616e-05, "loss": 7.107, "step": 3400 }, { "epoch": 0.07963521931796289, "grad_norm": 4.65625, "learning_rate": 4.9677771434519074e-05, "loss": 7.0438, "step": 3410 }, { "epoch": 0.07986875368546424, "grad_norm": 4.125, "learning_rate": 4.9674738168060164e-05, "loss": 7.0574, "step": 3420 }, { "epoch": 0.0801022880529656, "grad_norm": 4.125, "learning_rate": 4.967169078520476e-05, "loss": 7.073, "step": 3430 }, { "epoch": 0.08033582242046695, "grad_norm": 4.4375, "learning_rate": 4.966862928769628e-05, "loss": 7.0692, "step": 3440 }, { "epoch": 0.08056935678796831, "grad_norm": 4.09375, "learning_rate": 4.966555367728619e-05, "loss": 7.0781, "step": 3450 }, { "epoch": 0.08080289115546967, "grad_norm": 3.671875, "learning_rate": 4.9662463955734054e-05, "loss": 7.0525, "step": 3460 }, { "epoch": 0.08103642552297102, "grad_norm": 3.9375, "learning_rate": 4.9659360124807484e-05, "loss": 7.078, "step": 3470 }, { "epoch": 0.08126995989047238, "grad_norm": 3.515625, "learning_rate": 4.9656242186282185e-05, "loss": 7.0146, "step": 3480 }, { "epoch": 0.08150349425797374, "grad_norm": 3.90625, "learning_rate": 4.965311014194195e-05, "loss": 7.0426, "step": 3490 }, { "epoch": 0.08173702862547509, "grad_norm": 4.1875, "learning_rate": 4.964996399357858e-05, "loss": 7.0132, "step": 3500 }, { "epoch": 0.08173702862547509, "eval_loss": 7.075212001800537, "eval_runtime": 78.8693, "eval_samples_per_second": 12.679, "eval_steps_per_second": 12.679, "step": 3500 }, { "epoch": 0.08197056299297645, "grad_norm": 4.21875, "learning_rate": 4.964680374299201e-05, "loss": 7.0251, "step": 3510 }, { "epoch": 0.08220409736047782, "grad_norm": 3.734375, "learning_rate": 4.964362939199021e-05, "loss": 7.0672, "step": 3520 }, { "epoch": 0.08243763172797917, "grad_norm": 3.984375, "learning_rate": 4.964044094238922e-05, "loss": 7.0456, "step": 3530 }, { "epoch": 0.08267116609548053, "grad_norm": 3.890625, "learning_rate": 4.9637238396013155e-05, "loss": 7.025, "step": 3540 }, { "epoch": 0.08290470046298189, "grad_norm": 3.671875, "learning_rate": 4.9634021754694196e-05, "loss": 7.1046, "step": 3550 }, { "epoch": 0.08313823483048324, "grad_norm": 4.5625, "learning_rate": 4.9630791020272556e-05, "loss": 7.0177, "step": 3560 }, { "epoch": 0.0833717691979846, "grad_norm": 5.34375, "learning_rate": 4.9627546194596554e-05, "loss": 7.0004, "step": 3570 }, { "epoch": 0.08360530356548596, "grad_norm": 4.21875, "learning_rate": 4.962428727952255e-05, "loss": 7.0281, "step": 3580 }, { "epoch": 0.08383883793298731, "grad_norm": 4.4375, "learning_rate": 4.962101427691497e-05, "loss": 7.0728, "step": 3590 }, { "epoch": 0.08407237230048867, "grad_norm": 5.21875, "learning_rate": 4.9617727188646284e-05, "loss": 7.0965, "step": 3600 }, { "epoch": 0.08430590666799002, "grad_norm": 3.609375, "learning_rate": 4.961442601659704e-05, "loss": 7.0718, "step": 3610 }, { "epoch": 0.08453944103549138, "grad_norm": 3.921875, "learning_rate": 4.9611110762655846e-05, "loss": 7.0539, "step": 3620 }, { "epoch": 0.08477297540299274, "grad_norm": 4.78125, "learning_rate": 4.960778142871933e-05, "loss": 7.0406, "step": 3630 }, { "epoch": 0.08500650977049411, "grad_norm": 4.59375, "learning_rate": 4.960443801669222e-05, "loss": 7.0089, "step": 3640 }, { "epoch": 0.08524004413799546, "grad_norm": 4.0625, "learning_rate": 4.9601080528487275e-05, "loss": 7.0943, "step": 3650 }, { "epoch": 0.08547357850549682, "grad_norm": 4.65625, "learning_rate": 4.959770896602532e-05, "loss": 7.0052, "step": 3660 }, { "epoch": 0.08570711287299818, "grad_norm": 3.8125, "learning_rate": 4.9594323331235205e-05, "loss": 7.0302, "step": 3670 }, { "epoch": 0.08594064724049953, "grad_norm": 4.5625, "learning_rate": 4.9590923626053865e-05, "loss": 7.0158, "step": 3680 }, { "epoch": 0.08617418160800089, "grad_norm": 5.59375, "learning_rate": 4.9587509852426254e-05, "loss": 7.0371, "step": 3690 }, { "epoch": 0.08640771597550224, "grad_norm": 4.125, "learning_rate": 4.95840820123054e-05, "loss": 7.0181, "step": 3700 }, { "epoch": 0.0866412503430036, "grad_norm": 4.40625, "learning_rate": 4.9580640107652365e-05, "loss": 7.0225, "step": 3710 }, { "epoch": 0.08687478471050496, "grad_norm": 3.671875, "learning_rate": 4.957718414043626e-05, "loss": 7.067, "step": 3720 }, { "epoch": 0.08710831907800631, "grad_norm": 5.125, "learning_rate": 4.9573714112634234e-05, "loss": 7.0351, "step": 3730 }, { "epoch": 0.08734185344550767, "grad_norm": 4.0, "learning_rate": 4.9570230026231496e-05, "loss": 7.0715, "step": 3740 }, { "epoch": 0.08757538781300903, "grad_norm": 4.125, "learning_rate": 4.9566731883221284e-05, "loss": 7.0528, "step": 3750 }, { "epoch": 0.0878089221805104, "grad_norm": 3.9375, "learning_rate": 4.9563219685604885e-05, "loss": 7.0505, "step": 3760 }, { "epoch": 0.08804245654801175, "grad_norm": 4.25, "learning_rate": 4.955969343539162e-05, "loss": 7.0277, "step": 3770 }, { "epoch": 0.08827599091551311, "grad_norm": 4.625, "learning_rate": 4.955615313459886e-05, "loss": 7.0137, "step": 3780 }, { "epoch": 0.08850952528301446, "grad_norm": 4.46875, "learning_rate": 4.9552598785252e-05, "loss": 7.0063, "step": 3790 }, { "epoch": 0.08874305965051582, "grad_norm": 4.34375, "learning_rate": 4.954903038938449e-05, "loss": 7.0932, "step": 3800 }, { "epoch": 0.08897659401801718, "grad_norm": 5.0, "learning_rate": 4.95454479490378e-05, "loss": 7.143, "step": 3810 }, { "epoch": 0.08921012838551853, "grad_norm": 3.984375, "learning_rate": 4.954185146626145e-05, "loss": 7.0734, "step": 3820 }, { "epoch": 0.08944366275301989, "grad_norm": 4.625, "learning_rate": 4.953824094311297e-05, "loss": 7.119, "step": 3830 }, { "epoch": 0.08967719712052125, "grad_norm": 3.921875, "learning_rate": 4.953461638165794e-05, "loss": 7.0584, "step": 3840 }, { "epoch": 0.0899107314880226, "grad_norm": 4.21875, "learning_rate": 4.953097778396997e-05, "loss": 7.0843, "step": 3850 }, { "epoch": 0.09014426585552396, "grad_norm": 4.15625, "learning_rate": 4.952732515213071e-05, "loss": 7.069, "step": 3860 }, { "epoch": 0.09037780022302531, "grad_norm": 4.25, "learning_rate": 4.952365848822982e-05, "loss": 7.0757, "step": 3870 }, { "epoch": 0.09061133459052668, "grad_norm": 3.75, "learning_rate": 4.9519977794364985e-05, "loss": 7.0525, "step": 3880 }, { "epoch": 0.09084486895802804, "grad_norm": 4.0, "learning_rate": 4.951628307264194e-05, "loss": 7.1011, "step": 3890 }, { "epoch": 0.0910784033255294, "grad_norm": 4.90625, "learning_rate": 4.9512574325174424e-05, "loss": 7.102, "step": 3900 }, { "epoch": 0.09131193769303075, "grad_norm": 4.6875, "learning_rate": 4.9508851554084226e-05, "loss": 7.0698, "step": 3910 }, { "epoch": 0.09154547206053211, "grad_norm": 3.84375, "learning_rate": 4.950511476150111e-05, "loss": 7.0297, "step": 3920 }, { "epoch": 0.09177900642803347, "grad_norm": 4.28125, "learning_rate": 4.9501363949562917e-05, "loss": 7.0728, "step": 3930 }, { "epoch": 0.09201254079553482, "grad_norm": 4.625, "learning_rate": 4.9497599120415473e-05, "loss": 7.1241, "step": 3940 }, { "epoch": 0.09224607516303618, "grad_norm": 3.90625, "learning_rate": 4.949382027621263e-05, "loss": 7.0797, "step": 3950 }, { "epoch": 0.09247960953053753, "grad_norm": 3.953125, "learning_rate": 4.949002741911627e-05, "loss": 7.0869, "step": 3960 }, { "epoch": 0.09271314389803889, "grad_norm": 5.125, "learning_rate": 4.948622055129628e-05, "loss": 7.055, "step": 3970 }, { "epoch": 0.09294667826554025, "grad_norm": 4.03125, "learning_rate": 4.948239967493057e-05, "loss": 7.015, "step": 3980 }, { "epoch": 0.0931802126330416, "grad_norm": 5.03125, "learning_rate": 4.947856479220505e-05, "loss": 7.0287, "step": 3990 }, { "epoch": 0.09341374700054297, "grad_norm": 4.28125, "learning_rate": 4.947471590531366e-05, "loss": 7.0804, "step": 4000 }, { "epoch": 0.09341374700054297, "eval_loss": 7.08168363571167, "eval_runtime": 78.5103, "eval_samples_per_second": 12.737, "eval_steps_per_second": 12.737, "step": 4000 }, { "epoch": 0.09364728136804433, "grad_norm": 4.8125, "learning_rate": 4.9470853016458344e-05, "loss": 7.0494, "step": 4010 }, { "epoch": 0.09388081573554569, "grad_norm": 4.875, "learning_rate": 4.946697612784906e-05, "loss": 7.0482, "step": 4020 }, { "epoch": 0.09411435010304704, "grad_norm": 4.09375, "learning_rate": 4.946308524170377e-05, "loss": 7.0537, "step": 4030 }, { "epoch": 0.0943478844705484, "grad_norm": 3.84375, "learning_rate": 4.945918036024843e-05, "loss": 7.0918, "step": 4040 }, { "epoch": 0.09458141883804976, "grad_norm": 4.875, "learning_rate": 4.945526148571705e-05, "loss": 7.0864, "step": 4050 }, { "epoch": 0.09481495320555111, "grad_norm": 4.46875, "learning_rate": 4.94513286203516e-05, "loss": 7.0878, "step": 4060 }, { "epoch": 0.09504848757305247, "grad_norm": 4.375, "learning_rate": 4.944738176640207e-05, "loss": 6.9977, "step": 4070 }, { "epoch": 0.09528202194055382, "grad_norm": 5.71875, "learning_rate": 4.944342092612645e-05, "loss": 7.052, "step": 4080 }, { "epoch": 0.09551555630805518, "grad_norm": 4.8125, "learning_rate": 4.943944610179073e-05, "loss": 7.0791, "step": 4090 }, { "epoch": 0.09574909067555654, "grad_norm": 3.96875, "learning_rate": 4.943545729566892e-05, "loss": 7.0166, "step": 4100 }, { "epoch": 0.09598262504305789, "grad_norm": 4.59375, "learning_rate": 4.9431454510042987e-05, "loss": 7.0023, "step": 4110 }, { "epoch": 0.09621615941055926, "grad_norm": 4.28125, "learning_rate": 4.942743774720294e-05, "loss": 7.0179, "step": 4120 }, { "epoch": 0.09644969377806062, "grad_norm": 3.796875, "learning_rate": 4.942340700944676e-05, "loss": 7.0121, "step": 4130 }, { "epoch": 0.09668322814556198, "grad_norm": 4.75, "learning_rate": 4.941936229908043e-05, "loss": 7.0799, "step": 4140 }, { "epoch": 0.09691676251306333, "grad_norm": 4.34375, "learning_rate": 4.941530361841792e-05, "loss": 7.069, "step": 4150 }, { "epoch": 0.09715029688056469, "grad_norm": 4.28125, "learning_rate": 4.94112309697812e-05, "loss": 7.0339, "step": 4160 }, { "epoch": 0.09738383124806604, "grad_norm": 3.546875, "learning_rate": 4.940714435550023e-05, "loss": 7.0736, "step": 4170 }, { "epoch": 0.0976173656155674, "grad_norm": 4.5, "learning_rate": 4.940304377791297e-05, "loss": 7.0532, "step": 4180 }, { "epoch": 0.09785089998306876, "grad_norm": 4.21875, "learning_rate": 4.939892923936534e-05, "loss": 7.1232, "step": 4190 }, { "epoch": 0.09808443435057011, "grad_norm": 4.09375, "learning_rate": 4.9394800742211275e-05, "loss": 7.1064, "step": 4200 }, { "epoch": 0.09831796871807147, "grad_norm": 3.984375, "learning_rate": 4.9390658288812675e-05, "loss": 7.0348, "step": 4210 }, { "epoch": 0.09855150308557283, "grad_norm": 4.21875, "learning_rate": 4.938650188153944e-05, "loss": 7.1323, "step": 4220 }, { "epoch": 0.09878503745307418, "grad_norm": 4.625, "learning_rate": 4.9382331522769444e-05, "loss": 7.0789, "step": 4230 }, { "epoch": 0.09901857182057555, "grad_norm": 4.125, "learning_rate": 4.937814721488855e-05, "loss": 7.0558, "step": 4240 }, { "epoch": 0.09925210618807691, "grad_norm": 3.9375, "learning_rate": 4.9373948960290586e-05, "loss": 7.1751, "step": 4250 }, { "epoch": 0.09948564055557826, "grad_norm": 3.515625, "learning_rate": 4.9369736761377385e-05, "loss": 7.1019, "step": 4260 }, { "epoch": 0.09971917492307962, "grad_norm": 4.15625, "learning_rate": 4.9365510620558734e-05, "loss": 7.0216, "step": 4270 }, { "epoch": 0.09995270929058098, "grad_norm": 4.28125, "learning_rate": 4.936127054025241e-05, "loss": 7.0324, "step": 4280 }, { "epoch": 0.10018624365808233, "grad_norm": 4.03125, "learning_rate": 4.935701652288415e-05, "loss": 7.0962, "step": 4290 }, { "epoch": 0.10041977802558369, "grad_norm": 5.71875, "learning_rate": 4.935274857088769e-05, "loss": 7.0431, "step": 4300 }, { "epoch": 0.10065331239308505, "grad_norm": 4.1875, "learning_rate": 4.93484666867047e-05, "loss": 7.0442, "step": 4310 }, { "epoch": 0.1008868467605864, "grad_norm": 3.6875, "learning_rate": 4.934417087278485e-05, "loss": 7.0679, "step": 4320 }, { "epoch": 0.10112038112808776, "grad_norm": 3.875, "learning_rate": 4.933986113158578e-05, "loss": 7.0471, "step": 4330 }, { "epoch": 0.10135391549558911, "grad_norm": 4.46875, "learning_rate": 4.9335537465573086e-05, "loss": 7.0516, "step": 4340 }, { "epoch": 0.10158744986309047, "grad_norm": 4.59375, "learning_rate": 4.9331199877220324e-05, "loss": 7.0557, "step": 4350 }, { "epoch": 0.10182098423059184, "grad_norm": 4.6875, "learning_rate": 4.932684836900904e-05, "loss": 7.0501, "step": 4360 }, { "epoch": 0.1020545185980932, "grad_norm": 5.1875, "learning_rate": 4.932248294342872e-05, "loss": 7.0058, "step": 4370 }, { "epoch": 0.10228805296559455, "grad_norm": 4.90625, "learning_rate": 4.9318103602976815e-05, "loss": 7.0882, "step": 4380 }, { "epoch": 0.10252158733309591, "grad_norm": 4.0625, "learning_rate": 4.931371035015875e-05, "loss": 7.0614, "step": 4390 }, { "epoch": 0.10275512170059727, "grad_norm": 4.0, "learning_rate": 4.93093031874879e-05, "loss": 7.0914, "step": 4400 }, { "epoch": 0.10298865606809862, "grad_norm": 4.25, "learning_rate": 4.930488211748559e-05, "loss": 7.0537, "step": 4410 }, { "epoch": 0.10322219043559998, "grad_norm": 4.53125, "learning_rate": 4.930044714268112e-05, "loss": 7.0915, "step": 4420 }, { "epoch": 0.10345572480310133, "grad_norm": 4.09375, "learning_rate": 4.9295998265611736e-05, "loss": 7.0614, "step": 4430 }, { "epoch": 0.10368925917060269, "grad_norm": 3.609375, "learning_rate": 4.9291535488822636e-05, "loss": 7.0962, "step": 4440 }, { "epoch": 0.10392279353810405, "grad_norm": 4.15625, "learning_rate": 4.928705881486696e-05, "loss": 7.0788, "step": 4450 }, { "epoch": 0.1041563279056054, "grad_norm": 4.46875, "learning_rate": 4.9282568246305825e-05, "loss": 7.1076, "step": 4460 }, { "epoch": 0.10438986227310676, "grad_norm": 4.28125, "learning_rate": 4.9278063785708274e-05, "loss": 7.144, "step": 4470 }, { "epoch": 0.10462339664060813, "grad_norm": 5.0, "learning_rate": 4.92735454356513e-05, "loss": 7.0507, "step": 4480 }, { "epoch": 0.10485693100810949, "grad_norm": 4.78125, "learning_rate": 4.926901319871986e-05, "loss": 7.0501, "step": 4490 }, { "epoch": 0.10509046537561084, "grad_norm": 4.3125, "learning_rate": 4.926446707750683e-05, "loss": 6.9801, "step": 4500 }, { "epoch": 0.10509046537561084, "eval_loss": 7.085182189941406, "eval_runtime": 78.7979, "eval_samples_per_second": 12.691, "eval_steps_per_second": 12.691, "step": 4500 }, { "epoch": 0.1053239997431122, "grad_norm": 3.609375, "learning_rate": 4.925990707461305e-05, "loss": 7.0803, "step": 4510 }, { "epoch": 0.10555753411061355, "grad_norm": 4.0, "learning_rate": 4.925533319264729e-05, "loss": 7.0649, "step": 4520 }, { "epoch": 0.10579106847811491, "grad_norm": 4.4375, "learning_rate": 4.9250745434226266e-05, "loss": 7.0816, "step": 4530 }, { "epoch": 0.10602460284561627, "grad_norm": 4.5, "learning_rate": 4.924614380197462e-05, "loss": 7.0522, "step": 4540 }, { "epoch": 0.10625813721311762, "grad_norm": 4.9375, "learning_rate": 4.924152829852496e-05, "loss": 7.0885, "step": 4550 }, { "epoch": 0.10649167158061898, "grad_norm": 4.8125, "learning_rate": 4.9236898926517805e-05, "loss": 7.0815, "step": 4560 }, { "epoch": 0.10672520594812034, "grad_norm": 5.09375, "learning_rate": 4.9232255688601604e-05, "loss": 7.131, "step": 4570 }, { "epoch": 0.10695874031562169, "grad_norm": 4.5625, "learning_rate": 4.922759858743277e-05, "loss": 7.039, "step": 4580 }, { "epoch": 0.10719227468312305, "grad_norm": 4.46875, "learning_rate": 4.922292762567561e-05, "loss": 7.0502, "step": 4590 }, { "epoch": 0.10742580905062442, "grad_norm": 3.96875, "learning_rate": 4.921824280600238e-05, "loss": 7.0237, "step": 4600 }, { "epoch": 0.10765934341812577, "grad_norm": 4.53125, "learning_rate": 4.9213544131093256e-05, "loss": 7.0475, "step": 4610 }, { "epoch": 0.10789287778562713, "grad_norm": 4.6875, "learning_rate": 4.920883160363636e-05, "loss": 7.0177, "step": 4620 }, { "epoch": 0.10812641215312849, "grad_norm": 4.375, "learning_rate": 4.9204105226327724e-05, "loss": 7.0588, "step": 4630 }, { "epoch": 0.10835994652062984, "grad_norm": 4.375, "learning_rate": 4.919936500187129e-05, "loss": 7.0573, "step": 4640 }, { "epoch": 0.1085934808881312, "grad_norm": 4.46875, "learning_rate": 4.919461093297895e-05, "loss": 7.1454, "step": 4650 }, { "epoch": 0.10882701525563256, "grad_norm": 3.765625, "learning_rate": 4.91898430223705e-05, "loss": 7.0562, "step": 4660 }, { "epoch": 0.10906054962313391, "grad_norm": 4.875, "learning_rate": 4.918506127277365e-05, "loss": 7.0354, "step": 4670 }, { "epoch": 0.10929408399063527, "grad_norm": 4.40625, "learning_rate": 4.9180265686924033e-05, "loss": 7.0627, "step": 4680 }, { "epoch": 0.10952761835813662, "grad_norm": 3.953125, "learning_rate": 4.917545626756521e-05, "loss": 7.0929, "step": 4690 }, { "epoch": 0.10976115272563798, "grad_norm": 4.40625, "learning_rate": 4.917063301744865e-05, "loss": 7.0966, "step": 4700 }, { "epoch": 0.10999468709313934, "grad_norm": 4.75, "learning_rate": 4.916579593933372e-05, "loss": 7.0789, "step": 4710 }, { "epoch": 0.11022822146064071, "grad_norm": 4.84375, "learning_rate": 4.91609450359877e-05, "loss": 7.0609, "step": 4720 }, { "epoch": 0.11046175582814206, "grad_norm": 4.4375, "learning_rate": 4.915608031018582e-05, "loss": 7.0624, "step": 4730 }, { "epoch": 0.11069529019564342, "grad_norm": 4.15625, "learning_rate": 4.9151201764711144e-05, "loss": 7.0443, "step": 4740 }, { "epoch": 0.11092882456314478, "grad_norm": 4.65625, "learning_rate": 4.914630940235471e-05, "loss": 7.177, "step": 4750 }, { "epoch": 0.11116235893064613, "grad_norm": 3.859375, "learning_rate": 4.914140322591543e-05, "loss": 7.0434, "step": 4760 }, { "epoch": 0.11139589329814749, "grad_norm": 3.859375, "learning_rate": 4.913648323820012e-05, "loss": 7.0089, "step": 4770 }, { "epoch": 0.11162942766564884, "grad_norm": 4.75, "learning_rate": 4.9131549442023496e-05, "loss": 7.0366, "step": 4780 }, { "epoch": 0.1118629620331502, "grad_norm": 4.28125, "learning_rate": 4.912660184020819e-05, "loss": 7.0612, "step": 4790 }, { "epoch": 0.11209649640065156, "grad_norm": 4.21875, "learning_rate": 4.9121640435584704e-05, "loss": 7.0832, "step": 4800 }, { "epoch": 0.11233003076815291, "grad_norm": 4.5625, "learning_rate": 4.9116665230991476e-05, "loss": 7.107, "step": 4810 }, { "epoch": 0.11256356513565427, "grad_norm": 5.09375, "learning_rate": 4.91116762292748e-05, "loss": 7.0373, "step": 4820 }, { "epoch": 0.11279709950315564, "grad_norm": 4.5625, "learning_rate": 4.9106673433288884e-05, "loss": 7.0791, "step": 4830 }, { "epoch": 0.113030633870657, "grad_norm": 4.09375, "learning_rate": 4.910165684589583e-05, "loss": 7.0617, "step": 4840 }, { "epoch": 0.11326416823815835, "grad_norm": 5.0625, "learning_rate": 4.909662646996561e-05, "loss": 7.0656, "step": 4850 }, { "epoch": 0.11349770260565971, "grad_norm": 4.1875, "learning_rate": 4.909158230837611e-05, "loss": 7.0524, "step": 4860 }, { "epoch": 0.11373123697316107, "grad_norm": 3.703125, "learning_rate": 4.908652436401308e-05, "loss": 7.0931, "step": 4870 }, { "epoch": 0.11396477134066242, "grad_norm": 5.25, "learning_rate": 4.908145263977017e-05, "loss": 7.0643, "step": 4880 }, { "epoch": 0.11419830570816378, "grad_norm": 4.0625, "learning_rate": 4.9076367138548915e-05, "loss": 7.0286, "step": 4890 }, { "epoch": 0.11443184007566513, "grad_norm": 3.734375, "learning_rate": 4.907126786325871e-05, "loss": 7.0213, "step": 4900 }, { "epoch": 0.11466537444316649, "grad_norm": 5.09375, "learning_rate": 4.9066154816816865e-05, "loss": 7.0505, "step": 4910 }, { "epoch": 0.11489890881066785, "grad_norm": 4.5625, "learning_rate": 4.906102800214854e-05, "loss": 7.0907, "step": 4920 }, { "epoch": 0.1151324431781692, "grad_norm": 4.125, "learning_rate": 4.9055887422186785e-05, "loss": 7.0827, "step": 4930 }, { "epoch": 0.11536597754567056, "grad_norm": 4.96875, "learning_rate": 4.9050733079872515e-05, "loss": 7.1519, "step": 4940 }, { "epoch": 0.11559951191317193, "grad_norm": 4.0625, "learning_rate": 4.9045564978154533e-05, "loss": 7.0826, "step": 4950 }, { "epoch": 0.11583304628067329, "grad_norm": 4.40625, "learning_rate": 4.9040383119989494e-05, "loss": 7.0726, "step": 4960 }, { "epoch": 0.11606658064817464, "grad_norm": 4.6875, "learning_rate": 4.903518750834195e-05, "loss": 7.0997, "step": 4970 }, { "epoch": 0.116300115015676, "grad_norm": 3.703125, "learning_rate": 4.902997814618428e-05, "loss": 7.0832, "step": 4980 }, { "epoch": 0.11653364938317735, "grad_norm": 4.0625, "learning_rate": 4.9024755036496795e-05, "loss": 7.1083, "step": 4990 }, { "epoch": 0.11676718375067871, "grad_norm": 3.6875, "learning_rate": 4.9019518182267595e-05, "loss": 7.0752, "step": 5000 }, { "epoch": 0.11676718375067871, "eval_loss": 7.0906476974487305, "eval_runtime": 78.608, "eval_samples_per_second": 12.721, "eval_steps_per_second": 12.721, "step": 5000 }, { "epoch": 0.11700071811818007, "grad_norm": 4.15625, "learning_rate": 4.9014267586492706e-05, "loss": 7.1567, "step": 5010 }, { "epoch": 0.11723425248568142, "grad_norm": 3.78125, "learning_rate": 4.900900325217597e-05, "loss": 7.0572, "step": 5020 }, { "epoch": 0.11746778685318278, "grad_norm": 3.90625, "learning_rate": 4.900372518232912e-05, "loss": 7.1001, "step": 5030 }, { "epoch": 0.11770132122068414, "grad_norm": 4.1875, "learning_rate": 4.899843337997173e-05, "loss": 7.0591, "step": 5040 }, { "epoch": 0.11793485558818549, "grad_norm": 3.921875, "learning_rate": 4.8993127848131235e-05, "loss": 7.0971, "step": 5050 }, { "epoch": 0.11816838995568685, "grad_norm": 4.84375, "learning_rate": 4.8987808589842933e-05, "loss": 7.0689, "step": 5060 }, { "epoch": 0.11840192432318822, "grad_norm": 3.375, "learning_rate": 4.8982475608149956e-05, "loss": 7.0375, "step": 5070 }, { "epoch": 0.11863545869068957, "grad_norm": 5.25, "learning_rate": 4.89771289061033e-05, "loss": 7.0809, "step": 5080 }, { "epoch": 0.11886899305819093, "grad_norm": 4.5625, "learning_rate": 4.897176848676182e-05, "loss": 7.1192, "step": 5090 }, { "epoch": 0.11910252742569229, "grad_norm": 4.84375, "learning_rate": 4.8966394353192196e-05, "loss": 7.0553, "step": 5100 }, { "epoch": 0.11933606179319364, "grad_norm": 5.03125, "learning_rate": 4.896100650846896e-05, "loss": 7.0773, "step": 5110 }, { "epoch": 0.119569596160695, "grad_norm": 4.34375, "learning_rate": 4.895560495567452e-05, "loss": 7.1004, "step": 5120 }, { "epoch": 0.11980313052819636, "grad_norm": 4.46875, "learning_rate": 4.895018969789907e-05, "loss": 7.1123, "step": 5130 }, { "epoch": 0.12003666489569771, "grad_norm": 4.46875, "learning_rate": 4.894476073824068e-05, "loss": 7.0471, "step": 5140 }, { "epoch": 0.12027019926319907, "grad_norm": 4.34375, "learning_rate": 4.893931807980527e-05, "loss": 7.1011, "step": 5150 }, { "epoch": 0.12050373363070042, "grad_norm": 3.953125, "learning_rate": 4.893386172570657e-05, "loss": 7.0315, "step": 5160 }, { "epoch": 0.12073726799820178, "grad_norm": 5.21875, "learning_rate": 4.892839167906615e-05, "loss": 7.1052, "step": 5170 }, { "epoch": 0.12097080236570314, "grad_norm": 4.21875, "learning_rate": 4.8922907943013426e-05, "loss": 7.0887, "step": 5180 }, { "epoch": 0.1212043367332045, "grad_norm": 3.3125, "learning_rate": 4.8917410520685635e-05, "loss": 7.0389, "step": 5190 }, { "epoch": 0.12143787110070586, "grad_norm": 4.75, "learning_rate": 4.891189941522785e-05, "loss": 7.0461, "step": 5200 }, { "epoch": 0.12167140546820722, "grad_norm": 4.8125, "learning_rate": 4.890637462979296e-05, "loss": 7.1074, "step": 5210 }, { "epoch": 0.12190493983570858, "grad_norm": 4.3125, "learning_rate": 4.890083616754171e-05, "loss": 7.1002, "step": 5220 }, { "epoch": 0.12213847420320993, "grad_norm": 4.25, "learning_rate": 4.889528403164263e-05, "loss": 7.05, "step": 5230 }, { "epoch": 0.12237200857071129, "grad_norm": 5.96875, "learning_rate": 4.888971822527211e-05, "loss": 7.0622, "step": 5240 }, { "epoch": 0.12260554293821264, "grad_norm": 3.875, "learning_rate": 4.8884138751614326e-05, "loss": 7.0649, "step": 5250 }, { "epoch": 0.122839077305714, "grad_norm": 5.1875, "learning_rate": 4.88785456138613e-05, "loss": 7.1001, "step": 5260 }, { "epoch": 0.12307261167321536, "grad_norm": 4.46875, "learning_rate": 4.8872938815212855e-05, "loss": 7.0842, "step": 5270 }, { "epoch": 0.12330614604071671, "grad_norm": 3.875, "learning_rate": 4.8867318358876646e-05, "loss": 7.0956, "step": 5280 }, { "epoch": 0.12353968040821807, "grad_norm": 5.5, "learning_rate": 4.8861684248068116e-05, "loss": 7.0289, "step": 5290 }, { "epoch": 0.12377321477571943, "grad_norm": 4.15625, "learning_rate": 4.885603648601055e-05, "loss": 7.0401, "step": 5300 }, { "epoch": 0.1240067491432208, "grad_norm": 4.4375, "learning_rate": 4.885037507593502e-05, "loss": 7.0443, "step": 5310 }, { "epoch": 0.12424028351072215, "grad_norm": 3.859375, "learning_rate": 4.884470002108042e-05, "loss": 7.0881, "step": 5320 }, { "epoch": 0.12447381787822351, "grad_norm": 3.921875, "learning_rate": 4.883901132469343e-05, "loss": 7.1317, "step": 5330 }, { "epoch": 0.12470735224572486, "grad_norm": 3.8125, "learning_rate": 4.883330899002857e-05, "loss": 7.1712, "step": 5340 }, { "epoch": 0.12494088661322622, "grad_norm": 4.53125, "learning_rate": 4.882759302034812e-05, "loss": 7.0289, "step": 5350 }, { "epoch": 0.12517442098072756, "grad_norm": 5.0, "learning_rate": 4.882186341892219e-05, "loss": 7.0946, "step": 5360 }, { "epoch": 0.12540795534822893, "grad_norm": 5.90625, "learning_rate": 4.881612018902868e-05, "loss": 7.0633, "step": 5370 }, { "epoch": 0.1256414897157303, "grad_norm": 3.75, "learning_rate": 4.881036333395329e-05, "loss": 7.0941, "step": 5380 }, { "epoch": 0.12587502408323165, "grad_norm": 4.34375, "learning_rate": 4.88045928569895e-05, "loss": 7.0946, "step": 5390 }, { "epoch": 0.12610855845073302, "grad_norm": 3.828125, "learning_rate": 4.879880876143861e-05, "loss": 7.0825, "step": 5400 }, { "epoch": 0.12634209281823436, "grad_norm": 4.46875, "learning_rate": 4.8793011050609685e-05, "loss": 7.0831, "step": 5410 }, { "epoch": 0.12657562718573573, "grad_norm": 4.25, "learning_rate": 4.87871997278196e-05, "loss": 7.0755, "step": 5420 }, { "epoch": 0.12680916155323707, "grad_norm": 4.375, "learning_rate": 4.8781374796393004e-05, "loss": 7.056, "step": 5430 }, { "epoch": 0.12704269592073844, "grad_norm": 4.625, "learning_rate": 4.877553625966233e-05, "loss": 7.0759, "step": 5440 }, { "epoch": 0.12727623028823978, "grad_norm": 4.28125, "learning_rate": 4.876968412096781e-05, "loss": 7.052, "step": 5450 }, { "epoch": 0.12750976465574115, "grad_norm": 3.703125, "learning_rate": 4.876381838365743e-05, "loss": 7.0699, "step": 5460 }, { "epoch": 0.1277432990232425, "grad_norm": 4.84375, "learning_rate": 4.875793905108699e-05, "loss": 7.0593, "step": 5470 }, { "epoch": 0.12797683339074387, "grad_norm": 4.40625, "learning_rate": 4.875204612662005e-05, "loss": 7.0691, "step": 5480 }, { "epoch": 0.12821036775824524, "grad_norm": 4.375, "learning_rate": 4.874613961362794e-05, "loss": 7.1474, "step": 5490 }, { "epoch": 0.12844390212574658, "grad_norm": 4.25, "learning_rate": 4.8740219515489774e-05, "loss": 7.1268, "step": 5500 }, { "epoch": 0.12844390212574658, "eval_loss": 7.088047027587891, "eval_runtime": 78.7456, "eval_samples_per_second": 12.699, "eval_steps_per_second": 12.699, "step": 5500 }, { "epoch": 0.12867743649324795, "grad_norm": 3.84375, "learning_rate": 4.873428583559243e-05, "loss": 7.0895, "step": 5510 }, { "epoch": 0.1289109708607493, "grad_norm": 5.5625, "learning_rate": 4.8728338577330565e-05, "loss": 7.0532, "step": 5520 }, { "epoch": 0.12914450522825066, "grad_norm": 4.40625, "learning_rate": 4.872237774410661e-05, "loss": 7.0836, "step": 5530 }, { "epoch": 0.129378039595752, "grad_norm": 5.8125, "learning_rate": 4.871640333933074e-05, "loss": 7.0601, "step": 5540 }, { "epoch": 0.12961157396325337, "grad_norm": 3.984375, "learning_rate": 4.8710415366420905e-05, "loss": 7.1493, "step": 5550 }, { "epoch": 0.12984510833075472, "grad_norm": 3.890625, "learning_rate": 4.870441382880283e-05, "loss": 7.0621, "step": 5560 }, { "epoch": 0.1300786426982561, "grad_norm": 3.78125, "learning_rate": 4.8698398729909984e-05, "loss": 7.1239, "step": 5570 }, { "epoch": 0.13031217706575743, "grad_norm": 4.53125, "learning_rate": 4.86923700731836e-05, "loss": 7.0748, "step": 5580 }, { "epoch": 0.1305457114332588, "grad_norm": 4.4375, "learning_rate": 4.868632786207267e-05, "loss": 7.1248, "step": 5590 }, { "epoch": 0.13077924580076014, "grad_norm": 4.0625, "learning_rate": 4.868027210003393e-05, "loss": 7.0127, "step": 5600 }, { "epoch": 0.1310127801682615, "grad_norm": 4.875, "learning_rate": 4.867420279053189e-05, "loss": 7.0839, "step": 5610 }, { "epoch": 0.13124631453576288, "grad_norm": 3.28125, "learning_rate": 4.866811993703878e-05, "loss": 7.0373, "step": 5620 }, { "epoch": 0.13147984890326422, "grad_norm": 5.09375, "learning_rate": 4.8662023543034604e-05, "loss": 7.0295, "step": 5630 }, { "epoch": 0.1317133832707656, "grad_norm": 4.6875, "learning_rate": 4.865591361200711e-05, "loss": 6.968, "step": 5640 }, { "epoch": 0.13194691763826694, "grad_norm": 4.5625, "learning_rate": 4.864979014745177e-05, "loss": 7.0321, "step": 5650 }, { "epoch": 0.1321804520057683, "grad_norm": 4.125, "learning_rate": 4.864365315287182e-05, "loss": 7.1014, "step": 5660 }, { "epoch": 0.13241398637326965, "grad_norm": 3.71875, "learning_rate": 4.863750263177823e-05, "loss": 7.0888, "step": 5670 }, { "epoch": 0.13264752074077102, "grad_norm": 4.53125, "learning_rate": 4.8631338587689714e-05, "loss": 7.0716, "step": 5680 }, { "epoch": 0.13288105510827236, "grad_norm": 4.34375, "learning_rate": 4.8625161024132704e-05, "loss": 7.0655, "step": 5690 }, { "epoch": 0.13311458947577373, "grad_norm": 3.984375, "learning_rate": 4.861896994464139e-05, "loss": 7.0601, "step": 5700 }, { "epoch": 0.13334812384327507, "grad_norm": 4.53125, "learning_rate": 4.861276535275768e-05, "loss": 7.0524, "step": 5710 }, { "epoch": 0.13358165821077644, "grad_norm": 4.125, "learning_rate": 4.860654725203121e-05, "loss": 6.9858, "step": 5720 }, { "epoch": 0.13381519257827781, "grad_norm": 4.53125, "learning_rate": 4.860031564601935e-05, "loss": 7.0139, "step": 5730 }, { "epoch": 0.13404872694577916, "grad_norm": 4.0625, "learning_rate": 4.85940705382872e-05, "loss": 7.1304, "step": 5740 }, { "epoch": 0.13428226131328053, "grad_norm": 4.1875, "learning_rate": 4.8587811932407586e-05, "loss": 7.0669, "step": 5750 }, { "epoch": 0.13451579568078187, "grad_norm": 3.84375, "learning_rate": 4.858153983196104e-05, "loss": 7.0625, "step": 5760 }, { "epoch": 0.13474933004828324, "grad_norm": 3.78125, "learning_rate": 4.857525424053584e-05, "loss": 7.0323, "step": 5770 }, { "epoch": 0.13498286441578458, "grad_norm": 4.03125, "learning_rate": 4.856895516172795e-05, "loss": 7.1045, "step": 5780 }, { "epoch": 0.13521639878328595, "grad_norm": 4.375, "learning_rate": 4.856264259914108e-05, "loss": 6.9669, "step": 5790 }, { "epoch": 0.1354499331507873, "grad_norm": 3.96875, "learning_rate": 4.855631655638664e-05, "loss": 7.0586, "step": 5800 }, { "epoch": 0.13568346751828866, "grad_norm": 3.78125, "learning_rate": 4.854997703708375e-05, "loss": 7.0838, "step": 5810 }, { "epoch": 0.13591700188579, "grad_norm": 3.953125, "learning_rate": 4.8543624044859244e-05, "loss": 7.1009, "step": 5820 }, { "epoch": 0.13615053625329138, "grad_norm": 4.21875, "learning_rate": 4.853725758334766e-05, "loss": 7.0931, "step": 5830 }, { "epoch": 0.13638407062079272, "grad_norm": 4.4375, "learning_rate": 4.853087765619126e-05, "loss": 7.0946, "step": 5840 }, { "epoch": 0.1366176049882941, "grad_norm": 3.828125, "learning_rate": 4.852448426703998e-05, "loss": 7.0494, "step": 5850 }, { "epoch": 0.13685113935579546, "grad_norm": 4.6875, "learning_rate": 4.8518077419551474e-05, "loss": 7.1549, "step": 5860 }, { "epoch": 0.1370846737232968, "grad_norm": 4.28125, "learning_rate": 4.8511657117391097e-05, "loss": 7.1084, "step": 5870 }, { "epoch": 0.13731820809079817, "grad_norm": 4.25, "learning_rate": 4.85052233642319e-05, "loss": 7.1312, "step": 5880 }, { "epoch": 0.13755174245829951, "grad_norm": 3.90625, "learning_rate": 4.849877616375462e-05, "loss": 7.0811, "step": 5890 }, { "epoch": 0.13778527682580088, "grad_norm": 3.8125, "learning_rate": 4.849231551964771e-05, "loss": 7.0668, "step": 5900 }, { "epoch": 0.13801881119330223, "grad_norm": 4.53125, "learning_rate": 4.848584143560729e-05, "loss": 7.0478, "step": 5910 }, { "epoch": 0.1382523455608036, "grad_norm": 4.03125, "learning_rate": 4.847935391533717e-05, "loss": 7.075, "step": 5920 }, { "epoch": 0.13848587992830494, "grad_norm": 4.3125, "learning_rate": 4.8472852962548865e-05, "loss": 7.1603, "step": 5930 }, { "epoch": 0.1387194142958063, "grad_norm": 3.921875, "learning_rate": 4.8466338580961566e-05, "loss": 7.1455, "step": 5940 }, { "epoch": 0.13895294866330765, "grad_norm": 3.484375, "learning_rate": 4.845981077430214e-05, "loss": 7.0597, "step": 5950 }, { "epoch": 0.13918648303080902, "grad_norm": 4.375, "learning_rate": 4.8453269546305135e-05, "loss": 7.044, "step": 5960 }, { "epoch": 0.1394200173983104, "grad_norm": 4.1875, "learning_rate": 4.844671490071279e-05, "loss": 7.1113, "step": 5970 }, { "epoch": 0.13965355176581173, "grad_norm": 3.59375, "learning_rate": 4.844014684127501e-05, "loss": 7.0941, "step": 5980 }, { "epoch": 0.1398870861333131, "grad_norm": 5.0625, "learning_rate": 4.843356537174937e-05, "loss": 7.1221, "step": 5990 }, { "epoch": 0.14012062050081445, "grad_norm": 3.84375, "learning_rate": 4.842697049590114e-05, "loss": 7.0745, "step": 6000 }, { "epoch": 0.14012062050081445, "eval_loss": 7.092975616455078, "eval_runtime": 78.8214, "eval_samples_per_second": 12.687, "eval_steps_per_second": 12.687, "step": 6000 }, { "epoch": 0.14035415486831582, "grad_norm": 4.59375, "learning_rate": 4.8420362217503225e-05, "loss": 7.1082, "step": 6010 }, { "epoch": 0.14058768923581716, "grad_norm": 4.78125, "learning_rate": 4.841374054033622e-05, "loss": 7.0644, "step": 6020 }, { "epoch": 0.14082122360331853, "grad_norm": 4.34375, "learning_rate": 4.840710546818839e-05, "loss": 7.086, "step": 6030 }, { "epoch": 0.14105475797081987, "grad_norm": 4.4375, "learning_rate": 4.8400457004855634e-05, "loss": 7.1116, "step": 6040 }, { "epoch": 0.14128829233832124, "grad_norm": 3.46875, "learning_rate": 4.8393795154141555e-05, "loss": 7.0524, "step": 6050 }, { "epoch": 0.14152182670582258, "grad_norm": 3.34375, "learning_rate": 4.838711991985738e-05, "loss": 7.0273, "step": 6060 }, { "epoch": 0.14175536107332395, "grad_norm": 3.75, "learning_rate": 4.8380431305822004e-05, "loss": 7.0867, "step": 6070 }, { "epoch": 0.1419888954408253, "grad_norm": 4.25, "learning_rate": 4.837372931586198e-05, "loss": 6.9708, "step": 6080 }, { "epoch": 0.14222242980832667, "grad_norm": 4.25, "learning_rate": 4.836701395381152e-05, "loss": 7.1376, "step": 6090 }, { "epoch": 0.14245596417582804, "grad_norm": 5.1875, "learning_rate": 4.8360285223512456e-05, "loss": 7.0752, "step": 6100 }, { "epoch": 0.14268949854332938, "grad_norm": 3.484375, "learning_rate": 4.8353543128814305e-05, "loss": 7.1244, "step": 6110 }, { "epoch": 0.14292303291083075, "grad_norm": 4.03125, "learning_rate": 4.8346787673574214e-05, "loss": 7.082, "step": 6120 }, { "epoch": 0.1431565672783321, "grad_norm": 4.5, "learning_rate": 4.834001886165697e-05, "loss": 7.0629, "step": 6130 }, { "epoch": 0.14339010164583346, "grad_norm": 5.46875, "learning_rate": 4.8333236696934994e-05, "loss": 7.0409, "step": 6140 }, { "epoch": 0.1436236360133348, "grad_norm": 4.71875, "learning_rate": 4.832644118328836e-05, "loss": 7.0658, "step": 6150 }, { "epoch": 0.14385717038083617, "grad_norm": 4.0625, "learning_rate": 4.831963232460479e-05, "loss": 7.0736, "step": 6160 }, { "epoch": 0.14409070474833752, "grad_norm": 5.40625, "learning_rate": 4.831281012477962e-05, "loss": 7.115, "step": 6170 }, { "epoch": 0.1443242391158389, "grad_norm": 4.6875, "learning_rate": 4.830597458771582e-05, "loss": 7.1192, "step": 6180 }, { "epoch": 0.14455777348334023, "grad_norm": 4.625, "learning_rate": 4.829912571732399e-05, "loss": 7.0101, "step": 6190 }, { "epoch": 0.1447913078508416, "grad_norm": 4.0625, "learning_rate": 4.8292263517522376e-05, "loss": 7.0575, "step": 6200 }, { "epoch": 0.14502484221834297, "grad_norm": 4.3125, "learning_rate": 4.8285387992236825e-05, "loss": 7.0829, "step": 6210 }, { "epoch": 0.1452583765858443, "grad_norm": 6.09375, "learning_rate": 4.827849914540082e-05, "loss": 7.0797, "step": 6220 }, { "epoch": 0.14549191095334568, "grad_norm": 4.1875, "learning_rate": 4.827159698095548e-05, "loss": 7.0937, "step": 6230 }, { "epoch": 0.14572544532084702, "grad_norm": 4.0, "learning_rate": 4.8264681502849505e-05, "loss": 7.002, "step": 6240 }, { "epoch": 0.1459589796883484, "grad_norm": 4.75, "learning_rate": 4.8257752715039254e-05, "loss": 7.0699, "step": 6250 }, { "epoch": 0.14619251405584974, "grad_norm": 4.4375, "learning_rate": 4.8250810621488676e-05, "loss": 7.0562, "step": 6260 }, { "epoch": 0.1464260484233511, "grad_norm": 3.96875, "learning_rate": 4.824385522616932e-05, "loss": 7.1186, "step": 6270 }, { "epoch": 0.14665958279085245, "grad_norm": 3.703125, "learning_rate": 4.823688653306038e-05, "loss": 7.018, "step": 6280 }, { "epoch": 0.14689311715835382, "grad_norm": 4.875, "learning_rate": 4.822990454614863e-05, "loss": 7.0491, "step": 6290 }, { "epoch": 0.14712665152585516, "grad_norm": 5.625, "learning_rate": 4.822290926942846e-05, "loss": 7.0207, "step": 6300 }, { "epoch": 0.14736018589335653, "grad_norm": 4.125, "learning_rate": 4.821590070690187e-05, "loss": 7.0871, "step": 6310 }, { "epoch": 0.14759372026085787, "grad_norm": 4.59375, "learning_rate": 4.8208878862578446e-05, "loss": 7.0253, "step": 6320 }, { "epoch": 0.14782725462835924, "grad_norm": 5.0625, "learning_rate": 4.820184374047537e-05, "loss": 7.1245, "step": 6330 }, { "epoch": 0.14806078899586061, "grad_norm": 4.71875, "learning_rate": 4.819479534461745e-05, "loss": 7.0317, "step": 6340 }, { "epoch": 0.14829432336336196, "grad_norm": 4.28125, "learning_rate": 4.818773367903705e-05, "loss": 7.0956, "step": 6350 }, { "epoch": 0.14852785773086333, "grad_norm": 5.25, "learning_rate": 4.8180658747774146e-05, "loss": 7.0843, "step": 6360 }, { "epoch": 0.14876139209836467, "grad_norm": 4.46875, "learning_rate": 4.8173570554876315e-05, "loss": 7.0623, "step": 6370 }, { "epoch": 0.14899492646586604, "grad_norm": 4.9375, "learning_rate": 4.816646910439868e-05, "loss": 7.1905, "step": 6380 }, { "epoch": 0.14922846083336738, "grad_norm": 4.125, "learning_rate": 4.815935440040399e-05, "loss": 7.1055, "step": 6390 }, { "epoch": 0.14946199520086875, "grad_norm": 4.59375, "learning_rate": 4.815222644696257e-05, "loss": 7.0543, "step": 6400 }, { "epoch": 0.1496955295683701, "grad_norm": 4.96875, "learning_rate": 4.81450852481523e-05, "loss": 7.1226, "step": 6410 }, { "epoch": 0.14992906393587146, "grad_norm": 5.15625, "learning_rate": 4.813793080805866e-05, "loss": 7.0105, "step": 6420 }, { "epoch": 0.1501625983033728, "grad_norm": 3.515625, "learning_rate": 4.813076313077469e-05, "loss": 7.1476, "step": 6430 }, { "epoch": 0.15039613267087418, "grad_norm": 3.953125, "learning_rate": 4.8123582220401034e-05, "loss": 7.1623, "step": 6440 }, { "epoch": 0.15062966703837555, "grad_norm": 4.8125, "learning_rate": 4.811638808104586e-05, "loss": 7.1197, "step": 6450 }, { "epoch": 0.1508632014058769, "grad_norm": 3.71875, "learning_rate": 4.810918071682495e-05, "loss": 7.0898, "step": 6460 }, { "epoch": 0.15109673577337826, "grad_norm": 4.5625, "learning_rate": 4.8101960131861615e-05, "loss": 7.082, "step": 6470 }, { "epoch": 0.1513302701408796, "grad_norm": 4.8125, "learning_rate": 4.809472633028675e-05, "loss": 7.0892, "step": 6480 }, { "epoch": 0.15156380450838097, "grad_norm": 3.8125, "learning_rate": 4.808747931623881e-05, "loss": 7.051, "step": 6490 }, { "epoch": 0.15179733887588232, "grad_norm": 5.09375, "learning_rate": 4.8080219093863806e-05, "loss": 7.1156, "step": 6500 }, { "epoch": 0.15179733887588232, "eval_loss": 7.091143608093262, "eval_runtime": 79.1724, "eval_samples_per_second": 12.631, "eval_steps_per_second": 12.631, "step": 6500 }, { "epoch": 0.15203087324338369, "grad_norm": 5.1875, "learning_rate": 4.807294566731529e-05, "loss": 7.1146, "step": 6510 }, { "epoch": 0.15226440761088503, "grad_norm": 4.8125, "learning_rate": 4.806565904075441e-05, "loss": 7.1223, "step": 6520 }, { "epoch": 0.1524979419783864, "grad_norm": 3.328125, "learning_rate": 4.805835921834982e-05, "loss": 7.1081, "step": 6530 }, { "epoch": 0.15273147634588774, "grad_norm": 4.75, "learning_rate": 4.805104620427775e-05, "loss": 7.1767, "step": 6540 }, { "epoch": 0.1529650107133891, "grad_norm": 4.53125, "learning_rate": 4.804372000272196e-05, "loss": 7.0785, "step": 6550 }, { "epoch": 0.15319854508089048, "grad_norm": 3.609375, "learning_rate": 4.803638061787377e-05, "loss": 7.1335, "step": 6560 }, { "epoch": 0.15343207944839182, "grad_norm": 4.96875, "learning_rate": 4.802902805393203e-05, "loss": 7.0863, "step": 6570 }, { "epoch": 0.1536656138158932, "grad_norm": 4.1875, "learning_rate": 4.802166231510315e-05, "loss": 7.0804, "step": 6580 }, { "epoch": 0.15389914818339454, "grad_norm": 4.6875, "learning_rate": 4.8014283405601055e-05, "loss": 7.0969, "step": 6590 }, { "epoch": 0.1541326825508959, "grad_norm": 5.09375, "learning_rate": 4.800689132964721e-05, "loss": 7.08, "step": 6600 }, { "epoch": 0.15436621691839725, "grad_norm": 5.4375, "learning_rate": 4.799948609147061e-05, "loss": 7.0192, "step": 6610 }, { "epoch": 0.15459975128589862, "grad_norm": 3.90625, "learning_rate": 4.799206769530779e-05, "loss": 7.1187, "step": 6620 }, { "epoch": 0.15483328565339996, "grad_norm": 5.21875, "learning_rate": 4.7984636145402814e-05, "loss": 7.0585, "step": 6630 }, { "epoch": 0.15506682002090133, "grad_norm": 4.75, "learning_rate": 4.7977191446007266e-05, "loss": 7.113, "step": 6640 }, { "epoch": 0.15530035438840267, "grad_norm": 4.09375, "learning_rate": 4.796973360138024e-05, "loss": 7.0007, "step": 6650 }, { "epoch": 0.15553388875590404, "grad_norm": 4.625, "learning_rate": 4.796226261578837e-05, "loss": 7.0948, "step": 6660 }, { "epoch": 0.15576742312340539, "grad_norm": 5.15625, "learning_rate": 4.79547784935058e-05, "loss": 7.0912, "step": 6670 }, { "epoch": 0.15600095749090676, "grad_norm": 4.59375, "learning_rate": 4.794728123881419e-05, "loss": 7.2019, "step": 6680 }, { "epoch": 0.15623449185840813, "grad_norm": 3.6875, "learning_rate": 4.793977085600272e-05, "loss": 7.2123, "step": 6690 }, { "epoch": 0.15646802622590947, "grad_norm": 4.15625, "learning_rate": 4.793224734936806e-05, "loss": 7.019, "step": 6700 }, { "epoch": 0.15670156059341084, "grad_norm": 3.78125, "learning_rate": 4.79247107232144e-05, "loss": 7.074, "step": 6710 }, { "epoch": 0.15693509496091218, "grad_norm": 4.0625, "learning_rate": 4.791716098185347e-05, "loss": 7.1178, "step": 6720 }, { "epoch": 0.15716862932841355, "grad_norm": 4.0, "learning_rate": 4.7909598129604424e-05, "loss": 7.0829, "step": 6730 }, { "epoch": 0.1574021636959149, "grad_norm": 5.75, "learning_rate": 4.790202217079399e-05, "loss": 7.119, "step": 6740 }, { "epoch": 0.15763569806341626, "grad_norm": 3.625, "learning_rate": 4.789443310975637e-05, "loss": 7.1152, "step": 6750 }, { "epoch": 0.1578692324309176, "grad_norm": 4.125, "learning_rate": 4.788683095083325e-05, "loss": 7.1083, "step": 6760 }, { "epoch": 0.15810276679841898, "grad_norm": 4.15625, "learning_rate": 4.7879215698373815e-05, "loss": 7.0925, "step": 6770 }, { "epoch": 0.15833630116592032, "grad_norm": 5.03125, "learning_rate": 4.787158735673476e-05, "loss": 7.0218, "step": 6780 }, { "epoch": 0.1585698355334217, "grad_norm": 4.6875, "learning_rate": 4.786394593028023e-05, "loss": 7.0207, "step": 6790 }, { "epoch": 0.15880336990092306, "grad_norm": 4.84375, "learning_rate": 4.7856291423381904e-05, "loss": 7.0546, "step": 6800 }, { "epoch": 0.1590369042684244, "grad_norm": 4.6875, "learning_rate": 4.78486238404189e-05, "loss": 7.145, "step": 6810 }, { "epoch": 0.15927043863592577, "grad_norm": 4.15625, "learning_rate": 4.784094318577784e-05, "loss": 7.1635, "step": 6820 }, { "epoch": 0.1595039730034271, "grad_norm": 3.921875, "learning_rate": 4.783324946385282e-05, "loss": 7.0884, "step": 6830 }, { "epoch": 0.15973750737092848, "grad_norm": 5.1875, "learning_rate": 4.782554267904542e-05, "loss": 7.077, "step": 6840 }, { "epoch": 0.15997104173842983, "grad_norm": 4.0625, "learning_rate": 4.781782283576468e-05, "loss": 7.0691, "step": 6850 }, { "epoch": 0.1602045761059312, "grad_norm": 3.890625, "learning_rate": 4.781008993842711e-05, "loss": 7.1004, "step": 6860 }, { "epoch": 0.16043811047343254, "grad_norm": 4.90625, "learning_rate": 4.78023439914567e-05, "loss": 7.0659, "step": 6870 }, { "epoch": 0.1606716448409339, "grad_norm": 3.8125, "learning_rate": 4.77945849992849e-05, "loss": 7.0199, "step": 6880 }, { "epoch": 0.16090517920843525, "grad_norm": 3.484375, "learning_rate": 4.778681296635062e-05, "loss": 7.0804, "step": 6890 }, { "epoch": 0.16113871357593662, "grad_norm": 4.1875, "learning_rate": 4.777902789710022e-05, "loss": 7.0691, "step": 6900 }, { "epoch": 0.16137224794343796, "grad_norm": 3.953125, "learning_rate": 4.777122979598756e-05, "loss": 7.0471, "step": 6910 }, { "epoch": 0.16160578231093933, "grad_norm": 4.5625, "learning_rate": 4.77634186674739e-05, "loss": 7.086, "step": 6920 }, { "epoch": 0.1618393166784407, "grad_norm": 4.15625, "learning_rate": 4.7755594516028e-05, "loss": 7.0685, "step": 6930 }, { "epoch": 0.16207285104594205, "grad_norm": 4.9375, "learning_rate": 4.774775734612604e-05, "loss": 7.0444, "step": 6940 }, { "epoch": 0.16230638541344342, "grad_norm": 4.53125, "learning_rate": 4.773990716225166e-05, "loss": 7.0249, "step": 6950 }, { "epoch": 0.16253991978094476, "grad_norm": 4.09375, "learning_rate": 4.773204396889596e-05, "loss": 7.0298, "step": 6960 }, { "epoch": 0.16277345414844613, "grad_norm": 4.1875, "learning_rate": 4.772416777055743e-05, "loss": 7.0308, "step": 6970 }, { "epoch": 0.16300698851594747, "grad_norm": 4.5, "learning_rate": 4.7716278571742065e-05, "loss": 7.0229, "step": 6980 }, { "epoch": 0.16324052288344884, "grad_norm": 5.15625, "learning_rate": 4.770837637696325e-05, "loss": 7.0918, "step": 6990 }, { "epoch": 0.16347405725095018, "grad_norm": 4.5625, "learning_rate": 4.770046119074185e-05, "loss": 7.071, "step": 7000 }, { "epoch": 0.16347405725095018, "eval_loss": 7.091245651245117, "eval_runtime": 78.7578, "eval_samples_per_second": 12.697, "eval_steps_per_second": 12.697, "step": 7000 }, { "epoch": 0.16370759161845155, "grad_norm": 4.59375, "learning_rate": 4.769253301760611e-05, "loss": 7.039, "step": 7010 }, { "epoch": 0.1639411259859529, "grad_norm": 4.40625, "learning_rate": 4.7684591862091746e-05, "loss": 7.0825, "step": 7020 }, { "epoch": 0.16417466035345427, "grad_norm": 4.625, "learning_rate": 4.767663772874188e-05, "loss": 7.0961, "step": 7030 }, { "epoch": 0.16440819472095564, "grad_norm": 4.4375, "learning_rate": 4.7668670622107066e-05, "loss": 7.1633, "step": 7040 }, { "epoch": 0.16464172908845698, "grad_norm": 4.4375, "learning_rate": 4.766069054674528e-05, "loss": 7.0584, "step": 7050 }, { "epoch": 0.16487526345595835, "grad_norm": 4.03125, "learning_rate": 4.765269750722191e-05, "loss": 7.182, "step": 7060 }, { "epoch": 0.1651087978234597, "grad_norm": 5.75, "learning_rate": 4.764469150810978e-05, "loss": 7.093, "step": 7070 }, { "epoch": 0.16534233219096106, "grad_norm": 3.96875, "learning_rate": 4.7636672553989106e-05, "loss": 7.0757, "step": 7080 }, { "epoch": 0.1655758665584624, "grad_norm": 4.375, "learning_rate": 4.762864064944752e-05, "loss": 7.1379, "step": 7090 }, { "epoch": 0.16580940092596377, "grad_norm": 4.03125, "learning_rate": 4.7620595799080084e-05, "loss": 7.0697, "step": 7100 }, { "epoch": 0.16604293529346512, "grad_norm": 4.09375, "learning_rate": 4.761253800748924e-05, "loss": 7.0492, "step": 7110 }, { "epoch": 0.16627646966096649, "grad_norm": 4.3125, "learning_rate": 4.760446727928484e-05, "loss": 7.0301, "step": 7120 }, { "epoch": 0.16651000402846783, "grad_norm": 4.03125, "learning_rate": 4.759638361908414e-05, "loss": 7.0706, "step": 7130 }, { "epoch": 0.1667435383959692, "grad_norm": 4.3125, "learning_rate": 4.7588287031511805e-05, "loss": 7.1107, "step": 7140 }, { "epoch": 0.16697707276347054, "grad_norm": 4.6875, "learning_rate": 4.7580177521199884e-05, "loss": 7.0635, "step": 7150 }, { "epoch": 0.1672106071309719, "grad_norm": 4.0625, "learning_rate": 4.757205509278781e-05, "loss": 7.1043, "step": 7160 }, { "epoch": 0.16744414149847328, "grad_norm": 4.5625, "learning_rate": 4.756391975092243e-05, "loss": 7.0919, "step": 7170 }, { "epoch": 0.16767767586597462, "grad_norm": 4.53125, "learning_rate": 4.7555771500257954e-05, "loss": 7.1117, "step": 7180 }, { "epoch": 0.167911210233476, "grad_norm": 3.1875, "learning_rate": 4.7547610345456004e-05, "loss": 7.0909, "step": 7190 }, { "epoch": 0.16814474460097734, "grad_norm": 4.4375, "learning_rate": 4.753943629118556e-05, "loss": 7.1342, "step": 7200 }, { "epoch": 0.1683782789684787, "grad_norm": 4.0625, "learning_rate": 4.753124934212299e-05, "loss": 7.1149, "step": 7210 }, { "epoch": 0.16861181333598005, "grad_norm": 4.25, "learning_rate": 4.752304950295204e-05, "loss": 7.1046, "step": 7220 }, { "epoch": 0.16884534770348142, "grad_norm": 4.25, "learning_rate": 4.751483677836384e-05, "loss": 7.0758, "step": 7230 }, { "epoch": 0.16907888207098276, "grad_norm": 3.8125, "learning_rate": 4.750661117305687e-05, "loss": 7.0861, "step": 7240 }, { "epoch": 0.16931241643848413, "grad_norm": 4.53125, "learning_rate": 4.749837269173701e-05, "loss": 7.0797, "step": 7250 }, { "epoch": 0.16954595080598547, "grad_norm": 3.796875, "learning_rate": 4.7490121339117466e-05, "loss": 7.0617, "step": 7260 }, { "epoch": 0.16977948517348684, "grad_norm": 4.25, "learning_rate": 4.748185711991885e-05, "loss": 7.0637, "step": 7270 }, { "epoch": 0.17001301954098821, "grad_norm": 5.59375, "learning_rate": 4.7473580038869106e-05, "loss": 7.0878, "step": 7280 }, { "epoch": 0.17024655390848956, "grad_norm": 5.03125, "learning_rate": 4.746529010070353e-05, "loss": 7.0513, "step": 7290 }, { "epoch": 0.17048008827599093, "grad_norm": 4.09375, "learning_rate": 4.745698731016481e-05, "loss": 7.1282, "step": 7300 }, { "epoch": 0.17071362264349227, "grad_norm": 4.125, "learning_rate": 4.7448671672002966e-05, "loss": 7.0572, "step": 7310 }, { "epoch": 0.17094715701099364, "grad_norm": 3.640625, "learning_rate": 4.744034319097535e-05, "loss": 7.0331, "step": 7320 }, { "epoch": 0.17118069137849498, "grad_norm": 3.90625, "learning_rate": 4.74320018718467e-05, "loss": 7.0784, "step": 7330 }, { "epoch": 0.17141422574599635, "grad_norm": 4.09375, "learning_rate": 4.742364771938905e-05, "loss": 7.0736, "step": 7340 }, { "epoch": 0.1716477601134977, "grad_norm": 4.03125, "learning_rate": 4.741528073838183e-05, "loss": 7.023, "step": 7350 }, { "epoch": 0.17188129448099906, "grad_norm": 4.90625, "learning_rate": 4.740690093361177e-05, "loss": 7.1313, "step": 7360 }, { "epoch": 0.1721148288485004, "grad_norm": 5.09375, "learning_rate": 4.739850830987295e-05, "loss": 7.0784, "step": 7370 }, { "epoch": 0.17234836321600178, "grad_norm": 4.0625, "learning_rate": 4.739010287196678e-05, "loss": 7.0733, "step": 7380 }, { "epoch": 0.17258189758350312, "grad_norm": 3.765625, "learning_rate": 4.7381684624702015e-05, "loss": 7.1392, "step": 7390 }, { "epoch": 0.1728154319510045, "grad_norm": 3.875, "learning_rate": 4.737325357289472e-05, "loss": 7.0924, "step": 7400 }, { "epoch": 0.17304896631850586, "grad_norm": 4.65625, "learning_rate": 4.736480972136829e-05, "loss": 7.0936, "step": 7410 }, { "epoch": 0.1732825006860072, "grad_norm": 5.1875, "learning_rate": 4.735635307495344e-05, "loss": 7.0745, "step": 7420 }, { "epoch": 0.17351603505350857, "grad_norm": 3.671875, "learning_rate": 4.734788363848822e-05, "loss": 7.0919, "step": 7430 }, { "epoch": 0.17374956942100991, "grad_norm": 4.0625, "learning_rate": 4.7339401416817994e-05, "loss": 7.0874, "step": 7440 }, { "epoch": 0.17398310378851128, "grad_norm": 3.796875, "learning_rate": 4.7330906414795417e-05, "loss": 7.0813, "step": 7450 }, { "epoch": 0.17421663815601263, "grad_norm": 3.8125, "learning_rate": 4.7322398637280485e-05, "loss": 6.9973, "step": 7460 }, { "epoch": 0.174450172523514, "grad_norm": 4.15625, "learning_rate": 4.731387808914048e-05, "loss": 7.1057, "step": 7470 }, { "epoch": 0.17468370689101534, "grad_norm": 4.09375, "learning_rate": 4.7305344775250005e-05, "loss": 7.0368, "step": 7480 }, { "epoch": 0.1749172412585167, "grad_norm": 4.5625, "learning_rate": 4.729679870049096e-05, "loss": 7.1103, "step": 7490 }, { "epoch": 0.17515077562601805, "grad_norm": 4.75, "learning_rate": 4.728823986975256e-05, "loss": 7.0864, "step": 7500 }, { "epoch": 0.17515077562601805, "eval_loss": 7.089359283447266, "eval_runtime": 78.6054, "eval_samples_per_second": 12.722, "eval_steps_per_second": 12.722, "step": 7500 }, { "epoch": 0.17538430999351942, "grad_norm": 4.40625, "learning_rate": 4.7279668287931286e-05, "loss": 7.0643, "step": 7510 }, { "epoch": 0.1756178443610208, "grad_norm": 3.5625, "learning_rate": 4.727108395993094e-05, "loss": 7.0624, "step": 7520 }, { "epoch": 0.17585137872852213, "grad_norm": 5.15625, "learning_rate": 4.726248689066261e-05, "loss": 7.103, "step": 7530 }, { "epoch": 0.1760849130960235, "grad_norm": 5.4375, "learning_rate": 4.725387708504467e-05, "loss": 7.121, "step": 7540 }, { "epoch": 0.17631844746352485, "grad_norm": 4.34375, "learning_rate": 4.724525454800278e-05, "loss": 7.1449, "step": 7550 }, { "epoch": 0.17655198183102622, "grad_norm": 4.75, "learning_rate": 4.7236619284469896e-05, "loss": 7.0931, "step": 7560 }, { "epoch": 0.17678551619852756, "grad_norm": 4.0625, "learning_rate": 4.722797129938624e-05, "loss": 6.9898, "step": 7570 }, { "epoch": 0.17701905056602893, "grad_norm": 4.46875, "learning_rate": 4.72193105976993e-05, "loss": 7.1038, "step": 7580 }, { "epoch": 0.17725258493353027, "grad_norm": 4.09375, "learning_rate": 4.721063718436389e-05, "loss": 7.0301, "step": 7590 }, { "epoch": 0.17748611930103164, "grad_norm": 4.46875, "learning_rate": 4.720195106434203e-05, "loss": 7.0399, "step": 7600 }, { "epoch": 0.17771965366853298, "grad_norm": 4.625, "learning_rate": 4.719325224260306e-05, "loss": 7.1074, "step": 7610 }, { "epoch": 0.17795318803603435, "grad_norm": 4.6875, "learning_rate": 4.7184540724123565e-05, "loss": 7.0434, "step": 7620 }, { "epoch": 0.1781867224035357, "grad_norm": 4.65625, "learning_rate": 4.7175816513887395e-05, "loss": 7.0775, "step": 7630 }, { "epoch": 0.17842025677103707, "grad_norm": 5.0625, "learning_rate": 4.716707961688566e-05, "loss": 7.0809, "step": 7640 }, { "epoch": 0.17865379113853844, "grad_norm": 4.15625, "learning_rate": 4.715833003811674e-05, "loss": 7.0669, "step": 7650 }, { "epoch": 0.17888732550603978, "grad_norm": 3.75, "learning_rate": 4.714956778258626e-05, "loss": 7.0637, "step": 7660 }, { "epoch": 0.17912085987354115, "grad_norm": 4.875, "learning_rate": 4.714079285530708e-05, "loss": 7.0756, "step": 7670 }, { "epoch": 0.1793543942410425, "grad_norm": 4.90625, "learning_rate": 4.713200526129936e-05, "loss": 7.0885, "step": 7680 }, { "epoch": 0.17958792860854386, "grad_norm": 4.34375, "learning_rate": 4.7123205005590455e-05, "loss": 7.0838, "step": 7690 }, { "epoch": 0.1798214629760452, "grad_norm": 4.21875, "learning_rate": 4.711439209321498e-05, "loss": 7.1088, "step": 7700 }, { "epoch": 0.18005499734354657, "grad_norm": 4.0, "learning_rate": 4.710556652921481e-05, "loss": 7.0496, "step": 7710 }, { "epoch": 0.18028853171104792, "grad_norm": 4.28125, "learning_rate": 4.7096728318639025e-05, "loss": 7.0155, "step": 7720 }, { "epoch": 0.1805220660785493, "grad_norm": 4.21875, "learning_rate": 4.708787746654398e-05, "loss": 7.0489, "step": 7730 }, { "epoch": 0.18075560044605063, "grad_norm": 3.703125, "learning_rate": 4.707901397799322e-05, "loss": 7.0771, "step": 7740 }, { "epoch": 0.180989134813552, "grad_norm": 3.46875, "learning_rate": 4.707013785805755e-05, "loss": 7.0717, "step": 7750 }, { "epoch": 0.18122266918105337, "grad_norm": 4.71875, "learning_rate": 4.7061249111814986e-05, "loss": 7.0087, "step": 7760 }, { "epoch": 0.1814562035485547, "grad_norm": 3.71875, "learning_rate": 4.705234774435077e-05, "loss": 7.1089, "step": 7770 }, { "epoch": 0.18168973791605608, "grad_norm": 4.46875, "learning_rate": 4.704343376075737e-05, "loss": 7.0614, "step": 7780 }, { "epoch": 0.18192327228355742, "grad_norm": 4.96875, "learning_rate": 4.703450716613447e-05, "loss": 7.0628, "step": 7790 }, { "epoch": 0.1821568066510588, "grad_norm": 4.6875, "learning_rate": 4.7025567965588966e-05, "loss": 7.073, "step": 7800 }, { "epoch": 0.18239034101856014, "grad_norm": 4.5, "learning_rate": 4.701661616423497e-05, "loss": 7.0687, "step": 7810 }, { "epoch": 0.1826238753860615, "grad_norm": 3.625, "learning_rate": 4.700765176719379e-05, "loss": 7.0463, "step": 7820 }, { "epoch": 0.18285740975356285, "grad_norm": 4.90625, "learning_rate": 4.6998674779593956e-05, "loss": 7.0918, "step": 7830 }, { "epoch": 0.18309094412106422, "grad_norm": 4.15625, "learning_rate": 4.69896852065712e-05, "loss": 6.982, "step": 7840 }, { "epoch": 0.18332447848856556, "grad_norm": 5.75, "learning_rate": 4.6980683053268434e-05, "loss": 7.1136, "step": 7850 }, { "epoch": 0.18355801285606693, "grad_norm": 4.96875, "learning_rate": 4.69716683248358e-05, "loss": 7.0632, "step": 7860 }, { "epoch": 0.18379154722356827, "grad_norm": 4.21875, "learning_rate": 4.696264102643061e-05, "loss": 7.0729, "step": 7870 }, { "epoch": 0.18402508159106964, "grad_norm": 3.75, "learning_rate": 4.695360116321737e-05, "loss": 7.055, "step": 7880 }, { "epoch": 0.18425861595857101, "grad_norm": 3.734375, "learning_rate": 4.694454874036777e-05, "loss": 7.1376, "step": 7890 }, { "epoch": 0.18449215032607236, "grad_norm": 4.5, "learning_rate": 4.6935483763060715e-05, "loss": 7.1492, "step": 7900 }, { "epoch": 0.18472568469357373, "grad_norm": 4.28125, "learning_rate": 4.6926406236482256e-05, "loss": 7.0483, "step": 7910 }, { "epoch": 0.18495921906107507, "grad_norm": 5.21875, "learning_rate": 4.691731616582564e-05, "loss": 7.0607, "step": 7920 }, { "epoch": 0.18519275342857644, "grad_norm": 4.78125, "learning_rate": 4.690821355629129e-05, "loss": 7.069, "step": 7930 }, { "epoch": 0.18542628779607778, "grad_norm": 5.15625, "learning_rate": 4.68990984130868e-05, "loss": 7.1053, "step": 7940 }, { "epoch": 0.18565982216357915, "grad_norm": 4.28125, "learning_rate": 4.688997074142694e-05, "loss": 7.0431, "step": 7950 }, { "epoch": 0.1858933565310805, "grad_norm": 4.28125, "learning_rate": 4.688083054653363e-05, "loss": 7.0505, "step": 7960 }, { "epoch": 0.18612689089858186, "grad_norm": 5.0, "learning_rate": 4.6871677833635994e-05, "loss": 7.0326, "step": 7970 }, { "epoch": 0.1863604252660832, "grad_norm": 4.71875, "learning_rate": 4.686251260797026e-05, "loss": 7.0796, "step": 7980 }, { "epoch": 0.18659395963358458, "grad_norm": 4.03125, "learning_rate": 4.685333487477986e-05, "loss": 7.0962, "step": 7990 }, { "epoch": 0.18682749400108595, "grad_norm": 4.125, "learning_rate": 4.684414463931538e-05, "loss": 7.0925, "step": 8000 }, { "epoch": 0.18682749400108595, "eval_loss": 7.085883617401123, "eval_runtime": 78.6913, "eval_samples_per_second": 12.708, "eval_steps_per_second": 12.708, "step": 8000 }, { "epoch": 0.1870610283685873, "grad_norm": 4.59375, "learning_rate": 4.6834941906834524e-05, "loss": 7.0736, "step": 8010 }, { "epoch": 0.18729456273608866, "grad_norm": 4.78125, "learning_rate": 4.682572668260218e-05, "loss": 7.0512, "step": 8020 }, { "epoch": 0.18752809710359, "grad_norm": 4.0625, "learning_rate": 4.681649897189036e-05, "loss": 7.149, "step": 8030 }, { "epoch": 0.18776163147109137, "grad_norm": 4.09375, "learning_rate": 4.6807258779978235e-05, "loss": 7.1378, "step": 8040 }, { "epoch": 0.18799516583859271, "grad_norm": 3.90625, "learning_rate": 4.679800611215212e-05, "loss": 7.055, "step": 8050 }, { "epoch": 0.18822870020609409, "grad_norm": 3.75, "learning_rate": 4.678874097370545e-05, "loss": 7.0222, "step": 8060 }, { "epoch": 0.18846223457359543, "grad_norm": 5.125, "learning_rate": 4.6779463369938794e-05, "loss": 7.1322, "step": 8070 }, { "epoch": 0.1886957689410968, "grad_norm": 4.375, "learning_rate": 4.677017330615988e-05, "loss": 7.1085, "step": 8080 }, { "epoch": 0.18892930330859814, "grad_norm": 5.09375, "learning_rate": 4.676087078768353e-05, "loss": 6.9933, "step": 8090 }, { "epoch": 0.1891628376760995, "grad_norm": 4.09375, "learning_rate": 4.67515558198317e-05, "loss": 7.1194, "step": 8100 }, { "epoch": 0.18939637204360088, "grad_norm": 4.9375, "learning_rate": 4.674222840793351e-05, "loss": 7.1464, "step": 8110 }, { "epoch": 0.18962990641110222, "grad_norm": 4.53125, "learning_rate": 4.673288855732513e-05, "loss": 7.1182, "step": 8120 }, { "epoch": 0.1898634407786036, "grad_norm": 4.1875, "learning_rate": 4.672353627334989e-05, "loss": 7.0546, "step": 8130 }, { "epoch": 0.19009697514610494, "grad_norm": 3.46875, "learning_rate": 4.6714171561358226e-05, "loss": 7.08, "step": 8140 }, { "epoch": 0.1903305095136063, "grad_norm": 4.34375, "learning_rate": 4.6704794426707685e-05, "loss": 7.0706, "step": 8150 }, { "epoch": 0.19056404388110765, "grad_norm": 4.75, "learning_rate": 4.669540487476291e-05, "loss": 7.0407, "step": 8160 }, { "epoch": 0.19079757824860902, "grad_norm": 3.578125, "learning_rate": 4.6686002910895666e-05, "loss": 7.0655, "step": 8170 }, { "epoch": 0.19103111261611036, "grad_norm": 3.84375, "learning_rate": 4.66765885404848e-05, "loss": 7.1056, "step": 8180 }, { "epoch": 0.19126464698361173, "grad_norm": 3.859375, "learning_rate": 4.666716176891627e-05, "loss": 7.1022, "step": 8190 }, { "epoch": 0.19149818135111307, "grad_norm": 4.40625, "learning_rate": 4.665772260158311e-05, "loss": 7.0981, "step": 8200 }, { "epoch": 0.19173171571861444, "grad_norm": 4.40625, "learning_rate": 4.6648271043885474e-05, "loss": 7.1108, "step": 8210 }, { "epoch": 0.19196525008611579, "grad_norm": 3.484375, "learning_rate": 4.66388071012306e-05, "loss": 7.1017, "step": 8220 }, { "epoch": 0.19219878445361716, "grad_norm": 4.46875, "learning_rate": 4.662933077903277e-05, "loss": 7.1122, "step": 8230 }, { "epoch": 0.19243231882111853, "grad_norm": 4.71875, "learning_rate": 4.66198420827134e-05, "loss": 7.1125, "step": 8240 }, { "epoch": 0.19266585318861987, "grad_norm": 5.25, "learning_rate": 4.661034101770098e-05, "loss": 7.1062, "step": 8250 }, { "epoch": 0.19289938755612124, "grad_norm": 4.625, "learning_rate": 4.660082758943103e-05, "loss": 7.0651, "step": 8260 }, { "epoch": 0.19313292192362258, "grad_norm": 4.6875, "learning_rate": 4.6591301803346185e-05, "loss": 7.1083, "step": 8270 }, { "epoch": 0.19336645629112395, "grad_norm": 3.9375, "learning_rate": 4.658176366489615e-05, "loss": 7.0432, "step": 8280 }, { "epoch": 0.1935999906586253, "grad_norm": 4.21875, "learning_rate": 4.657221317953768e-05, "loss": 7.1022, "step": 8290 }, { "epoch": 0.19383352502612666, "grad_norm": 4.4375, "learning_rate": 4.6562650352734596e-05, "loss": 7.1296, "step": 8300 }, { "epoch": 0.194067059393628, "grad_norm": 3.234375, "learning_rate": 4.655307518995778e-05, "loss": 7.0734, "step": 8310 }, { "epoch": 0.19430059376112938, "grad_norm": 3.6875, "learning_rate": 4.6543487696685186e-05, "loss": 7.0804, "step": 8320 }, { "epoch": 0.19453412812863072, "grad_norm": 3.6875, "learning_rate": 4.653388787840179e-05, "loss": 7.1387, "step": 8330 }, { "epoch": 0.1947676624961321, "grad_norm": 4.1875, "learning_rate": 4.652427574059967e-05, "loss": 7.0769, "step": 8340 }, { "epoch": 0.19500119686363346, "grad_norm": 4.78125, "learning_rate": 4.6514651288777895e-05, "loss": 7.0355, "step": 8350 }, { "epoch": 0.1952347312311348, "grad_norm": 4.59375, "learning_rate": 4.650501452844261e-05, "loss": 7.0994, "step": 8360 }, { "epoch": 0.19546826559863617, "grad_norm": 4.125, "learning_rate": 4.6495365465107024e-05, "loss": 7.0847, "step": 8370 }, { "epoch": 0.1957017999661375, "grad_norm": 5.1875, "learning_rate": 4.6485704104291317e-05, "loss": 7.0772, "step": 8380 }, { "epoch": 0.19593533433363888, "grad_norm": 3.828125, "learning_rate": 4.647603045152277e-05, "loss": 7.1217, "step": 8390 }, { "epoch": 0.19616886870114023, "grad_norm": 4.09375, "learning_rate": 4.646634451233567e-05, "loss": 7.0237, "step": 8400 }, { "epoch": 0.1964024030686416, "grad_norm": 3.9375, "learning_rate": 4.645664629227133e-05, "loss": 7.1434, "step": 8410 }, { "epoch": 0.19663593743614294, "grad_norm": 4.53125, "learning_rate": 4.6446935796878093e-05, "loss": 7.0863, "step": 8420 }, { "epoch": 0.1968694718036443, "grad_norm": 4.1875, "learning_rate": 4.643721303171133e-05, "loss": 7.0777, "step": 8430 }, { "epoch": 0.19710300617114565, "grad_norm": 3.859375, "learning_rate": 4.642747800233342e-05, "loss": 7.0665, "step": 8440 }, { "epoch": 0.19733654053864702, "grad_norm": 4.3125, "learning_rate": 4.641773071431377e-05, "loss": 7.1153, "step": 8450 }, { "epoch": 0.19757007490614836, "grad_norm": 4.03125, "learning_rate": 4.640797117322879e-05, "loss": 7.0493, "step": 8460 }, { "epoch": 0.19780360927364973, "grad_norm": 4.75, "learning_rate": 4.63981993846619e-05, "loss": 7.0699, "step": 8470 }, { "epoch": 0.1980371436411511, "grad_norm": 4.25, "learning_rate": 4.638841535420355e-05, "loss": 7.0792, "step": 8480 }, { "epoch": 0.19827067800865245, "grad_norm": 5.25, "learning_rate": 4.6378619087451167e-05, "loss": 7.0778, "step": 8490 }, { "epoch": 0.19850421237615382, "grad_norm": 5.09375, "learning_rate": 4.6368810590009185e-05, "loss": 7.085, "step": 8500 }, { "epoch": 0.19850421237615382, "eval_loss": 7.086668968200684, "eval_runtime": 78.5487, "eval_samples_per_second": 12.731, "eval_steps_per_second": 12.731, "step": 8500 }, { "epoch": 0.19873774674365516, "grad_norm": 3.859375, "learning_rate": 4.6358989867489035e-05, "loss": 7.058, "step": 8510 }, { "epoch": 0.19897128111115653, "grad_norm": 3.5, "learning_rate": 4.634915692550915e-05, "loss": 7.1478, "step": 8520 }, { "epoch": 0.19920481547865787, "grad_norm": 4.96875, "learning_rate": 4.633931176969494e-05, "loss": 7.0332, "step": 8530 }, { "epoch": 0.19943834984615924, "grad_norm": 4.46875, "learning_rate": 4.6329454405678805e-05, "loss": 7.0968, "step": 8540 }, { "epoch": 0.19967188421366058, "grad_norm": 5.21875, "learning_rate": 4.631958483910015e-05, "loss": 7.0692, "step": 8550 }, { "epoch": 0.19990541858116195, "grad_norm": 4.1875, "learning_rate": 4.630970307560535e-05, "loss": 7.0244, "step": 8560 }, { "epoch": 0.2001389529486633, "grad_norm": 5.3125, "learning_rate": 4.629980912084773e-05, "loss": 6.9845, "step": 8570 }, { "epoch": 0.20037248731616467, "grad_norm": 3.765625, "learning_rate": 4.628990298048762e-05, "loss": 7.0023, "step": 8580 }, { "epoch": 0.20060602168366604, "grad_norm": 4.5625, "learning_rate": 4.627998466019233e-05, "loss": 7.0751, "step": 8590 }, { "epoch": 0.20083955605116738, "grad_norm": 3.875, "learning_rate": 4.627005416563611e-05, "loss": 7.0871, "step": 8600 }, { "epoch": 0.20107309041866875, "grad_norm": 4.90625, "learning_rate": 4.626011150250019e-05, "loss": 7.0721, "step": 8610 }, { "epoch": 0.2013066247861701, "grad_norm": 3.671875, "learning_rate": 4.625015667647275e-05, "loss": 7.1819, "step": 8620 }, { "epoch": 0.20154015915367146, "grad_norm": 4.59375, "learning_rate": 4.624018969324896e-05, "loss": 7.0574, "step": 8630 }, { "epoch": 0.2017736935211728, "grad_norm": 5.59375, "learning_rate": 4.623021055853091e-05, "loss": 7.0581, "step": 8640 }, { "epoch": 0.20200722788867417, "grad_norm": 4.5, "learning_rate": 4.622021927802764e-05, "loss": 7.1088, "step": 8650 }, { "epoch": 0.20224076225617552, "grad_norm": 4.03125, "learning_rate": 4.621021585745517e-05, "loss": 7.0536, "step": 8660 }, { "epoch": 0.20247429662367689, "grad_norm": 4.28125, "learning_rate": 4.6200200302536444e-05, "loss": 7.0794, "step": 8670 }, { "epoch": 0.20270783099117823, "grad_norm": 4.96875, "learning_rate": 4.619017261900135e-05, "loss": 7.0523, "step": 8680 }, { "epoch": 0.2029413653586796, "grad_norm": 4.5, "learning_rate": 4.618013281258673e-05, "loss": 7.0718, "step": 8690 }, { "epoch": 0.20317489972618094, "grad_norm": 4.125, "learning_rate": 4.6170080889036334e-05, "loss": 7.1112, "step": 8700 }, { "epoch": 0.2034084340936823, "grad_norm": 4.9375, "learning_rate": 4.616001685410086e-05, "loss": 7.1493, "step": 8710 }, { "epoch": 0.20364196846118368, "grad_norm": 4.4375, "learning_rate": 4.614994071353793e-05, "loss": 7.0951, "step": 8720 }, { "epoch": 0.20387550282868502, "grad_norm": 4.0625, "learning_rate": 4.613985247311211e-05, "loss": 7.0185, "step": 8730 }, { "epoch": 0.2041090371961864, "grad_norm": 4.6875, "learning_rate": 4.6129752138594874e-05, "loss": 7.1193, "step": 8740 }, { "epoch": 0.20434257156368774, "grad_norm": 5.21875, "learning_rate": 4.61196397157646e-05, "loss": 7.1095, "step": 8750 }, { "epoch": 0.2045761059311891, "grad_norm": 3.96875, "learning_rate": 4.610951521040659e-05, "loss": 7.0829, "step": 8760 }, { "epoch": 0.20480964029869045, "grad_norm": 4.46875, "learning_rate": 4.60993786283131e-05, "loss": 7.1075, "step": 8770 }, { "epoch": 0.20504317466619182, "grad_norm": 3.8125, "learning_rate": 4.608922997528322e-05, "loss": 7.0323, "step": 8780 }, { "epoch": 0.20527670903369316, "grad_norm": 4.4375, "learning_rate": 4.607906925712302e-05, "loss": 7.0605, "step": 8790 }, { "epoch": 0.20551024340119453, "grad_norm": 4.53125, "learning_rate": 4.606889647964541e-05, "loss": 7.1252, "step": 8800 }, { "epoch": 0.20574377776869587, "grad_norm": 4.28125, "learning_rate": 4.605871164867024e-05, "loss": 7.0631, "step": 8810 }, { "epoch": 0.20597731213619724, "grad_norm": 4.90625, "learning_rate": 4.6048514770024245e-05, "loss": 7.0321, "step": 8820 }, { "epoch": 0.2062108465036986, "grad_norm": 4.46875, "learning_rate": 4.603830584954104e-05, "loss": 7.1026, "step": 8830 }, { "epoch": 0.20644438087119996, "grad_norm": 3.703125, "learning_rate": 4.6028084893061155e-05, "loss": 7.0239, "step": 8840 }, { "epoch": 0.20667791523870133, "grad_norm": 3.859375, "learning_rate": 4.601785190643198e-05, "loss": 7.0108, "step": 8850 }, { "epoch": 0.20691144960620267, "grad_norm": 3.375, "learning_rate": 4.6007606895507795e-05, "loss": 7.0934, "step": 8860 }, { "epoch": 0.20714498397370404, "grad_norm": 4.125, "learning_rate": 4.599734986614978e-05, "loss": 7.0604, "step": 8870 }, { "epoch": 0.20737851834120538, "grad_norm": 4.21875, "learning_rate": 4.598708082422594e-05, "loss": 7.0574, "step": 8880 }, { "epoch": 0.20761205270870675, "grad_norm": 5.15625, "learning_rate": 4.597679977561122e-05, "loss": 7.0665, "step": 8890 }, { "epoch": 0.2078455870762081, "grad_norm": 4.5625, "learning_rate": 4.596650672618738e-05, "loss": 7.042, "step": 8900 }, { "epoch": 0.20807912144370946, "grad_norm": 4.03125, "learning_rate": 4.595620168184307e-05, "loss": 7.1056, "step": 8910 }, { "epoch": 0.2083126558112108, "grad_norm": 4.5625, "learning_rate": 4.5945884648473794e-05, "loss": 7.078, "step": 8920 }, { "epoch": 0.20854619017871218, "grad_norm": 5.4375, "learning_rate": 4.593555563198192e-05, "loss": 7.0621, "step": 8930 }, { "epoch": 0.20877972454621352, "grad_norm": 4.53125, "learning_rate": 4.5925214638276673e-05, "loss": 7.078, "step": 8940 }, { "epoch": 0.2090132589137149, "grad_norm": 5.46875, "learning_rate": 4.591486167327411e-05, "loss": 7.0942, "step": 8950 }, { "epoch": 0.20924679328121626, "grad_norm": 3.46875, "learning_rate": 4.590449674289718e-05, "loss": 7.0988, "step": 8960 }, { "epoch": 0.2094803276487176, "grad_norm": 4.125, "learning_rate": 4.589411985307563e-05, "loss": 7.108, "step": 8970 }, { "epoch": 0.20971386201621897, "grad_norm": 4.5, "learning_rate": 4.588373100974608e-05, "loss": 7.1071, "step": 8980 }, { "epoch": 0.2099473963837203, "grad_norm": 3.71875, "learning_rate": 4.587333021885197e-05, "loss": 7.1148, "step": 8990 }, { "epoch": 0.21018093075122168, "grad_norm": 4.4375, "learning_rate": 4.5862917486343595e-05, "loss": 7.0367, "step": 9000 }, { "epoch": 0.21018093075122168, "eval_loss": 7.078995704650879, "eval_runtime": 78.5025, "eval_samples_per_second": 12.738, "eval_steps_per_second": 12.738, "step": 9000 }, { "epoch": 0.21041446511872303, "grad_norm": 3.96875, "learning_rate": 4.585249281817806e-05, "loss": 7.0568, "step": 9010 }, { "epoch": 0.2106479994862244, "grad_norm": 5.1875, "learning_rate": 4.584205622031931e-05, "loss": 7.1305, "step": 9020 }, { "epoch": 0.21088153385372574, "grad_norm": 5.875, "learning_rate": 4.583160769873812e-05, "loss": 7.1364, "step": 9030 }, { "epoch": 0.2111150682212271, "grad_norm": 4.625, "learning_rate": 4.5821147259412076e-05, "loss": 7.0556, "step": 9040 }, { "epoch": 0.21134860258872845, "grad_norm": 3.65625, "learning_rate": 4.581067490832559e-05, "loss": 7.0174, "step": 9050 }, { "epoch": 0.21158213695622982, "grad_norm": 3.84375, "learning_rate": 4.5800190651469874e-05, "loss": 7.0409, "step": 9060 }, { "epoch": 0.2118156713237312, "grad_norm": 4.75, "learning_rate": 4.578969449484297e-05, "loss": 7.0767, "step": 9070 }, { "epoch": 0.21204920569123253, "grad_norm": 4.125, "learning_rate": 4.5779186444449726e-05, "loss": 7.0508, "step": 9080 }, { "epoch": 0.2122827400587339, "grad_norm": 5.0625, "learning_rate": 4.576866650630178e-05, "loss": 6.9706, "step": 9090 }, { "epoch": 0.21251627442623525, "grad_norm": 4.3125, "learning_rate": 4.575813468641758e-05, "loss": 7.0635, "step": 9100 }, { "epoch": 0.21274980879373662, "grad_norm": 4.34375, "learning_rate": 4.5747590990822375e-05, "loss": 7.1166, "step": 9110 }, { "epoch": 0.21298334316123796, "grad_norm": 4.6875, "learning_rate": 4.573703542554821e-05, "loss": 7.1313, "step": 9120 }, { "epoch": 0.21321687752873933, "grad_norm": 4.15625, "learning_rate": 4.57264679966339e-05, "loss": 7.0247, "step": 9130 }, { "epoch": 0.21345041189624067, "grad_norm": 4.53125, "learning_rate": 4.571588871012507e-05, "loss": 7.0877, "step": 9140 }, { "epoch": 0.21368394626374204, "grad_norm": 5.375, "learning_rate": 4.570529757207412e-05, "loss": 7.1347, "step": 9150 }, { "epoch": 0.21391748063124338, "grad_norm": 4.21875, "learning_rate": 4.5694694588540235e-05, "loss": 7.1196, "step": 9160 }, { "epoch": 0.21415101499874475, "grad_norm": 3.59375, "learning_rate": 4.568407976558937e-05, "loss": 7.0623, "step": 9170 }, { "epoch": 0.2143845493662461, "grad_norm": 4.4375, "learning_rate": 4.567345310929425e-05, "loss": 7.0431, "step": 9180 }, { "epoch": 0.21461808373374747, "grad_norm": 5.25, "learning_rate": 4.566281462573438e-05, "loss": 7.0693, "step": 9190 }, { "epoch": 0.21485161810124884, "grad_norm": 3.921875, "learning_rate": 4.565216432099603e-05, "loss": 7.0459, "step": 9200 }, { "epoch": 0.21508515246875018, "grad_norm": 5.09375, "learning_rate": 4.564150220117223e-05, "loss": 7.1128, "step": 9210 }, { "epoch": 0.21531868683625155, "grad_norm": 4.09375, "learning_rate": 4.563082827236277e-05, "loss": 7.0471, "step": 9220 }, { "epoch": 0.2155522212037529, "grad_norm": 3.96875, "learning_rate": 4.5620142540674183e-05, "loss": 7.0634, "step": 9230 }, { "epoch": 0.21578575557125426, "grad_norm": 4.46875, "learning_rate": 4.5609445012219786e-05, "loss": 7.0979, "step": 9240 }, { "epoch": 0.2160192899387556, "grad_norm": 4.90625, "learning_rate": 4.5598735693119624e-05, "loss": 7.115, "step": 9250 }, { "epoch": 0.21625282430625697, "grad_norm": 4.3125, "learning_rate": 4.558801458950047e-05, "loss": 7.1101, "step": 9260 }, { "epoch": 0.21648635867375832, "grad_norm": 4.71875, "learning_rate": 4.557728170749587e-05, "loss": 7.0465, "step": 9270 }, { "epoch": 0.2167198930412597, "grad_norm": 3.859375, "learning_rate": 4.55665370532461e-05, "loss": 7.0817, "step": 9280 }, { "epoch": 0.21695342740876103, "grad_norm": 5.03125, "learning_rate": 4.5555780632898174e-05, "loss": 7.1071, "step": 9290 }, { "epoch": 0.2171869617762624, "grad_norm": 4.84375, "learning_rate": 4.554501245260581e-05, "loss": 7.1189, "step": 9300 }, { "epoch": 0.21742049614376377, "grad_norm": 4.28125, "learning_rate": 4.55342325185295e-05, "loss": 7.0776, "step": 9310 }, { "epoch": 0.2176540305112651, "grad_norm": 3.984375, "learning_rate": 4.552344083683641e-05, "loss": 7.0712, "step": 9320 }, { "epoch": 0.21788756487876648, "grad_norm": 3.65625, "learning_rate": 4.5512637413700456e-05, "loss": 7.0466, "step": 9330 }, { "epoch": 0.21812109924626782, "grad_norm": 3.90625, "learning_rate": 4.550182225530228e-05, "loss": 7.0545, "step": 9340 }, { "epoch": 0.2183546336137692, "grad_norm": 4.71875, "learning_rate": 4.549099536782922e-05, "loss": 7.0325, "step": 9350 }, { "epoch": 0.21858816798127054, "grad_norm": 4.375, "learning_rate": 4.548015675747533e-05, "loss": 7.0233, "step": 9360 }, { "epoch": 0.2188217023487719, "grad_norm": 4.3125, "learning_rate": 4.546930643044138e-05, "loss": 7.1135, "step": 9370 }, { "epoch": 0.21905523671627325, "grad_norm": 4.25, "learning_rate": 4.545844439293481e-05, "loss": 6.9697, "step": 9380 }, { "epoch": 0.21928877108377462, "grad_norm": 3.390625, "learning_rate": 4.544757065116978e-05, "loss": 7.055, "step": 9390 }, { "epoch": 0.21952230545127596, "grad_norm": 3.546875, "learning_rate": 4.5436685211367176e-05, "loss": 7.0537, "step": 9400 }, { "epoch": 0.21975583981877733, "grad_norm": 4.8125, "learning_rate": 4.542578807975453e-05, "loss": 7.0907, "step": 9410 }, { "epoch": 0.21998937418627867, "grad_norm": 4.375, "learning_rate": 4.5414879262566086e-05, "loss": 7.0037, "step": 9420 }, { "epoch": 0.22022290855378004, "grad_norm": 4.34375, "learning_rate": 4.5403958766042767e-05, "loss": 7.0528, "step": 9430 }, { "epoch": 0.22045644292128141, "grad_norm": 4.0, "learning_rate": 4.539302659643218e-05, "loss": 7.0773, "step": 9440 }, { "epoch": 0.22068997728878276, "grad_norm": 4.09375, "learning_rate": 4.538208275998861e-05, "loss": 7.1002, "step": 9450 }, { "epoch": 0.22092351165628413, "grad_norm": 4.34375, "learning_rate": 4.5371127262973024e-05, "loss": 7.0881, "step": 9460 }, { "epoch": 0.22115704602378547, "grad_norm": 3.4375, "learning_rate": 4.536016011165304e-05, "loss": 7.1279, "step": 9470 }, { "epoch": 0.22139058039128684, "grad_norm": 4.59375, "learning_rate": 4.5349181312302956e-05, "loss": 7.0726, "step": 9480 }, { "epoch": 0.22162411475878818, "grad_norm": 4.40625, "learning_rate": 4.533819087120375e-05, "loss": 7.099, "step": 9490 }, { "epoch": 0.22185764912628955, "grad_norm": 5.125, "learning_rate": 4.532718879464303e-05, "loss": 7.1385, "step": 9500 }, { "epoch": 0.22185764912628955, "eval_loss": 7.077002048492432, "eval_runtime": 79.3244, "eval_samples_per_second": 12.606, "eval_steps_per_second": 12.606, "step": 9500 }, { "epoch": 0.2220911834937909, "grad_norm": 5.0625, "learning_rate": 4.531617508891508e-05, "loss": 7.0539, "step": 9510 }, { "epoch": 0.22232471786129226, "grad_norm": 4.0, "learning_rate": 4.530514976032082e-05, "loss": 7.0237, "step": 9520 }, { "epoch": 0.2225582522287936, "grad_norm": 4.84375, "learning_rate": 4.529411281516784e-05, "loss": 7.0307, "step": 9530 }, { "epoch": 0.22279178659629498, "grad_norm": 5.0625, "learning_rate": 4.5283064259770383e-05, "loss": 7.1154, "step": 9540 }, { "epoch": 0.22302532096379635, "grad_norm": 4.53125, "learning_rate": 4.527200410044929e-05, "loss": 7.0054, "step": 9550 }, { "epoch": 0.2232588553312977, "grad_norm": 5.5625, "learning_rate": 4.526093234353208e-05, "loss": 7.1062, "step": 9560 }, { "epoch": 0.22349238969879906, "grad_norm": 4.03125, "learning_rate": 4.52498489953529e-05, "loss": 7.0629, "step": 9570 }, { "epoch": 0.2237259240663004, "grad_norm": 3.296875, "learning_rate": 4.523875406225252e-05, "loss": 7.0822, "step": 9580 }, { "epoch": 0.22395945843380177, "grad_norm": 4.1875, "learning_rate": 4.5227647550578343e-05, "loss": 7.0955, "step": 9590 }, { "epoch": 0.22419299280130311, "grad_norm": 3.75, "learning_rate": 4.5216529466684385e-05, "loss": 7.0785, "step": 9600 }, { "epoch": 0.22442652716880448, "grad_norm": 4.40625, "learning_rate": 4.520539981693132e-05, "loss": 7.0417, "step": 9610 }, { "epoch": 0.22466006153630583, "grad_norm": 3.671875, "learning_rate": 4.5194258607686373e-05, "loss": 7.0996, "step": 9620 }, { "epoch": 0.2248935959038072, "grad_norm": 4.46875, "learning_rate": 4.518310584532345e-05, "loss": 7.0556, "step": 9630 }, { "epoch": 0.22512713027130854, "grad_norm": 4.0, "learning_rate": 4.5171941536223015e-05, "loss": 7.0388, "step": 9640 }, { "epoch": 0.2253606646388099, "grad_norm": 4.875, "learning_rate": 4.516076568677218e-05, "loss": 7.0583, "step": 9650 }, { "epoch": 0.22559419900631128, "grad_norm": 4.4375, "learning_rate": 4.514957830336463e-05, "loss": 7.0834, "step": 9660 }, { "epoch": 0.22582773337381262, "grad_norm": 3.6875, "learning_rate": 4.5138379392400655e-05, "loss": 7.0899, "step": 9670 }, { "epoch": 0.226061267741314, "grad_norm": 4.75, "learning_rate": 4.5127168960287144e-05, "loss": 7.0465, "step": 9680 }, { "epoch": 0.22629480210881533, "grad_norm": 4.15625, "learning_rate": 4.5115947013437576e-05, "loss": 7.0337, "step": 9690 }, { "epoch": 0.2265283364763167, "grad_norm": 4.25, "learning_rate": 4.510471355827202e-05, "loss": 7.0889, "step": 9700 }, { "epoch": 0.22676187084381805, "grad_norm": 4.0625, "learning_rate": 4.509346860121712e-05, "loss": 7.0, "step": 9710 }, { "epoch": 0.22699540521131942, "grad_norm": 3.546875, "learning_rate": 4.508221214870611e-05, "loss": 7.0567, "step": 9720 }, { "epoch": 0.22722893957882076, "grad_norm": 4.6875, "learning_rate": 4.50709442071788e-05, "loss": 7.1008, "step": 9730 }, { "epoch": 0.22746247394632213, "grad_norm": 4.25, "learning_rate": 4.505966478308156e-05, "loss": 7.0628, "step": 9740 }, { "epoch": 0.22769600831382347, "grad_norm": 3.765625, "learning_rate": 4.504837388286735e-05, "loss": 7.1064, "step": 9750 }, { "epoch": 0.22792954268132484, "grad_norm": 4.8125, "learning_rate": 4.503707151299566e-05, "loss": 7.0654, "step": 9760 }, { "epoch": 0.22816307704882619, "grad_norm": 3.734375, "learning_rate": 4.502575767993261e-05, "loss": 7.1075, "step": 9770 }, { "epoch": 0.22839661141632756, "grad_norm": 4.125, "learning_rate": 4.50144323901508e-05, "loss": 7.0781, "step": 9780 }, { "epoch": 0.22863014578382893, "grad_norm": 4.625, "learning_rate": 4.500309565012942e-05, "loss": 7.0912, "step": 9790 }, { "epoch": 0.22886368015133027, "grad_norm": 4.9375, "learning_rate": 4.499174746635423e-05, "loss": 7.1194, "step": 9800 }, { "epoch": 0.22909721451883164, "grad_norm": 4.5625, "learning_rate": 4.4980387845317494e-05, "loss": 7.0914, "step": 9810 }, { "epoch": 0.22933074888633298, "grad_norm": 4.5, "learning_rate": 4.496901679351806e-05, "loss": 7.0414, "step": 9820 }, { "epoch": 0.22956428325383435, "grad_norm": 4.125, "learning_rate": 4.495763431746127e-05, "loss": 7.0668, "step": 9830 }, { "epoch": 0.2297978176213357, "grad_norm": 4.15625, "learning_rate": 4.494624042365907e-05, "loss": 7.0526, "step": 9840 }, { "epoch": 0.23003135198883706, "grad_norm": 4.34375, "learning_rate": 4.4934835118629856e-05, "loss": 7.0127, "step": 9850 }, { "epoch": 0.2302648863563384, "grad_norm": 4.84375, "learning_rate": 4.492341840889861e-05, "loss": 7.0579, "step": 9860 }, { "epoch": 0.23049842072383978, "grad_norm": 3.40625, "learning_rate": 4.491199030099683e-05, "loss": 7.1213, "step": 9870 }, { "epoch": 0.23073195509134112, "grad_norm": 4.59375, "learning_rate": 4.490055080146252e-05, "loss": 7.0378, "step": 9880 }, { "epoch": 0.2309654894588425, "grad_norm": 4.5, "learning_rate": 4.48890999168402e-05, "loss": 7.1403, "step": 9890 }, { "epoch": 0.23119902382634386, "grad_norm": 3.859375, "learning_rate": 4.487763765368092e-05, "loss": 7.0937, "step": 9900 }, { "epoch": 0.2314325581938452, "grad_norm": 4.09375, "learning_rate": 4.486616401854223e-05, "loss": 7.126, "step": 9910 }, { "epoch": 0.23166609256134657, "grad_norm": 3.953125, "learning_rate": 4.4854679017988187e-05, "loss": 7.0559, "step": 9920 }, { "epoch": 0.2318996269288479, "grad_norm": 4.0625, "learning_rate": 4.484318265858934e-05, "loss": 7.0788, "step": 9930 }, { "epoch": 0.23213316129634928, "grad_norm": 4.40625, "learning_rate": 4.483167494692277e-05, "loss": 6.9937, "step": 9940 }, { "epoch": 0.23236669566385063, "grad_norm": 4.03125, "learning_rate": 4.4820155889572004e-05, "loss": 7.034, "step": 9950 }, { "epoch": 0.232600230031352, "grad_norm": 4.1875, "learning_rate": 4.4808625493127096e-05, "loss": 7.0625, "step": 9960 }, { "epoch": 0.23283376439885334, "grad_norm": 4.5, "learning_rate": 4.479708376418458e-05, "loss": 7.064, "step": 9970 }, { "epoch": 0.2330672987663547, "grad_norm": 3.71875, "learning_rate": 4.478553070934746e-05, "loss": 7.0042, "step": 9980 }, { "epoch": 0.23330083313385605, "grad_norm": 3.59375, "learning_rate": 4.4773966335225224e-05, "loss": 7.024, "step": 9990 }, { "epoch": 0.23353436750135742, "grad_norm": 4.21875, "learning_rate": 4.476239064843386e-05, "loss": 7.047, "step": 10000 }, { "epoch": 0.23353436750135742, "eval_loss": 7.0740203857421875, "eval_runtime": 78.8478, "eval_samples_per_second": 12.683, "eval_steps_per_second": 12.683, "step": 10000 }, { "epoch": 0.23376790186885876, "grad_norm": 4.90625, "learning_rate": 4.47508036555958e-05, "loss": 7.0746, "step": 10010 }, { "epoch": 0.23400143623636013, "grad_norm": 4.59375, "learning_rate": 4.473920536333994e-05, "loss": 7.0734, "step": 10020 }, { "epoch": 0.2342349706038615, "grad_norm": 4.375, "learning_rate": 4.472759577830166e-05, "loss": 7.0521, "step": 10030 }, { "epoch": 0.23446850497136285, "grad_norm": 3.859375, "learning_rate": 4.4715974907122805e-05, "loss": 7.0938, "step": 10040 }, { "epoch": 0.23470203933886422, "grad_norm": 4.75, "learning_rate": 4.470434275645166e-05, "loss": 7.0224, "step": 10050 }, { "epoch": 0.23493557370636556, "grad_norm": 3.671875, "learning_rate": 4.469269933294296e-05, "loss": 7.0761, "step": 10060 }, { "epoch": 0.23516910807386693, "grad_norm": 4.1875, "learning_rate": 4.468104464325791e-05, "loss": 7.033, "step": 10070 }, { "epoch": 0.23540264244136827, "grad_norm": 4.28125, "learning_rate": 4.4669378694064126e-05, "loss": 7.0559, "step": 10080 }, { "epoch": 0.23563617680886964, "grad_norm": 4.375, "learning_rate": 4.465770149203572e-05, "loss": 7.0683, "step": 10090 }, { "epoch": 0.23586971117637098, "grad_norm": 5.375, "learning_rate": 4.464601304385319e-05, "loss": 7.0413, "step": 10100 }, { "epoch": 0.23610324554387235, "grad_norm": 3.734375, "learning_rate": 4.463431335620347e-05, "loss": 7.1245, "step": 10110 }, { "epoch": 0.2363367799113737, "grad_norm": 3.90625, "learning_rate": 4.462260243577998e-05, "loss": 7.1213, "step": 10120 }, { "epoch": 0.23657031427887507, "grad_norm": 4.03125, "learning_rate": 4.4610880289282496e-05, "loss": 7.0781, "step": 10130 }, { "epoch": 0.23680384864637644, "grad_norm": 5.875, "learning_rate": 4.459914692341727e-05, "loss": 7.0322, "step": 10140 }, { "epoch": 0.23703738301387778, "grad_norm": 4.78125, "learning_rate": 4.4587402344896934e-05, "loss": 7.0823, "step": 10150 }, { "epoch": 0.23727091738137915, "grad_norm": 3.65625, "learning_rate": 4.457564656044056e-05, "loss": 6.9947, "step": 10160 }, { "epoch": 0.2375044517488805, "grad_norm": 3.765625, "learning_rate": 4.456387957677361e-05, "loss": 6.9977, "step": 10170 }, { "epoch": 0.23773798611638186, "grad_norm": 4.375, "learning_rate": 4.455210140062799e-05, "loss": 7.0529, "step": 10180 }, { "epoch": 0.2379715204838832, "grad_norm": 5.0, "learning_rate": 4.454031203874196e-05, "loss": 7.1245, "step": 10190 }, { "epoch": 0.23820505485138457, "grad_norm": 3.34375, "learning_rate": 4.452851149786021e-05, "loss": 7.1532, "step": 10200 }, { "epoch": 0.23843858921888592, "grad_norm": 4.15625, "learning_rate": 4.4516699784733834e-05, "loss": 7.0349, "step": 10210 }, { "epoch": 0.23867212358638729, "grad_norm": 4.34375, "learning_rate": 4.450487690612029e-05, "loss": 7.0073, "step": 10220 }, { "epoch": 0.23890565795388863, "grad_norm": 4.53125, "learning_rate": 4.449304286878343e-05, "loss": 7.0898, "step": 10230 }, { "epoch": 0.23913919232139, "grad_norm": 4.25, "learning_rate": 4.4481197679493505e-05, "loss": 7.035, "step": 10240 }, { "epoch": 0.23937272668889134, "grad_norm": 4.71875, "learning_rate": 4.4469341345027145e-05, "loss": 7.1195, "step": 10250 }, { "epoch": 0.2396062610563927, "grad_norm": 4.71875, "learning_rate": 4.4457473872167334e-05, "loss": 7.0493, "step": 10260 }, { "epoch": 0.23983979542389408, "grad_norm": 4.0625, "learning_rate": 4.4445595267703455e-05, "loss": 7.1028, "step": 10270 }, { "epoch": 0.24007332979139542, "grad_norm": 4.8125, "learning_rate": 4.443370553843123e-05, "loss": 7.0172, "step": 10280 }, { "epoch": 0.2403068641588968, "grad_norm": 4.21875, "learning_rate": 4.442180469115279e-05, "loss": 7.1182, "step": 10290 }, { "epoch": 0.24054039852639814, "grad_norm": 4.25, "learning_rate": 4.440989273267658e-05, "loss": 7.0854, "step": 10300 }, { "epoch": 0.2407739328938995, "grad_norm": 4.6875, "learning_rate": 4.439796966981743e-05, "loss": 7.075, "step": 10310 }, { "epoch": 0.24100746726140085, "grad_norm": 4.78125, "learning_rate": 4.438603550939651e-05, "loss": 7.0813, "step": 10320 }, { "epoch": 0.24124100162890222, "grad_norm": 4.84375, "learning_rate": 4.437409025824134e-05, "loss": 7.0956, "step": 10330 }, { "epoch": 0.24147453599640356, "grad_norm": 5.09375, "learning_rate": 4.436213392318579e-05, "loss": 7.0909, "step": 10340 }, { "epoch": 0.24170807036390493, "grad_norm": 5.15625, "learning_rate": 4.435016651107007e-05, "loss": 7.0788, "step": 10350 }, { "epoch": 0.24194160473140627, "grad_norm": 4.375, "learning_rate": 4.4338188028740725e-05, "loss": 6.9913, "step": 10360 }, { "epoch": 0.24217513909890764, "grad_norm": 4.28125, "learning_rate": 4.432619848305063e-05, "loss": 7.0862, "step": 10370 }, { "epoch": 0.242408673466409, "grad_norm": 4.90625, "learning_rate": 4.4314197880859007e-05, "loss": 7.0329, "step": 10380 }, { "epoch": 0.24264220783391036, "grad_norm": 5.15625, "learning_rate": 4.4302186229031386e-05, "loss": 7.0704, "step": 10390 }, { "epoch": 0.24287574220141173, "grad_norm": 4.96875, "learning_rate": 4.429016353443961e-05, "loss": 7.0921, "step": 10400 }, { "epoch": 0.24310927656891307, "grad_norm": 4.34375, "learning_rate": 4.427812980396186e-05, "loss": 7.0299, "step": 10410 }, { "epoch": 0.24334281093641444, "grad_norm": 4.0625, "learning_rate": 4.426608504448263e-05, "loss": 7.045, "step": 10420 }, { "epoch": 0.24357634530391578, "grad_norm": 4.0, "learning_rate": 4.4254029262892714e-05, "loss": 7.1141, "step": 10430 }, { "epoch": 0.24380987967141715, "grad_norm": 4.03125, "learning_rate": 4.424196246608921e-05, "loss": 7.0692, "step": 10440 }, { "epoch": 0.2440434140389185, "grad_norm": 3.75, "learning_rate": 4.422988466097552e-05, "loss": 7.0162, "step": 10450 }, { "epoch": 0.24427694840641986, "grad_norm": 4.125, "learning_rate": 4.421779585446135e-05, "loss": 7.0297, "step": 10460 }, { "epoch": 0.2445104827739212, "grad_norm": 3.96875, "learning_rate": 4.420569605346269e-05, "loss": 7.0434, "step": 10470 }, { "epoch": 0.24474401714142258, "grad_norm": 4.96875, "learning_rate": 4.419358526490184e-05, "loss": 7.1074, "step": 10480 }, { "epoch": 0.24497755150892392, "grad_norm": 4.96875, "learning_rate": 4.418146349570735e-05, "loss": 7.1171, "step": 10490 }, { "epoch": 0.2452110858764253, "grad_norm": 4.0625, "learning_rate": 4.41693307528141e-05, "loss": 7.1027, "step": 10500 }, { "epoch": 0.2452110858764253, "eval_loss": 7.071328163146973, "eval_runtime": 78.6347, "eval_samples_per_second": 12.717, "eval_steps_per_second": 12.717, "step": 10500 }, { "epoch": 0.24544462024392666, "grad_norm": 4.625, "learning_rate": 4.4157187043163195e-05, "loss": 7.0383, "step": 10510 }, { "epoch": 0.245678154611428, "grad_norm": 4.21875, "learning_rate": 4.4145032373702065e-05, "loss": 7.0464, "step": 10520 }, { "epoch": 0.24591168897892937, "grad_norm": 4.71875, "learning_rate": 4.413286675138437e-05, "loss": 7.0298, "step": 10530 }, { "epoch": 0.2461452233464307, "grad_norm": 4.15625, "learning_rate": 4.4120690183170053e-05, "loss": 7.0419, "step": 10540 }, { "epoch": 0.24637875771393208, "grad_norm": 3.78125, "learning_rate": 4.410850267602533e-05, "loss": 7.0641, "step": 10550 }, { "epoch": 0.24661229208143343, "grad_norm": 3.375, "learning_rate": 4.409630423692265e-05, "loss": 7.0524, "step": 10560 }, { "epoch": 0.2468458264489348, "grad_norm": 4.125, "learning_rate": 4.408409487284073e-05, "loss": 7.0727, "step": 10570 }, { "epoch": 0.24707936081643614, "grad_norm": 4.59375, "learning_rate": 4.407187459076455e-05, "loss": 7.1232, "step": 10580 }, { "epoch": 0.2473128951839375, "grad_norm": 4.125, "learning_rate": 4.4059643397685315e-05, "loss": 7.1042, "step": 10590 }, { "epoch": 0.24754642955143885, "grad_norm": 4.6875, "learning_rate": 4.404740130060048e-05, "loss": 7.0265, "step": 10600 }, { "epoch": 0.24777996391894022, "grad_norm": 5.3125, "learning_rate": 4.403514830651373e-05, "loss": 7.047, "step": 10610 }, { "epoch": 0.2480134982864416, "grad_norm": 3.453125, "learning_rate": 4.4022884422435e-05, "loss": 7.042, "step": 10620 }, { "epoch": 0.24824703265394293, "grad_norm": 4.15625, "learning_rate": 4.401060965538045e-05, "loss": 7.0062, "step": 10630 }, { "epoch": 0.2484805670214443, "grad_norm": 5.6875, "learning_rate": 4.399832401237246e-05, "loss": 7.0679, "step": 10640 }, { "epoch": 0.24871410138894565, "grad_norm": 5.84375, "learning_rate": 4.3986027500439645e-05, "loss": 6.9971, "step": 10650 }, { "epoch": 0.24894763575644702, "grad_norm": 4.34375, "learning_rate": 4.397372012661681e-05, "loss": 7.0095, "step": 10660 }, { "epoch": 0.24918117012394836, "grad_norm": 3.703125, "learning_rate": 4.3961401897945006e-05, "loss": 7.0724, "step": 10670 }, { "epoch": 0.24941470449144973, "grad_norm": 4.1875, "learning_rate": 4.3949072821471484e-05, "loss": 7.0568, "step": 10680 }, { "epoch": 0.24964823885895107, "grad_norm": 4.09375, "learning_rate": 4.3936732904249694e-05, "loss": 7.1053, "step": 10690 }, { "epoch": 0.24988177322645244, "grad_norm": 4.71875, "learning_rate": 4.392438215333929e-05, "loss": 7.0208, "step": 10700 }, { "epoch": 0.2501153075939538, "grad_norm": 5.0, "learning_rate": 4.3912020575806123e-05, "loss": 7.0913, "step": 10710 }, { "epoch": 0.2503488419614551, "grad_norm": 4.53125, "learning_rate": 4.389964817872225e-05, "loss": 7.0585, "step": 10720 }, { "epoch": 0.2505823763289565, "grad_norm": 5.03125, "learning_rate": 4.38872649691659e-05, "loss": 7.0212, "step": 10730 }, { "epoch": 0.25081591069645787, "grad_norm": 4.6875, "learning_rate": 4.3874870954221504e-05, "loss": 7.05, "step": 10740 }, { "epoch": 0.2510494450639592, "grad_norm": 4.75, "learning_rate": 4.386246614097965e-05, "loss": 7.085, "step": 10750 }, { "epoch": 0.2512829794314606, "grad_norm": 4.625, "learning_rate": 4.385005053653715e-05, "loss": 7.1025, "step": 10760 }, { "epoch": 0.25151651379896195, "grad_norm": 4.1875, "learning_rate": 4.3837624147996945e-05, "loss": 7.0371, "step": 10770 }, { "epoch": 0.2517500481664633, "grad_norm": 4.21875, "learning_rate": 4.382518698246815e-05, "loss": 7.043, "step": 10780 }, { "epoch": 0.25198358253396463, "grad_norm": 5.0, "learning_rate": 4.381273904706608e-05, "loss": 7.0481, "step": 10790 }, { "epoch": 0.25221711690146603, "grad_norm": 5.125, "learning_rate": 4.380028034891216e-05, "loss": 7.0799, "step": 10800 }, { "epoch": 0.2524506512689674, "grad_norm": 4.46875, "learning_rate": 4.378781089513403e-05, "loss": 7.0409, "step": 10810 }, { "epoch": 0.2526841856364687, "grad_norm": 3.953125, "learning_rate": 4.377533069286544e-05, "loss": 7.0728, "step": 10820 }, { "epoch": 0.25291772000397006, "grad_norm": 3.625, "learning_rate": 4.37628397492463e-05, "loss": 7.0392, "step": 10830 }, { "epoch": 0.25315125437147146, "grad_norm": 3.859375, "learning_rate": 4.375033807142267e-05, "loss": 7.0732, "step": 10840 }, { "epoch": 0.2533847887389728, "grad_norm": 4.375, "learning_rate": 4.3737825666546755e-05, "loss": 7.0758, "step": 10850 }, { "epoch": 0.25361832310647414, "grad_norm": 6.0625, "learning_rate": 4.372530254177689e-05, "loss": 7.0808, "step": 10860 }, { "epoch": 0.25385185747397554, "grad_norm": 5.5625, "learning_rate": 4.371276870427753e-05, "loss": 7.0343, "step": 10870 }, { "epoch": 0.2540853918414769, "grad_norm": 3.90625, "learning_rate": 4.370022416121929e-05, "loss": 7.0516, "step": 10880 }, { "epoch": 0.2543189262089782, "grad_norm": 4.125, "learning_rate": 4.368766891977888e-05, "loss": 7.0445, "step": 10890 }, { "epoch": 0.25455246057647957, "grad_norm": 5.03125, "learning_rate": 4.367510298713915e-05, "loss": 7.1263, "step": 10900 }, { "epoch": 0.25478599494398096, "grad_norm": 3.859375, "learning_rate": 4.366252637048906e-05, "loss": 7.0443, "step": 10910 }, { "epoch": 0.2550195293114823, "grad_norm": 4.8125, "learning_rate": 4.3649939077023684e-05, "loss": 7.0305, "step": 10920 }, { "epoch": 0.25525306367898365, "grad_norm": 4.53125, "learning_rate": 4.363734111394419e-05, "loss": 7.1227, "step": 10930 }, { "epoch": 0.255486598046485, "grad_norm": 3.28125, "learning_rate": 4.3624732488457877e-05, "loss": 7.1114, "step": 10940 }, { "epoch": 0.2557201324139864, "grad_norm": 3.625, "learning_rate": 4.3612113207778115e-05, "loss": 7.0617, "step": 10950 }, { "epoch": 0.25595366678148773, "grad_norm": 5.375, "learning_rate": 4.359948327912439e-05, "loss": 7.1203, "step": 10960 }, { "epoch": 0.2561872011489891, "grad_norm": 3.9375, "learning_rate": 4.358684270972226e-05, "loss": 7.1038, "step": 10970 }, { "epoch": 0.2564207355164905, "grad_norm": 4.09375, "learning_rate": 4.3574191506803416e-05, "loss": 7.042, "step": 10980 }, { "epoch": 0.2566542698839918, "grad_norm": 4.03125, "learning_rate": 4.3561529677605574e-05, "loss": 7.0476, "step": 10990 }, { "epoch": 0.25688780425149316, "grad_norm": 4.15625, "learning_rate": 4.354885722937256e-05, "loss": 7.0468, "step": 11000 }, { "epoch": 0.25688780425149316, "eval_loss": 7.0689239501953125, "eval_runtime": 78.5047, "eval_samples_per_second": 12.738, "eval_steps_per_second": 12.738, "step": 11000 }, { "epoch": 0.2571213386189945, "grad_norm": 3.9375, "learning_rate": 4.353617416935427e-05, "loss": 7.0234, "step": 11010 }, { "epoch": 0.2573548729864959, "grad_norm": 4.71875, "learning_rate": 4.352348050480668e-05, "loss": 7.1327, "step": 11020 }, { "epoch": 0.25758840735399724, "grad_norm": 3.6875, "learning_rate": 4.351077624299182e-05, "loss": 7.0663, "step": 11030 }, { "epoch": 0.2578219417214986, "grad_norm": 3.71875, "learning_rate": 4.349806139117779e-05, "loss": 7.0423, "step": 11040 }, { "epoch": 0.2580554760889999, "grad_norm": 4.53125, "learning_rate": 4.348533595663874e-05, "loss": 7.1569, "step": 11050 }, { "epoch": 0.2582890104565013, "grad_norm": 4.65625, "learning_rate": 4.3472599946654876e-05, "loss": 7.057, "step": 11060 }, { "epoch": 0.25852254482400266, "grad_norm": 4.28125, "learning_rate": 4.345985336851247e-05, "loss": 7.0916, "step": 11070 }, { "epoch": 0.258756079191504, "grad_norm": 4.9375, "learning_rate": 4.344709622950382e-05, "loss": 7.1092, "step": 11080 }, { "epoch": 0.25898961355900535, "grad_norm": 3.390625, "learning_rate": 4.343432853692728e-05, "loss": 7.0525, "step": 11090 }, { "epoch": 0.25922314792650675, "grad_norm": 4.28125, "learning_rate": 4.342155029808724e-05, "loss": 7.1256, "step": 11100 }, { "epoch": 0.2594566822940081, "grad_norm": 4.5625, "learning_rate": 4.3408761520294105e-05, "loss": 7.1116, "step": 11110 }, { "epoch": 0.25969021666150943, "grad_norm": 3.953125, "learning_rate": 4.339596221086434e-05, "loss": 7.058, "step": 11120 }, { "epoch": 0.25992375102901083, "grad_norm": 4.25, "learning_rate": 4.3383152377120404e-05, "loss": 7.0134, "step": 11130 }, { "epoch": 0.2601572853965122, "grad_norm": 3.890625, "learning_rate": 4.337033202639079e-05, "loss": 7.0983, "step": 11140 }, { "epoch": 0.2603908197640135, "grad_norm": 4.78125, "learning_rate": 4.335750116601004e-05, "loss": 7.0455, "step": 11150 }, { "epoch": 0.26062435413151486, "grad_norm": 4.09375, "learning_rate": 4.334465980331864e-05, "loss": 7.0542, "step": 11160 }, { "epoch": 0.26085788849901625, "grad_norm": 4.28125, "learning_rate": 4.333180794566314e-05, "loss": 7.0983, "step": 11170 }, { "epoch": 0.2610914228665176, "grad_norm": 4.40625, "learning_rate": 4.3318945600396075e-05, "loss": 7.0608, "step": 11180 }, { "epoch": 0.26132495723401894, "grad_norm": 4.25, "learning_rate": 4.330607277487599e-05, "loss": 7.1057, "step": 11190 }, { "epoch": 0.2615584916015203, "grad_norm": 3.734375, "learning_rate": 4.329318947646741e-05, "loss": 7.0853, "step": 11200 }, { "epoch": 0.2617920259690217, "grad_norm": 4.5, "learning_rate": 4.328029571254085e-05, "loss": 7.0892, "step": 11210 }, { "epoch": 0.262025560336523, "grad_norm": 3.71875, "learning_rate": 4.326739149047284e-05, "loss": 7.1274, "step": 11220 }, { "epoch": 0.26225909470402436, "grad_norm": 5.28125, "learning_rate": 4.325447681764586e-05, "loss": 7.0506, "step": 11230 }, { "epoch": 0.26249262907152576, "grad_norm": 4.625, "learning_rate": 4.324155170144839e-05, "loss": 7.0616, "step": 11240 }, { "epoch": 0.2627261634390271, "grad_norm": 4.28125, "learning_rate": 4.322861614927487e-05, "loss": 6.9829, "step": 11250 }, { "epoch": 0.26295969780652845, "grad_norm": 5.125, "learning_rate": 4.321567016852572e-05, "loss": 7.0674, "step": 11260 }, { "epoch": 0.2631932321740298, "grad_norm": 5.0, "learning_rate": 4.320271376660734e-05, "loss": 7.102, "step": 11270 }, { "epoch": 0.2634267665415312, "grad_norm": 4.84375, "learning_rate": 4.318974695093206e-05, "loss": 7.0489, "step": 11280 }, { "epoch": 0.26366030090903253, "grad_norm": 4.09375, "learning_rate": 4.3176769728918185e-05, "loss": 6.9859, "step": 11290 }, { "epoch": 0.2638938352765339, "grad_norm": 4.84375, "learning_rate": 4.316378210798998e-05, "loss": 7.0434, "step": 11300 }, { "epoch": 0.2641273696440352, "grad_norm": 4.09375, "learning_rate": 4.315078409557764e-05, "loss": 7.0514, "step": 11310 }, { "epoch": 0.2643609040115366, "grad_norm": 4.5625, "learning_rate": 4.3137775699117336e-05, "loss": 7.0058, "step": 11320 }, { "epoch": 0.26459443837903796, "grad_norm": 3.828125, "learning_rate": 4.3124756926051144e-05, "loss": 7.0412, "step": 11330 }, { "epoch": 0.2648279727465393, "grad_norm": 4.71875, "learning_rate": 4.311172778382711e-05, "loss": 7.0084, "step": 11340 }, { "epoch": 0.2650615071140407, "grad_norm": 4.78125, "learning_rate": 4.3098688279899166e-05, "loss": 7.1299, "step": 11350 }, { "epoch": 0.26529504148154204, "grad_norm": 4.96875, "learning_rate": 4.3085638421727225e-05, "loss": 6.9919, "step": 11360 }, { "epoch": 0.2655285758490434, "grad_norm": 3.765625, "learning_rate": 4.3072578216777095e-05, "loss": 7.0046, "step": 11370 }, { "epoch": 0.2657621102165447, "grad_norm": 3.625, "learning_rate": 4.305950767252049e-05, "loss": 7.1175, "step": 11380 }, { "epoch": 0.2659956445840461, "grad_norm": 5.0, "learning_rate": 4.304642679643509e-05, "loss": 7.0532, "step": 11390 }, { "epoch": 0.26622917895154746, "grad_norm": 3.9375, "learning_rate": 4.303333559600443e-05, "loss": 7.1318, "step": 11400 }, { "epoch": 0.2664627133190488, "grad_norm": 5.59375, "learning_rate": 4.302023407871799e-05, "loss": 6.9802, "step": 11410 }, { "epoch": 0.26669624768655015, "grad_norm": 3.703125, "learning_rate": 4.300712225207112e-05, "loss": 7.0288, "step": 11420 }, { "epoch": 0.26692978205405155, "grad_norm": 3.53125, "learning_rate": 4.299400012356508e-05, "loss": 7.0844, "step": 11430 }, { "epoch": 0.2671633164215529, "grad_norm": 4.4375, "learning_rate": 4.2980867700707054e-05, "loss": 7.0807, "step": 11440 }, { "epoch": 0.26739685078905423, "grad_norm": 5.0625, "learning_rate": 4.296772499101008e-05, "loss": 7.0382, "step": 11450 }, { "epoch": 0.26763038515655563, "grad_norm": 3.546875, "learning_rate": 4.2954572001993076e-05, "loss": 7.0356, "step": 11460 }, { "epoch": 0.26786391952405697, "grad_norm": 4.65625, "learning_rate": 4.294140874118087e-05, "loss": 7.0473, "step": 11470 }, { "epoch": 0.2680974538915583, "grad_norm": 5.09375, "learning_rate": 4.292823521610415e-05, "loss": 7.1049, "step": 11480 }, { "epoch": 0.26833098825905966, "grad_norm": 4.125, "learning_rate": 4.291505143429948e-05, "loss": 7.0482, "step": 11490 }, { "epoch": 0.26856452262656105, "grad_norm": 5.21875, "learning_rate": 4.290185740330929e-05, "loss": 7.0001, "step": 11500 }, { "epoch": 0.26856452262656105, "eval_loss": 7.06677770614624, "eval_runtime": 78.4133, "eval_samples_per_second": 12.753, "eval_steps_per_second": 12.753, "step": 11500 }, { "epoch": 0.2687980569940624, "grad_norm": 5.34375, "learning_rate": 4.2888653130681855e-05, "loss": 7.0725, "step": 11510 }, { "epoch": 0.26903159136156374, "grad_norm": 4.34375, "learning_rate": 4.287543862397136e-05, "loss": 7.0659, "step": 11520 }, { "epoch": 0.2692651257290651, "grad_norm": 4.25, "learning_rate": 4.286221389073779e-05, "loss": 7.0804, "step": 11530 }, { "epoch": 0.2694986600965665, "grad_norm": 4.6875, "learning_rate": 4.284897893854701e-05, "loss": 7.0152, "step": 11540 }, { "epoch": 0.2697321944640678, "grad_norm": 4.25, "learning_rate": 4.283573377497073e-05, "loss": 7.0433, "step": 11550 }, { "epoch": 0.26996572883156916, "grad_norm": 4.8125, "learning_rate": 4.28224784075865e-05, "loss": 7.0939, "step": 11560 }, { "epoch": 0.27019926319907056, "grad_norm": 4.59375, "learning_rate": 4.280921284397769e-05, "loss": 6.9997, "step": 11570 }, { "epoch": 0.2704327975665719, "grad_norm": 5.59375, "learning_rate": 4.2795937091733515e-05, "loss": 7.0371, "step": 11580 }, { "epoch": 0.27066633193407325, "grad_norm": 4.53125, "learning_rate": 4.278265115844904e-05, "loss": 7.0105, "step": 11590 }, { "epoch": 0.2708998663015746, "grad_norm": 4.59375, "learning_rate": 4.2769355051725125e-05, "loss": 7.0016, "step": 11600 }, { "epoch": 0.271133400669076, "grad_norm": 3.75, "learning_rate": 4.2756048779168464e-05, "loss": 7.1114, "step": 11610 }, { "epoch": 0.27136693503657733, "grad_norm": 3.796875, "learning_rate": 4.274273234839156e-05, "loss": 7.0848, "step": 11620 }, { "epoch": 0.27160046940407867, "grad_norm": 4.53125, "learning_rate": 4.272940576701273e-05, "loss": 7.0519, "step": 11630 }, { "epoch": 0.27183400377158, "grad_norm": 4.125, "learning_rate": 4.2716069042656116e-05, "loss": 7.0155, "step": 11640 }, { "epoch": 0.2720675381390814, "grad_norm": 3.703125, "learning_rate": 4.270272218295163e-05, "loss": 7.0365, "step": 11650 }, { "epoch": 0.27230107250658275, "grad_norm": 3.5625, "learning_rate": 4.2689365195535e-05, "loss": 7.0432, "step": 11660 }, { "epoch": 0.2725346068740841, "grad_norm": 4.90625, "learning_rate": 4.267599808804775e-05, "loss": 7.0783, "step": 11670 }, { "epoch": 0.27276814124158544, "grad_norm": 4.59375, "learning_rate": 4.2662620868137195e-05, "loss": 7.0787, "step": 11680 }, { "epoch": 0.27300167560908684, "grad_norm": 5.25, "learning_rate": 4.264923354345642e-05, "loss": 7.0677, "step": 11690 }, { "epoch": 0.2732352099765882, "grad_norm": 5.59375, "learning_rate": 4.263583612166432e-05, "loss": 7.0429, "step": 11700 }, { "epoch": 0.2734687443440895, "grad_norm": 4.5625, "learning_rate": 4.262242861042553e-05, "loss": 7.0483, "step": 11710 }, { "epoch": 0.2737022787115909, "grad_norm": 4.625, "learning_rate": 4.260901101741049e-05, "loss": 7.0867, "step": 11720 }, { "epoch": 0.27393581307909226, "grad_norm": 5.125, "learning_rate": 4.2595583350295384e-05, "loss": 7.0683, "step": 11730 }, { "epoch": 0.2741693474465936, "grad_norm": 4.0, "learning_rate": 4.258214561676217e-05, "loss": 7.0799, "step": 11740 }, { "epoch": 0.27440288181409495, "grad_norm": 5.0625, "learning_rate": 4.2568697824498574e-05, "loss": 7.038, "step": 11750 }, { "epoch": 0.27463641618159634, "grad_norm": 3.765625, "learning_rate": 4.255523998119805e-05, "loss": 7.0861, "step": 11760 }, { "epoch": 0.2748699505490977, "grad_norm": 4.625, "learning_rate": 4.254177209455983e-05, "loss": 7.0652, "step": 11770 }, { "epoch": 0.27510348491659903, "grad_norm": 4.75, "learning_rate": 4.2528294172288874e-05, "loss": 7.0648, "step": 11780 }, { "epoch": 0.27533701928410037, "grad_norm": 4.375, "learning_rate": 4.251480622209589e-05, "loss": 7.0109, "step": 11790 }, { "epoch": 0.27557055365160177, "grad_norm": 4.78125, "learning_rate": 4.250130825169733e-05, "loss": 7.0178, "step": 11800 }, { "epoch": 0.2758040880191031, "grad_norm": 4.09375, "learning_rate": 4.248780026881536e-05, "loss": 7.0795, "step": 11810 }, { "epoch": 0.27603762238660445, "grad_norm": 6.1875, "learning_rate": 4.2474282281177905e-05, "loss": 7.1417, "step": 11820 }, { "epoch": 0.27627115675410585, "grad_norm": 4.375, "learning_rate": 4.246075429651857e-05, "loss": 7.0335, "step": 11830 }, { "epoch": 0.2765046911216072, "grad_norm": 3.75, "learning_rate": 4.244721632257672e-05, "loss": 6.992, "step": 11840 }, { "epoch": 0.27673822548910854, "grad_norm": 4.65625, "learning_rate": 4.243366836709741e-05, "loss": 7.0223, "step": 11850 }, { "epoch": 0.2769717598566099, "grad_norm": 5.0625, "learning_rate": 4.2420110437831416e-05, "loss": 7.038, "step": 11860 }, { "epoch": 0.2772052942241113, "grad_norm": 5.875, "learning_rate": 4.2406542542535224e-05, "loss": 7.0503, "step": 11870 }, { "epoch": 0.2774388285916126, "grad_norm": 4.03125, "learning_rate": 4.2392964688971e-05, "loss": 7.0628, "step": 11880 }, { "epoch": 0.27767236295911396, "grad_norm": 3.515625, "learning_rate": 4.2379376884906633e-05, "loss": 7.05, "step": 11890 }, { "epoch": 0.2779058973266153, "grad_norm": 5.0, "learning_rate": 4.2365779138115704e-05, "loss": 7.0972, "step": 11900 }, { "epoch": 0.2781394316941167, "grad_norm": 4.71875, "learning_rate": 4.2352171456377454e-05, "loss": 7.0277, "step": 11910 }, { "epoch": 0.27837296606161804, "grad_norm": 4.59375, "learning_rate": 4.2338553847476826e-05, "loss": 7.0883, "step": 11920 }, { "epoch": 0.2786065004291194, "grad_norm": 4.53125, "learning_rate": 4.232492631920446e-05, "loss": 7.0766, "step": 11930 }, { "epoch": 0.2788400347966208, "grad_norm": 4.21875, "learning_rate": 4.2311288879356636e-05, "loss": 7.1431, "step": 11940 }, { "epoch": 0.2790735691641221, "grad_norm": 6.90625, "learning_rate": 4.229764153573533e-05, "loss": 7.0493, "step": 11950 }, { "epoch": 0.27930710353162347, "grad_norm": 3.59375, "learning_rate": 4.228398429614817e-05, "loss": 7.0788, "step": 11960 }, { "epoch": 0.2795406378991248, "grad_norm": 4.5, "learning_rate": 4.227031716840846e-05, "loss": 7.0474, "step": 11970 }, { "epoch": 0.2797741722666262, "grad_norm": 3.8125, "learning_rate": 4.2256640160335145e-05, "loss": 7.073, "step": 11980 }, { "epoch": 0.28000770663412755, "grad_norm": 4.78125, "learning_rate": 4.224295327975283e-05, "loss": 7.0371, "step": 11990 }, { "epoch": 0.2802412410016289, "grad_norm": 4.0, "learning_rate": 4.2229256534491775e-05, "loss": 7.0588, "step": 12000 }, { "epoch": 0.2802412410016289, "eval_loss": 7.067704200744629, "eval_runtime": 79.1766, "eval_samples_per_second": 12.63, "eval_steps_per_second": 12.63, "step": 12000 }, { "epoch": 0.28047477536913024, "grad_norm": 4.46875, "learning_rate": 4.2215549932387864e-05, "loss": 7.0827, "step": 12010 }, { "epoch": 0.28070830973663163, "grad_norm": 4.9375, "learning_rate": 4.220183348128264e-05, "loss": 7.0895, "step": 12020 }, { "epoch": 0.280941844104133, "grad_norm": 5.78125, "learning_rate": 4.2188107189023265e-05, "loss": 7.0291, "step": 12030 }, { "epoch": 0.2811753784716343, "grad_norm": 3.78125, "learning_rate": 4.2174371063462556e-05, "loss": 7.0756, "step": 12040 }, { "epoch": 0.2814089128391357, "grad_norm": 4.84375, "learning_rate": 4.2160625112458924e-05, "loss": 7.0418, "step": 12050 }, { "epoch": 0.28164244720663706, "grad_norm": 3.53125, "learning_rate": 4.214686934387641e-05, "loss": 7.0888, "step": 12060 }, { "epoch": 0.2818759815741384, "grad_norm": 5.0, "learning_rate": 4.2133103765584705e-05, "loss": 7.1039, "step": 12070 }, { "epoch": 0.28210951594163974, "grad_norm": 4.375, "learning_rate": 4.211932838545906e-05, "loss": 7.0562, "step": 12080 }, { "epoch": 0.28234305030914114, "grad_norm": 4.3125, "learning_rate": 4.210554321138036e-05, "loss": 7.0262, "step": 12090 }, { "epoch": 0.2825765846766425, "grad_norm": 3.953125, "learning_rate": 4.2091748251235095e-05, "loss": 7.0799, "step": 12100 }, { "epoch": 0.2828101190441438, "grad_norm": 4.125, "learning_rate": 4.2077943512915357e-05, "loss": 7.04, "step": 12110 }, { "epoch": 0.28304365341164517, "grad_norm": 4.8125, "learning_rate": 4.206412900431882e-05, "loss": 7.0313, "step": 12120 }, { "epoch": 0.28327718777914657, "grad_norm": 5.28125, "learning_rate": 4.205030473334874e-05, "loss": 7.0491, "step": 12130 }, { "epoch": 0.2835107221466479, "grad_norm": 4.84375, "learning_rate": 4.203647070791399e-05, "loss": 7.0404, "step": 12140 }, { "epoch": 0.28374425651414925, "grad_norm": 4.875, "learning_rate": 4.202262693592899e-05, "loss": 7.0875, "step": 12150 }, { "epoch": 0.2839777908816506, "grad_norm": 3.53125, "learning_rate": 4.2008773425313765e-05, "loss": 7.0523, "step": 12160 }, { "epoch": 0.284211325249152, "grad_norm": 4.53125, "learning_rate": 4.199491018399388e-05, "loss": 7.0819, "step": 12170 }, { "epoch": 0.28444485961665333, "grad_norm": 4.15625, "learning_rate": 4.198103721990049e-05, "loss": 7.0687, "step": 12180 }, { "epoch": 0.2846783939841547, "grad_norm": 4.03125, "learning_rate": 4.196715454097031e-05, "loss": 7.0549, "step": 12190 }, { "epoch": 0.2849119283516561, "grad_norm": 4.5625, "learning_rate": 4.1953262155145604e-05, "loss": 7.0774, "step": 12200 }, { "epoch": 0.2851454627191574, "grad_norm": 3.84375, "learning_rate": 4.19393600703742e-05, "loss": 7.0235, "step": 12210 }, { "epoch": 0.28537899708665876, "grad_norm": 4.375, "learning_rate": 4.192544829460946e-05, "loss": 7.0662, "step": 12220 }, { "epoch": 0.2856125314541601, "grad_norm": 4.96875, "learning_rate": 4.19115268358103e-05, "loss": 7.0541, "step": 12230 }, { "epoch": 0.2858460658216615, "grad_norm": 3.8125, "learning_rate": 4.189759570194118e-05, "loss": 7.1232, "step": 12240 }, { "epoch": 0.28607960018916284, "grad_norm": 4.1875, "learning_rate": 4.1883654900972094e-05, "loss": 7.0521, "step": 12250 }, { "epoch": 0.2863131345566642, "grad_norm": 4.3125, "learning_rate": 4.186970444087854e-05, "loss": 7.0324, "step": 12260 }, { "epoch": 0.2865466689241655, "grad_norm": 4.46875, "learning_rate": 4.1855744329641594e-05, "loss": 7.0495, "step": 12270 }, { "epoch": 0.2867802032916669, "grad_norm": 4.4375, "learning_rate": 4.184177457524781e-05, "loss": 7.1093, "step": 12280 }, { "epoch": 0.28701373765916827, "grad_norm": 4.3125, "learning_rate": 4.182779518568926e-05, "loss": 7.0929, "step": 12290 }, { "epoch": 0.2872472720266696, "grad_norm": 4.3125, "learning_rate": 4.1813806168963556e-05, "loss": 7.0695, "step": 12300 }, { "epoch": 0.287480806394171, "grad_norm": 5.25, "learning_rate": 4.17998075330738e-05, "loss": 7.0535, "step": 12310 }, { "epoch": 0.28771434076167235, "grad_norm": 5.59375, "learning_rate": 4.1785799286028606e-05, "loss": 7.1194, "step": 12320 }, { "epoch": 0.2879478751291737, "grad_norm": 4.46875, "learning_rate": 4.1771781435842064e-05, "loss": 7.0199, "step": 12330 }, { "epoch": 0.28818140949667503, "grad_norm": 3.96875, "learning_rate": 4.1757753990533786e-05, "loss": 7.0855, "step": 12340 }, { "epoch": 0.28841494386417643, "grad_norm": 4.5625, "learning_rate": 4.1743716958128855e-05, "loss": 7.1406, "step": 12350 }, { "epoch": 0.2886484782316778, "grad_norm": 3.609375, "learning_rate": 4.172967034665785e-05, "loss": 7.0353, "step": 12360 }, { "epoch": 0.2888820125991791, "grad_norm": 4.53125, "learning_rate": 4.1715614164156824e-05, "loss": 7.0784, "step": 12370 }, { "epoch": 0.28911554696668046, "grad_norm": 4.03125, "learning_rate": 4.170154841866731e-05, "loss": 6.9773, "step": 12380 }, { "epoch": 0.28934908133418186, "grad_norm": 3.65625, "learning_rate": 4.1687473118236295e-05, "loss": 7.0499, "step": 12390 }, { "epoch": 0.2895826157016832, "grad_norm": 4.75, "learning_rate": 4.167338827091627e-05, "loss": 7.108, "step": 12400 }, { "epoch": 0.28981615006918454, "grad_norm": 4.25, "learning_rate": 4.165929388476515e-05, "loss": 7.0142, "step": 12410 }, { "epoch": 0.29004968443668594, "grad_norm": 5.375, "learning_rate": 4.164518996784631e-05, "loss": 7.0688, "step": 12420 }, { "epoch": 0.2902832188041873, "grad_norm": 4.09375, "learning_rate": 4.163107652822861e-05, "loss": 7.1018, "step": 12430 }, { "epoch": 0.2905167531716886, "grad_norm": 3.65625, "learning_rate": 4.161695357398633e-05, "loss": 7.0968, "step": 12440 }, { "epoch": 0.29075028753918997, "grad_norm": 3.796875, "learning_rate": 4.160282111319919e-05, "loss": 7.0677, "step": 12450 }, { "epoch": 0.29098382190669136, "grad_norm": 3.984375, "learning_rate": 4.158867915395237e-05, "loss": 7.0214, "step": 12460 }, { "epoch": 0.2912173562741927, "grad_norm": 4.6875, "learning_rate": 4.1574527704336465e-05, "loss": 7.0646, "step": 12470 }, { "epoch": 0.29145089064169405, "grad_norm": 4.28125, "learning_rate": 4.156036677244751e-05, "loss": 7.0453, "step": 12480 }, { "epoch": 0.2916844250091954, "grad_norm": 3.703125, "learning_rate": 4.154619636638697e-05, "loss": 7.066, "step": 12490 }, { "epoch": 0.2919179593766968, "grad_norm": 4.03125, "learning_rate": 4.153201649426171e-05, "loss": 7.0684, "step": 12500 }, { "epoch": 0.2919179593766968, "eval_loss": 7.059619426727295, "eval_runtime": 78.9819, "eval_samples_per_second": 12.661, "eval_steps_per_second": 12.661, "step": 12500 }, { "epoch": 0.29215149374419813, "grad_norm": 4.34375, "learning_rate": 4.151782716418403e-05, "loss": 7.0358, "step": 12510 }, { "epoch": 0.2923850281116995, "grad_norm": 4.25, "learning_rate": 4.150362838427163e-05, "loss": 7.0372, "step": 12520 }, { "epoch": 0.29261856247920087, "grad_norm": 4.59375, "learning_rate": 4.1489420162647616e-05, "loss": 7.0977, "step": 12530 }, { "epoch": 0.2928520968467022, "grad_norm": 5.25, "learning_rate": 4.1475202507440516e-05, "loss": 7.0858, "step": 12540 }, { "epoch": 0.29308563121420356, "grad_norm": 4.78125, "learning_rate": 4.146097542678422e-05, "loss": 7.0215, "step": 12550 }, { "epoch": 0.2933191655817049, "grad_norm": 4.4375, "learning_rate": 4.144673892881803e-05, "loss": 7.0174, "step": 12560 }, { "epoch": 0.2935526999492063, "grad_norm": 5.0625, "learning_rate": 4.1432493021686647e-05, "loss": 7.1164, "step": 12570 }, { "epoch": 0.29378623431670764, "grad_norm": 4.9375, "learning_rate": 4.141823771354013e-05, "loss": 7.0019, "step": 12580 }, { "epoch": 0.294019768684209, "grad_norm": 5.0, "learning_rate": 4.140397301253394e-05, "loss": 7.0493, "step": 12590 }, { "epoch": 0.2942533030517103, "grad_norm": 4.65625, "learning_rate": 4.138969892682889e-05, "loss": 7.0218, "step": 12600 }, { "epoch": 0.2944868374192117, "grad_norm": 4.4375, "learning_rate": 4.137541546459117e-05, "loss": 7.0048, "step": 12610 }, { "epoch": 0.29472037178671306, "grad_norm": 3.640625, "learning_rate": 4.136112263399235e-05, "loss": 7.0938, "step": 12620 }, { "epoch": 0.2949539061542144, "grad_norm": 4.53125, "learning_rate": 4.1346820443209336e-05, "loss": 7.0516, "step": 12630 }, { "epoch": 0.29518744052171575, "grad_norm": 4.25, "learning_rate": 4.1332508900424395e-05, "loss": 7.0201, "step": 12640 }, { "epoch": 0.29542097488921715, "grad_norm": 3.9375, "learning_rate": 4.131818801382516e-05, "loss": 7.0479, "step": 12650 }, { "epoch": 0.2956545092567185, "grad_norm": 3.515625, "learning_rate": 4.1303857791604594e-05, "loss": 7.0331, "step": 12660 }, { "epoch": 0.29588804362421983, "grad_norm": 5.09375, "learning_rate": 4.1289518241961e-05, "loss": 7.0888, "step": 12670 }, { "epoch": 0.29612157799172123, "grad_norm": 4.375, "learning_rate": 4.127516937309802e-05, "loss": 7.1185, "step": 12680 }, { "epoch": 0.29635511235922257, "grad_norm": 4.8125, "learning_rate": 4.126081119322465e-05, "loss": 7.0607, "step": 12690 }, { "epoch": 0.2965886467267239, "grad_norm": 4.03125, "learning_rate": 4.124644371055517e-05, "loss": 7.0983, "step": 12700 }, { "epoch": 0.29682218109422526, "grad_norm": 4.6875, "learning_rate": 4.123206693330921e-05, "loss": 7.0853, "step": 12710 }, { "epoch": 0.29705571546172665, "grad_norm": 4.4375, "learning_rate": 4.1217680869711714e-05, "loss": 7.0316, "step": 12720 }, { "epoch": 0.297289249829228, "grad_norm": 4.125, "learning_rate": 4.120328552799294e-05, "loss": 7.0576, "step": 12730 }, { "epoch": 0.29752278419672934, "grad_norm": 3.96875, "learning_rate": 4.118888091638844e-05, "loss": 7.0629, "step": 12740 }, { "epoch": 0.2977563185642307, "grad_norm": 5.375, "learning_rate": 4.117446704313909e-05, "loss": 6.995, "step": 12750 }, { "epoch": 0.2979898529317321, "grad_norm": 5.09375, "learning_rate": 4.1160043916491055e-05, "loss": 7.0983, "step": 12760 }, { "epoch": 0.2982233872992334, "grad_norm": 4.25, "learning_rate": 4.114561154469578e-05, "loss": 7.0076, "step": 12770 }, { "epoch": 0.29845692166673476, "grad_norm": 4.09375, "learning_rate": 4.113116993601003e-05, "loss": 7.0592, "step": 12780 }, { "epoch": 0.29869045603423616, "grad_norm": 4.3125, "learning_rate": 4.111671909869582e-05, "loss": 7.0741, "step": 12790 }, { "epoch": 0.2989239904017375, "grad_norm": 5.15625, "learning_rate": 4.110225904102048e-05, "loss": 7.053, "step": 12800 }, { "epoch": 0.29915752476923885, "grad_norm": 4.84375, "learning_rate": 4.108778977125659e-05, "loss": 7.0437, "step": 12810 }, { "epoch": 0.2993910591367402, "grad_norm": 4.5, "learning_rate": 4.107331129768199e-05, "loss": 7.0157, "step": 12820 }, { "epoch": 0.2996245935042416, "grad_norm": 4.28125, "learning_rate": 4.105882362857983e-05, "loss": 7.061, "step": 12830 }, { "epoch": 0.29985812787174293, "grad_norm": 4.03125, "learning_rate": 4.1044326772238475e-05, "loss": 7.1094, "step": 12840 }, { "epoch": 0.3000916622392443, "grad_norm": 4.21875, "learning_rate": 4.102982073695156e-05, "loss": 7.0669, "step": 12850 }, { "epoch": 0.3003251966067456, "grad_norm": 3.921875, "learning_rate": 4.1015305531017986e-05, "loss": 7.0195, "step": 12860 }, { "epoch": 0.300558730974247, "grad_norm": 4.03125, "learning_rate": 4.1000781162741894e-05, "loss": 7.1463, "step": 12870 }, { "epoch": 0.30079226534174835, "grad_norm": 4.1875, "learning_rate": 4.098624764043266e-05, "loss": 7.0309, "step": 12880 }, { "epoch": 0.3010257997092497, "grad_norm": 3.953125, "learning_rate": 4.097170497240489e-05, "loss": 7.0876, "step": 12890 }, { "epoch": 0.3012593340767511, "grad_norm": 4.625, "learning_rate": 4.095715316697844e-05, "loss": 7.0331, "step": 12900 }, { "epoch": 0.30149286844425244, "grad_norm": 4.5, "learning_rate": 4.094259223247838e-05, "loss": 7.0759, "step": 12910 }, { "epoch": 0.3017264028117538, "grad_norm": 4.5625, "learning_rate": 4.092802217723501e-05, "loss": 7.1006, "step": 12920 }, { "epoch": 0.3019599371792551, "grad_norm": 3.796875, "learning_rate": 4.0913443009583844e-05, "loss": 7.0546, "step": 12930 }, { "epoch": 0.3021934715467565, "grad_norm": 5.53125, "learning_rate": 4.089885473786562e-05, "loss": 7.0465, "step": 12940 }, { "epoch": 0.30242700591425786, "grad_norm": 4.03125, "learning_rate": 4.088425737042627e-05, "loss": 7.0223, "step": 12950 }, { "epoch": 0.3026605402817592, "grad_norm": 3.875, "learning_rate": 4.086965091561693e-05, "loss": 7.0267, "step": 12960 }, { "epoch": 0.30289407464926055, "grad_norm": 4.34375, "learning_rate": 4.085503538179396e-05, "loss": 6.9711, "step": 12970 }, { "epoch": 0.30312760901676195, "grad_norm": 4.4375, "learning_rate": 4.0840410777318866e-05, "loss": 7.0669, "step": 12980 }, { "epoch": 0.3033611433842633, "grad_norm": 3.65625, "learning_rate": 4.082577711055839e-05, "loss": 7.1216, "step": 12990 }, { "epoch": 0.30359467775176463, "grad_norm": 4.125, "learning_rate": 4.0811134389884433e-05, "loss": 7.0816, "step": 13000 }, { "epoch": 0.30359467775176463, "eval_loss": 7.05569314956665, "eval_runtime": 78.9545, "eval_samples_per_second": 12.666, "eval_steps_per_second": 12.666, "step": 13000 }, { "epoch": 0.30382821211926603, "grad_norm": 6.15625, "learning_rate": 4.0796482623674085e-05, "loss": 7.0273, "step": 13010 }, { "epoch": 0.30406174648676737, "grad_norm": 3.953125, "learning_rate": 4.078182182030962e-05, "loss": 7.0566, "step": 13020 }, { "epoch": 0.3042952808542687, "grad_norm": 4.15625, "learning_rate": 4.076715198817845e-05, "loss": 7.064, "step": 13030 }, { "epoch": 0.30452881522177006, "grad_norm": 4.15625, "learning_rate": 4.075247313567318e-05, "loss": 7.0475, "step": 13040 }, { "epoch": 0.30476234958927145, "grad_norm": 3.796875, "learning_rate": 4.0737785271191585e-05, "loss": 7.0579, "step": 13050 }, { "epoch": 0.3049958839567728, "grad_norm": 3.40625, "learning_rate": 4.0723088403136566e-05, "loss": 7.0015, "step": 13060 }, { "epoch": 0.30522941832427414, "grad_norm": 3.84375, "learning_rate": 4.070838253991619e-05, "loss": 7.0593, "step": 13070 }, { "epoch": 0.3054629526917755, "grad_norm": 4.625, "learning_rate": 4.069366768994366e-05, "loss": 7.0496, "step": 13080 }, { "epoch": 0.3056964870592769, "grad_norm": 3.96875, "learning_rate": 4.067894386163734e-05, "loss": 7.1086, "step": 13090 }, { "epoch": 0.3059300214267782, "grad_norm": 4.84375, "learning_rate": 4.0664211063420724e-05, "loss": 7.0746, "step": 13100 }, { "epoch": 0.30616355579427956, "grad_norm": 3.421875, "learning_rate": 4.0649469303722426e-05, "loss": 6.9944, "step": 13110 }, { "epoch": 0.30639709016178096, "grad_norm": 4.21875, "learning_rate": 4.06347185909762e-05, "loss": 7.0414, "step": 13120 }, { "epoch": 0.3066306245292823, "grad_norm": 4.71875, "learning_rate": 4.0619958933620896e-05, "loss": 7.0744, "step": 13130 }, { "epoch": 0.30686415889678365, "grad_norm": 4.75, "learning_rate": 4.060519034010052e-05, "loss": 7.0254, "step": 13140 }, { "epoch": 0.307097693264285, "grad_norm": 4.5, "learning_rate": 4.059041281886417e-05, "loss": 7.0906, "step": 13150 }, { "epoch": 0.3073312276317864, "grad_norm": 6.25, "learning_rate": 4.057562637836605e-05, "loss": 6.9954, "step": 13160 }, { "epoch": 0.30756476199928773, "grad_norm": 4.5625, "learning_rate": 4.056083102706547e-05, "loss": 7.003, "step": 13170 }, { "epoch": 0.30779829636678907, "grad_norm": 5.09375, "learning_rate": 4.054602677342684e-05, "loss": 7.102, "step": 13180 }, { "epoch": 0.3080318307342904, "grad_norm": 4.09375, "learning_rate": 4.053121362591966e-05, "loss": 7.0755, "step": 13190 }, { "epoch": 0.3082653651017918, "grad_norm": 4.375, "learning_rate": 4.051639159301852e-05, "loss": 7.0307, "step": 13200 }, { "epoch": 0.30849889946929315, "grad_norm": 5.34375, "learning_rate": 4.050156068320309e-05, "loss": 7.0226, "step": 13210 }, { "epoch": 0.3087324338367945, "grad_norm": 5.25, "learning_rate": 4.0486720904958116e-05, "loss": 7.0566, "step": 13220 }, { "epoch": 0.30896596820429584, "grad_norm": 4.0, "learning_rate": 4.047187226677343e-05, "loss": 7.0041, "step": 13230 }, { "epoch": 0.30919950257179724, "grad_norm": 3.75, "learning_rate": 4.045701477714392e-05, "loss": 7.0713, "step": 13240 }, { "epoch": 0.3094330369392986, "grad_norm": 5.15625, "learning_rate": 4.044214844456955e-05, "loss": 7.0669, "step": 13250 }, { "epoch": 0.3096665713067999, "grad_norm": 4.53125, "learning_rate": 4.0427273277555334e-05, "loss": 7.0532, "step": 13260 }, { "epoch": 0.3099001056743013, "grad_norm": 4.4375, "learning_rate": 4.041238928461133e-05, "loss": 7.0416, "step": 13270 }, { "epoch": 0.31013364004180266, "grad_norm": 4.46875, "learning_rate": 4.039749647425268e-05, "loss": 7.0678, "step": 13280 }, { "epoch": 0.310367174409304, "grad_norm": 4.25, "learning_rate": 4.038259485499953e-05, "loss": 7.0294, "step": 13290 }, { "epoch": 0.31060070877680535, "grad_norm": 5.125, "learning_rate": 4.036768443537708e-05, "loss": 7.0106, "step": 13300 }, { "epoch": 0.31083424314430674, "grad_norm": 4.40625, "learning_rate": 4.0352765223915594e-05, "loss": 7.0935, "step": 13310 }, { "epoch": 0.3110677775118081, "grad_norm": 3.640625, "learning_rate": 4.033783722915032e-05, "loss": 6.9692, "step": 13320 }, { "epoch": 0.31130131187930943, "grad_norm": 4.59375, "learning_rate": 4.032290045962155e-05, "loss": 7.0805, "step": 13330 }, { "epoch": 0.31153484624681077, "grad_norm": 4.9375, "learning_rate": 4.03079549238746e-05, "loss": 7.0491, "step": 13340 }, { "epoch": 0.31176838061431217, "grad_norm": 5.03125, "learning_rate": 4.02930006304598e-05, "loss": 7.1004, "step": 13350 }, { "epoch": 0.3120019149818135, "grad_norm": 4.625, "learning_rate": 4.027803758793249e-05, "loss": 7.0768, "step": 13360 }, { "epoch": 0.31223544934931485, "grad_norm": 4.4375, "learning_rate": 4.026306580485301e-05, "loss": 7.1201, "step": 13370 }, { "epoch": 0.31246898371681625, "grad_norm": 5.15625, "learning_rate": 4.0248085289786706e-05, "loss": 7.0822, "step": 13380 }, { "epoch": 0.3127025180843176, "grad_norm": 4.46875, "learning_rate": 4.023309605130392e-05, "loss": 7.0594, "step": 13390 }, { "epoch": 0.31293605245181894, "grad_norm": 3.734375, "learning_rate": 4.021809809797998e-05, "loss": 6.9734, "step": 13400 }, { "epoch": 0.3131695868193203, "grad_norm": 4.28125, "learning_rate": 4.020309143839521e-05, "loss": 7.0823, "step": 13410 }, { "epoch": 0.3134031211868217, "grad_norm": 4.21875, "learning_rate": 4.0188076081134884e-05, "loss": 6.9787, "step": 13420 }, { "epoch": 0.313636655554323, "grad_norm": 4.5625, "learning_rate": 4.01730520347893e-05, "loss": 7.0713, "step": 13430 }, { "epoch": 0.31387018992182436, "grad_norm": 3.828125, "learning_rate": 4.0158019307953684e-05, "loss": 7.0503, "step": 13440 }, { "epoch": 0.3141037242893257, "grad_norm": 3.75, "learning_rate": 4.014297790922827e-05, "loss": 7.0239, "step": 13450 }, { "epoch": 0.3143372586568271, "grad_norm": 4.53125, "learning_rate": 4.01279278472182e-05, "loss": 7.0616, "step": 13460 }, { "epoch": 0.31457079302432844, "grad_norm": 5.4375, "learning_rate": 4.011286913053363e-05, "loss": 7.0441, "step": 13470 }, { "epoch": 0.3148043273918298, "grad_norm": 4.375, "learning_rate": 4.009780176778961e-05, "loss": 7.098, "step": 13480 }, { "epoch": 0.3150378617593312, "grad_norm": 4.375, "learning_rate": 4.008272576760618e-05, "loss": 7.0798, "step": 13490 }, { "epoch": 0.3152713961268325, "grad_norm": 4.40625, "learning_rate": 4.006764113860832e-05, "loss": 7.0234, "step": 13500 }, { "epoch": 0.3152713961268325, "eval_loss": 7.054307460784912, "eval_runtime": 79.0063, "eval_samples_per_second": 12.657, "eval_steps_per_second": 12.657, "step": 13500 }, { "epoch": 0.31550493049433387, "grad_norm": 3.8125, "learning_rate": 4.005254788942591e-05, "loss": 6.9812, "step": 13510 }, { "epoch": 0.3157384648618352, "grad_norm": 4.0, "learning_rate": 4.0037446028693786e-05, "loss": 7.0307, "step": 13520 }, { "epoch": 0.3159719992293366, "grad_norm": 4.4375, "learning_rate": 4.002233556505173e-05, "loss": 7.0336, "step": 13530 }, { "epoch": 0.31620553359683795, "grad_norm": 3.84375, "learning_rate": 4.0007216507144405e-05, "loss": 7.0794, "step": 13540 }, { "epoch": 0.3164390679643393, "grad_norm": 3.90625, "learning_rate": 3.999208886362143e-05, "loss": 7.0206, "step": 13550 }, { "epoch": 0.31667260233184064, "grad_norm": 5.46875, "learning_rate": 3.99769526431373e-05, "loss": 7.104, "step": 13560 }, { "epoch": 0.31690613669934203, "grad_norm": 4.125, "learning_rate": 3.996180785435144e-05, "loss": 7.0474, "step": 13570 }, { "epoch": 0.3171396710668434, "grad_norm": 4.53125, "learning_rate": 3.9946654505928174e-05, "loss": 7.0312, "step": 13580 }, { "epoch": 0.3173732054343447, "grad_norm": 3.75, "learning_rate": 3.9931492606536706e-05, "loss": 6.9783, "step": 13590 }, { "epoch": 0.3176067398018461, "grad_norm": 5.0625, "learning_rate": 3.991632216485116e-05, "loss": 7.0807, "step": 13600 }, { "epoch": 0.31784027416934746, "grad_norm": 3.640625, "learning_rate": 3.990114318955053e-05, "loss": 7.099, "step": 13610 }, { "epoch": 0.3180738085368488, "grad_norm": 4.1875, "learning_rate": 3.98859556893187e-05, "loss": 7.0708, "step": 13620 }, { "epoch": 0.31830734290435014, "grad_norm": 3.9375, "learning_rate": 3.987075967284441e-05, "loss": 7.0798, "step": 13630 }, { "epoch": 0.31854087727185154, "grad_norm": 3.765625, "learning_rate": 3.98555551488213e-05, "loss": 7.1137, "step": 13640 }, { "epoch": 0.3187744116393529, "grad_norm": 4.40625, "learning_rate": 3.9840342125947864e-05, "loss": 7.058, "step": 13650 }, { "epoch": 0.3190079460068542, "grad_norm": 3.953125, "learning_rate": 3.9825120612927454e-05, "loss": 7.0513, "step": 13660 }, { "epoch": 0.31924148037435557, "grad_norm": 4.75, "learning_rate": 3.980989061846828e-05, "loss": 7.0945, "step": 13670 }, { "epoch": 0.31947501474185697, "grad_norm": 4.5, "learning_rate": 3.979465215128343e-05, "loss": 6.9833, "step": 13680 }, { "epoch": 0.3197085491093583, "grad_norm": 4.03125, "learning_rate": 3.9779405220090806e-05, "loss": 7.0733, "step": 13690 }, { "epoch": 0.31994208347685965, "grad_norm": 4.8125, "learning_rate": 3.976414983361315e-05, "loss": 7.0067, "step": 13700 }, { "epoch": 0.320175617844361, "grad_norm": 4.25, "learning_rate": 3.974888600057808e-05, "loss": 7.0143, "step": 13710 }, { "epoch": 0.3204091522118624, "grad_norm": 4.0625, "learning_rate": 3.9733613729718e-05, "loss": 7.0431, "step": 13720 }, { "epoch": 0.32064268657936373, "grad_norm": 4.40625, "learning_rate": 3.971833302977018e-05, "loss": 7.0063, "step": 13730 }, { "epoch": 0.3208762209468651, "grad_norm": 4.03125, "learning_rate": 3.970304390947668e-05, "loss": 7.0333, "step": 13740 }, { "epoch": 0.3211097553143665, "grad_norm": 4.09375, "learning_rate": 3.968774637758441e-05, "loss": 7.0082, "step": 13750 }, { "epoch": 0.3213432896818678, "grad_norm": 4.90625, "learning_rate": 3.967244044284506e-05, "loss": 7.031, "step": 13760 }, { "epoch": 0.32157682404936916, "grad_norm": 3.515625, "learning_rate": 3.9657126114015156e-05, "loss": 6.9799, "step": 13770 }, { "epoch": 0.3218103584168705, "grad_norm": 3.828125, "learning_rate": 3.964180339985599e-05, "loss": 7.0159, "step": 13780 }, { "epoch": 0.3220438927843719, "grad_norm": 3.84375, "learning_rate": 3.96264723091337e-05, "loss": 7.0522, "step": 13790 }, { "epoch": 0.32227742715187324, "grad_norm": 4.5625, "learning_rate": 3.961113285061917e-05, "loss": 7.0541, "step": 13800 }, { "epoch": 0.3225109615193746, "grad_norm": 4.8125, "learning_rate": 3.9595785033088106e-05, "loss": 7.0226, "step": 13810 }, { "epoch": 0.3227444958868759, "grad_norm": 3.828125, "learning_rate": 3.9580428865320966e-05, "loss": 7.0396, "step": 13820 }, { "epoch": 0.3229780302543773, "grad_norm": 4.6875, "learning_rate": 3.956506435610302e-05, "loss": 7.0336, "step": 13830 }, { "epoch": 0.32321156462187867, "grad_norm": 3.75, "learning_rate": 3.954969151422428e-05, "loss": 7.0619, "step": 13840 }, { "epoch": 0.32344509898938, "grad_norm": 4.25, "learning_rate": 3.953431034847951e-05, "loss": 7.0321, "step": 13850 }, { "epoch": 0.3236786333568814, "grad_norm": 4.15625, "learning_rate": 3.951892086766831e-05, "loss": 7.0179, "step": 13860 }, { "epoch": 0.32391216772438275, "grad_norm": 4.15625, "learning_rate": 3.950352308059495e-05, "loss": 7.0182, "step": 13870 }, { "epoch": 0.3241457020918841, "grad_norm": 5.375, "learning_rate": 3.948811699606851e-05, "loss": 7.0778, "step": 13880 }, { "epoch": 0.32437923645938543, "grad_norm": 3.53125, "learning_rate": 3.9472702622902796e-05, "loss": 7.0112, "step": 13890 }, { "epoch": 0.32461277082688683, "grad_norm": 4.875, "learning_rate": 3.945727996991636e-05, "loss": 7.0832, "step": 13900 }, { "epoch": 0.3248463051943882, "grad_norm": 4.34375, "learning_rate": 3.9441849045932476e-05, "loss": 6.9651, "step": 13910 }, { "epoch": 0.3250798395618895, "grad_norm": 4.96875, "learning_rate": 3.942640985977917e-05, "loss": 7.0665, "step": 13920 }, { "epoch": 0.32531337392939086, "grad_norm": 4.6875, "learning_rate": 3.9410962420289185e-05, "loss": 7.0204, "step": 13930 }, { "epoch": 0.32554690829689226, "grad_norm": 4.53125, "learning_rate": 3.9395506736299994e-05, "loss": 6.988, "step": 13940 }, { "epoch": 0.3257804426643936, "grad_norm": 4.53125, "learning_rate": 3.938004281665377e-05, "loss": 6.9828, "step": 13950 }, { "epoch": 0.32601397703189494, "grad_norm": 5.0, "learning_rate": 3.936457067019742e-05, "loss": 7.0265, "step": 13960 }, { "epoch": 0.32624751139939634, "grad_norm": 4.65625, "learning_rate": 3.9349090305782545e-05, "loss": 7.108, "step": 13970 }, { "epoch": 0.3264810457668977, "grad_norm": 4.53125, "learning_rate": 3.933360173226543e-05, "loss": 7.0936, "step": 13980 }, { "epoch": 0.326714580134399, "grad_norm": 4.9375, "learning_rate": 3.93181049585071e-05, "loss": 7.0625, "step": 13990 }, { "epoch": 0.32694811450190037, "grad_norm": 4.5, "learning_rate": 3.9302599993373226e-05, "loss": 7.0302, "step": 14000 }, { "epoch": 0.32694811450190037, "eval_loss": 7.04740571975708, "eval_runtime": 78.9249, "eval_samples_per_second": 12.67, "eval_steps_per_second": 12.67, "step": 14000 }, { "epoch": 0.32718164886940176, "grad_norm": 4.53125, "learning_rate": 3.92870868457342e-05, "loss": 7.0223, "step": 14010 }, { "epoch": 0.3274151832369031, "grad_norm": 4.40625, "learning_rate": 3.927156552446507e-05, "loss": 7.0209, "step": 14020 }, { "epoch": 0.32764871760440445, "grad_norm": 4.40625, "learning_rate": 3.925603603844557e-05, "loss": 7.1066, "step": 14030 }, { "epoch": 0.3278822519719058, "grad_norm": 4.375, "learning_rate": 3.924049839656012e-05, "loss": 7.0611, "step": 14040 }, { "epoch": 0.3281157863394072, "grad_norm": 3.703125, "learning_rate": 3.922495260769777e-05, "loss": 7.0497, "step": 14050 }, { "epoch": 0.32834932070690853, "grad_norm": 5.125, "learning_rate": 3.920939868075227e-05, "loss": 6.9962, "step": 14060 }, { "epoch": 0.3285828550744099, "grad_norm": 4.03125, "learning_rate": 3.9193836624622e-05, "loss": 7.086, "step": 14070 }, { "epoch": 0.32881638944191127, "grad_norm": 4.0625, "learning_rate": 3.917826644821e-05, "loss": 7.0172, "step": 14080 }, { "epoch": 0.3290499238094126, "grad_norm": 5.3125, "learning_rate": 3.916268816042395e-05, "loss": 7.0874, "step": 14090 }, { "epoch": 0.32928345817691396, "grad_norm": 4.6875, "learning_rate": 3.9147101770176186e-05, "loss": 7.0581, "step": 14100 }, { "epoch": 0.3295169925444153, "grad_norm": 3.71875, "learning_rate": 3.9131507286383664e-05, "loss": 7.097, "step": 14110 }, { "epoch": 0.3297505269119167, "grad_norm": 3.859375, "learning_rate": 3.9115904717967976e-05, "loss": 7.1141, "step": 14120 }, { "epoch": 0.32998406127941804, "grad_norm": 4.15625, "learning_rate": 3.910029407385533e-05, "loss": 7.062, "step": 14130 }, { "epoch": 0.3302175956469194, "grad_norm": 4.4375, "learning_rate": 3.908467536297657e-05, "loss": 7.0427, "step": 14140 }, { "epoch": 0.3304511300144207, "grad_norm": 4.875, "learning_rate": 3.906904859426714e-05, "loss": 7.1074, "step": 14150 }, { "epoch": 0.3306846643819221, "grad_norm": 4.90625, "learning_rate": 3.905341377666711e-05, "loss": 7.0863, "step": 14160 }, { "epoch": 0.33091819874942346, "grad_norm": 4.8125, "learning_rate": 3.903777091912114e-05, "loss": 7.0773, "step": 14170 }, { "epoch": 0.3311517331169248, "grad_norm": 4.5625, "learning_rate": 3.90221200305785e-05, "loss": 7.0306, "step": 14180 }, { "epoch": 0.33138526748442615, "grad_norm": 4.3125, "learning_rate": 3.9006461119993045e-05, "loss": 7.0437, "step": 14190 }, { "epoch": 0.33161880185192755, "grad_norm": 3.5625, "learning_rate": 3.899079419632322e-05, "loss": 7.0225, "step": 14200 }, { "epoch": 0.3318523362194289, "grad_norm": 4.3125, "learning_rate": 3.8975119268532075e-05, "loss": 7.0867, "step": 14210 }, { "epoch": 0.33208587058693023, "grad_norm": 4.46875, "learning_rate": 3.895943634558721e-05, "loss": 7.0533, "step": 14220 }, { "epoch": 0.33231940495443163, "grad_norm": 3.796875, "learning_rate": 3.8943745436460805e-05, "loss": 7.0796, "step": 14230 }, { "epoch": 0.33255293932193297, "grad_norm": 4.84375, "learning_rate": 3.892804655012963e-05, "loss": 7.0303, "step": 14240 }, { "epoch": 0.3327864736894343, "grad_norm": 4.09375, "learning_rate": 3.8912339695574996e-05, "loss": 7.0404, "step": 14250 }, { "epoch": 0.33302000805693566, "grad_norm": 4.53125, "learning_rate": 3.889662488178279e-05, "loss": 6.9606, "step": 14260 }, { "epoch": 0.33325354242443705, "grad_norm": 4.4375, "learning_rate": 3.8880902117743436e-05, "loss": 7.0029, "step": 14270 }, { "epoch": 0.3334870767919384, "grad_norm": 4.8125, "learning_rate": 3.886517141245191e-05, "loss": 7.0161, "step": 14280 }, { "epoch": 0.33372061115943974, "grad_norm": 4.875, "learning_rate": 3.884943277490774e-05, "loss": 7.0007, "step": 14290 }, { "epoch": 0.3339541455269411, "grad_norm": 4.28125, "learning_rate": 3.883368621411499e-05, "loss": 7.0013, "step": 14300 }, { "epoch": 0.3341876798944425, "grad_norm": 4.71875, "learning_rate": 3.881793173908225e-05, "loss": 7.0608, "step": 14310 }, { "epoch": 0.3344212142619438, "grad_norm": 4.5, "learning_rate": 3.880216935882264e-05, "loss": 7.1073, "step": 14320 }, { "epoch": 0.33465474862944516, "grad_norm": 4.3125, "learning_rate": 3.878639908235381e-05, "loss": 7.1094, "step": 14330 }, { "epoch": 0.33488828299694656, "grad_norm": 4.0625, "learning_rate": 3.877062091869792e-05, "loss": 7.0263, "step": 14340 }, { "epoch": 0.3351218173644479, "grad_norm": 3.765625, "learning_rate": 3.875483487688164e-05, "loss": 6.994, "step": 14350 }, { "epoch": 0.33535535173194925, "grad_norm": 4.4375, "learning_rate": 3.873904096593616e-05, "loss": 7.0709, "step": 14360 }, { "epoch": 0.3355888860994506, "grad_norm": 4.71875, "learning_rate": 3.872323919489714e-05, "loss": 6.9709, "step": 14370 }, { "epoch": 0.335822420466952, "grad_norm": 3.6875, "learning_rate": 3.8707429572804785e-05, "loss": 7.0767, "step": 14380 }, { "epoch": 0.33605595483445333, "grad_norm": 3.890625, "learning_rate": 3.8691612108703763e-05, "loss": 7.0146, "step": 14390 }, { "epoch": 0.33628948920195467, "grad_norm": 5.4375, "learning_rate": 3.8675786811643214e-05, "loss": 7.0729, "step": 14400 }, { "epoch": 0.336523023569456, "grad_norm": 4.75, "learning_rate": 3.8659953690676795e-05, "loss": 6.9985, "step": 14410 }, { "epoch": 0.3367565579369574, "grad_norm": 4.125, "learning_rate": 3.8644112754862614e-05, "loss": 7.0782, "step": 14420 }, { "epoch": 0.33699009230445875, "grad_norm": 5.0, "learning_rate": 3.862826401326326e-05, "loss": 7.0175, "step": 14430 }, { "epoch": 0.3372236266719601, "grad_norm": 5.03125, "learning_rate": 3.8612407474945775e-05, "loss": 7.0291, "step": 14440 }, { "epoch": 0.3374571610394615, "grad_norm": 4.4375, "learning_rate": 3.859654314898168e-05, "loss": 7.0094, "step": 14450 }, { "epoch": 0.33769069540696284, "grad_norm": 3.71875, "learning_rate": 3.858067104444694e-05, "loss": 7.0075, "step": 14460 }, { "epoch": 0.3379242297744642, "grad_norm": 4.75, "learning_rate": 3.8564791170421974e-05, "loss": 6.9627, "step": 14470 }, { "epoch": 0.3381577641419655, "grad_norm": 3.671875, "learning_rate": 3.854890353599164e-05, "loss": 7.0777, "step": 14480 }, { "epoch": 0.3383912985094669, "grad_norm": 3.90625, "learning_rate": 3.853300815024524e-05, "loss": 6.9602, "step": 14490 }, { "epoch": 0.33862483287696826, "grad_norm": 4.1875, "learning_rate": 3.851710502227651e-05, "loss": 7.0478, "step": 14500 }, { "epoch": 0.33862483287696826, "eval_loss": 7.041965007781982, "eval_runtime": 79.0417, "eval_samples_per_second": 12.652, "eval_steps_per_second": 12.652, "step": 14500 }, { "epoch": 0.3388583672444696, "grad_norm": 4.3125, "learning_rate": 3.850119416118363e-05, "loss": 7.0493, "step": 14510 }, { "epoch": 0.33909190161197095, "grad_norm": 4.25, "learning_rate": 3.848527557606917e-05, "loss": 7.0068, "step": 14520 }, { "epoch": 0.33932543597947235, "grad_norm": 5.34375, "learning_rate": 3.8469349276040145e-05, "loss": 7.0369, "step": 14530 }, { "epoch": 0.3395589703469737, "grad_norm": 4.1875, "learning_rate": 3.8453415270207984e-05, "loss": 7.0876, "step": 14540 }, { "epoch": 0.33979250471447503, "grad_norm": 4.375, "learning_rate": 3.843747356768852e-05, "loss": 7.0503, "step": 14550 }, { "epoch": 0.34002603908197643, "grad_norm": 4.625, "learning_rate": 3.8421524177601976e-05, "loss": 7.0109, "step": 14560 }, { "epoch": 0.34025957344947777, "grad_norm": 4.84375, "learning_rate": 3.840556710907298e-05, "loss": 7.0024, "step": 14570 }, { "epoch": 0.3404931078169791, "grad_norm": 5.09375, "learning_rate": 3.838960237123058e-05, "loss": 7.0987, "step": 14580 }, { "epoch": 0.34072664218448045, "grad_norm": 3.78125, "learning_rate": 3.8373629973208175e-05, "loss": 7.0459, "step": 14590 }, { "epoch": 0.34096017655198185, "grad_norm": 3.921875, "learning_rate": 3.8357649924143555e-05, "loss": 7.0325, "step": 14600 }, { "epoch": 0.3411937109194832, "grad_norm": 4.375, "learning_rate": 3.8341662233178896e-05, "loss": 7.0315, "step": 14610 }, { "epoch": 0.34142724528698454, "grad_norm": 4.6875, "learning_rate": 3.832566690946076e-05, "loss": 6.9594, "step": 14620 }, { "epoch": 0.3416607796544859, "grad_norm": 5.125, "learning_rate": 3.8309663962140024e-05, "loss": 6.9767, "step": 14630 }, { "epoch": 0.3418943140219873, "grad_norm": 5.25, "learning_rate": 3.829365340037199e-05, "loss": 7.0627, "step": 14640 }, { "epoch": 0.3421278483894886, "grad_norm": 4.90625, "learning_rate": 3.8277635233316265e-05, "loss": 7.0824, "step": 14650 }, { "epoch": 0.34236138275698996, "grad_norm": 4.3125, "learning_rate": 3.826160947013685e-05, "loss": 7.0877, "step": 14660 }, { "epoch": 0.34259491712449136, "grad_norm": 4.71875, "learning_rate": 3.824557612000205e-05, "loss": 7.0934, "step": 14670 }, { "epoch": 0.3428284514919927, "grad_norm": 3.484375, "learning_rate": 3.822953519208455e-05, "loss": 7.0397, "step": 14680 }, { "epoch": 0.34306198585949405, "grad_norm": 5.9375, "learning_rate": 3.8213486695561336e-05, "loss": 7.0425, "step": 14690 }, { "epoch": 0.3432955202269954, "grad_norm": 4.09375, "learning_rate": 3.819743063961374e-05, "loss": 6.9815, "step": 14700 }, { "epoch": 0.3435290545944968, "grad_norm": 5.25, "learning_rate": 3.8181367033427427e-05, "loss": 7.0314, "step": 14710 }, { "epoch": 0.34376258896199813, "grad_norm": 4.0625, "learning_rate": 3.816529588619236e-05, "loss": 7.176, "step": 14720 }, { "epoch": 0.34399612332949947, "grad_norm": 4.1875, "learning_rate": 3.814921720710285e-05, "loss": 6.9946, "step": 14730 }, { "epoch": 0.3442296576970008, "grad_norm": 5.15625, "learning_rate": 3.813313100535747e-05, "loss": 7.0439, "step": 14740 }, { "epoch": 0.3444631920645022, "grad_norm": 5.03125, "learning_rate": 3.811703729015914e-05, "loss": 7.0708, "step": 14750 }, { "epoch": 0.34469672643200355, "grad_norm": 4.875, "learning_rate": 3.810093607071505e-05, "loss": 7.1022, "step": 14760 }, { "epoch": 0.3449302607995049, "grad_norm": 4.625, "learning_rate": 3.8084827356236706e-05, "loss": 7.0683, "step": 14770 }, { "epoch": 0.34516379516700624, "grad_norm": 3.875, "learning_rate": 3.806871115593987e-05, "loss": 7.0358, "step": 14780 }, { "epoch": 0.34539732953450764, "grad_norm": 4.875, "learning_rate": 3.805258747904462e-05, "loss": 7.0351, "step": 14790 }, { "epoch": 0.345630863902009, "grad_norm": 3.828125, "learning_rate": 3.8036456334775306e-05, "loss": 7.1023, "step": 14800 }, { "epoch": 0.3458643982695103, "grad_norm": 4.5, "learning_rate": 3.802031773236052e-05, "loss": 6.9901, "step": 14810 }, { "epoch": 0.3460979326370117, "grad_norm": 4.75, "learning_rate": 3.800417168103315e-05, "loss": 6.9765, "step": 14820 }, { "epoch": 0.34633146700451306, "grad_norm": 4.71875, "learning_rate": 3.7988018190030356e-05, "loss": 7.0208, "step": 14830 }, { "epoch": 0.3465650013720144, "grad_norm": 4.25, "learning_rate": 3.797185726859352e-05, "loss": 7.0354, "step": 14840 }, { "epoch": 0.34679853573951575, "grad_norm": 3.703125, "learning_rate": 3.7955688925968294e-05, "loss": 7.0499, "step": 14850 }, { "epoch": 0.34703207010701714, "grad_norm": 4.75, "learning_rate": 3.7939513171404564e-05, "loss": 7.0466, "step": 14860 }, { "epoch": 0.3472656044745185, "grad_norm": 4.65625, "learning_rate": 3.792333001415648e-05, "loss": 7.0601, "step": 14870 }, { "epoch": 0.34749913884201983, "grad_norm": 4.125, "learning_rate": 3.79071394634824e-05, "loss": 7.0366, "step": 14880 }, { "epoch": 0.34773267320952117, "grad_norm": 4.90625, "learning_rate": 3.7890941528644943e-05, "loss": 7.0572, "step": 14890 }, { "epoch": 0.34796620757702257, "grad_norm": 5.0, "learning_rate": 3.787473621891091e-05, "loss": 7.1052, "step": 14900 }, { "epoch": 0.3481997419445239, "grad_norm": 3.890625, "learning_rate": 3.785852354355136e-05, "loss": 7.0739, "step": 14910 }, { "epoch": 0.34843327631202525, "grad_norm": 3.65625, "learning_rate": 3.784230351184155e-05, "loss": 7.0874, "step": 14920 }, { "epoch": 0.34866681067952665, "grad_norm": 4.53125, "learning_rate": 3.7826076133060936e-05, "loss": 7.0502, "step": 14930 }, { "epoch": 0.348900345047028, "grad_norm": 4.53125, "learning_rate": 3.7809841416493185e-05, "loss": 6.9635, "step": 14940 }, { "epoch": 0.34913387941452934, "grad_norm": 4.5, "learning_rate": 3.779359937142619e-05, "loss": 7.078, "step": 14950 }, { "epoch": 0.3493674137820307, "grad_norm": 4.71875, "learning_rate": 3.777735000715199e-05, "loss": 7.0009, "step": 14960 }, { "epoch": 0.3496009481495321, "grad_norm": 4.59375, "learning_rate": 3.776109333296683e-05, "loss": 7.0931, "step": 14970 }, { "epoch": 0.3498344825170334, "grad_norm": 5.375, "learning_rate": 3.774482935817116e-05, "loss": 7.0708, "step": 14980 }, { "epoch": 0.35006801688453476, "grad_norm": 5.34375, "learning_rate": 3.772855809206956e-05, "loss": 7.0305, "step": 14990 }, { "epoch": 0.3503015512520361, "grad_norm": 4.78125, "learning_rate": 3.771227954397082e-05, "loss": 7.0196, "step": 15000 }, { "epoch": 0.3503015512520361, "eval_loss": 7.039089679718018, "eval_runtime": 78.9245, "eval_samples_per_second": 12.67, "eval_steps_per_second": 12.67, "step": 15000 }, { "epoch": 0.3505350856195375, "grad_norm": 4.6875, "learning_rate": 3.769599372318788e-05, "loss": 6.9965, "step": 15010 }, { "epoch": 0.35076861998703884, "grad_norm": 4.5, "learning_rate": 3.7679700639037865e-05, "loss": 7.0086, "step": 15020 }, { "epoch": 0.3510021543545402, "grad_norm": 3.984375, "learning_rate": 3.7663400300841996e-05, "loss": 7.078, "step": 15030 }, { "epoch": 0.3512356887220416, "grad_norm": 3.6875, "learning_rate": 3.764709271792571e-05, "loss": 7.0551, "step": 15040 }, { "epoch": 0.3514692230895429, "grad_norm": 4.75, "learning_rate": 3.763077789961856e-05, "loss": 6.9877, "step": 15050 }, { "epoch": 0.35170275745704427, "grad_norm": 5.03125, "learning_rate": 3.761445585525422e-05, "loss": 6.9933, "step": 15060 }, { "epoch": 0.3519362918245456, "grad_norm": 4.5, "learning_rate": 3.759812659417054e-05, "loss": 7.0575, "step": 15070 }, { "epoch": 0.352169826192047, "grad_norm": 3.75, "learning_rate": 3.7581790125709473e-05, "loss": 7.0149, "step": 15080 }, { "epoch": 0.35240336055954835, "grad_norm": 3.765625, "learning_rate": 3.756544645921708e-05, "loss": 7.0228, "step": 15090 }, { "epoch": 0.3526368949270497, "grad_norm": 4.0, "learning_rate": 3.754909560404357e-05, "loss": 7.0085, "step": 15100 }, { "epoch": 0.35287042929455104, "grad_norm": 3.625, "learning_rate": 3.753273756954326e-05, "loss": 7.0185, "step": 15110 }, { "epoch": 0.35310396366205243, "grad_norm": 4.03125, "learning_rate": 3.751637236507457e-05, "loss": 7.0408, "step": 15120 }, { "epoch": 0.3533374980295538, "grad_norm": 5.46875, "learning_rate": 3.7500000000000003e-05, "loss": 7.0068, "step": 15130 }, { "epoch": 0.3535710323970551, "grad_norm": 3.78125, "learning_rate": 3.748362048368619e-05, "loss": 7.0923, "step": 15140 }, { "epoch": 0.3538045667645565, "grad_norm": 4.28125, "learning_rate": 3.7467233825503826e-05, "loss": 7.052, "step": 15150 }, { "epoch": 0.35403810113205786, "grad_norm": 4.6875, "learning_rate": 3.7450840034827704e-05, "loss": 7.0065, "step": 15160 }, { "epoch": 0.3542716354995592, "grad_norm": 4.3125, "learning_rate": 3.7434439121036714e-05, "loss": 7.0299, "step": 15170 }, { "epoch": 0.35450516986706054, "grad_norm": 3.859375, "learning_rate": 3.7418031093513784e-05, "loss": 7.0373, "step": 15180 }, { "epoch": 0.35473870423456194, "grad_norm": 4.09375, "learning_rate": 3.740161596164595e-05, "loss": 7.0333, "step": 15190 }, { "epoch": 0.3549722386020633, "grad_norm": 3.828125, "learning_rate": 3.738519373482429e-05, "loss": 6.9786, "step": 15200 }, { "epoch": 0.3552057729695646, "grad_norm": 4.6875, "learning_rate": 3.736876442244392e-05, "loss": 7.0499, "step": 15210 }, { "epoch": 0.35543930733706597, "grad_norm": 4.9375, "learning_rate": 3.735232803390408e-05, "loss": 6.9647, "step": 15220 }, { "epoch": 0.35567284170456737, "grad_norm": 3.9375, "learning_rate": 3.7335884578607995e-05, "loss": 7.0731, "step": 15230 }, { "epoch": 0.3559063760720687, "grad_norm": 4.5, "learning_rate": 3.7319434065962935e-05, "loss": 7.0401, "step": 15240 }, { "epoch": 0.35613991043957005, "grad_norm": 4.53125, "learning_rate": 3.730297650538024e-05, "loss": 7.0718, "step": 15250 }, { "epoch": 0.3563734448070714, "grad_norm": 3.375, "learning_rate": 3.728651190627527e-05, "loss": 7.0591, "step": 15260 }, { "epoch": 0.3566069791745728, "grad_norm": 3.96875, "learning_rate": 3.727004027806741e-05, "loss": 7.0773, "step": 15270 }, { "epoch": 0.35684051354207413, "grad_norm": 4.65625, "learning_rate": 3.725356163018004e-05, "loss": 7.036, "step": 15280 }, { "epoch": 0.3570740479095755, "grad_norm": 4.375, "learning_rate": 3.7237075972040605e-05, "loss": 6.9794, "step": 15290 }, { "epoch": 0.3573075822770769, "grad_norm": 4.78125, "learning_rate": 3.722058331308053e-05, "loss": 7.1081, "step": 15300 }, { "epoch": 0.3575411166445782, "grad_norm": 5.75, "learning_rate": 3.7204083662735246e-05, "loss": 7.0323, "step": 15310 }, { "epoch": 0.35777465101207956, "grad_norm": 3.90625, "learning_rate": 3.718757703044419e-05, "loss": 7.0878, "step": 15320 }, { "epoch": 0.3580081853795809, "grad_norm": 3.546875, "learning_rate": 3.71710634256508e-05, "loss": 7.0968, "step": 15330 }, { "epoch": 0.3582417197470823, "grad_norm": 4.78125, "learning_rate": 3.715454285780248e-05, "loss": 7.0128, "step": 15340 }, { "epoch": 0.35847525411458364, "grad_norm": 4.6875, "learning_rate": 3.713801533635065e-05, "loss": 7.0707, "step": 15350 }, { "epoch": 0.358708788482085, "grad_norm": 4.4375, "learning_rate": 3.712148087075067e-05, "loss": 7.0388, "step": 15360 }, { "epoch": 0.3589423228495863, "grad_norm": 3.84375, "learning_rate": 3.710493947046191e-05, "loss": 7.0214, "step": 15370 }, { "epoch": 0.3591758572170877, "grad_norm": 6.1875, "learning_rate": 3.708839114494769e-05, "loss": 7.0935, "step": 15380 }, { "epoch": 0.35940939158458907, "grad_norm": 4.78125, "learning_rate": 3.707183590367529e-05, "loss": 7.0578, "step": 15390 }, { "epoch": 0.3596429259520904, "grad_norm": 4.09375, "learning_rate": 3.7055273756115944e-05, "loss": 7.0502, "step": 15400 }, { "epoch": 0.3598764603195918, "grad_norm": 4.84375, "learning_rate": 3.703870471174486e-05, "loss": 7.0363, "step": 15410 }, { "epoch": 0.36010999468709315, "grad_norm": 3.828125, "learning_rate": 3.702212878004114e-05, "loss": 7.0716, "step": 15420 }, { "epoch": 0.3603435290545945, "grad_norm": 5.1875, "learning_rate": 3.70055459704879e-05, "loss": 7.0112, "step": 15430 }, { "epoch": 0.36057706342209583, "grad_norm": 5.46875, "learning_rate": 3.6988956292572126e-05, "loss": 6.9219, "step": 15440 }, { "epoch": 0.36081059778959723, "grad_norm": 4.34375, "learning_rate": 3.6972359755784764e-05, "loss": 7.0395, "step": 15450 }, { "epoch": 0.3610441321570986, "grad_norm": 4.1875, "learning_rate": 3.695575636962068e-05, "loss": 7.0117, "step": 15460 }, { "epoch": 0.3612776665245999, "grad_norm": 4.75, "learning_rate": 3.693914614357865e-05, "loss": 7.0896, "step": 15470 }, { "epoch": 0.36151120089210126, "grad_norm": 4.4375, "learning_rate": 3.692252908716139e-05, "loss": 7.0335, "step": 15480 }, { "epoch": 0.36174473525960266, "grad_norm": 3.875, "learning_rate": 3.690590520987548e-05, "loss": 7.0495, "step": 15490 }, { "epoch": 0.361978269627104, "grad_norm": 3.5, "learning_rate": 3.688927452123144e-05, "loss": 7.0163, "step": 15500 }, { "epoch": 0.361978269627104, "eval_loss": 7.0349955558776855, "eval_runtime": 78.7463, "eval_samples_per_second": 12.699, "eval_steps_per_second": 12.699, "step": 15500 }, { "epoch": 0.36221180399460534, "grad_norm": 4.875, "learning_rate": 3.687263703074367e-05, "loss": 7.0404, "step": 15510 }, { "epoch": 0.36244533836210674, "grad_norm": 5.53125, "learning_rate": 3.685599274793046e-05, "loss": 7.0495, "step": 15520 }, { "epoch": 0.3626788727296081, "grad_norm": 3.8125, "learning_rate": 3.683934168231399e-05, "loss": 7.0249, "step": 15530 }, { "epoch": 0.3629124070971094, "grad_norm": 4.59375, "learning_rate": 3.682268384342033e-05, "loss": 7.081, "step": 15540 }, { "epoch": 0.36314594146461077, "grad_norm": 4.25, "learning_rate": 3.68060192407794e-05, "loss": 7.0313, "step": 15550 }, { "epoch": 0.36337947583211216, "grad_norm": 3.796875, "learning_rate": 3.678934788392502e-05, "loss": 7.0067, "step": 15560 }, { "epoch": 0.3636130101996135, "grad_norm": 3.609375, "learning_rate": 3.677266978239484e-05, "loss": 6.9921, "step": 15570 }, { "epoch": 0.36384654456711485, "grad_norm": 4.375, "learning_rate": 3.675598494573041e-05, "loss": 7.0649, "step": 15580 }, { "epoch": 0.3640800789346162, "grad_norm": 5.0, "learning_rate": 3.6739293383477094e-05, "loss": 7.0632, "step": 15590 }, { "epoch": 0.3643136133021176, "grad_norm": 3.640625, "learning_rate": 3.672259510518414e-05, "loss": 7.054, "step": 15600 }, { "epoch": 0.36454714766961893, "grad_norm": 4.5, "learning_rate": 3.670589012040459e-05, "loss": 7.0125, "step": 15610 }, { "epoch": 0.3647806820371203, "grad_norm": 4.1875, "learning_rate": 3.668917843869538e-05, "loss": 7.0061, "step": 15620 }, { "epoch": 0.36501421640462167, "grad_norm": 3.5625, "learning_rate": 3.667246006961722e-05, "loss": 7.0119, "step": 15630 }, { "epoch": 0.365247750772123, "grad_norm": 4.625, "learning_rate": 3.66557350227347e-05, "loss": 6.9915, "step": 15640 }, { "epoch": 0.36548128513962436, "grad_norm": 4.1875, "learning_rate": 3.663900330761619e-05, "loss": 6.9622, "step": 15650 }, { "epoch": 0.3657148195071257, "grad_norm": 3.984375, "learning_rate": 3.662226493383391e-05, "loss": 6.9263, "step": 15660 }, { "epoch": 0.3659483538746271, "grad_norm": 4.03125, "learning_rate": 3.660551991096383e-05, "loss": 7.0317, "step": 15670 }, { "epoch": 0.36618188824212844, "grad_norm": 3.484375, "learning_rate": 3.65887682485858e-05, "loss": 7.0721, "step": 15680 }, { "epoch": 0.3664154226096298, "grad_norm": 4.09375, "learning_rate": 3.657200995628342e-05, "loss": 7.0276, "step": 15690 }, { "epoch": 0.3666489569771311, "grad_norm": 4.0, "learning_rate": 3.655524504364409e-05, "loss": 6.9833, "step": 15700 }, { "epoch": 0.3668824913446325, "grad_norm": 3.953125, "learning_rate": 3.653847352025901e-05, "loss": 6.9942, "step": 15710 }, { "epoch": 0.36711602571213386, "grad_norm": 4.90625, "learning_rate": 3.652169539572315e-05, "loss": 7.055, "step": 15720 }, { "epoch": 0.3673495600796352, "grad_norm": 3.828125, "learning_rate": 3.650491067963525e-05, "loss": 7.0381, "step": 15730 }, { "epoch": 0.36758309444713655, "grad_norm": 4.09375, "learning_rate": 3.648811938159785e-05, "loss": 6.9702, "step": 15740 }, { "epoch": 0.36781662881463795, "grad_norm": 5.25, "learning_rate": 3.647132151121723e-05, "loss": 6.9671, "step": 15750 }, { "epoch": 0.3680501631821393, "grad_norm": 4.84375, "learning_rate": 3.6454517078103425e-05, "loss": 7.1368, "step": 15760 }, { "epoch": 0.36828369754964063, "grad_norm": 3.609375, "learning_rate": 3.6437706091870266e-05, "loss": 7.0542, "step": 15770 }, { "epoch": 0.36851723191714203, "grad_norm": 4.5625, "learning_rate": 3.642088856213528e-05, "loss": 7.0234, "step": 15780 }, { "epoch": 0.36875076628464337, "grad_norm": 4.46875, "learning_rate": 3.6404064498519765e-05, "loss": 7.0099, "step": 15790 }, { "epoch": 0.3689843006521447, "grad_norm": 4.125, "learning_rate": 3.638723391064875e-05, "loss": 7.021, "step": 15800 }, { "epoch": 0.36921783501964606, "grad_norm": 4.0, "learning_rate": 3.637039680815101e-05, "loss": 7.0191, "step": 15810 }, { "epoch": 0.36945136938714745, "grad_norm": 4.40625, "learning_rate": 3.635355320065903e-05, "loss": 6.9853, "step": 15820 }, { "epoch": 0.3696849037546488, "grad_norm": 5.15625, "learning_rate": 3.6336703097809024e-05, "loss": 6.9919, "step": 15830 }, { "epoch": 0.36991843812215014, "grad_norm": 4.28125, "learning_rate": 3.631984650924094e-05, "loss": 7.0879, "step": 15840 }, { "epoch": 0.3701519724896515, "grad_norm": 4.53125, "learning_rate": 3.630298344459838e-05, "loss": 6.911, "step": 15850 }, { "epoch": 0.3703855068571529, "grad_norm": 4.96875, "learning_rate": 3.628611391352872e-05, "loss": 7.0097, "step": 15860 }, { "epoch": 0.3706190412246542, "grad_norm": 4.1875, "learning_rate": 3.6269237925682995e-05, "loss": 7.0528, "step": 15870 }, { "epoch": 0.37085257559215556, "grad_norm": 4.625, "learning_rate": 3.625235549071593e-05, "loss": 7.0454, "step": 15880 }, { "epoch": 0.37108610995965696, "grad_norm": 4.03125, "learning_rate": 3.6235466618285977e-05, "loss": 7.0226, "step": 15890 }, { "epoch": 0.3713196443271583, "grad_norm": 4.34375, "learning_rate": 3.621857131805522e-05, "loss": 6.9688, "step": 15900 }, { "epoch": 0.37155317869465965, "grad_norm": 5.125, "learning_rate": 3.6201669599689465e-05, "loss": 7.0629, "step": 15910 }, { "epoch": 0.371786713062161, "grad_norm": 5.1875, "learning_rate": 3.618476147285816e-05, "loss": 7.0291, "step": 15920 }, { "epoch": 0.3720202474296624, "grad_norm": 3.859375, "learning_rate": 3.616784694723444e-05, "loss": 7.0401, "step": 15930 }, { "epoch": 0.37225378179716373, "grad_norm": 4.1875, "learning_rate": 3.615092603249507e-05, "loss": 7.0627, "step": 15940 }, { "epoch": 0.37248731616466507, "grad_norm": 3.4375, "learning_rate": 3.6133998738320515e-05, "loss": 7.1126, "step": 15950 }, { "epoch": 0.3727208505321664, "grad_norm": 4.28125, "learning_rate": 3.6117065074394854e-05, "loss": 6.9996, "step": 15960 }, { "epoch": 0.3729543848996678, "grad_norm": 4.71875, "learning_rate": 3.610012505040581e-05, "loss": 6.9356, "step": 15970 }, { "epoch": 0.37318791926716915, "grad_norm": 4.21875, "learning_rate": 3.6083178676044785e-05, "loss": 6.9576, "step": 15980 }, { "epoch": 0.3734214536346705, "grad_norm": 4.6875, "learning_rate": 3.6066225961006765e-05, "loss": 7.0751, "step": 15990 }, { "epoch": 0.3736549880021719, "grad_norm": 3.921875, "learning_rate": 3.604926691499038e-05, "loss": 7.0196, "step": 16000 }, { "epoch": 0.3736549880021719, "eval_loss": 7.029934406280518, "eval_runtime": 79.0474, "eval_samples_per_second": 12.651, "eval_steps_per_second": 12.651, "step": 16000 }, { "epoch": 0.37388852236967324, "grad_norm": 4.3125, "learning_rate": 3.60323015476979e-05, "loss": 7.031, "step": 16010 }, { "epoch": 0.3741220567371746, "grad_norm": 4.8125, "learning_rate": 3.601532986883518e-05, "loss": 7.0243, "step": 16020 }, { "epoch": 0.3743555911046759, "grad_norm": 4.125, "learning_rate": 3.5998351888111724e-05, "loss": 7.0304, "step": 16030 }, { "epoch": 0.3745891254721773, "grad_norm": 3.890625, "learning_rate": 3.59813676152406e-05, "loss": 7.0658, "step": 16040 }, { "epoch": 0.37482265983967866, "grad_norm": 8.25, "learning_rate": 3.596437705993852e-05, "loss": 7.0969, "step": 16050 }, { "epoch": 0.37505619420718, "grad_norm": 5.09375, "learning_rate": 3.594738023192574e-05, "loss": 7.0048, "step": 16060 }, { "epoch": 0.37528972857468135, "grad_norm": 4.65625, "learning_rate": 3.5930377140926145e-05, "loss": 6.9642, "step": 16070 }, { "epoch": 0.37552326294218275, "grad_norm": 4.5625, "learning_rate": 3.591336779666718e-05, "loss": 6.9992, "step": 16080 }, { "epoch": 0.3757567973096841, "grad_norm": 5.125, "learning_rate": 3.589635220887988e-05, "loss": 7.0616, "step": 16090 }, { "epoch": 0.37599033167718543, "grad_norm": 4.03125, "learning_rate": 3.587933038729886e-05, "loss": 7.0514, "step": 16100 }, { "epoch": 0.3762238660446868, "grad_norm": 4.375, "learning_rate": 3.586230234166227e-05, "loss": 7.0547, "step": 16110 }, { "epoch": 0.37645740041218817, "grad_norm": 4.875, "learning_rate": 3.584526808171186e-05, "loss": 7.0271, "step": 16120 }, { "epoch": 0.3766909347796895, "grad_norm": 5.03125, "learning_rate": 3.582822761719289e-05, "loss": 6.9727, "step": 16130 }, { "epoch": 0.37692446914719085, "grad_norm": 5.03125, "learning_rate": 3.5811180957854224e-05, "loss": 6.9901, "step": 16140 }, { "epoch": 0.37715800351469225, "grad_norm": 4.46875, "learning_rate": 3.5794128113448225e-05, "loss": 7.0212, "step": 16150 }, { "epoch": 0.3773915378821936, "grad_norm": 4.09375, "learning_rate": 3.577706909373081e-05, "loss": 7.0258, "step": 16160 }, { "epoch": 0.37762507224969494, "grad_norm": 4.65625, "learning_rate": 3.576000390846143e-05, "loss": 7.0069, "step": 16170 }, { "epoch": 0.3778586066171963, "grad_norm": 3.796875, "learning_rate": 3.574293256740307e-05, "loss": 7.0046, "step": 16180 }, { "epoch": 0.3780921409846977, "grad_norm": 3.8125, "learning_rate": 3.5725855080322217e-05, "loss": 7.0604, "step": 16190 }, { "epoch": 0.378325675352199, "grad_norm": 3.625, "learning_rate": 3.5708771456988905e-05, "loss": 7.0163, "step": 16200 }, { "epoch": 0.37855920971970036, "grad_norm": 4.875, "learning_rate": 3.5691681707176644e-05, "loss": 7.0254, "step": 16210 }, { "epoch": 0.37879274408720176, "grad_norm": 3.875, "learning_rate": 3.567458584066247e-05, "loss": 6.9966, "step": 16220 }, { "epoch": 0.3790262784547031, "grad_norm": 5.09375, "learning_rate": 3.565748386722691e-05, "loss": 7.0325, "step": 16230 }, { "epoch": 0.37925981282220445, "grad_norm": 4.75, "learning_rate": 3.5640375796654006e-05, "loss": 7.0524, "step": 16240 }, { "epoch": 0.3794933471897058, "grad_norm": 4.6875, "learning_rate": 3.562326163873125e-05, "loss": 7.0209, "step": 16250 }, { "epoch": 0.3797268815572072, "grad_norm": 5.15625, "learning_rate": 3.560614140324964e-05, "loss": 7.0564, "step": 16260 }, { "epoch": 0.37996041592470853, "grad_norm": 4.6875, "learning_rate": 3.5589015100003666e-05, "loss": 6.9606, "step": 16270 }, { "epoch": 0.38019395029220987, "grad_norm": 3.5, "learning_rate": 3.5571882738791245e-05, "loss": 7.0012, "step": 16280 }, { "epoch": 0.3804274846597112, "grad_norm": 3.984375, "learning_rate": 3.5554744329413804e-05, "loss": 7.0378, "step": 16290 }, { "epoch": 0.3806610190272126, "grad_norm": 4.25, "learning_rate": 3.55375998816762e-05, "loss": 7.0768, "step": 16300 }, { "epoch": 0.38089455339471395, "grad_norm": 4.0625, "learning_rate": 3.552044940538677e-05, "loss": 7.0619, "step": 16310 }, { "epoch": 0.3811280877622153, "grad_norm": 4.59375, "learning_rate": 3.550329291035728e-05, "loss": 7.0292, "step": 16320 }, { "epoch": 0.38136162212971664, "grad_norm": 4.84375, "learning_rate": 3.5486130406402946e-05, "loss": 7.0516, "step": 16330 }, { "epoch": 0.38159515649721804, "grad_norm": 4.0, "learning_rate": 3.5468961903342426e-05, "loss": 7.0457, "step": 16340 }, { "epoch": 0.3818286908647194, "grad_norm": 4.4375, "learning_rate": 3.54517874109978e-05, "loss": 7.1227, "step": 16350 }, { "epoch": 0.3820622252322207, "grad_norm": 4.375, "learning_rate": 3.543460693919458e-05, "loss": 7.0094, "step": 16360 }, { "epoch": 0.3822957595997221, "grad_norm": 4.59375, "learning_rate": 3.54174204977617e-05, "loss": 7.079, "step": 16370 }, { "epoch": 0.38252929396722346, "grad_norm": 4.46875, "learning_rate": 3.5400228096531504e-05, "loss": 6.9842, "step": 16380 }, { "epoch": 0.3827628283347248, "grad_norm": 4.53125, "learning_rate": 3.5383029745339767e-05, "loss": 7.0227, "step": 16390 }, { "epoch": 0.38299636270222615, "grad_norm": 5.53125, "learning_rate": 3.536582545402564e-05, "loss": 7.0233, "step": 16400 }, { "epoch": 0.38322989706972754, "grad_norm": 5.3125, "learning_rate": 3.534861523243168e-05, "loss": 6.8674, "step": 16410 }, { "epoch": 0.3834634314372289, "grad_norm": 5.6875, "learning_rate": 3.533139909040385e-05, "loss": 6.9977, "step": 16420 }, { "epoch": 0.38369696580473023, "grad_norm": 4.5, "learning_rate": 3.531417703779147e-05, "loss": 7.047, "step": 16430 }, { "epoch": 0.38393050017223157, "grad_norm": 5.59375, "learning_rate": 3.5296949084447286e-05, "loss": 7.0548, "step": 16440 }, { "epoch": 0.38416403453973297, "grad_norm": 4.5, "learning_rate": 3.527971524022738e-05, "loss": 7.0915, "step": 16450 }, { "epoch": 0.3843975689072343, "grad_norm": 3.765625, "learning_rate": 3.5262475514991216e-05, "loss": 7.0579, "step": 16460 }, { "epoch": 0.38463110327473565, "grad_norm": 4.5625, "learning_rate": 3.524522991860164e-05, "loss": 7.0032, "step": 16470 }, { "epoch": 0.38486463764223705, "grad_norm": 4.75, "learning_rate": 3.5227978460924846e-05, "loss": 7.0227, "step": 16480 }, { "epoch": 0.3850981720097384, "grad_norm": 4.5625, "learning_rate": 3.521072115183035e-05, "loss": 7.0036, "step": 16490 }, { "epoch": 0.38533170637723974, "grad_norm": 4.875, "learning_rate": 3.519345800119108e-05, "loss": 7.0835, "step": 16500 }, { "epoch": 0.38533170637723974, "eval_loss": 7.026010513305664, "eval_runtime": 78.7963, "eval_samples_per_second": 12.691, "eval_steps_per_second": 12.691, "step": 16500 }, { "epoch": 0.3855652407447411, "grad_norm": 4.3125, "learning_rate": 3.517618901888324e-05, "loss": 7.0021, "step": 16510 }, { "epoch": 0.3857987751122425, "grad_norm": 4.6875, "learning_rate": 3.515891421478641e-05, "loss": 7.0409, "step": 16520 }, { "epoch": 0.3860323094797438, "grad_norm": 3.875, "learning_rate": 3.5141633598783494e-05, "loss": 7.0361, "step": 16530 }, { "epoch": 0.38626584384724516, "grad_norm": 3.59375, "learning_rate": 3.512434718076072e-05, "loss": 6.9909, "step": 16540 }, { "epoch": 0.3864993782147465, "grad_norm": 4.71875, "learning_rate": 3.510705497060762e-05, "loss": 7.002, "step": 16550 }, { "epoch": 0.3867329125822479, "grad_norm": 4.5, "learning_rate": 3.5089756978217084e-05, "loss": 7.034, "step": 16560 }, { "epoch": 0.38696644694974924, "grad_norm": 4.125, "learning_rate": 3.5072453213485233e-05, "loss": 7.1009, "step": 16570 }, { "epoch": 0.3871999813172506, "grad_norm": 4.6875, "learning_rate": 3.505514368631156e-05, "loss": 7.0227, "step": 16580 }, { "epoch": 0.387433515684752, "grad_norm": 4.46875, "learning_rate": 3.5037828406598835e-05, "loss": 7.0587, "step": 16590 }, { "epoch": 0.3876670500522533, "grad_norm": 4.9375, "learning_rate": 3.5020507384253116e-05, "loss": 7.0105, "step": 16600 }, { "epoch": 0.38790058441975467, "grad_norm": 4.53125, "learning_rate": 3.5003180629183726e-05, "loss": 7.0269, "step": 16610 }, { "epoch": 0.388134118787256, "grad_norm": 4.53125, "learning_rate": 3.4985848151303304e-05, "loss": 7.0731, "step": 16620 }, { "epoch": 0.3883676531547574, "grad_norm": 5.40625, "learning_rate": 3.4968509960527746e-05, "loss": 7.007, "step": 16630 }, { "epoch": 0.38860118752225875, "grad_norm": 4.1875, "learning_rate": 3.49511660667762e-05, "loss": 7.0275, "step": 16640 }, { "epoch": 0.3888347218897601, "grad_norm": 3.734375, "learning_rate": 3.493381647997111e-05, "loss": 6.9982, "step": 16650 }, { "epoch": 0.38906825625726144, "grad_norm": 3.734375, "learning_rate": 3.4916461210038154e-05, "loss": 7.0518, "step": 16660 }, { "epoch": 0.38930179062476283, "grad_norm": 3.578125, "learning_rate": 3.4899100266906254e-05, "loss": 7.0393, "step": 16670 }, { "epoch": 0.3895353249922642, "grad_norm": 5.0625, "learning_rate": 3.488173366050761e-05, "loss": 7.0486, "step": 16680 }, { "epoch": 0.3897688593597655, "grad_norm": 5.125, "learning_rate": 3.486436140077764e-05, "loss": 6.9939, "step": 16690 }, { "epoch": 0.3900023937272669, "grad_norm": 3.46875, "learning_rate": 3.484698349765499e-05, "loss": 7.0549, "step": 16700 }, { "epoch": 0.39023592809476826, "grad_norm": 5.15625, "learning_rate": 3.482959996108155e-05, "loss": 7.0832, "step": 16710 }, { "epoch": 0.3904694624622696, "grad_norm": 3.921875, "learning_rate": 3.481221080100242e-05, "loss": 7.0458, "step": 16720 }, { "epoch": 0.39070299682977094, "grad_norm": 4.15625, "learning_rate": 3.4794816027365925e-05, "loss": 7.0546, "step": 16730 }, { "epoch": 0.39093653119727234, "grad_norm": 4.53125, "learning_rate": 3.47774156501236e-05, "loss": 7.0516, "step": 16740 }, { "epoch": 0.3911700655647737, "grad_norm": 4.40625, "learning_rate": 3.47600096792302e-05, "loss": 7.0276, "step": 16750 }, { "epoch": 0.391403599932275, "grad_norm": 4.21875, "learning_rate": 3.4742598124643646e-05, "loss": 7.0544, "step": 16760 }, { "epoch": 0.39163713429977637, "grad_norm": 3.828125, "learning_rate": 3.472518099632508e-05, "loss": 7.0634, "step": 16770 }, { "epoch": 0.39187066866727777, "grad_norm": 5.8125, "learning_rate": 3.470775830423883e-05, "loss": 7.0237, "step": 16780 }, { "epoch": 0.3921042030347791, "grad_norm": 3.15625, "learning_rate": 3.46903300583524e-05, "loss": 7.0394, "step": 16790 }, { "epoch": 0.39233773740228045, "grad_norm": 4.5625, "learning_rate": 3.467289626863647e-05, "loss": 7.0342, "step": 16800 }, { "epoch": 0.3925712717697818, "grad_norm": 4.15625, "learning_rate": 3.4655456945064915e-05, "loss": 7.047, "step": 16810 }, { "epoch": 0.3928048061372832, "grad_norm": 4.3125, "learning_rate": 3.4638012097614735e-05, "loss": 7.0246, "step": 16820 }, { "epoch": 0.39303834050478453, "grad_norm": 5.28125, "learning_rate": 3.4620561736266115e-05, "loss": 7.0099, "step": 16830 }, { "epoch": 0.3932718748722859, "grad_norm": 5.03125, "learning_rate": 3.460310587100241e-05, "loss": 6.932, "step": 16840 }, { "epoch": 0.3935054092397873, "grad_norm": 5.03125, "learning_rate": 3.458564451181009e-05, "loss": 6.9617, "step": 16850 }, { "epoch": 0.3937389436072886, "grad_norm": 3.8125, "learning_rate": 3.456817766867877e-05, "loss": 7.016, "step": 16860 }, { "epoch": 0.39397247797478996, "grad_norm": 3.96875, "learning_rate": 3.455070535160126e-05, "loss": 6.9113, "step": 16870 }, { "epoch": 0.3942060123422913, "grad_norm": 4.0, "learning_rate": 3.4533227570573405e-05, "loss": 7.0844, "step": 16880 }, { "epoch": 0.3944395467097927, "grad_norm": 4.875, "learning_rate": 3.451574433559426e-05, "loss": 6.9957, "step": 16890 }, { "epoch": 0.39467308107729404, "grad_norm": 4.84375, "learning_rate": 3.449825565666597e-05, "loss": 7.041, "step": 16900 }, { "epoch": 0.3949066154447954, "grad_norm": 4.0, "learning_rate": 3.448076154379378e-05, "loss": 7.036, "step": 16910 }, { "epoch": 0.3951401498122967, "grad_norm": 3.53125, "learning_rate": 3.4463262006986065e-05, "loss": 7.0052, "step": 16920 }, { "epoch": 0.3953736841797981, "grad_norm": 4.59375, "learning_rate": 3.444575705625429e-05, "loss": 7.0173, "step": 16930 }, { "epoch": 0.39560721854729947, "grad_norm": 5.3125, "learning_rate": 3.442824670161302e-05, "loss": 6.9775, "step": 16940 }, { "epoch": 0.3958407529148008, "grad_norm": 4.09375, "learning_rate": 3.441073095307992e-05, "loss": 6.9556, "step": 16950 }, { "epoch": 0.3960742872823022, "grad_norm": 4.4375, "learning_rate": 3.439320982067574e-05, "loss": 6.9682, "step": 16960 }, { "epoch": 0.39630782164980355, "grad_norm": 4.78125, "learning_rate": 3.437568331442429e-05, "loss": 7.0454, "step": 16970 }, { "epoch": 0.3965413560173049, "grad_norm": 5.21875, "learning_rate": 3.435815144435248e-05, "loss": 7.0236, "step": 16980 }, { "epoch": 0.39677489038480623, "grad_norm": 3.71875, "learning_rate": 3.4340614220490285e-05, "loss": 7.0281, "step": 16990 }, { "epoch": 0.39700842475230763, "grad_norm": 4.875, "learning_rate": 3.4323071652870706e-05, "loss": 7.0292, "step": 17000 }, { "epoch": 0.39700842475230763, "eval_loss": 7.025566577911377, "eval_runtime": 78.7869, "eval_samples_per_second": 12.692, "eval_steps_per_second": 12.692, "step": 17000 }, { "epoch": 0.397241959119809, "grad_norm": 4.59375, "learning_rate": 3.430552375152987e-05, "loss": 6.9872, "step": 17010 }, { "epoch": 0.3974754934873103, "grad_norm": 4.46875, "learning_rate": 3.428797052650688e-05, "loss": 7.1328, "step": 17020 }, { "epoch": 0.39770902785481166, "grad_norm": 4.40625, "learning_rate": 3.427041198784395e-05, "loss": 6.9809, "step": 17030 }, { "epoch": 0.39794256222231306, "grad_norm": 4.1875, "learning_rate": 3.425284814558628e-05, "loss": 7.0237, "step": 17040 }, { "epoch": 0.3981760965898144, "grad_norm": 4.34375, "learning_rate": 3.423527900978215e-05, "loss": 7.0359, "step": 17050 }, { "epoch": 0.39840963095731574, "grad_norm": 4.78125, "learning_rate": 3.4217704590482836e-05, "loss": 6.9606, "step": 17060 }, { "epoch": 0.39864316532481714, "grad_norm": 4.46875, "learning_rate": 3.420012489774265e-05, "loss": 7.0171, "step": 17070 }, { "epoch": 0.3988766996923185, "grad_norm": 4.25, "learning_rate": 3.418253994161892e-05, "loss": 7.0538, "step": 17080 }, { "epoch": 0.3991102340598198, "grad_norm": 4.71875, "learning_rate": 3.416494973217199e-05, "loss": 7.0396, "step": 17090 }, { "epoch": 0.39934376842732117, "grad_norm": 4.3125, "learning_rate": 3.414735427946519e-05, "loss": 6.9944, "step": 17100 }, { "epoch": 0.39957730279482256, "grad_norm": 4.53125, "learning_rate": 3.412975359356488e-05, "loss": 7.0634, "step": 17110 }, { "epoch": 0.3998108371623239, "grad_norm": 4.1875, "learning_rate": 3.4112147684540386e-05, "loss": 7.0738, "step": 17120 }, { "epoch": 0.40004437152982525, "grad_norm": 3.84375, "learning_rate": 3.409453656246403e-05, "loss": 7.0054, "step": 17130 }, { "epoch": 0.4002779058973266, "grad_norm": 5.90625, "learning_rate": 3.407692023741114e-05, "loss": 6.9616, "step": 17140 }, { "epoch": 0.400511440264828, "grad_norm": 4.8125, "learning_rate": 3.4059298719459976e-05, "loss": 7.0398, "step": 17150 }, { "epoch": 0.40074497463232933, "grad_norm": 4.46875, "learning_rate": 3.4041672018691794e-05, "loss": 7.0089, "step": 17160 }, { "epoch": 0.4009785089998307, "grad_norm": 4.21875, "learning_rate": 3.402404014519084e-05, "loss": 7.1132, "step": 17170 }, { "epoch": 0.40121204336733207, "grad_norm": 4.09375, "learning_rate": 3.400640310904427e-05, "loss": 7.0922, "step": 17180 }, { "epoch": 0.4014455777348334, "grad_norm": 4.6875, "learning_rate": 3.3988760920342225e-05, "loss": 6.9997, "step": 17190 }, { "epoch": 0.40167911210233476, "grad_norm": 4.1875, "learning_rate": 3.3971113589177795e-05, "loss": 7.0743, "step": 17200 }, { "epoch": 0.4019126464698361, "grad_norm": 4.625, "learning_rate": 3.395346112564699e-05, "loss": 7.0252, "step": 17210 }, { "epoch": 0.4021461808373375, "grad_norm": 5.25, "learning_rate": 3.393580353984877e-05, "loss": 7.0916, "step": 17220 }, { "epoch": 0.40237971520483884, "grad_norm": 4.59375, "learning_rate": 3.391814084188502e-05, "loss": 7.0737, "step": 17230 }, { "epoch": 0.4026132495723402, "grad_norm": 3.9375, "learning_rate": 3.390047304186057e-05, "loss": 6.9209, "step": 17240 }, { "epoch": 0.4028467839398415, "grad_norm": 5.375, "learning_rate": 3.3882800149883144e-05, "loss": 6.9733, "step": 17250 }, { "epoch": 0.4030803183073429, "grad_norm": 4.34375, "learning_rate": 3.386512217606339e-05, "loss": 6.9729, "step": 17260 }, { "epoch": 0.40331385267484426, "grad_norm": 4.21875, "learning_rate": 3.3847439130514866e-05, "loss": 7.0692, "step": 17270 }, { "epoch": 0.4035473870423456, "grad_norm": 4.5625, "learning_rate": 3.382975102335402e-05, "loss": 6.9809, "step": 17280 }, { "epoch": 0.40378092140984695, "grad_norm": 4.5, "learning_rate": 3.38120578647002e-05, "loss": 6.9784, "step": 17290 }, { "epoch": 0.40401445577734835, "grad_norm": 5.09375, "learning_rate": 3.3794359664675655e-05, "loss": 7.0823, "step": 17300 }, { "epoch": 0.4042479901448497, "grad_norm": 4.4375, "learning_rate": 3.3776656433405506e-05, "loss": 7.0673, "step": 17310 }, { "epoch": 0.40448152451235103, "grad_norm": 6.03125, "learning_rate": 3.375894818101776e-05, "loss": 6.9996, "step": 17320 }, { "epoch": 0.40471505887985243, "grad_norm": 5.25, "learning_rate": 3.3741234917643295e-05, "loss": 7.009, "step": 17330 }, { "epoch": 0.40494859324735377, "grad_norm": 4.3125, "learning_rate": 3.3723516653415846e-05, "loss": 6.9943, "step": 17340 }, { "epoch": 0.4051821276148551, "grad_norm": 4.90625, "learning_rate": 3.370579339847201e-05, "loss": 7.0327, "step": 17350 }, { "epoch": 0.40541566198235646, "grad_norm": 4.34375, "learning_rate": 3.368806516295127e-05, "loss": 7.0068, "step": 17360 }, { "epoch": 0.40564919634985785, "grad_norm": 3.953125, "learning_rate": 3.367033195699591e-05, "loss": 6.9617, "step": 17370 }, { "epoch": 0.4058827307173592, "grad_norm": 4.125, "learning_rate": 3.3652593790751095e-05, "loss": 7.0387, "step": 17380 }, { "epoch": 0.40611626508486054, "grad_norm": 3.921875, "learning_rate": 3.3634850674364815e-05, "loss": 7.0643, "step": 17390 }, { "epoch": 0.4063497994523619, "grad_norm": 3.859375, "learning_rate": 3.3617102617987885e-05, "loss": 7.05, "step": 17400 }, { "epoch": 0.4065833338198633, "grad_norm": 5.0625, "learning_rate": 3.359934963177396e-05, "loss": 7.0046, "step": 17410 }, { "epoch": 0.4068168681873646, "grad_norm": 3.828125, "learning_rate": 3.35815917258795e-05, "loss": 7.0201, "step": 17420 }, { "epoch": 0.40705040255486596, "grad_norm": 4.5, "learning_rate": 3.356382891046379e-05, "loss": 7.0835, "step": 17430 }, { "epoch": 0.40728393692236736, "grad_norm": 4.34375, "learning_rate": 3.3546061195688925e-05, "loss": 7.0335, "step": 17440 }, { "epoch": 0.4075174712898687, "grad_norm": 5.5625, "learning_rate": 3.3528288591719805e-05, "loss": 7.0292, "step": 17450 }, { "epoch": 0.40775100565737005, "grad_norm": 4.3125, "learning_rate": 3.3510511108724106e-05, "loss": 7.0347, "step": 17460 }, { "epoch": 0.4079845400248714, "grad_norm": 4.53125, "learning_rate": 3.349272875687232e-05, "loss": 7.1018, "step": 17470 }, { "epoch": 0.4082180743923728, "grad_norm": 4.3125, "learning_rate": 3.347494154633772e-05, "loss": 7.0494, "step": 17480 }, { "epoch": 0.40845160875987413, "grad_norm": 3.765625, "learning_rate": 3.345714948729634e-05, "loss": 7.0489, "step": 17490 }, { "epoch": 0.40868514312737547, "grad_norm": 4.4375, "learning_rate": 3.343935258992701e-05, "loss": 7.0629, "step": 17500 }, { "epoch": 0.40868514312737547, "eval_loss": 7.0206522941589355, "eval_runtime": 78.803, "eval_samples_per_second": 12.69, "eval_steps_per_second": 12.69, "step": 17500 }, { "epoch": 0.4089186774948768, "grad_norm": 4.59375, "learning_rate": 3.3421550864411334e-05, "loss": 7.057, "step": 17510 }, { "epoch": 0.4091522118623782, "grad_norm": 3.71875, "learning_rate": 3.340374432093363e-05, "loss": 6.9815, "step": 17520 }, { "epoch": 0.40938574622987955, "grad_norm": 5.375, "learning_rate": 3.338593296968103e-05, "loss": 7.027, "step": 17530 }, { "epoch": 0.4096192805973809, "grad_norm": 3.3125, "learning_rate": 3.33681168208434e-05, "loss": 6.9688, "step": 17540 }, { "epoch": 0.4098528149648823, "grad_norm": 4.3125, "learning_rate": 3.335029588461332e-05, "loss": 7.0619, "step": 17550 }, { "epoch": 0.41008634933238364, "grad_norm": 4.65625, "learning_rate": 3.333247017118615e-05, "loss": 7.0402, "step": 17560 }, { "epoch": 0.410319883699885, "grad_norm": 3.828125, "learning_rate": 3.331463969075996e-05, "loss": 7.098, "step": 17570 }, { "epoch": 0.4105534180673863, "grad_norm": 3.78125, "learning_rate": 3.329680445353554e-05, "loss": 6.9734, "step": 17580 }, { "epoch": 0.4107869524348877, "grad_norm": 4.5625, "learning_rate": 3.3278964469716435e-05, "loss": 6.9749, "step": 17590 }, { "epoch": 0.41102048680238906, "grad_norm": 4.5, "learning_rate": 3.326111974950887e-05, "loss": 7.0289, "step": 17600 }, { "epoch": 0.4112540211698904, "grad_norm": 4.75, "learning_rate": 3.324327030312181e-05, "loss": 7.0039, "step": 17610 }, { "epoch": 0.41148755553739175, "grad_norm": 5.125, "learning_rate": 3.322541614076689e-05, "loss": 7.0176, "step": 17620 }, { "epoch": 0.41172108990489314, "grad_norm": 4.65625, "learning_rate": 3.320755727265847e-05, "loss": 7.0968, "step": 17630 }, { "epoch": 0.4119546242723945, "grad_norm": 4.3125, "learning_rate": 3.318969370901358e-05, "loss": 6.9595, "step": 17640 }, { "epoch": 0.41218815863989583, "grad_norm": 4.125, "learning_rate": 3.317182546005197e-05, "loss": 6.9834, "step": 17650 }, { "epoch": 0.4124216930073972, "grad_norm": 4.6875, "learning_rate": 3.315395253599604e-05, "loss": 7.0016, "step": 17660 }, { "epoch": 0.41265522737489857, "grad_norm": 4.5625, "learning_rate": 3.313607494707088e-05, "loss": 6.9908, "step": 17670 }, { "epoch": 0.4128887617423999, "grad_norm": 4.78125, "learning_rate": 3.3118192703504226e-05, "loss": 7.0133, "step": 17680 }, { "epoch": 0.41312229610990125, "grad_norm": 4.53125, "learning_rate": 3.310030581552651e-05, "loss": 7.0502, "step": 17690 }, { "epoch": 0.41335583047740265, "grad_norm": 4.5625, "learning_rate": 3.308241429337081e-05, "loss": 7.0434, "step": 17700 }, { "epoch": 0.413589364844904, "grad_norm": 3.609375, "learning_rate": 3.306451814727284e-05, "loss": 6.963, "step": 17710 }, { "epoch": 0.41382289921240534, "grad_norm": 4.71875, "learning_rate": 3.304661738747096e-05, "loss": 7.0392, "step": 17720 }, { "epoch": 0.4140564335799067, "grad_norm": 5.96875, "learning_rate": 3.3028712024206207e-05, "loss": 6.9873, "step": 17730 }, { "epoch": 0.4142899679474081, "grad_norm": 4.65625, "learning_rate": 3.301080206772219e-05, "loss": 7.0501, "step": 17740 }, { "epoch": 0.4145235023149094, "grad_norm": 4.5625, "learning_rate": 3.299288752826521e-05, "loss": 7.0632, "step": 17750 }, { "epoch": 0.41475703668241076, "grad_norm": 4.09375, "learning_rate": 3.297496841608415e-05, "loss": 7.0122, "step": 17760 }, { "epoch": 0.41499057104991216, "grad_norm": 5.125, "learning_rate": 3.295704474143051e-05, "loss": 7.0672, "step": 17770 }, { "epoch": 0.4152241054174135, "grad_norm": 4.4375, "learning_rate": 3.2939116514558416e-05, "loss": 7.0045, "step": 17780 }, { "epoch": 0.41545763978491485, "grad_norm": 3.9375, "learning_rate": 3.292118374572459e-05, "loss": 6.9915, "step": 17790 }, { "epoch": 0.4156911741524162, "grad_norm": 4.375, "learning_rate": 3.2903246445188346e-05, "loss": 6.9606, "step": 17800 }, { "epoch": 0.4159247085199176, "grad_norm": 5.0625, "learning_rate": 3.288530462321162e-05, "loss": 6.9933, "step": 17810 }, { "epoch": 0.4161582428874189, "grad_norm": 4.78125, "learning_rate": 3.2867358290058886e-05, "loss": 6.9722, "step": 17820 }, { "epoch": 0.41639177725492027, "grad_norm": 4.28125, "learning_rate": 3.2849407455997236e-05, "loss": 6.9792, "step": 17830 }, { "epoch": 0.4166253116224216, "grad_norm": 3.90625, "learning_rate": 3.283145213129634e-05, "loss": 7.0359, "step": 17840 }, { "epoch": 0.416858845989923, "grad_norm": 4.0625, "learning_rate": 3.281349232622842e-05, "loss": 6.9947, "step": 17850 }, { "epoch": 0.41709238035742435, "grad_norm": 4.21875, "learning_rate": 3.2795528051068244e-05, "loss": 6.9797, "step": 17860 }, { "epoch": 0.4173259147249257, "grad_norm": 3.734375, "learning_rate": 3.277755931609318e-05, "loss": 7.0288, "step": 17870 }, { "epoch": 0.41755944909242704, "grad_norm": 5.1875, "learning_rate": 3.275958613158311e-05, "loss": 7.0569, "step": 17880 }, { "epoch": 0.41779298345992844, "grad_norm": 3.625, "learning_rate": 3.274160850782049e-05, "loss": 7.0182, "step": 17890 }, { "epoch": 0.4180265178274298, "grad_norm": 4.4375, "learning_rate": 3.27236264550903e-05, "loss": 7.0453, "step": 17900 }, { "epoch": 0.4182600521949311, "grad_norm": 5.21875, "learning_rate": 3.270563998368006e-05, "loss": 6.9636, "step": 17910 }, { "epoch": 0.4184935865624325, "grad_norm": 3.5625, "learning_rate": 3.2687649103879805e-05, "loss": 7.0547, "step": 17920 }, { "epoch": 0.41872712092993386, "grad_norm": 3.6875, "learning_rate": 3.266965382598211e-05, "loss": 7.0508, "step": 17930 }, { "epoch": 0.4189606552974352, "grad_norm": 5.5, "learning_rate": 3.265165416028205e-05, "loss": 6.9389, "step": 17940 }, { "epoch": 0.41919418966493655, "grad_norm": 4.65625, "learning_rate": 3.263365011707723e-05, "loss": 7.0565, "step": 17950 }, { "epoch": 0.41942772403243794, "grad_norm": 4.46875, "learning_rate": 3.2615641706667735e-05, "loss": 7.0073, "step": 17960 }, { "epoch": 0.4196612583999393, "grad_norm": 5.03125, "learning_rate": 3.2597628939356175e-05, "loss": 7.0409, "step": 17970 }, { "epoch": 0.4198947927674406, "grad_norm": 4.0625, "learning_rate": 3.257961182544762e-05, "loss": 7.0293, "step": 17980 }, { "epoch": 0.42012832713494197, "grad_norm": 4.71875, "learning_rate": 3.256159037524967e-05, "loss": 6.974, "step": 17990 }, { "epoch": 0.42036186150244337, "grad_norm": 4.6875, "learning_rate": 3.2543564599072366e-05, "loss": 6.9877, "step": 18000 }, { "epoch": 0.42036186150244337, "eval_loss": 7.017457008361816, "eval_runtime": 78.7422, "eval_samples_per_second": 12.7, "eval_steps_per_second": 12.7, "step": 18000 }, { "epoch": 0.4205953958699447, "grad_norm": 3.5625, "learning_rate": 3.252553450722823e-05, "loss": 7.0551, "step": 18010 }, { "epoch": 0.42082893023744605, "grad_norm": 4.53125, "learning_rate": 3.250750011003228e-05, "loss": 7.1037, "step": 18020 }, { "epoch": 0.42106246460494745, "grad_norm": 4.1875, "learning_rate": 3.248946141780198e-05, "loss": 7.0071, "step": 18030 }, { "epoch": 0.4212959989724488, "grad_norm": 3.90625, "learning_rate": 3.247141844085722e-05, "loss": 6.997, "step": 18040 }, { "epoch": 0.42152953333995014, "grad_norm": 4.8125, "learning_rate": 3.245337118952041e-05, "loss": 6.9701, "step": 18050 }, { "epoch": 0.4217630677074515, "grad_norm": 4.5625, "learning_rate": 3.243531967411636e-05, "loss": 6.9591, "step": 18060 }, { "epoch": 0.4219966020749529, "grad_norm": 4.65625, "learning_rate": 3.2417263904972296e-05, "loss": 7.0072, "step": 18070 }, { "epoch": 0.4222301364424542, "grad_norm": 5.0, "learning_rate": 3.239920389241794e-05, "loss": 7.003, "step": 18080 }, { "epoch": 0.42246367080995556, "grad_norm": 3.859375, "learning_rate": 3.2381139646785394e-05, "loss": 6.984, "step": 18090 }, { "epoch": 0.4226972051774569, "grad_norm": 4.21875, "learning_rate": 3.2363071178409204e-05, "loss": 6.9678, "step": 18100 }, { "epoch": 0.4229307395449583, "grad_norm": 4.25, "learning_rate": 3.2344998497626324e-05, "loss": 6.9863, "step": 18110 }, { "epoch": 0.42316427391245964, "grad_norm": 4.5625, "learning_rate": 3.232692161477612e-05, "loss": 6.957, "step": 18120 }, { "epoch": 0.423397808279961, "grad_norm": 4.75, "learning_rate": 3.2308840540200365e-05, "loss": 7.0459, "step": 18130 }, { "epoch": 0.4236313426474624, "grad_norm": 5.4375, "learning_rate": 3.229075528424321e-05, "loss": 7.0404, "step": 18140 }, { "epoch": 0.4238648770149637, "grad_norm": 4.375, "learning_rate": 3.227266585725124e-05, "loss": 7.0355, "step": 18150 }, { "epoch": 0.42409841138246507, "grad_norm": 5.25, "learning_rate": 3.225457226957337e-05, "loss": 7.0466, "step": 18160 }, { "epoch": 0.4243319457499664, "grad_norm": 6.0, "learning_rate": 3.2236474531560945e-05, "loss": 6.9725, "step": 18170 }, { "epoch": 0.4245654801174678, "grad_norm": 3.734375, "learning_rate": 3.221837265356767e-05, "loss": 6.9462, "step": 18180 }, { "epoch": 0.42479901448496915, "grad_norm": 4.46875, "learning_rate": 3.220026664594961e-05, "loss": 6.9818, "step": 18190 }, { "epoch": 0.4250325488524705, "grad_norm": 4.875, "learning_rate": 3.2182156519065176e-05, "loss": 7.0122, "step": 18200 }, { "epoch": 0.42526608321997184, "grad_norm": 4.21875, "learning_rate": 3.216404228327519e-05, "loss": 7.0557, "step": 18210 }, { "epoch": 0.42549961758747323, "grad_norm": 4.4375, "learning_rate": 3.214592394894276e-05, "loss": 7.064, "step": 18220 }, { "epoch": 0.4257331519549746, "grad_norm": 4.3125, "learning_rate": 3.2127801526433386e-05, "loss": 6.9979, "step": 18230 }, { "epoch": 0.4259666863224759, "grad_norm": 3.921875, "learning_rate": 3.2109675026114887e-05, "loss": 7.042, "step": 18240 }, { "epoch": 0.4262002206899773, "grad_norm": 5.0625, "learning_rate": 3.209154445835742e-05, "loss": 7.014, "step": 18250 }, { "epoch": 0.42643375505747866, "grad_norm": 4.25, "learning_rate": 3.2073409833533466e-05, "loss": 7.0067, "step": 18260 }, { "epoch": 0.42666728942498, "grad_norm": 4.84375, "learning_rate": 3.2055271162017825e-05, "loss": 7.0046, "step": 18270 }, { "epoch": 0.42690082379248134, "grad_norm": 5.40625, "learning_rate": 3.203712845418762e-05, "loss": 7.0168, "step": 18280 }, { "epoch": 0.42713435815998274, "grad_norm": 4.25, "learning_rate": 3.201898172042228e-05, "loss": 7.0652, "step": 18290 }, { "epoch": 0.4273678925274841, "grad_norm": 5.65625, "learning_rate": 3.200083097110353e-05, "loss": 7.0198, "step": 18300 }, { "epoch": 0.4276014268949854, "grad_norm": 4.40625, "learning_rate": 3.1982676216615404e-05, "loss": 7.0331, "step": 18310 }, { "epoch": 0.42783496126248677, "grad_norm": 4.84375, "learning_rate": 3.196451746734421e-05, "loss": 6.9921, "step": 18320 }, { "epoch": 0.42806849562998817, "grad_norm": 3.953125, "learning_rate": 3.1946354733678564e-05, "loss": 6.9661, "step": 18330 }, { "epoch": 0.4283020299974895, "grad_norm": 5.375, "learning_rate": 3.192818802600935e-05, "loss": 7.0108, "step": 18340 }, { "epoch": 0.42853556436499085, "grad_norm": 3.859375, "learning_rate": 3.1910017354729725e-05, "loss": 7.0018, "step": 18350 }, { "epoch": 0.4287690987324922, "grad_norm": 4.8125, "learning_rate": 3.1891842730235113e-05, "loss": 7.0447, "step": 18360 }, { "epoch": 0.4290026330999936, "grad_norm": 5.03125, "learning_rate": 3.187366416292319e-05, "loss": 6.9903, "step": 18370 }, { "epoch": 0.42923616746749493, "grad_norm": 4.0, "learning_rate": 3.185548166319392e-05, "loss": 7.0017, "step": 18380 }, { "epoch": 0.4294697018349963, "grad_norm": 4.6875, "learning_rate": 3.183729524144948e-05, "loss": 6.9141, "step": 18390 }, { "epoch": 0.4297032362024977, "grad_norm": 4.71875, "learning_rate": 3.181910490809431e-05, "loss": 7.0913, "step": 18400 }, { "epoch": 0.429936770569999, "grad_norm": 4.875, "learning_rate": 3.180091067353508e-05, "loss": 6.9493, "step": 18410 }, { "epoch": 0.43017030493750036, "grad_norm": 4.78125, "learning_rate": 3.178271254818072e-05, "loss": 6.9826, "step": 18420 }, { "epoch": 0.4304038393050017, "grad_norm": 3.75, "learning_rate": 3.176451054244233e-05, "loss": 7.0489, "step": 18430 }, { "epoch": 0.4306373736725031, "grad_norm": 4.78125, "learning_rate": 3.174630466673327e-05, "loss": 6.9108, "step": 18440 }, { "epoch": 0.43087090804000444, "grad_norm": 4.125, "learning_rate": 3.1728094931469124e-05, "loss": 6.9625, "step": 18450 }, { "epoch": 0.4311044424075058, "grad_norm": 4.25, "learning_rate": 3.170988134706764e-05, "loss": 6.9186, "step": 18460 }, { "epoch": 0.4313379767750071, "grad_norm": 4.71875, "learning_rate": 3.169166392394881e-05, "loss": 6.9786, "step": 18470 }, { "epoch": 0.4315715111425085, "grad_norm": 4.78125, "learning_rate": 3.1673442672534804e-05, "loss": 7.0029, "step": 18480 }, { "epoch": 0.43180504551000987, "grad_norm": 4.34375, "learning_rate": 3.165521760324998e-05, "loss": 7.0563, "step": 18490 }, { "epoch": 0.4320385798775112, "grad_norm": 4.125, "learning_rate": 3.16369887265209e-05, "loss": 7.022, "step": 18500 }, { "epoch": 0.4320385798775112, "eval_loss": 7.014063358306885, "eval_runtime": 78.7788, "eval_samples_per_second": 12.694, "eval_steps_per_second": 12.694, "step": 18500 }, { "epoch": 0.4322721142450126, "grad_norm": 5.40625, "learning_rate": 3.161875605277626e-05, "loss": 7.0432, "step": 18510 }, { "epoch": 0.43250564861251395, "grad_norm": 5.5, "learning_rate": 3.160051959244698e-05, "loss": 7.0014, "step": 18520 }, { "epoch": 0.4327391829800153, "grad_norm": 3.921875, "learning_rate": 3.158227935596611e-05, "loss": 6.9722, "step": 18530 }, { "epoch": 0.43297271734751663, "grad_norm": 4.875, "learning_rate": 3.156403535376888e-05, "loss": 6.9834, "step": 18540 }, { "epoch": 0.43320625171501803, "grad_norm": 4.28125, "learning_rate": 3.154578759629268e-05, "loss": 6.9776, "step": 18550 }, { "epoch": 0.4334397860825194, "grad_norm": 5.1875, "learning_rate": 3.1527536093977016e-05, "loss": 7.0023, "step": 18560 }, { "epoch": 0.4336733204500207, "grad_norm": 3.890625, "learning_rate": 3.1509280857263566e-05, "loss": 7.0189, "step": 18570 }, { "epoch": 0.43390685481752206, "grad_norm": 4.9375, "learning_rate": 3.1491021896596136e-05, "loss": 6.9686, "step": 18580 }, { "epoch": 0.43414038918502346, "grad_norm": 3.484375, "learning_rate": 3.147275922242065e-05, "loss": 6.9608, "step": 18590 }, { "epoch": 0.4343739235525248, "grad_norm": 4.53125, "learning_rate": 3.145449284518518e-05, "loss": 7.0212, "step": 18600 }, { "epoch": 0.43460745792002614, "grad_norm": 3.875, "learning_rate": 3.143622277533992e-05, "loss": 7.0211, "step": 18610 }, { "epoch": 0.43484099228752754, "grad_norm": 4.875, "learning_rate": 3.141794902333712e-05, "loss": 6.9001, "step": 18620 }, { "epoch": 0.4350745266550289, "grad_norm": 4.125, "learning_rate": 3.139967159963122e-05, "loss": 7.0095, "step": 18630 }, { "epoch": 0.4353080610225302, "grad_norm": 3.1875, "learning_rate": 3.1381390514678696e-05, "loss": 6.9811, "step": 18640 }, { "epoch": 0.43554159539003157, "grad_norm": 4.0625, "learning_rate": 3.136310577893814e-05, "loss": 7.028, "step": 18650 }, { "epoch": 0.43577512975753296, "grad_norm": 4.03125, "learning_rate": 3.134481740287025e-05, "loss": 7.023, "step": 18660 }, { "epoch": 0.4360086641250343, "grad_norm": 4.34375, "learning_rate": 3.1326525396937766e-05, "loss": 7.0235, "step": 18670 }, { "epoch": 0.43624219849253565, "grad_norm": 4.3125, "learning_rate": 3.130822977160554e-05, "loss": 6.9879, "step": 18680 }, { "epoch": 0.436475732860037, "grad_norm": 5.21875, "learning_rate": 3.128993053734049e-05, "loss": 6.9989, "step": 18690 }, { "epoch": 0.4367092672275384, "grad_norm": 4.25, "learning_rate": 3.12716277046116e-05, "loss": 7.0022, "step": 18700 }, { "epoch": 0.43694280159503973, "grad_norm": 4.84375, "learning_rate": 3.125332128388988e-05, "loss": 6.9927, "step": 18710 }, { "epoch": 0.4371763359625411, "grad_norm": 4.125, "learning_rate": 3.1235011285648436e-05, "loss": 7.0204, "step": 18720 }, { "epoch": 0.43740987033004247, "grad_norm": 5.03125, "learning_rate": 3.121669772036239e-05, "loss": 7.0062, "step": 18730 }, { "epoch": 0.4376434046975438, "grad_norm": 4.59375, "learning_rate": 3.119838059850893e-05, "loss": 6.9906, "step": 18740 }, { "epoch": 0.43787693906504516, "grad_norm": 5.5, "learning_rate": 3.118005993056725e-05, "loss": 7.0038, "step": 18750 }, { "epoch": 0.4381104734325465, "grad_norm": 5.3125, "learning_rate": 3.11617357270186e-05, "loss": 6.9496, "step": 18760 }, { "epoch": 0.4383440078000479, "grad_norm": 3.78125, "learning_rate": 3.114340799834624e-05, "loss": 7.0579, "step": 18770 }, { "epoch": 0.43857754216754924, "grad_norm": 4.5, "learning_rate": 3.112507675503545e-05, "loss": 7.0549, "step": 18780 }, { "epoch": 0.4388110765350506, "grad_norm": 3.84375, "learning_rate": 3.110674200757351e-05, "loss": 6.9446, "step": 18790 }, { "epoch": 0.4390446109025519, "grad_norm": 4.46875, "learning_rate": 3.108840376644972e-05, "loss": 6.9755, "step": 18800 }, { "epoch": 0.4392781452700533, "grad_norm": 6.46875, "learning_rate": 3.107006204215537e-05, "loss": 6.9582, "step": 18810 }, { "epoch": 0.43951167963755466, "grad_norm": 4.5, "learning_rate": 3.105171684518375e-05, "loss": 6.9671, "step": 18820 }, { "epoch": 0.439745214005056, "grad_norm": 4.71875, "learning_rate": 3.103336818603011e-05, "loss": 6.9893, "step": 18830 }, { "epoch": 0.43997874837255735, "grad_norm": 4.625, "learning_rate": 3.101501607519173e-05, "loss": 6.9805, "step": 18840 }, { "epoch": 0.44021228274005875, "grad_norm": 4.8125, "learning_rate": 3.099666052316783e-05, "loss": 7.0077, "step": 18850 }, { "epoch": 0.4404458171075601, "grad_norm": 3.9375, "learning_rate": 3.097830154045959e-05, "loss": 6.9853, "step": 18860 }, { "epoch": 0.44067935147506143, "grad_norm": 4.0625, "learning_rate": 3.095993913757018e-05, "loss": 7.0115, "step": 18870 }, { "epoch": 0.44091288584256283, "grad_norm": 4.59375, "learning_rate": 3.094157332500472e-05, "loss": 6.998, "step": 18880 }, { "epoch": 0.44114642021006417, "grad_norm": 5.6875, "learning_rate": 3.092320411327026e-05, "loss": 6.9834, "step": 18890 }, { "epoch": 0.4413799545775655, "grad_norm": 4.5, "learning_rate": 3.090483151287583e-05, "loss": 6.968, "step": 18900 }, { "epoch": 0.44161348894506686, "grad_norm": 5.5, "learning_rate": 3.088645553433236e-05, "loss": 7.0632, "step": 18910 }, { "epoch": 0.44184702331256825, "grad_norm": 3.53125, "learning_rate": 3.086807618815275e-05, "loss": 6.9826, "step": 18920 }, { "epoch": 0.4420805576800696, "grad_norm": 3.9375, "learning_rate": 3.084969348485179e-05, "loss": 7.0809, "step": 18930 }, { "epoch": 0.44231409204757094, "grad_norm": 5.1875, "learning_rate": 3.083130743494622e-05, "loss": 7.0107, "step": 18940 }, { "epoch": 0.4425476264150723, "grad_norm": 4.0625, "learning_rate": 3.081291804895467e-05, "loss": 6.9983, "step": 18950 }, { "epoch": 0.4427811607825737, "grad_norm": 4.75, "learning_rate": 3.079452533739771e-05, "loss": 7.0551, "step": 18960 }, { "epoch": 0.443014695150075, "grad_norm": 5.0, "learning_rate": 3.077612931079779e-05, "loss": 6.9624, "step": 18970 }, { "epoch": 0.44324822951757636, "grad_norm": 4.65625, "learning_rate": 3.0757729979679253e-05, "loss": 7.0096, "step": 18980 }, { "epoch": 0.44348176388507776, "grad_norm": 4.3125, "learning_rate": 3.073932735456835e-05, "loss": 7.0771, "step": 18990 }, { "epoch": 0.4437152982525791, "grad_norm": 4.3125, "learning_rate": 3.07209214459932e-05, "loss": 6.9649, "step": 19000 }, { "epoch": 0.4437152982525791, "eval_loss": 7.008757591247559, "eval_runtime": 78.9279, "eval_samples_per_second": 12.67, "eval_steps_per_second": 12.67, "step": 19000 }, { "epoch": 0.44394883262008045, "grad_norm": 4.15625, "learning_rate": 3.07025122644838e-05, "loss": 7.028, "step": 19010 }, { "epoch": 0.4441823669875818, "grad_norm": 4.0, "learning_rate": 3.068409982057205e-05, "loss": 7.0228, "step": 19020 }, { "epoch": 0.4444159013550832, "grad_norm": 3.3125, "learning_rate": 3.066568412479167e-05, "loss": 7.0548, "step": 19030 }, { "epoch": 0.44464943572258453, "grad_norm": 4.15625, "learning_rate": 3.064726518767828e-05, "loss": 7.0849, "step": 19040 }, { "epoch": 0.44488297009008587, "grad_norm": 3.78125, "learning_rate": 3.062884301976933e-05, "loss": 7.0182, "step": 19050 }, { "epoch": 0.4451165044575872, "grad_norm": 7.375, "learning_rate": 3.0610417631604124e-05, "loss": 7.0248, "step": 19060 }, { "epoch": 0.4453500388250886, "grad_norm": 4.75, "learning_rate": 3.0591989033723815e-05, "loss": 6.9567, "step": 19070 }, { "epoch": 0.44558357319258995, "grad_norm": 4.75, "learning_rate": 3.05735572366714e-05, "loss": 7.0679, "step": 19080 }, { "epoch": 0.4458171075600913, "grad_norm": 5.09375, "learning_rate": 3.0555122250991666e-05, "loss": 7.0807, "step": 19090 }, { "epoch": 0.4460506419275927, "grad_norm": 4.5, "learning_rate": 3.053668408723128e-05, "loss": 7.0693, "step": 19100 }, { "epoch": 0.44628417629509404, "grad_norm": 5.125, "learning_rate": 3.051824275593868e-05, "loss": 7.1147, "step": 19110 }, { "epoch": 0.4465177106625954, "grad_norm": 3.890625, "learning_rate": 3.049979826766415e-05, "loss": 6.9937, "step": 19120 }, { "epoch": 0.4467512450300967, "grad_norm": 4.28125, "learning_rate": 3.0481350632959755e-05, "loss": 6.9308, "step": 19130 }, { "epoch": 0.4469847793975981, "grad_norm": 4.8125, "learning_rate": 3.046289986237938e-05, "loss": 6.9898, "step": 19140 }, { "epoch": 0.44721831376509946, "grad_norm": 5.125, "learning_rate": 3.0444445966478687e-05, "loss": 6.9791, "step": 19150 }, { "epoch": 0.4474518481326008, "grad_norm": 4.625, "learning_rate": 3.042598895581514e-05, "loss": 7.0483, "step": 19160 }, { "epoch": 0.44768538250010215, "grad_norm": 4.90625, "learning_rate": 3.0407528840947967e-05, "loss": 6.9655, "step": 19170 }, { "epoch": 0.44791891686760354, "grad_norm": 4.1875, "learning_rate": 3.0389065632438208e-05, "loss": 6.9716, "step": 19180 }, { "epoch": 0.4481524512351049, "grad_norm": 5.15625, "learning_rate": 3.0370599340848627e-05, "loss": 7.0186, "step": 19190 }, { "epoch": 0.44838598560260623, "grad_norm": 4.25, "learning_rate": 3.0352129976743778e-05, "loss": 7.0191, "step": 19200 }, { "epoch": 0.4486195199701076, "grad_norm": 4.8125, "learning_rate": 3.0333657550689976e-05, "loss": 7.0266, "step": 19210 }, { "epoch": 0.44885305433760897, "grad_norm": 4.375, "learning_rate": 3.0315182073255277e-05, "loss": 6.9918, "step": 19220 }, { "epoch": 0.4490865887051103, "grad_norm": 4.1875, "learning_rate": 3.0296703555009474e-05, "loss": 6.9314, "step": 19230 }, { "epoch": 0.44932012307261165, "grad_norm": 3.671875, "learning_rate": 3.0278222006524133e-05, "loss": 6.9974, "step": 19240 }, { "epoch": 0.44955365744011305, "grad_norm": 5.125, "learning_rate": 3.0259737438372516e-05, "loss": 7.0348, "step": 19250 }, { "epoch": 0.4497871918076144, "grad_norm": 4.78125, "learning_rate": 3.0241249861129624e-05, "loss": 6.9768, "step": 19260 }, { "epoch": 0.45002072617511574, "grad_norm": 3.84375, "learning_rate": 3.0222759285372203e-05, "loss": 6.9752, "step": 19270 }, { "epoch": 0.4502542605426171, "grad_norm": 4.375, "learning_rate": 3.0204265721678687e-05, "loss": 6.9876, "step": 19280 }, { "epoch": 0.4504877949101185, "grad_norm": 5.8125, "learning_rate": 3.018576918062922e-05, "loss": 7.0228, "step": 19290 }, { "epoch": 0.4507213292776198, "grad_norm": 5.15625, "learning_rate": 3.0167269672805663e-05, "loss": 6.9799, "step": 19300 }, { "epoch": 0.45095486364512116, "grad_norm": 4.375, "learning_rate": 3.0148767208791573e-05, "loss": 6.9221, "step": 19310 }, { "epoch": 0.45118839801262256, "grad_norm": 4.5625, "learning_rate": 3.0130261799172183e-05, "loss": 6.997, "step": 19320 }, { "epoch": 0.4514219323801239, "grad_norm": 4.53125, "learning_rate": 3.0111753454534437e-05, "loss": 6.9939, "step": 19330 }, { "epoch": 0.45165546674762524, "grad_norm": 3.734375, "learning_rate": 3.009324218546693e-05, "loss": 7.0214, "step": 19340 }, { "epoch": 0.4518890011151266, "grad_norm": 4.1875, "learning_rate": 3.007472800255995e-05, "loss": 7.0498, "step": 19350 }, { "epoch": 0.452122535482628, "grad_norm": 3.84375, "learning_rate": 3.0056210916405443e-05, "loss": 6.964, "step": 19360 }, { "epoch": 0.4523560698501293, "grad_norm": 6.03125, "learning_rate": 3.0037690937597017e-05, "loss": 7.0067, "step": 19370 }, { "epoch": 0.45258960421763067, "grad_norm": 4.15625, "learning_rate": 3.001916807672993e-05, "loss": 6.9703, "step": 19380 }, { "epoch": 0.452823138585132, "grad_norm": 4.40625, "learning_rate": 3.0000642344401113e-05, "loss": 7.0354, "step": 19390 }, { "epoch": 0.4530566729526334, "grad_norm": 4.8125, "learning_rate": 2.9982113751209107e-05, "loss": 7.0216, "step": 19400 }, { "epoch": 0.45329020732013475, "grad_norm": 4.28125, "learning_rate": 2.99635823077541e-05, "loss": 7.0667, "step": 19410 }, { "epoch": 0.4535237416876361, "grad_norm": 4.625, "learning_rate": 2.9945048024637935e-05, "loss": 7.0132, "step": 19420 }, { "epoch": 0.45375727605513744, "grad_norm": 4.71875, "learning_rate": 2.9926510912464046e-05, "loss": 7.0063, "step": 19430 }, { "epoch": 0.45399081042263884, "grad_norm": 3.453125, "learning_rate": 2.9907970981837485e-05, "loss": 7.0191, "step": 19440 }, { "epoch": 0.4542243447901402, "grad_norm": 4.78125, "learning_rate": 2.9889428243364958e-05, "loss": 6.969, "step": 19450 }, { "epoch": 0.4544578791576415, "grad_norm": 4.46875, "learning_rate": 2.9870882707654736e-05, "loss": 6.9716, "step": 19460 }, { "epoch": 0.4546914135251429, "grad_norm": 4.90625, "learning_rate": 2.9852334385316692e-05, "loss": 7.0274, "step": 19470 }, { "epoch": 0.45492494789264426, "grad_norm": 3.921875, "learning_rate": 2.983378328696233e-05, "loss": 7.0598, "step": 19480 }, { "epoch": 0.4551584822601456, "grad_norm": 3.984375, "learning_rate": 2.9815229423204706e-05, "loss": 7.0004, "step": 19490 }, { "epoch": 0.45539201662764695, "grad_norm": 4.6875, "learning_rate": 2.9796672804658466e-05, "loss": 6.9928, "step": 19500 }, { "epoch": 0.45539201662764695, "eval_loss": 7.0054521560668945, "eval_runtime": 78.7625, "eval_samples_per_second": 12.696, "eval_steps_per_second": 12.696, "step": 19500 }, { "epoch": 0.45562555099514834, "grad_norm": 5.4375, "learning_rate": 2.977811344193984e-05, "loss": 6.9788, "step": 19510 }, { "epoch": 0.4558590853626497, "grad_norm": 4.8125, "learning_rate": 2.9759551345666626e-05, "loss": 6.9686, "step": 19520 }, { "epoch": 0.456092619730151, "grad_norm": 3.953125, "learning_rate": 2.974098652645817e-05, "loss": 7.0387, "step": 19530 }, { "epoch": 0.45632615409765237, "grad_norm": 3.71875, "learning_rate": 2.9722418994935404e-05, "loss": 6.9983, "step": 19540 }, { "epoch": 0.45655968846515377, "grad_norm": 4.125, "learning_rate": 2.9703848761720788e-05, "loss": 7.0184, "step": 19550 }, { "epoch": 0.4567932228326551, "grad_norm": 3.875, "learning_rate": 2.968527583743834e-05, "loss": 6.9768, "step": 19560 }, { "epoch": 0.45702675720015645, "grad_norm": 5.375, "learning_rate": 2.9666700232713624e-05, "loss": 6.9196, "step": 19570 }, { "epoch": 0.45726029156765785, "grad_norm": 4.21875, "learning_rate": 2.9648121958173708e-05, "loss": 7.0408, "step": 19580 }, { "epoch": 0.4574938259351592, "grad_norm": 4.4375, "learning_rate": 2.962954102444721e-05, "loss": 7.024, "step": 19590 }, { "epoch": 0.45772736030266054, "grad_norm": 4.71875, "learning_rate": 2.9610957442164284e-05, "loss": 6.9552, "step": 19600 }, { "epoch": 0.4579608946701619, "grad_norm": 4.78125, "learning_rate": 2.9592371221956566e-05, "loss": 7.0991, "step": 19610 }, { "epoch": 0.4581944290376633, "grad_norm": 3.84375, "learning_rate": 2.9573782374457222e-05, "loss": 6.9681, "step": 19620 }, { "epoch": 0.4584279634051646, "grad_norm": 4.625, "learning_rate": 2.9555190910300905e-05, "loss": 6.9644, "step": 19630 }, { "epoch": 0.45866149777266596, "grad_norm": 4.3125, "learning_rate": 2.9536596840123798e-05, "loss": 7.0025, "step": 19640 }, { "epoch": 0.4588950321401673, "grad_norm": 4.0625, "learning_rate": 2.951800017456353e-05, "loss": 6.9215, "step": 19650 }, { "epoch": 0.4591285665076687, "grad_norm": 5.75, "learning_rate": 2.9499400924259246e-05, "loss": 6.9719, "step": 19660 }, { "epoch": 0.45936210087517004, "grad_norm": 4.875, "learning_rate": 2.948079909985157e-05, "loss": 7.0287, "step": 19670 }, { "epoch": 0.4595956352426714, "grad_norm": 3.796875, "learning_rate": 2.9462194711982577e-05, "loss": 6.9941, "step": 19680 }, { "epoch": 0.4598291696101728, "grad_norm": 3.609375, "learning_rate": 2.9443587771295823e-05, "loss": 6.9007, "step": 19690 }, { "epoch": 0.4600627039776741, "grad_norm": 4.9375, "learning_rate": 2.9424978288436328e-05, "loss": 6.9757, "step": 19700 }, { "epoch": 0.46029623834517547, "grad_norm": 4.9375, "learning_rate": 2.9406366274050555e-05, "loss": 6.9769, "step": 19710 }, { "epoch": 0.4605297727126768, "grad_norm": 4.125, "learning_rate": 2.9387751738786435e-05, "loss": 7.0669, "step": 19720 }, { "epoch": 0.4607633070801782, "grad_norm": 4.6875, "learning_rate": 2.936913469329331e-05, "loss": 7.0071, "step": 19730 }, { "epoch": 0.46099684144767955, "grad_norm": 3.890625, "learning_rate": 2.935051514822198e-05, "loss": 7.0054, "step": 19740 }, { "epoch": 0.4612303758151809, "grad_norm": 4.3125, "learning_rate": 2.9331893114224684e-05, "loss": 7.0669, "step": 19750 }, { "epoch": 0.46146391018268224, "grad_norm": 5.65625, "learning_rate": 2.931326860195506e-05, "loss": 6.9353, "step": 19760 }, { "epoch": 0.46169744455018363, "grad_norm": 4.625, "learning_rate": 2.9294641622068174e-05, "loss": 6.9916, "step": 19770 }, { "epoch": 0.461930978917685, "grad_norm": 4.03125, "learning_rate": 2.9276012185220507e-05, "loss": 6.9809, "step": 19780 }, { "epoch": 0.4621645132851863, "grad_norm": 4.15625, "learning_rate": 2.9257380302069947e-05, "loss": 7.0003, "step": 19790 }, { "epoch": 0.4623980476526877, "grad_norm": 4.21875, "learning_rate": 2.9238745983275772e-05, "loss": 7.0106, "step": 19800 }, { "epoch": 0.46263158202018906, "grad_norm": 4.5625, "learning_rate": 2.922010923949867e-05, "loss": 6.9567, "step": 19810 }, { "epoch": 0.4628651163876904, "grad_norm": 4.21875, "learning_rate": 2.9201470081400706e-05, "loss": 6.9462, "step": 19820 }, { "epoch": 0.46309865075519174, "grad_norm": 4.53125, "learning_rate": 2.9182828519645312e-05, "loss": 7.0213, "step": 19830 }, { "epoch": 0.46333218512269314, "grad_norm": 5.4375, "learning_rate": 2.916418456489732e-05, "loss": 6.9812, "step": 19840 }, { "epoch": 0.4635657194901945, "grad_norm": 5.0625, "learning_rate": 2.9145538227822923e-05, "loss": 7.0499, "step": 19850 }, { "epoch": 0.4637992538576958, "grad_norm": 4.46875, "learning_rate": 2.912688951908966e-05, "loss": 7.0103, "step": 19860 }, { "epoch": 0.46403278822519717, "grad_norm": 4.0, "learning_rate": 2.9108238449366467e-05, "loss": 6.9938, "step": 19870 }, { "epoch": 0.46426632259269857, "grad_norm": 5.09375, "learning_rate": 2.908958502932358e-05, "loss": 7.0825, "step": 19880 }, { "epoch": 0.4644998569601999, "grad_norm": 5.40625, "learning_rate": 2.9070929269632612e-05, "loss": 6.9986, "step": 19890 }, { "epoch": 0.46473339132770125, "grad_norm": 4.375, "learning_rate": 2.9052271180966506e-05, "loss": 6.9054, "step": 19900 }, { "epoch": 0.4649669256952026, "grad_norm": 5.1875, "learning_rate": 2.9033610773999547e-05, "loss": 6.9939, "step": 19910 }, { "epoch": 0.465200460062704, "grad_norm": 4.5625, "learning_rate": 2.9014948059407337e-05, "loss": 7.0091, "step": 19920 }, { "epoch": 0.46543399443020533, "grad_norm": 4.0625, "learning_rate": 2.899628304786679e-05, "loss": 7.0074, "step": 19930 }, { "epoch": 0.4656675287977067, "grad_norm": 3.96875, "learning_rate": 2.8977615750056148e-05, "loss": 6.926, "step": 19940 }, { "epoch": 0.4659010631652081, "grad_norm": 4.875, "learning_rate": 2.8958946176654956e-05, "loss": 7.1085, "step": 19950 }, { "epoch": 0.4661345975327094, "grad_norm": 4.96875, "learning_rate": 2.894027433834405e-05, "loss": 7.0007, "step": 19960 }, { "epoch": 0.46636813190021076, "grad_norm": 5.03125, "learning_rate": 2.892160024580559e-05, "loss": 6.9827, "step": 19970 }, { "epoch": 0.4666016662677121, "grad_norm": 3.828125, "learning_rate": 2.8902923909723008e-05, "loss": 7.0586, "step": 19980 }, { "epoch": 0.4668352006352135, "grad_norm": 4.40625, "learning_rate": 2.8884245340781003e-05, "loss": 6.9476, "step": 19990 }, { "epoch": 0.46706873500271484, "grad_norm": 4.3125, "learning_rate": 2.8865564549665587e-05, "loss": 7.0291, "step": 20000 }, { "epoch": 0.46706873500271484, "eval_loss": 7.002224922180176, "eval_runtime": 78.5178, "eval_samples_per_second": 12.736, "eval_steps_per_second": 12.736, "step": 20000 }, { "epoch": 0.4673022693702162, "grad_norm": 4.59375, "learning_rate": 2.884688154706401e-05, "loss": 7.0734, "step": 20010 }, { "epoch": 0.4675358037377175, "grad_norm": 3.765625, "learning_rate": 2.8828196343664803e-05, "loss": 6.9495, "step": 20020 }, { "epoch": 0.4677693381052189, "grad_norm": 4.78125, "learning_rate": 2.8809508950157766e-05, "loss": 7.0606, "step": 20030 }, { "epoch": 0.46800287247272027, "grad_norm": 4.09375, "learning_rate": 2.8790819377233934e-05, "loss": 6.9668, "step": 20040 }, { "epoch": 0.4682364068402216, "grad_norm": 4.21875, "learning_rate": 2.8772127635585588e-05, "loss": 7.065, "step": 20050 }, { "epoch": 0.468469941207723, "grad_norm": 3.953125, "learning_rate": 2.8753433735906272e-05, "loss": 6.9422, "step": 20060 }, { "epoch": 0.46870347557522435, "grad_norm": 5.625, "learning_rate": 2.873473768889075e-05, "loss": 7.0199, "step": 20070 }, { "epoch": 0.4689370099427257, "grad_norm": 4.5625, "learning_rate": 2.8716039505234995e-05, "loss": 6.9928, "step": 20080 }, { "epoch": 0.46917054431022703, "grad_norm": 3.46875, "learning_rate": 2.8697339195636236e-05, "loss": 7.0, "step": 20090 }, { "epoch": 0.46940407867772843, "grad_norm": 4.5625, "learning_rate": 2.8678636770792906e-05, "loss": 7.0412, "step": 20100 }, { "epoch": 0.4696376130452298, "grad_norm": 4.1875, "learning_rate": 2.8659932241404635e-05, "loss": 7.0206, "step": 20110 }, { "epoch": 0.4698711474127311, "grad_norm": 3.515625, "learning_rate": 2.8641225618172286e-05, "loss": 7.0226, "step": 20120 }, { "epoch": 0.47010468178023246, "grad_norm": 3.953125, "learning_rate": 2.8622516911797886e-05, "loss": 7.0237, "step": 20130 }, { "epoch": 0.47033821614773386, "grad_norm": 4.75, "learning_rate": 2.8603806132984673e-05, "loss": 6.9877, "step": 20140 }, { "epoch": 0.4705717505152352, "grad_norm": 4.4375, "learning_rate": 2.8585093292437077e-05, "loss": 7.0747, "step": 20150 }, { "epoch": 0.47080528488273654, "grad_norm": 4.3125, "learning_rate": 2.8566378400860688e-05, "loss": 7.0987, "step": 20160 }, { "epoch": 0.47103881925023794, "grad_norm": 4.75, "learning_rate": 2.8547661468962272e-05, "loss": 6.963, "step": 20170 }, { "epoch": 0.4712723536177393, "grad_norm": 3.953125, "learning_rate": 2.852894250744979e-05, "loss": 6.8837, "step": 20180 }, { "epoch": 0.4715058879852406, "grad_norm": 4.8125, "learning_rate": 2.8510221527032334e-05, "loss": 6.9949, "step": 20190 }, { "epoch": 0.47173942235274197, "grad_norm": 5.625, "learning_rate": 2.8491498538420153e-05, "loss": 6.9868, "step": 20200 }, { "epoch": 0.47197295672024336, "grad_norm": 5.125, "learning_rate": 2.847277355232467e-05, "loss": 7.0125, "step": 20210 }, { "epoch": 0.4722064910877447, "grad_norm": 4.5625, "learning_rate": 2.8454046579458426e-05, "loss": 7.0074, "step": 20220 }, { "epoch": 0.47244002545524605, "grad_norm": 4.5625, "learning_rate": 2.8435317630535092e-05, "loss": 6.9951, "step": 20230 }, { "epoch": 0.4726735598227474, "grad_norm": 4.125, "learning_rate": 2.84165867162695e-05, "loss": 6.9892, "step": 20240 }, { "epoch": 0.4729070941902488, "grad_norm": 4.125, "learning_rate": 2.8397853847377587e-05, "loss": 6.8997, "step": 20250 }, { "epoch": 0.47314062855775013, "grad_norm": 5.25, "learning_rate": 2.8379119034576403e-05, "loss": 6.9618, "step": 20260 }, { "epoch": 0.4733741629252515, "grad_norm": 4.3125, "learning_rate": 2.836038228858413e-05, "loss": 6.9815, "step": 20270 }, { "epoch": 0.47360769729275287, "grad_norm": 4.25, "learning_rate": 2.8341643620120038e-05, "loss": 6.9544, "step": 20280 }, { "epoch": 0.4738412316602542, "grad_norm": 4.8125, "learning_rate": 2.8322903039904502e-05, "loss": 6.9808, "step": 20290 }, { "epoch": 0.47407476602775556, "grad_norm": 3.671875, "learning_rate": 2.8304160558658998e-05, "loss": 7.0083, "step": 20300 }, { "epoch": 0.4743083003952569, "grad_norm": 4.40625, "learning_rate": 2.8285416187106077e-05, "loss": 6.9637, "step": 20310 }, { "epoch": 0.4745418347627583, "grad_norm": 4.375, "learning_rate": 2.826666993596938e-05, "loss": 7.0069, "step": 20320 }, { "epoch": 0.47477536913025964, "grad_norm": 4.375, "learning_rate": 2.824792181597362e-05, "loss": 7.0044, "step": 20330 }, { "epoch": 0.475008903497761, "grad_norm": 3.890625, "learning_rate": 2.8229171837844586e-05, "loss": 6.929, "step": 20340 }, { "epoch": 0.4752424378652623, "grad_norm": 5.25, "learning_rate": 2.8210420012309118e-05, "loss": 7.0014, "step": 20350 }, { "epoch": 0.4754759722327637, "grad_norm": 4.53125, "learning_rate": 2.819166635009512e-05, "loss": 7.0437, "step": 20360 }, { "epoch": 0.47570950660026506, "grad_norm": 5.375, "learning_rate": 2.8172910861931552e-05, "loss": 6.9901, "step": 20370 }, { "epoch": 0.4759430409677664, "grad_norm": 4.4375, "learning_rate": 2.8154153558548396e-05, "loss": 7.0374, "step": 20380 }, { "epoch": 0.47617657533526775, "grad_norm": 4.5, "learning_rate": 2.8135394450676712e-05, "loss": 7.0216, "step": 20390 }, { "epoch": 0.47641010970276915, "grad_norm": 4.5, "learning_rate": 2.8116633549048554e-05, "loss": 7.0921, "step": 20400 }, { "epoch": 0.4766436440702705, "grad_norm": 3.890625, "learning_rate": 2.8097870864397023e-05, "loss": 7.0398, "step": 20410 }, { "epoch": 0.47687717843777183, "grad_norm": 5.4375, "learning_rate": 2.807910640745623e-05, "loss": 6.9516, "step": 20420 }, { "epoch": 0.47711071280527323, "grad_norm": 4.125, "learning_rate": 2.806034018896132e-05, "loss": 6.9945, "step": 20430 }, { "epoch": 0.47734424717277457, "grad_norm": 5.15625, "learning_rate": 2.804157221964841e-05, "loss": 6.9551, "step": 20440 }, { "epoch": 0.4775777815402759, "grad_norm": 4.90625, "learning_rate": 2.8022802510254647e-05, "loss": 7.0094, "step": 20450 }, { "epoch": 0.47781131590777726, "grad_norm": 4.96875, "learning_rate": 2.8004031071518173e-05, "loss": 7.0166, "step": 20460 }, { "epoch": 0.47804485027527865, "grad_norm": 4.1875, "learning_rate": 2.7985257914178103e-05, "loss": 6.9164, "step": 20470 }, { "epoch": 0.47827838464278, "grad_norm": 4.4375, "learning_rate": 2.7966483048974545e-05, "loss": 6.9722, "step": 20480 }, { "epoch": 0.47851191901028134, "grad_norm": 4.53125, "learning_rate": 2.794770648664859e-05, "loss": 6.9962, "step": 20490 }, { "epoch": 0.4787454533777827, "grad_norm": 3.71875, "learning_rate": 2.7928928237942288e-05, "loss": 6.9196, "step": 20500 }, { "epoch": 0.4787454533777827, "eval_loss": 6.997897148132324, "eval_runtime": 78.8268, "eval_samples_per_second": 12.686, "eval_steps_per_second": 12.686, "step": 20500 }, { "epoch": 0.4789789877452841, "grad_norm": 4.03125, "learning_rate": 2.7910148313598655e-05, "loss": 6.9567, "step": 20510 }, { "epoch": 0.4792125221127854, "grad_norm": 4.21875, "learning_rate": 2.7891366724361677e-05, "loss": 6.9937, "step": 20520 }, { "epoch": 0.47944605648028676, "grad_norm": 4.09375, "learning_rate": 2.7872583480976283e-05, "loss": 7.0072, "step": 20530 }, { "epoch": 0.47967959084778816, "grad_norm": 4.21875, "learning_rate": 2.785379859418834e-05, "loss": 7.1122, "step": 20540 }, { "epoch": 0.4799131252152895, "grad_norm": 4.1875, "learning_rate": 2.783501207474468e-05, "loss": 7.0493, "step": 20550 }, { "epoch": 0.48014665958279085, "grad_norm": 5.25, "learning_rate": 2.7816223933393058e-05, "loss": 6.9339, "step": 20560 }, { "epoch": 0.4803801939502922, "grad_norm": 3.359375, "learning_rate": 2.7797434180882136e-05, "loss": 7.0392, "step": 20570 }, { "epoch": 0.4806137283177936, "grad_norm": 3.765625, "learning_rate": 2.7778642827961533e-05, "loss": 6.9216, "step": 20580 }, { "epoch": 0.48084726268529493, "grad_norm": 4.65625, "learning_rate": 2.775984988538175e-05, "loss": 7.0, "step": 20590 }, { "epoch": 0.48108079705279627, "grad_norm": 4.59375, "learning_rate": 2.774105536389422e-05, "loss": 6.9793, "step": 20600 }, { "epoch": 0.4813143314202976, "grad_norm": 5.15625, "learning_rate": 2.7722259274251272e-05, "loss": 6.9803, "step": 20610 }, { "epoch": 0.481547865787799, "grad_norm": 4.6875, "learning_rate": 2.770346162720614e-05, "loss": 7.0765, "step": 20620 }, { "epoch": 0.48178140015530035, "grad_norm": 4.5, "learning_rate": 2.7684662433512925e-05, "loss": 7.0005, "step": 20630 }, { "epoch": 0.4820149345228017, "grad_norm": 5.03125, "learning_rate": 2.7665861703926655e-05, "loss": 7.018, "step": 20640 }, { "epoch": 0.4822484688903031, "grad_norm": 4.8125, "learning_rate": 2.7647059449203188e-05, "loss": 7.0123, "step": 20650 }, { "epoch": 0.48248200325780444, "grad_norm": 5.1875, "learning_rate": 2.762825568009928e-05, "loss": 6.961, "step": 20660 }, { "epoch": 0.4827155376253058, "grad_norm": 3.921875, "learning_rate": 2.7609450407372568e-05, "loss": 6.9819, "step": 20670 }, { "epoch": 0.4829490719928071, "grad_norm": 4.25, "learning_rate": 2.7590643641781517e-05, "loss": 6.9759, "step": 20680 }, { "epoch": 0.4831826063603085, "grad_norm": 4.6875, "learning_rate": 2.757183539408546e-05, "loss": 7.0383, "step": 20690 }, { "epoch": 0.48341614072780986, "grad_norm": 4.03125, "learning_rate": 2.755302567504459e-05, "loss": 6.9794, "step": 20700 }, { "epoch": 0.4836496750953112, "grad_norm": 4.78125, "learning_rate": 2.7534214495419935e-05, "loss": 7.0383, "step": 20710 }, { "epoch": 0.48388320946281255, "grad_norm": 3.96875, "learning_rate": 2.7515401865973333e-05, "loss": 7.0191, "step": 20720 }, { "epoch": 0.48411674383031394, "grad_norm": 5.21875, "learning_rate": 2.7496587797467494e-05, "loss": 6.9892, "step": 20730 }, { "epoch": 0.4843502781978153, "grad_norm": 5.0625, "learning_rate": 2.747777230066592e-05, "loss": 7.0199, "step": 20740 }, { "epoch": 0.48458381256531663, "grad_norm": 5.40625, "learning_rate": 2.745895538633294e-05, "loss": 7.0567, "step": 20750 }, { "epoch": 0.484817346932818, "grad_norm": 4.34375, "learning_rate": 2.7440137065233707e-05, "loss": 7.0369, "step": 20760 }, { "epoch": 0.48505088130031937, "grad_norm": 4.28125, "learning_rate": 2.742131734813415e-05, "loss": 6.9676, "step": 20770 }, { "epoch": 0.4852844156678207, "grad_norm": 4.0625, "learning_rate": 2.7402496245801012e-05, "loss": 7.056, "step": 20780 }, { "epoch": 0.48551795003532205, "grad_norm": 3.484375, "learning_rate": 2.7383673769001856e-05, "loss": 7.0115, "step": 20790 }, { "epoch": 0.48575148440282345, "grad_norm": 4.15625, "learning_rate": 2.7364849928504977e-05, "loss": 7.0524, "step": 20800 }, { "epoch": 0.4859850187703248, "grad_norm": 4.71875, "learning_rate": 2.7346024735079486e-05, "loss": 6.9684, "step": 20810 }, { "epoch": 0.48621855313782614, "grad_norm": 3.625, "learning_rate": 2.7327198199495262e-05, "loss": 6.9913, "step": 20820 }, { "epoch": 0.4864520875053275, "grad_norm": 3.71875, "learning_rate": 2.730837033252295e-05, "loss": 6.9868, "step": 20830 }, { "epoch": 0.4866856218728289, "grad_norm": 3.875, "learning_rate": 2.728954114493395e-05, "loss": 6.9902, "step": 20840 }, { "epoch": 0.4869191562403302, "grad_norm": 4.375, "learning_rate": 2.7270710647500432e-05, "loss": 6.9314, "step": 20850 }, { "epoch": 0.48715269060783156, "grad_norm": 5.25, "learning_rate": 2.7251878850995315e-05, "loss": 6.9094, "step": 20860 }, { "epoch": 0.48738622497533296, "grad_norm": 4.28125, "learning_rate": 2.7233045766192233e-05, "loss": 7.0097, "step": 20870 }, { "epoch": 0.4876197593428343, "grad_norm": 4.875, "learning_rate": 2.7214211403865585e-05, "loss": 7.001, "step": 20880 }, { "epoch": 0.48785329371033564, "grad_norm": 5.6875, "learning_rate": 2.7195375774790505e-05, "loss": 6.9207, "step": 20890 }, { "epoch": 0.488086828077837, "grad_norm": 4.65625, "learning_rate": 2.7176538889742826e-05, "loss": 7.0005, "step": 20900 }, { "epoch": 0.4883203624453384, "grad_norm": 4.3125, "learning_rate": 2.7157700759499122e-05, "loss": 6.971, "step": 20910 }, { "epoch": 0.4885538968128397, "grad_norm": 4.28125, "learning_rate": 2.7138861394836672e-05, "loss": 7.0351, "step": 20920 }, { "epoch": 0.48878743118034107, "grad_norm": 5.84375, "learning_rate": 2.7120020806533452e-05, "loss": 6.9963, "step": 20930 }, { "epoch": 0.4890209655478424, "grad_norm": 4.09375, "learning_rate": 2.7101179005368156e-05, "loss": 6.9284, "step": 20940 }, { "epoch": 0.4892544999153438, "grad_norm": 4.53125, "learning_rate": 2.7082336002120156e-05, "loss": 6.9969, "step": 20950 }, { "epoch": 0.48948803428284515, "grad_norm": 4.40625, "learning_rate": 2.7063491807569514e-05, "loss": 6.9464, "step": 20960 }, { "epoch": 0.4897215686503465, "grad_norm": 4.53125, "learning_rate": 2.704464643249699e-05, "loss": 6.973, "step": 20970 }, { "epoch": 0.48995510301784784, "grad_norm": 4.34375, "learning_rate": 2.7025799887684002e-05, "loss": 6.9378, "step": 20980 }, { "epoch": 0.49018863738534924, "grad_norm": 5.25, "learning_rate": 2.700695218391263e-05, "loss": 6.9868, "step": 20990 }, { "epoch": 0.4904221717528506, "grad_norm": 4.53125, "learning_rate": 2.6988103331965648e-05, "loss": 7.0561, "step": 21000 }, { "epoch": 0.4904221717528506, "eval_loss": 6.994281768798828, "eval_runtime": 78.9842, "eval_samples_per_second": 12.661, "eval_steps_per_second": 12.661, "step": 21000 }, { "epoch": 0.4906557061203519, "grad_norm": 5.1875, "learning_rate": 2.6969253342626454e-05, "loss": 6.9523, "step": 21010 }, { "epoch": 0.4908892404878533, "grad_norm": 4.75, "learning_rate": 2.6950402226679116e-05, "loss": 6.9896, "step": 21020 }, { "epoch": 0.49112277485535466, "grad_norm": 4.375, "learning_rate": 2.6931549994908344e-05, "loss": 7.0182, "step": 21030 }, { "epoch": 0.491356309222856, "grad_norm": 4.4375, "learning_rate": 2.691269665809948e-05, "loss": 6.9883, "step": 21040 }, { "epoch": 0.49158984359035734, "grad_norm": 4.53125, "learning_rate": 2.6893842227038503e-05, "loss": 6.9576, "step": 21050 }, { "epoch": 0.49182337795785874, "grad_norm": 5.1875, "learning_rate": 2.6874986712512007e-05, "loss": 6.9447, "step": 21060 }, { "epoch": 0.4920569123253601, "grad_norm": 4.375, "learning_rate": 2.6856130125307243e-05, "loss": 7.0328, "step": 21070 }, { "epoch": 0.4922904466928614, "grad_norm": 3.734375, "learning_rate": 2.6837272476212018e-05, "loss": 6.9563, "step": 21080 }, { "epoch": 0.49252398106036277, "grad_norm": 4.59375, "learning_rate": 2.681841377601479e-05, "loss": 6.9609, "step": 21090 }, { "epoch": 0.49275751542786417, "grad_norm": 4.3125, "learning_rate": 2.6799554035504603e-05, "loss": 7.0117, "step": 21100 }, { "epoch": 0.4929910497953655, "grad_norm": 5.40625, "learning_rate": 2.6780693265471097e-05, "loss": 6.9773, "step": 21110 }, { "epoch": 0.49322458416286685, "grad_norm": 3.890625, "learning_rate": 2.676183147670451e-05, "loss": 6.9236, "step": 21120 }, { "epoch": 0.49345811853036825, "grad_norm": 3.8125, "learning_rate": 2.674296867999565e-05, "loss": 7.0316, "step": 21130 }, { "epoch": 0.4936916528978696, "grad_norm": 4.3125, "learning_rate": 2.672410488613591e-05, "loss": 6.9881, "step": 21140 }, { "epoch": 0.49392518726537094, "grad_norm": 5.40625, "learning_rate": 2.6705240105917244e-05, "loss": 7.0291, "step": 21150 }, { "epoch": 0.4941587216328723, "grad_norm": 3.5, "learning_rate": 2.6686374350132183e-05, "loss": 7.0271, "step": 21160 }, { "epoch": 0.4943922560003737, "grad_norm": 5.5, "learning_rate": 2.6667507629573797e-05, "loss": 7.0144, "step": 21170 }, { "epoch": 0.494625790367875, "grad_norm": 5.625, "learning_rate": 2.664863995503573e-05, "loss": 7.0004, "step": 21180 }, { "epoch": 0.49485932473537636, "grad_norm": 4.96875, "learning_rate": 2.662977133731217e-05, "loss": 6.941, "step": 21190 }, { "epoch": 0.4950928591028777, "grad_norm": 5.03125, "learning_rate": 2.6610901787197827e-05, "loss": 6.9528, "step": 21200 }, { "epoch": 0.4953263934703791, "grad_norm": 3.90625, "learning_rate": 2.6592031315487952e-05, "loss": 6.9766, "step": 21210 }, { "epoch": 0.49555992783788044, "grad_norm": 5.25, "learning_rate": 2.657315993297834e-05, "loss": 6.9845, "step": 21220 }, { "epoch": 0.4957934622053818, "grad_norm": 4.625, "learning_rate": 2.655428765046527e-05, "loss": 6.9641, "step": 21230 }, { "epoch": 0.4960269965728832, "grad_norm": 5.0, "learning_rate": 2.6535414478745578e-05, "loss": 7.024, "step": 21240 }, { "epoch": 0.4962605309403845, "grad_norm": 4.21875, "learning_rate": 2.6516540428616586e-05, "loss": 6.9735, "step": 21250 }, { "epoch": 0.49649406530788587, "grad_norm": 5.59375, "learning_rate": 2.6497665510876118e-05, "loss": 6.9469, "step": 21260 }, { "epoch": 0.4967275996753872, "grad_norm": 4.1875, "learning_rate": 2.6478789736322496e-05, "loss": 6.9605, "step": 21270 }, { "epoch": 0.4969611340428886, "grad_norm": 4.15625, "learning_rate": 2.6459913115754536e-05, "loss": 7.0107, "step": 21280 }, { "epoch": 0.49719466841038995, "grad_norm": 4.1875, "learning_rate": 2.644103565997154e-05, "loss": 6.9739, "step": 21290 }, { "epoch": 0.4974282027778913, "grad_norm": 5.4375, "learning_rate": 2.642215737977328e-05, "loss": 7.0299, "step": 21300 }, { "epoch": 0.49766173714539264, "grad_norm": 3.84375, "learning_rate": 2.6403278285960015e-05, "loss": 7.0699, "step": 21310 }, { "epoch": 0.49789527151289403, "grad_norm": 4.71875, "learning_rate": 2.6384398389332442e-05, "loss": 7.0199, "step": 21320 }, { "epoch": 0.4981288058803954, "grad_norm": 3.96875, "learning_rate": 2.6365517700691732e-05, "loss": 6.9879, "step": 21330 }, { "epoch": 0.4983623402478967, "grad_norm": 5.125, "learning_rate": 2.6346636230839532e-05, "loss": 6.9738, "step": 21340 }, { "epoch": 0.4985958746153981, "grad_norm": 5.46875, "learning_rate": 2.6327753990577897e-05, "loss": 6.9428, "step": 21350 }, { "epoch": 0.49882940898289946, "grad_norm": 4.75, "learning_rate": 2.630887099070935e-05, "loss": 7.0121, "step": 21360 }, { "epoch": 0.4990629433504008, "grad_norm": 4.9375, "learning_rate": 2.6289987242036827e-05, "loss": 7.0031, "step": 21370 }, { "epoch": 0.49929647771790214, "grad_norm": 4.34375, "learning_rate": 2.627110275536372e-05, "loss": 6.9942, "step": 21380 }, { "epoch": 0.49953001208540354, "grad_norm": 3.875, "learning_rate": 2.6252217541493813e-05, "loss": 6.9998, "step": 21390 }, { "epoch": 0.4997635464529049, "grad_norm": 4.59375, "learning_rate": 2.623333161123133e-05, "loss": 6.9171, "step": 21400 }, { "epoch": 0.4999970808204062, "grad_norm": 4.125, "learning_rate": 2.6214444975380893e-05, "loss": 6.9698, "step": 21410 }, { "epoch": 0.5002306151879076, "grad_norm": 4.28125, "learning_rate": 2.6195557644747527e-05, "loss": 7.0142, "step": 21420 }, { "epoch": 0.500464149555409, "grad_norm": 3.875, "learning_rate": 2.6176669630136668e-05, "loss": 6.9984, "step": 21430 }, { "epoch": 0.5006976839229103, "grad_norm": 5.96875, "learning_rate": 2.615778094235412e-05, "loss": 6.9862, "step": 21440 }, { "epoch": 0.5009312182904117, "grad_norm": 4.6875, "learning_rate": 2.6138891592206087e-05, "loss": 7.0485, "step": 21450 }, { "epoch": 0.501164752657913, "grad_norm": 5.0625, "learning_rate": 2.6120001590499153e-05, "loss": 6.9145, "step": 21460 }, { "epoch": 0.5013982870254143, "grad_norm": 4.53125, "learning_rate": 2.610111094804028e-05, "loss": 6.898, "step": 21470 }, { "epoch": 0.5016318213929157, "grad_norm": 4.75, "learning_rate": 2.608221967563677e-05, "loss": 7.0134, "step": 21480 }, { "epoch": 0.5018653557604171, "grad_norm": 3.859375, "learning_rate": 2.606332778409632e-05, "loss": 7.0626, "step": 21490 }, { "epoch": 0.5020988901279184, "grad_norm": 4.0625, "learning_rate": 2.604443528422697e-05, "loss": 6.9943, "step": 21500 }, { "epoch": 0.5020988901279184, "eval_loss": 6.9926276206970215, "eval_runtime": 78.9539, "eval_samples_per_second": 12.666, "eval_steps_per_second": 12.666, "step": 21500 }, { "epoch": 0.5023324244954198, "grad_norm": 4.0, "learning_rate": 2.6025542186837076e-05, "loss": 6.9448, "step": 21510 }, { "epoch": 0.5025659588629212, "grad_norm": 4.28125, "learning_rate": 2.600664850273538e-05, "loss": 7.0208, "step": 21520 }, { "epoch": 0.5027994932304225, "grad_norm": 4.84375, "learning_rate": 2.5987754242730948e-05, "loss": 7.0281, "step": 21530 }, { "epoch": 0.5030330275979239, "grad_norm": 3.890625, "learning_rate": 2.5968859417633158e-05, "loss": 7.0618, "step": 21540 }, { "epoch": 0.5032665619654252, "grad_norm": 5.0625, "learning_rate": 2.594996403825173e-05, "loss": 7.0082, "step": 21550 }, { "epoch": 0.5035000963329266, "grad_norm": 5.15625, "learning_rate": 2.5931068115396688e-05, "loss": 7.048, "step": 21560 }, { "epoch": 0.503733630700428, "grad_norm": 4.0, "learning_rate": 2.5912171659878388e-05, "loss": 7.0134, "step": 21570 }, { "epoch": 0.5039671650679293, "grad_norm": 5.21875, "learning_rate": 2.5893274682507457e-05, "loss": 6.957, "step": 21580 }, { "epoch": 0.5042006994354307, "grad_norm": 4.28125, "learning_rate": 2.587437719409485e-05, "loss": 6.9004, "step": 21590 }, { "epoch": 0.5044342338029321, "grad_norm": 3.75, "learning_rate": 2.5855479205451798e-05, "loss": 6.9769, "step": 21600 }, { "epoch": 0.5046677681704334, "grad_norm": 4.03125, "learning_rate": 2.5836580727389836e-05, "loss": 6.9483, "step": 21610 }, { "epoch": 0.5049013025379347, "grad_norm": 5.5625, "learning_rate": 2.5817681770720757e-05, "loss": 6.9289, "step": 21620 }, { "epoch": 0.5051348369054361, "grad_norm": 3.96875, "learning_rate": 2.5798782346256635e-05, "loss": 6.9672, "step": 21630 }, { "epoch": 0.5053683712729374, "grad_norm": 6.3125, "learning_rate": 2.5779882464809822e-05, "loss": 6.9645, "step": 21640 }, { "epoch": 0.5056019056404388, "grad_norm": 4.25, "learning_rate": 2.576098213719293e-05, "loss": 6.9183, "step": 21650 }, { "epoch": 0.5058354400079401, "grad_norm": 3.90625, "learning_rate": 2.57420813742188e-05, "loss": 7.0393, "step": 21660 }, { "epoch": 0.5060689743754415, "grad_norm": 4.46875, "learning_rate": 2.5723180186700563e-05, "loss": 6.9912, "step": 21670 }, { "epoch": 0.5063025087429429, "grad_norm": 4.625, "learning_rate": 2.570427858545156e-05, "loss": 6.9592, "step": 21680 }, { "epoch": 0.5065360431104442, "grad_norm": 4.5, "learning_rate": 2.5685376581285377e-05, "loss": 6.9786, "step": 21690 }, { "epoch": 0.5067695774779456, "grad_norm": 5.09375, "learning_rate": 2.5666474185015843e-05, "loss": 7.005, "step": 21700 }, { "epoch": 0.507003111845447, "grad_norm": 4.5625, "learning_rate": 2.5647571407457006e-05, "loss": 6.9652, "step": 21710 }, { "epoch": 0.5072366462129483, "grad_norm": 5.25, "learning_rate": 2.5628668259423122e-05, "loss": 6.9747, "step": 21720 }, { "epoch": 0.5074701805804497, "grad_norm": 4.625, "learning_rate": 2.560976475172867e-05, "loss": 6.9736, "step": 21730 }, { "epoch": 0.5077037149479511, "grad_norm": 3.59375, "learning_rate": 2.5590860895188322e-05, "loss": 6.9761, "step": 21740 }, { "epoch": 0.5079372493154524, "grad_norm": 4.78125, "learning_rate": 2.5571956700616968e-05, "loss": 7.0114, "step": 21750 }, { "epoch": 0.5081707836829538, "grad_norm": 3.984375, "learning_rate": 2.555305217882967e-05, "loss": 7.0109, "step": 21760 }, { "epoch": 0.508404318050455, "grad_norm": 4.0625, "learning_rate": 2.5534147340641705e-05, "loss": 7.0362, "step": 21770 }, { "epoch": 0.5086378524179564, "grad_norm": 4.90625, "learning_rate": 2.5515242196868506e-05, "loss": 7.012, "step": 21780 }, { "epoch": 0.5088713867854578, "grad_norm": 4.28125, "learning_rate": 2.5496336758325684e-05, "loss": 7.054, "step": 21790 }, { "epoch": 0.5091049211529591, "grad_norm": 4.625, "learning_rate": 2.5477431035829035e-05, "loss": 7.0334, "step": 21800 }, { "epoch": 0.5093384555204605, "grad_norm": 4.34375, "learning_rate": 2.545852504019449e-05, "loss": 7.0182, "step": 21810 }, { "epoch": 0.5095719898879619, "grad_norm": 4.28125, "learning_rate": 2.543961878223817e-05, "loss": 6.9951, "step": 21820 }, { "epoch": 0.5098055242554632, "grad_norm": 4.3125, "learning_rate": 2.5420712272776327e-05, "loss": 7.0253, "step": 21830 }, { "epoch": 0.5100390586229646, "grad_norm": 4.40625, "learning_rate": 2.540180552262535e-05, "loss": 6.9555, "step": 21840 }, { "epoch": 0.510272592990466, "grad_norm": 4.53125, "learning_rate": 2.5382898542601773e-05, "loss": 6.9678, "step": 21850 }, { "epoch": 0.5105061273579673, "grad_norm": 5.1875, "learning_rate": 2.5363991343522288e-05, "loss": 6.99, "step": 21860 }, { "epoch": 0.5107396617254687, "grad_norm": 3.578125, "learning_rate": 2.5345083936203652e-05, "loss": 6.9921, "step": 21870 }, { "epoch": 0.51097319609297, "grad_norm": 5.15625, "learning_rate": 2.53261763314628e-05, "loss": 6.9553, "step": 21880 }, { "epoch": 0.5112067304604714, "grad_norm": 4.59375, "learning_rate": 2.5307268540116742e-05, "loss": 7.0161, "step": 21890 }, { "epoch": 0.5114402648279728, "grad_norm": 4.5, "learning_rate": 2.5288360572982624e-05, "loss": 6.8979, "step": 21900 }, { "epoch": 0.5116737991954741, "grad_norm": 5.3125, "learning_rate": 2.5269452440877662e-05, "loss": 7.0775, "step": 21910 }, { "epoch": 0.5119073335629755, "grad_norm": 5.84375, "learning_rate": 2.5250544154619192e-05, "loss": 7.0054, "step": 21920 }, { "epoch": 0.5121408679304769, "grad_norm": 4.03125, "learning_rate": 2.5231635725024634e-05, "loss": 7.0556, "step": 21930 }, { "epoch": 0.5123744022979781, "grad_norm": 4.40625, "learning_rate": 2.521272716291147e-05, "loss": 6.9351, "step": 21940 }, { "epoch": 0.5126079366654795, "grad_norm": 5.3125, "learning_rate": 2.519381847909728e-05, "loss": 6.9089, "step": 21950 }, { "epoch": 0.512841471032981, "grad_norm": 4.21875, "learning_rate": 2.517490968439971e-05, "loss": 7.059, "step": 21960 }, { "epoch": 0.5130750054004822, "grad_norm": 4.53125, "learning_rate": 2.515600078963645e-05, "loss": 7.0697, "step": 21970 }, { "epoch": 0.5133085397679836, "grad_norm": 4.65625, "learning_rate": 2.513709180562528e-05, "loss": 6.9785, "step": 21980 }, { "epoch": 0.5135420741354849, "grad_norm": 4.59375, "learning_rate": 2.5118182743184004e-05, "loss": 6.9846, "step": 21990 }, { "epoch": 0.5137756085029863, "grad_norm": 4.84375, "learning_rate": 2.5099273613130486e-05, "loss": 6.9857, "step": 22000 }, { "epoch": 0.5137756085029863, "eval_loss": 6.988227367401123, "eval_runtime": 79.0013, "eval_samples_per_second": 12.658, "eval_steps_per_second": 12.658, "step": 22000 }, { "epoch": 0.5140091428704877, "grad_norm": 3.8125, "learning_rate": 2.5080364426282615e-05, "loss": 6.9906, "step": 22010 }, { "epoch": 0.514242677237989, "grad_norm": 4.25, "learning_rate": 2.5061455193458328e-05, "loss": 7.0204, "step": 22020 }, { "epoch": 0.5144762116054904, "grad_norm": 4.6875, "learning_rate": 2.504254592547557e-05, "loss": 6.9508, "step": 22030 }, { "epoch": 0.5147097459729918, "grad_norm": 4.34375, "learning_rate": 2.502363663315233e-05, "loss": 7.0044, "step": 22040 }, { "epoch": 0.5149432803404931, "grad_norm": 4.40625, "learning_rate": 2.5004727327306594e-05, "loss": 7.0357, "step": 22050 }, { "epoch": 0.5151768147079945, "grad_norm": 4.8125, "learning_rate": 2.4985818018756352e-05, "loss": 6.991, "step": 22060 }, { "epoch": 0.5154103490754959, "grad_norm": 5.59375, "learning_rate": 2.496690871831962e-05, "loss": 6.9994, "step": 22070 }, { "epoch": 0.5156438834429972, "grad_norm": 4.46875, "learning_rate": 2.4947999436814372e-05, "loss": 7.0153, "step": 22080 }, { "epoch": 0.5158774178104986, "grad_norm": 4.9375, "learning_rate": 2.4929090185058607e-05, "loss": 7.05, "step": 22090 }, { "epoch": 0.5161109521779998, "grad_norm": 4.03125, "learning_rate": 2.491018097387028e-05, "loss": 6.9712, "step": 22100 }, { "epoch": 0.5163444865455012, "grad_norm": 5.71875, "learning_rate": 2.489127181406735e-05, "loss": 7.0495, "step": 22110 }, { "epoch": 0.5165780209130026, "grad_norm": 5.25, "learning_rate": 2.4872362716467716e-05, "loss": 7.0106, "step": 22120 }, { "epoch": 0.5168115552805039, "grad_norm": 3.9375, "learning_rate": 2.4853453691889258e-05, "loss": 6.9814, "step": 22130 }, { "epoch": 0.5170450896480053, "grad_norm": 5.375, "learning_rate": 2.483454475114983e-05, "loss": 6.9798, "step": 22140 }, { "epoch": 0.5172786240155067, "grad_norm": 4.125, "learning_rate": 2.481563590506719e-05, "loss": 6.9345, "step": 22150 }, { "epoch": 0.517512158383008, "grad_norm": 4.5, "learning_rate": 2.4796727164459104e-05, "loss": 6.9939, "step": 22160 }, { "epoch": 0.5177456927505094, "grad_norm": 4.53125, "learning_rate": 2.477781854014322e-05, "loss": 6.9549, "step": 22170 }, { "epoch": 0.5179792271180107, "grad_norm": 3.5625, "learning_rate": 2.475891004293717e-05, "loss": 7.0115, "step": 22180 }, { "epoch": 0.5182127614855121, "grad_norm": 4.28125, "learning_rate": 2.474000168365848e-05, "loss": 7.0486, "step": 22190 }, { "epoch": 0.5184462958530135, "grad_norm": 4.6875, "learning_rate": 2.472109347312459e-05, "loss": 6.9574, "step": 22200 }, { "epoch": 0.5186798302205148, "grad_norm": 4.5, "learning_rate": 2.4702185422152894e-05, "loss": 6.9822, "step": 22210 }, { "epoch": 0.5189133645880162, "grad_norm": 3.3125, "learning_rate": 2.468327754156065e-05, "loss": 6.9061, "step": 22220 }, { "epoch": 0.5191468989555176, "grad_norm": 5.3125, "learning_rate": 2.4664369842165068e-05, "loss": 6.9837, "step": 22230 }, { "epoch": 0.5193804333230189, "grad_norm": 4.6875, "learning_rate": 2.4645462334783202e-05, "loss": 6.9811, "step": 22240 }, { "epoch": 0.5196139676905203, "grad_norm": 3.90625, "learning_rate": 2.4626555030232028e-05, "loss": 6.9526, "step": 22250 }, { "epoch": 0.5198475020580217, "grad_norm": 4.125, "learning_rate": 2.4607647939328405e-05, "loss": 7.0654, "step": 22260 }, { "epoch": 0.520081036425523, "grad_norm": 4.78125, "learning_rate": 2.458874107288905e-05, "loss": 6.9897, "step": 22270 }, { "epoch": 0.5203145707930243, "grad_norm": 4.0625, "learning_rate": 2.456983444173059e-05, "loss": 6.9092, "step": 22280 }, { "epoch": 0.5205481051605256, "grad_norm": 4.875, "learning_rate": 2.455092805666946e-05, "loss": 6.9877, "step": 22290 }, { "epoch": 0.520781639528027, "grad_norm": 3.96875, "learning_rate": 2.4532021928522012e-05, "loss": 7.0369, "step": 22300 }, { "epoch": 0.5210151738955284, "grad_norm": 3.546875, "learning_rate": 2.4513116068104408e-05, "loss": 6.9383, "step": 22310 }, { "epoch": 0.5212487082630297, "grad_norm": 4.40625, "learning_rate": 2.4494210486232695e-05, "loss": 6.9554, "step": 22320 }, { "epoch": 0.5214822426305311, "grad_norm": 4.71875, "learning_rate": 2.447530519372272e-05, "loss": 7.0564, "step": 22330 }, { "epoch": 0.5217157769980325, "grad_norm": 4.375, "learning_rate": 2.445640020139019e-05, "loss": 7.0612, "step": 22340 }, { "epoch": 0.5219493113655338, "grad_norm": 5.125, "learning_rate": 2.4437495520050648e-05, "loss": 7.0151, "step": 22350 }, { "epoch": 0.5221828457330352, "grad_norm": 4.625, "learning_rate": 2.441859116051942e-05, "loss": 6.9192, "step": 22360 }, { "epoch": 0.5224163801005366, "grad_norm": 5.28125, "learning_rate": 2.4399687133611703e-05, "loss": 6.9902, "step": 22370 }, { "epoch": 0.5226499144680379, "grad_norm": 5.4375, "learning_rate": 2.438078345014245e-05, "loss": 7.0124, "step": 22380 }, { "epoch": 0.5228834488355393, "grad_norm": 3.921875, "learning_rate": 2.4361880120926455e-05, "loss": 6.9879, "step": 22390 }, { "epoch": 0.5231169832030406, "grad_norm": 5.15625, "learning_rate": 2.4342977156778305e-05, "loss": 6.9135, "step": 22400 }, { "epoch": 0.523350517570542, "grad_norm": 4.125, "learning_rate": 2.4324074568512345e-05, "loss": 6.9147, "step": 22410 }, { "epoch": 0.5235840519380434, "grad_norm": 4.90625, "learning_rate": 2.4305172366942752e-05, "loss": 7.0027, "step": 22420 }, { "epoch": 0.5238175863055446, "grad_norm": 4.625, "learning_rate": 2.4286270562883442e-05, "loss": 7.0106, "step": 22430 }, { "epoch": 0.524051120673046, "grad_norm": 3.953125, "learning_rate": 2.426736916714814e-05, "loss": 7.0228, "step": 22440 }, { "epoch": 0.5242846550405474, "grad_norm": 4.09375, "learning_rate": 2.42484681905503e-05, "loss": 7.0376, "step": 22450 }, { "epoch": 0.5245181894080487, "grad_norm": 3.984375, "learning_rate": 2.4229567643903163e-05, "loss": 6.9161, "step": 22460 }, { "epoch": 0.5247517237755501, "grad_norm": 4.375, "learning_rate": 2.4210667538019724e-05, "loss": 6.9559, "step": 22470 }, { "epoch": 0.5249852581430515, "grad_norm": 5.0625, "learning_rate": 2.419176788371269e-05, "loss": 7.0383, "step": 22480 }, { "epoch": 0.5252187925105528, "grad_norm": 5.03125, "learning_rate": 2.417286869179457e-05, "loss": 6.9725, "step": 22490 }, { "epoch": 0.5254523268780542, "grad_norm": 3.578125, "learning_rate": 2.415396997307755e-05, "loss": 7.0279, "step": 22500 }, { "epoch": 0.5254523268780542, "eval_loss": 6.9850592613220215, "eval_runtime": 78.8511, "eval_samples_per_second": 12.682, "eval_steps_per_second": 12.682, "step": 22500 }, { "epoch": 0.5256858612455555, "grad_norm": 3.875, "learning_rate": 2.4135071738373588e-05, "loss": 7.0908, "step": 22510 }, { "epoch": 0.5259193956130569, "grad_norm": 4.78125, "learning_rate": 2.411617399849433e-05, "loss": 7.0324, "step": 22520 }, { "epoch": 0.5261529299805583, "grad_norm": 4.0625, "learning_rate": 2.4097276764251186e-05, "loss": 7.0205, "step": 22530 }, { "epoch": 0.5263864643480596, "grad_norm": 3.46875, "learning_rate": 2.4078380046455222e-05, "loss": 6.9755, "step": 22540 }, { "epoch": 0.526619998715561, "grad_norm": 6.625, "learning_rate": 2.405948385591723e-05, "loss": 6.8589, "step": 22550 }, { "epoch": 0.5268535330830624, "grad_norm": 4.9375, "learning_rate": 2.404058820344773e-05, "loss": 6.9594, "step": 22560 }, { "epoch": 0.5270870674505637, "grad_norm": 4.125, "learning_rate": 2.4021693099856885e-05, "loss": 6.9427, "step": 22570 }, { "epoch": 0.5273206018180651, "grad_norm": 5.40625, "learning_rate": 2.4002798555954582e-05, "loss": 6.9364, "step": 22580 }, { "epoch": 0.5275541361855665, "grad_norm": 4.53125, "learning_rate": 2.3983904582550355e-05, "loss": 7.035, "step": 22590 }, { "epoch": 0.5277876705530677, "grad_norm": 4.03125, "learning_rate": 2.3965011190453456e-05, "loss": 7.0361, "step": 22600 }, { "epoch": 0.5280212049205691, "grad_norm": 4.53125, "learning_rate": 2.394611839047277e-05, "loss": 7.0402, "step": 22610 }, { "epoch": 0.5282547392880704, "grad_norm": 4.875, "learning_rate": 2.392722619341683e-05, "loss": 6.9939, "step": 22620 }, { "epoch": 0.5284882736555718, "grad_norm": 4.40625, "learning_rate": 2.3908334610093874e-05, "loss": 6.9907, "step": 22630 }, { "epoch": 0.5287218080230732, "grad_norm": 4.59375, "learning_rate": 2.388944365131174e-05, "loss": 7.0185, "step": 22640 }, { "epoch": 0.5289553423905745, "grad_norm": 4.09375, "learning_rate": 2.387055332787795e-05, "loss": 6.9788, "step": 22650 }, { "epoch": 0.5291888767580759, "grad_norm": 5.875, "learning_rate": 2.385166365059962e-05, "loss": 6.9832, "step": 22660 }, { "epoch": 0.5294224111255773, "grad_norm": 5.5625, "learning_rate": 2.3832774630283534e-05, "loss": 6.9334, "step": 22670 }, { "epoch": 0.5296559454930786, "grad_norm": 3.96875, "learning_rate": 2.381388627773608e-05, "loss": 7.0311, "step": 22680 }, { "epoch": 0.52988947986058, "grad_norm": 4.40625, "learning_rate": 2.379499860376326e-05, "loss": 6.9765, "step": 22690 }, { "epoch": 0.5301230142280814, "grad_norm": 4.75, "learning_rate": 2.3776111619170708e-05, "loss": 6.9524, "step": 22700 }, { "epoch": 0.5303565485955827, "grad_norm": 5.03125, "learning_rate": 2.3757225334763638e-05, "loss": 6.9327, "step": 22710 }, { "epoch": 0.5305900829630841, "grad_norm": 3.65625, "learning_rate": 2.3738339761346894e-05, "loss": 6.9636, "step": 22720 }, { "epoch": 0.5308236173305854, "grad_norm": 4.875, "learning_rate": 2.371945490972488e-05, "loss": 6.9263, "step": 22730 }, { "epoch": 0.5310571516980868, "grad_norm": 4.71875, "learning_rate": 2.3700570790701623e-05, "loss": 6.9743, "step": 22740 }, { "epoch": 0.5312906860655882, "grad_norm": 6.59375, "learning_rate": 2.3681687415080688e-05, "loss": 6.9546, "step": 22750 }, { "epoch": 0.5315242204330894, "grad_norm": 4.40625, "learning_rate": 2.3662804793665254e-05, "loss": 7.013, "step": 22760 }, { "epoch": 0.5317577548005908, "grad_norm": 4.53125, "learning_rate": 2.3643922937258056e-05, "loss": 6.9555, "step": 22770 }, { "epoch": 0.5319912891680922, "grad_norm": 4.15625, "learning_rate": 2.3625041856661368e-05, "loss": 6.9598, "step": 22780 }, { "epoch": 0.5322248235355935, "grad_norm": 3.953125, "learning_rate": 2.3606161562677056e-05, "loss": 6.9529, "step": 22790 }, { "epoch": 0.5324583579030949, "grad_norm": 4.5, "learning_rate": 2.358728206610651e-05, "loss": 6.8504, "step": 22800 }, { "epoch": 0.5326918922705963, "grad_norm": 4.84375, "learning_rate": 2.356840337775069e-05, "loss": 6.9914, "step": 22810 }, { "epoch": 0.5329254266380976, "grad_norm": 5.21875, "learning_rate": 2.3549525508410053e-05, "loss": 6.9429, "step": 22820 }, { "epoch": 0.533158961005599, "grad_norm": 3.75, "learning_rate": 2.353064846888464e-05, "loss": 6.969, "step": 22830 }, { "epoch": 0.5333924953731003, "grad_norm": 4.875, "learning_rate": 2.351177226997397e-05, "loss": 6.9652, "step": 22840 }, { "epoch": 0.5336260297406017, "grad_norm": 4.53125, "learning_rate": 2.3492896922477097e-05, "loss": 6.9311, "step": 22850 }, { "epoch": 0.5338595641081031, "grad_norm": 3.796875, "learning_rate": 2.3474022437192613e-05, "loss": 6.9068, "step": 22860 }, { "epoch": 0.5340930984756044, "grad_norm": 4.375, "learning_rate": 2.345514882491857e-05, "loss": 6.9592, "step": 22870 }, { "epoch": 0.5343266328431058, "grad_norm": 4.28125, "learning_rate": 2.343627609645256e-05, "loss": 6.9601, "step": 22880 }, { "epoch": 0.5345601672106072, "grad_norm": 6.03125, "learning_rate": 2.341740426259164e-05, "loss": 6.9324, "step": 22890 }, { "epoch": 0.5347937015781085, "grad_norm": 4.0625, "learning_rate": 2.33985333341324e-05, "loss": 6.9989, "step": 22900 }, { "epoch": 0.5350272359456099, "grad_norm": 4.40625, "learning_rate": 2.3379663321870854e-05, "loss": 7.0445, "step": 22910 }, { "epoch": 0.5352607703131113, "grad_norm": 5.09375, "learning_rate": 2.336079423660252e-05, "loss": 7.0431, "step": 22920 }, { "epoch": 0.5354943046806125, "grad_norm": 4.1875, "learning_rate": 2.334192608912241e-05, "loss": 6.9607, "step": 22930 }, { "epoch": 0.5357278390481139, "grad_norm": 4.75, "learning_rate": 2.3323058890224938e-05, "loss": 7.036, "step": 22940 }, { "epoch": 0.5359613734156152, "grad_norm": 4.09375, "learning_rate": 2.330419265070405e-05, "loss": 6.9877, "step": 22950 }, { "epoch": 0.5361949077831166, "grad_norm": 5.53125, "learning_rate": 2.328532738135308e-05, "loss": 6.9784, "step": 22960 }, { "epoch": 0.536428442150618, "grad_norm": 5.34375, "learning_rate": 2.3266463092964848e-05, "loss": 6.9244, "step": 22970 }, { "epoch": 0.5366619765181193, "grad_norm": 4.9375, "learning_rate": 2.324759979633159e-05, "loss": 6.9878, "step": 22980 }, { "epoch": 0.5368955108856207, "grad_norm": 5.3125, "learning_rate": 2.3228737502244975e-05, "loss": 6.9817, "step": 22990 }, { "epoch": 0.5371290452531221, "grad_norm": 3.734375, "learning_rate": 2.3209876221496114e-05, "loss": 6.9796, "step": 23000 }, { "epoch": 0.5371290452531221, "eval_loss": 6.981297016143799, "eval_runtime": 78.6114, "eval_samples_per_second": 12.721, "eval_steps_per_second": 12.721, "step": 23000 }, { "epoch": 0.5373625796206234, "grad_norm": 4.375, "learning_rate": 2.3191015964875525e-05, "loss": 7.0315, "step": 23010 }, { "epoch": 0.5375961139881248, "grad_norm": 4.9375, "learning_rate": 2.3172156743173157e-05, "loss": 6.9779, "step": 23020 }, { "epoch": 0.5378296483556262, "grad_norm": 4.4375, "learning_rate": 2.3153298567178335e-05, "loss": 7.0598, "step": 23030 }, { "epoch": 0.5380631827231275, "grad_norm": 4.53125, "learning_rate": 2.313444144767982e-05, "loss": 6.989, "step": 23040 }, { "epoch": 0.5382967170906289, "grad_norm": 4.375, "learning_rate": 2.3115585395465756e-05, "loss": 6.9594, "step": 23050 }, { "epoch": 0.5385302514581302, "grad_norm": 5.21875, "learning_rate": 2.3096730421323645e-05, "loss": 6.9361, "step": 23060 }, { "epoch": 0.5387637858256316, "grad_norm": 4.3125, "learning_rate": 2.3077876536040443e-05, "loss": 6.9355, "step": 23070 }, { "epoch": 0.538997320193133, "grad_norm": 3.796875, "learning_rate": 2.3059023750402416e-05, "loss": 6.9569, "step": 23080 }, { "epoch": 0.5392308545606342, "grad_norm": 5.09375, "learning_rate": 2.304017207519523e-05, "loss": 6.9096, "step": 23090 }, { "epoch": 0.5394643889281356, "grad_norm": 3.65625, "learning_rate": 2.3021321521203916e-05, "loss": 6.9956, "step": 23100 }, { "epoch": 0.539697923295637, "grad_norm": 5.75, "learning_rate": 2.3002472099212863e-05, "loss": 6.9709, "step": 23110 }, { "epoch": 0.5399314576631383, "grad_norm": 4.5, "learning_rate": 2.2983623820005804e-05, "loss": 7.0129, "step": 23120 }, { "epoch": 0.5401649920306397, "grad_norm": 4.0, "learning_rate": 2.2964776694365818e-05, "loss": 6.9925, "step": 23130 }, { "epoch": 0.5403985263981411, "grad_norm": 4.46875, "learning_rate": 2.2945930733075342e-05, "loss": 7.0046, "step": 23140 }, { "epoch": 0.5406320607656424, "grad_norm": 4.4375, "learning_rate": 2.292708594691612e-05, "loss": 6.9113, "step": 23150 }, { "epoch": 0.5408655951331438, "grad_norm": 4.9375, "learning_rate": 2.290824234666925e-05, "loss": 7.0283, "step": 23160 }, { "epoch": 0.5410991295006451, "grad_norm": 5.75, "learning_rate": 2.2889399943115132e-05, "loss": 6.9856, "step": 23170 }, { "epoch": 0.5413326638681465, "grad_norm": 3.796875, "learning_rate": 2.2870558747033497e-05, "loss": 6.9267, "step": 23180 }, { "epoch": 0.5415661982356479, "grad_norm": 6.0625, "learning_rate": 2.2851718769203375e-05, "loss": 6.9045, "step": 23190 }, { "epoch": 0.5417997326031492, "grad_norm": 4.71875, "learning_rate": 2.2832880020403087e-05, "loss": 7.0161, "step": 23200 }, { "epoch": 0.5420332669706506, "grad_norm": 4.15625, "learning_rate": 2.2814042511410284e-05, "loss": 6.9754, "step": 23210 }, { "epoch": 0.542266801338152, "grad_norm": 5.03125, "learning_rate": 2.2795206253001875e-05, "loss": 6.9552, "step": 23220 }, { "epoch": 0.5425003357056533, "grad_norm": 4.84375, "learning_rate": 2.2776371255954083e-05, "loss": 7.0234, "step": 23230 }, { "epoch": 0.5427338700731547, "grad_norm": 3.484375, "learning_rate": 2.275753753104237e-05, "loss": 7.0379, "step": 23240 }, { "epoch": 0.5429674044406559, "grad_norm": 4.3125, "learning_rate": 2.2738705089041516e-05, "loss": 6.9706, "step": 23250 }, { "epoch": 0.5432009388081573, "grad_norm": 3.8125, "learning_rate": 2.271987394072554e-05, "loss": 7.099, "step": 23260 }, { "epoch": 0.5434344731756587, "grad_norm": 4.0, "learning_rate": 2.2701044096867713e-05, "loss": 6.8828, "step": 23270 }, { "epoch": 0.54366800754316, "grad_norm": 6.125, "learning_rate": 2.268221556824059e-05, "loss": 6.9464, "step": 23280 }, { "epoch": 0.5439015419106614, "grad_norm": 4.90625, "learning_rate": 2.266338836561594e-05, "loss": 6.998, "step": 23290 }, { "epoch": 0.5441350762781628, "grad_norm": 5.625, "learning_rate": 2.26445624997648e-05, "loss": 6.9738, "step": 23300 }, { "epoch": 0.5443686106456641, "grad_norm": 4.1875, "learning_rate": 2.2625737981457422e-05, "loss": 7.0209, "step": 23310 }, { "epoch": 0.5446021450131655, "grad_norm": 4.0, "learning_rate": 2.2606914821463317e-05, "loss": 7.0668, "step": 23320 }, { "epoch": 0.5448356793806669, "grad_norm": 4.625, "learning_rate": 2.258809303055118e-05, "loss": 6.8961, "step": 23330 }, { "epoch": 0.5450692137481682, "grad_norm": 4.5, "learning_rate": 2.256927261948894e-05, "loss": 7.0193, "step": 23340 }, { "epoch": 0.5453027481156696, "grad_norm": 4.65625, "learning_rate": 2.2550453599043755e-05, "loss": 7.0416, "step": 23350 }, { "epoch": 0.5455362824831709, "grad_norm": 3.953125, "learning_rate": 2.253163597998195e-05, "loss": 7.0475, "step": 23360 }, { "epoch": 0.5457698168506723, "grad_norm": 4.90625, "learning_rate": 2.2512819773069083e-05, "loss": 6.9311, "step": 23370 }, { "epoch": 0.5460033512181737, "grad_norm": 4.09375, "learning_rate": 2.2494004989069882e-05, "loss": 6.9939, "step": 23380 }, { "epoch": 0.546236885585675, "grad_norm": 4.40625, "learning_rate": 2.2475191638748284e-05, "loss": 7.0351, "step": 23390 }, { "epoch": 0.5464704199531764, "grad_norm": 4.25, "learning_rate": 2.245637973286737e-05, "loss": 7.0312, "step": 23400 }, { "epoch": 0.5467039543206778, "grad_norm": 4.4375, "learning_rate": 2.2437569282189418e-05, "loss": 6.9387, "step": 23410 }, { "epoch": 0.546937488688179, "grad_norm": 4.15625, "learning_rate": 2.2418760297475884e-05, "loss": 7.0545, "step": 23420 }, { "epoch": 0.5471710230556804, "grad_norm": 5.125, "learning_rate": 2.239995278948736e-05, "loss": 6.9206, "step": 23430 }, { "epoch": 0.5474045574231818, "grad_norm": 4.125, "learning_rate": 2.238114676898362e-05, "loss": 7.0171, "step": 23440 }, { "epoch": 0.5476380917906831, "grad_norm": 4.0625, "learning_rate": 2.2362342246723546e-05, "loss": 6.979, "step": 23450 }, { "epoch": 0.5478716261581845, "grad_norm": 5.15625, "learning_rate": 2.2343539233465215e-05, "loss": 6.9522, "step": 23460 }, { "epoch": 0.5481051605256858, "grad_norm": 4.71875, "learning_rate": 2.232473773996581e-05, "loss": 7.007, "step": 23470 }, { "epoch": 0.5483386948931872, "grad_norm": 3.328125, "learning_rate": 2.2305937776981632e-05, "loss": 7.0342, "step": 23480 }, { "epoch": 0.5485722292606886, "grad_norm": 5.1875, "learning_rate": 2.228713935526814e-05, "loss": 6.9259, "step": 23490 }, { "epoch": 0.5488057636281899, "grad_norm": 5.3125, "learning_rate": 2.2268342485579885e-05, "loss": 7.0152, "step": 23500 }, { "epoch": 0.5488057636281899, "eval_loss": 6.978837490081787, "eval_runtime": 78.3547, "eval_samples_per_second": 12.762, "eval_steps_per_second": 12.762, "step": 23500 }, { "epoch": 0.5490392979956913, "grad_norm": 5.3125, "learning_rate": 2.224954717867056e-05, "loss": 6.925, "step": 23510 }, { "epoch": 0.5492728323631927, "grad_norm": 4.8125, "learning_rate": 2.223075344529291e-05, "loss": 6.9914, "step": 23520 }, { "epoch": 0.549506366730694, "grad_norm": 3.953125, "learning_rate": 2.221196129619885e-05, "loss": 7.0301, "step": 23530 }, { "epoch": 0.5497399010981954, "grad_norm": 5.28125, "learning_rate": 2.2193170742139325e-05, "loss": 6.9505, "step": 23540 }, { "epoch": 0.5499734354656968, "grad_norm": 4.40625, "learning_rate": 2.2174381793864408e-05, "loss": 6.9566, "step": 23550 }, { "epoch": 0.5502069698331981, "grad_norm": 5.375, "learning_rate": 2.2155594462123245e-05, "loss": 7.0124, "step": 23560 }, { "epoch": 0.5504405042006995, "grad_norm": 5.09375, "learning_rate": 2.2136808757664032e-05, "loss": 6.9435, "step": 23570 }, { "epoch": 0.5506740385682007, "grad_norm": 4.25, "learning_rate": 2.2118024691234074e-05, "loss": 6.9831, "step": 23580 }, { "epoch": 0.5509075729357021, "grad_norm": 4.03125, "learning_rate": 2.2099242273579704e-05, "loss": 6.9492, "step": 23590 }, { "epoch": 0.5511411073032035, "grad_norm": 3.609375, "learning_rate": 2.2080461515446348e-05, "loss": 6.9764, "step": 23600 }, { "epoch": 0.5513746416707048, "grad_norm": 4.59375, "learning_rate": 2.206168242757844e-05, "loss": 7.0107, "step": 23610 }, { "epoch": 0.5516081760382062, "grad_norm": 6.0625, "learning_rate": 2.2042905020719492e-05, "loss": 6.9865, "step": 23620 }, { "epoch": 0.5518417104057076, "grad_norm": 4.40625, "learning_rate": 2.202412930561204e-05, "loss": 7.0168, "step": 23630 }, { "epoch": 0.5520752447732089, "grad_norm": 4.1875, "learning_rate": 2.200535529299765e-05, "loss": 6.9466, "step": 23640 }, { "epoch": 0.5523087791407103, "grad_norm": 4.6875, "learning_rate": 2.1986582993616926e-05, "loss": 7.0212, "step": 23650 }, { "epoch": 0.5525423135082117, "grad_norm": 4.34375, "learning_rate": 2.1967812418209476e-05, "loss": 7.0019, "step": 23660 }, { "epoch": 0.552775847875713, "grad_norm": 5.6875, "learning_rate": 2.194904357751394e-05, "loss": 7.0126, "step": 23670 }, { "epoch": 0.5530093822432144, "grad_norm": 4.5625, "learning_rate": 2.1930276482267943e-05, "loss": 6.93, "step": 23680 }, { "epoch": 0.5532429166107157, "grad_norm": 3.6875, "learning_rate": 2.191151114320814e-05, "loss": 7.0098, "step": 23690 }, { "epoch": 0.5534764509782171, "grad_norm": 4.28125, "learning_rate": 2.1892747571070155e-05, "loss": 7.0098, "step": 23700 }, { "epoch": 0.5537099853457185, "grad_norm": 4.53125, "learning_rate": 2.187398577658861e-05, "loss": 6.9714, "step": 23710 }, { "epoch": 0.5539435197132198, "grad_norm": 4.59375, "learning_rate": 2.185522577049712e-05, "loss": 6.9321, "step": 23720 }, { "epoch": 0.5541770540807212, "grad_norm": 4.40625, "learning_rate": 2.183646756352825e-05, "loss": 6.9664, "step": 23730 }, { "epoch": 0.5544105884482226, "grad_norm": 3.53125, "learning_rate": 2.1817711166413578e-05, "loss": 6.9746, "step": 23740 }, { "epoch": 0.5546441228157238, "grad_norm": 4.71875, "learning_rate": 2.1798956589883603e-05, "loss": 6.9993, "step": 23750 }, { "epoch": 0.5548776571832252, "grad_norm": 5.25, "learning_rate": 2.1780203844667818e-05, "loss": 7.0101, "step": 23760 }, { "epoch": 0.5551111915507266, "grad_norm": 5.53125, "learning_rate": 2.1761452941494648e-05, "loss": 6.9783, "step": 23770 }, { "epoch": 0.5553447259182279, "grad_norm": 3.8125, "learning_rate": 2.1742703891091457e-05, "loss": 6.9116, "step": 23780 }, { "epoch": 0.5555782602857293, "grad_norm": 4.5625, "learning_rate": 2.172395670418457e-05, "loss": 6.917, "step": 23790 }, { "epoch": 0.5558117946532306, "grad_norm": 4.46875, "learning_rate": 2.170521139149924e-05, "loss": 6.9668, "step": 23800 }, { "epoch": 0.556045329020732, "grad_norm": 4.09375, "learning_rate": 2.1686467963759643e-05, "loss": 6.9843, "step": 23810 }, { "epoch": 0.5562788633882334, "grad_norm": 4.84375, "learning_rate": 2.1667726431688868e-05, "loss": 6.9899, "step": 23820 }, { "epoch": 0.5565123977557347, "grad_norm": 4.46875, "learning_rate": 2.1648986806008943e-05, "loss": 6.9595, "step": 23830 }, { "epoch": 0.5567459321232361, "grad_norm": 4.6875, "learning_rate": 2.1630249097440785e-05, "loss": 6.8924, "step": 23840 }, { "epoch": 0.5569794664907375, "grad_norm": 4.28125, "learning_rate": 2.1611513316704206e-05, "loss": 7.019, "step": 23850 }, { "epoch": 0.5572130008582388, "grad_norm": 5.25, "learning_rate": 2.1592779474517964e-05, "loss": 7.0227, "step": 23860 }, { "epoch": 0.5574465352257402, "grad_norm": 4.0, "learning_rate": 2.1574047581599648e-05, "loss": 6.9197, "step": 23870 }, { "epoch": 0.5576800695932416, "grad_norm": 4.96875, "learning_rate": 2.1555317648665763e-05, "loss": 7.0361, "step": 23880 }, { "epoch": 0.5579136039607429, "grad_norm": 4.9375, "learning_rate": 2.153658968643169e-05, "loss": 6.9909, "step": 23890 }, { "epoch": 0.5581471383282443, "grad_norm": 3.96875, "learning_rate": 2.1517863705611684e-05, "loss": 7.0021, "step": 23900 }, { "epoch": 0.5583806726957455, "grad_norm": 4.90625, "learning_rate": 2.1499139716918856e-05, "loss": 7.0217, "step": 23910 }, { "epoch": 0.5586142070632469, "grad_norm": 4.21875, "learning_rate": 2.1480417731065184e-05, "loss": 6.8921, "step": 23920 }, { "epoch": 0.5588477414307483, "grad_norm": 6.03125, "learning_rate": 2.1461697758761506e-05, "loss": 6.9909, "step": 23930 }, { "epoch": 0.5590812757982496, "grad_norm": 4.1875, "learning_rate": 2.144297981071749e-05, "loss": 6.997, "step": 23940 }, { "epoch": 0.559314810165751, "grad_norm": 4.71875, "learning_rate": 2.1424263897641675e-05, "loss": 6.8429, "step": 23950 }, { "epoch": 0.5595483445332524, "grad_norm": 4.125, "learning_rate": 2.14055500302414e-05, "loss": 6.9521, "step": 23960 }, { "epoch": 0.5597818789007537, "grad_norm": 5.15625, "learning_rate": 2.138683821922287e-05, "loss": 6.9268, "step": 23970 }, { "epoch": 0.5600154132682551, "grad_norm": 4.09375, "learning_rate": 2.1368128475291094e-05, "loss": 6.9853, "step": 23980 }, { "epoch": 0.5602489476357565, "grad_norm": 4.0625, "learning_rate": 2.134942080914988e-05, "loss": 6.896, "step": 23990 }, { "epoch": 0.5604824820032578, "grad_norm": 4.03125, "learning_rate": 2.133071523150189e-05, "loss": 6.9324, "step": 24000 }, { "epoch": 0.5604824820032578, "eval_loss": 6.976040363311768, "eval_runtime": 79.0289, "eval_samples_per_second": 12.654, "eval_steps_per_second": 12.654, "step": 24000 }, { "epoch": 0.5607160163707592, "grad_norm": 3.703125, "learning_rate": 2.131201175304856e-05, "loss": 6.9779, "step": 24010 }, { "epoch": 0.5609495507382605, "grad_norm": 4.59375, "learning_rate": 2.129331038449014e-05, "loss": 6.9618, "step": 24020 }, { "epoch": 0.5611830851057619, "grad_norm": 5.90625, "learning_rate": 2.1274611136525656e-05, "loss": 7.0183, "step": 24030 }, { "epoch": 0.5614166194732633, "grad_norm": 4.75, "learning_rate": 2.125591401985294e-05, "loss": 6.9786, "step": 24040 }, { "epoch": 0.5616501538407646, "grad_norm": 4.625, "learning_rate": 2.12372190451686e-05, "loss": 6.9574, "step": 24050 }, { "epoch": 0.561883688208266, "grad_norm": 4.28125, "learning_rate": 2.1218526223168e-05, "loss": 6.9652, "step": 24060 }, { "epoch": 0.5621172225757674, "grad_norm": 4.34375, "learning_rate": 2.1199835564545297e-05, "loss": 6.9548, "step": 24070 }, { "epoch": 0.5623507569432686, "grad_norm": 5.6875, "learning_rate": 2.11811470799934e-05, "loss": 6.9167, "step": 24080 }, { "epoch": 0.56258429131077, "grad_norm": 5.5, "learning_rate": 2.1162460780203976e-05, "loss": 6.9612, "step": 24090 }, { "epoch": 0.5628178256782714, "grad_norm": 4.1875, "learning_rate": 2.114377667586744e-05, "loss": 6.945, "step": 24100 }, { "epoch": 0.5630513600457727, "grad_norm": 4.8125, "learning_rate": 2.112509477767296e-05, "loss": 6.9761, "step": 24110 }, { "epoch": 0.5632848944132741, "grad_norm": 4.21875, "learning_rate": 2.110641509630842e-05, "loss": 6.9818, "step": 24120 }, { "epoch": 0.5635184287807754, "grad_norm": 4.25, "learning_rate": 2.1087737642460453e-05, "loss": 6.9954, "step": 24130 }, { "epoch": 0.5637519631482768, "grad_norm": 4.65625, "learning_rate": 2.1069062426814428e-05, "loss": 7.0556, "step": 24140 }, { "epoch": 0.5639854975157782, "grad_norm": 5.90625, "learning_rate": 2.1050389460054393e-05, "loss": 6.9751, "step": 24150 }, { "epoch": 0.5642190318832795, "grad_norm": 4.15625, "learning_rate": 2.103171875286316e-05, "loss": 7.0363, "step": 24160 }, { "epoch": 0.5644525662507809, "grad_norm": 5.0, "learning_rate": 2.1013050315922215e-05, "loss": 7.0107, "step": 24170 }, { "epoch": 0.5646861006182823, "grad_norm": 4.3125, "learning_rate": 2.0994384159911763e-05, "loss": 7.0198, "step": 24180 }, { "epoch": 0.5649196349857836, "grad_norm": 4.25, "learning_rate": 2.0975720295510687e-05, "loss": 6.9128, "step": 24190 }, { "epoch": 0.565153169353285, "grad_norm": 5.0625, "learning_rate": 2.095705873339656e-05, "loss": 6.9549, "step": 24200 }, { "epoch": 0.5653867037207864, "grad_norm": 5.0, "learning_rate": 2.0938399484245663e-05, "loss": 6.9541, "step": 24210 }, { "epoch": 0.5656202380882877, "grad_norm": 4.375, "learning_rate": 2.0919742558732924e-05, "loss": 7.0461, "step": 24220 }, { "epoch": 0.565853772455789, "grad_norm": 5.09375, "learning_rate": 2.090108796753197e-05, "loss": 6.9264, "step": 24230 }, { "epoch": 0.5660873068232903, "grad_norm": 3.921875, "learning_rate": 2.0882435721315054e-05, "loss": 6.991, "step": 24240 }, { "epoch": 0.5663208411907917, "grad_norm": 3.875, "learning_rate": 2.086378583075313e-05, "loss": 6.8931, "step": 24250 }, { "epoch": 0.5665543755582931, "grad_norm": 4.4375, "learning_rate": 2.084513830651578e-05, "loss": 6.9815, "step": 24260 }, { "epoch": 0.5667879099257944, "grad_norm": 4.0625, "learning_rate": 2.0826493159271222e-05, "loss": 6.9724, "step": 24270 }, { "epoch": 0.5670214442932958, "grad_norm": 4.0625, "learning_rate": 2.080785039968634e-05, "loss": 6.8777, "step": 24280 }, { "epoch": 0.5672549786607972, "grad_norm": 3.875, "learning_rate": 2.0789210038426637e-05, "loss": 6.883, "step": 24290 }, { "epoch": 0.5674885130282985, "grad_norm": 5.84375, "learning_rate": 2.0770572086156265e-05, "loss": 6.9434, "step": 24300 }, { "epoch": 0.5677220473957999, "grad_norm": 5.40625, "learning_rate": 2.0751936553537944e-05, "loss": 6.9882, "step": 24310 }, { "epoch": 0.5679555817633012, "grad_norm": 3.921875, "learning_rate": 2.073330345123309e-05, "loss": 7.0127, "step": 24320 }, { "epoch": 0.5681891161308026, "grad_norm": 4.59375, "learning_rate": 2.0714672789901656e-05, "loss": 6.9111, "step": 24330 }, { "epoch": 0.568422650498304, "grad_norm": 5.0625, "learning_rate": 2.0696044580202227e-05, "loss": 6.9888, "step": 24340 }, { "epoch": 0.5686561848658053, "grad_norm": 4.71875, "learning_rate": 2.0677418832792005e-05, "loss": 7.0025, "step": 24350 }, { "epoch": 0.5688897192333067, "grad_norm": 6.9375, "learning_rate": 2.0658795558326743e-05, "loss": 7.0061, "step": 24360 }, { "epoch": 0.5691232536008081, "grad_norm": 4.375, "learning_rate": 2.0640174767460814e-05, "loss": 6.9437, "step": 24370 }, { "epoch": 0.5693567879683094, "grad_norm": 4.3125, "learning_rate": 2.062155647084715e-05, "loss": 6.992, "step": 24380 }, { "epoch": 0.5695903223358108, "grad_norm": 5.34375, "learning_rate": 2.0602940679137266e-05, "loss": 6.9236, "step": 24390 }, { "epoch": 0.5698238567033121, "grad_norm": 4.53125, "learning_rate": 2.058432740298124e-05, "loss": 7.0154, "step": 24400 }, { "epoch": 0.5700573910708134, "grad_norm": 4.28125, "learning_rate": 2.0565716653027707e-05, "loss": 6.8876, "step": 24410 }, { "epoch": 0.5702909254383148, "grad_norm": 4.0625, "learning_rate": 2.054710843992387e-05, "loss": 6.9442, "step": 24420 }, { "epoch": 0.5705244598058161, "grad_norm": 4.875, "learning_rate": 2.0528502774315454e-05, "loss": 7.0152, "step": 24430 }, { "epoch": 0.5707579941733175, "grad_norm": 4.125, "learning_rate": 2.050989966684677e-05, "loss": 6.9385, "step": 24440 }, { "epoch": 0.5709915285408189, "grad_norm": 4.4375, "learning_rate": 2.0491299128160617e-05, "loss": 7.0455, "step": 24450 }, { "epoch": 0.5712250629083202, "grad_norm": 4.15625, "learning_rate": 2.047270116889836e-05, "loss": 6.907, "step": 24460 }, { "epoch": 0.5714585972758216, "grad_norm": 3.671875, "learning_rate": 2.0454105799699867e-05, "loss": 7.0215, "step": 24470 }, { "epoch": 0.571692131643323, "grad_norm": 4.875, "learning_rate": 2.0435513031203545e-05, "loss": 6.9653, "step": 24480 }, { "epoch": 0.5719256660108243, "grad_norm": 4.59375, "learning_rate": 2.0416922874046293e-05, "loss": 7.0428, "step": 24490 }, { "epoch": 0.5721592003783257, "grad_norm": 4.84375, "learning_rate": 2.0398335338863516e-05, "loss": 7.038, "step": 24500 }, { "epoch": 0.5721592003783257, "eval_loss": 6.973873138427734, "eval_runtime": 78.9579, "eval_samples_per_second": 12.665, "eval_steps_per_second": 12.665, "step": 24500 }, { "epoch": 0.5723927347458271, "grad_norm": 4.53125, "learning_rate": 2.0379750436289145e-05, "loss": 6.9865, "step": 24510 }, { "epoch": 0.5726262691133284, "grad_norm": 4.34375, "learning_rate": 2.036116817695557e-05, "loss": 6.9475, "step": 24520 }, { "epoch": 0.5728598034808298, "grad_norm": 4.75, "learning_rate": 2.0342588571493697e-05, "loss": 6.9672, "step": 24530 }, { "epoch": 0.573093337848331, "grad_norm": 4.1875, "learning_rate": 2.0324011630532892e-05, "loss": 7.0141, "step": 24540 }, { "epoch": 0.5733268722158325, "grad_norm": 4.875, "learning_rate": 2.0305437364701018e-05, "loss": 7.0094, "step": 24550 }, { "epoch": 0.5735604065833338, "grad_norm": 5.0625, "learning_rate": 2.0286865784624395e-05, "loss": 6.9552, "step": 24560 }, { "epoch": 0.5737939409508351, "grad_norm": 4.15625, "learning_rate": 2.0268296900927796e-05, "loss": 6.9054, "step": 24570 }, { "epoch": 0.5740274753183365, "grad_norm": 4.0625, "learning_rate": 2.0249730724234477e-05, "loss": 7.0413, "step": 24580 }, { "epoch": 0.5742610096858379, "grad_norm": 4.21875, "learning_rate": 2.0231167265166117e-05, "loss": 7.0277, "step": 24590 }, { "epoch": 0.5744945440533392, "grad_norm": 5.84375, "learning_rate": 2.021260653434288e-05, "loss": 6.9303, "step": 24600 }, { "epoch": 0.5747280784208406, "grad_norm": 3.578125, "learning_rate": 2.0194048542383316e-05, "loss": 6.9882, "step": 24610 }, { "epoch": 0.574961612788342, "grad_norm": 4.71875, "learning_rate": 2.0175493299904454e-05, "loss": 7.0113, "step": 24620 }, { "epoch": 0.5751951471558433, "grad_norm": 4.15625, "learning_rate": 2.0156940817521733e-05, "loss": 6.948, "step": 24630 }, { "epoch": 0.5754286815233447, "grad_norm": 4.96875, "learning_rate": 2.0138391105848997e-05, "loss": 6.9092, "step": 24640 }, { "epoch": 0.575662215890846, "grad_norm": 5.4375, "learning_rate": 2.0119844175498532e-05, "loss": 6.9008, "step": 24650 }, { "epoch": 0.5758957502583474, "grad_norm": 4.4375, "learning_rate": 2.0101300037081015e-05, "loss": 6.9394, "step": 24660 }, { "epoch": 0.5761292846258488, "grad_norm": 4.3125, "learning_rate": 2.008275870120554e-05, "loss": 6.9398, "step": 24670 }, { "epoch": 0.5763628189933501, "grad_norm": 4.375, "learning_rate": 2.0064220178479574e-05, "loss": 6.9396, "step": 24680 }, { "epoch": 0.5765963533608515, "grad_norm": 4.40625, "learning_rate": 2.0045684479509017e-05, "loss": 6.9662, "step": 24690 }, { "epoch": 0.5768298877283529, "grad_norm": 5.25, "learning_rate": 2.0027151614898103e-05, "loss": 6.9523, "step": 24700 }, { "epoch": 0.5770634220958542, "grad_norm": 5.34375, "learning_rate": 2.0008621595249467e-05, "loss": 7.0103, "step": 24710 }, { "epoch": 0.5772969564633555, "grad_norm": 4.53125, "learning_rate": 1.9990094431164138e-05, "loss": 6.9521, "step": 24720 }, { "epoch": 0.577530490830857, "grad_norm": 4.40625, "learning_rate": 1.9971570133241466e-05, "loss": 6.9472, "step": 24730 }, { "epoch": 0.5777640251983582, "grad_norm": 4.9375, "learning_rate": 1.9953048712079204e-05, "loss": 7.001, "step": 24740 }, { "epoch": 0.5779975595658596, "grad_norm": 4.25, "learning_rate": 1.9934530178273428e-05, "loss": 7.0479, "step": 24750 }, { "epoch": 0.5782310939333609, "grad_norm": 4.78125, "learning_rate": 1.9916014542418594e-05, "loss": 6.9856, "step": 24760 }, { "epoch": 0.5784646283008623, "grad_norm": 3.75, "learning_rate": 1.989750181510746e-05, "loss": 6.9505, "step": 24770 }, { "epoch": 0.5786981626683637, "grad_norm": 4.1875, "learning_rate": 1.9878992006931147e-05, "loss": 7.0181, "step": 24780 }, { "epoch": 0.578931697035865, "grad_norm": 4.625, "learning_rate": 1.986048512847911e-05, "loss": 6.8808, "step": 24790 }, { "epoch": 0.5791652314033664, "grad_norm": 4.875, "learning_rate": 1.9841981190339103e-05, "loss": 6.9896, "step": 24800 }, { "epoch": 0.5793987657708678, "grad_norm": 5.15625, "learning_rate": 1.982348020309724e-05, "loss": 7.0241, "step": 24810 }, { "epoch": 0.5796323001383691, "grad_norm": 4.0625, "learning_rate": 1.9804982177337886e-05, "loss": 6.8973, "step": 24820 }, { "epoch": 0.5798658345058705, "grad_norm": 4.96875, "learning_rate": 1.978648712364377e-05, "loss": 6.8803, "step": 24830 }, { "epoch": 0.5800993688733719, "grad_norm": 5.4375, "learning_rate": 1.9767995052595888e-05, "loss": 6.9632, "step": 24840 }, { "epoch": 0.5803329032408732, "grad_norm": 4.875, "learning_rate": 1.9749505974773534e-05, "loss": 7.0299, "step": 24850 }, { "epoch": 0.5805664376083746, "grad_norm": 5.34375, "learning_rate": 1.97310199007543e-05, "loss": 6.9622, "step": 24860 }, { "epoch": 0.5807999719758759, "grad_norm": 5.65625, "learning_rate": 1.9712536841114047e-05, "loss": 7.0463, "step": 24870 }, { "epoch": 0.5810335063433772, "grad_norm": 5.40625, "learning_rate": 1.9694056806426928e-05, "loss": 6.9458, "step": 24880 }, { "epoch": 0.5812670407108786, "grad_norm": 4.71875, "learning_rate": 1.9675579807265334e-05, "loss": 7.0626, "step": 24890 }, { "epoch": 0.5815005750783799, "grad_norm": 5.84375, "learning_rate": 1.965710585419997e-05, "loss": 6.9198, "step": 24900 }, { "epoch": 0.5817341094458813, "grad_norm": 4.25, "learning_rate": 1.9638634957799752e-05, "loss": 7.0194, "step": 24910 }, { "epoch": 0.5819676438133827, "grad_norm": 5.5625, "learning_rate": 1.9620167128631852e-05, "loss": 7.0011, "step": 24920 }, { "epoch": 0.582201178180884, "grad_norm": 4.46875, "learning_rate": 1.960170237726173e-05, "loss": 6.951, "step": 24930 }, { "epoch": 0.5824347125483854, "grad_norm": 4.46875, "learning_rate": 1.958324071425302e-05, "loss": 6.9515, "step": 24940 }, { "epoch": 0.5826682469158868, "grad_norm": 4.84375, "learning_rate": 1.956478215016765e-05, "loss": 6.9703, "step": 24950 }, { "epoch": 0.5829017812833881, "grad_norm": 4.84375, "learning_rate": 1.954632669556573e-05, "loss": 6.9284, "step": 24960 }, { "epoch": 0.5831353156508895, "grad_norm": 4.96875, "learning_rate": 1.9527874361005634e-05, "loss": 6.9998, "step": 24970 }, { "epoch": 0.5833688500183908, "grad_norm": 4.75, "learning_rate": 1.95094251570439e-05, "loss": 6.9801, "step": 24980 }, { "epoch": 0.5836023843858922, "grad_norm": 4.15625, "learning_rate": 1.949097909423531e-05, "loss": 6.9253, "step": 24990 }, { "epoch": 0.5838359187533936, "grad_norm": 5.25, "learning_rate": 1.947253618313285e-05, "loss": 7.0035, "step": 25000 }, { "epoch": 0.5838359187533936, "eval_loss": 6.971224784851074, "eval_runtime": 78.7669, "eval_samples_per_second": 12.696, "eval_steps_per_second": 12.696, "step": 25000 }, { "epoch": 0.5840694531208949, "grad_norm": 4.0625, "learning_rate": 1.9454096434287683e-05, "loss": 6.9279, "step": 25010 }, { "epoch": 0.5843029874883963, "grad_norm": 4.3125, "learning_rate": 1.9435659858249187e-05, "loss": 6.9688, "step": 25020 }, { "epoch": 0.5845365218558977, "grad_norm": 5.09375, "learning_rate": 1.941722646556489e-05, "loss": 6.971, "step": 25030 }, { "epoch": 0.584770056223399, "grad_norm": 3.96875, "learning_rate": 1.9398796266780535e-05, "loss": 6.9892, "step": 25040 }, { "epoch": 0.5850035905909003, "grad_norm": 4.625, "learning_rate": 1.938036927244003e-05, "loss": 6.9865, "step": 25050 }, { "epoch": 0.5852371249584017, "grad_norm": 3.6875, "learning_rate": 1.936194549308542e-05, "loss": 6.9409, "step": 25060 }, { "epoch": 0.585470659325903, "grad_norm": 4.25, "learning_rate": 1.934352493925695e-05, "loss": 6.9849, "step": 25070 }, { "epoch": 0.5857041936934044, "grad_norm": 5.71875, "learning_rate": 1.9325107621492998e-05, "loss": 6.9448, "step": 25080 }, { "epoch": 0.5859377280609057, "grad_norm": 5.65625, "learning_rate": 1.9306693550330103e-05, "loss": 6.9095, "step": 25090 }, { "epoch": 0.5861712624284071, "grad_norm": 4.9375, "learning_rate": 1.9288282736302922e-05, "loss": 6.9621, "step": 25100 }, { "epoch": 0.5864047967959085, "grad_norm": 5.0, "learning_rate": 1.9269875189944287e-05, "loss": 6.9898, "step": 25110 }, { "epoch": 0.5866383311634098, "grad_norm": 5.875, "learning_rate": 1.9251470921785124e-05, "loss": 6.9811, "step": 25120 }, { "epoch": 0.5868718655309112, "grad_norm": 6.375, "learning_rate": 1.92330699423545e-05, "loss": 6.9949, "step": 25130 }, { "epoch": 0.5871053998984126, "grad_norm": 3.9375, "learning_rate": 1.9214672262179615e-05, "loss": 6.9642, "step": 25140 }, { "epoch": 0.5873389342659139, "grad_norm": 4.3125, "learning_rate": 1.9196277891785745e-05, "loss": 7.0067, "step": 25150 }, { "epoch": 0.5875724686334153, "grad_norm": 3.953125, "learning_rate": 1.9177886841696304e-05, "loss": 6.9873, "step": 25160 }, { "epoch": 0.5878060030009167, "grad_norm": 5.25, "learning_rate": 1.9159499122432794e-05, "loss": 6.9265, "step": 25170 }, { "epoch": 0.588039537368418, "grad_norm": 4.59375, "learning_rate": 1.9141114744514822e-05, "loss": 6.9784, "step": 25180 }, { "epoch": 0.5882730717359194, "grad_norm": 4.625, "learning_rate": 1.9122733718460067e-05, "loss": 6.9823, "step": 25190 }, { "epoch": 0.5885066061034206, "grad_norm": 5.53125, "learning_rate": 1.9104356054784288e-05, "loss": 6.9867, "step": 25200 }, { "epoch": 0.588740140470922, "grad_norm": 4.09375, "learning_rate": 1.9085981764001358e-05, "loss": 6.9468, "step": 25210 }, { "epoch": 0.5889736748384234, "grad_norm": 4.375, "learning_rate": 1.9067610856623153e-05, "loss": 6.9764, "step": 25220 }, { "epoch": 0.5892072092059247, "grad_norm": 4.75, "learning_rate": 1.9049243343159696e-05, "loss": 7.003, "step": 25230 }, { "epoch": 0.5894407435734261, "grad_norm": 4.8125, "learning_rate": 1.9030879234118995e-05, "loss": 6.9221, "step": 25240 }, { "epoch": 0.5896742779409275, "grad_norm": 4.25, "learning_rate": 1.9012518540007157e-05, "loss": 6.9653, "step": 25250 }, { "epoch": 0.5899078123084288, "grad_norm": 4.875, "learning_rate": 1.899416127132832e-05, "loss": 6.9509, "step": 25260 }, { "epoch": 0.5901413466759302, "grad_norm": 5.3125, "learning_rate": 1.8975807438584642e-05, "loss": 6.9326, "step": 25270 }, { "epoch": 0.5903748810434315, "grad_norm": 4.3125, "learning_rate": 1.8957457052276358e-05, "loss": 6.9378, "step": 25280 }, { "epoch": 0.5906084154109329, "grad_norm": 6.0625, "learning_rate": 1.8939110122901693e-05, "loss": 6.9886, "step": 25290 }, { "epoch": 0.5908419497784343, "grad_norm": 4.875, "learning_rate": 1.8920766660956926e-05, "loss": 6.9778, "step": 25300 }, { "epoch": 0.5910754841459356, "grad_norm": 4.1875, "learning_rate": 1.8902426676936312e-05, "loss": 7.0374, "step": 25310 }, { "epoch": 0.591309018513437, "grad_norm": 4.9375, "learning_rate": 1.8884090181332163e-05, "loss": 6.9864, "step": 25320 }, { "epoch": 0.5915425528809384, "grad_norm": 6.0, "learning_rate": 1.886575718463476e-05, "loss": 6.9918, "step": 25330 }, { "epoch": 0.5917760872484397, "grad_norm": 5.25, "learning_rate": 1.8847427697332395e-05, "loss": 7.0038, "step": 25340 }, { "epoch": 0.5920096216159411, "grad_norm": 5.15625, "learning_rate": 1.8829101729911362e-05, "loss": 7.0035, "step": 25350 }, { "epoch": 0.5922431559834425, "grad_norm": 4.28125, "learning_rate": 1.8810779292855918e-05, "loss": 6.9801, "step": 25360 }, { "epoch": 0.5924766903509437, "grad_norm": 5.125, "learning_rate": 1.879246039664832e-05, "loss": 7.0127, "step": 25370 }, { "epoch": 0.5927102247184451, "grad_norm": 4.0625, "learning_rate": 1.8774145051768787e-05, "loss": 6.9431, "step": 25380 }, { "epoch": 0.5929437590859464, "grad_norm": 4.25, "learning_rate": 1.8755833268695532e-05, "loss": 6.9387, "step": 25390 }, { "epoch": 0.5931772934534478, "grad_norm": 3.59375, "learning_rate": 1.873752505790468e-05, "loss": 6.9527, "step": 25400 }, { "epoch": 0.5934108278209492, "grad_norm": 4.375, "learning_rate": 1.8719220429870366e-05, "loss": 6.9367, "step": 25410 }, { "epoch": 0.5936443621884505, "grad_norm": 4.21875, "learning_rate": 1.8700919395064648e-05, "loss": 6.9223, "step": 25420 }, { "epoch": 0.5938778965559519, "grad_norm": 5.3125, "learning_rate": 1.8682621963957515e-05, "loss": 6.972, "step": 25430 }, { "epoch": 0.5941114309234533, "grad_norm": 4.1875, "learning_rate": 1.8664328147016934e-05, "loss": 6.9459, "step": 25440 }, { "epoch": 0.5943449652909546, "grad_norm": 4.6875, "learning_rate": 1.8646037954708763e-05, "loss": 6.9895, "step": 25450 }, { "epoch": 0.594578499658456, "grad_norm": 4.5625, "learning_rate": 1.862775139749682e-05, "loss": 7.0071, "step": 25460 }, { "epoch": 0.5948120340259574, "grad_norm": 4.5, "learning_rate": 1.8609468485842826e-05, "loss": 6.9383, "step": 25470 }, { "epoch": 0.5950455683934587, "grad_norm": 3.8125, "learning_rate": 1.8591189230206424e-05, "loss": 6.9893, "step": 25480 }, { "epoch": 0.5952791027609601, "grad_norm": 4.875, "learning_rate": 1.8572913641045148e-05, "loss": 6.9702, "step": 25490 }, { "epoch": 0.5955126371284614, "grad_norm": 4.125, "learning_rate": 1.855464172881444e-05, "loss": 6.9785, "step": 25500 }, { "epoch": 0.5955126371284614, "eval_loss": 6.9691267013549805, "eval_runtime": 78.7112, "eval_samples_per_second": 12.705, "eval_steps_per_second": 12.705, "step": 25500 }, { "epoch": 0.5957461714959628, "grad_norm": 6.125, "learning_rate": 1.8536373503967677e-05, "loss": 6.9823, "step": 25510 }, { "epoch": 0.5959797058634642, "grad_norm": 4.34375, "learning_rate": 1.851810897695606e-05, "loss": 7.0174, "step": 25520 }, { "epoch": 0.5962132402309654, "grad_norm": 3.828125, "learning_rate": 1.8499848158228733e-05, "loss": 6.9699, "step": 25530 }, { "epoch": 0.5964467745984668, "grad_norm": 4.03125, "learning_rate": 1.8481591058232676e-05, "loss": 6.9858, "step": 25540 }, { "epoch": 0.5966803089659682, "grad_norm": 4.59375, "learning_rate": 1.8463337687412784e-05, "loss": 7.0105, "step": 25550 }, { "epoch": 0.5969138433334695, "grad_norm": 4.0, "learning_rate": 1.8445088056211774e-05, "loss": 7.0092, "step": 25560 }, { "epoch": 0.5971473777009709, "grad_norm": 4.03125, "learning_rate": 1.8426842175070248e-05, "loss": 6.9775, "step": 25570 }, { "epoch": 0.5973809120684723, "grad_norm": 4.0625, "learning_rate": 1.8408600054426666e-05, "loss": 6.9082, "step": 25580 }, { "epoch": 0.5976144464359736, "grad_norm": 3.484375, "learning_rate": 1.8390361704717317e-05, "loss": 7.0029, "step": 25590 }, { "epoch": 0.597847980803475, "grad_norm": 4.8125, "learning_rate": 1.837212713637637e-05, "loss": 7.0239, "step": 25600 }, { "epoch": 0.5980815151709763, "grad_norm": 4.9375, "learning_rate": 1.8353896359835777e-05, "loss": 7.0253, "step": 25610 }, { "epoch": 0.5983150495384777, "grad_norm": 4.46875, "learning_rate": 1.8335669385525365e-05, "loss": 6.957, "step": 25620 }, { "epoch": 0.5985485839059791, "grad_norm": 5.40625, "learning_rate": 1.8317446223872774e-05, "loss": 6.9207, "step": 25630 }, { "epoch": 0.5987821182734804, "grad_norm": 5.25, "learning_rate": 1.8299226885303448e-05, "loss": 6.9338, "step": 25640 }, { "epoch": 0.5990156526409818, "grad_norm": 4.375, "learning_rate": 1.828101138024066e-05, "loss": 6.9858, "step": 25650 }, { "epoch": 0.5992491870084832, "grad_norm": 4.09375, "learning_rate": 1.8262799719105478e-05, "loss": 6.9566, "step": 25660 }, { "epoch": 0.5994827213759845, "grad_norm": 4.3125, "learning_rate": 1.8244591912316795e-05, "loss": 6.8967, "step": 25670 }, { "epoch": 0.5997162557434859, "grad_norm": 4.875, "learning_rate": 1.8226387970291257e-05, "loss": 6.9143, "step": 25680 }, { "epoch": 0.5999497901109873, "grad_norm": 3.421875, "learning_rate": 1.8208187903443353e-05, "loss": 6.9532, "step": 25690 }, { "epoch": 0.6001833244784885, "grad_norm": 4.6875, "learning_rate": 1.8189991722185304e-05, "loss": 7.0583, "step": 25700 }, { "epoch": 0.6004168588459899, "grad_norm": 4.9375, "learning_rate": 1.817179943692713e-05, "loss": 6.9948, "step": 25710 }, { "epoch": 0.6006503932134912, "grad_norm": 4.21875, "learning_rate": 1.815361105807664e-05, "loss": 6.9291, "step": 25720 }, { "epoch": 0.6008839275809926, "grad_norm": 4.3125, "learning_rate": 1.8135426596039362e-05, "loss": 6.9238, "step": 25730 }, { "epoch": 0.601117461948494, "grad_norm": 4.59375, "learning_rate": 1.8117246061218638e-05, "loss": 6.9678, "step": 25740 }, { "epoch": 0.6013509963159953, "grad_norm": 4.6875, "learning_rate": 1.8099069464015514e-05, "loss": 6.999, "step": 25750 }, { "epoch": 0.6015845306834967, "grad_norm": 5.34375, "learning_rate": 1.808089681482883e-05, "loss": 6.8941, "step": 25760 }, { "epoch": 0.6018180650509981, "grad_norm": 4.25, "learning_rate": 1.8062728124055123e-05, "loss": 6.9529, "step": 25770 }, { "epoch": 0.6020515994184994, "grad_norm": 3.921875, "learning_rate": 1.8044563402088684e-05, "loss": 6.959, "step": 25780 }, { "epoch": 0.6022851337860008, "grad_norm": 5.8125, "learning_rate": 1.8026402659321555e-05, "loss": 6.9472, "step": 25790 }, { "epoch": 0.6025186681535022, "grad_norm": 3.84375, "learning_rate": 1.8008245906143454e-05, "loss": 6.9885, "step": 25800 }, { "epoch": 0.6027522025210035, "grad_norm": 4.4375, "learning_rate": 1.799009315294187e-05, "loss": 6.9306, "step": 25810 }, { "epoch": 0.6029857368885049, "grad_norm": 4.34375, "learning_rate": 1.7971944410101966e-05, "loss": 7.0107, "step": 25820 }, { "epoch": 0.6032192712560062, "grad_norm": 4.09375, "learning_rate": 1.7953799688006616e-05, "loss": 6.9826, "step": 25830 }, { "epoch": 0.6034528056235076, "grad_norm": 4.34375, "learning_rate": 1.793565899703642e-05, "loss": 6.9653, "step": 25840 }, { "epoch": 0.603686339991009, "grad_norm": 4.8125, "learning_rate": 1.7917522347569622e-05, "loss": 6.9202, "step": 25850 }, { "epoch": 0.6039198743585102, "grad_norm": 3.734375, "learning_rate": 1.789938974998221e-05, "loss": 6.9321, "step": 25860 }, { "epoch": 0.6041534087260116, "grad_norm": 4.90625, "learning_rate": 1.7881261214647803e-05, "loss": 6.9771, "step": 25870 }, { "epoch": 0.604386943093513, "grad_norm": 4.34375, "learning_rate": 1.7863136751937755e-05, "loss": 6.8702, "step": 25880 }, { "epoch": 0.6046204774610143, "grad_norm": 6.25, "learning_rate": 1.784501637222102e-05, "loss": 6.9755, "step": 25890 }, { "epoch": 0.6048540118285157, "grad_norm": 4.28125, "learning_rate": 1.782690008586427e-05, "loss": 6.955, "step": 25900 }, { "epoch": 0.6050875461960171, "grad_norm": 5.0625, "learning_rate": 1.780878790323182e-05, "loss": 6.9682, "step": 25910 }, { "epoch": 0.6053210805635184, "grad_norm": 4.6875, "learning_rate": 1.7790679834685614e-05, "loss": 6.9786, "step": 25920 }, { "epoch": 0.6055546149310198, "grad_norm": 4.9375, "learning_rate": 1.7772575890585292e-05, "loss": 6.966, "step": 25930 }, { "epoch": 0.6057881492985211, "grad_norm": 4.34375, "learning_rate": 1.7754476081288073e-05, "loss": 6.9786, "step": 25940 }, { "epoch": 0.6060216836660225, "grad_norm": 5.65625, "learning_rate": 1.7736380417148867e-05, "loss": 7.0204, "step": 25950 }, { "epoch": 0.6062552180335239, "grad_norm": 4.25, "learning_rate": 1.7718288908520177e-05, "loss": 6.9751, "step": 25960 }, { "epoch": 0.6064887524010252, "grad_norm": 5.09375, "learning_rate": 1.770020156575215e-05, "loss": 6.9965, "step": 25970 }, { "epoch": 0.6067222867685266, "grad_norm": 4.25, "learning_rate": 1.7682118399192527e-05, "loss": 6.9535, "step": 25980 }, { "epoch": 0.606955821136028, "grad_norm": 5.59375, "learning_rate": 1.7664039419186667e-05, "loss": 6.9616, "step": 25990 }, { "epoch": 0.6071893555035293, "grad_norm": 3.796875, "learning_rate": 1.764596463607756e-05, "loss": 6.964, "step": 26000 }, { "epoch": 0.6071893555035293, "eval_loss": 6.967626571655273, "eval_runtime": 78.6837, "eval_samples_per_second": 12.709, "eval_steps_per_second": 12.709, "step": 26000 }, { "epoch": 0.6074228898710307, "grad_norm": 4.34375, "learning_rate": 1.762789406020575e-05, "loss": 6.9732, "step": 26010 }, { "epoch": 0.6076564242385321, "grad_norm": 5.3125, "learning_rate": 1.760982770190942e-05, "loss": 6.9274, "step": 26020 }, { "epoch": 0.6078899586060333, "grad_norm": 5.40625, "learning_rate": 1.7591765571524298e-05, "loss": 6.9071, "step": 26030 }, { "epoch": 0.6081234929735347, "grad_norm": 5.65625, "learning_rate": 1.757370767938373e-05, "loss": 6.918, "step": 26040 }, { "epoch": 0.608357027341036, "grad_norm": 4.84375, "learning_rate": 1.755565403581862e-05, "loss": 7.0177, "step": 26050 }, { "epoch": 0.6085905617085374, "grad_norm": 4.5625, "learning_rate": 1.7537604651157422e-05, "loss": 6.9405, "step": 26060 }, { "epoch": 0.6088240960760388, "grad_norm": 4.90625, "learning_rate": 1.7519559535726193e-05, "loss": 7.0195, "step": 26070 }, { "epoch": 0.6090576304435401, "grad_norm": 4.46875, "learning_rate": 1.750151869984852e-05, "loss": 6.9814, "step": 26080 }, { "epoch": 0.6092911648110415, "grad_norm": 3.78125, "learning_rate": 1.748348215384556e-05, "loss": 6.9927, "step": 26090 }, { "epoch": 0.6095246991785429, "grad_norm": 4.625, "learning_rate": 1.7465449908035987e-05, "loss": 6.9723, "step": 26100 }, { "epoch": 0.6097582335460442, "grad_norm": 4.5, "learning_rate": 1.7447421972736044e-05, "loss": 6.9286, "step": 26110 }, { "epoch": 0.6099917679135456, "grad_norm": 4.46875, "learning_rate": 1.7429398358259508e-05, "loss": 6.9473, "step": 26120 }, { "epoch": 0.610225302281047, "grad_norm": 3.484375, "learning_rate": 1.741137907491765e-05, "loss": 6.958, "step": 26130 }, { "epoch": 0.6104588366485483, "grad_norm": 4.0625, "learning_rate": 1.73933641330193e-05, "loss": 7.0203, "step": 26140 }, { "epoch": 0.6106923710160497, "grad_norm": 5.375, "learning_rate": 1.7375353542870793e-05, "loss": 6.9764, "step": 26150 }, { "epoch": 0.610925905383551, "grad_norm": 3.734375, "learning_rate": 1.735734731477597e-05, "loss": 6.9833, "step": 26160 }, { "epoch": 0.6111594397510524, "grad_norm": 5.875, "learning_rate": 1.7339345459036165e-05, "loss": 6.9136, "step": 26170 }, { "epoch": 0.6113929741185538, "grad_norm": 4.0625, "learning_rate": 1.7321347985950255e-05, "loss": 6.9912, "step": 26180 }, { "epoch": 0.611626508486055, "grad_norm": 4.34375, "learning_rate": 1.7303354905814545e-05, "loss": 6.9567, "step": 26190 }, { "epoch": 0.6118600428535564, "grad_norm": 3.375, "learning_rate": 1.7285366228922884e-05, "loss": 7.0014, "step": 26200 }, { "epoch": 0.6120935772210578, "grad_norm": 5.09375, "learning_rate": 1.7267381965566577e-05, "loss": 6.9898, "step": 26210 }, { "epoch": 0.6123271115885591, "grad_norm": 4.28125, "learning_rate": 1.7249402126034386e-05, "loss": 6.9534, "step": 26220 }, { "epoch": 0.6125606459560605, "grad_norm": 5.28125, "learning_rate": 1.7231426720612582e-05, "loss": 6.9419, "step": 26230 }, { "epoch": 0.6127941803235619, "grad_norm": 5.09375, "learning_rate": 1.7213455759584863e-05, "loss": 6.9239, "step": 26240 }, { "epoch": 0.6130277146910632, "grad_norm": 4.71875, "learning_rate": 1.719548925323242e-05, "loss": 6.981, "step": 26250 }, { "epoch": 0.6132612490585646, "grad_norm": 4.53125, "learning_rate": 1.7177527211833856e-05, "loss": 6.9557, "step": 26260 }, { "epoch": 0.6134947834260659, "grad_norm": 4.28125, "learning_rate": 1.7159569645665256e-05, "loss": 6.9563, "step": 26270 }, { "epoch": 0.6137283177935673, "grad_norm": 4.0625, "learning_rate": 1.714161656500012e-05, "loss": 6.9663, "step": 26280 }, { "epoch": 0.6139618521610687, "grad_norm": 5.0625, "learning_rate": 1.7123667980109385e-05, "loss": 7.0277, "step": 26290 }, { "epoch": 0.61419538652857, "grad_norm": 4.625, "learning_rate": 1.7105723901261437e-05, "loss": 7.0361, "step": 26300 }, { "epoch": 0.6144289208960714, "grad_norm": 3.796875, "learning_rate": 1.7087784338722047e-05, "loss": 6.9991, "step": 26310 }, { "epoch": 0.6146624552635728, "grad_norm": 3.734375, "learning_rate": 1.706984930275444e-05, "loss": 6.9081, "step": 26320 }, { "epoch": 0.6148959896310741, "grad_norm": 5.28125, "learning_rate": 1.705191880361922e-05, "loss": 7.0471, "step": 26330 }, { "epoch": 0.6151295239985755, "grad_norm": 5.25, "learning_rate": 1.703399285157443e-05, "loss": 6.9806, "step": 26340 }, { "epoch": 0.6153630583660767, "grad_norm": 3.890625, "learning_rate": 1.7016071456875476e-05, "loss": 6.9124, "step": 26350 }, { "epoch": 0.6155965927335781, "grad_norm": 3.96875, "learning_rate": 1.6998154629775173e-05, "loss": 6.9872, "step": 26360 }, { "epoch": 0.6158301271010795, "grad_norm": 4.6875, "learning_rate": 1.6980242380523736e-05, "loss": 6.9459, "step": 26370 }, { "epoch": 0.6160636614685808, "grad_norm": 3.796875, "learning_rate": 1.6962334719368722e-05, "loss": 6.9361, "step": 26380 }, { "epoch": 0.6162971958360822, "grad_norm": 3.953125, "learning_rate": 1.694443165655512e-05, "loss": 7.0144, "step": 26390 }, { "epoch": 0.6165307302035836, "grad_norm": 5.65625, "learning_rate": 1.6926533202325236e-05, "loss": 6.9917, "step": 26400 }, { "epoch": 0.6167642645710849, "grad_norm": 4.09375, "learning_rate": 1.6908639366918765e-05, "loss": 7.0127, "step": 26410 }, { "epoch": 0.6169977989385863, "grad_norm": 5.03125, "learning_rate": 1.6890750160572762e-05, "loss": 6.9757, "step": 26420 }, { "epoch": 0.6172313333060877, "grad_norm": 4.75, "learning_rate": 1.6872865593521607e-05, "loss": 6.9353, "step": 26430 }, { "epoch": 0.617464867673589, "grad_norm": 4.15625, "learning_rate": 1.6854985675997066e-05, "loss": 6.9931, "step": 26440 }, { "epoch": 0.6176984020410904, "grad_norm": 4.625, "learning_rate": 1.6837110418228212e-05, "loss": 6.9575, "step": 26450 }, { "epoch": 0.6179319364085917, "grad_norm": 5.125, "learning_rate": 1.681923983044148e-05, "loss": 6.8986, "step": 26460 }, { "epoch": 0.6181654707760931, "grad_norm": 4.59375, "learning_rate": 1.6801373922860595e-05, "loss": 6.9853, "step": 26470 }, { "epoch": 0.6183990051435945, "grad_norm": 4.5, "learning_rate": 1.6783512705706644e-05, "loss": 7.0047, "step": 26480 }, { "epoch": 0.6186325395110958, "grad_norm": 4.46875, "learning_rate": 1.6765656189198013e-05, "loss": 6.9485, "step": 26490 }, { "epoch": 0.6188660738785972, "grad_norm": 3.828125, "learning_rate": 1.674780438355039e-05, "loss": 6.9349, "step": 26500 }, { "epoch": 0.6188660738785972, "eval_loss": 6.965602397918701, "eval_runtime": 78.7338, "eval_samples_per_second": 12.701, "eval_steps_per_second": 12.701, "step": 26500 }, { "epoch": 0.6190996082460986, "grad_norm": 6.21875, "learning_rate": 1.6729957298976796e-05, "loss": 6.9847, "step": 26510 }, { "epoch": 0.6193331426135998, "grad_norm": 4.6875, "learning_rate": 1.671211494568751e-05, "loss": 6.961, "step": 26520 }, { "epoch": 0.6195666769811012, "grad_norm": 4.03125, "learning_rate": 1.6694277333890136e-05, "loss": 6.9829, "step": 26530 }, { "epoch": 0.6198002113486026, "grad_norm": 5.40625, "learning_rate": 1.667644447378956e-05, "loss": 7.0187, "step": 26540 }, { "epoch": 0.6200337457161039, "grad_norm": 4.71875, "learning_rate": 1.665861637558795e-05, "loss": 6.9146, "step": 26550 }, { "epoch": 0.6202672800836053, "grad_norm": 5.09375, "learning_rate": 1.6640793049484738e-05, "loss": 7.0055, "step": 26560 }, { "epoch": 0.6205008144511066, "grad_norm": 7.03125, "learning_rate": 1.6622974505676632e-05, "loss": 6.9417, "step": 26570 }, { "epoch": 0.620734348818608, "grad_norm": 4.09375, "learning_rate": 1.6605160754357614e-05, "loss": 6.8958, "step": 26580 }, { "epoch": 0.6209678831861094, "grad_norm": 4.59375, "learning_rate": 1.6587351805718897e-05, "loss": 7.0206, "step": 26590 }, { "epoch": 0.6212014175536107, "grad_norm": 4.59375, "learning_rate": 1.6569547669948997e-05, "loss": 7.0279, "step": 26600 }, { "epoch": 0.6214349519211121, "grad_norm": 5.96875, "learning_rate": 1.6551748357233615e-05, "loss": 7.0144, "step": 26610 }, { "epoch": 0.6216684862886135, "grad_norm": 4.1875, "learning_rate": 1.6533953877755736e-05, "loss": 6.9701, "step": 26620 }, { "epoch": 0.6219020206561148, "grad_norm": 4.8125, "learning_rate": 1.6516164241695574e-05, "loss": 7.0158, "step": 26630 }, { "epoch": 0.6221355550236162, "grad_norm": 3.890625, "learning_rate": 1.6498379459230544e-05, "loss": 6.9508, "step": 26640 }, { "epoch": 0.6223690893911176, "grad_norm": 4.59375, "learning_rate": 1.648059954053532e-05, "loss": 6.9296, "step": 26650 }, { "epoch": 0.6226026237586189, "grad_norm": 4.8125, "learning_rate": 1.646282449578177e-05, "loss": 6.9156, "step": 26660 }, { "epoch": 0.6228361581261203, "grad_norm": 4.5, "learning_rate": 1.6445054335138994e-05, "loss": 7.0256, "step": 26670 }, { "epoch": 0.6230696924936215, "grad_norm": 5.53125, "learning_rate": 1.6427289068773266e-05, "loss": 6.9002, "step": 26680 }, { "epoch": 0.6233032268611229, "grad_norm": 4.6875, "learning_rate": 1.64095287068481e-05, "loss": 6.9958, "step": 26690 }, { "epoch": 0.6235367612286243, "grad_norm": 5.25, "learning_rate": 1.639177325952418e-05, "loss": 6.9646, "step": 26700 }, { "epoch": 0.6237702955961256, "grad_norm": 4.71875, "learning_rate": 1.637402273695936e-05, "loss": 6.9095, "step": 26710 }, { "epoch": 0.624003829963627, "grad_norm": 4.84375, "learning_rate": 1.635627714930873e-05, "loss": 6.9504, "step": 26720 }, { "epoch": 0.6242373643311284, "grad_norm": 3.609375, "learning_rate": 1.63385365067245e-05, "loss": 6.964, "step": 26730 }, { "epoch": 0.6244708986986297, "grad_norm": 4.46875, "learning_rate": 1.6320800819356096e-05, "loss": 6.9255, "step": 26740 }, { "epoch": 0.6247044330661311, "grad_norm": 4.90625, "learning_rate": 1.6303070097350074e-05, "loss": 6.9873, "step": 26750 }, { "epoch": 0.6249379674336325, "grad_norm": 5.21875, "learning_rate": 1.6285344350850183e-05, "loss": 6.8976, "step": 26760 }, { "epoch": 0.6251715018011338, "grad_norm": 4.65625, "learning_rate": 1.626762358999729e-05, "loss": 6.9568, "step": 26770 }, { "epoch": 0.6254050361686352, "grad_norm": 3.96875, "learning_rate": 1.6249907824929427e-05, "loss": 6.8977, "step": 26780 }, { "epoch": 0.6256385705361365, "grad_norm": 4.75, "learning_rate": 1.6232197065781783e-05, "loss": 6.9602, "step": 26790 }, { "epoch": 0.6258721049036379, "grad_norm": 4.8125, "learning_rate": 1.6214491322686655e-05, "loss": 6.9572, "step": 26800 }, { "epoch": 0.6261056392711393, "grad_norm": 3.375, "learning_rate": 1.6196790605773488e-05, "loss": 6.966, "step": 26810 }, { "epoch": 0.6263391736386406, "grad_norm": 4.34375, "learning_rate": 1.6179094925168846e-05, "loss": 6.9069, "step": 26820 }, { "epoch": 0.626572708006142, "grad_norm": 3.921875, "learning_rate": 1.6161404290996412e-05, "loss": 6.9301, "step": 26830 }, { "epoch": 0.6268062423736434, "grad_norm": 4.71875, "learning_rate": 1.6143718713376995e-05, "loss": 6.9303, "step": 26840 }, { "epoch": 0.6270397767411446, "grad_norm": 4.28125, "learning_rate": 1.6126038202428472e-05, "loss": 6.9641, "step": 26850 }, { "epoch": 0.627273311108646, "grad_norm": 4.8125, "learning_rate": 1.6108362768265872e-05, "loss": 6.9618, "step": 26860 }, { "epoch": 0.6275068454761474, "grad_norm": 4.96875, "learning_rate": 1.609069242100128e-05, "loss": 6.9254, "step": 26870 }, { "epoch": 0.6277403798436487, "grad_norm": 3.984375, "learning_rate": 1.607302717074391e-05, "loss": 6.9969, "step": 26880 }, { "epoch": 0.6279739142111501, "grad_norm": 5.46875, "learning_rate": 1.6055367027600005e-05, "loss": 6.9138, "step": 26890 }, { "epoch": 0.6282074485786514, "grad_norm": 5.9375, "learning_rate": 1.6037712001672938e-05, "loss": 6.9968, "step": 26900 }, { "epoch": 0.6284409829461528, "grad_norm": 4.625, "learning_rate": 1.6020062103063132e-05, "loss": 6.9945, "step": 26910 }, { "epoch": 0.6286745173136542, "grad_norm": 4.65625, "learning_rate": 1.600241734186807e-05, "loss": 6.9484, "step": 26920 }, { "epoch": 0.6289080516811555, "grad_norm": 4.03125, "learning_rate": 1.5984777728182316e-05, "loss": 6.9871, "step": 26930 }, { "epoch": 0.6291415860486569, "grad_norm": 4.15625, "learning_rate": 1.596714327209747e-05, "loss": 6.8404, "step": 26940 }, { "epoch": 0.6293751204161583, "grad_norm": 4.6875, "learning_rate": 1.594951398370219e-05, "loss": 6.9884, "step": 26950 }, { "epoch": 0.6296086547836596, "grad_norm": 4.65625, "learning_rate": 1.5931889873082177e-05, "loss": 6.9489, "step": 26960 }, { "epoch": 0.629842189151161, "grad_norm": 4.46875, "learning_rate": 1.5914270950320178e-05, "loss": 6.9424, "step": 26970 }, { "epoch": 0.6300757235186624, "grad_norm": 4.65625, "learning_rate": 1.5896657225495948e-05, "loss": 6.9551, "step": 26980 }, { "epoch": 0.6303092578861637, "grad_norm": 5.15625, "learning_rate": 1.58790487086863e-05, "loss": 6.9584, "step": 26990 }, { "epoch": 0.630542792253665, "grad_norm": 5.5625, "learning_rate": 1.5861445409965048e-05, "loss": 6.9337, "step": 27000 }, { "epoch": 0.630542792253665, "eval_loss": 6.964228630065918, "eval_runtime": 78.7267, "eval_samples_per_second": 12.702, "eval_steps_per_second": 12.702, "step": 27000 }, { "epoch": 0.6307763266211663, "grad_norm": 4.5625, "learning_rate": 1.5843847339403006e-05, "loss": 6.9519, "step": 27010 }, { "epoch": 0.6310098609886677, "grad_norm": 4.25, "learning_rate": 1.582625450706804e-05, "loss": 6.9939, "step": 27020 }, { "epoch": 0.6312433953561691, "grad_norm": 5.125, "learning_rate": 1.5808666923024973e-05, "loss": 7.0106, "step": 27030 }, { "epoch": 0.6314769297236704, "grad_norm": 5.40625, "learning_rate": 1.579108459733567e-05, "loss": 7.0314, "step": 27040 }, { "epoch": 0.6317104640911718, "grad_norm": 4.15625, "learning_rate": 1.5773507540058934e-05, "loss": 6.9136, "step": 27050 }, { "epoch": 0.6319439984586732, "grad_norm": 3.890625, "learning_rate": 1.575593576125062e-05, "loss": 6.9882, "step": 27060 }, { "epoch": 0.6321775328261745, "grad_norm": 4.15625, "learning_rate": 1.5738369270963495e-05, "loss": 6.9366, "step": 27070 }, { "epoch": 0.6324110671936759, "grad_norm": 4.3125, "learning_rate": 1.572080807924735e-05, "loss": 6.908, "step": 27080 }, { "epoch": 0.6326446015611773, "grad_norm": 3.703125, "learning_rate": 1.5703252196148932e-05, "loss": 7.0789, "step": 27090 }, { "epoch": 0.6328781359286786, "grad_norm": 3.90625, "learning_rate": 1.568570163171193e-05, "loss": 6.9423, "step": 27100 }, { "epoch": 0.63311167029618, "grad_norm": 4.65625, "learning_rate": 1.5668156395977023e-05, "loss": 7.0353, "step": 27110 }, { "epoch": 0.6333452046636813, "grad_norm": 4.21875, "learning_rate": 1.5650616498981813e-05, "loss": 6.9787, "step": 27120 }, { "epoch": 0.6335787390311827, "grad_norm": 3.9375, "learning_rate": 1.5633081950760872e-05, "loss": 6.9312, "step": 27130 }, { "epoch": 0.6338122733986841, "grad_norm": 5.0, "learning_rate": 1.561555276134569e-05, "loss": 6.956, "step": 27140 }, { "epoch": 0.6340458077661854, "grad_norm": 5.1875, "learning_rate": 1.5598028940764704e-05, "loss": 6.9767, "step": 27150 }, { "epoch": 0.6342793421336868, "grad_norm": 4.6875, "learning_rate": 1.5580510499043283e-05, "loss": 6.9804, "step": 27160 }, { "epoch": 0.6345128765011881, "grad_norm": 5.5, "learning_rate": 1.556299744620369e-05, "loss": 6.9084, "step": 27170 }, { "epoch": 0.6347464108686894, "grad_norm": 4.78125, "learning_rate": 1.554548979226516e-05, "loss": 7.0211, "step": 27180 }, { "epoch": 0.6349799452361908, "grad_norm": 5.0625, "learning_rate": 1.5527987547243785e-05, "loss": 6.9236, "step": 27190 }, { "epoch": 0.6352134796036922, "grad_norm": 5.25, "learning_rate": 1.5510490721152592e-05, "loss": 6.9681, "step": 27200 }, { "epoch": 0.6354470139711935, "grad_norm": 4.71875, "learning_rate": 1.5492999324001506e-05, "loss": 6.9815, "step": 27210 }, { "epoch": 0.6356805483386949, "grad_norm": 6.15625, "learning_rate": 1.5475513365797318e-05, "loss": 6.9357, "step": 27220 }, { "epoch": 0.6359140827061962, "grad_norm": 4.5, "learning_rate": 1.5458032856543753e-05, "loss": 6.9274, "step": 27230 }, { "epoch": 0.6361476170736976, "grad_norm": 3.890625, "learning_rate": 1.544055780624138e-05, "loss": 6.9743, "step": 27240 }, { "epoch": 0.636381151441199, "grad_norm": 5.34375, "learning_rate": 1.542308822488768e-05, "loss": 6.9741, "step": 27250 }, { "epoch": 0.6366146858087003, "grad_norm": 3.921875, "learning_rate": 1.5405624122476962e-05, "loss": 6.9811, "step": 27260 }, { "epoch": 0.6368482201762017, "grad_norm": 3.8125, "learning_rate": 1.5388165509000435e-05, "loss": 7.0043, "step": 27270 }, { "epoch": 0.6370817545437031, "grad_norm": 4.71875, "learning_rate": 1.5370712394446165e-05, "loss": 6.9303, "step": 27280 }, { "epoch": 0.6373152889112044, "grad_norm": 3.546875, "learning_rate": 1.5353264788799038e-05, "loss": 6.9912, "step": 27290 }, { "epoch": 0.6375488232787058, "grad_norm": 4.3125, "learning_rate": 1.5335822702040846e-05, "loss": 6.9379, "step": 27300 }, { "epoch": 0.6377823576462072, "grad_norm": 3.75, "learning_rate": 1.531838614415017e-05, "loss": 6.9566, "step": 27310 }, { "epoch": 0.6380158920137085, "grad_norm": 5.125, "learning_rate": 1.530095512510246e-05, "loss": 6.9488, "step": 27320 }, { "epoch": 0.6382494263812099, "grad_norm": 4.3125, "learning_rate": 1.5283529654869984e-05, "loss": 6.9774, "step": 27330 }, { "epoch": 0.6384829607487111, "grad_norm": 4.96875, "learning_rate": 1.526610974342184e-05, "loss": 6.9633, "step": 27340 }, { "epoch": 0.6387164951162125, "grad_norm": 4.625, "learning_rate": 1.5248695400723942e-05, "loss": 6.9781, "step": 27350 }, { "epoch": 0.6389500294837139, "grad_norm": 5.28125, "learning_rate": 1.5231286636739015e-05, "loss": 6.9961, "step": 27360 }, { "epoch": 0.6391835638512152, "grad_norm": 3.703125, "learning_rate": 1.5213883461426614e-05, "loss": 6.9977, "step": 27370 }, { "epoch": 0.6394170982187166, "grad_norm": 7.1875, "learning_rate": 1.519648588474306e-05, "loss": 6.8759, "step": 27380 }, { "epoch": 0.639650632586218, "grad_norm": 5.40625, "learning_rate": 1.517909391664151e-05, "loss": 6.9476, "step": 27390 }, { "epoch": 0.6398841669537193, "grad_norm": 5.3125, "learning_rate": 1.5161707567071882e-05, "loss": 6.9036, "step": 27400 }, { "epoch": 0.6401177013212207, "grad_norm": 3.421875, "learning_rate": 1.5144326845980896e-05, "loss": 7.0057, "step": 27410 }, { "epoch": 0.640351235688722, "grad_norm": 4.0625, "learning_rate": 1.5126951763312055e-05, "loss": 6.9748, "step": 27420 }, { "epoch": 0.6405847700562234, "grad_norm": 4.4375, "learning_rate": 1.5109582329005612e-05, "loss": 7.0432, "step": 27430 }, { "epoch": 0.6408183044237248, "grad_norm": 5.0, "learning_rate": 1.5092218552998621e-05, "loss": 6.9762, "step": 27440 }, { "epoch": 0.6410518387912261, "grad_norm": 5.09375, "learning_rate": 1.507486044522487e-05, "loss": 6.9876, "step": 27450 }, { "epoch": 0.6412853731587275, "grad_norm": 5.125, "learning_rate": 1.5057508015614939e-05, "loss": 6.9235, "step": 27460 }, { "epoch": 0.6415189075262289, "grad_norm": 4.40625, "learning_rate": 1.504016127409611e-05, "loss": 6.9835, "step": 27470 }, { "epoch": 0.6417524418937302, "grad_norm": 4.40625, "learning_rate": 1.5022820230592457e-05, "loss": 6.9415, "step": 27480 }, { "epoch": 0.6419859762612316, "grad_norm": 3.8125, "learning_rate": 1.5005484895024779e-05, "loss": 7.0018, "step": 27490 }, { "epoch": 0.642219510628733, "grad_norm": 4.0, "learning_rate": 1.498815527731059e-05, "loss": 6.9478, "step": 27500 }, { "epoch": 0.642219510628733, "eval_loss": 6.9629693031311035, "eval_runtime": 79.055, "eval_samples_per_second": 12.649, "eval_steps_per_second": 12.649, "step": 27500 }, { "epoch": 0.6424530449962342, "grad_norm": 4.28125, "learning_rate": 1.4970831387364163e-05, "loss": 6.9186, "step": 27510 }, { "epoch": 0.6426865793637356, "grad_norm": 3.890625, "learning_rate": 1.495351323509647e-05, "loss": 6.9031, "step": 27520 }, { "epoch": 0.6429201137312369, "grad_norm": 4.25, "learning_rate": 1.4936200830415222e-05, "loss": 7.0371, "step": 27530 }, { "epoch": 0.6431536480987383, "grad_norm": 4.28125, "learning_rate": 1.4918894183224825e-05, "loss": 6.9364, "step": 27540 }, { "epoch": 0.6433871824662397, "grad_norm": 3.8125, "learning_rate": 1.4901593303426403e-05, "loss": 6.9126, "step": 27550 }, { "epoch": 0.643620716833741, "grad_norm": 5.46875, "learning_rate": 1.4884298200917767e-05, "loss": 6.9264, "step": 27560 }, { "epoch": 0.6438542512012424, "grad_norm": 4.28125, "learning_rate": 1.4867008885593431e-05, "loss": 6.9388, "step": 27570 }, { "epoch": 0.6440877855687438, "grad_norm": 5.0625, "learning_rate": 1.4849725367344606e-05, "loss": 6.9052, "step": 27580 }, { "epoch": 0.6443213199362451, "grad_norm": 4.71875, "learning_rate": 1.4832447656059162e-05, "loss": 7.0697, "step": 27590 }, { "epoch": 0.6445548543037465, "grad_norm": 3.625, "learning_rate": 1.4815175761621675e-05, "loss": 6.913, "step": 27600 }, { "epoch": 0.6447883886712479, "grad_norm": 4.40625, "learning_rate": 1.4797909693913376e-05, "loss": 6.9693, "step": 27610 }, { "epoch": 0.6450219230387492, "grad_norm": 4.625, "learning_rate": 1.478064946281218e-05, "loss": 6.9017, "step": 27620 }, { "epoch": 0.6452554574062506, "grad_norm": 5.96875, "learning_rate": 1.4763395078192631e-05, "loss": 6.9561, "step": 27630 }, { "epoch": 0.6454889917737519, "grad_norm": 4.4375, "learning_rate": 1.4746146549925955e-05, "loss": 7.0072, "step": 27640 }, { "epoch": 0.6457225261412533, "grad_norm": 4.96875, "learning_rate": 1.4728903887880024e-05, "loss": 6.959, "step": 27650 }, { "epoch": 0.6459560605087546, "grad_norm": 4.4375, "learning_rate": 1.4711667101919346e-05, "loss": 7.025, "step": 27660 }, { "epoch": 0.6461895948762559, "grad_norm": 4.0625, "learning_rate": 1.4694436201905082e-05, "loss": 6.9861, "step": 27670 }, { "epoch": 0.6464231292437573, "grad_norm": 3.640625, "learning_rate": 1.4677211197694995e-05, "loss": 7.0175, "step": 27680 }, { "epoch": 0.6466566636112587, "grad_norm": 4.125, "learning_rate": 1.465999209914351e-05, "loss": 6.9716, "step": 27690 }, { "epoch": 0.64689019797876, "grad_norm": 5.1875, "learning_rate": 1.4642778916101663e-05, "loss": 6.9101, "step": 27700 }, { "epoch": 0.6471237323462614, "grad_norm": 4.09375, "learning_rate": 1.462557165841708e-05, "loss": 6.9266, "step": 27710 }, { "epoch": 0.6473572667137628, "grad_norm": 3.59375, "learning_rate": 1.4608370335934041e-05, "loss": 6.9575, "step": 27720 }, { "epoch": 0.6475908010812641, "grad_norm": 3.703125, "learning_rate": 1.4591174958493392e-05, "loss": 6.9697, "step": 27730 }, { "epoch": 0.6478243354487655, "grad_norm": 4.96875, "learning_rate": 1.4573985535932608e-05, "loss": 6.9509, "step": 27740 }, { "epoch": 0.6480578698162668, "grad_norm": 4.3125, "learning_rate": 1.4556802078085724e-05, "loss": 6.9387, "step": 27750 }, { "epoch": 0.6482914041837682, "grad_norm": 4.28125, "learning_rate": 1.4539624594783394e-05, "loss": 6.9449, "step": 27760 }, { "epoch": 0.6485249385512696, "grad_norm": 5.03125, "learning_rate": 1.452245309585285e-05, "loss": 6.9071, "step": 27770 }, { "epoch": 0.6487584729187709, "grad_norm": 4.1875, "learning_rate": 1.4505287591117873e-05, "loss": 6.9787, "step": 27780 }, { "epoch": 0.6489920072862723, "grad_norm": 5.0625, "learning_rate": 1.4488128090398845e-05, "loss": 6.9749, "step": 27790 }, { "epoch": 0.6492255416537737, "grad_norm": 4.71875, "learning_rate": 1.4470974603512694e-05, "loss": 6.9648, "step": 27800 }, { "epoch": 0.649459076021275, "grad_norm": 5.09375, "learning_rate": 1.445382714027293e-05, "loss": 7.0137, "step": 27810 }, { "epoch": 0.6496926103887763, "grad_norm": 5.46875, "learning_rate": 1.4436685710489579e-05, "loss": 6.939, "step": 27820 }, { "epoch": 0.6499261447562777, "grad_norm": 4.375, "learning_rate": 1.4419550323969253e-05, "loss": 6.9864, "step": 27830 }, { "epoch": 0.650159679123779, "grad_norm": 5.15625, "learning_rate": 1.4402420990515086e-05, "loss": 6.9599, "step": 27840 }, { "epoch": 0.6503932134912804, "grad_norm": 4.21875, "learning_rate": 1.4385297719926774e-05, "loss": 6.9909, "step": 27850 }, { "epoch": 0.6506267478587817, "grad_norm": 4.1875, "learning_rate": 1.4368180522000508e-05, "loss": 6.9468, "step": 27860 }, { "epoch": 0.6508602822262831, "grad_norm": 5.25, "learning_rate": 1.435106940652901e-05, "loss": 6.9981, "step": 27870 }, { "epoch": 0.6510938165937845, "grad_norm": 5.03125, "learning_rate": 1.4333964383301557e-05, "loss": 6.9424, "step": 27880 }, { "epoch": 0.6513273509612858, "grad_norm": 4.65625, "learning_rate": 1.4316865462103904e-05, "loss": 7.0079, "step": 27890 }, { "epoch": 0.6515608853287872, "grad_norm": 3.84375, "learning_rate": 1.4299772652718346e-05, "loss": 6.9848, "step": 27900 }, { "epoch": 0.6517944196962886, "grad_norm": 5.0, "learning_rate": 1.4282685964923642e-05, "loss": 6.9529, "step": 27910 }, { "epoch": 0.6520279540637899, "grad_norm": 4.46875, "learning_rate": 1.4265605408495098e-05, "loss": 6.9447, "step": 27920 }, { "epoch": 0.6522614884312913, "grad_norm": 4.03125, "learning_rate": 1.4248530993204465e-05, "loss": 7.0214, "step": 27930 }, { "epoch": 0.6524950227987927, "grad_norm": 3.5, "learning_rate": 1.423146272882e-05, "loss": 6.96, "step": 27940 }, { "epoch": 0.652728557166294, "grad_norm": 5.21875, "learning_rate": 1.4214400625106445e-05, "loss": 7.0326, "step": 27950 }, { "epoch": 0.6529620915337954, "grad_norm": 5.71875, "learning_rate": 1.4197344691825023e-05, "loss": 6.9979, "step": 27960 }, { "epoch": 0.6531956259012967, "grad_norm": 5.09375, "learning_rate": 1.4180294938733424e-05, "loss": 6.9189, "step": 27970 }, { "epoch": 0.653429160268798, "grad_norm": 3.8125, "learning_rate": 1.4163251375585779e-05, "loss": 6.9197, "step": 27980 }, { "epoch": 0.6536626946362994, "grad_norm": 5.65625, "learning_rate": 1.4146214012132714e-05, "loss": 6.9294, "step": 27990 }, { "epoch": 0.6538962290038007, "grad_norm": 4.375, "learning_rate": 1.412918285812127e-05, "loss": 7.0045, "step": 28000 }, { "epoch": 0.6538962290038007, "eval_loss": 6.961223602294922, "eval_runtime": 78.7849, "eval_samples_per_second": 12.693, "eval_steps_per_second": 12.693, "step": 28000 }, { "epoch": 0.6541297633713021, "grad_norm": 4.75, "learning_rate": 1.4112157923294967e-05, "loss": 7.0021, "step": 28010 }, { "epoch": 0.6543632977388035, "grad_norm": 4.125, "learning_rate": 1.4095139217393764e-05, "loss": 7.0028, "step": 28020 }, { "epoch": 0.6545968321063048, "grad_norm": 3.890625, "learning_rate": 1.4078126750154028e-05, "loss": 6.9884, "step": 28030 }, { "epoch": 0.6548303664738062, "grad_norm": 4.96875, "learning_rate": 1.4061120531308597e-05, "loss": 6.9467, "step": 28040 }, { "epoch": 0.6550639008413076, "grad_norm": 4.59375, "learning_rate": 1.4044120570586699e-05, "loss": 6.9543, "step": 28050 }, { "epoch": 0.6552974352088089, "grad_norm": 4.28125, "learning_rate": 1.4027126877714008e-05, "loss": 6.9651, "step": 28060 }, { "epoch": 0.6555309695763103, "grad_norm": 4.75, "learning_rate": 1.401013946241259e-05, "loss": 6.9638, "step": 28070 }, { "epoch": 0.6557645039438116, "grad_norm": 4.71875, "learning_rate": 1.3993158334400935e-05, "loss": 6.9187, "step": 28080 }, { "epoch": 0.655998038311313, "grad_norm": 4.875, "learning_rate": 1.3976183503393944e-05, "loss": 6.9886, "step": 28090 }, { "epoch": 0.6562315726788144, "grad_norm": 5.8125, "learning_rate": 1.3959214979102884e-05, "loss": 6.8894, "step": 28100 }, { "epoch": 0.6564651070463157, "grad_norm": 4.9375, "learning_rate": 1.3942252771235447e-05, "loss": 6.9976, "step": 28110 }, { "epoch": 0.6566986414138171, "grad_norm": 4.34375, "learning_rate": 1.3925296889495687e-05, "loss": 6.9576, "step": 28120 }, { "epoch": 0.6569321757813185, "grad_norm": 6.28125, "learning_rate": 1.3908347343584055e-05, "loss": 6.9435, "step": 28130 }, { "epoch": 0.6571657101488197, "grad_norm": 4.34375, "learning_rate": 1.3891404143197379e-05, "loss": 6.9443, "step": 28140 }, { "epoch": 0.6573992445163211, "grad_norm": 4.25, "learning_rate": 1.3874467298028831e-05, "loss": 6.9104, "step": 28150 }, { "epoch": 0.6576327788838225, "grad_norm": 4.75, "learning_rate": 1.3857536817767985e-05, "loss": 7.0004, "step": 28160 }, { "epoch": 0.6578663132513238, "grad_norm": 3.71875, "learning_rate": 1.3840612712100736e-05, "loss": 6.922, "step": 28170 }, { "epoch": 0.6580998476188252, "grad_norm": 5.375, "learning_rate": 1.3823694990709374e-05, "loss": 6.9618, "step": 28180 }, { "epoch": 0.6583333819863265, "grad_norm": 5.53125, "learning_rate": 1.3806783663272476e-05, "loss": 6.932, "step": 28190 }, { "epoch": 0.6585669163538279, "grad_norm": 5.1875, "learning_rate": 1.378987873946504e-05, "loss": 6.924, "step": 28200 }, { "epoch": 0.6588004507213293, "grad_norm": 5.03125, "learning_rate": 1.3772980228958337e-05, "loss": 6.9063, "step": 28210 }, { "epoch": 0.6590339850888306, "grad_norm": 4.09375, "learning_rate": 1.3756088141419986e-05, "loss": 6.9935, "step": 28220 }, { "epoch": 0.659267519456332, "grad_norm": 5.0, "learning_rate": 1.373920248651395e-05, "loss": 6.954, "step": 28230 }, { "epoch": 0.6595010538238334, "grad_norm": 5.125, "learning_rate": 1.3722323273900479e-05, "loss": 7.0409, "step": 28240 }, { "epoch": 0.6597345881913347, "grad_norm": 5.375, "learning_rate": 1.3705450513236168e-05, "loss": 6.9218, "step": 28250 }, { "epoch": 0.6599681225588361, "grad_norm": 5.15625, "learning_rate": 1.3688584214173905e-05, "loss": 7.0018, "step": 28260 }, { "epoch": 0.6602016569263375, "grad_norm": 4.78125, "learning_rate": 1.36717243863629e-05, "loss": 7.0019, "step": 28270 }, { "epoch": 0.6604351912938388, "grad_norm": 4.28125, "learning_rate": 1.3654871039448636e-05, "loss": 6.9818, "step": 28280 }, { "epoch": 0.6606687256613402, "grad_norm": 4.875, "learning_rate": 1.3638024183072881e-05, "loss": 6.9079, "step": 28290 }, { "epoch": 0.6609022600288414, "grad_norm": 5.5625, "learning_rate": 1.3621183826873735e-05, "loss": 6.9825, "step": 28300 }, { "epoch": 0.6611357943963428, "grad_norm": 4.625, "learning_rate": 1.3604349980485514e-05, "loss": 6.8982, "step": 28310 }, { "epoch": 0.6613693287638442, "grad_norm": 4.125, "learning_rate": 1.3587522653538897e-05, "loss": 6.9596, "step": 28320 }, { "epoch": 0.6616028631313455, "grad_norm": 4.6875, "learning_rate": 1.3570701855660745e-05, "loss": 6.9415, "step": 28330 }, { "epoch": 0.6618363974988469, "grad_norm": 6.09375, "learning_rate": 1.3553887596474246e-05, "loss": 6.9821, "step": 28340 }, { "epoch": 0.6620699318663483, "grad_norm": 3.796875, "learning_rate": 1.3537079885598808e-05, "loss": 6.9592, "step": 28350 }, { "epoch": 0.6623034662338496, "grad_norm": 4.09375, "learning_rate": 1.3520278732650104e-05, "loss": 6.9377, "step": 28360 }, { "epoch": 0.662537000601351, "grad_norm": 4.03125, "learning_rate": 1.350348414724007e-05, "loss": 6.9251, "step": 28370 }, { "epoch": 0.6627705349688523, "grad_norm": 4.59375, "learning_rate": 1.3486696138976868e-05, "loss": 6.9633, "step": 28380 }, { "epoch": 0.6630040693363537, "grad_norm": 5.21875, "learning_rate": 1.3469914717464916e-05, "loss": 7.0398, "step": 28390 }, { "epoch": 0.6632376037038551, "grad_norm": 4.1875, "learning_rate": 1.345313989230483e-05, "loss": 6.9814, "step": 28400 }, { "epoch": 0.6634711380713564, "grad_norm": 5.40625, "learning_rate": 1.3436371673093495e-05, "loss": 6.9706, "step": 28410 }, { "epoch": 0.6637046724388578, "grad_norm": 3.8125, "learning_rate": 1.3419610069423983e-05, "loss": 6.9653, "step": 28420 }, { "epoch": 0.6639382068063592, "grad_norm": 4.46875, "learning_rate": 1.3402855090885575e-05, "loss": 6.9519, "step": 28430 }, { "epoch": 0.6641717411738605, "grad_norm": 3.875, "learning_rate": 1.3386106747063815e-05, "loss": 6.9976, "step": 28440 }, { "epoch": 0.6644052755413619, "grad_norm": 3.84375, "learning_rate": 1.3369365047540394e-05, "loss": 6.9696, "step": 28450 }, { "epoch": 0.6646388099088633, "grad_norm": 3.8125, "learning_rate": 1.3352630001893234e-05, "loss": 6.9455, "step": 28460 }, { "epoch": 0.6648723442763645, "grad_norm": 4.3125, "learning_rate": 1.3335901619696431e-05, "loss": 6.991, "step": 28470 }, { "epoch": 0.6651058786438659, "grad_norm": 3.671875, "learning_rate": 1.3319179910520291e-05, "loss": 6.9966, "step": 28480 }, { "epoch": 0.6653394130113672, "grad_norm": 4.09375, "learning_rate": 1.3302464883931271e-05, "loss": 6.9623, "step": 28490 }, { "epoch": 0.6655729473788686, "grad_norm": 5.875, "learning_rate": 1.3285756549492032e-05, "loss": 6.9585, "step": 28500 }, { "epoch": 0.6655729473788686, "eval_loss": 6.959700584411621, "eval_runtime": 78.7316, "eval_samples_per_second": 12.701, "eval_steps_per_second": 12.701, "step": 28500 }, { "epoch": 0.66580648174637, "grad_norm": 4.75, "learning_rate": 1.3269054916761408e-05, "loss": 6.9693, "step": 28510 }, { "epoch": 0.6660400161138713, "grad_norm": 4.5, "learning_rate": 1.3252359995294375e-05, "loss": 7.0535, "step": 28520 }, { "epoch": 0.6662735504813727, "grad_norm": 4.03125, "learning_rate": 1.3235671794642096e-05, "loss": 7.0206, "step": 28530 }, { "epoch": 0.6665070848488741, "grad_norm": 4.3125, "learning_rate": 1.321899032435186e-05, "loss": 6.9569, "step": 28540 }, { "epoch": 0.6667406192163754, "grad_norm": 4.40625, "learning_rate": 1.3202315593967126e-05, "loss": 6.9249, "step": 28550 }, { "epoch": 0.6669741535838768, "grad_norm": 3.71875, "learning_rate": 1.318564761302751e-05, "loss": 6.9262, "step": 28560 }, { "epoch": 0.6672076879513782, "grad_norm": 4.1875, "learning_rate": 1.3168986391068727e-05, "loss": 6.9248, "step": 28570 }, { "epoch": 0.6674412223188795, "grad_norm": 5.40625, "learning_rate": 1.3152331937622666e-05, "loss": 6.9135, "step": 28580 }, { "epoch": 0.6676747566863809, "grad_norm": 3.953125, "learning_rate": 1.3135684262217307e-05, "loss": 6.9568, "step": 28590 }, { "epoch": 0.6679082910538822, "grad_norm": 5.875, "learning_rate": 1.3119043374376794e-05, "loss": 6.9289, "step": 28600 }, { "epoch": 0.6681418254213836, "grad_norm": 5.65625, "learning_rate": 1.3102409283621338e-05, "loss": 7.0254, "step": 28610 }, { "epoch": 0.668375359788885, "grad_norm": 5.75, "learning_rate": 1.3085781999467303e-05, "loss": 7.0079, "step": 28620 }, { "epoch": 0.6686088941563862, "grad_norm": 4.3125, "learning_rate": 1.3069161531427155e-05, "loss": 6.9197, "step": 28630 }, { "epoch": 0.6688424285238876, "grad_norm": 5.4375, "learning_rate": 1.305254788900942e-05, "loss": 6.9895, "step": 28640 }, { "epoch": 0.669075962891389, "grad_norm": 4.625, "learning_rate": 1.303594108171878e-05, "loss": 6.9843, "step": 28650 }, { "epoch": 0.6693094972588903, "grad_norm": 4.09375, "learning_rate": 1.301934111905595e-05, "loss": 7.0097, "step": 28660 }, { "epoch": 0.6695430316263917, "grad_norm": 4.125, "learning_rate": 1.3002748010517769e-05, "loss": 6.9202, "step": 28670 }, { "epoch": 0.6697765659938931, "grad_norm": 4.96875, "learning_rate": 1.2986161765597129e-05, "loss": 6.9684, "step": 28680 }, { "epoch": 0.6700101003613944, "grad_norm": 3.65625, "learning_rate": 1.2969582393783025e-05, "loss": 7.0139, "step": 28690 }, { "epoch": 0.6702436347288958, "grad_norm": 4.3125, "learning_rate": 1.2953009904560482e-05, "loss": 7.0066, "step": 28700 }, { "epoch": 0.6704771690963971, "grad_norm": 4.6875, "learning_rate": 1.2936444307410626e-05, "loss": 6.968, "step": 28710 }, { "epoch": 0.6707107034638985, "grad_norm": 5.65625, "learning_rate": 1.2919885611810606e-05, "loss": 6.9779, "step": 28720 }, { "epoch": 0.6709442378313999, "grad_norm": 4.15625, "learning_rate": 1.2903333827233634e-05, "loss": 6.992, "step": 28730 }, { "epoch": 0.6711777721989012, "grad_norm": 4.5625, "learning_rate": 1.2886788963148976e-05, "loss": 6.9125, "step": 28740 }, { "epoch": 0.6714113065664026, "grad_norm": 5.65625, "learning_rate": 1.2870251029021941e-05, "loss": 6.8642, "step": 28750 }, { "epoch": 0.671644840933904, "grad_norm": 3.703125, "learning_rate": 1.285372003431387e-05, "loss": 6.9761, "step": 28760 }, { "epoch": 0.6718783753014053, "grad_norm": 4.46875, "learning_rate": 1.2837195988482121e-05, "loss": 7.0452, "step": 28770 }, { "epoch": 0.6721119096689067, "grad_norm": 6.0, "learning_rate": 1.2820678900980093e-05, "loss": 6.9873, "step": 28780 }, { "epoch": 0.6723454440364081, "grad_norm": 5.46875, "learning_rate": 1.2804168781257186e-05, "loss": 6.937, "step": 28790 }, { "epoch": 0.6725789784039093, "grad_norm": 4.5625, "learning_rate": 1.2787665638758834e-05, "loss": 6.9461, "step": 28800 }, { "epoch": 0.6728125127714107, "grad_norm": 3.953125, "learning_rate": 1.2771169482926476e-05, "loss": 7.0352, "step": 28810 }, { "epoch": 0.673046047138912, "grad_norm": 5.4375, "learning_rate": 1.2754680323197537e-05, "loss": 6.9059, "step": 28820 }, { "epoch": 0.6732795815064134, "grad_norm": 5.71875, "learning_rate": 1.2738198169005466e-05, "loss": 6.8685, "step": 28830 }, { "epoch": 0.6735131158739148, "grad_norm": 4.65625, "learning_rate": 1.272172302977967e-05, "loss": 7.0081, "step": 28840 }, { "epoch": 0.6737466502414161, "grad_norm": 5.625, "learning_rate": 1.270525491494558e-05, "loss": 6.892, "step": 28850 }, { "epoch": 0.6739801846089175, "grad_norm": 5.53125, "learning_rate": 1.2688793833924572e-05, "loss": 6.9851, "step": 28860 }, { "epoch": 0.6742137189764189, "grad_norm": 4.28125, "learning_rate": 1.267233979613403e-05, "loss": 6.9462, "step": 28870 }, { "epoch": 0.6744472533439202, "grad_norm": 4.1875, "learning_rate": 1.26558928109873e-05, "loss": 6.9682, "step": 28880 }, { "epoch": 0.6746807877114216, "grad_norm": 3.96875, "learning_rate": 1.2639452887893668e-05, "loss": 6.9496, "step": 28890 }, { "epoch": 0.674914322078923, "grad_norm": 4.21875, "learning_rate": 1.2623020036258423e-05, "loss": 6.9339, "step": 28900 }, { "epoch": 0.6751478564464243, "grad_norm": 4.53125, "learning_rate": 1.2606594265482763e-05, "loss": 6.92, "step": 28910 }, { "epoch": 0.6753813908139257, "grad_norm": 5.53125, "learning_rate": 1.259017558496387e-05, "loss": 6.9155, "step": 28920 }, { "epoch": 0.675614925181427, "grad_norm": 3.796875, "learning_rate": 1.2573764004094862e-05, "loss": 6.9606, "step": 28930 }, { "epoch": 0.6758484595489284, "grad_norm": 4.34375, "learning_rate": 1.2557359532264773e-05, "loss": 6.9957, "step": 28940 }, { "epoch": 0.6760819939164298, "grad_norm": 4.1875, "learning_rate": 1.2540962178858612e-05, "loss": 6.9508, "step": 28950 }, { "epoch": 0.676315528283931, "grad_norm": 5.78125, "learning_rate": 1.2524571953257263e-05, "loss": 7.0174, "step": 28960 }, { "epoch": 0.6765490626514324, "grad_norm": 4.53125, "learning_rate": 1.2508188864837584e-05, "loss": 6.9368, "step": 28970 }, { "epoch": 0.6767825970189338, "grad_norm": 5.0625, "learning_rate": 1.2491812922972305e-05, "loss": 6.9503, "step": 28980 }, { "epoch": 0.6770161313864351, "grad_norm": 3.515625, "learning_rate": 1.2475444137030098e-05, "loss": 6.9764, "step": 28990 }, { "epoch": 0.6772496657539365, "grad_norm": 3.921875, "learning_rate": 1.2459082516375542e-05, "loss": 6.9521, "step": 29000 }, { "epoch": 0.6772496657539365, "eval_loss": 6.958155632019043, "eval_runtime": 79.1764, "eval_samples_per_second": 12.63, "eval_steps_per_second": 12.63, "step": 29000 }, { "epoch": 0.6774832001214379, "grad_norm": 4.875, "learning_rate": 1.2442728070369087e-05, "loss": 6.9808, "step": 29010 }, { "epoch": 0.6777167344889392, "grad_norm": 4.40625, "learning_rate": 1.2426380808367117e-05, "loss": 6.9487, "step": 29020 }, { "epoch": 0.6779502688564406, "grad_norm": 4.59375, "learning_rate": 1.2410040739721863e-05, "loss": 6.9752, "step": 29030 }, { "epoch": 0.6781838032239419, "grad_norm": 4.40625, "learning_rate": 1.2393707873781479e-05, "loss": 6.8856, "step": 29040 }, { "epoch": 0.6784173375914433, "grad_norm": 4.84375, "learning_rate": 1.237738221988998e-05, "loss": 6.975, "step": 29050 }, { "epoch": 0.6786508719589447, "grad_norm": 5.03125, "learning_rate": 1.2361063787387272e-05, "loss": 6.9352, "step": 29060 }, { "epoch": 0.678884406326446, "grad_norm": 4.84375, "learning_rate": 1.2344752585609104e-05, "loss": 7.0152, "step": 29070 }, { "epoch": 0.6791179406939474, "grad_norm": 4.84375, "learning_rate": 1.232844862388709e-05, "loss": 6.9028, "step": 29080 }, { "epoch": 0.6793514750614488, "grad_norm": 4.3125, "learning_rate": 1.2312151911548733e-05, "loss": 7.0283, "step": 29090 }, { "epoch": 0.6795850094289501, "grad_norm": 4.0, "learning_rate": 1.2295862457917351e-05, "loss": 6.9315, "step": 29100 }, { "epoch": 0.6798185437964515, "grad_norm": 4.5625, "learning_rate": 1.2279580272312133e-05, "loss": 7.077, "step": 29110 }, { "epoch": 0.6800520781639529, "grad_norm": 4.75, "learning_rate": 1.2263305364048105e-05, "loss": 6.9619, "step": 29120 }, { "epoch": 0.6802856125314541, "grad_norm": 4.6875, "learning_rate": 1.2247037742436134e-05, "loss": 6.9939, "step": 29130 }, { "epoch": 0.6805191468989555, "grad_norm": 5.1875, "learning_rate": 1.223077741678291e-05, "loss": 6.9749, "step": 29140 }, { "epoch": 0.6807526812664568, "grad_norm": 5.59375, "learning_rate": 1.2214524396390937e-05, "loss": 6.979, "step": 29150 }, { "epoch": 0.6809862156339582, "grad_norm": 5.78125, "learning_rate": 1.2198278690558562e-05, "loss": 6.9486, "step": 29160 }, { "epoch": 0.6812197500014596, "grad_norm": 5.0625, "learning_rate": 1.2182040308579943e-05, "loss": 7.0009, "step": 29170 }, { "epoch": 0.6814532843689609, "grad_norm": 5.3125, "learning_rate": 1.2165809259745051e-05, "loss": 6.9034, "step": 29180 }, { "epoch": 0.6816868187364623, "grad_norm": 4.8125, "learning_rate": 1.2149585553339635e-05, "loss": 6.9308, "step": 29190 }, { "epoch": 0.6819203531039637, "grad_norm": 4.9375, "learning_rate": 1.213336919864529e-05, "loss": 6.9115, "step": 29200 }, { "epoch": 0.682153887471465, "grad_norm": 5.25, "learning_rate": 1.211716020493936e-05, "loss": 6.9677, "step": 29210 }, { "epoch": 0.6823874218389664, "grad_norm": 3.828125, "learning_rate": 1.2100958581494993e-05, "loss": 6.9499, "step": 29220 }, { "epoch": 0.6826209562064678, "grad_norm": 4.4375, "learning_rate": 1.2084764337581129e-05, "loss": 6.9705, "step": 29230 }, { "epoch": 0.6828544905739691, "grad_norm": 5.40625, "learning_rate": 1.2068577482462484e-05, "loss": 6.8905, "step": 29240 }, { "epoch": 0.6830880249414705, "grad_norm": 5.15625, "learning_rate": 1.2052398025399553e-05, "loss": 6.997, "step": 29250 }, { "epoch": 0.6833215593089718, "grad_norm": 5.40625, "learning_rate": 1.2036225975648568e-05, "loss": 7.0045, "step": 29260 }, { "epoch": 0.6835550936764732, "grad_norm": 4.78125, "learning_rate": 1.2020061342461569e-05, "loss": 6.918, "step": 29270 }, { "epoch": 0.6837886280439746, "grad_norm": 4.6875, "learning_rate": 1.2003904135086305e-05, "loss": 6.9552, "step": 29280 }, { "epoch": 0.6840221624114758, "grad_norm": 4.46875, "learning_rate": 1.198775436276631e-05, "loss": 7.0035, "step": 29290 }, { "epoch": 0.6842556967789772, "grad_norm": 4.375, "learning_rate": 1.1971612034740867e-05, "loss": 6.9413, "step": 29300 }, { "epoch": 0.6844892311464786, "grad_norm": 4.1875, "learning_rate": 1.1955477160244969e-05, "loss": 7.0243, "step": 29310 }, { "epoch": 0.6847227655139799, "grad_norm": 4.15625, "learning_rate": 1.1939349748509382e-05, "loss": 6.8856, "step": 29320 }, { "epoch": 0.6849562998814813, "grad_norm": 4.65625, "learning_rate": 1.1923229808760564e-05, "loss": 6.9557, "step": 29330 }, { "epoch": 0.6851898342489827, "grad_norm": 4.1875, "learning_rate": 1.1907117350220729e-05, "loss": 6.9371, "step": 29340 }, { "epoch": 0.685423368616484, "grad_norm": 4.9375, "learning_rate": 1.1891012382107807e-05, "loss": 6.9837, "step": 29350 }, { "epoch": 0.6856569029839854, "grad_norm": 5.46875, "learning_rate": 1.1874914913635426e-05, "loss": 6.9939, "step": 29360 }, { "epoch": 0.6858904373514867, "grad_norm": 5.125, "learning_rate": 1.1858824954012946e-05, "loss": 6.9045, "step": 29370 }, { "epoch": 0.6861239717189881, "grad_norm": 3.640625, "learning_rate": 1.1842742512445401e-05, "loss": 7.0037, "step": 29380 }, { "epoch": 0.6863575060864895, "grad_norm": 5.25, "learning_rate": 1.1826667598133562e-05, "loss": 6.952, "step": 29390 }, { "epoch": 0.6865910404539908, "grad_norm": 4.4375, "learning_rate": 1.1810600220273854e-05, "loss": 6.9194, "step": 29400 }, { "epoch": 0.6868245748214922, "grad_norm": 4.46875, "learning_rate": 1.179454038805842e-05, "loss": 6.9538, "step": 29410 }, { "epoch": 0.6870581091889936, "grad_norm": 4.875, "learning_rate": 1.1778488110675085e-05, "loss": 6.9406, "step": 29420 }, { "epoch": 0.6872916435564949, "grad_norm": 5.625, "learning_rate": 1.1762443397307326e-05, "loss": 7.0413, "step": 29430 }, { "epoch": 0.6875251779239963, "grad_norm": 4.9375, "learning_rate": 1.1746406257134327e-05, "loss": 6.9303, "step": 29440 }, { "epoch": 0.6877587122914975, "grad_norm": 5.5, "learning_rate": 1.1730376699330903e-05, "loss": 7.0212, "step": 29450 }, { "epoch": 0.6879922466589989, "grad_norm": 4.9375, "learning_rate": 1.1714354733067561e-05, "loss": 6.946, "step": 29460 }, { "epoch": 0.6882257810265003, "grad_norm": 6.34375, "learning_rate": 1.1698340367510452e-05, "loss": 6.9517, "step": 29470 }, { "epoch": 0.6884593153940016, "grad_norm": 4.21875, "learning_rate": 1.1682333611821395e-05, "loss": 6.9391, "step": 29480 }, { "epoch": 0.688692849761503, "grad_norm": 4.65625, "learning_rate": 1.1666334475157824e-05, "loss": 6.9408, "step": 29490 }, { "epoch": 0.6889263841290044, "grad_norm": 4.90625, "learning_rate": 1.1650342966672828e-05, "loss": 6.9349, "step": 29500 }, { "epoch": 0.6889263841290044, "eval_loss": 6.957561492919922, "eval_runtime": 78.5664, "eval_samples_per_second": 12.728, "eval_steps_per_second": 12.728, "step": 29500 }, { "epoch": 0.6891599184965057, "grad_norm": 4.875, "learning_rate": 1.1634359095515155e-05, "loss": 6.9466, "step": 29510 }, { "epoch": 0.6893934528640071, "grad_norm": 4.34375, "learning_rate": 1.161838287082914e-05, "loss": 6.9824, "step": 29520 }, { "epoch": 0.6896269872315085, "grad_norm": 4.8125, "learning_rate": 1.1602414301754777e-05, "loss": 6.9911, "step": 29530 }, { "epoch": 0.6898605215990098, "grad_norm": 4.96875, "learning_rate": 1.1586453397427677e-05, "loss": 6.9636, "step": 29540 }, { "epoch": 0.6900940559665112, "grad_norm": 4.40625, "learning_rate": 1.1570500166979061e-05, "loss": 6.9532, "step": 29550 }, { "epoch": 0.6903275903340125, "grad_norm": 4.90625, "learning_rate": 1.1554554619535746e-05, "loss": 6.9893, "step": 29560 }, { "epoch": 0.6905611247015139, "grad_norm": 4.84375, "learning_rate": 1.1538616764220181e-05, "loss": 7.001, "step": 29570 }, { "epoch": 0.6907946590690153, "grad_norm": 6.53125, "learning_rate": 1.1522686610150386e-05, "loss": 6.9869, "step": 29580 }, { "epoch": 0.6910281934365166, "grad_norm": 5.375, "learning_rate": 1.1506764166439987e-05, "loss": 7.0324, "step": 29590 }, { "epoch": 0.691261727804018, "grad_norm": 3.6875, "learning_rate": 1.149084944219822e-05, "loss": 6.9508, "step": 29600 }, { "epoch": 0.6914952621715194, "grad_norm": 5.21875, "learning_rate": 1.1474942446529865e-05, "loss": 6.9482, "step": 29610 }, { "epoch": 0.6917287965390206, "grad_norm": 4.84375, "learning_rate": 1.1459043188535315e-05, "loss": 6.9426, "step": 29620 }, { "epoch": 0.691962330906522, "grad_norm": 4.53125, "learning_rate": 1.1443151677310504e-05, "loss": 6.9157, "step": 29630 }, { "epoch": 0.6921958652740234, "grad_norm": 5.53125, "learning_rate": 1.1427267921946973e-05, "loss": 6.9924, "step": 29640 }, { "epoch": 0.6924293996415247, "grad_norm": 4.15625, "learning_rate": 1.1411391931531784e-05, "loss": 7.0195, "step": 29650 }, { "epoch": 0.6926629340090261, "grad_norm": 4.28125, "learning_rate": 1.1395523715147587e-05, "loss": 6.9837, "step": 29660 }, { "epoch": 0.6928964683765274, "grad_norm": 5.03125, "learning_rate": 1.1379663281872588e-05, "loss": 6.9014, "step": 29670 }, { "epoch": 0.6931300027440288, "grad_norm": 4.25, "learning_rate": 1.1363810640780503e-05, "loss": 7.0312, "step": 29680 }, { "epoch": 0.6933635371115302, "grad_norm": 4.875, "learning_rate": 1.1347965800940638e-05, "loss": 6.9014, "step": 29690 }, { "epoch": 0.6935970714790315, "grad_norm": 5.34375, "learning_rate": 1.1332128771417789e-05, "loss": 6.9252, "step": 29700 }, { "epoch": 0.6938306058465329, "grad_norm": 4.78125, "learning_rate": 1.1316299561272318e-05, "loss": 6.9585, "step": 29710 }, { "epoch": 0.6940641402140343, "grad_norm": 4.84375, "learning_rate": 1.1300478179560112e-05, "loss": 6.9764, "step": 29720 }, { "epoch": 0.6942976745815356, "grad_norm": 4.84375, "learning_rate": 1.1284664635332553e-05, "loss": 6.9379, "step": 29730 }, { "epoch": 0.694531208949037, "grad_norm": 4.625, "learning_rate": 1.1268858937636573e-05, "loss": 7.0271, "step": 29740 }, { "epoch": 0.6947647433165384, "grad_norm": 4.03125, "learning_rate": 1.125306109551458e-05, "loss": 6.99, "step": 29750 }, { "epoch": 0.6949982776840397, "grad_norm": 5.28125, "learning_rate": 1.1237271118004523e-05, "loss": 7.0397, "step": 29760 }, { "epoch": 0.695231812051541, "grad_norm": 4.5, "learning_rate": 1.1221489014139817e-05, "loss": 7.0612, "step": 29770 }, { "epoch": 0.6954653464190423, "grad_norm": 4.3125, "learning_rate": 1.12057147929494e-05, "loss": 7.0098, "step": 29780 }, { "epoch": 0.6956988807865437, "grad_norm": 4.40625, "learning_rate": 1.11899484634577e-05, "loss": 6.9969, "step": 29790 }, { "epoch": 0.6959324151540451, "grad_norm": 5.1875, "learning_rate": 1.1174190034684598e-05, "loss": 6.9861, "step": 29800 }, { "epoch": 0.6961659495215464, "grad_norm": 4.65625, "learning_rate": 1.1158439515645503e-05, "loss": 6.8454, "step": 29810 }, { "epoch": 0.6963994838890478, "grad_norm": 5.03125, "learning_rate": 1.114269691535125e-05, "loss": 7.0058, "step": 29820 }, { "epoch": 0.6966330182565492, "grad_norm": 4.15625, "learning_rate": 1.1126962242808176e-05, "loss": 7.0044, "step": 29830 }, { "epoch": 0.6968665526240505, "grad_norm": 5.28125, "learning_rate": 1.1111235507018075e-05, "loss": 6.9553, "step": 29840 }, { "epoch": 0.6971000869915519, "grad_norm": 5.09375, "learning_rate": 1.109551671697821e-05, "loss": 6.9052, "step": 29850 }, { "epoch": 0.6973336213590533, "grad_norm": 4.78125, "learning_rate": 1.1079805881681274e-05, "loss": 6.8869, "step": 29860 }, { "epoch": 0.6975671557265546, "grad_norm": 3.984375, "learning_rate": 1.1064103010115418e-05, "loss": 6.9437, "step": 29870 }, { "epoch": 0.697800690094056, "grad_norm": 4.34375, "learning_rate": 1.1048408111264255e-05, "loss": 6.9312, "step": 29880 }, { "epoch": 0.6980342244615573, "grad_norm": 4.03125, "learning_rate": 1.1032721194106806e-05, "loss": 6.9736, "step": 29890 }, { "epoch": 0.6982677588290587, "grad_norm": 4.34375, "learning_rate": 1.1017042267617559e-05, "loss": 6.9894, "step": 29900 }, { "epoch": 0.6985012931965601, "grad_norm": 4.21875, "learning_rate": 1.1001371340766403e-05, "loss": 6.9864, "step": 29910 }, { "epoch": 0.6987348275640614, "grad_norm": 5.0, "learning_rate": 1.0985708422518676e-05, "loss": 7.0216, "step": 29920 }, { "epoch": 0.6989683619315628, "grad_norm": 4.71875, "learning_rate": 1.0970053521835113e-05, "loss": 6.9516, "step": 29930 }, { "epoch": 0.6992018962990642, "grad_norm": 4.5, "learning_rate": 1.0954406647671859e-05, "loss": 6.9522, "step": 29940 }, { "epoch": 0.6994354306665654, "grad_norm": 4.375, "learning_rate": 1.0938767808980486e-05, "loss": 6.9915, "step": 29950 }, { "epoch": 0.6996689650340668, "grad_norm": 5.34375, "learning_rate": 1.0923137014707956e-05, "loss": 6.9151, "step": 29960 }, { "epoch": 0.6999024994015682, "grad_norm": 4.5, "learning_rate": 1.0907514273796648e-05, "loss": 6.942, "step": 29970 }, { "epoch": 0.7001360337690695, "grad_norm": 4.75, "learning_rate": 1.08918995951843e-05, "loss": 6.971, "step": 29980 }, { "epoch": 0.7003695681365709, "grad_norm": 4.125, "learning_rate": 1.0876292987804071e-05, "loss": 6.9231, "step": 29990 }, { "epoch": 0.7006031025040722, "grad_norm": 4.3125, "learning_rate": 1.0860694460584481e-05, "loss": 6.9726, "step": 30000 }, { "epoch": 0.7006031025040722, "eval_loss": 6.957050800323486, "eval_runtime": 78.7049, "eval_samples_per_second": 12.706, "eval_steps_per_second": 12.706, "step": 30000 }, { "epoch": 0.7008366368715736, "grad_norm": 4.15625, "learning_rate": 1.0845104022449424e-05, "loss": 6.95, "step": 30010 }, { "epoch": 0.701070171239075, "grad_norm": 3.59375, "learning_rate": 1.0829521682318186e-05, "loss": 6.9196, "step": 30020 }, { "epoch": 0.7013037056065763, "grad_norm": 4.0625, "learning_rate": 1.0813947449105407e-05, "loss": 6.9429, "step": 30030 }, { "epoch": 0.7015372399740777, "grad_norm": 5.84375, "learning_rate": 1.0798381331721109e-05, "loss": 6.9701, "step": 30040 }, { "epoch": 0.7017707743415791, "grad_norm": 3.78125, "learning_rate": 1.078282333907063e-05, "loss": 6.9463, "step": 30050 }, { "epoch": 0.7020043087090804, "grad_norm": 4.625, "learning_rate": 1.0767273480054702e-05, "loss": 6.9827, "step": 30060 }, { "epoch": 0.7022378430765818, "grad_norm": 4.96875, "learning_rate": 1.0751731763569372e-05, "loss": 6.8696, "step": 30070 }, { "epoch": 0.7024713774440832, "grad_norm": 4.09375, "learning_rate": 1.073619819850605e-05, "loss": 6.9735, "step": 30080 }, { "epoch": 0.7027049118115845, "grad_norm": 5.65625, "learning_rate": 1.0720672793751482e-05, "loss": 6.96, "step": 30090 }, { "epoch": 0.7029384461790859, "grad_norm": 3.84375, "learning_rate": 1.0705155558187723e-05, "loss": 6.9629, "step": 30100 }, { "epoch": 0.7031719805465871, "grad_norm": 5.5, "learning_rate": 1.0689646500692188e-05, "loss": 6.9666, "step": 30110 }, { "epoch": 0.7034055149140885, "grad_norm": 4.34375, "learning_rate": 1.0674145630137577e-05, "loss": 6.9874, "step": 30120 }, { "epoch": 0.7036390492815899, "grad_norm": 5.0, "learning_rate": 1.0658652955391943e-05, "loss": 6.9242, "step": 30130 }, { "epoch": 0.7038725836490912, "grad_norm": 5.375, "learning_rate": 1.0643168485318614e-05, "loss": 6.9714, "step": 30140 }, { "epoch": 0.7041061180165926, "grad_norm": 4.25, "learning_rate": 1.0627692228776252e-05, "loss": 7.0411, "step": 30150 }, { "epoch": 0.704339652384094, "grad_norm": 4.90625, "learning_rate": 1.0612224194618814e-05, "loss": 6.9998, "step": 30160 }, { "epoch": 0.7045731867515953, "grad_norm": 3.515625, "learning_rate": 1.0596764391695538e-05, "loss": 6.9792, "step": 30170 }, { "epoch": 0.7048067211190967, "grad_norm": 3.984375, "learning_rate": 1.0581312828850978e-05, "loss": 6.9501, "step": 30180 }, { "epoch": 0.7050402554865981, "grad_norm": 4.0625, "learning_rate": 1.0565869514924942e-05, "loss": 7.0606, "step": 30190 }, { "epoch": 0.7052737898540994, "grad_norm": 4.0625, "learning_rate": 1.0550434458752547e-05, "loss": 6.9976, "step": 30200 }, { "epoch": 0.7055073242216008, "grad_norm": 4.96875, "learning_rate": 1.0535007669164184e-05, "loss": 6.9717, "step": 30210 }, { "epoch": 0.7057408585891021, "grad_norm": 4.125, "learning_rate": 1.0519589154985488e-05, "loss": 6.9593, "step": 30220 }, { "epoch": 0.7059743929566035, "grad_norm": 4.65625, "learning_rate": 1.0504178925037392e-05, "loss": 7.0118, "step": 30230 }, { "epoch": 0.7062079273241049, "grad_norm": 4.59375, "learning_rate": 1.0488776988136064e-05, "loss": 6.9577, "step": 30240 }, { "epoch": 0.7064414616916062, "grad_norm": 4.375, "learning_rate": 1.0473383353092952e-05, "loss": 6.8819, "step": 30250 }, { "epoch": 0.7066749960591076, "grad_norm": 4.3125, "learning_rate": 1.0457998028714717e-05, "loss": 7.0129, "step": 30260 }, { "epoch": 0.706908530426609, "grad_norm": 3.921875, "learning_rate": 1.0442621023803323e-05, "loss": 7.0276, "step": 30270 }, { "epoch": 0.7071420647941102, "grad_norm": 4.46875, "learning_rate": 1.0427252347155928e-05, "loss": 6.9472, "step": 30280 }, { "epoch": 0.7073755991616116, "grad_norm": 4.375, "learning_rate": 1.0411892007564925e-05, "loss": 6.9283, "step": 30290 }, { "epoch": 0.707609133529113, "grad_norm": 5.0, "learning_rate": 1.0396540013817971e-05, "loss": 6.9512, "step": 30300 }, { "epoch": 0.7078426678966143, "grad_norm": 4.75, "learning_rate": 1.038119637469791e-05, "loss": 6.9435, "step": 30310 }, { "epoch": 0.7080762022641157, "grad_norm": 4.625, "learning_rate": 1.0365861098982832e-05, "loss": 6.9997, "step": 30320 }, { "epoch": 0.708309736631617, "grad_norm": 4.03125, "learning_rate": 1.035053419544604e-05, "loss": 6.904, "step": 30330 }, { "epoch": 0.7085432709991184, "grad_norm": 5.125, "learning_rate": 1.0335215672856046e-05, "loss": 7.0051, "step": 30340 }, { "epoch": 0.7087768053666198, "grad_norm": 4.6875, "learning_rate": 1.031990553997656e-05, "loss": 7.0358, "step": 30350 }, { "epoch": 0.7090103397341211, "grad_norm": 4.4375, "learning_rate": 1.0304603805566481e-05, "loss": 6.8974, "step": 30360 }, { "epoch": 0.7092438741016225, "grad_norm": 4.6875, "learning_rate": 1.028931047837994e-05, "loss": 6.9092, "step": 30370 }, { "epoch": 0.7094774084691239, "grad_norm": 4.90625, "learning_rate": 1.027402556716621e-05, "loss": 6.9529, "step": 30380 }, { "epoch": 0.7097109428366252, "grad_norm": 4.1875, "learning_rate": 1.0258749080669808e-05, "loss": 6.9025, "step": 30390 }, { "epoch": 0.7099444772041266, "grad_norm": 3.96875, "learning_rate": 1.0243481027630371e-05, "loss": 6.9539, "step": 30400 }, { "epoch": 0.710178011571628, "grad_norm": 5.21875, "learning_rate": 1.0228221416782766e-05, "loss": 6.9244, "step": 30410 }, { "epoch": 0.7104115459391293, "grad_norm": 3.9375, "learning_rate": 1.0212970256856975e-05, "loss": 7.0292, "step": 30420 }, { "epoch": 0.7106450803066306, "grad_norm": 3.984375, "learning_rate": 1.0197727556578193e-05, "loss": 6.9742, "step": 30430 }, { "epoch": 0.7108786146741319, "grad_norm": 4.5, "learning_rate": 1.0182493324666745e-05, "loss": 7.0279, "step": 30440 }, { "epoch": 0.7111121490416333, "grad_norm": 5.53125, "learning_rate": 1.0167267569838123e-05, "loss": 7.0132, "step": 30450 }, { "epoch": 0.7113456834091347, "grad_norm": 5.59375, "learning_rate": 1.015205030080298e-05, "loss": 6.8166, "step": 30460 }, { "epoch": 0.711579217776636, "grad_norm": 4.71875, "learning_rate": 1.0136841526267087e-05, "loss": 7.0072, "step": 30470 }, { "epoch": 0.7118127521441374, "grad_norm": 3.703125, "learning_rate": 1.0121641254931386e-05, "loss": 6.9849, "step": 30480 }, { "epoch": 0.7120462865116388, "grad_norm": 5.25, "learning_rate": 1.010644949549192e-05, "loss": 6.9231, "step": 30490 }, { "epoch": 0.7122798208791401, "grad_norm": 4.6875, "learning_rate": 1.0091266256639895e-05, "loss": 7.0168, "step": 30500 }, { "epoch": 0.7122798208791401, "eval_loss": 6.95573616027832, "eval_runtime": 78.8294, "eval_samples_per_second": 12.686, "eval_steps_per_second": 12.686, "step": 30500 }, { "epoch": 0.7125133552466415, "grad_norm": 4.4375, "learning_rate": 1.0076091547061633e-05, "loss": 6.9097, "step": 30510 }, { "epoch": 0.7127468896141428, "grad_norm": 3.984375, "learning_rate": 1.0060925375438557e-05, "loss": 6.9826, "step": 30520 }, { "epoch": 0.7129804239816442, "grad_norm": 5.46875, "learning_rate": 1.0045767750447242e-05, "loss": 6.9374, "step": 30530 }, { "epoch": 0.7132139583491456, "grad_norm": 5.03125, "learning_rate": 1.0030618680759327e-05, "loss": 6.9642, "step": 30540 }, { "epoch": 0.7134474927166469, "grad_norm": 4.09375, "learning_rate": 1.00154781750416e-05, "loss": 6.9645, "step": 30550 }, { "epoch": 0.7136810270841483, "grad_norm": 6.59375, "learning_rate": 1.0000346241955921e-05, "loss": 6.9906, "step": 30560 }, { "epoch": 0.7139145614516497, "grad_norm": 5.34375, "learning_rate": 9.98522289015926e-06, "loss": 6.8978, "step": 30570 }, { "epoch": 0.714148095819151, "grad_norm": 4.65625, "learning_rate": 9.970108128303685e-06, "loss": 6.8557, "step": 30580 }, { "epoch": 0.7143816301866523, "grad_norm": 5.3125, "learning_rate": 9.955001965036315e-06, "loss": 6.9253, "step": 30590 }, { "epoch": 0.7146151645541537, "grad_norm": 5.65625, "learning_rate": 9.939904408999395e-06, "loss": 6.9301, "step": 30600 }, { "epoch": 0.714848698921655, "grad_norm": 5.15625, "learning_rate": 9.924815468830207e-06, "loss": 6.9889, "step": 30610 }, { "epoch": 0.7150822332891564, "grad_norm": 4.71875, "learning_rate": 9.909735153161123e-06, "loss": 6.9333, "step": 30620 }, { "epoch": 0.7153157676566577, "grad_norm": 5.03125, "learning_rate": 9.894663470619589e-06, "loss": 7.0036, "step": 30630 }, { "epoch": 0.7155493020241591, "grad_norm": 4.5625, "learning_rate": 9.8796004298281e-06, "loss": 6.9241, "step": 30640 }, { "epoch": 0.7157828363916605, "grad_norm": 4.34375, "learning_rate": 9.86454603940421e-06, "loss": 6.952, "step": 30650 }, { "epoch": 0.7160163707591618, "grad_norm": 4.4375, "learning_rate": 9.849500307960505e-06, "loss": 6.8671, "step": 30660 }, { "epoch": 0.7162499051266632, "grad_norm": 4.65625, "learning_rate": 9.834463244104657e-06, "loss": 6.9327, "step": 30670 }, { "epoch": 0.7164834394941646, "grad_norm": 4.03125, "learning_rate": 9.81943485643934e-06, "loss": 6.953, "step": 30680 }, { "epoch": 0.7167169738616659, "grad_norm": 4.46875, "learning_rate": 9.804415153562291e-06, "loss": 6.9815, "step": 30690 }, { "epoch": 0.7169505082291673, "grad_norm": 4.9375, "learning_rate": 9.789404144066266e-06, "loss": 6.9015, "step": 30700 }, { "epoch": 0.7171840425966687, "grad_norm": 4.03125, "learning_rate": 9.77440183653906e-06, "loss": 6.8758, "step": 30710 }, { "epoch": 0.71741757696417, "grad_norm": 4.09375, "learning_rate": 9.759408239563474e-06, "loss": 6.9542, "step": 30720 }, { "epoch": 0.7176511113316714, "grad_norm": 4.1875, "learning_rate": 9.744423361717323e-06, "loss": 7.0278, "step": 30730 }, { "epoch": 0.7178846456991727, "grad_norm": 3.5625, "learning_rate": 9.729447211573447e-06, "loss": 6.8862, "step": 30740 }, { "epoch": 0.718118180066674, "grad_norm": 4.3125, "learning_rate": 9.714479797699694e-06, "loss": 6.999, "step": 30750 }, { "epoch": 0.7183517144341754, "grad_norm": 5.09375, "learning_rate": 9.699521128658914e-06, "loss": 7.0098, "step": 30760 }, { "epoch": 0.7185852488016767, "grad_norm": 4.5625, "learning_rate": 9.684571213008933e-06, "loss": 6.9677, "step": 30770 }, { "epoch": 0.7188187831691781, "grad_norm": 5.21875, "learning_rate": 9.669630059302603e-06, "loss": 6.8975, "step": 30780 }, { "epoch": 0.7190523175366795, "grad_norm": 3.75, "learning_rate": 9.654697676087734e-06, "loss": 7.0218, "step": 30790 }, { "epoch": 0.7192858519041808, "grad_norm": 4.1875, "learning_rate": 9.63977407190712e-06, "loss": 6.9508, "step": 30800 }, { "epoch": 0.7195193862716822, "grad_norm": 4.40625, "learning_rate": 9.624859255298557e-06, "loss": 6.9686, "step": 30810 }, { "epoch": 0.7197529206391836, "grad_norm": 4.65625, "learning_rate": 9.609953234794791e-06, "loss": 6.9343, "step": 30820 }, { "epoch": 0.7199864550066849, "grad_norm": 4.9375, "learning_rate": 9.59505601892356e-06, "loss": 6.8442, "step": 30830 }, { "epoch": 0.7202199893741863, "grad_norm": 4.71875, "learning_rate": 9.580167616207523e-06, "loss": 7.012, "step": 30840 }, { "epoch": 0.7204535237416876, "grad_norm": 4.71875, "learning_rate": 9.565288035164346e-06, "loss": 6.9828, "step": 30850 }, { "epoch": 0.720687058109189, "grad_norm": 5.09375, "learning_rate": 9.550417284306604e-06, "loss": 6.9671, "step": 30860 }, { "epoch": 0.7209205924766904, "grad_norm": 5.46875, "learning_rate": 9.53555537214185e-06, "loss": 6.9167, "step": 30870 }, { "epoch": 0.7211541268441917, "grad_norm": 4.78125, "learning_rate": 9.52070230717258e-06, "loss": 6.9803, "step": 30880 }, { "epoch": 0.7213876612116931, "grad_norm": 5.78125, "learning_rate": 9.505858097896204e-06, "loss": 6.9636, "step": 30890 }, { "epoch": 0.7216211955791945, "grad_norm": 5.15625, "learning_rate": 9.491022752805095e-06, "loss": 6.9355, "step": 30900 }, { "epoch": 0.7218547299466958, "grad_norm": 3.9375, "learning_rate": 9.476196280386528e-06, "loss": 6.9886, "step": 30910 }, { "epoch": 0.7220882643141971, "grad_norm": 5.0, "learning_rate": 9.46137868912273e-06, "loss": 7.0062, "step": 30920 }, { "epoch": 0.7223217986816985, "grad_norm": 4.09375, "learning_rate": 9.446569987490816e-06, "loss": 6.9965, "step": 30930 }, { "epoch": 0.7225553330491998, "grad_norm": 4.78125, "learning_rate": 9.431770183962837e-06, "loss": 7.0393, "step": 30940 }, { "epoch": 0.7227888674167012, "grad_norm": 4.5, "learning_rate": 9.416979287005761e-06, "loss": 6.9997, "step": 30950 }, { "epoch": 0.7230224017842025, "grad_norm": 3.90625, "learning_rate": 9.402197305081428e-06, "loss": 7.0262, "step": 30960 }, { "epoch": 0.7232559361517039, "grad_norm": 3.984375, "learning_rate": 9.387424246646614e-06, "loss": 6.9357, "step": 30970 }, { "epoch": 0.7234894705192053, "grad_norm": 4.5625, "learning_rate": 9.372660120152957e-06, "loss": 6.9803, "step": 30980 }, { "epoch": 0.7237230048867066, "grad_norm": 4.1875, "learning_rate": 9.35790493404701e-06, "loss": 6.9159, "step": 30990 }, { "epoch": 0.723956539254208, "grad_norm": 5.15625, "learning_rate": 9.34315869677021e-06, "loss": 6.9832, "step": 31000 }, { "epoch": 0.723956539254208, "eval_loss": 6.955427169799805, "eval_runtime": 79.1312, "eval_samples_per_second": 12.637, "eval_steps_per_second": 12.637, "step": 31000 }, { "epoch": 0.7241900736217094, "grad_norm": 4.125, "learning_rate": 9.328421416758848e-06, "loss": 6.8763, "step": 31010 }, { "epoch": 0.7244236079892107, "grad_norm": 4.9375, "learning_rate": 9.313693102444127e-06, "loss": 6.9902, "step": 31020 }, { "epoch": 0.7246571423567121, "grad_norm": 4.90625, "learning_rate": 9.29897376225209e-06, "loss": 7.0164, "step": 31030 }, { "epoch": 0.7248906767242135, "grad_norm": 4.46875, "learning_rate": 9.284263404603669e-06, "loss": 6.8879, "step": 31040 }, { "epoch": 0.7251242110917148, "grad_norm": 6.0625, "learning_rate": 9.269562037914626e-06, "loss": 6.9123, "step": 31050 }, { "epoch": 0.7253577454592162, "grad_norm": 4.5625, "learning_rate": 9.254869670595634e-06, "loss": 6.9312, "step": 31060 }, { "epoch": 0.7255912798267175, "grad_norm": 3.8125, "learning_rate": 9.240186311052165e-06, "loss": 6.9464, "step": 31070 }, { "epoch": 0.7258248141942188, "grad_norm": 4.4375, "learning_rate": 9.225511967684549e-06, "loss": 6.9248, "step": 31080 }, { "epoch": 0.7260583485617202, "grad_norm": 4.78125, "learning_rate": 9.210846648887986e-06, "loss": 6.9869, "step": 31090 }, { "epoch": 0.7262918829292215, "grad_norm": 4.9375, "learning_rate": 9.196190363052468e-06, "loss": 7.0174, "step": 31100 }, { "epoch": 0.7265254172967229, "grad_norm": 4.53125, "learning_rate": 9.181543118562855e-06, "loss": 6.982, "step": 31110 }, { "epoch": 0.7267589516642243, "grad_norm": 5.40625, "learning_rate": 9.166904923798821e-06, "loss": 7.0604, "step": 31120 }, { "epoch": 0.7269924860317256, "grad_norm": 4.1875, "learning_rate": 9.15227578713488e-06, "loss": 6.9122, "step": 31130 }, { "epoch": 0.727226020399227, "grad_norm": 4.5625, "learning_rate": 9.137655716940336e-06, "loss": 6.9747, "step": 31140 }, { "epoch": 0.7274595547667284, "grad_norm": 5.09375, "learning_rate": 9.123044721579305e-06, "loss": 7.0102, "step": 31150 }, { "epoch": 0.7276930891342297, "grad_norm": 3.625, "learning_rate": 9.10844280941075e-06, "loss": 6.9575, "step": 31160 }, { "epoch": 0.7279266235017311, "grad_norm": 4.84375, "learning_rate": 9.09384998878838e-06, "loss": 7.0254, "step": 31170 }, { "epoch": 0.7281601578692324, "grad_norm": 5.15625, "learning_rate": 9.079266268060776e-06, "loss": 7.024, "step": 31180 }, { "epoch": 0.7283936922367338, "grad_norm": 4.5, "learning_rate": 9.064691655571241e-06, "loss": 6.9259, "step": 31190 }, { "epoch": 0.7286272266042352, "grad_norm": 5.09375, "learning_rate": 9.05012615965792e-06, "loss": 6.9613, "step": 31200 }, { "epoch": 0.7288607609717365, "grad_norm": 4.5, "learning_rate": 9.035569788653707e-06, "loss": 6.9324, "step": 31210 }, { "epoch": 0.7290942953392379, "grad_norm": 4.0625, "learning_rate": 9.021022550886286e-06, "loss": 6.9933, "step": 31220 }, { "epoch": 0.7293278297067393, "grad_norm": 4.75, "learning_rate": 9.006484454678122e-06, "loss": 6.9215, "step": 31230 }, { "epoch": 0.7295613640742405, "grad_norm": 4.5625, "learning_rate": 8.991955508346452e-06, "loss": 7.0303, "step": 31240 }, { "epoch": 0.729794898441742, "grad_norm": 5.34375, "learning_rate": 8.977435720203281e-06, "loss": 6.965, "step": 31250 }, { "epoch": 0.7300284328092433, "grad_norm": 5.09375, "learning_rate": 8.96292509855535e-06, "loss": 6.961, "step": 31260 }, { "epoch": 0.7302619671767446, "grad_norm": 4.8125, "learning_rate": 8.948423651704189e-06, "loss": 6.9761, "step": 31270 }, { "epoch": 0.730495501544246, "grad_norm": 4.125, "learning_rate": 8.933931387946049e-06, "loss": 6.9708, "step": 31280 }, { "epoch": 0.7307290359117473, "grad_norm": 4.40625, "learning_rate": 8.919448315571946e-06, "loss": 6.9016, "step": 31290 }, { "epoch": 0.7309625702792487, "grad_norm": 4.0, "learning_rate": 8.904974442867647e-06, "loss": 6.9031, "step": 31300 }, { "epoch": 0.7311961046467501, "grad_norm": 5.1875, "learning_rate": 8.89050977811362e-06, "loss": 6.9974, "step": 31310 }, { "epoch": 0.7314296390142514, "grad_norm": 5.1875, "learning_rate": 8.87605432958511e-06, "loss": 6.9831, "step": 31320 }, { "epoch": 0.7316631733817528, "grad_norm": 5.0625, "learning_rate": 8.861608105552044e-06, "loss": 6.9093, "step": 31330 }, { "epoch": 0.7318967077492542, "grad_norm": 3.453125, "learning_rate": 8.847171114279115e-06, "loss": 6.9313, "step": 31340 }, { "epoch": 0.7321302421167555, "grad_norm": 4.15625, "learning_rate": 8.832743364025694e-06, "loss": 6.9338, "step": 31350 }, { "epoch": 0.7323637764842569, "grad_norm": 4.46875, "learning_rate": 8.818324863045894e-06, "loss": 7.0143, "step": 31360 }, { "epoch": 0.7325973108517583, "grad_norm": 4.8125, "learning_rate": 8.803915619588534e-06, "loss": 7.0033, "step": 31370 }, { "epoch": 0.7328308452192596, "grad_norm": 5.0625, "learning_rate": 8.789515641897118e-06, "loss": 6.91, "step": 31380 }, { "epoch": 0.733064379586761, "grad_norm": 4.375, "learning_rate": 8.775124938209872e-06, "loss": 6.9594, "step": 31390 }, { "epoch": 0.7332979139542622, "grad_norm": 3.96875, "learning_rate": 8.760743516759692e-06, "loss": 6.9474, "step": 31400 }, { "epoch": 0.7335314483217636, "grad_norm": 4.625, "learning_rate": 8.746371385774182e-06, "loss": 6.9465, "step": 31410 }, { "epoch": 0.733764982689265, "grad_norm": 4.21875, "learning_rate": 8.73200855347563e-06, "loss": 6.9736, "step": 31420 }, { "epoch": 0.7339985170567663, "grad_norm": 4.90625, "learning_rate": 8.717655028081e-06, "loss": 6.9808, "step": 31430 }, { "epoch": 0.7342320514242677, "grad_norm": 4.53125, "learning_rate": 8.703310817801934e-06, "loss": 6.9106, "step": 31440 }, { "epoch": 0.7344655857917691, "grad_norm": 5.15625, "learning_rate": 8.68897593084473e-06, "loss": 6.9328, "step": 31450 }, { "epoch": 0.7346991201592704, "grad_norm": 4.15625, "learning_rate": 8.67465037541038e-06, "loss": 6.9502, "step": 31460 }, { "epoch": 0.7349326545267718, "grad_norm": 4.125, "learning_rate": 8.660334159694503e-06, "loss": 6.9017, "step": 31470 }, { "epoch": 0.7351661888942731, "grad_norm": 4.65625, "learning_rate": 8.646027291887405e-06, "loss": 6.9359, "step": 31480 }, { "epoch": 0.7353997232617745, "grad_norm": 4.34375, "learning_rate": 8.631729780174033e-06, "loss": 6.9331, "step": 31490 }, { "epoch": 0.7356332576292759, "grad_norm": 5.3125, "learning_rate": 8.617441632733988e-06, "loss": 6.9951, "step": 31500 }, { "epoch": 0.7356332576292759, "eval_loss": 6.954710960388184, "eval_runtime": 78.6676, "eval_samples_per_second": 12.712, "eval_steps_per_second": 12.712, "step": 31500 }, { "epoch": 0.7358667919967772, "grad_norm": 4.71875, "learning_rate": 8.6031628577415e-06, "loss": 6.972, "step": 31510 }, { "epoch": 0.7361003263642786, "grad_norm": 4.40625, "learning_rate": 8.588893463365438e-06, "loss": 7.0019, "step": 31520 }, { "epoch": 0.73633386073178, "grad_norm": 4.0625, "learning_rate": 8.574633457769313e-06, "loss": 6.9434, "step": 31530 }, { "epoch": 0.7365673950992813, "grad_norm": 4.15625, "learning_rate": 8.560382849111266e-06, "loss": 6.9661, "step": 31540 }, { "epoch": 0.7368009294667827, "grad_norm": 4.09375, "learning_rate": 8.546141645544062e-06, "loss": 6.9446, "step": 31550 }, { "epoch": 0.7370344638342841, "grad_norm": 6.125, "learning_rate": 8.531909855215067e-06, "loss": 7.0183, "step": 31560 }, { "epoch": 0.7372679982017853, "grad_norm": 4.65625, "learning_rate": 8.517687486266296e-06, "loss": 6.9931, "step": 31570 }, { "epoch": 0.7375015325692867, "grad_norm": 4.125, "learning_rate": 8.503474546834342e-06, "loss": 6.9669, "step": 31580 }, { "epoch": 0.737735066936788, "grad_norm": 4.28125, "learning_rate": 8.489271045050407e-06, "loss": 7.0131, "step": 31590 }, { "epoch": 0.7379686013042894, "grad_norm": 5.3125, "learning_rate": 8.475076989040312e-06, "loss": 6.9254, "step": 31600 }, { "epoch": 0.7382021356717908, "grad_norm": 3.734375, "learning_rate": 8.460892386924463e-06, "loss": 6.9543, "step": 31610 }, { "epoch": 0.7384356700392921, "grad_norm": 4.8125, "learning_rate": 8.446717246817867e-06, "loss": 6.9142, "step": 31620 }, { "epoch": 0.7386692044067935, "grad_norm": 5.0625, "learning_rate": 8.432551576830097e-06, "loss": 6.9087, "step": 31630 }, { "epoch": 0.7389027387742949, "grad_norm": 4.875, "learning_rate": 8.41839538506533e-06, "loss": 6.9673, "step": 31640 }, { "epoch": 0.7391362731417962, "grad_norm": 4.3125, "learning_rate": 8.4042486796223e-06, "loss": 7.0154, "step": 31650 }, { "epoch": 0.7393698075092976, "grad_norm": 3.953125, "learning_rate": 8.39011146859433e-06, "loss": 6.9997, "step": 31660 }, { "epoch": 0.739603341876799, "grad_norm": 4.65625, "learning_rate": 8.375983760069319e-06, "loss": 6.9201, "step": 31670 }, { "epoch": 0.7398368762443003, "grad_norm": 5.15625, "learning_rate": 8.361865562129695e-06, "loss": 6.9537, "step": 31680 }, { "epoch": 0.7400704106118017, "grad_norm": 4.5, "learning_rate": 8.34775688285249e-06, "loss": 6.9803, "step": 31690 }, { "epoch": 0.740303944979303, "grad_norm": 4.46875, "learning_rate": 8.33365773030924e-06, "loss": 7.03, "step": 31700 }, { "epoch": 0.7405374793468044, "grad_norm": 5.53125, "learning_rate": 8.319568112566086e-06, "loss": 6.9283, "step": 31710 }, { "epoch": 0.7407710137143058, "grad_norm": 3.859375, "learning_rate": 8.305488037683665e-06, "loss": 6.932, "step": 31720 }, { "epoch": 0.741004548081807, "grad_norm": 5.40625, "learning_rate": 8.291417513717181e-06, "loss": 6.9337, "step": 31730 }, { "epoch": 0.7412380824493084, "grad_norm": 4.15625, "learning_rate": 8.277356548716384e-06, "loss": 6.9896, "step": 31740 }, { "epoch": 0.7414716168168098, "grad_norm": 4.78125, "learning_rate": 8.263305150725515e-06, "loss": 6.9882, "step": 31750 }, { "epoch": 0.7417051511843111, "grad_norm": 4.3125, "learning_rate": 8.249263327783388e-06, "loss": 6.9666, "step": 31760 }, { "epoch": 0.7419386855518125, "grad_norm": 5.125, "learning_rate": 8.235231087923304e-06, "loss": 7.0226, "step": 31770 }, { "epoch": 0.7421722199193139, "grad_norm": 3.875, "learning_rate": 8.221208439173097e-06, "loss": 6.9952, "step": 31780 }, { "epoch": 0.7424057542868152, "grad_norm": 6.53125, "learning_rate": 8.207195389555128e-06, "loss": 6.9548, "step": 31790 }, { "epoch": 0.7426392886543166, "grad_norm": 3.65625, "learning_rate": 8.193191947086226e-06, "loss": 6.9316, "step": 31800 }, { "epoch": 0.7428728230218179, "grad_norm": 3.984375, "learning_rate": 8.17919811977777e-06, "loss": 6.9666, "step": 31810 }, { "epoch": 0.7431063573893193, "grad_norm": 6.96875, "learning_rate": 8.1652139156356e-06, "loss": 6.963, "step": 31820 }, { "epoch": 0.7433398917568207, "grad_norm": 4.59375, "learning_rate": 8.151239342660083e-06, "loss": 6.9217, "step": 31830 }, { "epoch": 0.743573426124322, "grad_norm": 3.9375, "learning_rate": 8.137274408846044e-06, "loss": 6.9757, "step": 31840 }, { "epoch": 0.7438069604918234, "grad_norm": 4.375, "learning_rate": 8.123319122182815e-06, "loss": 6.9579, "step": 31850 }, { "epoch": 0.7440404948593248, "grad_norm": 5.5, "learning_rate": 8.109373490654218e-06, "loss": 6.971, "step": 31860 }, { "epoch": 0.7442740292268261, "grad_norm": 6.1875, "learning_rate": 8.095437522238514e-06, "loss": 6.9058, "step": 31870 }, { "epoch": 0.7445075635943275, "grad_norm": 5.40625, "learning_rate": 8.081511224908478e-06, "loss": 7.0105, "step": 31880 }, { "epoch": 0.7447410979618289, "grad_norm": 3.609375, "learning_rate": 8.067594606631315e-06, "loss": 6.9915, "step": 31890 }, { "epoch": 0.7449746323293301, "grad_norm": 4.46875, "learning_rate": 8.053687675368718e-06, "loss": 6.9716, "step": 31900 }, { "epoch": 0.7452081666968315, "grad_norm": 5.1875, "learning_rate": 8.039790439076833e-06, "loss": 6.9138, "step": 31910 }, { "epoch": 0.7454417010643328, "grad_norm": 3.734375, "learning_rate": 8.025902905706265e-06, "loss": 6.9523, "step": 31920 }, { "epoch": 0.7456752354318342, "grad_norm": 4.59375, "learning_rate": 8.012025083202048e-06, "loss": 6.9671, "step": 31930 }, { "epoch": 0.7459087697993356, "grad_norm": 3.515625, "learning_rate": 7.998156979503671e-06, "loss": 6.9403, "step": 31940 }, { "epoch": 0.7461423041668369, "grad_norm": 4.53125, "learning_rate": 7.984298602545073e-06, "loss": 6.9256, "step": 31950 }, { "epoch": 0.7463758385343383, "grad_norm": 3.609375, "learning_rate": 7.970449960254606e-06, "loss": 6.9441, "step": 31960 }, { "epoch": 0.7466093729018397, "grad_norm": 4.90625, "learning_rate": 7.956611060555074e-06, "loss": 6.9654, "step": 31970 }, { "epoch": 0.746842907269341, "grad_norm": 5.0, "learning_rate": 7.942781911363703e-06, "loss": 6.9696, "step": 31980 }, { "epoch": 0.7470764416368424, "grad_norm": 5.4375, "learning_rate": 7.928962520592146e-06, "loss": 6.898, "step": 31990 }, { "epoch": 0.7473099760043438, "grad_norm": 3.28125, "learning_rate": 7.915152896146449e-06, "loss": 6.9451, "step": 32000 }, { "epoch": 0.7473099760043438, "eval_loss": 6.954288005828857, "eval_runtime": 78.5885, "eval_samples_per_second": 12.725, "eval_steps_per_second": 12.725, "step": 32000 }, { "epoch": 0.7475435103718451, "grad_norm": 4.4375, "learning_rate": 7.901353045927082e-06, "loss": 6.9556, "step": 32010 }, { "epoch": 0.7477770447393465, "grad_norm": 3.65625, "learning_rate": 7.887562977828939e-06, "loss": 6.9862, "step": 32020 }, { "epoch": 0.7480105791068478, "grad_norm": 5.25, "learning_rate": 7.873782699741302e-06, "loss": 6.9427, "step": 32030 }, { "epoch": 0.7482441134743492, "grad_norm": 3.578125, "learning_rate": 7.860012219547866e-06, "loss": 6.8241, "step": 32040 }, { "epoch": 0.7484776478418506, "grad_norm": 4.625, "learning_rate": 7.846251545126695e-06, "loss": 6.9839, "step": 32050 }, { "epoch": 0.7487111822093518, "grad_norm": 4.9375, "learning_rate": 7.832500684350271e-06, "loss": 6.9451, "step": 32060 }, { "epoch": 0.7489447165768532, "grad_norm": 5.875, "learning_rate": 7.818759645085449e-06, "loss": 6.9774, "step": 32070 }, { "epoch": 0.7491782509443546, "grad_norm": 5.09375, "learning_rate": 7.805028435193445e-06, "loss": 6.9182, "step": 32080 }, { "epoch": 0.7494117853118559, "grad_norm": 4.84375, "learning_rate": 7.791307062529907e-06, "loss": 6.9436, "step": 32090 }, { "epoch": 0.7496453196793573, "grad_norm": 4.84375, "learning_rate": 7.777595534944792e-06, "loss": 6.818, "step": 32100 }, { "epoch": 0.7498788540468587, "grad_norm": 4.5625, "learning_rate": 7.763893860282473e-06, "loss": 6.9603, "step": 32110 }, { "epoch": 0.75011238841436, "grad_norm": 5.15625, "learning_rate": 7.750202046381647e-06, "loss": 6.9718, "step": 32120 }, { "epoch": 0.7503459227818614, "grad_norm": 4.875, "learning_rate": 7.736520101075409e-06, "loss": 6.9516, "step": 32130 }, { "epoch": 0.7505794571493627, "grad_norm": 5.125, "learning_rate": 7.722848032191172e-06, "loss": 6.9427, "step": 32140 }, { "epoch": 0.7508129915168641, "grad_norm": 4.59375, "learning_rate": 7.709185847550718e-06, "loss": 6.9528, "step": 32150 }, { "epoch": 0.7510465258843655, "grad_norm": 4.1875, "learning_rate": 7.695533554970183e-06, "loss": 6.9974, "step": 32160 }, { "epoch": 0.7512800602518668, "grad_norm": 5.53125, "learning_rate": 7.681891162260015e-06, "loss": 6.9466, "step": 32170 }, { "epoch": 0.7515135946193682, "grad_norm": 4.375, "learning_rate": 7.668258677225033e-06, "loss": 7.0201, "step": 32180 }, { "epoch": 0.7517471289868696, "grad_norm": 5.25, "learning_rate": 7.65463610766435e-06, "loss": 6.9217, "step": 32190 }, { "epoch": 0.7519806633543709, "grad_norm": 3.921875, "learning_rate": 7.641023461371438e-06, "loss": 6.9796, "step": 32200 }, { "epoch": 0.7522141977218723, "grad_norm": 4.71875, "learning_rate": 7.627420746134078e-06, "loss": 6.9877, "step": 32210 }, { "epoch": 0.7524477320893737, "grad_norm": 4.25, "learning_rate": 7.61382796973438e-06, "loss": 6.9659, "step": 32220 }, { "epoch": 0.7526812664568749, "grad_norm": 4.25, "learning_rate": 7.600245139948753e-06, "loss": 6.9774, "step": 32230 }, { "epoch": 0.7529148008243763, "grad_norm": 4.09375, "learning_rate": 7.586672264547912e-06, "loss": 6.9206, "step": 32240 }, { "epoch": 0.7531483351918776, "grad_norm": 4.125, "learning_rate": 7.5731093512969055e-06, "loss": 6.9582, "step": 32250 }, { "epoch": 0.753381869559379, "grad_norm": 3.59375, "learning_rate": 7.559556407955049e-06, "loss": 7.0013, "step": 32260 }, { "epoch": 0.7536154039268804, "grad_norm": 4.34375, "learning_rate": 7.546013442275973e-06, "loss": 6.9736, "step": 32270 }, { "epoch": 0.7538489382943817, "grad_norm": 5.75, "learning_rate": 7.532480462007605e-06, "loss": 6.9495, "step": 32280 }, { "epoch": 0.7540824726618831, "grad_norm": 4.0625, "learning_rate": 7.518957474892149e-06, "loss": 6.9536, "step": 32290 }, { "epoch": 0.7543160070293845, "grad_norm": 5.1875, "learning_rate": 7.505444488666094e-06, "loss": 6.9106, "step": 32300 }, { "epoch": 0.7545495413968858, "grad_norm": 4.09375, "learning_rate": 7.491941511060199e-06, "loss": 6.9719, "step": 32310 }, { "epoch": 0.7547830757643872, "grad_norm": 3.828125, "learning_rate": 7.47844854979951e-06, "loss": 6.9913, "step": 32320 }, { "epoch": 0.7550166101318886, "grad_norm": 4.46875, "learning_rate": 7.4649656126033414e-06, "loss": 6.9421, "step": 32330 }, { "epoch": 0.7552501444993899, "grad_norm": 4.875, "learning_rate": 7.451492707185279e-06, "loss": 6.9733, "step": 32340 }, { "epoch": 0.7554836788668913, "grad_norm": 4.625, "learning_rate": 7.438029841253142e-06, "loss": 6.9547, "step": 32350 }, { "epoch": 0.7557172132343926, "grad_norm": 4.28125, "learning_rate": 7.424577022509041e-06, "loss": 6.9118, "step": 32360 }, { "epoch": 0.755950747601894, "grad_norm": 4.9375, "learning_rate": 7.411134258649313e-06, "loss": 6.9152, "step": 32370 }, { "epoch": 0.7561842819693954, "grad_norm": 4.96875, "learning_rate": 7.397701557364548e-06, "loss": 6.8755, "step": 32380 }, { "epoch": 0.7564178163368966, "grad_norm": 3.8125, "learning_rate": 7.3842789263395854e-06, "loss": 6.9373, "step": 32390 }, { "epoch": 0.756651350704398, "grad_norm": 4.21875, "learning_rate": 7.370866373253504e-06, "loss": 6.9393, "step": 32400 }, { "epoch": 0.7568848850718994, "grad_norm": 5.1875, "learning_rate": 7.357463905779621e-06, "loss": 6.9642, "step": 32410 }, { "epoch": 0.7571184194394007, "grad_norm": 6.03125, "learning_rate": 7.344071531585461e-06, "loss": 6.9081, "step": 32420 }, { "epoch": 0.7573519538069021, "grad_norm": 3.609375, "learning_rate": 7.330689258332804e-06, "loss": 7.0421, "step": 32430 }, { "epoch": 0.7575854881744035, "grad_norm": 4.34375, "learning_rate": 7.317317093677622e-06, "loss": 6.9922, "step": 32440 }, { "epoch": 0.7578190225419048, "grad_norm": 4.875, "learning_rate": 7.303955045270131e-06, "loss": 6.9146, "step": 32450 }, { "epoch": 0.7580525569094062, "grad_norm": 4.4375, "learning_rate": 7.290603120754749e-06, "loss": 6.9266, "step": 32460 }, { "epoch": 0.7582860912769075, "grad_norm": 5.40625, "learning_rate": 7.277261327770088e-06, "loss": 7.0116, "step": 32470 }, { "epoch": 0.7585196256444089, "grad_norm": 6.03125, "learning_rate": 7.263929673948991e-06, "loss": 6.9232, "step": 32480 }, { "epoch": 0.7587531600119103, "grad_norm": 4.375, "learning_rate": 7.250608166918471e-06, "loss": 6.9536, "step": 32490 }, { "epoch": 0.7589866943794116, "grad_norm": 4.40625, "learning_rate": 7.237296814299768e-06, "loss": 6.9173, "step": 32500 }, { "epoch": 0.7589866943794116, "eval_loss": 6.953794956207275, "eval_runtime": 78.6126, "eval_samples_per_second": 12.721, "eval_steps_per_second": 12.721, "step": 32500 }, { "epoch": 0.759220228746913, "grad_norm": 4.96875, "learning_rate": 7.223995623708274e-06, "loss": 6.901, "step": 32510 }, { "epoch": 0.7594537631144144, "grad_norm": 4.40625, "learning_rate": 7.210704602753602e-06, "loss": 6.9734, "step": 32520 }, { "epoch": 0.7596872974819157, "grad_norm": 4.9375, "learning_rate": 7.19742375903954e-06, "loss": 6.9079, "step": 32530 }, { "epoch": 0.7599208318494171, "grad_norm": 4.1875, "learning_rate": 7.184153100164029e-06, "loss": 6.9907, "step": 32540 }, { "epoch": 0.7601543662169183, "grad_norm": 4.90625, "learning_rate": 7.17089263371922e-06, "loss": 6.9363, "step": 32550 }, { "epoch": 0.7603879005844197, "grad_norm": 5.1875, "learning_rate": 7.1576423672914004e-06, "loss": 6.9024, "step": 32560 }, { "epoch": 0.7606214349519211, "grad_norm": 4.0, "learning_rate": 7.14440230846104e-06, "loss": 6.9167, "step": 32570 }, { "epoch": 0.7608549693194224, "grad_norm": 4.53125, "learning_rate": 7.131172464802774e-06, "loss": 6.9156, "step": 32580 }, { "epoch": 0.7610885036869238, "grad_norm": 5.1875, "learning_rate": 7.117952843885373e-06, "loss": 6.909, "step": 32590 }, { "epoch": 0.7613220380544252, "grad_norm": 4.8125, "learning_rate": 7.1047434532717814e-06, "loss": 6.922, "step": 32600 }, { "epoch": 0.7615555724219265, "grad_norm": 4.0625, "learning_rate": 7.091544300519065e-06, "loss": 7.011, "step": 32610 }, { "epoch": 0.7617891067894279, "grad_norm": 4.5, "learning_rate": 7.078355393178465e-06, "loss": 6.9836, "step": 32620 }, { "epoch": 0.7620226411569293, "grad_norm": 4.625, "learning_rate": 7.065176738795329e-06, "loss": 6.9259, "step": 32630 }, { "epoch": 0.7622561755244306, "grad_norm": 3.9375, "learning_rate": 7.052008344909159e-06, "loss": 6.9043, "step": 32640 }, { "epoch": 0.762489709891932, "grad_norm": 4.15625, "learning_rate": 7.038850219053591e-06, "loss": 6.9319, "step": 32650 }, { "epoch": 0.7627232442594333, "grad_norm": 5.6875, "learning_rate": 7.025702368756365e-06, "loss": 6.921, "step": 32660 }, { "epoch": 0.7629567786269347, "grad_norm": 3.96875, "learning_rate": 7.0125648015393644e-06, "loss": 7.0246, "step": 32670 }, { "epoch": 0.7631903129944361, "grad_norm": 4.28125, "learning_rate": 6.99943752491857e-06, "loss": 7.0234, "step": 32680 }, { "epoch": 0.7634238473619374, "grad_norm": 5.75, "learning_rate": 6.986320546404091e-06, "loss": 6.8901, "step": 32690 }, { "epoch": 0.7636573817294388, "grad_norm": 4.09375, "learning_rate": 6.973213873500137e-06, "loss": 6.9451, "step": 32700 }, { "epoch": 0.7638909160969402, "grad_norm": 3.640625, "learning_rate": 6.960117513705036e-06, "loss": 6.9816, "step": 32710 }, { "epoch": 0.7641244504644414, "grad_norm": 5.53125, "learning_rate": 6.947031474511196e-06, "loss": 6.9192, "step": 32720 }, { "epoch": 0.7643579848319428, "grad_norm": 4.34375, "learning_rate": 6.933955763405117e-06, "loss": 7.0752, "step": 32730 }, { "epoch": 0.7645915191994442, "grad_norm": 4.71875, "learning_rate": 6.920890387867424e-06, "loss": 7.0676, "step": 32740 }, { "epoch": 0.7648250535669455, "grad_norm": 4.21875, "learning_rate": 6.907835355372786e-06, "loss": 6.9149, "step": 32750 }, { "epoch": 0.7650585879344469, "grad_norm": 4.59375, "learning_rate": 6.894790673389987e-06, "loss": 6.967, "step": 32760 }, { "epoch": 0.7652921223019482, "grad_norm": 4.3125, "learning_rate": 6.881756349381876e-06, "loss": 6.9017, "step": 32770 }, { "epoch": 0.7655256566694496, "grad_norm": 4.71875, "learning_rate": 6.868732390805388e-06, "loss": 6.9648, "step": 32780 }, { "epoch": 0.765759191036951, "grad_norm": 4.09375, "learning_rate": 6.855718805111511e-06, "loss": 6.9437, "step": 32790 }, { "epoch": 0.7659927254044523, "grad_norm": 3.84375, "learning_rate": 6.842715599745298e-06, "loss": 6.884, "step": 32800 }, { "epoch": 0.7662262597719537, "grad_norm": 5.0, "learning_rate": 6.829722782145878e-06, "loss": 6.9469, "step": 32810 }, { "epoch": 0.7664597941394551, "grad_norm": 4.25, "learning_rate": 6.816740359746429e-06, "loss": 6.9303, "step": 32820 }, { "epoch": 0.7666933285069564, "grad_norm": 5.78125, "learning_rate": 6.8037683399741984e-06, "loss": 6.884, "step": 32830 }, { "epoch": 0.7669268628744578, "grad_norm": 4.78125, "learning_rate": 6.7908067302504455e-06, "loss": 6.994, "step": 32840 }, { "epoch": 0.7671603972419592, "grad_norm": 5.15625, "learning_rate": 6.777855537990516e-06, "loss": 6.9854, "step": 32850 }, { "epoch": 0.7673939316094605, "grad_norm": 4.65625, "learning_rate": 6.7649147706037645e-06, "loss": 6.971, "step": 32860 }, { "epoch": 0.7676274659769619, "grad_norm": 3.703125, "learning_rate": 6.751984435493586e-06, "loss": 6.8072, "step": 32870 }, { "epoch": 0.7678610003444631, "grad_norm": 5.5625, "learning_rate": 6.739064540057424e-06, "loss": 6.9919, "step": 32880 }, { "epoch": 0.7680945347119645, "grad_norm": 5.875, "learning_rate": 6.7261550916867395e-06, "loss": 6.9511, "step": 32890 }, { "epoch": 0.7683280690794659, "grad_norm": 4.28125, "learning_rate": 6.7132560977670206e-06, "loss": 6.9525, "step": 32900 }, { "epoch": 0.7685616034469672, "grad_norm": 4.625, "learning_rate": 6.7003675656777596e-06, "loss": 7.0069, "step": 32910 }, { "epoch": 0.7687951378144686, "grad_norm": 4.90625, "learning_rate": 6.687489502792485e-06, "loss": 7.035, "step": 32920 }, { "epoch": 0.76902867218197, "grad_norm": 5.0, "learning_rate": 6.674621916478716e-06, "loss": 6.8871, "step": 32930 }, { "epoch": 0.7692622065494713, "grad_norm": 4.5625, "learning_rate": 6.661764814097992e-06, "loss": 6.9754, "step": 32940 }, { "epoch": 0.7694957409169727, "grad_norm": 4.875, "learning_rate": 6.648918203005858e-06, "loss": 6.9591, "step": 32950 }, { "epoch": 0.7697292752844741, "grad_norm": 4.46875, "learning_rate": 6.636082090551834e-06, "loss": 7.0055, "step": 32960 }, { "epoch": 0.7699628096519754, "grad_norm": 4.28125, "learning_rate": 6.623256484079468e-06, "loss": 6.971, "step": 32970 }, { "epoch": 0.7701963440194768, "grad_norm": 5.28125, "learning_rate": 6.610441390926256e-06, "loss": 7.0247, "step": 32980 }, { "epoch": 0.7704298783869781, "grad_norm": 4.34375, "learning_rate": 6.597636818423722e-06, "loss": 6.9595, "step": 32990 }, { "epoch": 0.7706634127544795, "grad_norm": 3.796875, "learning_rate": 6.5848427738973265e-06, "loss": 6.9434, "step": 33000 }, { "epoch": 0.7706634127544795, "eval_loss": 6.9532599449157715, "eval_runtime": 78.1252, "eval_samples_per_second": 12.8, "eval_steps_per_second": 12.8, "step": 33000 }, { "epoch": 0.7708969471219809, "grad_norm": 4.96875, "learning_rate": 6.572059264666564e-06, "loss": 6.99, "step": 33010 }, { "epoch": 0.7711304814894822, "grad_norm": 4.78125, "learning_rate": 6.5592862980448505e-06, "loss": 7.0177, "step": 33020 }, { "epoch": 0.7713640158569836, "grad_norm": 5.5625, "learning_rate": 6.546523881339584e-06, "loss": 6.9452, "step": 33030 }, { "epoch": 0.771597550224485, "grad_norm": 4.875, "learning_rate": 6.533772021852144e-06, "loss": 6.9845, "step": 33040 }, { "epoch": 0.7718310845919862, "grad_norm": 4.59375, "learning_rate": 6.5210307268778495e-06, "loss": 7.0002, "step": 33050 }, { "epoch": 0.7720646189594876, "grad_norm": 4.5625, "learning_rate": 6.508300003705986e-06, "loss": 6.9783, "step": 33060 }, { "epoch": 0.772298153326989, "grad_norm": 4.5625, "learning_rate": 6.495579859619791e-06, "loss": 6.9639, "step": 33070 }, { "epoch": 0.7725316876944903, "grad_norm": 3.5625, "learning_rate": 6.482870301896457e-06, "loss": 6.9465, "step": 33080 }, { "epoch": 0.7727652220619917, "grad_norm": 5.03125, "learning_rate": 6.470171337807101e-06, "loss": 6.8545, "step": 33090 }, { "epoch": 0.772998756429493, "grad_norm": 6.3125, "learning_rate": 6.4574829746167855e-06, "loss": 6.9498, "step": 33100 }, { "epoch": 0.7732322907969944, "grad_norm": 6.0, "learning_rate": 6.444805219584524e-06, "loss": 6.9211, "step": 33110 }, { "epoch": 0.7734658251644958, "grad_norm": 6.15625, "learning_rate": 6.432138079963229e-06, "loss": 6.9787, "step": 33120 }, { "epoch": 0.7736993595319971, "grad_norm": 4.78125, "learning_rate": 6.419481562999788e-06, "loss": 6.9412, "step": 33130 }, { "epoch": 0.7739328938994985, "grad_norm": 4.78125, "learning_rate": 6.406835675934961e-06, "loss": 6.9522, "step": 33140 }, { "epoch": 0.7741664282669999, "grad_norm": 4.4375, "learning_rate": 6.394200426003466e-06, "loss": 7.0111, "step": 33150 }, { "epoch": 0.7743999626345012, "grad_norm": 4.21875, "learning_rate": 6.38157582043391e-06, "loss": 6.9913, "step": 33160 }, { "epoch": 0.7746334970020026, "grad_norm": 3.96875, "learning_rate": 6.368961866448814e-06, "loss": 6.9758, "step": 33170 }, { "epoch": 0.774867031369504, "grad_norm": 4.25, "learning_rate": 6.356358571264614e-06, "loss": 6.9591, "step": 33180 }, { "epoch": 0.7751005657370053, "grad_norm": 3.53125, "learning_rate": 6.343765942091645e-06, "loss": 6.9724, "step": 33190 }, { "epoch": 0.7753341001045067, "grad_norm": 5.65625, "learning_rate": 6.331183986134149e-06, "loss": 6.9348, "step": 33200 }, { "epoch": 0.7755676344720079, "grad_norm": 4.0, "learning_rate": 6.318612710590235e-06, "loss": 6.9552, "step": 33210 }, { "epoch": 0.7758011688395093, "grad_norm": 4.65625, "learning_rate": 6.306052122651934e-06, "loss": 7.0064, "step": 33220 }, { "epoch": 0.7760347032070107, "grad_norm": 5.5, "learning_rate": 6.293502229505135e-06, "loss": 6.8934, "step": 33230 }, { "epoch": 0.776268237574512, "grad_norm": 4.28125, "learning_rate": 6.280963038329627e-06, "loss": 6.9287, "step": 33240 }, { "epoch": 0.7765017719420134, "grad_norm": 4.75, "learning_rate": 6.268434556299077e-06, "loss": 6.9389, "step": 33250 }, { "epoch": 0.7767353063095148, "grad_norm": 5.09375, "learning_rate": 6.255916790581004e-06, "loss": 6.9087, "step": 33260 }, { "epoch": 0.7769688406770161, "grad_norm": 4.625, "learning_rate": 6.243409748336826e-06, "loss": 6.9847, "step": 33270 }, { "epoch": 0.7772023750445175, "grad_norm": 4.65625, "learning_rate": 6.2309134367218e-06, "loss": 6.9187, "step": 33280 }, { "epoch": 0.7774359094120189, "grad_norm": 4.34375, "learning_rate": 6.218427862885065e-06, "loss": 6.9966, "step": 33290 }, { "epoch": 0.7776694437795202, "grad_norm": 4.90625, "learning_rate": 6.205953033969594e-06, "loss": 6.9047, "step": 33300 }, { "epoch": 0.7779029781470216, "grad_norm": 4.90625, "learning_rate": 6.193488957112232e-06, "loss": 6.9155, "step": 33310 }, { "epoch": 0.7781365125145229, "grad_norm": 5.09375, "learning_rate": 6.1810356394436775e-06, "loss": 6.9834, "step": 33320 }, { "epoch": 0.7783700468820243, "grad_norm": 4.0625, "learning_rate": 6.1685930880884455e-06, "loss": 6.9755, "step": 33330 }, { "epoch": 0.7786035812495257, "grad_norm": 5.21875, "learning_rate": 6.156161310164923e-06, "loss": 7.0141, "step": 33340 }, { "epoch": 0.778837115617027, "grad_norm": 3.609375, "learning_rate": 6.143740312785307e-06, "loss": 6.9964, "step": 33350 }, { "epoch": 0.7790706499845284, "grad_norm": 4.1875, "learning_rate": 6.131330103055646e-06, "loss": 6.9492, "step": 33360 }, { "epoch": 0.7793041843520297, "grad_norm": 4.5, "learning_rate": 6.1189306880758166e-06, "loss": 6.9985, "step": 33370 }, { "epoch": 0.779537718719531, "grad_norm": 4.9375, "learning_rate": 6.1065420749395e-06, "loss": 6.9026, "step": 33380 }, { "epoch": 0.7797712530870324, "grad_norm": 5.1875, "learning_rate": 6.094164270734226e-06, "loss": 7.0094, "step": 33390 }, { "epoch": 0.7800047874545338, "grad_norm": 5.1875, "learning_rate": 6.08179728254131e-06, "loss": 6.9597, "step": 33400 }, { "epoch": 0.7802383218220351, "grad_norm": 4.75, "learning_rate": 6.069441117435909e-06, "loss": 6.9425, "step": 33410 }, { "epoch": 0.7804718561895365, "grad_norm": 4.5, "learning_rate": 6.057095782486963e-06, "loss": 6.9691, "step": 33420 }, { "epoch": 0.7807053905570378, "grad_norm": 4.78125, "learning_rate": 6.044761284757233e-06, "loss": 6.9054, "step": 33430 }, { "epoch": 0.7809389249245392, "grad_norm": 4.8125, "learning_rate": 6.032437631303281e-06, "loss": 6.968, "step": 33440 }, { "epoch": 0.7811724592920406, "grad_norm": 6.0, "learning_rate": 6.020124829175445e-06, "loss": 6.9138, "step": 33450 }, { "epoch": 0.7814059936595419, "grad_norm": 5.03125, "learning_rate": 6.007822885417882e-06, "loss": 7.0122, "step": 33460 }, { "epoch": 0.7816395280270433, "grad_norm": 5.15625, "learning_rate": 5.995531807068511e-06, "loss": 6.8842, "step": 33470 }, { "epoch": 0.7818730623945447, "grad_norm": 4.28125, "learning_rate": 5.983251601159051e-06, "loss": 6.9954, "step": 33480 }, { "epoch": 0.782106596762046, "grad_norm": 4.25, "learning_rate": 5.970982274715001e-06, "loss": 6.9428, "step": 33490 }, { "epoch": 0.7823401311295474, "grad_norm": 5.96875, "learning_rate": 5.958723834755639e-06, "loss": 6.9885, "step": 33500 }, { "epoch": 0.7823401311295474, "eval_loss": 6.952946186065674, "eval_runtime": 78.6044, "eval_samples_per_second": 12.722, "eval_steps_per_second": 12.722, "step": 33500 }, { "epoch": 0.7825736654970488, "grad_norm": 4.1875, "learning_rate": 5.946476288294001e-06, "loss": 6.9591, "step": 33510 }, { "epoch": 0.78280719986455, "grad_norm": 4.09375, "learning_rate": 5.9342396423368865e-06, "loss": 6.9588, "step": 33520 }, { "epoch": 0.7830407342320514, "grad_norm": 4.09375, "learning_rate": 5.922013903884888e-06, "loss": 7.0072, "step": 33530 }, { "epoch": 0.7832742685995527, "grad_norm": 4.65625, "learning_rate": 5.9097990799323235e-06, "loss": 7.022, "step": 33540 }, { "epoch": 0.7835078029670541, "grad_norm": 4.46875, "learning_rate": 5.8975951774672935e-06, "loss": 6.966, "step": 33550 }, { "epoch": 0.7837413373345555, "grad_norm": 5.21875, "learning_rate": 5.885402203471637e-06, "loss": 6.9549, "step": 33560 }, { "epoch": 0.7839748717020568, "grad_norm": 5.0625, "learning_rate": 5.873220164920956e-06, "loss": 6.8287, "step": 33570 }, { "epoch": 0.7842084060695582, "grad_norm": 4.53125, "learning_rate": 5.861049068784572e-06, "loss": 6.967, "step": 33580 }, { "epoch": 0.7844419404370596, "grad_norm": 3.453125, "learning_rate": 5.848888922025553e-06, "loss": 7.0104, "step": 33590 }, { "epoch": 0.7846754748045609, "grad_norm": 3.875, "learning_rate": 5.8367397316007155e-06, "loss": 6.9676, "step": 33600 }, { "epoch": 0.7849090091720623, "grad_norm": 4.46875, "learning_rate": 5.8246015044606e-06, "loss": 6.9843, "step": 33610 }, { "epoch": 0.7851425435395636, "grad_norm": 4.09375, "learning_rate": 5.812474247549487e-06, "loss": 7.0206, "step": 33620 }, { "epoch": 0.785376077907065, "grad_norm": 4.03125, "learning_rate": 5.8003579678053505e-06, "loss": 6.9467, "step": 33630 }, { "epoch": 0.7856096122745664, "grad_norm": 4.9375, "learning_rate": 5.788252672159922e-06, "loss": 6.9605, "step": 33640 }, { "epoch": 0.7858431466420677, "grad_norm": 4.90625, "learning_rate": 5.77615836753862e-06, "loss": 7.0205, "step": 33650 }, { "epoch": 0.7860766810095691, "grad_norm": 3.84375, "learning_rate": 5.76407506086058e-06, "loss": 6.9682, "step": 33660 }, { "epoch": 0.7863102153770705, "grad_norm": 5.5, "learning_rate": 5.752002759038661e-06, "loss": 6.9419, "step": 33670 }, { "epoch": 0.7865437497445718, "grad_norm": 4.4375, "learning_rate": 5.739941468979412e-06, "loss": 6.9551, "step": 33680 }, { "epoch": 0.7867772841120731, "grad_norm": 5.1875, "learning_rate": 5.727891197583099e-06, "loss": 6.9595, "step": 33690 }, { "epoch": 0.7870108184795745, "grad_norm": 4.28125, "learning_rate": 5.715851951743653e-06, "loss": 6.9858, "step": 33700 }, { "epoch": 0.7872443528470758, "grad_norm": 5.28125, "learning_rate": 5.703823738348732e-06, "loss": 6.9271, "step": 33710 }, { "epoch": 0.7874778872145772, "grad_norm": 4.5, "learning_rate": 5.691806564279653e-06, "loss": 6.9999, "step": 33720 }, { "epoch": 0.7877114215820785, "grad_norm": 3.78125, "learning_rate": 5.679800436411439e-06, "loss": 6.9836, "step": 33730 }, { "epoch": 0.7879449559495799, "grad_norm": 4.3125, "learning_rate": 5.667805361612788e-06, "loss": 6.8489, "step": 33740 }, { "epoch": 0.7881784903170813, "grad_norm": 3.921875, "learning_rate": 5.655821346746065e-06, "loss": 6.9073, "step": 33750 }, { "epoch": 0.7884120246845826, "grad_norm": 4.4375, "learning_rate": 5.643848398667323e-06, "loss": 6.8657, "step": 33760 }, { "epoch": 0.788645559052084, "grad_norm": 4.9375, "learning_rate": 5.631886524226263e-06, "loss": 6.9353, "step": 33770 }, { "epoch": 0.7888790934195854, "grad_norm": 4.53125, "learning_rate": 5.619935730266273e-06, "loss": 6.9952, "step": 33780 }, { "epoch": 0.7891126277870867, "grad_norm": 4.84375, "learning_rate": 5.607996023624387e-06, "loss": 7.0297, "step": 33790 }, { "epoch": 0.7893461621545881, "grad_norm": 4.09375, "learning_rate": 5.5960674111313e-06, "loss": 6.9875, "step": 33800 }, { "epoch": 0.7895796965220895, "grad_norm": 5.09375, "learning_rate": 5.584149899611374e-06, "loss": 6.948, "step": 33810 }, { "epoch": 0.7898132308895908, "grad_norm": 5.375, "learning_rate": 5.572243495882587e-06, "loss": 6.9281, "step": 33820 }, { "epoch": 0.7900467652570922, "grad_norm": 5.0625, "learning_rate": 5.560348206756602e-06, "loss": 6.9364, "step": 33830 }, { "epoch": 0.7902802996245935, "grad_norm": 4.53125, "learning_rate": 5.5484640390386865e-06, "loss": 7.0355, "step": 33840 }, { "epoch": 0.7905138339920948, "grad_norm": 4.21875, "learning_rate": 5.536590999527769e-06, "loss": 6.9085, "step": 33850 }, { "epoch": 0.7907473683595962, "grad_norm": 4.28125, "learning_rate": 5.524729095016406e-06, "loss": 6.947, "step": 33860 }, { "epoch": 0.7909809027270975, "grad_norm": 4.09375, "learning_rate": 5.512878332290788e-06, "loss": 6.9565, "step": 33870 }, { "epoch": 0.7912144370945989, "grad_norm": 5.0625, "learning_rate": 5.501038718130725e-06, "loss": 6.976, "step": 33880 }, { "epoch": 0.7914479714621003, "grad_norm": 5.46875, "learning_rate": 5.489210259309638e-06, "loss": 6.8939, "step": 33890 }, { "epoch": 0.7916815058296016, "grad_norm": 3.703125, "learning_rate": 5.477392962594593e-06, "loss": 6.9832, "step": 33900 }, { "epoch": 0.791915040197103, "grad_norm": 3.65625, "learning_rate": 5.465586834746236e-06, "loss": 6.9431, "step": 33910 }, { "epoch": 0.7921485745646044, "grad_norm": 4.5, "learning_rate": 5.453791882518869e-06, "loss": 7.0045, "step": 33920 }, { "epoch": 0.7923821089321057, "grad_norm": 4.53125, "learning_rate": 5.442008112660355e-06, "loss": 6.9844, "step": 33930 }, { "epoch": 0.7926156432996071, "grad_norm": 4.9375, "learning_rate": 5.43023553191219e-06, "loss": 6.9258, "step": 33940 }, { "epoch": 0.7928491776671084, "grad_norm": 4.96875, "learning_rate": 5.4184741470094506e-06, "loss": 6.9149, "step": 33950 }, { "epoch": 0.7930827120346098, "grad_norm": 4.0, "learning_rate": 5.406723964680807e-06, "loss": 7.01, "step": 33960 }, { "epoch": 0.7933162464021112, "grad_norm": 4.96875, "learning_rate": 5.394984991648541e-06, "loss": 6.9168, "step": 33970 }, { "epoch": 0.7935497807696125, "grad_norm": 4.375, "learning_rate": 5.383257234628503e-06, "loss": 6.9924, "step": 33980 }, { "epoch": 0.7937833151371139, "grad_norm": 3.859375, "learning_rate": 5.371540700330141e-06, "loss": 6.9515, "step": 33990 }, { "epoch": 0.7940168495046153, "grad_norm": 4.875, "learning_rate": 5.359835395456464e-06, "loss": 6.9361, "step": 34000 }, { "epoch": 0.7940168495046153, "eval_loss": 6.952498435974121, "eval_runtime": 78.3017, "eval_samples_per_second": 12.771, "eval_steps_per_second": 12.771, "step": 34000 }, { "epoch": 0.7942503838721165, "grad_norm": 5.78125, "learning_rate": 5.348141326704076e-06, "loss": 6.9873, "step": 34010 }, { "epoch": 0.794483918239618, "grad_norm": 4.03125, "learning_rate": 5.336458500763136e-06, "loss": 6.9506, "step": 34020 }, { "epoch": 0.7947174526071193, "grad_norm": 5.71875, "learning_rate": 5.324786924317371e-06, "loss": 6.9701, "step": 34030 }, { "epoch": 0.7949509869746206, "grad_norm": 3.890625, "learning_rate": 5.313126604044103e-06, "loss": 6.9627, "step": 34040 }, { "epoch": 0.795184521342122, "grad_norm": 4.03125, "learning_rate": 5.301477546614172e-06, "loss": 6.9774, "step": 34050 }, { "epoch": 0.7954180557096233, "grad_norm": 4.0, "learning_rate": 5.289839758692006e-06, "loss": 6.9446, "step": 34060 }, { "epoch": 0.7956515900771247, "grad_norm": 4.4375, "learning_rate": 5.278213246935562e-06, "loss": 6.9448, "step": 34070 }, { "epoch": 0.7958851244446261, "grad_norm": 4.90625, "learning_rate": 5.266598017996369e-06, "loss": 6.959, "step": 34080 }, { "epoch": 0.7961186588121274, "grad_norm": 3.71875, "learning_rate": 5.254994078519476e-06, "loss": 6.9709, "step": 34090 }, { "epoch": 0.7963521931796288, "grad_norm": 6.03125, "learning_rate": 5.243401435143494e-06, "loss": 6.9574, "step": 34100 }, { "epoch": 0.7965857275471302, "grad_norm": 4.4375, "learning_rate": 5.231820094500573e-06, "loss": 6.9571, "step": 34110 }, { "epoch": 0.7968192619146315, "grad_norm": 4.625, "learning_rate": 5.22025006321637e-06, "loss": 6.9492, "step": 34120 }, { "epoch": 0.7970527962821329, "grad_norm": 5.0, "learning_rate": 5.208691347910108e-06, "loss": 6.918, "step": 34130 }, { "epoch": 0.7972863306496343, "grad_norm": 5.15625, "learning_rate": 5.1971439551945026e-06, "loss": 6.9763, "step": 34140 }, { "epoch": 0.7975198650171356, "grad_norm": 4.5625, "learning_rate": 5.1856078916758144e-06, "loss": 6.9914, "step": 34150 }, { "epoch": 0.797753399384637, "grad_norm": 4.875, "learning_rate": 5.174083163953822e-06, "loss": 6.9501, "step": 34160 }, { "epoch": 0.7979869337521382, "grad_norm": 4.6875, "learning_rate": 5.162569778621798e-06, "loss": 6.9491, "step": 34170 }, { "epoch": 0.7982204681196396, "grad_norm": 3.5, "learning_rate": 5.151067742266555e-06, "loss": 6.94, "step": 34180 }, { "epoch": 0.798454002487141, "grad_norm": 4.71875, "learning_rate": 5.139577061468382e-06, "loss": 6.9151, "step": 34190 }, { "epoch": 0.7986875368546423, "grad_norm": 4.09375, "learning_rate": 5.128097742801105e-06, "loss": 6.9568, "step": 34200 }, { "epoch": 0.7989210712221437, "grad_norm": 4.25, "learning_rate": 5.1166297928320175e-06, "loss": 6.855, "step": 34210 }, { "epoch": 0.7991546055896451, "grad_norm": 4.09375, "learning_rate": 5.105173218121928e-06, "loss": 7.0183, "step": 34220 }, { "epoch": 0.7993881399571464, "grad_norm": 4.03125, "learning_rate": 5.093728025225144e-06, "loss": 6.934, "step": 34230 }, { "epoch": 0.7996216743246478, "grad_norm": 4.6875, "learning_rate": 5.082294220689435e-06, "loss": 6.9467, "step": 34240 }, { "epoch": 0.7998552086921492, "grad_norm": 4.9375, "learning_rate": 5.070871811056082e-06, "loss": 6.9702, "step": 34250 }, { "epoch": 0.8000887430596505, "grad_norm": 4.375, "learning_rate": 5.059460802859828e-06, "loss": 6.9508, "step": 34260 }, { "epoch": 0.8003222774271519, "grad_norm": 6.1875, "learning_rate": 5.048061202628901e-06, "loss": 6.9084, "step": 34270 }, { "epoch": 0.8005558117946532, "grad_norm": 4.34375, "learning_rate": 5.036673016885007e-06, "loss": 6.9958, "step": 34280 }, { "epoch": 0.8007893461621546, "grad_norm": 4.59375, "learning_rate": 5.025296252143322e-06, "loss": 6.9619, "step": 34290 }, { "epoch": 0.801022880529656, "grad_norm": 4.40625, "learning_rate": 5.013930914912476e-06, "loss": 7.019, "step": 34300 }, { "epoch": 0.8012564148971573, "grad_norm": 5.71875, "learning_rate": 5.002577011694564e-06, "loss": 6.8562, "step": 34310 }, { "epoch": 0.8014899492646587, "grad_norm": 5.03125, "learning_rate": 4.991234548985158e-06, "loss": 6.9216, "step": 34320 }, { "epoch": 0.8017234836321601, "grad_norm": 4.625, "learning_rate": 4.979903533273253e-06, "loss": 6.9828, "step": 34330 }, { "epoch": 0.8019570179996613, "grad_norm": 3.625, "learning_rate": 4.9685839710413265e-06, "loss": 7.0088, "step": 34340 }, { "epoch": 0.8021905523671627, "grad_norm": 5.5625, "learning_rate": 4.9572758687652876e-06, "loss": 6.8895, "step": 34350 }, { "epoch": 0.8024240867346641, "grad_norm": 5.53125, "learning_rate": 4.9459792329145e-06, "loss": 6.9229, "step": 34360 }, { "epoch": 0.8026576211021654, "grad_norm": 4.6875, "learning_rate": 4.934694069951748e-06, "loss": 6.946, "step": 34370 }, { "epoch": 0.8028911554696668, "grad_norm": 5.6875, "learning_rate": 4.92342038633326e-06, "loss": 6.8253, "step": 34380 }, { "epoch": 0.8031246898371681, "grad_norm": 5.21875, "learning_rate": 4.912158188508709e-06, "loss": 7.0125, "step": 34390 }, { "epoch": 0.8033582242046695, "grad_norm": 4.28125, "learning_rate": 4.900907482921185e-06, "loss": 6.9611, "step": 34400 }, { "epoch": 0.8035917585721709, "grad_norm": 4.1875, "learning_rate": 4.889668276007217e-06, "loss": 6.9472, "step": 34410 }, { "epoch": 0.8038252929396722, "grad_norm": 5.0, "learning_rate": 4.8784405741967325e-06, "loss": 6.9783, "step": 34420 }, { "epoch": 0.8040588273071736, "grad_norm": 4.25, "learning_rate": 4.8672243839131e-06, "loss": 6.9369, "step": 34430 }, { "epoch": 0.804292361674675, "grad_norm": 4.46875, "learning_rate": 4.856019711573087e-06, "loss": 6.9329, "step": 34440 }, { "epoch": 0.8045258960421763, "grad_norm": 4.15625, "learning_rate": 4.8448265635868715e-06, "loss": 6.9399, "step": 34450 }, { "epoch": 0.8047594304096777, "grad_norm": 4.84375, "learning_rate": 4.833644946358051e-06, "loss": 7.0032, "step": 34460 }, { "epoch": 0.8049929647771791, "grad_norm": 4.9375, "learning_rate": 4.822474866283616e-06, "loss": 6.9667, "step": 34470 }, { "epoch": 0.8052264991446804, "grad_norm": 4.78125, "learning_rate": 4.8113163297539705e-06, "loss": 6.8596, "step": 34480 }, { "epoch": 0.8054600335121818, "grad_norm": 5.0625, "learning_rate": 4.800169343152888e-06, "loss": 6.9492, "step": 34490 }, { "epoch": 0.805693567879683, "grad_norm": 5.375, "learning_rate": 4.789033912857563e-06, "loss": 6.9141, "step": 34500 }, { "epoch": 0.805693567879683, "eval_loss": 6.952399253845215, "eval_runtime": 78.7878, "eval_samples_per_second": 12.692, "eval_steps_per_second": 12.692, "step": 34500 }, { "epoch": 0.8059271022471844, "grad_norm": 4.46875, "learning_rate": 4.777910045238551e-06, "loss": 6.9898, "step": 34510 }, { "epoch": 0.8061606366146858, "grad_norm": 5.21875, "learning_rate": 4.766797746659815e-06, "loss": 6.9286, "step": 34520 }, { "epoch": 0.8063941709821871, "grad_norm": 4.34375, "learning_rate": 4.7556970234787e-06, "loss": 6.981, "step": 34530 }, { "epoch": 0.8066277053496885, "grad_norm": 4.84375, "learning_rate": 4.7446078820459065e-06, "loss": 6.9585, "step": 34540 }, { "epoch": 0.8068612397171899, "grad_norm": 3.484375, "learning_rate": 4.733530328705535e-06, "loss": 7.0112, "step": 34550 }, { "epoch": 0.8070947740846912, "grad_norm": 4.0625, "learning_rate": 4.722464369795029e-06, "loss": 6.9107, "step": 34560 }, { "epoch": 0.8073283084521926, "grad_norm": 5.15625, "learning_rate": 4.711410011645231e-06, "loss": 7.0934, "step": 34570 }, { "epoch": 0.8075618428196939, "grad_norm": 5.0625, "learning_rate": 4.700367260580313e-06, "loss": 6.9786, "step": 34580 }, { "epoch": 0.8077953771871953, "grad_norm": 4.3125, "learning_rate": 4.6893361229178336e-06, "loss": 6.9166, "step": 34590 }, { "epoch": 0.8080289115546967, "grad_norm": 4.34375, "learning_rate": 4.678316604968702e-06, "loss": 6.9906, "step": 34600 }, { "epoch": 0.808262445922198, "grad_norm": 5.125, "learning_rate": 4.667308713037161e-06, "loss": 6.9335, "step": 34610 }, { "epoch": 0.8084959802896994, "grad_norm": 4.09375, "learning_rate": 4.6563124534208305e-06, "loss": 6.9338, "step": 34620 }, { "epoch": 0.8087295146572008, "grad_norm": 5.34375, "learning_rate": 4.645327832410648e-06, "loss": 7.03, "step": 34630 }, { "epoch": 0.8089630490247021, "grad_norm": 4.53125, "learning_rate": 4.63435485629091e-06, "loss": 6.9931, "step": 34640 }, { "epoch": 0.8091965833922035, "grad_norm": 5.46875, "learning_rate": 4.623393531339257e-06, "loss": 6.9719, "step": 34650 }, { "epoch": 0.8094301177597049, "grad_norm": 4.28125, "learning_rate": 4.612443863826638e-06, "loss": 6.8994, "step": 34660 }, { "epoch": 0.8096636521272061, "grad_norm": 5.1875, "learning_rate": 4.601505860017363e-06, "loss": 6.9454, "step": 34670 }, { "epoch": 0.8098971864947075, "grad_norm": 3.640625, "learning_rate": 4.590579526169039e-06, "loss": 6.9394, "step": 34680 }, { "epoch": 0.8101307208622088, "grad_norm": 5.625, "learning_rate": 4.579664868532627e-06, "loss": 7.0, "step": 34690 }, { "epoch": 0.8103642552297102, "grad_norm": 3.453125, "learning_rate": 4.56876189335238e-06, "loss": 6.9813, "step": 34700 }, { "epoch": 0.8105977895972116, "grad_norm": 3.734375, "learning_rate": 4.557870606865888e-06, "loss": 6.9571, "step": 34710 }, { "epoch": 0.8108313239647129, "grad_norm": 4.03125, "learning_rate": 4.5469910153040475e-06, "loss": 6.9535, "step": 34720 }, { "epoch": 0.8110648583322143, "grad_norm": 4.46875, "learning_rate": 4.536123124891068e-06, "loss": 6.9485, "step": 34730 }, { "epoch": 0.8112983926997157, "grad_norm": 5.21875, "learning_rate": 4.525266941844453e-06, "loss": 6.9358, "step": 34740 }, { "epoch": 0.811531927067217, "grad_norm": 4.125, "learning_rate": 4.5144224723750074e-06, "loss": 6.9893, "step": 34750 }, { "epoch": 0.8117654614347184, "grad_norm": 4.03125, "learning_rate": 4.5035897226868575e-06, "loss": 6.9752, "step": 34760 }, { "epoch": 0.8119989958022198, "grad_norm": 4.5625, "learning_rate": 4.492768698977401e-06, "loss": 6.9493, "step": 34770 }, { "epoch": 0.8122325301697211, "grad_norm": 4.5625, "learning_rate": 4.481959407437347e-06, "loss": 6.9779, "step": 34780 }, { "epoch": 0.8124660645372225, "grad_norm": 4.125, "learning_rate": 4.471161854250669e-06, "loss": 6.8718, "step": 34790 }, { "epoch": 0.8126995989047238, "grad_norm": 4.75, "learning_rate": 4.46037604559465e-06, "loss": 6.9718, "step": 34800 }, { "epoch": 0.8129331332722252, "grad_norm": 4.28125, "learning_rate": 4.449601987639837e-06, "loss": 6.8942, "step": 34810 }, { "epoch": 0.8131666676397266, "grad_norm": 6.46875, "learning_rate": 4.438839686550042e-06, "loss": 6.9759, "step": 34820 }, { "epoch": 0.8134002020072278, "grad_norm": 3.671875, "learning_rate": 4.4280891484824005e-06, "loss": 6.9878, "step": 34830 }, { "epoch": 0.8136337363747292, "grad_norm": 4.34375, "learning_rate": 4.41735037958726e-06, "loss": 6.9743, "step": 34840 }, { "epoch": 0.8138672707422306, "grad_norm": 5.5, "learning_rate": 4.406623386008279e-06, "loss": 6.9693, "step": 34850 }, { "epoch": 0.8141008051097319, "grad_norm": 4.65625, "learning_rate": 4.395908173882346e-06, "loss": 7.0015, "step": 34860 }, { "epoch": 0.8143343394772333, "grad_norm": 4.4375, "learning_rate": 4.385204749339638e-06, "loss": 6.9841, "step": 34870 }, { "epoch": 0.8145678738447347, "grad_norm": 3.75, "learning_rate": 4.374513118503562e-06, "loss": 6.9267, "step": 34880 }, { "epoch": 0.814801408212236, "grad_norm": 5.0, "learning_rate": 4.363833287490799e-06, "loss": 6.96, "step": 34890 }, { "epoch": 0.8150349425797374, "grad_norm": 4.375, "learning_rate": 4.353165262411274e-06, "loss": 6.9394, "step": 34900 }, { "epoch": 0.8152684769472387, "grad_norm": 4.25, "learning_rate": 4.3425090493681455e-06, "loss": 6.9784, "step": 34910 }, { "epoch": 0.8155020113147401, "grad_norm": 4.21875, "learning_rate": 4.331864654457835e-06, "loss": 6.8494, "step": 34920 }, { "epoch": 0.8157355456822415, "grad_norm": 5.78125, "learning_rate": 4.321232083769977e-06, "loss": 6.9357, "step": 34930 }, { "epoch": 0.8159690800497428, "grad_norm": 4.65625, "learning_rate": 4.310611343387466e-06, "loss": 6.8943, "step": 34940 }, { "epoch": 0.8162026144172442, "grad_norm": 4.375, "learning_rate": 4.300002439386422e-06, "loss": 6.9213, "step": 34950 }, { "epoch": 0.8164361487847456, "grad_norm": 4.21875, "learning_rate": 4.289405377836181e-06, "loss": 6.9495, "step": 34960 }, { "epoch": 0.8166696831522469, "grad_norm": 5.75, "learning_rate": 4.278820164799322e-06, "loss": 6.9689, "step": 34970 }, { "epoch": 0.8169032175197483, "grad_norm": 4.40625, "learning_rate": 4.268246806331624e-06, "loss": 7.0188, "step": 34980 }, { "epoch": 0.8171367518872497, "grad_norm": 5.09375, "learning_rate": 4.2576853084821095e-06, "loss": 6.9979, "step": 34990 }, { "epoch": 0.8173702862547509, "grad_norm": 5.6875, "learning_rate": 4.2471356772929905e-06, "loss": 6.9894, "step": 35000 }, { "epoch": 0.8173702862547509, "eval_loss": 6.9522833824157715, "eval_runtime": 78.3381, "eval_samples_per_second": 12.765, "eval_steps_per_second": 12.765, "step": 35000 }, { "epoch": 0.8176038206222523, "grad_norm": 4.78125, "learning_rate": 4.236597918799709e-06, "loss": 6.9501, "step": 35010 }, { "epoch": 0.8178373549897536, "grad_norm": 4.28125, "learning_rate": 4.226072039030909e-06, "loss": 6.9772, "step": 35020 }, { "epoch": 0.818070889357255, "grad_norm": 5.8125, "learning_rate": 4.21555804400843e-06, "loss": 7.0099, "step": 35030 }, { "epoch": 0.8183044237247564, "grad_norm": 5.21875, "learning_rate": 4.20505593974733e-06, "loss": 6.922, "step": 35040 }, { "epoch": 0.8185379580922577, "grad_norm": 5.8125, "learning_rate": 4.19456573225584e-06, "loss": 6.94, "step": 35050 }, { "epoch": 0.8187714924597591, "grad_norm": 4.84375, "learning_rate": 4.1840874275354075e-06, "loss": 6.9378, "step": 35060 }, { "epoch": 0.8190050268272605, "grad_norm": 4.15625, "learning_rate": 4.1736210315806605e-06, "loss": 6.9644, "step": 35070 }, { "epoch": 0.8192385611947618, "grad_norm": 5.21875, "learning_rate": 4.163166550379422e-06, "loss": 6.8456, "step": 35080 }, { "epoch": 0.8194720955622632, "grad_norm": 5.8125, "learning_rate": 4.152723989912683e-06, "loss": 6.8412, "step": 35090 }, { "epoch": 0.8197056299297646, "grad_norm": 3.484375, "learning_rate": 4.142293356154619e-06, "loss": 6.9598, "step": 35100 }, { "epoch": 0.8199391642972659, "grad_norm": 4.53125, "learning_rate": 4.131874655072601e-06, "loss": 6.9485, "step": 35110 }, { "epoch": 0.8201726986647673, "grad_norm": 4.625, "learning_rate": 4.121467892627143e-06, "loss": 7.0035, "step": 35120 }, { "epoch": 0.8204062330322686, "grad_norm": 4.6875, "learning_rate": 4.1110730747719535e-06, "loss": 6.9458, "step": 35130 }, { "epoch": 0.82063976739977, "grad_norm": 4.09375, "learning_rate": 4.100690207453897e-06, "loss": 7.0265, "step": 35140 }, { "epoch": 0.8208733017672714, "grad_norm": 5.0625, "learning_rate": 4.09031929661301e-06, "loss": 6.9709, "step": 35150 }, { "epoch": 0.8211068361347726, "grad_norm": 3.6875, "learning_rate": 4.079960348182477e-06, "loss": 6.8705, "step": 35160 }, { "epoch": 0.821340370502274, "grad_norm": 4.15625, "learning_rate": 4.0696133680886346e-06, "loss": 6.9508, "step": 35170 }, { "epoch": 0.8215739048697754, "grad_norm": 4.96875, "learning_rate": 4.059278362250987e-06, "loss": 6.9298, "step": 35180 }, { "epoch": 0.8218074392372767, "grad_norm": 4.90625, "learning_rate": 4.0489553365821855e-06, "loss": 6.9556, "step": 35190 }, { "epoch": 0.8220409736047781, "grad_norm": 4.53125, "learning_rate": 4.038644296988029e-06, "loss": 6.9504, "step": 35200 }, { "epoch": 0.8222745079722795, "grad_norm": 5.03125, "learning_rate": 4.028345249367443e-06, "loss": 6.89, "step": 35210 }, { "epoch": 0.8225080423397808, "grad_norm": 4.65625, "learning_rate": 4.018058199612512e-06, "loss": 6.9563, "step": 35220 }, { "epoch": 0.8227415767072822, "grad_norm": 3.921875, "learning_rate": 4.007783153608447e-06, "loss": 6.9503, "step": 35230 }, { "epoch": 0.8229751110747835, "grad_norm": 5.71875, "learning_rate": 3.997520117233583e-06, "loss": 6.8631, "step": 35240 }, { "epoch": 0.8232086454422849, "grad_norm": 5.03125, "learning_rate": 3.9872690963594025e-06, "loss": 6.939, "step": 35250 }, { "epoch": 0.8234421798097863, "grad_norm": 3.953125, "learning_rate": 3.977030096850509e-06, "loss": 6.8739, "step": 35260 }, { "epoch": 0.8236757141772876, "grad_norm": 4.21875, "learning_rate": 3.966803124564625e-06, "loss": 6.8962, "step": 35270 }, { "epoch": 0.823909248544789, "grad_norm": 4.09375, "learning_rate": 3.9565881853525836e-06, "loss": 6.9273, "step": 35280 }, { "epoch": 0.8241427829122904, "grad_norm": 3.8125, "learning_rate": 3.9463852850583545e-06, "loss": 6.8931, "step": 35290 }, { "epoch": 0.8243763172797917, "grad_norm": 3.703125, "learning_rate": 3.936194429518997e-06, "loss": 6.9353, "step": 35300 }, { "epoch": 0.8246098516472931, "grad_norm": 4.96875, "learning_rate": 3.926015624564694e-06, "loss": 6.9917, "step": 35310 }, { "epoch": 0.8248433860147945, "grad_norm": 4.375, "learning_rate": 3.91584887601874e-06, "loss": 6.9842, "step": 35320 }, { "epoch": 0.8250769203822957, "grad_norm": 5.1875, "learning_rate": 3.905694189697512e-06, "loss": 6.8717, "step": 35330 }, { "epoch": 0.8253104547497971, "grad_norm": 4.6875, "learning_rate": 3.895551571410505e-06, "loss": 6.9457, "step": 35340 }, { "epoch": 0.8255439891172984, "grad_norm": 4.34375, "learning_rate": 3.885421026960292e-06, "loss": 7.0007, "step": 35350 }, { "epoch": 0.8257775234847998, "grad_norm": 4.0625, "learning_rate": 3.875302562142563e-06, "loss": 6.8721, "step": 35360 }, { "epoch": 0.8260110578523012, "grad_norm": 4.53125, "learning_rate": 3.86519618274607e-06, "loss": 6.9797, "step": 35370 }, { "epoch": 0.8262445922198025, "grad_norm": 5.3125, "learning_rate": 3.85510189455267e-06, "loss": 6.9028, "step": 35380 }, { "epoch": 0.8264781265873039, "grad_norm": 5.3125, "learning_rate": 3.845019703337299e-06, "loss": 6.9278, "step": 35390 }, { "epoch": 0.8267116609548053, "grad_norm": 5.5625, "learning_rate": 3.834949614867964e-06, "loss": 6.8771, "step": 35400 }, { "epoch": 0.8269451953223066, "grad_norm": 5.03125, "learning_rate": 3.8248916349057615e-06, "loss": 6.9633, "step": 35410 }, { "epoch": 0.827178729689808, "grad_norm": 4.9375, "learning_rate": 3.814845769204842e-06, "loss": 6.9888, "step": 35420 }, { "epoch": 0.8274122640573094, "grad_norm": 3.859375, "learning_rate": 3.804812023512444e-06, "loss": 6.9443, "step": 35430 }, { "epoch": 0.8276457984248107, "grad_norm": 4.5625, "learning_rate": 3.794790403568871e-06, "loss": 6.8871, "step": 35440 }, { "epoch": 0.8278793327923121, "grad_norm": 3.859375, "learning_rate": 3.784780915107469e-06, "loss": 6.8656, "step": 35450 }, { "epoch": 0.8281128671598134, "grad_norm": 4.03125, "learning_rate": 3.7747835638546724e-06, "loss": 6.9426, "step": 35460 }, { "epoch": 0.8283464015273148, "grad_norm": 5.34375, "learning_rate": 3.7647983555299415e-06, "loss": 7.0032, "step": 35470 }, { "epoch": 0.8285799358948162, "grad_norm": 4.1875, "learning_rate": 3.754825295845821e-06, "loss": 6.8737, "step": 35480 }, { "epoch": 0.8288134702623174, "grad_norm": 4.8125, "learning_rate": 3.744864390507877e-06, "loss": 6.9586, "step": 35490 }, { "epoch": 0.8290470046298188, "grad_norm": 5.71875, "learning_rate": 3.734915645214737e-06, "loss": 6.9321, "step": 35500 }, { "epoch": 0.8290470046298188, "eval_loss": 6.952016830444336, "eval_runtime": 78.8383, "eval_samples_per_second": 12.684, "eval_steps_per_second": 12.684, "step": 35500 }, { "epoch": 0.8292805389973202, "grad_norm": 5.78125, "learning_rate": 3.7249790656580824e-06, "loss": 6.9617, "step": 35510 }, { "epoch": 0.8295140733648215, "grad_norm": 5.125, "learning_rate": 3.715054657522604e-06, "loss": 6.9186, "step": 35520 }, { "epoch": 0.8297476077323229, "grad_norm": 5.4375, "learning_rate": 3.7051424264860634e-06, "loss": 6.9217, "step": 35530 }, { "epoch": 0.8299811420998243, "grad_norm": 4.78125, "learning_rate": 3.695242378219224e-06, "loss": 6.9574, "step": 35540 }, { "epoch": 0.8302146764673256, "grad_norm": 4.03125, "learning_rate": 3.6853545183859055e-06, "loss": 6.9468, "step": 35550 }, { "epoch": 0.830448210834827, "grad_norm": 4.1875, "learning_rate": 3.6754788526429384e-06, "loss": 6.9596, "step": 35560 }, { "epoch": 0.8306817452023283, "grad_norm": 3.96875, "learning_rate": 3.665615386640192e-06, "loss": 7.0151, "step": 35570 }, { "epoch": 0.8309152795698297, "grad_norm": 5.25, "learning_rate": 3.6557641260205332e-06, "loss": 6.9227, "step": 35580 }, { "epoch": 0.8311488139373311, "grad_norm": 4.9375, "learning_rate": 3.6459250764198734e-06, "loss": 6.9995, "step": 35590 }, { "epoch": 0.8313823483048324, "grad_norm": 4.5625, "learning_rate": 3.636098243467115e-06, "loss": 6.8907, "step": 35600 }, { "epoch": 0.8316158826723338, "grad_norm": 4.59375, "learning_rate": 3.6262836327841804e-06, "loss": 6.9319, "step": 35610 }, { "epoch": 0.8318494170398352, "grad_norm": 4.9375, "learning_rate": 3.6164812499860006e-06, "loss": 6.8793, "step": 35620 }, { "epoch": 0.8320829514073365, "grad_norm": 4.125, "learning_rate": 3.6066911006805122e-06, "loss": 6.9107, "step": 35630 }, { "epoch": 0.8323164857748379, "grad_norm": 4.4375, "learning_rate": 3.596913190468659e-06, "loss": 6.789, "step": 35640 }, { "epoch": 0.8325500201423391, "grad_norm": 3.96875, "learning_rate": 3.587147524944359e-06, "loss": 6.9734, "step": 35650 }, { "epoch": 0.8327835545098405, "grad_norm": 3.578125, "learning_rate": 3.577394109694557e-06, "loss": 6.9038, "step": 35660 }, { "epoch": 0.8330170888773419, "grad_norm": 4.09375, "learning_rate": 3.5676529502991606e-06, "loss": 6.9612, "step": 35670 }, { "epoch": 0.8332506232448432, "grad_norm": 4.625, "learning_rate": 3.5579240523310826e-06, "loss": 6.9387, "step": 35680 }, { "epoch": 0.8334841576123446, "grad_norm": 4.0625, "learning_rate": 3.5482074213562255e-06, "loss": 6.9794, "step": 35690 }, { "epoch": 0.833717691979846, "grad_norm": 3.875, "learning_rate": 3.5385030629334544e-06, "loss": 6.884, "step": 35700 }, { "epoch": 0.8339512263473473, "grad_norm": 4.03125, "learning_rate": 3.528810982614633e-06, "loss": 6.9273, "step": 35710 }, { "epoch": 0.8341847607148487, "grad_norm": 4.78125, "learning_rate": 3.5191311859445796e-06, "loss": 6.8801, "step": 35720 }, { "epoch": 0.8344182950823501, "grad_norm": 6.15625, "learning_rate": 3.509463678461114e-06, "loss": 6.99, "step": 35730 }, { "epoch": 0.8346518294498514, "grad_norm": 5.8125, "learning_rate": 3.4998084656949952e-06, "loss": 6.8465, "step": 35740 }, { "epoch": 0.8348853638173528, "grad_norm": 4.5625, "learning_rate": 3.490165553169966e-06, "loss": 6.9316, "step": 35750 }, { "epoch": 0.8351188981848541, "grad_norm": 5.0, "learning_rate": 3.4805349464027386e-06, "loss": 6.911, "step": 35760 }, { "epoch": 0.8353524325523555, "grad_norm": 5.09375, "learning_rate": 3.4709166509029584e-06, "loss": 6.9712, "step": 35770 }, { "epoch": 0.8355859669198569, "grad_norm": 5.03125, "learning_rate": 3.4613106721732567e-06, "loss": 6.9615, "step": 35780 }, { "epoch": 0.8358195012873582, "grad_norm": 4.8125, "learning_rate": 3.4517170157091965e-06, "loss": 6.958, "step": 35790 }, { "epoch": 0.8360530356548596, "grad_norm": 4.1875, "learning_rate": 3.4421356869993037e-06, "loss": 7.0047, "step": 35800 }, { "epoch": 0.836286570022361, "grad_norm": 4.78125, "learning_rate": 3.4325666915250575e-06, "loss": 7.0042, "step": 35810 }, { "epoch": 0.8365201043898622, "grad_norm": 4.3125, "learning_rate": 3.423010034760854e-06, "loss": 6.9591, "step": 35820 }, { "epoch": 0.8367536387573636, "grad_norm": 4.40625, "learning_rate": 3.413465722174067e-06, "loss": 6.9235, "step": 35830 }, { "epoch": 0.836987173124865, "grad_norm": 5.3125, "learning_rate": 3.4039337592249727e-06, "loss": 6.9493, "step": 35840 }, { "epoch": 0.8372207074923663, "grad_norm": 4.6875, "learning_rate": 3.3944141513668146e-06, "loss": 6.9702, "step": 35850 }, { "epoch": 0.8374542418598677, "grad_norm": 4.0625, "learning_rate": 3.3849069040457313e-06, "loss": 6.8763, "step": 35860 }, { "epoch": 0.837687776227369, "grad_norm": 4.1875, "learning_rate": 3.3754120227008356e-06, "loss": 6.9362, "step": 35870 }, { "epoch": 0.8379213105948704, "grad_norm": 4.03125, "learning_rate": 3.3659295127641268e-06, "loss": 6.9858, "step": 35880 }, { "epoch": 0.8381548449623718, "grad_norm": 4.5625, "learning_rate": 3.3564593796605397e-06, "loss": 6.9196, "step": 35890 }, { "epoch": 0.8383883793298731, "grad_norm": 4.625, "learning_rate": 3.347001628807939e-06, "loss": 6.9309, "step": 35900 }, { "epoch": 0.8386219136973745, "grad_norm": 5.3125, "learning_rate": 3.337556265617084e-06, "loss": 6.9327, "step": 35910 }, { "epoch": 0.8388554480648759, "grad_norm": 4.125, "learning_rate": 3.3281232954916614e-06, "loss": 6.9809, "step": 35920 }, { "epoch": 0.8390889824323772, "grad_norm": 3.859375, "learning_rate": 3.3187027238282707e-06, "loss": 6.9573, "step": 35930 }, { "epoch": 0.8393225167998786, "grad_norm": 5.0625, "learning_rate": 3.309294556016415e-06, "loss": 6.9103, "step": 35940 }, { "epoch": 0.83955605116738, "grad_norm": 5.21875, "learning_rate": 3.2998987974384972e-06, "loss": 6.9409, "step": 35950 }, { "epoch": 0.8397895855348813, "grad_norm": 5.15625, "learning_rate": 3.290515453469811e-06, "loss": 6.9361, "step": 35960 }, { "epoch": 0.8400231199023827, "grad_norm": 5.8125, "learning_rate": 3.2811445294785693e-06, "loss": 6.9507, "step": 35970 }, { "epoch": 0.8402566542698839, "grad_norm": 4.15625, "learning_rate": 3.2717860308258687e-06, "loss": 7.0152, "step": 35980 }, { "epoch": 0.8404901886373853, "grad_norm": 5.15625, "learning_rate": 3.2624399628657043e-06, "loss": 6.9459, "step": 35990 }, { "epoch": 0.8407237230048867, "grad_norm": 4.34375, "learning_rate": 3.253106330944941e-06, "loss": 7.0504, "step": 36000 }, { "epoch": 0.8407237230048867, "eval_loss": 6.951974391937256, "eval_runtime": 78.6216, "eval_samples_per_second": 12.719, "eval_steps_per_second": 12.719, "step": 36000 }, { "epoch": 0.840957257372388, "grad_norm": 4.625, "learning_rate": 3.2437851404033546e-06, "loss": 6.9266, "step": 36010 }, { "epoch": 0.8411907917398894, "grad_norm": 4.6875, "learning_rate": 3.2344763965735824e-06, "loss": 6.9652, "step": 36020 }, { "epoch": 0.8414243261073908, "grad_norm": 3.65625, "learning_rate": 3.2251801047811476e-06, "loss": 6.97, "step": 36030 }, { "epoch": 0.8416578604748921, "grad_norm": 4.75, "learning_rate": 3.2158962703444513e-06, "loss": 6.966, "step": 36040 }, { "epoch": 0.8418913948423935, "grad_norm": 4.78125, "learning_rate": 3.206624898574767e-06, "loss": 7.0265, "step": 36050 }, { "epoch": 0.8421249292098949, "grad_norm": 4.71875, "learning_rate": 3.197365994776247e-06, "loss": 7.0061, "step": 36060 }, { "epoch": 0.8423584635773962, "grad_norm": 4.65625, "learning_rate": 3.1881195642458922e-06, "loss": 6.8952, "step": 36070 }, { "epoch": 0.8425919979448976, "grad_norm": 4.5, "learning_rate": 3.1788856122735854e-06, "loss": 6.9082, "step": 36080 }, { "epoch": 0.8428255323123989, "grad_norm": 4.34375, "learning_rate": 3.169664144142051e-06, "loss": 6.9621, "step": 36090 }, { "epoch": 0.8430590666799003, "grad_norm": 4.75, "learning_rate": 3.160455165126894e-06, "loss": 6.892, "step": 36100 }, { "epoch": 0.8432926010474017, "grad_norm": 5.34375, "learning_rate": 3.151258680496563e-06, "loss": 6.9229, "step": 36110 }, { "epoch": 0.843526135414903, "grad_norm": 4.375, "learning_rate": 3.142074695512351e-06, "loss": 6.9633, "step": 36120 }, { "epoch": 0.8437596697824044, "grad_norm": 4.0625, "learning_rate": 3.132903215428418e-06, "loss": 6.9369, "step": 36130 }, { "epoch": 0.8439932041499058, "grad_norm": 4.625, "learning_rate": 3.1237442454917472e-06, "loss": 7.0345, "step": 36140 }, { "epoch": 0.844226738517407, "grad_norm": 4.40625, "learning_rate": 3.114597790942192e-06, "loss": 6.9331, "step": 36150 }, { "epoch": 0.8444602728849084, "grad_norm": 4.25, "learning_rate": 3.105463857012417e-06, "loss": 6.934, "step": 36160 }, { "epoch": 0.8446938072524098, "grad_norm": 4.96875, "learning_rate": 3.0963424489279442e-06, "loss": 6.9437, "step": 36170 }, { "epoch": 0.8449273416199111, "grad_norm": 5.15625, "learning_rate": 3.0872335719071305e-06, "loss": 6.9387, "step": 36180 }, { "epoch": 0.8451608759874125, "grad_norm": 4.03125, "learning_rate": 3.078137231161146e-06, "loss": 6.8374, "step": 36190 }, { "epoch": 0.8453944103549138, "grad_norm": 4.625, "learning_rate": 3.069053431894006e-06, "loss": 6.9914, "step": 36200 }, { "epoch": 0.8456279447224152, "grad_norm": 3.75, "learning_rate": 3.0599821793025396e-06, "loss": 6.9506, "step": 36210 }, { "epoch": 0.8458614790899166, "grad_norm": 4.21875, "learning_rate": 3.0509234785764005e-06, "loss": 6.9506, "step": 36220 }, { "epoch": 0.8460950134574179, "grad_norm": 4.1875, "learning_rate": 3.041877334898077e-06, "loss": 6.9154, "step": 36230 }, { "epoch": 0.8463285478249193, "grad_norm": 4.40625, "learning_rate": 3.0328437534428434e-06, "loss": 6.9855, "step": 36240 }, { "epoch": 0.8465620821924207, "grad_norm": 5.21875, "learning_rate": 3.0238227393788175e-06, "loss": 6.9671, "step": 36250 }, { "epoch": 0.846795616559922, "grad_norm": 4.53125, "learning_rate": 3.0148142978669027e-06, "loss": 6.9655, "step": 36260 }, { "epoch": 0.8470291509274234, "grad_norm": 4.28125, "learning_rate": 3.0058184340608282e-06, "loss": 6.964, "step": 36270 }, { "epoch": 0.8472626852949248, "grad_norm": 3.8125, "learning_rate": 2.996835153107114e-06, "loss": 7.0441, "step": 36280 }, { "epoch": 0.847496219662426, "grad_norm": 4.1875, "learning_rate": 2.98786446014509e-06, "loss": 6.9748, "step": 36290 }, { "epoch": 0.8477297540299275, "grad_norm": 3.984375, "learning_rate": 2.978906360306891e-06, "loss": 6.9876, "step": 36300 }, { "epoch": 0.8479632883974287, "grad_norm": 4.34375, "learning_rate": 2.9699608587174217e-06, "loss": 6.9959, "step": 36310 }, { "epoch": 0.8481968227649301, "grad_norm": 3.46875, "learning_rate": 2.9610279604944084e-06, "loss": 7.008, "step": 36320 }, { "epoch": 0.8484303571324315, "grad_norm": 6.25, "learning_rate": 2.952107670748347e-06, "loss": 6.9657, "step": 36330 }, { "epoch": 0.8486638914999328, "grad_norm": 4.15625, "learning_rate": 2.9431999945825294e-06, "loss": 6.9856, "step": 36340 }, { "epoch": 0.8488974258674342, "grad_norm": 3.640625, "learning_rate": 2.9343049370930297e-06, "loss": 6.9502, "step": 36350 }, { "epoch": 0.8491309602349356, "grad_norm": 4.9375, "learning_rate": 2.9254225033687105e-06, "loss": 6.8886, "step": 36360 }, { "epoch": 0.8493644946024369, "grad_norm": 4.90625, "learning_rate": 2.9165526984911985e-06, "loss": 6.8771, "step": 36370 }, { "epoch": 0.8495980289699383, "grad_norm": 3.921875, "learning_rate": 2.9076955275348925e-06, "loss": 7.012, "step": 36380 }, { "epoch": 0.8498315633374397, "grad_norm": 7.125, "learning_rate": 2.89885099556699e-06, "loss": 6.8831, "step": 36390 }, { "epoch": 0.850065097704941, "grad_norm": 4.0625, "learning_rate": 2.890019107647421e-06, "loss": 6.9085, "step": 36400 }, { "epoch": 0.8502986320724424, "grad_norm": 4.125, "learning_rate": 2.8811998688289128e-06, "loss": 6.9602, "step": 36410 }, { "epoch": 0.8505321664399437, "grad_norm": 5.53125, "learning_rate": 2.8723932841569434e-06, "loss": 6.9409, "step": 36420 }, { "epoch": 0.8507657008074451, "grad_norm": 5.3125, "learning_rate": 2.8635993586697553e-06, "loss": 6.918, "step": 36430 }, { "epoch": 0.8509992351749465, "grad_norm": 5.1875, "learning_rate": 2.8548180973983347e-06, "loss": 6.9024, "step": 36440 }, { "epoch": 0.8512327695424478, "grad_norm": 4.40625, "learning_rate": 2.8460495053664466e-06, "loss": 7.0065, "step": 36450 }, { "epoch": 0.8514663039099492, "grad_norm": 4.1875, "learning_rate": 2.8372935875905836e-06, "loss": 6.9371, "step": 36460 }, { "epoch": 0.8516998382774505, "grad_norm": 4.34375, "learning_rate": 2.8285503490800058e-06, "loss": 6.9577, "step": 36470 }, { "epoch": 0.8519333726449518, "grad_norm": 4.65625, "learning_rate": 2.8198197948367145e-06, "loss": 7.048, "step": 36480 }, { "epoch": 0.8521669070124532, "grad_norm": 3.890625, "learning_rate": 2.811101929855442e-06, "loss": 6.9712, "step": 36490 }, { "epoch": 0.8524004413799546, "grad_norm": 5.46875, "learning_rate": 2.802396759123685e-06, "loss": 6.947, "step": 36500 }, { "epoch": 0.8524004413799546, "eval_loss": 6.951878070831299, "eval_runtime": 78.7954, "eval_samples_per_second": 12.691, "eval_steps_per_second": 12.691, "step": 36500 }, { "epoch": 0.8526339757474559, "grad_norm": 5.1875, "learning_rate": 2.7937042876216485e-06, "loss": 6.9782, "step": 36510 }, { "epoch": 0.8528675101149573, "grad_norm": 4.84375, "learning_rate": 2.7850245203223045e-06, "loss": 6.9712, "step": 36520 }, { "epoch": 0.8531010444824586, "grad_norm": 4.4375, "learning_rate": 2.7763574621913255e-06, "loss": 7.0043, "step": 36530 }, { "epoch": 0.85333457884996, "grad_norm": 4.90625, "learning_rate": 2.7677031181871344e-06, "loss": 6.9666, "step": 36540 }, { "epoch": 0.8535681132174614, "grad_norm": 4.53125, "learning_rate": 2.759061493260881e-06, "loss": 6.9939, "step": 36550 }, { "epoch": 0.8538016475849627, "grad_norm": 4.28125, "learning_rate": 2.7504325923564196e-06, "loss": 6.922, "step": 36560 }, { "epoch": 0.8540351819524641, "grad_norm": 3.984375, "learning_rate": 2.741816420410348e-06, "loss": 6.9668, "step": 36570 }, { "epoch": 0.8542687163199655, "grad_norm": 5.09375, "learning_rate": 2.733212982351957e-06, "loss": 6.8744, "step": 36580 }, { "epoch": 0.8545022506874668, "grad_norm": 4.15625, "learning_rate": 2.724622283103276e-06, "loss": 6.913, "step": 36590 }, { "epoch": 0.8547357850549682, "grad_norm": 5.0625, "learning_rate": 2.7160443275790397e-06, "loss": 6.9137, "step": 36600 }, { "epoch": 0.8549693194224695, "grad_norm": 4.90625, "learning_rate": 2.707479120686679e-06, "loss": 6.9314, "step": 36610 }, { "epoch": 0.8552028537899709, "grad_norm": 4.25, "learning_rate": 2.698926667326354e-06, "loss": 6.8959, "step": 36620 }, { "epoch": 0.8554363881574722, "grad_norm": 3.96875, "learning_rate": 2.6903869723909013e-06, "loss": 6.9257, "step": 36630 }, { "epoch": 0.8556699225249735, "grad_norm": 7.375, "learning_rate": 2.681860040765885e-06, "loss": 6.9517, "step": 36640 }, { "epoch": 0.8559034568924749, "grad_norm": 4.59375, "learning_rate": 2.6733458773295388e-06, "loss": 6.9021, "step": 36650 }, { "epoch": 0.8561369912599763, "grad_norm": 4.46875, "learning_rate": 2.6648444869528334e-06, "loss": 6.9158, "step": 36660 }, { "epoch": 0.8563705256274776, "grad_norm": 4.78125, "learning_rate": 2.6563558744993915e-06, "loss": 6.9948, "step": 36670 }, { "epoch": 0.856604059994979, "grad_norm": 4.1875, "learning_rate": 2.6478800448255375e-06, "loss": 6.965, "step": 36680 }, { "epoch": 0.8568375943624804, "grad_norm": 4.0625, "learning_rate": 2.639417002780298e-06, "loss": 6.9348, "step": 36690 }, { "epoch": 0.8570711287299817, "grad_norm": 5.53125, "learning_rate": 2.6309667532053605e-06, "loss": 6.9718, "step": 36700 }, { "epoch": 0.8573046630974831, "grad_norm": 6.0, "learning_rate": 2.6225293009351115e-06, "loss": 6.9184, "step": 36710 }, { "epoch": 0.8575381974649844, "grad_norm": 5.375, "learning_rate": 2.614104650796609e-06, "loss": 6.9405, "step": 36720 }, { "epoch": 0.8577717318324858, "grad_norm": 4.25, "learning_rate": 2.6056928076095945e-06, "loss": 6.9551, "step": 36730 }, { "epoch": 0.8580052661999872, "grad_norm": 4.8125, "learning_rate": 2.5972937761864683e-06, "loss": 6.969, "step": 36740 }, { "epoch": 0.8582388005674885, "grad_norm": 3.9375, "learning_rate": 2.5889075613323072e-06, "loss": 6.9463, "step": 36750 }, { "epoch": 0.8584723349349899, "grad_norm": 5.0625, "learning_rate": 2.5805341678448667e-06, "loss": 6.9542, "step": 36760 }, { "epoch": 0.8587058693024913, "grad_norm": 3.8125, "learning_rate": 2.572173600514544e-06, "loss": 6.991, "step": 36770 }, { "epoch": 0.8589394036699926, "grad_norm": 4.1875, "learning_rate": 2.5638258641244306e-06, "loss": 7.0017, "step": 36780 }, { "epoch": 0.859172938037494, "grad_norm": 4.625, "learning_rate": 2.555490963450244e-06, "loss": 6.984, "step": 36790 }, { "epoch": 0.8594064724049953, "grad_norm": 4.6875, "learning_rate": 2.547168903260386e-06, "loss": 6.9812, "step": 36800 }, { "epoch": 0.8596400067724966, "grad_norm": 3.875, "learning_rate": 2.5388596883158923e-06, "loss": 6.9862, "step": 36810 }, { "epoch": 0.859873541139998, "grad_norm": 6.3125, "learning_rate": 2.530563323370455e-06, "loss": 6.9214, "step": 36820 }, { "epoch": 0.8601070755074993, "grad_norm": 5.21875, "learning_rate": 2.522279813170422e-06, "loss": 6.9456, "step": 36830 }, { "epoch": 0.8603406098750007, "grad_norm": 3.78125, "learning_rate": 2.514009162454781e-06, "loss": 7.0248, "step": 36840 }, { "epoch": 0.8605741442425021, "grad_norm": 4.625, "learning_rate": 2.5057513759551704e-06, "loss": 6.9727, "step": 36850 }, { "epoch": 0.8608076786100034, "grad_norm": 3.5, "learning_rate": 2.4975064583958513e-06, "loss": 7.0024, "step": 36860 }, { "epoch": 0.8610412129775048, "grad_norm": 4.71875, "learning_rate": 2.4892744144937475e-06, "loss": 6.9869, "step": 36870 }, { "epoch": 0.8612747473450062, "grad_norm": 3.875, "learning_rate": 2.481055248958394e-06, "loss": 6.9616, "step": 36880 }, { "epoch": 0.8615082817125075, "grad_norm": 4.5625, "learning_rate": 2.472848966491964e-06, "loss": 7.0207, "step": 36890 }, { "epoch": 0.8617418160800089, "grad_norm": 3.875, "learning_rate": 2.464655571789279e-06, "loss": 6.9477, "step": 36900 }, { "epoch": 0.8619753504475103, "grad_norm": 4.125, "learning_rate": 2.4564750695377615e-06, "loss": 6.9663, "step": 36910 }, { "epoch": 0.8622088848150116, "grad_norm": 4.6875, "learning_rate": 2.448307464417479e-06, "loss": 6.916, "step": 36920 }, { "epoch": 0.862442419182513, "grad_norm": 4.5625, "learning_rate": 2.4401527611011027e-06, "loss": 6.9653, "step": 36930 }, { "epoch": 0.8626759535500143, "grad_norm": 4.5625, "learning_rate": 2.432010964253939e-06, "loss": 6.9032, "step": 36940 }, { "epoch": 0.8629094879175156, "grad_norm": 3.828125, "learning_rate": 2.4238820785338927e-06, "loss": 7.0262, "step": 36950 }, { "epoch": 0.863143022285017, "grad_norm": 4.59375, "learning_rate": 2.4157661085915023e-06, "loss": 6.9954, "step": 36960 }, { "epoch": 0.8633765566525183, "grad_norm": 5.34375, "learning_rate": 2.4076630590699062e-06, "loss": 6.8677, "step": 36970 }, { "epoch": 0.8636100910200197, "grad_norm": 4.0625, "learning_rate": 2.3995729346048435e-06, "loss": 6.9393, "step": 36980 }, { "epoch": 0.8638436253875211, "grad_norm": 4.4375, "learning_rate": 2.3914957398246783e-06, "loss": 6.9688, "step": 36990 }, { "epoch": 0.8640771597550224, "grad_norm": 4.1875, "learning_rate": 2.3834314793503594e-06, "loss": 6.9943, "step": 37000 }, { "epoch": 0.8640771597550224, "eval_loss": 6.951925754547119, "eval_runtime": 78.4179, "eval_samples_per_second": 12.752, "eval_steps_per_second": 12.752, "step": 37000 }, { "epoch": 0.8643106941225238, "grad_norm": 5.5625, "learning_rate": 2.3753801577954453e-06, "loss": 7.0421, "step": 37010 }, { "epoch": 0.8645442284900252, "grad_norm": 5.0, "learning_rate": 2.367341779766094e-06, "loss": 6.9629, "step": 37020 }, { "epoch": 0.8647777628575265, "grad_norm": 6.125, "learning_rate": 2.359316349861046e-06, "loss": 6.9115, "step": 37030 }, { "epoch": 0.8650112972250279, "grad_norm": 4.5625, "learning_rate": 2.3513038726716534e-06, "loss": 7.0137, "step": 37040 }, { "epoch": 0.8652448315925292, "grad_norm": 4.21875, "learning_rate": 2.3433043527818387e-06, "loss": 7.0122, "step": 37050 }, { "epoch": 0.8654783659600306, "grad_norm": 4.9375, "learning_rate": 2.335317794768127e-06, "loss": 6.9314, "step": 37060 }, { "epoch": 0.865711900327532, "grad_norm": 3.96875, "learning_rate": 2.327344203199616e-06, "loss": 6.9031, "step": 37070 }, { "epoch": 0.8659454346950333, "grad_norm": 4.1875, "learning_rate": 2.3193835826379904e-06, "loss": 6.9183, "step": 37080 }, { "epoch": 0.8661789690625347, "grad_norm": 3.90625, "learning_rate": 2.3114359376375213e-06, "loss": 6.9877, "step": 37090 }, { "epoch": 0.8664125034300361, "grad_norm": 5.28125, "learning_rate": 2.30350127274504e-06, "loss": 6.9269, "step": 37100 }, { "epoch": 0.8666460377975373, "grad_norm": 5.0, "learning_rate": 2.295579592499972e-06, "loss": 6.9045, "step": 37110 }, { "epoch": 0.8668795721650387, "grad_norm": 4.1875, "learning_rate": 2.287670901434294e-06, "loss": 7.0178, "step": 37120 }, { "epoch": 0.8671131065325401, "grad_norm": 4.09375, "learning_rate": 2.279775204072565e-06, "loss": 6.949, "step": 37130 }, { "epoch": 0.8673466409000414, "grad_norm": 4.90625, "learning_rate": 2.271892504931905e-06, "loss": 6.9673, "step": 37140 }, { "epoch": 0.8675801752675428, "grad_norm": 5.25, "learning_rate": 2.2640228085220095e-06, "loss": 7.0049, "step": 37150 }, { "epoch": 0.8678137096350441, "grad_norm": 4.25, "learning_rate": 2.2561661193451156e-06, "loss": 6.931, "step": 37160 }, { "epoch": 0.8680472440025455, "grad_norm": 5.09375, "learning_rate": 2.2483224418960255e-06, "loss": 6.9121, "step": 37170 }, { "epoch": 0.8682807783700469, "grad_norm": 4.25, "learning_rate": 2.2404917806621102e-06, "loss": 7.0257, "step": 37180 }, { "epoch": 0.8685143127375482, "grad_norm": 4.09375, "learning_rate": 2.2326741401232725e-06, "loss": 6.9984, "step": 37190 }, { "epoch": 0.8687478471050496, "grad_norm": 4.65625, "learning_rate": 2.2248695247519853e-06, "loss": 6.9219, "step": 37200 }, { "epoch": 0.868981381472551, "grad_norm": 3.890625, "learning_rate": 2.2170779390132625e-06, "loss": 6.9219, "step": 37210 }, { "epoch": 0.8692149158400523, "grad_norm": 6.71875, "learning_rate": 2.209299387364666e-06, "loss": 6.9329, "step": 37220 }, { "epoch": 0.8694484502075537, "grad_norm": 4.90625, "learning_rate": 2.201533874256295e-06, "loss": 6.8922, "step": 37230 }, { "epoch": 0.8696819845750551, "grad_norm": 6.53125, "learning_rate": 2.1937814041307873e-06, "loss": 6.9626, "step": 37240 }, { "epoch": 0.8699155189425564, "grad_norm": 4.4375, "learning_rate": 2.1860419814233285e-06, "loss": 6.9418, "step": 37250 }, { "epoch": 0.8701490533100578, "grad_norm": 4.625, "learning_rate": 2.1783156105616405e-06, "loss": 6.9529, "step": 37260 }, { "epoch": 0.870382587677559, "grad_norm": 4.59375, "learning_rate": 2.1706022959659703e-06, "loss": 6.9557, "step": 37270 }, { "epoch": 0.8706161220450604, "grad_norm": 4.5625, "learning_rate": 2.1629020420490975e-06, "loss": 6.9974, "step": 37280 }, { "epoch": 0.8708496564125618, "grad_norm": 3.78125, "learning_rate": 2.1552148532163336e-06, "loss": 7.0494, "step": 37290 }, { "epoch": 0.8710831907800631, "grad_norm": 4.53125, "learning_rate": 2.147540733865508e-06, "loss": 6.9267, "step": 37300 }, { "epoch": 0.8713167251475645, "grad_norm": 5.0625, "learning_rate": 2.1398796883869836e-06, "loss": 6.9913, "step": 37310 }, { "epoch": 0.8715502595150659, "grad_norm": 5.21875, "learning_rate": 2.13223172116363e-06, "loss": 6.9344, "step": 37320 }, { "epoch": 0.8717837938825672, "grad_norm": 5.1875, "learning_rate": 2.1245968365708468e-06, "loss": 6.9423, "step": 37330 }, { "epoch": 0.8720173282500686, "grad_norm": 3.96875, "learning_rate": 2.116975038976551e-06, "loss": 6.9833, "step": 37340 }, { "epoch": 0.87225086261757, "grad_norm": 4.875, "learning_rate": 2.1093663327411554e-06, "loss": 6.9988, "step": 37350 }, { "epoch": 0.8724843969850713, "grad_norm": 4.125, "learning_rate": 2.101770722217605e-06, "loss": 6.9015, "step": 37360 }, { "epoch": 0.8727179313525727, "grad_norm": 4.75, "learning_rate": 2.094188211751333e-06, "loss": 6.9009, "step": 37370 }, { "epoch": 0.872951465720074, "grad_norm": 6.4375, "learning_rate": 2.0866188056802877e-06, "loss": 7.0025, "step": 37380 }, { "epoch": 0.8731850000875754, "grad_norm": 4.96875, "learning_rate": 2.079062508334931e-06, "loss": 6.9654, "step": 37390 }, { "epoch": 0.8734185344550768, "grad_norm": 5.15625, "learning_rate": 2.0715193240382014e-06, "loss": 6.9671, "step": 37400 }, { "epoch": 0.8736520688225781, "grad_norm": 5.84375, "learning_rate": 2.063989257105556e-06, "loss": 6.9544, "step": 37410 }, { "epoch": 0.8738856031900795, "grad_norm": 5.25, "learning_rate": 2.056472311844931e-06, "loss": 6.9908, "step": 37420 }, { "epoch": 0.8741191375575809, "grad_norm": 4.875, "learning_rate": 2.048968492556777e-06, "loss": 6.9493, "step": 37430 }, { "epoch": 0.8743526719250821, "grad_norm": 5.625, "learning_rate": 2.041477803534009e-06, "loss": 7.0042, "step": 37440 }, { "epoch": 0.8745862062925835, "grad_norm": 4.96875, "learning_rate": 2.0340002490620514e-06, "loss": 6.9841, "step": 37450 }, { "epoch": 0.8748197406600849, "grad_norm": 5.46875, "learning_rate": 2.0265358334188106e-06, "loss": 7.0439, "step": 37460 }, { "epoch": 0.8750532750275862, "grad_norm": 5.125, "learning_rate": 2.019084560874662e-06, "loss": 6.922, "step": 37470 }, { "epoch": 0.8752868093950876, "grad_norm": 4.28125, "learning_rate": 2.01164643569248e-06, "loss": 6.9214, "step": 37480 }, { "epoch": 0.8755203437625889, "grad_norm": 5.0, "learning_rate": 2.0042214621276033e-06, "loss": 6.935, "step": 37490 }, { "epoch": 0.8757538781300903, "grad_norm": 6.09375, "learning_rate": 1.996809644427858e-06, "loss": 6.8937, "step": 37500 }, { "epoch": 0.8757538781300903, "eval_loss": 6.951778411865234, "eval_runtime": 78.9789, "eval_samples_per_second": 12.662, "eval_steps_per_second": 12.662, "step": 37500 }, { "epoch": 0.8759874124975917, "grad_norm": 4.875, "learning_rate": 1.989410986833537e-06, "loss": 6.9711, "step": 37510 }, { "epoch": 0.876220946865093, "grad_norm": 4.90625, "learning_rate": 1.982025493577411e-06, "loss": 6.8949, "step": 37520 }, { "epoch": 0.8764544812325944, "grad_norm": 4.625, "learning_rate": 1.97465316888471e-06, "loss": 6.9322, "step": 37530 }, { "epoch": 0.8766880156000958, "grad_norm": 4.125, "learning_rate": 1.9672940169731287e-06, "loss": 6.978, "step": 37540 }, { "epoch": 0.8769215499675971, "grad_norm": 4.03125, "learning_rate": 1.9599480420528393e-06, "loss": 7.0313, "step": 37550 }, { "epoch": 0.8771550843350985, "grad_norm": 4.5, "learning_rate": 1.9526152483264613e-06, "loss": 6.9552, "step": 37560 }, { "epoch": 0.8773886187025999, "grad_norm": 4.46875, "learning_rate": 1.9452956399890895e-06, "loss": 6.9585, "step": 37570 }, { "epoch": 0.8776221530701012, "grad_norm": 4.0625, "learning_rate": 1.9379892212282564e-06, "loss": 6.921, "step": 37580 }, { "epoch": 0.8778556874376026, "grad_norm": 4.59375, "learning_rate": 1.9306959962239664e-06, "loss": 7.0282, "step": 37590 }, { "epoch": 0.8780892218051038, "grad_norm": 4.8125, "learning_rate": 1.923415969148662e-06, "loss": 6.9964, "step": 37600 }, { "epoch": 0.8783227561726052, "grad_norm": 3.84375, "learning_rate": 1.916149144167234e-06, "loss": 6.9723, "step": 37610 }, { "epoch": 0.8785562905401066, "grad_norm": 4.71875, "learning_rate": 1.908895525437035e-06, "loss": 6.9421, "step": 37620 }, { "epoch": 0.8787898249076079, "grad_norm": 4.4375, "learning_rate": 1.9016551171078534e-06, "loss": 6.9301, "step": 37630 }, { "epoch": 0.8790233592751093, "grad_norm": 4.0625, "learning_rate": 1.8944279233219231e-06, "loss": 6.9513, "step": 37640 }, { "epoch": 0.8792568936426107, "grad_norm": 3.984375, "learning_rate": 1.887213948213909e-06, "loss": 6.9595, "step": 37650 }, { "epoch": 0.879490428010112, "grad_norm": 4.9375, "learning_rate": 1.8800131959109262e-06, "loss": 6.9147, "step": 37660 }, { "epoch": 0.8797239623776134, "grad_norm": 4.3125, "learning_rate": 1.8728256705325159e-06, "loss": 6.9382, "step": 37670 }, { "epoch": 0.8799574967451147, "grad_norm": 5.0625, "learning_rate": 1.865651376190647e-06, "loss": 6.9604, "step": 37680 }, { "epoch": 0.8801910311126161, "grad_norm": 3.84375, "learning_rate": 1.8584903169897443e-06, "loss": 6.9303, "step": 37690 }, { "epoch": 0.8804245654801175, "grad_norm": 4.5625, "learning_rate": 1.8513424970266285e-06, "loss": 6.9535, "step": 37700 }, { "epoch": 0.8806580998476188, "grad_norm": 4.21875, "learning_rate": 1.8442079203905726e-06, "loss": 6.9876, "step": 37710 }, { "epoch": 0.8808916342151202, "grad_norm": 5.0, "learning_rate": 1.8370865911632507e-06, "loss": 6.9162, "step": 37720 }, { "epoch": 0.8811251685826216, "grad_norm": 4.65625, "learning_rate": 1.8299785134187813e-06, "loss": 6.9629, "step": 37730 }, { "epoch": 0.8813587029501229, "grad_norm": 4.5625, "learning_rate": 1.8228836912236757e-06, "loss": 6.9351, "step": 37740 }, { "epoch": 0.8815922373176243, "grad_norm": 4.8125, "learning_rate": 1.815802128636887e-06, "loss": 6.947, "step": 37750 }, { "epoch": 0.8818257716851257, "grad_norm": 4.65625, "learning_rate": 1.8087338297097689e-06, "loss": 7.0219, "step": 37760 }, { "epoch": 0.882059306052627, "grad_norm": 5.71875, "learning_rate": 1.801678798486081e-06, "loss": 6.8996, "step": 37770 }, { "epoch": 0.8822928404201283, "grad_norm": 4.78125, "learning_rate": 1.7946370390020146e-06, "loss": 6.9219, "step": 37780 }, { "epoch": 0.8825263747876296, "grad_norm": 3.90625, "learning_rate": 1.787608555286141e-06, "loss": 6.9669, "step": 37790 }, { "epoch": 0.882759909155131, "grad_norm": 4.5, "learning_rate": 1.7805933513594552e-06, "loss": 6.9307, "step": 37800 }, { "epoch": 0.8829934435226324, "grad_norm": 4.65625, "learning_rate": 1.7735914312353553e-06, "loss": 6.9078, "step": 37810 }, { "epoch": 0.8832269778901337, "grad_norm": 4.65625, "learning_rate": 1.7666027989196198e-06, "loss": 6.9032, "step": 37820 }, { "epoch": 0.8834605122576351, "grad_norm": 3.84375, "learning_rate": 1.7596274584104539e-06, "loss": 6.9356, "step": 37830 }, { "epoch": 0.8836940466251365, "grad_norm": 3.921875, "learning_rate": 1.7526654136984343e-06, "loss": 6.9852, "step": 37840 }, { "epoch": 0.8839275809926378, "grad_norm": 4.28125, "learning_rate": 1.7457166687665449e-06, "loss": 6.913, "step": 37850 }, { "epoch": 0.8841611153601392, "grad_norm": 4.125, "learning_rate": 1.7387812275901528e-06, "loss": 6.9527, "step": 37860 }, { "epoch": 0.8843946497276406, "grad_norm": 3.796875, "learning_rate": 1.7318590941370228e-06, "loss": 6.9444, "step": 37870 }, { "epoch": 0.8846281840951419, "grad_norm": 4.875, "learning_rate": 1.724950272367301e-06, "loss": 6.9508, "step": 37880 }, { "epoch": 0.8848617184626433, "grad_norm": 4.875, "learning_rate": 1.7180547662335123e-06, "loss": 6.8817, "step": 37890 }, { "epoch": 0.8850952528301446, "grad_norm": 3.9375, "learning_rate": 1.711172579680581e-06, "loss": 6.8854, "step": 37900 }, { "epoch": 0.885328787197646, "grad_norm": 4.875, "learning_rate": 1.7043037166457858e-06, "loss": 6.9694, "step": 37910 }, { "epoch": 0.8855623215651474, "grad_norm": 4.09375, "learning_rate": 1.6974481810588082e-06, "loss": 6.9184, "step": 37920 }, { "epoch": 0.8857958559326486, "grad_norm": 6.03125, "learning_rate": 1.6906059768416888e-06, "loss": 6.9414, "step": 37930 }, { "epoch": 0.88602939030015, "grad_norm": 5.0, "learning_rate": 1.6837771079088544e-06, "loss": 6.9706, "step": 37940 }, { "epoch": 0.8862629246676514, "grad_norm": 5.34375, "learning_rate": 1.6769615781670882e-06, "loss": 6.9372, "step": 37950 }, { "epoch": 0.8864964590351527, "grad_norm": 5.34375, "learning_rate": 1.6701593915155494e-06, "loss": 6.9414, "step": 37960 }, { "epoch": 0.8867299934026541, "grad_norm": 5.53125, "learning_rate": 1.6633705518457637e-06, "loss": 6.9532, "step": 37970 }, { "epoch": 0.8869635277701555, "grad_norm": 4.53125, "learning_rate": 1.6565950630416189e-06, "loss": 6.985, "step": 37980 }, { "epoch": 0.8871970621376568, "grad_norm": 5.15625, "learning_rate": 1.6498329289793696e-06, "loss": 6.9857, "step": 37990 }, { "epoch": 0.8874305965051582, "grad_norm": 4.09375, "learning_rate": 1.6430841535276242e-06, "loss": 6.9268, "step": 38000 }, { "epoch": 0.8874305965051582, "eval_loss": 6.951807022094727, "eval_runtime": 78.8569, "eval_samples_per_second": 12.681, "eval_steps_per_second": 12.681, "step": 38000 }, { "epoch": 0.8876641308726595, "grad_norm": 6.09375, "learning_rate": 1.6363487405473553e-06, "loss": 6.9086, "step": 38010 }, { "epoch": 0.8878976652401609, "grad_norm": 3.828125, "learning_rate": 1.6296266938918863e-06, "loss": 7.0093, "step": 38020 }, { "epoch": 0.8881311996076623, "grad_norm": 4.0625, "learning_rate": 1.6229180174068881e-06, "loss": 6.9198, "step": 38030 }, { "epoch": 0.8883647339751636, "grad_norm": 3.546875, "learning_rate": 1.6162227149303904e-06, "loss": 6.9157, "step": 38040 }, { "epoch": 0.888598268342665, "grad_norm": 4.3125, "learning_rate": 1.609540790292774e-06, "loss": 7.0068, "step": 38050 }, { "epoch": 0.8888318027101664, "grad_norm": 4.34375, "learning_rate": 1.602872247316764e-06, "loss": 6.9464, "step": 38060 }, { "epoch": 0.8890653370776677, "grad_norm": 4.34375, "learning_rate": 1.5962170898174227e-06, "loss": 6.9506, "step": 38070 }, { "epoch": 0.8892988714451691, "grad_norm": 3.5, "learning_rate": 1.589575321602163e-06, "loss": 6.9466, "step": 38080 }, { "epoch": 0.8895324058126705, "grad_norm": 4.90625, "learning_rate": 1.582946946470737e-06, "loss": 6.9727, "step": 38090 }, { "epoch": 0.8897659401801717, "grad_norm": 5.59375, "learning_rate": 1.5763319682152222e-06, "loss": 6.9557, "step": 38100 }, { "epoch": 0.8899994745476731, "grad_norm": 3.5625, "learning_rate": 1.5697303906200528e-06, "loss": 6.9652, "step": 38110 }, { "epoch": 0.8902330089151744, "grad_norm": 4.53125, "learning_rate": 1.5631422174619797e-06, "loss": 6.9376, "step": 38120 }, { "epoch": 0.8904665432826758, "grad_norm": 4.90625, "learning_rate": 1.5565674525100965e-06, "loss": 6.9689, "step": 38130 }, { "epoch": 0.8907000776501772, "grad_norm": 4.78125, "learning_rate": 1.5500060995258137e-06, "loss": 7.0227, "step": 38140 }, { "epoch": 0.8909336120176785, "grad_norm": 5.6875, "learning_rate": 1.543458162262884e-06, "loss": 6.9672, "step": 38150 }, { "epoch": 0.8911671463851799, "grad_norm": 4.15625, "learning_rate": 1.5369236444673668e-06, "loss": 6.9999, "step": 38160 }, { "epoch": 0.8914006807526813, "grad_norm": 5.375, "learning_rate": 1.5304025498776602e-06, "loss": 6.9342, "step": 38170 }, { "epoch": 0.8916342151201826, "grad_norm": 4.6875, "learning_rate": 1.5238948822244802e-06, "loss": 6.9661, "step": 38180 }, { "epoch": 0.891867749487684, "grad_norm": 4.71875, "learning_rate": 1.5174006452308514e-06, "loss": 6.9007, "step": 38190 }, { "epoch": 0.8921012838551854, "grad_norm": 4.75, "learning_rate": 1.510919842612124e-06, "loss": 6.9729, "step": 38200 }, { "epoch": 0.8923348182226867, "grad_norm": 4.3125, "learning_rate": 1.5044524780759572e-06, "loss": 6.9395, "step": 38210 }, { "epoch": 0.8925683525901881, "grad_norm": 4.375, "learning_rate": 1.4979985553223302e-06, "loss": 6.7813, "step": 38220 }, { "epoch": 0.8928018869576894, "grad_norm": 4.46875, "learning_rate": 1.4915580780435174e-06, "loss": 6.9582, "step": 38230 }, { "epoch": 0.8930354213251908, "grad_norm": 5.4375, "learning_rate": 1.4851310499241183e-06, "loss": 6.942, "step": 38240 }, { "epoch": 0.8932689556926922, "grad_norm": 3.671875, "learning_rate": 1.478717474641031e-06, "loss": 6.8873, "step": 38250 }, { "epoch": 0.8935024900601934, "grad_norm": 3.90625, "learning_rate": 1.4723173558634478e-06, "loss": 6.8978, "step": 38260 }, { "epoch": 0.8937360244276948, "grad_norm": 4.125, "learning_rate": 1.465930697252882e-06, "loss": 7.0061, "step": 38270 }, { "epoch": 0.8939695587951962, "grad_norm": 4.9375, "learning_rate": 1.4595575024631269e-06, "loss": 6.9804, "step": 38280 }, { "epoch": 0.8942030931626975, "grad_norm": 4.25, "learning_rate": 1.453197775140283e-06, "loss": 6.946, "step": 38290 }, { "epoch": 0.8944366275301989, "grad_norm": 4.65625, "learning_rate": 1.4468515189227456e-06, "loss": 6.9739, "step": 38300 }, { "epoch": 0.8946701618977003, "grad_norm": 4.53125, "learning_rate": 1.4405187374412098e-06, "loss": 6.959, "step": 38310 }, { "epoch": 0.8949036962652016, "grad_norm": 4.46875, "learning_rate": 1.4341994343186477e-06, "loss": 6.9536, "step": 38320 }, { "epoch": 0.895137230632703, "grad_norm": 4.125, "learning_rate": 1.4278936131703251e-06, "loss": 6.9044, "step": 38330 }, { "epoch": 0.8953707650002043, "grad_norm": 4.28125, "learning_rate": 1.4216012776038012e-06, "loss": 6.9475, "step": 38340 }, { "epoch": 0.8956042993677057, "grad_norm": 3.953125, "learning_rate": 1.4153224312189101e-06, "loss": 6.9419, "step": 38350 }, { "epoch": 0.8958378337352071, "grad_norm": 5.375, "learning_rate": 1.4090570776077784e-06, "loss": 6.8643, "step": 38360 }, { "epoch": 0.8960713681027084, "grad_norm": 3.8125, "learning_rate": 1.402805220354811e-06, "loss": 6.9912, "step": 38370 }, { "epoch": 0.8963049024702098, "grad_norm": 6.40625, "learning_rate": 1.3965668630366912e-06, "loss": 6.9221, "step": 38380 }, { "epoch": 0.8965384368377112, "grad_norm": 5.875, "learning_rate": 1.3903420092223757e-06, "loss": 6.9352, "step": 38390 }, { "epoch": 0.8967719712052125, "grad_norm": 5.34375, "learning_rate": 1.3841306624730932e-06, "loss": 6.9118, "step": 38400 }, { "epoch": 0.8970055055727139, "grad_norm": 4.0625, "learning_rate": 1.3779328263423513e-06, "loss": 6.9251, "step": 38410 }, { "epoch": 0.8972390399402153, "grad_norm": 5.28125, "learning_rate": 1.3717485043759332e-06, "loss": 6.9901, "step": 38420 }, { "epoch": 0.8974725743077165, "grad_norm": 5.375, "learning_rate": 1.3655777001118836e-06, "loss": 7.0314, "step": 38430 }, { "epoch": 0.8977061086752179, "grad_norm": 4.40625, "learning_rate": 1.3594204170805064e-06, "loss": 6.9485, "step": 38440 }, { "epoch": 0.8979396430427192, "grad_norm": 5.03125, "learning_rate": 1.3532766588043866e-06, "loss": 7.0249, "step": 38450 }, { "epoch": 0.8981731774102206, "grad_norm": 4.15625, "learning_rate": 1.3471464287983597e-06, "loss": 7.0092, "step": 38460 }, { "epoch": 0.898406711777722, "grad_norm": 3.734375, "learning_rate": 1.3410297305695208e-06, "loss": 6.9466, "step": 38470 }, { "epoch": 0.8986402461452233, "grad_norm": 4.25, "learning_rate": 1.3349265676172313e-06, "loss": 6.9938, "step": 38480 }, { "epoch": 0.8988737805127247, "grad_norm": 4.3125, "learning_rate": 1.3288369434331065e-06, "loss": 6.9381, "step": 38490 }, { "epoch": 0.8991073148802261, "grad_norm": 5.40625, "learning_rate": 1.3227608615010184e-06, "loss": 6.9133, "step": 38500 }, { "epoch": 0.8991073148802261, "eval_loss": 6.951699256896973, "eval_runtime": 78.6847, "eval_samples_per_second": 12.709, "eval_steps_per_second": 12.709, "step": 38500 }, { "epoch": 0.8993408492477274, "grad_norm": 4.28125, "learning_rate": 1.3166983252970777e-06, "loss": 6.8936, "step": 38510 }, { "epoch": 0.8995743836152288, "grad_norm": 4.90625, "learning_rate": 1.3106493382896684e-06, "loss": 6.9834, "step": 38520 }, { "epoch": 0.8998079179827302, "grad_norm": 5.0, "learning_rate": 1.3046139039394e-06, "loss": 7.0356, "step": 38530 }, { "epoch": 0.9000414523502315, "grad_norm": 4.59375, "learning_rate": 1.298592025699144e-06, "loss": 6.9648, "step": 38540 }, { "epoch": 0.9002749867177329, "grad_norm": 3.296875, "learning_rate": 1.2925837070140167e-06, "loss": 6.9223, "step": 38550 }, { "epoch": 0.9005085210852342, "grad_norm": 4.1875, "learning_rate": 1.286588951321363e-06, "loss": 6.9745, "step": 38560 }, { "epoch": 0.9007420554527356, "grad_norm": 3.71875, "learning_rate": 1.280607762050784e-06, "loss": 6.9305, "step": 38570 }, { "epoch": 0.900975589820237, "grad_norm": 5.09375, "learning_rate": 1.2746401426241089e-06, "loss": 6.9257, "step": 38580 }, { "epoch": 0.9012091241877382, "grad_norm": 5.28125, "learning_rate": 1.2686860964554125e-06, "loss": 6.954, "step": 38590 }, { "epoch": 0.9014426585552396, "grad_norm": 5.09375, "learning_rate": 1.2627456269509924e-06, "loss": 6.8986, "step": 38600 }, { "epoch": 0.901676192922741, "grad_norm": 4.375, "learning_rate": 1.2568187375093909e-06, "loss": 6.9807, "step": 38610 }, { "epoch": 0.9019097272902423, "grad_norm": 3.53125, "learning_rate": 1.2509054315213791e-06, "loss": 6.8798, "step": 38620 }, { "epoch": 0.9021432616577437, "grad_norm": 4.625, "learning_rate": 1.2450057123699454e-06, "loss": 6.9718, "step": 38630 }, { "epoch": 0.9023767960252451, "grad_norm": 5.03125, "learning_rate": 1.239119583430326e-06, "loss": 6.9624, "step": 38640 }, { "epoch": 0.9026103303927464, "grad_norm": 5.21875, "learning_rate": 1.2332470480699576e-06, "loss": 6.9517, "step": 38650 }, { "epoch": 0.9028438647602478, "grad_norm": 5.0625, "learning_rate": 1.2273881096485196e-06, "loss": 6.9514, "step": 38660 }, { "epoch": 0.9030773991277491, "grad_norm": 5.28125, "learning_rate": 1.2215427715179085e-06, "loss": 6.9084, "step": 38670 }, { "epoch": 0.9033109334952505, "grad_norm": 5.71875, "learning_rate": 1.2157110370222297e-06, "loss": 6.8836, "step": 38680 }, { "epoch": 0.9035444678627519, "grad_norm": 4.3125, "learning_rate": 1.209892909497823e-06, "loss": 6.9825, "step": 38690 }, { "epoch": 0.9037780022302532, "grad_norm": 5.625, "learning_rate": 1.2040883922732199e-06, "loss": 6.9305, "step": 38700 }, { "epoch": 0.9040115365977546, "grad_norm": 4.3125, "learning_rate": 1.1982974886691923e-06, "loss": 6.9851, "step": 38710 }, { "epoch": 0.904245070965256, "grad_norm": 4.34375, "learning_rate": 1.1925202019987064e-06, "loss": 6.9928, "step": 38720 }, { "epoch": 0.9044786053327573, "grad_norm": 4.03125, "learning_rate": 1.1867565355669463e-06, "loss": 6.9576, "step": 38730 }, { "epoch": 0.9047121397002587, "grad_norm": 5.375, "learning_rate": 1.1810064926712965e-06, "loss": 6.9946, "step": 38740 }, { "epoch": 0.9049456740677599, "grad_norm": 4.34375, "learning_rate": 1.1752700766013507e-06, "loss": 6.9604, "step": 38750 }, { "epoch": 0.9051792084352613, "grad_norm": 5.53125, "learning_rate": 1.1695472906389144e-06, "loss": 6.8923, "step": 38760 }, { "epoch": 0.9054127428027627, "grad_norm": 4.1875, "learning_rate": 1.1638381380579795e-06, "loss": 6.9101, "step": 38770 }, { "epoch": 0.905646277170264, "grad_norm": 3.9375, "learning_rate": 1.1581426221247527e-06, "loss": 7.0206, "step": 38780 }, { "epoch": 0.9058798115377654, "grad_norm": 4.75, "learning_rate": 1.1524607460976306e-06, "loss": 6.9736, "step": 38790 }, { "epoch": 0.9061133459052668, "grad_norm": 4.4375, "learning_rate": 1.1467925132272183e-06, "loss": 6.9134, "step": 38800 }, { "epoch": 0.9063468802727681, "grad_norm": 5.03125, "learning_rate": 1.1411379267562972e-06, "loss": 6.901, "step": 38810 }, { "epoch": 0.9065804146402695, "grad_norm": 4.625, "learning_rate": 1.1354969899198542e-06, "loss": 6.9613, "step": 38820 }, { "epoch": 0.9068139490077709, "grad_norm": 4.65625, "learning_rate": 1.1298697059450635e-06, "loss": 6.94, "step": 38830 }, { "epoch": 0.9070474833752722, "grad_norm": 5.90625, "learning_rate": 1.1242560780512918e-06, "loss": 6.9532, "step": 38840 }, { "epoch": 0.9072810177427736, "grad_norm": 3.71875, "learning_rate": 1.1186561094500918e-06, "loss": 6.9243, "step": 38850 }, { "epoch": 0.9075145521102749, "grad_norm": 4.6875, "learning_rate": 1.1130698033451958e-06, "loss": 6.9681, "step": 38860 }, { "epoch": 0.9077480864777763, "grad_norm": 4.53125, "learning_rate": 1.1074971629325304e-06, "loss": 6.8233, "step": 38870 }, { "epoch": 0.9079816208452777, "grad_norm": 5.59375, "learning_rate": 1.101938191400198e-06, "loss": 6.9621, "step": 38880 }, { "epoch": 0.908215155212779, "grad_norm": 3.5625, "learning_rate": 1.0963928919284717e-06, "loss": 6.9469, "step": 38890 }, { "epoch": 0.9084486895802804, "grad_norm": 4.28125, "learning_rate": 1.0908612676898245e-06, "loss": 6.952, "step": 38900 }, { "epoch": 0.9086822239477818, "grad_norm": 4.25, "learning_rate": 1.0853433218488862e-06, "loss": 6.9423, "step": 38910 }, { "epoch": 0.908915758315283, "grad_norm": 4.46875, "learning_rate": 1.0798390575624762e-06, "loss": 6.907, "step": 38920 }, { "epoch": 0.9091492926827844, "grad_norm": 4.59375, "learning_rate": 1.0743484779795727e-06, "loss": 6.9232, "step": 38930 }, { "epoch": 0.9093828270502858, "grad_norm": 4.1875, "learning_rate": 1.068871586241335e-06, "loss": 6.9685, "step": 38940 }, { "epoch": 0.9096163614177871, "grad_norm": 4.78125, "learning_rate": 1.063408385481085e-06, "loss": 6.9116, "step": 38950 }, { "epoch": 0.9098498957852885, "grad_norm": 4.875, "learning_rate": 1.0579588788243195e-06, "loss": 6.974, "step": 38960 }, { "epoch": 0.9100834301527898, "grad_norm": 4.78125, "learning_rate": 1.0525230693886945e-06, "loss": 6.9207, "step": 38970 }, { "epoch": 0.9103169645202912, "grad_norm": 5.5, "learning_rate": 1.0471009602840281e-06, "loss": 6.95, "step": 38980 }, { "epoch": 0.9105504988877926, "grad_norm": 4.375, "learning_rate": 1.041692554612314e-06, "loss": 6.9364, "step": 38990 }, { "epoch": 0.9107840332552939, "grad_norm": 4.90625, "learning_rate": 1.0362978554676855e-06, "loss": 7.0366, "step": 39000 }, { "epoch": 0.9107840332552939, "eval_loss": 6.951646327972412, "eval_runtime": 78.4853, "eval_samples_per_second": 12.741, "eval_steps_per_second": 12.741, "step": 39000 }, { "epoch": 0.9110175676227953, "grad_norm": 4.21875, "learning_rate": 1.0309168659364542e-06, "loss": 6.9168, "step": 39010 }, { "epoch": 0.9112511019902967, "grad_norm": 5.0625, "learning_rate": 1.0255495890970718e-06, "loss": 6.8865, "step": 39020 }, { "epoch": 0.911484636357798, "grad_norm": 4.09375, "learning_rate": 1.0201960280201566e-06, "loss": 6.9754, "step": 39030 }, { "epoch": 0.9117181707252994, "grad_norm": 4.1875, "learning_rate": 1.0148561857684785e-06, "loss": 6.884, "step": 39040 }, { "epoch": 0.9119517050928008, "grad_norm": 6.09375, "learning_rate": 1.0095300653969547e-06, "loss": 6.9723, "step": 39050 }, { "epoch": 0.912185239460302, "grad_norm": 5.96875, "learning_rate": 1.0042176699526534e-06, "loss": 6.8807, "step": 39060 }, { "epoch": 0.9124187738278035, "grad_norm": 4.1875, "learning_rate": 9.989190024747907e-07, "loss": 6.9668, "step": 39070 }, { "epoch": 0.9126523081953047, "grad_norm": 4.78125, "learning_rate": 9.936340659947308e-07, "loss": 6.9899, "step": 39080 }, { "epoch": 0.9128858425628061, "grad_norm": 4.5625, "learning_rate": 9.883628635359798e-07, "loss": 6.9883, "step": 39090 }, { "epoch": 0.9131193769303075, "grad_norm": 4.46875, "learning_rate": 9.831053981141924e-07, "loss": 6.985, "step": 39100 }, { "epoch": 0.9133529112978088, "grad_norm": 4.6875, "learning_rate": 9.778616727371597e-07, "loss": 6.9432, "step": 39110 }, { "epoch": 0.9135864456653102, "grad_norm": 4.78125, "learning_rate": 9.726316904048044e-07, "loss": 6.941, "step": 39120 }, { "epoch": 0.9138199800328116, "grad_norm": 4.5625, "learning_rate": 9.67415454109205e-07, "loss": 6.9572, "step": 39130 }, { "epoch": 0.9140535144003129, "grad_norm": 5.15625, "learning_rate": 9.62212966834561e-07, "loss": 6.9895, "step": 39140 }, { "epoch": 0.9142870487678143, "grad_norm": 5.15625, "learning_rate": 9.570242315572075e-07, "loss": 6.9532, "step": 39150 }, { "epoch": 0.9145205831353157, "grad_norm": 4.75, "learning_rate": 9.518492512456207e-07, "loss": 6.9748, "step": 39160 }, { "epoch": 0.914754117502817, "grad_norm": 5.59375, "learning_rate": 9.466880288604041e-07, "loss": 6.935, "step": 39170 }, { "epoch": 0.9149876518703184, "grad_norm": 4.15625, "learning_rate": 9.415405673542849e-07, "loss": 6.9898, "step": 39180 }, { "epoch": 0.9152211862378197, "grad_norm": 3.4375, "learning_rate": 9.364068696721184e-07, "loss": 7.0166, "step": 39190 }, { "epoch": 0.9154547206053211, "grad_norm": 3.828125, "learning_rate": 9.312869387508943e-07, "loss": 6.9604, "step": 39200 }, { "epoch": 0.9156882549728225, "grad_norm": 4.0, "learning_rate": 9.261807775197167e-07, "loss": 6.9272, "step": 39210 }, { "epoch": 0.9159217893403238, "grad_norm": 5.21875, "learning_rate": 9.210883888998212e-07, "loss": 6.9415, "step": 39220 }, { "epoch": 0.9161553237078252, "grad_norm": 3.921875, "learning_rate": 9.16009775804555e-07, "loss": 6.9697, "step": 39230 }, { "epoch": 0.9163888580753266, "grad_norm": 3.59375, "learning_rate": 9.109449411393883e-07, "loss": 6.9798, "step": 39240 }, { "epoch": 0.9166223924428278, "grad_norm": 4.9375, "learning_rate": 9.058938878019119e-07, "loss": 6.9303, "step": 39250 }, { "epoch": 0.9168559268103292, "grad_norm": 5.84375, "learning_rate": 9.008566186818223e-07, "loss": 6.9527, "step": 39260 }, { "epoch": 0.9170894611778306, "grad_norm": 3.90625, "learning_rate": 8.958331366609423e-07, "loss": 6.9633, "step": 39270 }, { "epoch": 0.9173229955453319, "grad_norm": 5.09375, "learning_rate": 8.90823444613198e-07, "loss": 6.9481, "step": 39280 }, { "epoch": 0.9175565299128333, "grad_norm": 6.03125, "learning_rate": 8.858275454046383e-07, "loss": 6.9575, "step": 39290 }, { "epoch": 0.9177900642803346, "grad_norm": 4.34375, "learning_rate": 8.808454418934021e-07, "loss": 7.0329, "step": 39300 }, { "epoch": 0.918023598647836, "grad_norm": 4.5, "learning_rate": 8.758771369297536e-07, "loss": 6.9998, "step": 39310 }, { "epoch": 0.9182571330153374, "grad_norm": 3.875, "learning_rate": 8.709226333560499e-07, "loss": 6.9706, "step": 39320 }, { "epoch": 0.9184906673828387, "grad_norm": 5.46875, "learning_rate": 8.659819340067654e-07, "loss": 6.93, "step": 39330 }, { "epoch": 0.9187242017503401, "grad_norm": 4.15625, "learning_rate": 8.610550417084667e-07, "loss": 6.932, "step": 39340 }, { "epoch": 0.9189577361178415, "grad_norm": 3.84375, "learning_rate": 8.561419592798215e-07, "loss": 6.9703, "step": 39350 }, { "epoch": 0.9191912704853428, "grad_norm": 3.921875, "learning_rate": 8.512426895316061e-07, "loss": 6.8805, "step": 39360 }, { "epoch": 0.9194248048528442, "grad_norm": 4.03125, "learning_rate": 8.463572352666815e-07, "loss": 6.8921, "step": 39370 }, { "epoch": 0.9196583392203456, "grad_norm": 3.90625, "learning_rate": 8.414855992800203e-07, "loss": 6.9927, "step": 39380 }, { "epoch": 0.9198918735878469, "grad_norm": 4.4375, "learning_rate": 8.366277843586707e-07, "loss": 6.9847, "step": 39390 }, { "epoch": 0.9201254079553483, "grad_norm": 5.25, "learning_rate": 8.317837932817929e-07, "loss": 6.9176, "step": 39400 }, { "epoch": 0.9203589423228495, "grad_norm": 4.6875, "learning_rate": 8.269536288206286e-07, "loss": 6.9077, "step": 39410 }, { "epoch": 0.9205924766903509, "grad_norm": 4.09375, "learning_rate": 8.221372937385091e-07, "loss": 6.9768, "step": 39420 }, { "epoch": 0.9208260110578523, "grad_norm": 4.625, "learning_rate": 8.173347907908579e-07, "loss": 6.9197, "step": 39430 }, { "epoch": 0.9210595454253536, "grad_norm": 4.21875, "learning_rate": 8.125461227251774e-07, "loss": 7.0089, "step": 39440 }, { "epoch": 0.921293079792855, "grad_norm": 5.125, "learning_rate": 8.077712922810649e-07, "loss": 6.9058, "step": 39450 }, { "epoch": 0.9215266141603564, "grad_norm": 4.9375, "learning_rate": 8.030103021901964e-07, "loss": 6.9034, "step": 39460 }, { "epoch": 0.9217601485278577, "grad_norm": 4.5625, "learning_rate": 7.982631551763292e-07, "loss": 6.9642, "step": 39470 }, { "epoch": 0.9219936828953591, "grad_norm": 4.78125, "learning_rate": 7.935298539553049e-07, "loss": 6.8363, "step": 39480 }, { "epoch": 0.9222272172628605, "grad_norm": 4.1875, "learning_rate": 7.88810401235035e-07, "loss": 6.9612, "step": 39490 }, { "epoch": 0.9224607516303618, "grad_norm": 4.09375, "learning_rate": 7.841047997155238e-07, "loss": 6.9913, "step": 39500 }, { "epoch": 0.9224607516303618, "eval_loss": 6.951651096343994, "eval_runtime": 78.9146, "eval_samples_per_second": 12.672, "eval_steps_per_second": 12.672, "step": 39500 }, { "epoch": 0.9226942859978632, "grad_norm": 5.375, "learning_rate": 7.794130520888288e-07, "loss": 7.0239, "step": 39510 }, { "epoch": 0.9229278203653645, "grad_norm": 4.3125, "learning_rate": 7.747351610391057e-07, "loss": 6.9941, "step": 39520 }, { "epoch": 0.9231613547328659, "grad_norm": 4.4375, "learning_rate": 7.700711292425722e-07, "loss": 6.9496, "step": 39530 }, { "epoch": 0.9233948891003673, "grad_norm": 5.5, "learning_rate": 7.654209593675077e-07, "loss": 6.919, "step": 39540 }, { "epoch": 0.9236284234678686, "grad_norm": 4.1875, "learning_rate": 7.607846540742786e-07, "loss": 6.9386, "step": 39550 }, { "epoch": 0.92386195783537, "grad_norm": 4.53125, "learning_rate": 7.561622160153076e-07, "loss": 7.0189, "step": 39560 }, { "epoch": 0.9240954922028713, "grad_norm": 3.65625, "learning_rate": 7.515536478350848e-07, "loss": 6.9265, "step": 39570 }, { "epoch": 0.9243290265703726, "grad_norm": 5.15625, "learning_rate": 7.469589521701737e-07, "loss": 6.9678, "step": 39580 }, { "epoch": 0.924562560937874, "grad_norm": 4.5, "learning_rate": 7.423781316491962e-07, "loss": 7.008, "step": 39590 }, { "epoch": 0.9247960953053754, "grad_norm": 4.90625, "learning_rate": 7.378111888928341e-07, "loss": 6.9509, "step": 39600 }, { "epoch": 0.9250296296728767, "grad_norm": 6.09375, "learning_rate": 7.332581265138277e-07, "loss": 6.9268, "step": 39610 }, { "epoch": 0.9252631640403781, "grad_norm": 5.75, "learning_rate": 7.287189471169853e-07, "loss": 7.0029, "step": 39620 }, { "epoch": 0.9254966984078794, "grad_norm": 4.5625, "learning_rate": 7.241936532991573e-07, "loss": 6.9116, "step": 39630 }, { "epoch": 0.9257302327753808, "grad_norm": 4.84375, "learning_rate": 7.196822476492781e-07, "loss": 6.8938, "step": 39640 }, { "epoch": 0.9259637671428822, "grad_norm": 3.625, "learning_rate": 7.15184732748303e-07, "loss": 6.9332, "step": 39650 }, { "epoch": 0.9261973015103835, "grad_norm": 4.625, "learning_rate": 7.107011111692652e-07, "loss": 6.8848, "step": 39660 }, { "epoch": 0.9264308358778849, "grad_norm": 5.125, "learning_rate": 7.062313854772406e-07, "loss": 6.9454, "step": 39670 }, { "epoch": 0.9266643702453863, "grad_norm": 4.21875, "learning_rate": 7.017755582293478e-07, "loss": 6.8942, "step": 39680 }, { "epoch": 0.9268979046128876, "grad_norm": 4.34375, "learning_rate": 6.97333631974767e-07, "loss": 7.0221, "step": 39690 }, { "epoch": 0.927131438980389, "grad_norm": 4.59375, "learning_rate": 6.929056092547209e-07, "loss": 6.9206, "step": 39700 }, { "epoch": 0.9273649733478903, "grad_norm": 4.40625, "learning_rate": 6.884914926024805e-07, "loss": 6.8799, "step": 39710 }, { "epoch": 0.9275985077153917, "grad_norm": 4.75, "learning_rate": 6.840912845433505e-07, "loss": 6.9056, "step": 39720 }, { "epoch": 0.927832042082893, "grad_norm": 4.21875, "learning_rate": 6.797049875946926e-07, "loss": 6.9773, "step": 39730 }, { "epoch": 0.9280655764503943, "grad_norm": 4.3125, "learning_rate": 6.75332604265902e-07, "loss": 6.9084, "step": 39740 }, { "epoch": 0.9282991108178957, "grad_norm": 6.0, "learning_rate": 6.709741370584083e-07, "loss": 6.9798, "step": 39750 }, { "epoch": 0.9285326451853971, "grad_norm": 3.796875, "learning_rate": 6.666295884657004e-07, "loss": 6.9419, "step": 39760 }, { "epoch": 0.9287661795528984, "grad_norm": 5.375, "learning_rate": 6.622989609732788e-07, "loss": 6.9645, "step": 39770 }, { "epoch": 0.9289997139203998, "grad_norm": 5.03125, "learning_rate": 6.57982257058698e-07, "loss": 6.9016, "step": 39780 }, { "epoch": 0.9292332482879012, "grad_norm": 3.9375, "learning_rate": 6.53679479191538e-07, "loss": 6.9523, "step": 39790 }, { "epoch": 0.9294667826554025, "grad_norm": 4.3125, "learning_rate": 6.493906298334185e-07, "loss": 6.9334, "step": 39800 }, { "epoch": 0.9297003170229039, "grad_norm": 5.21875, "learning_rate": 6.451157114379797e-07, "loss": 6.968, "step": 39810 }, { "epoch": 0.9299338513904052, "grad_norm": 4.6875, "learning_rate": 6.408547264509013e-07, "loss": 6.8701, "step": 39820 }, { "epoch": 0.9301673857579066, "grad_norm": 3.953125, "learning_rate": 6.366076773098945e-07, "loss": 6.9794, "step": 39830 }, { "epoch": 0.930400920125408, "grad_norm": 4.5, "learning_rate": 6.32374566444685e-07, "loss": 6.9903, "step": 39840 }, { "epoch": 0.9306344544929093, "grad_norm": 4.65625, "learning_rate": 6.281553962770387e-07, "loss": 6.9515, "step": 39850 }, { "epoch": 0.9308679888604107, "grad_norm": 4.3125, "learning_rate": 6.239501692207328e-07, "loss": 6.9379, "step": 39860 }, { "epoch": 0.9311015232279121, "grad_norm": 5.375, "learning_rate": 6.197588876815763e-07, "loss": 7.0247, "step": 39870 }, { "epoch": 0.9313350575954134, "grad_norm": 7.0, "learning_rate": 6.155815540573983e-07, "loss": 6.9463, "step": 39880 }, { "epoch": 0.9315685919629147, "grad_norm": 4.8125, "learning_rate": 6.114181707380484e-07, "loss": 6.9607, "step": 39890 }, { "epoch": 0.9318021263304161, "grad_norm": 3.9375, "learning_rate": 6.072687401053961e-07, "loss": 6.8695, "step": 39900 }, { "epoch": 0.9320356606979174, "grad_norm": 4.0625, "learning_rate": 6.031332645333232e-07, "loss": 6.9452, "step": 39910 }, { "epoch": 0.9322691950654188, "grad_norm": 3.65625, "learning_rate": 5.990117463877343e-07, "loss": 6.9687, "step": 39920 }, { "epoch": 0.9325027294329201, "grad_norm": 4.5625, "learning_rate": 5.949041880265432e-07, "loss": 6.9514, "step": 39930 }, { "epoch": 0.9327362638004215, "grad_norm": 4.4375, "learning_rate": 5.908105917996842e-07, "loss": 6.9018, "step": 39940 }, { "epoch": 0.9329697981679229, "grad_norm": 5.84375, "learning_rate": 5.867309600491006e-07, "loss": 7.0325, "step": 39950 }, { "epoch": 0.9332033325354242, "grad_norm": 3.921875, "learning_rate": 5.826652951087447e-07, "loss": 6.9778, "step": 39960 }, { "epoch": 0.9334368669029256, "grad_norm": 3.84375, "learning_rate": 5.786135993045783e-07, "loss": 6.9557, "step": 39970 }, { "epoch": 0.933670401270427, "grad_norm": 6.0625, "learning_rate": 5.745758749545749e-07, "loss": 6.9358, "step": 39980 }, { "epoch": 0.9339039356379283, "grad_norm": 4.15625, "learning_rate": 5.705521243687117e-07, "loss": 7.0155, "step": 39990 }, { "epoch": 0.9341374700054297, "grad_norm": 4.78125, "learning_rate": 5.665423498489724e-07, "loss": 6.9229, "step": 40000 }, { "epoch": 0.9341374700054297, "eval_loss": 6.951622009277344, "eval_runtime": 78.6567, "eval_samples_per_second": 12.713, "eval_steps_per_second": 12.713, "step": 40000 } ], "logging_steps": 10, "max_steps": 42820, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.65313243152384e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }