| { |
| "best_global_step": 720, |
| "best_metric": 0.23617739975452423, |
| "best_model_checkpoint": "saves_stability/prefix-tuning/llama-3-8b-instruct/train_copa_1757340251/checkpoint-720", |
| "epoch": 20.0, |
| "eval_steps": 180, |
| "global_step": 3600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.027777777777777776, |
| "grad_norm": 127.00370788574219, |
| "learning_rate": 5.555555555555556e-07, |
| "loss": 8.3103, |
| "num_input_tokens_seen": 784, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 122.15016174316406, |
| "learning_rate": 1.25e-06, |
| "loss": 7.8191, |
| "num_input_tokens_seen": 1552, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 112.4898452758789, |
| "learning_rate": 1.9444444444444444e-06, |
| "loss": 6.8534, |
| "num_input_tokens_seen": 2320, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 148.39366149902344, |
| "learning_rate": 2.638888888888889e-06, |
| "loss": 5.5534, |
| "num_input_tokens_seen": 3088, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 87.2863998413086, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 3.9203, |
| "num_input_tokens_seen": 3856, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 81.75582885742188, |
| "learning_rate": 4.027777777777779e-06, |
| "loss": 2.4315, |
| "num_input_tokens_seen": 4608, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.19444444444444445, |
| "grad_norm": 42.17738723754883, |
| "learning_rate": 4.722222222222222e-06, |
| "loss": 1.3174, |
| "num_input_tokens_seen": 5360, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 41.45945739746094, |
| "learning_rate": 5.416666666666667e-06, |
| "loss": 0.6433, |
| "num_input_tokens_seen": 6112, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 117.64752197265625, |
| "learning_rate": 6.111111111111111e-06, |
| "loss": 0.4838, |
| "num_input_tokens_seen": 6864, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 78.57173156738281, |
| "learning_rate": 6.805555555555556e-06, |
| "loss": 0.4224, |
| "num_input_tokens_seen": 7616, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3055555555555556, |
| "grad_norm": 54.80699157714844, |
| "learning_rate": 7.5e-06, |
| "loss": 0.3504, |
| "num_input_tokens_seen": 8400, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 18.32509422302246, |
| "learning_rate": 8.194444444444445e-06, |
| "loss": 0.3481, |
| "num_input_tokens_seen": 9152, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3611111111111111, |
| "grad_norm": 57.70254135131836, |
| "learning_rate": 8.88888888888889e-06, |
| "loss": 0.3422, |
| "num_input_tokens_seen": 9952, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 17.409032821655273, |
| "learning_rate": 9.583333333333334e-06, |
| "loss": 0.2373, |
| "num_input_tokens_seen": 10720, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 27.374752044677734, |
| "learning_rate": 1.0277777777777777e-05, |
| "loss": 0.311, |
| "num_input_tokens_seen": 11488, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 7.99110746383667, |
| "learning_rate": 1.0972222222222223e-05, |
| "loss": 0.2879, |
| "num_input_tokens_seen": 12272, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4722222222222222, |
| "grad_norm": 32.56344985961914, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.2769, |
| "num_input_tokens_seen": 13040, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 24.26778221130371, |
| "learning_rate": 1.2361111111111112e-05, |
| "loss": 0.4031, |
| "num_input_tokens_seen": 13792, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5277777777777778, |
| "grad_norm": 13.13208293914795, |
| "learning_rate": 1.3055555555555557e-05, |
| "loss": 0.3413, |
| "num_input_tokens_seen": 14544, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 14.814593315124512, |
| "learning_rate": 1.3750000000000002e-05, |
| "loss": 0.2835, |
| "num_input_tokens_seen": 15328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 2.4372589588165283, |
| "learning_rate": 1.4444444444444444e-05, |
| "loss": 0.2891, |
| "num_input_tokens_seen": 16064, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 3.005253314971924, |
| "learning_rate": 1.5138888888888888e-05, |
| "loss": 0.2217, |
| "num_input_tokens_seen": 16832, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6388888888888888, |
| "grad_norm": 8.54447078704834, |
| "learning_rate": 1.5833333333333333e-05, |
| "loss": 0.2413, |
| "num_input_tokens_seen": 17616, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 17.670551300048828, |
| "learning_rate": 1.6527777777777777e-05, |
| "loss": 0.2773, |
| "num_input_tokens_seen": 18368, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 3.102203607559204, |
| "learning_rate": 1.7222222222222224e-05, |
| "loss": 0.2652, |
| "num_input_tokens_seen": 19120, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 7.354095935821533, |
| "learning_rate": 1.7916666666666667e-05, |
| "loss": 0.1998, |
| "num_input_tokens_seen": 19872, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 9.806069374084473, |
| "learning_rate": 1.861111111111111e-05, |
| "loss": 0.3866, |
| "num_input_tokens_seen": 20608, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 5.3900299072265625, |
| "learning_rate": 1.9305555555555558e-05, |
| "loss": 0.3152, |
| "num_input_tokens_seen": 21376, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8055555555555556, |
| "grad_norm": 7.56992769241333, |
| "learning_rate": 2e-05, |
| "loss": 0.2778, |
| "num_input_tokens_seen": 22080, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 2.2404680252075195, |
| "learning_rate": 2.0694444444444445e-05, |
| "loss": 0.2645, |
| "num_input_tokens_seen": 22880, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8611111111111112, |
| "grad_norm": 12.300857543945312, |
| "learning_rate": 2.138888888888889e-05, |
| "loss": 0.2734, |
| "num_input_tokens_seen": 23664, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.1452852487564087, |
| "learning_rate": 2.2083333333333333e-05, |
| "loss": 0.2669, |
| "num_input_tokens_seen": 24416, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 0.9725009799003601, |
| "learning_rate": 2.277777777777778e-05, |
| "loss": 0.2419, |
| "num_input_tokens_seen": 25200, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 0.5348834991455078, |
| "learning_rate": 2.3472222222222223e-05, |
| "loss": 0.2372, |
| "num_input_tokens_seen": 25936, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 3.6036555767059326, |
| "learning_rate": 2.4166666666666667e-05, |
| "loss": 0.1935, |
| "num_input_tokens_seen": 26672, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.211343288421631, |
| "learning_rate": 2.4861111111111114e-05, |
| "loss": 0.5261, |
| "num_input_tokens_seen": 27424, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.26661795377731323, |
| "eval_runtime": 0.86, |
| "eval_samples_per_second": 46.512, |
| "eval_steps_per_second": 23.256, |
| "num_input_tokens_seen": 27424, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0277777777777777, |
| "grad_norm": 2.1466715335845947, |
| "learning_rate": 2.5555555555555554e-05, |
| "loss": 0.2197, |
| "num_input_tokens_seen": 28240, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.0555555555555556, |
| "grad_norm": 1.111944317817688, |
| "learning_rate": 2.625e-05, |
| "loss": 0.264, |
| "num_input_tokens_seen": 28976, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0833333333333333, |
| "grad_norm": 0.7908123135566711, |
| "learning_rate": 2.6944444444444445e-05, |
| "loss": 0.235, |
| "num_input_tokens_seen": 29776, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 8.314372062683105, |
| "learning_rate": 2.7638888888888892e-05, |
| "loss": 0.2339, |
| "num_input_tokens_seen": 30496, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1388888888888888, |
| "grad_norm": 2.806135416030884, |
| "learning_rate": 2.8333333333333335e-05, |
| "loss": 0.1747, |
| "num_input_tokens_seen": 31216, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 8.267914772033691, |
| "learning_rate": 2.9027777777777782e-05, |
| "loss": 0.4313, |
| "num_input_tokens_seen": 31984, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1944444444444444, |
| "grad_norm": 2.079704999923706, |
| "learning_rate": 2.9722222222222223e-05, |
| "loss": 0.1819, |
| "num_input_tokens_seen": 32704, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 2.2734806537628174, |
| "learning_rate": 3.0416666666666666e-05, |
| "loss": 0.3006, |
| "num_input_tokens_seen": 33440, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.48634809255599976, |
| "learning_rate": 3.111111111111111e-05, |
| "loss": 0.2362, |
| "num_input_tokens_seen": 34208, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2777777777777777, |
| "grad_norm": 1.6364030838012695, |
| "learning_rate": 3.180555555555556e-05, |
| "loss": 0.2127, |
| "num_input_tokens_seen": 34960, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3055555555555556, |
| "grad_norm": 1.138373851776123, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.2249, |
| "num_input_tokens_seen": 35728, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.3072881698608398, |
| "learning_rate": 3.3194444444444444e-05, |
| "loss": 0.248, |
| "num_input_tokens_seen": 36464, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.3611111111111112, |
| "grad_norm": 2.485381603240967, |
| "learning_rate": 3.388888888888889e-05, |
| "loss": 0.3014, |
| "num_input_tokens_seen": 37264, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.2830923795700073, |
| "learning_rate": 3.458333333333333e-05, |
| "loss": 0.2377, |
| "num_input_tokens_seen": 38000, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4166666666666667, |
| "grad_norm": 1.9528510570526123, |
| "learning_rate": 3.527777777777778e-05, |
| "loss": 0.2443, |
| "num_input_tokens_seen": 38752, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.8921493887901306, |
| "learning_rate": 3.5972222222222225e-05, |
| "loss": 0.2766, |
| "num_input_tokens_seen": 39520, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4722222222222223, |
| "grad_norm": 0.5744603872299194, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.2485, |
| "num_input_tokens_seen": 40288, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 4.022632122039795, |
| "learning_rate": 3.736111111111111e-05, |
| "loss": 0.2624, |
| "num_input_tokens_seen": 41056, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5277777777777777, |
| "grad_norm": 2.419853687286377, |
| "learning_rate": 3.805555555555555e-05, |
| "loss": 0.2496, |
| "num_input_tokens_seen": 41808, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 1.003553867340088, |
| "learning_rate": 3.875e-05, |
| "loss": 0.2213, |
| "num_input_tokens_seen": 42608, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5833333333333335, |
| "grad_norm": 1.9896985292434692, |
| "learning_rate": 3.944444444444445e-05, |
| "loss": 0.2944, |
| "num_input_tokens_seen": 43344, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.6111111111111112, |
| "grad_norm": 0.9683464169502258, |
| "learning_rate": 4.0138888888888894e-05, |
| "loss": 0.2175, |
| "num_input_tokens_seen": 44096, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.6388888888888888, |
| "grad_norm": 4.231605529785156, |
| "learning_rate": 4.0833333333333334e-05, |
| "loss": 0.2541, |
| "num_input_tokens_seen": 44832, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 3.3105416297912598, |
| "learning_rate": 4.152777777777778e-05, |
| "loss": 0.3017, |
| "num_input_tokens_seen": 45568, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6944444444444444, |
| "grad_norm": 5.141078472137451, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.3072, |
| "num_input_tokens_seen": 46336, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.7222222222222223, |
| "grad_norm": 6.590628147125244, |
| "learning_rate": 4.291666666666667e-05, |
| "loss": 0.2321, |
| "num_input_tokens_seen": 47104, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.4629476070404053, |
| "learning_rate": 4.3611111111111116e-05, |
| "loss": 0.3229, |
| "num_input_tokens_seen": 47888, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.34415674209594727, |
| "learning_rate": 4.4305555555555556e-05, |
| "loss": 0.2341, |
| "num_input_tokens_seen": 48672, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.8055555555555556, |
| "grad_norm": 1.390702724456787, |
| "learning_rate": 4.5e-05, |
| "loss": 0.241, |
| "num_input_tokens_seen": 49440, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 0.3445907533168793, |
| "learning_rate": 4.569444444444444e-05, |
| "loss": 0.2414, |
| "num_input_tokens_seen": 50192, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.8611111111111112, |
| "grad_norm": 0.7393403649330139, |
| "learning_rate": 4.638888888888889e-05, |
| "loss": 0.232, |
| "num_input_tokens_seen": 50992, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.7771939635276794, |
| "learning_rate": 4.708333333333334e-05, |
| "loss": 0.2534, |
| "num_input_tokens_seen": 51744, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.9166666666666665, |
| "grad_norm": 0.13477805256843567, |
| "learning_rate": 4.7777777777777784e-05, |
| "loss": 0.2324, |
| "num_input_tokens_seen": 52512, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 0.6720524430274963, |
| "learning_rate": 4.8472222222222224e-05, |
| "loss": 0.2067, |
| "num_input_tokens_seen": 53296, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.9722222222222223, |
| "grad_norm": 0.5370499491691589, |
| "learning_rate": 4.9166666666666665e-05, |
| "loss": 0.173, |
| "num_input_tokens_seen": 54080, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.276262044906616, |
| "learning_rate": 4.986111111111111e-05, |
| "loss": 0.4265, |
| "num_input_tokens_seen": 54832, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.25172701478004456, |
| "eval_runtime": 0.8641, |
| "eval_samples_per_second": 46.29, |
| "eval_steps_per_second": 23.145, |
| "num_input_tokens_seen": 54832, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.0277777777777777, |
| "grad_norm": 0.8999596238136292, |
| "learning_rate": 4.99998119647914e-05, |
| "loss": 0.2602, |
| "num_input_tokens_seen": 55600, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.0555555555555554, |
| "grad_norm": 0.34948647022247314, |
| "learning_rate": 4.999904807660428e-05, |
| "loss": 0.2329, |
| "num_input_tokens_seen": 56352, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 0.5982630252838135, |
| "learning_rate": 4.999769660117901e-05, |
| "loss": 0.264, |
| "num_input_tokens_seen": 57136, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 0.5527498722076416, |
| "learning_rate": 4.999575757028119e-05, |
| "loss": 0.227, |
| "num_input_tokens_seen": 57856, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.138888888888889, |
| "grad_norm": 0.48654255270957947, |
| "learning_rate": 4.9993231029486544e-05, |
| "loss": 0.2217, |
| "num_input_tokens_seen": 58592, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.1666666666666665, |
| "grad_norm": 0.8106479048728943, |
| "learning_rate": 4.999011703817986e-05, |
| "loss": 0.2329, |
| "num_input_tokens_seen": 59328, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.1944444444444446, |
| "grad_norm": 0.26983633637428284, |
| "learning_rate": 4.9986415669553586e-05, |
| "loss": 0.2497, |
| "num_input_tokens_seen": 60064, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 3.985640287399292, |
| "learning_rate": 4.998212701060612e-05, |
| "loss": 0.2642, |
| "num_input_tokens_seen": 60800, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.9668533205986023, |
| "learning_rate": 4.997725116213973e-05, |
| "loss": 0.2568, |
| "num_input_tokens_seen": 61536, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.2777777777777777, |
| "grad_norm": 1.4461299180984497, |
| "learning_rate": 4.997178823875826e-05, |
| "loss": 0.2494, |
| "num_input_tokens_seen": 62304, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.3055555555555554, |
| "grad_norm": 0.20660564303398132, |
| "learning_rate": 4.996573836886435e-05, |
| "loss": 0.2262, |
| "num_input_tokens_seen": 63088, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.8261621594429016, |
| "learning_rate": 4.995910169465646e-05, |
| "loss": 0.2546, |
| "num_input_tokens_seen": 63824, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.361111111111111, |
| "grad_norm": 0.1395564079284668, |
| "learning_rate": 4.9951878372125547e-05, |
| "loss": 0.2324, |
| "num_input_tokens_seen": 64608, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.388888888888889, |
| "grad_norm": 0.7593284249305725, |
| "learning_rate": 4.994406857105136e-05, |
| "loss": 0.2425, |
| "num_input_tokens_seen": 65360, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.4166666666666665, |
| "grad_norm": 0.5143575072288513, |
| "learning_rate": 4.993567247499845e-05, |
| "loss": 0.243, |
| "num_input_tokens_seen": 66144, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.6070240139961243, |
| "learning_rate": 4.9926690281311904e-05, |
| "loss": 0.2422, |
| "num_input_tokens_seen": 66896, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.4722222222222223, |
| "grad_norm": 0.119643434882164, |
| "learning_rate": 4.9917122201112656e-05, |
| "loss": 0.2333, |
| "num_input_tokens_seen": 67648, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.589148223400116, |
| "learning_rate": 4.9906968459292524e-05, |
| "loss": 0.2297, |
| "num_input_tokens_seen": 68416, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.5277777777777777, |
| "grad_norm": 0.4487152695655823, |
| "learning_rate": 4.9896229294508976e-05, |
| "loss": 0.2168, |
| "num_input_tokens_seen": 69184, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 0.43815767765045166, |
| "learning_rate": 4.988490495917947e-05, |
| "loss": 0.2522, |
| "num_input_tokens_seen": 69984, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.5833333333333335, |
| "grad_norm": 0.4412115216255188, |
| "learning_rate": 4.987299571947553e-05, |
| "loss": 0.2473, |
| "num_input_tokens_seen": 70752, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.611111111111111, |
| "grad_norm": 0.0880437046289444, |
| "learning_rate": 4.9860501855316514e-05, |
| "loss": 0.2256, |
| "num_input_tokens_seen": 71504, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.638888888888889, |
| "grad_norm": 0.30497997999191284, |
| "learning_rate": 4.9847423660363e-05, |
| "loss": 0.1775, |
| "num_input_tokens_seen": 72224, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.5846831798553467, |
| "learning_rate": 4.983376144200992e-05, |
| "loss": 0.2647, |
| "num_input_tokens_seen": 72976, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.6944444444444446, |
| "grad_norm": 0.2081085443496704, |
| "learning_rate": 4.981951552137929e-05, |
| "loss": 0.3079, |
| "num_input_tokens_seen": 73728, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.7222222222222223, |
| "grad_norm": 0.12143510580062866, |
| "learning_rate": 4.980468623331273e-05, |
| "loss": 0.237, |
| "num_input_tokens_seen": 74496, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.0849996879696846, |
| "learning_rate": 4.978927392636351e-05, |
| "loss": 0.2329, |
| "num_input_tokens_seen": 75280, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.11367756128311157, |
| "learning_rate": 4.9773278962788436e-05, |
| "loss": 0.2298, |
| "num_input_tokens_seen": 76048, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.8055555555555554, |
| "grad_norm": 0.08262645453214645, |
| "learning_rate": 4.975670171853926e-05, |
| "loss": 0.24, |
| "num_input_tokens_seen": 76832, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.8333333333333335, |
| "grad_norm": 0.09589759260416031, |
| "learning_rate": 4.973954258325392e-05, |
| "loss": 0.2383, |
| "num_input_tokens_seen": 77568, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.861111111111111, |
| "grad_norm": 0.12209156900644302, |
| "learning_rate": 4.972180196024733e-05, |
| "loss": 0.2298, |
| "num_input_tokens_seen": 78352, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.27318504452705383, |
| "learning_rate": 4.97034802665019e-05, |
| "loss": 0.2255, |
| "num_input_tokens_seen": 79152, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 0.5788432955741882, |
| "learning_rate": 4.9684577932657786e-05, |
| "loss": 0.2326, |
| "num_input_tokens_seen": 79904, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.9444444444444446, |
| "grad_norm": 0.4293730556964874, |
| "learning_rate": 4.966509540300269e-05, |
| "loss": 0.269, |
| "num_input_tokens_seen": 80624, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.9722222222222223, |
| "grad_norm": 0.09981006383895874, |
| "learning_rate": 4.9645033135461494e-05, |
| "loss": 0.2429, |
| "num_input_tokens_seen": 81376, |
| "step": 535 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.46701985597610474, |
| "learning_rate": 4.962439160158544e-05, |
| "loss": 0.2294, |
| "num_input_tokens_seen": 82160, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.23996683955192566, |
| "eval_runtime": 0.8775, |
| "eval_samples_per_second": 45.586, |
| "eval_steps_per_second": 22.793, |
| "num_input_tokens_seen": 82160, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.0277777777777777, |
| "grad_norm": 0.09257233142852783, |
| "learning_rate": 4.960317128654108e-05, |
| "loss": 0.2281, |
| "num_input_tokens_seen": 82880, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.0555555555555554, |
| "grad_norm": 0.19769765436649323, |
| "learning_rate": 4.958137268909887e-05, |
| "loss": 0.1958, |
| "num_input_tokens_seen": 83632, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.0833333333333335, |
| "grad_norm": 0.11292538791894913, |
| "learning_rate": 4.9558996321621405e-05, |
| "loss": 0.2951, |
| "num_input_tokens_seen": 84416, |
| "step": 555 |
| }, |
| { |
| "epoch": 3.111111111111111, |
| "grad_norm": 0.30781084299087524, |
| "learning_rate": 4.953604271005144e-05, |
| "loss": 0.2366, |
| "num_input_tokens_seen": 85184, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.138888888888889, |
| "grad_norm": 0.363540917634964, |
| "learning_rate": 4.951251239389948e-05, |
| "loss": 0.2303, |
| "num_input_tokens_seen": 85936, |
| "step": 565 |
| }, |
| { |
| "epoch": 3.1666666666666665, |
| "grad_norm": 0.053299520164728165, |
| "learning_rate": 4.9488405926231144e-05, |
| "loss": 0.2306, |
| "num_input_tokens_seen": 86688, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.1944444444444446, |
| "grad_norm": 0.030355053022503853, |
| "learning_rate": 4.946372387365409e-05, |
| "loss": 0.2238, |
| "num_input_tokens_seen": 87424, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.2222222222222223, |
| "grad_norm": 0.057436492294073105, |
| "learning_rate": 4.943846681630479e-05, |
| "loss": 0.214, |
| "num_input_tokens_seen": 88192, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.23630383610725403, |
| "learning_rate": 4.941263534783482e-05, |
| "loss": 0.2115, |
| "num_input_tokens_seen": 88976, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.2777777777777777, |
| "grad_norm": 0.092474564909935, |
| "learning_rate": 4.9386230075396964e-05, |
| "loss": 0.2541, |
| "num_input_tokens_seen": 89744, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.3055555555555554, |
| "grad_norm": 0.06676716357469559, |
| "learning_rate": 4.9359251619630886e-05, |
| "loss": 0.2639, |
| "num_input_tokens_seen": 90496, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.2717723250389099, |
| "learning_rate": 4.933170061464858e-05, |
| "loss": 0.2006, |
| "num_input_tokens_seen": 91280, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.361111111111111, |
| "grad_norm": 0.05707673728466034, |
| "learning_rate": 4.930357770801947e-05, |
| "loss": 0.225, |
| "num_input_tokens_seen": 92048, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.388888888888889, |
| "grad_norm": 0.0571831539273262, |
| "learning_rate": 4.9274883560755156e-05, |
| "loss": 0.2418, |
| "num_input_tokens_seen": 92800, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.4166666666666665, |
| "grad_norm": 0.26747772097587585, |
| "learning_rate": 4.924561884729391e-05, |
| "loss": 0.24, |
| "num_input_tokens_seen": 93568, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.4444444444444446, |
| "grad_norm": 0.026865195482969284, |
| "learning_rate": 4.921578425548482e-05, |
| "loss": 0.234, |
| "num_input_tokens_seen": 94304, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "grad_norm": 0.3754294216632843, |
| "learning_rate": 4.9185380486571595e-05, |
| "loss": 0.2314, |
| "num_input_tokens_seen": 95056, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.11914645880460739, |
| "learning_rate": 4.915440825517612e-05, |
| "loss": 0.2524, |
| "num_input_tokens_seen": 95856, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.5277777777777777, |
| "grad_norm": 0.361465185880661, |
| "learning_rate": 4.912286828928162e-05, |
| "loss": 0.254, |
| "num_input_tokens_seen": 96608, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 0.2721046507358551, |
| "learning_rate": 4.909076133021557e-05, |
| "loss": 0.2339, |
| "num_input_tokens_seen": 97344, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.5833333333333335, |
| "grad_norm": 0.2542001008987427, |
| "learning_rate": 4.9058088132632306e-05, |
| "loss": 0.224, |
| "num_input_tokens_seen": 98096, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.611111111111111, |
| "grad_norm": 0.2439340353012085, |
| "learning_rate": 4.9024849464495215e-05, |
| "loss": 0.2386, |
| "num_input_tokens_seen": 98864, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.638888888888889, |
| "grad_norm": 0.02642286755144596, |
| "learning_rate": 4.8991046107058735e-05, |
| "loss": 0.2301, |
| "num_input_tokens_seen": 99632, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 0.04620293900370598, |
| "learning_rate": 4.895667885484997e-05, |
| "loss": 0.2337, |
| "num_input_tokens_seen": 100400, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.6944444444444446, |
| "grad_norm": 0.042576540261507034, |
| "learning_rate": 4.892174851565004e-05, |
| "loss": 0.2402, |
| "num_input_tokens_seen": 101168, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.7222222222222223, |
| "grad_norm": 0.2519048750400543, |
| "learning_rate": 4.8886255910475054e-05, |
| "loss": 0.2318, |
| "num_input_tokens_seen": 101936, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.01728709600865841, |
| "learning_rate": 4.885020187355687e-05, |
| "loss": 0.2338, |
| "num_input_tokens_seen": 102656, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.7777777777777777, |
| "grad_norm": 0.23266607522964478, |
| "learning_rate": 4.881358725232342e-05, |
| "loss": 0.2338, |
| "num_input_tokens_seen": 103424, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.8055555555555554, |
| "grad_norm": 0.040413811802864075, |
| "learning_rate": 4.877641290737884e-05, |
| "loss": 0.2254, |
| "num_input_tokens_seen": 104208, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.8333333333333335, |
| "grad_norm": 0.050276659429073334, |
| "learning_rate": 4.873867971248324e-05, |
| "loss": 0.2327, |
| "num_input_tokens_seen": 104960, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.861111111111111, |
| "grad_norm": 0.03541669622063637, |
| "learning_rate": 4.870038855453213e-05, |
| "loss": 0.2327, |
| "num_input_tokens_seen": 105744, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 0.05416063964366913, |
| "learning_rate": 4.866154033353561e-05, |
| "loss": 0.2289, |
| "num_input_tokens_seen": 106512, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.9166666666666665, |
| "grad_norm": 0.015437111258506775, |
| "learning_rate": 4.86221359625972e-05, |
| "loss": 0.2532, |
| "num_input_tokens_seen": 107280, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.9444444444444446, |
| "grad_norm": 0.2228126972913742, |
| "learning_rate": 4.858217636789241e-05, |
| "loss": 0.2275, |
| "num_input_tokens_seen": 108080, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.9722222222222223, |
| "grad_norm": 0.2948314845561981, |
| "learning_rate": 4.854166248864689e-05, |
| "loss": 0.248, |
| "num_input_tokens_seen": 108848, |
| "step": 715 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.0426083505153656, |
| "learning_rate": 4.850059527711444e-05, |
| "loss": 0.2376, |
| "num_input_tokens_seen": 109632, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.23617739975452423, |
| "eval_runtime": 0.8738, |
| "eval_samples_per_second": 45.779, |
| "eval_steps_per_second": 22.89, |
| "num_input_tokens_seen": 109632, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.027777777777778, |
| "grad_norm": 0.037765853106975555, |
| "learning_rate": 4.84589756985546e-05, |
| "loss": 0.2697, |
| "num_input_tokens_seen": 110416, |
| "step": 725 |
| }, |
| { |
| "epoch": 4.055555555555555, |
| "grad_norm": 0.02199905924499035, |
| "learning_rate": 4.8416804731209945e-05, |
| "loss": 0.23, |
| "num_input_tokens_seen": 111200, |
| "step": 730 |
| }, |
| { |
| "epoch": 4.083333333333333, |
| "grad_norm": 0.2124335765838623, |
| "learning_rate": 4.8374083366283096e-05, |
| "loss": 0.2238, |
| "num_input_tokens_seen": 111936, |
| "step": 735 |
| }, |
| { |
| "epoch": 4.111111111111111, |
| "grad_norm": 0.22587046027183533, |
| "learning_rate": 4.833081260791345e-05, |
| "loss": 0.2309, |
| "num_input_tokens_seen": 112704, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.138888888888889, |
| "grad_norm": 0.03960296884179115, |
| "learning_rate": 4.828699347315356e-05, |
| "loss": 0.2446, |
| "num_input_tokens_seen": 113456, |
| "step": 745 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 0.056854937225580215, |
| "learning_rate": 4.82426269919452e-05, |
| "loss": 0.2357, |
| "num_input_tokens_seen": 114224, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.194444444444445, |
| "grad_norm": 0.19557850062847137, |
| "learning_rate": 4.8197714207095205e-05, |
| "loss": 0.207, |
| "num_input_tokens_seen": 114976, |
| "step": 755 |
| }, |
| { |
| "epoch": 4.222222222222222, |
| "grad_norm": 0.20211917161941528, |
| "learning_rate": 4.815225617425095e-05, |
| "loss": 0.226, |
| "num_input_tokens_seen": 115728, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.057014793157577515, |
| "learning_rate": 4.8106253961875506e-05, |
| "loss": 0.2172, |
| "num_input_tokens_seen": 116496, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.277777777777778, |
| "grad_norm": 0.0766286551952362, |
| "learning_rate": 4.805970865122257e-05, |
| "loss": 0.2233, |
| "num_input_tokens_seen": 117248, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.305555555555555, |
| "grad_norm": 0.3298178017139435, |
| "learning_rate": 4.8012621336311016e-05, |
| "loss": 0.2762, |
| "num_input_tokens_seen": 118000, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.333333333333333, |
| "grad_norm": 0.04625969007611275, |
| "learning_rate": 4.7964993123899195e-05, |
| "loss": 0.2508, |
| "num_input_tokens_seen": 118768, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.361111111111111, |
| "grad_norm": 0.037253882735967636, |
| "learning_rate": 4.791682513345892e-05, |
| "loss": 0.205, |
| "num_input_tokens_seen": 119520, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.388888888888889, |
| "grad_norm": 0.24698075652122498, |
| "learning_rate": 4.786811849714918e-05, |
| "loss": 0.2444, |
| "num_input_tokens_seen": 120288, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.416666666666667, |
| "grad_norm": 0.02930288575589657, |
| "learning_rate": 4.781887435978947e-05, |
| "loss": 0.24, |
| "num_input_tokens_seen": 121040, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.2211817353963852, |
| "learning_rate": 4.776909387883292e-05, |
| "loss": 0.232, |
| "num_input_tokens_seen": 121824, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.472222222222222, |
| "grad_norm": 0.04251676797866821, |
| "learning_rate": 4.771877822433911e-05, |
| "loss": 0.2336, |
| "num_input_tokens_seen": 122592, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.2373417764902115, |
| "learning_rate": 4.766792857894652e-05, |
| "loss": 0.2232, |
| "num_input_tokens_seen": 123360, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.527777777777778, |
| "grad_norm": 0.09388674795627594, |
| "learning_rate": 4.761654613784477e-05, |
| "loss": 0.2362, |
| "num_input_tokens_seen": 124128, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.555555555555555, |
| "grad_norm": 0.26708221435546875, |
| "learning_rate": 4.756463210874652e-05, |
| "loss": 0.2136, |
| "num_input_tokens_seen": 124864, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 0.7205080986022949, |
| "learning_rate": 4.751218771185906e-05, |
| "loss": 0.2201, |
| "num_input_tokens_seen": 125632, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.611111111111111, |
| "grad_norm": 0.19915883243083954, |
| "learning_rate": 4.745921417985566e-05, |
| "loss": 0.2112, |
| "num_input_tokens_seen": 126384, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.638888888888889, |
| "grad_norm": 0.1531069576740265, |
| "learning_rate": 4.740571275784659e-05, |
| "loss": 0.2945, |
| "num_input_tokens_seen": 127152, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.2744501829147339, |
| "learning_rate": 4.735168470334984e-05, |
| "loss": 0.2561, |
| "num_input_tokens_seen": 127920, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.694444444444445, |
| "grad_norm": 0.21660907566547394, |
| "learning_rate": 4.729713128626158e-05, |
| "loss": 0.248, |
| "num_input_tokens_seen": 128688, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.722222222222222, |
| "grad_norm": 0.06808862090110779, |
| "learning_rate": 4.72420537888263e-05, |
| "loss": 0.2383, |
| "num_input_tokens_seen": 129456, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.04663322493433952, |
| "learning_rate": 4.7186453505606676e-05, |
| "loss": 0.2379, |
| "num_input_tokens_seen": 130208, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.777777777777778, |
| "grad_norm": 0.22233685851097107, |
| "learning_rate": 4.713033174345314e-05, |
| "loss": 0.2327, |
| "num_input_tokens_seen": 130960, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.805555555555555, |
| "grad_norm": 0.234144389629364, |
| "learning_rate": 4.707368982147318e-05, |
| "loss": 0.2366, |
| "num_input_tokens_seen": 131712, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.833333333333333, |
| "grad_norm": 0.22509251534938812, |
| "learning_rate": 4.701652907100029e-05, |
| "loss": 0.2256, |
| "num_input_tokens_seen": 132496, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "grad_norm": 0.26232126355171204, |
| "learning_rate": 4.695885083556275e-05, |
| "loss": 0.2321, |
| "num_input_tokens_seen": 133280, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.888888888888889, |
| "grad_norm": 0.036304328590631485, |
| "learning_rate": 4.6900656470851964e-05, |
| "loss": 0.2288, |
| "num_input_tokens_seen": 134064, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.916666666666667, |
| "grad_norm": 0.23388998210430145, |
| "learning_rate": 4.684194734469067e-05, |
| "loss": 0.2301, |
| "num_input_tokens_seen": 134816, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.944444444444445, |
| "grad_norm": 0.28110790252685547, |
| "learning_rate": 4.678272483700074e-05, |
| "loss": 0.2361, |
| "num_input_tokens_seen": 135616, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.972222222222222, |
| "grad_norm": 0.03570658713579178, |
| "learning_rate": 4.672299033977076e-05, |
| "loss": 0.2297, |
| "num_input_tokens_seen": 136368, |
| "step": 895 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.22953547537326813, |
| "learning_rate": 4.6662745257023325e-05, |
| "loss": 0.2273, |
| "num_input_tokens_seen": 137120, |
| "step": 900 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.2374284714460373, |
| "eval_runtime": 0.8812, |
| "eval_samples_per_second": 45.392, |
| "eval_steps_per_second": 22.696, |
| "num_input_tokens_seen": 137120, |
| "step": 900 |
| }, |
| { |
| "epoch": 5.027777777777778, |
| "grad_norm": 0.05738260596990585, |
| "learning_rate": 4.660199100478202e-05, |
| "loss": 0.2263, |
| "num_input_tokens_seen": 137888, |
| "step": 905 |
| }, |
| { |
| "epoch": 5.055555555555555, |
| "grad_norm": 0.3403209447860718, |
| "learning_rate": 4.6540729011038146e-05, |
| "loss": 0.2385, |
| "num_input_tokens_seen": 138672, |
| "step": 910 |
| }, |
| { |
| "epoch": 5.083333333333333, |
| "grad_norm": 0.082719586789608, |
| "learning_rate": 4.6478960715717176e-05, |
| "loss": 0.2097, |
| "num_input_tokens_seen": 139440, |
| "step": 915 |
| }, |
| { |
| "epoch": 5.111111111111111, |
| "grad_norm": 0.078125961124897, |
| "learning_rate": 4.641668757064486e-05, |
| "loss": 0.2426, |
| "num_input_tokens_seen": 140176, |
| "step": 920 |
| }, |
| { |
| "epoch": 5.138888888888889, |
| "grad_norm": 0.23293739557266235, |
| "learning_rate": 4.6353911039513145e-05, |
| "loss": 0.2398, |
| "num_input_tokens_seen": 140912, |
| "step": 925 |
| }, |
| { |
| "epoch": 5.166666666666667, |
| "grad_norm": 0.27510783076286316, |
| "learning_rate": 4.6290632597845755e-05, |
| "loss": 0.249, |
| "num_input_tokens_seen": 141680, |
| "step": 930 |
| }, |
| { |
| "epoch": 5.194444444444445, |
| "grad_norm": 0.029512202367186546, |
| "learning_rate": 4.622685373296353e-05, |
| "loss": 0.2285, |
| "num_input_tokens_seen": 142432, |
| "step": 935 |
| }, |
| { |
| "epoch": 5.222222222222222, |
| "grad_norm": 0.02141384780406952, |
| "learning_rate": 4.61625759439494e-05, |
| "loss": 0.2402, |
| "num_input_tokens_seen": 143216, |
| "step": 940 |
| }, |
| { |
| "epoch": 5.25, |
| "grad_norm": 0.2061186283826828, |
| "learning_rate": 4.609780074161327e-05, |
| "loss": 0.2217, |
| "num_input_tokens_seen": 143968, |
| "step": 945 |
| }, |
| { |
| "epoch": 5.277777777777778, |
| "grad_norm": 0.2300575077533722, |
| "learning_rate": 4.603252964845638e-05, |
| "loss": 0.2381, |
| "num_input_tokens_seen": 144768, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.305555555555555, |
| "grad_norm": 0.044719330966472626, |
| "learning_rate": 4.5966764198635606e-05, |
| "loss": 0.2356, |
| "num_input_tokens_seen": 145552, |
| "step": 955 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 0.02483515627682209, |
| "learning_rate": 4.590050593792736e-05, |
| "loss": 0.2379, |
| "num_input_tokens_seen": 146288, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.361111111111111, |
| "grad_norm": 0.2347053736448288, |
| "learning_rate": 4.583375642369129e-05, |
| "loss": 0.2357, |
| "num_input_tokens_seen": 147040, |
| "step": 965 |
| }, |
| { |
| "epoch": 5.388888888888889, |
| "grad_norm": 0.05795735865831375, |
| "learning_rate": 4.5766517224833637e-05, |
| "loss": 0.2252, |
| "num_input_tokens_seen": 147824, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.416666666666667, |
| "grad_norm": 0.06415160000324249, |
| "learning_rate": 4.569878992177039e-05, |
| "loss": 0.218, |
| "num_input_tokens_seen": 148560, |
| "step": 975 |
| }, |
| { |
| "epoch": 5.444444444444445, |
| "grad_norm": 0.06607173383235931, |
| "learning_rate": 4.5630576106390114e-05, |
| "loss": 0.2503, |
| "num_input_tokens_seen": 149344, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.472222222222222, |
| "grad_norm": 0.05142849683761597, |
| "learning_rate": 4.556187738201656e-05, |
| "loss": 0.2226, |
| "num_input_tokens_seen": 150128, |
| "step": 985 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.2409844547510147, |
| "learning_rate": 4.549269536337095e-05, |
| "loss": 0.2328, |
| "num_input_tokens_seen": 150880, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.527777777777778, |
| "grad_norm": 0.22324684262275696, |
| "learning_rate": 4.5423031676534065e-05, |
| "loss": 0.2514, |
| "num_input_tokens_seen": 151648, |
| "step": 995 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.21208706498146057, |
| "learning_rate": 4.535288795890798e-05, |
| "loss": 0.238, |
| "num_input_tokens_seen": 152416, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.583333333333333, |
| "grad_norm": 0.011771623976528645, |
| "learning_rate": 4.528226585917761e-05, |
| "loss": 0.236, |
| "num_input_tokens_seen": 153184, |
| "step": 1005 |
| }, |
| { |
| "epoch": 5.611111111111111, |
| "grad_norm": 0.19553421437740326, |
| "learning_rate": 4.521116703727193e-05, |
| "loss": 0.2422, |
| "num_input_tokens_seen": 153936, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.638888888888889, |
| "grad_norm": 0.061913229525089264, |
| "learning_rate": 4.5139593164324986e-05, |
| "loss": 0.2277, |
| "num_input_tokens_seen": 154720, |
| "step": 1015 |
| }, |
| { |
| "epoch": 5.666666666666667, |
| "grad_norm": 0.18774497509002686, |
| "learning_rate": 4.506754592263662e-05, |
| "loss": 0.2296, |
| "num_input_tokens_seen": 155488, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.694444444444445, |
| "grad_norm": 0.06410039216279984, |
| "learning_rate": 4.49950270056329e-05, |
| "loss": 0.2196, |
| "num_input_tokens_seen": 156272, |
| "step": 1025 |
| }, |
| { |
| "epoch": 5.722222222222222, |
| "grad_norm": 0.21270763874053955, |
| "learning_rate": 4.4922038117826334e-05, |
| "loss": 0.2323, |
| "num_input_tokens_seen": 157040, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.75, |
| "grad_norm": 0.01843346282839775, |
| "learning_rate": 4.48485809747758e-05, |
| "loss": 0.2386, |
| "num_input_tokens_seen": 157792, |
| "step": 1035 |
| }, |
| { |
| "epoch": 5.777777777777778, |
| "grad_norm": 0.20497548580169678, |
| "learning_rate": 4.477465730304624e-05, |
| "loss": 0.2322, |
| "num_input_tokens_seen": 158544, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.805555555555555, |
| "grad_norm": 0.1923503577709198, |
| "learning_rate": 4.4700268840168045e-05, |
| "loss": 0.238, |
| "num_input_tokens_seen": 159280, |
| "step": 1045 |
| }, |
| { |
| "epoch": 5.833333333333333, |
| "grad_norm": 0.021967420354485512, |
| "learning_rate": 4.462541733459628e-05, |
| "loss": 0.2255, |
| "num_input_tokens_seen": 160016, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.861111111111111, |
| "grad_norm": 0.19093094766139984, |
| "learning_rate": 4.455010454566947e-05, |
| "loss": 0.2254, |
| "num_input_tokens_seen": 160768, |
| "step": 1055 |
| }, |
| { |
| "epoch": 5.888888888888889, |
| "grad_norm": 0.19332638382911682, |
| "learning_rate": 4.447433224356839e-05, |
| "loss": 0.2382, |
| "num_input_tokens_seen": 161520, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.916666666666667, |
| "grad_norm": 0.023951267823576927, |
| "learning_rate": 4.439810220927436e-05, |
| "loss": 0.2359, |
| "num_input_tokens_seen": 162304, |
| "step": 1065 |
| }, |
| { |
| "epoch": 5.944444444444445, |
| "grad_norm": 0.04824138060212135, |
| "learning_rate": 4.432141623452743e-05, |
| "loss": 0.2315, |
| "num_input_tokens_seen": 163088, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.972222222222222, |
| "grad_norm": 0.18069137632846832, |
| "learning_rate": 4.4244276121784195e-05, |
| "loss": 0.2296, |
| "num_input_tokens_seen": 163856, |
| "step": 1075 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.204057514667511, |
| "learning_rate": 4.416668368417556e-05, |
| "loss": 0.2282, |
| "num_input_tokens_seen": 164592, |
| "step": 1080 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.2412218302488327, |
| "eval_runtime": 0.8694, |
| "eval_samples_per_second": 46.006, |
| "eval_steps_per_second": 23.003, |
| "num_input_tokens_seen": 164592, |
| "step": 1080 |
| }, |
| { |
| "epoch": 6.027777777777778, |
| "grad_norm": 0.03645794838666916, |
| "learning_rate": 4.408864074546401e-05, |
| "loss": 0.2158, |
| "num_input_tokens_seen": 165344, |
| "step": 1085 |
| }, |
| { |
| "epoch": 6.055555555555555, |
| "grad_norm": 0.030243530869483948, |
| "learning_rate": 4.401014914000078e-05, |
| "loss": 0.2433, |
| "num_input_tokens_seen": 166112, |
| "step": 1090 |
| }, |
| { |
| "epoch": 6.083333333333333, |
| "grad_norm": 0.21185517311096191, |
| "learning_rate": 4.393121071268274e-05, |
| "loss": 0.2346, |
| "num_input_tokens_seen": 166880, |
| "step": 1095 |
| }, |
| { |
| "epoch": 6.111111111111111, |
| "grad_norm": 0.1881146878004074, |
| "learning_rate": 4.3851827318909036e-05, |
| "loss": 0.232, |
| "num_input_tokens_seen": 167648, |
| "step": 1100 |
| }, |
| { |
| "epoch": 6.138888888888889, |
| "grad_norm": 0.2051820009946823, |
| "learning_rate": 4.377200082453749e-05, |
| "loss": 0.2136, |
| "num_input_tokens_seen": 168432, |
| "step": 1105 |
| }, |
| { |
| "epoch": 6.166666666666667, |
| "grad_norm": 0.061713505536317825, |
| "learning_rate": 4.36917331058407e-05, |
| "loss": 0.2197, |
| "num_input_tokens_seen": 169168, |
| "step": 1110 |
| }, |
| { |
| "epoch": 6.194444444444445, |
| "grad_norm": 0.23004846274852753, |
| "learning_rate": 4.361102604946201e-05, |
| "loss": 0.2412, |
| "num_input_tokens_seen": 169936, |
| "step": 1115 |
| }, |
| { |
| "epoch": 6.222222222222222, |
| "grad_norm": 0.1857406497001648, |
| "learning_rate": 4.3529881552371096e-05, |
| "loss": 0.2218, |
| "num_input_tokens_seen": 170688, |
| "step": 1120 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.04798297584056854, |
| "learning_rate": 4.344830152181941e-05, |
| "loss": 0.2451, |
| "num_input_tokens_seen": 171440, |
| "step": 1125 |
| }, |
| { |
| "epoch": 6.277777777777778, |
| "grad_norm": 0.22263100743293762, |
| "learning_rate": 4.336628787529538e-05, |
| "loss": 0.2434, |
| "num_input_tokens_seen": 172192, |
| "step": 1130 |
| }, |
| { |
| "epoch": 6.305555555555555, |
| "grad_norm": 0.03338240459561348, |
| "learning_rate": 4.3283842540479264e-05, |
| "loss": 0.2266, |
| "num_input_tokens_seen": 172960, |
| "step": 1135 |
| }, |
| { |
| "epoch": 6.333333333333333, |
| "grad_norm": 0.05144157633185387, |
| "learning_rate": 4.320096745519793e-05, |
| "loss": 0.2276, |
| "num_input_tokens_seen": 173744, |
| "step": 1140 |
| }, |
| { |
| "epoch": 6.361111111111111, |
| "grad_norm": 0.2107207328081131, |
| "learning_rate": 4.3117664567379237e-05, |
| "loss": 0.2437, |
| "num_input_tokens_seen": 174464, |
| "step": 1145 |
| }, |
| { |
| "epoch": 6.388888888888889, |
| "grad_norm": 0.20211313664913177, |
| "learning_rate": 4.303393583500628e-05, |
| "loss": 0.2342, |
| "num_input_tokens_seen": 175200, |
| "step": 1150 |
| }, |
| { |
| "epoch": 6.416666666666667, |
| "grad_norm": 0.053128089755773544, |
| "learning_rate": 4.2949783226071406e-05, |
| "loss": 0.2257, |
| "num_input_tokens_seen": 175936, |
| "step": 1155 |
| }, |
| { |
| "epoch": 6.444444444444445, |
| "grad_norm": 0.17988252639770508, |
| "learning_rate": 4.286520871852987e-05, |
| "loss": 0.2135, |
| "num_input_tokens_seen": 176672, |
| "step": 1160 |
| }, |
| { |
| "epoch": 6.472222222222222, |
| "grad_norm": 0.027884148061275482, |
| "learning_rate": 4.278021430025343e-05, |
| "loss": 0.2257, |
| "num_input_tokens_seen": 177440, |
| "step": 1165 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.17274907231330872, |
| "learning_rate": 4.2694801968983566e-05, |
| "loss": 0.2188, |
| "num_input_tokens_seen": 178240, |
| "step": 1170 |
| }, |
| { |
| "epoch": 6.527777777777778, |
| "grad_norm": 0.25237998366355896, |
| "learning_rate": 4.260897373228456e-05, |
| "loss": 0.2585, |
| "num_input_tokens_seen": 178976, |
| "step": 1175 |
| }, |
| { |
| "epoch": 6.555555555555555, |
| "grad_norm": 0.038305509835481644, |
| "learning_rate": 4.2522731607496275e-05, |
| "loss": 0.2052, |
| "num_input_tokens_seen": 179728, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.583333333333333, |
| "grad_norm": 0.17462146282196045, |
| "learning_rate": 4.2436077621686786e-05, |
| "loss": 0.2104, |
| "num_input_tokens_seen": 180448, |
| "step": 1185 |
| }, |
| { |
| "epoch": 6.611111111111111, |
| "grad_norm": 0.056279465556144714, |
| "learning_rate": 4.234901381160469e-05, |
| "loss": 0.2497, |
| "num_input_tokens_seen": 181168, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.638888888888889, |
| "grad_norm": 0.0892854705452919, |
| "learning_rate": 4.226154222363124e-05, |
| "loss": 0.2343, |
| "num_input_tokens_seen": 181968, |
| "step": 1195 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.0540282167494297, |
| "learning_rate": 4.21736649137323e-05, |
| "loss": 0.2443, |
| "num_input_tokens_seen": 182704, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.694444444444445, |
| "grad_norm": 0.17779448628425598, |
| "learning_rate": 4.208538394740993e-05, |
| "loss": 0.2134, |
| "num_input_tokens_seen": 183456, |
| "step": 1205 |
| }, |
| { |
| "epoch": 6.722222222222222, |
| "grad_norm": 0.2300095558166504, |
| "learning_rate": 4.199670139965393e-05, |
| "loss": 0.2263, |
| "num_input_tokens_seen": 184224, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.75, |
| "grad_norm": 0.06911563873291016, |
| "learning_rate": 4.1907619354892965e-05, |
| "loss": 0.2349, |
| "num_input_tokens_seen": 184992, |
| "step": 1215 |
| }, |
| { |
| "epoch": 6.777777777777778, |
| "grad_norm": 0.05566706135869026, |
| "learning_rate": 4.1818139906945694e-05, |
| "loss": 0.2334, |
| "num_input_tokens_seen": 185728, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.805555555555555, |
| "grad_norm": 0.22851161658763885, |
| "learning_rate": 4.172826515897146e-05, |
| "loss": 0.2342, |
| "num_input_tokens_seen": 186544, |
| "step": 1225 |
| }, |
| { |
| "epoch": 6.833333333333333, |
| "grad_norm": 0.026552407070994377, |
| "learning_rate": 4.163799722342089e-05, |
| "loss": 0.2314, |
| "num_input_tokens_seen": 187296, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.861111111111111, |
| "grad_norm": 0.22173333168029785, |
| "learning_rate": 4.1547338221986266e-05, |
| "loss": 0.2378, |
| "num_input_tokens_seen": 188080, |
| "step": 1235 |
| }, |
| { |
| "epoch": 6.888888888888889, |
| "grad_norm": 0.2135874181985855, |
| "learning_rate": 4.1456290285551596e-05, |
| "loss": 0.2156, |
| "num_input_tokens_seen": 188864, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.916666666666667, |
| "grad_norm": 0.2219124585390091, |
| "learning_rate": 4.13648555541426e-05, |
| "loss": 0.2045, |
| "num_input_tokens_seen": 189648, |
| "step": 1245 |
| }, |
| { |
| "epoch": 6.944444444444445, |
| "grad_norm": 0.08830613642930984, |
| "learning_rate": 4.127303617687636e-05, |
| "loss": 0.2812, |
| "num_input_tokens_seen": 190416, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.972222222222222, |
| "grad_norm": 0.25774773955345154, |
| "learning_rate": 4.118083431191081e-05, |
| "loss": 0.2417, |
| "num_input_tokens_seen": 191152, |
| "step": 1255 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.07823996245861053, |
| "learning_rate": 4.108825212639405e-05, |
| "loss": 0.2299, |
| "num_input_tokens_seen": 191920, |
| "step": 1260 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.2371632307767868, |
| "eval_runtime": 0.8666, |
| "eval_samples_per_second": 46.155, |
| "eval_steps_per_second": 23.077, |
| "num_input_tokens_seen": 191920, |
| "step": 1260 |
| }, |
| { |
| "epoch": 7.027777777777778, |
| "grad_norm": 0.1936909556388855, |
| "learning_rate": 4.099529179641337e-05, |
| "loss": 0.2537, |
| "num_input_tokens_seen": 192688, |
| "step": 1265 |
| }, |
| { |
| "epoch": 7.055555555555555, |
| "grad_norm": 0.01822832226753235, |
| "learning_rate": 4.09019555069441e-05, |
| "loss": 0.2315, |
| "num_input_tokens_seen": 193424, |
| "step": 1270 |
| }, |
| { |
| "epoch": 7.083333333333333, |
| "grad_norm": 0.19782674312591553, |
| "learning_rate": 4.080824545179828e-05, |
| "loss": 0.2183, |
| "num_input_tokens_seen": 194192, |
| "step": 1275 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "grad_norm": 0.2261374592781067, |
| "learning_rate": 4.071416383357307e-05, |
| "loss": 0.2412, |
| "num_input_tokens_seen": 194928, |
| "step": 1280 |
| }, |
| { |
| "epoch": 7.138888888888889, |
| "grad_norm": 0.07102471590042114, |
| "learning_rate": 4.0619712863599e-05, |
| "loss": 0.2203, |
| "num_input_tokens_seen": 195696, |
| "step": 1285 |
| }, |
| { |
| "epoch": 7.166666666666667, |
| "grad_norm": 0.04602295532822609, |
| "learning_rate": 4.0524894761888e-05, |
| "loss": 0.2566, |
| "num_input_tokens_seen": 196416, |
| "step": 1290 |
| }, |
| { |
| "epoch": 7.194444444444445, |
| "grad_norm": 0.07700731605291367, |
| "learning_rate": 4.042971175708118e-05, |
| "loss": 0.2114, |
| "num_input_tokens_seen": 197184, |
| "step": 1295 |
| }, |
| { |
| "epoch": 7.222222222222222, |
| "grad_norm": 0.14833351969718933, |
| "learning_rate": 4.0334166086396484e-05, |
| "loss": 0.2186, |
| "num_input_tokens_seen": 197952, |
| "step": 1300 |
| }, |
| { |
| "epoch": 7.25, |
| "grad_norm": 0.06778162717819214, |
| "learning_rate": 4.0238259995576084e-05, |
| "loss": 0.2327, |
| "num_input_tokens_seen": 198704, |
| "step": 1305 |
| }, |
| { |
| "epoch": 7.277777777777778, |
| "grad_norm": 0.03469262644648552, |
| "learning_rate": 4.0141995738833625e-05, |
| "loss": 0.2221, |
| "num_input_tokens_seen": 199488, |
| "step": 1310 |
| }, |
| { |
| "epoch": 7.305555555555555, |
| "grad_norm": 0.06742622703313828, |
| "learning_rate": 4.0045375578801214e-05, |
| "loss": 0.2141, |
| "num_input_tokens_seen": 200224, |
| "step": 1315 |
| }, |
| { |
| "epoch": 7.333333333333333, |
| "grad_norm": 0.16787341237068176, |
| "learning_rate": 3.994840178647623e-05, |
| "loss": 0.2069, |
| "num_input_tokens_seen": 201024, |
| "step": 1320 |
| }, |
| { |
| "epoch": 7.361111111111111, |
| "grad_norm": 0.07787422835826874, |
| "learning_rate": 3.985107664116798e-05, |
| "loss": 0.2079, |
| "num_input_tokens_seen": 201792, |
| "step": 1325 |
| }, |
| { |
| "epoch": 7.388888888888889, |
| "grad_norm": 0.08745193481445312, |
| "learning_rate": 3.9753402430444116e-05, |
| "loss": 0.2555, |
| "num_input_tokens_seen": 202576, |
| "step": 1330 |
| }, |
| { |
| "epoch": 7.416666666666667, |
| "grad_norm": 0.15857690572738647, |
| "learning_rate": 3.9655381450076826e-05, |
| "loss": 0.2059, |
| "num_input_tokens_seen": 203312, |
| "step": 1335 |
| }, |
| { |
| "epoch": 7.444444444444445, |
| "grad_norm": 0.0623801089823246, |
| "learning_rate": 3.955701600398892e-05, |
| "loss": 0.2344, |
| "num_input_tokens_seen": 204096, |
| "step": 1340 |
| }, |
| { |
| "epoch": 7.472222222222222, |
| "grad_norm": 0.058826789259910583, |
| "learning_rate": 3.945830840419966e-05, |
| "loss": 0.2411, |
| "num_input_tokens_seen": 204880, |
| "step": 1345 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.23193086683750153, |
| "learning_rate": 3.935926097077045e-05, |
| "loss": 0.285, |
| "num_input_tokens_seen": 205632, |
| "step": 1350 |
| }, |
| { |
| "epoch": 7.527777777777778, |
| "grad_norm": 0.05572715774178505, |
| "learning_rate": 3.925987603175023e-05, |
| "loss": 0.2509, |
| "num_input_tokens_seen": 206384, |
| "step": 1355 |
| }, |
| { |
| "epoch": 7.555555555555555, |
| "grad_norm": 0.16730906069278717, |
| "learning_rate": 3.916015592312082e-05, |
| "loss": 0.2345, |
| "num_input_tokens_seen": 207104, |
| "step": 1360 |
| }, |
| { |
| "epoch": 7.583333333333333, |
| "grad_norm": 0.08506715297698975, |
| "learning_rate": 3.9060102988742e-05, |
| "loss": 0.2202, |
| "num_input_tokens_seen": 207888, |
| "step": 1365 |
| }, |
| { |
| "epoch": 7.611111111111111, |
| "grad_norm": 0.0463300496339798, |
| "learning_rate": 3.8959719580296415e-05, |
| "loss": 0.2435, |
| "num_input_tokens_seen": 208656, |
| "step": 1370 |
| }, |
| { |
| "epoch": 7.638888888888889, |
| "grad_norm": 0.17269517481327057, |
| "learning_rate": 3.885900805723429e-05, |
| "loss": 0.2319, |
| "num_input_tokens_seen": 209424, |
| "step": 1375 |
| }, |
| { |
| "epoch": 7.666666666666667, |
| "grad_norm": 0.16922199726104736, |
| "learning_rate": 3.875797078671798e-05, |
| "loss": 0.2319, |
| "num_input_tokens_seen": 210176, |
| "step": 1380 |
| }, |
| { |
| "epoch": 7.694444444444445, |
| "grad_norm": 0.03644829988479614, |
| "learning_rate": 3.865661014356635e-05, |
| "loss": 0.2259, |
| "num_input_tokens_seen": 210944, |
| "step": 1385 |
| }, |
| { |
| "epoch": 7.722222222222222, |
| "grad_norm": 0.18136528134346008, |
| "learning_rate": 3.855492851019893e-05, |
| "loss": 0.2324, |
| "num_input_tokens_seen": 211680, |
| "step": 1390 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 0.21425503492355347, |
| "learning_rate": 3.8452928276579916e-05, |
| "loss": 0.2323, |
| "num_input_tokens_seen": 212432, |
| "step": 1395 |
| }, |
| { |
| "epoch": 7.777777777777778, |
| "grad_norm": 0.045595910400152206, |
| "learning_rate": 3.835061184016203e-05, |
| "loss": 0.2255, |
| "num_input_tokens_seen": 213184, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.805555555555555, |
| "grad_norm": 0.17766867578029633, |
| "learning_rate": 3.824798160583012e-05, |
| "loss": 0.2389, |
| "num_input_tokens_seen": 213952, |
| "step": 1405 |
| }, |
| { |
| "epoch": 7.833333333333333, |
| "grad_norm": 0.18407747149467468, |
| "learning_rate": 3.814503998584471e-05, |
| "loss": 0.22, |
| "num_input_tokens_seen": 214720, |
| "step": 1410 |
| }, |
| { |
| "epoch": 7.861111111111111, |
| "grad_norm": 0.04432946443557739, |
| "learning_rate": 3.804178939978517e-05, |
| "loss": 0.2157, |
| "num_input_tokens_seen": 215488, |
| "step": 1415 |
| }, |
| { |
| "epoch": 7.888888888888889, |
| "grad_norm": 0.08724919706583023, |
| "learning_rate": 3.7938232274493e-05, |
| "loss": 0.2435, |
| "num_input_tokens_seen": 216256, |
| "step": 1420 |
| }, |
| { |
| "epoch": 7.916666666666667, |
| "grad_norm": 0.1724819540977478, |
| "learning_rate": 3.783437104401469e-05, |
| "loss": 0.2059, |
| "num_input_tokens_seen": 217040, |
| "step": 1425 |
| }, |
| { |
| "epoch": 7.944444444444445, |
| "grad_norm": 0.11546836793422699, |
| "learning_rate": 3.773020814954453e-05, |
| "loss": 0.2073, |
| "num_input_tokens_seen": 217824, |
| "step": 1430 |
| }, |
| { |
| "epoch": 7.972222222222222, |
| "grad_norm": 0.1858564019203186, |
| "learning_rate": 3.762574603936725e-05, |
| "loss": 0.2652, |
| "num_input_tokens_seen": 218592, |
| "step": 1435 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.09873568266630173, |
| "learning_rate": 3.752098716880045e-05, |
| "loss": 0.2302, |
| "num_input_tokens_seen": 219344, |
| "step": 1440 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.24155128002166748, |
| "eval_runtime": 0.8836, |
| "eval_samples_per_second": 45.27, |
| "eval_steps_per_second": 22.635, |
| "num_input_tokens_seen": 219344, |
| "step": 1440 |
| }, |
| { |
| "epoch": 8.027777777777779, |
| "grad_norm": 0.22358053922653198, |
| "learning_rate": 3.74159340001369e-05, |
| "loss": 0.2488, |
| "num_input_tokens_seen": 220064, |
| "step": 1445 |
| }, |
| { |
| "epoch": 8.055555555555555, |
| "grad_norm": 0.03358878195285797, |
| "learning_rate": 3.731058900258668e-05, |
| "loss": 0.2119, |
| "num_input_tokens_seen": 220800, |
| "step": 1450 |
| }, |
| { |
| "epoch": 8.083333333333334, |
| "grad_norm": 0.04093853384256363, |
| "learning_rate": 3.7204954652219104e-05, |
| "loss": 0.2448, |
| "num_input_tokens_seen": 221568, |
| "step": 1455 |
| }, |
| { |
| "epoch": 8.11111111111111, |
| "grad_norm": 0.10109119117259979, |
| "learning_rate": 3.7099033431904575e-05, |
| "loss": 0.2349, |
| "num_input_tokens_seen": 222352, |
| "step": 1460 |
| }, |
| { |
| "epoch": 8.13888888888889, |
| "grad_norm": 0.048178769648075104, |
| "learning_rate": 3.699282783125616e-05, |
| "loss": 0.2407, |
| "num_input_tokens_seen": 223136, |
| "step": 1465 |
| }, |
| { |
| "epoch": 8.166666666666666, |
| "grad_norm": 0.07279063761234283, |
| "learning_rate": 3.688634034657115e-05, |
| "loss": 0.2239, |
| "num_input_tokens_seen": 223888, |
| "step": 1470 |
| }, |
| { |
| "epoch": 8.194444444444445, |
| "grad_norm": 0.10608810186386108, |
| "learning_rate": 3.6779573480772325e-05, |
| "loss": 0.2312, |
| "num_input_tokens_seen": 224656, |
| "step": 1475 |
| }, |
| { |
| "epoch": 8.222222222222221, |
| "grad_norm": 0.18366475403308868, |
| "learning_rate": 3.6672529743349146e-05, |
| "loss": 0.2328, |
| "num_input_tokens_seen": 225392, |
| "step": 1480 |
| }, |
| { |
| "epoch": 8.25, |
| "grad_norm": 0.0931817889213562, |
| "learning_rate": 3.656521165029879e-05, |
| "loss": 0.2184, |
| "num_input_tokens_seen": 226192, |
| "step": 1485 |
| }, |
| { |
| "epoch": 8.277777777777779, |
| "grad_norm": 0.06541720777750015, |
| "learning_rate": 3.6457621724066964e-05, |
| "loss": 0.2425, |
| "num_input_tokens_seen": 226944, |
| "step": 1490 |
| }, |
| { |
| "epoch": 8.305555555555555, |
| "grad_norm": 0.05357493460178375, |
| "learning_rate": 3.634976249348867e-05, |
| "loss": 0.2362, |
| "num_input_tokens_seen": 227680, |
| "step": 1495 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.18608205020427704, |
| "learning_rate": 3.6241636493728736e-05, |
| "loss": 0.2254, |
| "num_input_tokens_seen": 228400, |
| "step": 1500 |
| }, |
| { |
| "epoch": 8.36111111111111, |
| "grad_norm": 0.1812172532081604, |
| "learning_rate": 3.613324626622224e-05, |
| "loss": 0.233, |
| "num_input_tokens_seen": 229152, |
| "step": 1505 |
| }, |
| { |
| "epoch": 8.38888888888889, |
| "grad_norm": 0.0836367979645729, |
| "learning_rate": 3.602459435861475e-05, |
| "loss": 0.2158, |
| "num_input_tokens_seen": 229920, |
| "step": 1510 |
| }, |
| { |
| "epoch": 8.416666666666666, |
| "grad_norm": 0.18750832974910736, |
| "learning_rate": 3.591568332470249e-05, |
| "loss": 0.2053, |
| "num_input_tokens_seen": 230704, |
| "step": 1515 |
| }, |
| { |
| "epoch": 8.444444444444445, |
| "grad_norm": 0.032188136130571365, |
| "learning_rate": 3.5806515724372274e-05, |
| "loss": 0.2385, |
| "num_input_tokens_seen": 231456, |
| "step": 1520 |
| }, |
| { |
| "epoch": 8.472222222222221, |
| "grad_norm": 0.18904954195022583, |
| "learning_rate": 3.569709412354136e-05, |
| "loss": 0.229, |
| "num_input_tokens_seen": 232224, |
| "step": 1525 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 0.20962204039096832, |
| "learning_rate": 3.5587421094097115e-05, |
| "loss": 0.2309, |
| "num_input_tokens_seen": 232976, |
| "step": 1530 |
| }, |
| { |
| "epoch": 8.527777777777779, |
| "grad_norm": 0.12048184871673584, |
| "learning_rate": 3.5477499213836616e-05, |
| "loss": 0.2276, |
| "num_input_tokens_seen": 233744, |
| "step": 1535 |
| }, |
| { |
| "epoch": 8.555555555555555, |
| "grad_norm": 0.20690464973449707, |
| "learning_rate": 3.536733106640598e-05, |
| "loss": 0.2264, |
| "num_input_tokens_seen": 234480, |
| "step": 1540 |
| }, |
| { |
| "epoch": 8.583333333333334, |
| "grad_norm": 0.24027937650680542, |
| "learning_rate": 3.525691924123971e-05, |
| "loss": 0.2105, |
| "num_input_tokens_seen": 235248, |
| "step": 1545 |
| }, |
| { |
| "epoch": 8.61111111111111, |
| "grad_norm": 0.07281269878149033, |
| "learning_rate": 3.5146266333499795e-05, |
| "loss": 0.252, |
| "num_input_tokens_seen": 235984, |
| "step": 1550 |
| }, |
| { |
| "epoch": 8.63888888888889, |
| "grad_norm": 0.06904011219739914, |
| "learning_rate": 3.503537494401473e-05, |
| "loss": 0.2535, |
| "num_input_tokens_seen": 236720, |
| "step": 1555 |
| }, |
| { |
| "epoch": 8.666666666666666, |
| "grad_norm": 0.18287572264671326, |
| "learning_rate": 3.4924247679218375e-05, |
| "loss": 0.2373, |
| "num_input_tokens_seen": 237520, |
| "step": 1560 |
| }, |
| { |
| "epoch": 8.694444444444445, |
| "grad_norm": 0.21255025267601013, |
| "learning_rate": 3.481288715108868e-05, |
| "loss": 0.2362, |
| "num_input_tokens_seen": 238288, |
| "step": 1565 |
| }, |
| { |
| "epoch": 8.722222222222221, |
| "grad_norm": 0.1927708387374878, |
| "learning_rate": 3.4701295977086324e-05, |
| "loss": 0.2418, |
| "num_input_tokens_seen": 239072, |
| "step": 1570 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.11111165583133698, |
| "learning_rate": 3.4589476780093166e-05, |
| "loss": 0.2354, |
| "num_input_tokens_seen": 239840, |
| "step": 1575 |
| }, |
| { |
| "epoch": 8.777777777777779, |
| "grad_norm": 0.16939318180084229, |
| "learning_rate": 3.44774321883506e-05, |
| "loss": 0.211, |
| "num_input_tokens_seen": 240624, |
| "step": 1580 |
| }, |
| { |
| "epoch": 8.805555555555555, |
| "grad_norm": 0.20855894684791565, |
| "learning_rate": 3.436516483539781e-05, |
| "loss": 0.2498, |
| "num_input_tokens_seen": 241392, |
| "step": 1585 |
| }, |
| { |
| "epoch": 8.833333333333334, |
| "grad_norm": 0.03823915123939514, |
| "learning_rate": 3.42526773600098e-05, |
| "loss": 0.2384, |
| "num_input_tokens_seen": 242144, |
| "step": 1590 |
| }, |
| { |
| "epoch": 8.86111111111111, |
| "grad_norm": 0.09917465597391129, |
| "learning_rate": 3.4139972406135464e-05, |
| "loss": 0.2292, |
| "num_input_tokens_seen": 242928, |
| "step": 1595 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.1009097546339035, |
| "learning_rate": 3.402705262283537e-05, |
| "loss": 0.2242, |
| "num_input_tokens_seen": 243680, |
| "step": 1600 |
| }, |
| { |
| "epoch": 8.916666666666666, |
| "grad_norm": 0.06256987899541855, |
| "learning_rate": 3.39139206642195e-05, |
| "loss": 0.2125, |
| "num_input_tokens_seen": 244464, |
| "step": 1605 |
| }, |
| { |
| "epoch": 8.944444444444445, |
| "grad_norm": 0.04281611740589142, |
| "learning_rate": 3.3800579189384944e-05, |
| "loss": 0.2132, |
| "num_input_tokens_seen": 245232, |
| "step": 1610 |
| }, |
| { |
| "epoch": 8.972222222222221, |
| "grad_norm": 0.2398250699043274, |
| "learning_rate": 3.3687030862353286e-05, |
| "loss": 0.2451, |
| "num_input_tokens_seen": 245968, |
| "step": 1615 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.07234970480203629, |
| "learning_rate": 3.357327835200807e-05, |
| "loss": 0.264, |
| "num_input_tokens_seen": 246736, |
| "step": 1620 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.24827322363853455, |
| "eval_runtime": 0.8692, |
| "eval_samples_per_second": 46.018, |
| "eval_steps_per_second": 23.009, |
| "num_input_tokens_seen": 246736, |
| "step": 1620 |
| }, |
| { |
| "epoch": 9.027777777777779, |
| "grad_norm": 0.05116798356175423, |
| "learning_rate": 3.3459324332032035e-05, |
| "loss": 0.2321, |
| "num_input_tokens_seen": 247520, |
| "step": 1625 |
| }, |
| { |
| "epoch": 9.055555555555555, |
| "grad_norm": 0.07150240242481232, |
| "learning_rate": 3.3345171480844275e-05, |
| "loss": 0.2154, |
| "num_input_tokens_seen": 248304, |
| "step": 1630 |
| }, |
| { |
| "epoch": 9.083333333333334, |
| "grad_norm": 0.0657280832529068, |
| "learning_rate": 3.32308224815373e-05, |
| "loss": 0.2268, |
| "num_input_tokens_seen": 249056, |
| "step": 1635 |
| }, |
| { |
| "epoch": 9.11111111111111, |
| "grad_norm": 0.17856957018375397, |
| "learning_rate": 3.311628002181398e-05, |
| "loss": 0.2205, |
| "num_input_tokens_seen": 249824, |
| "step": 1640 |
| }, |
| { |
| "epoch": 9.13888888888889, |
| "grad_norm": 0.20213398337364197, |
| "learning_rate": 3.3001546793924285e-05, |
| "loss": 0.2317, |
| "num_input_tokens_seen": 250576, |
| "step": 1645 |
| }, |
| { |
| "epoch": 9.166666666666666, |
| "grad_norm": 0.0875818207859993, |
| "learning_rate": 3.288662549460216e-05, |
| "loss": 0.2283, |
| "num_input_tokens_seen": 251344, |
| "step": 1650 |
| }, |
| { |
| "epoch": 9.194444444444445, |
| "grad_norm": 0.05436839535832405, |
| "learning_rate": 3.277151882500199e-05, |
| "loss": 0.2213, |
| "num_input_tokens_seen": 252064, |
| "step": 1655 |
| }, |
| { |
| "epoch": 9.222222222222221, |
| "grad_norm": 0.1270546168088913, |
| "learning_rate": 3.26562294906352e-05, |
| "loss": 0.2261, |
| "num_input_tokens_seen": 252816, |
| "step": 1660 |
| }, |
| { |
| "epoch": 9.25, |
| "grad_norm": 0.2513698637485504, |
| "learning_rate": 3.254076020130664e-05, |
| "loss": 0.2196, |
| "num_input_tokens_seen": 253536, |
| "step": 1665 |
| }, |
| { |
| "epoch": 9.277777777777779, |
| "grad_norm": 0.42506569623947144, |
| "learning_rate": 3.242511367105087e-05, |
| "loss": 0.2385, |
| "num_input_tokens_seen": 254288, |
| "step": 1670 |
| }, |
| { |
| "epoch": 9.305555555555555, |
| "grad_norm": 0.32191821932792664, |
| "learning_rate": 3.230929261806842e-05, |
| "loss": 0.2056, |
| "num_input_tokens_seen": 255024, |
| "step": 1675 |
| }, |
| { |
| "epoch": 9.333333333333334, |
| "grad_norm": 0.1631459891796112, |
| "learning_rate": 3.2193299764661845e-05, |
| "loss": 0.2768, |
| "num_input_tokens_seen": 255792, |
| "step": 1680 |
| }, |
| { |
| "epoch": 9.36111111111111, |
| "grad_norm": 0.11005749553442001, |
| "learning_rate": 3.207713783717176e-05, |
| "loss": 0.2532, |
| "num_input_tokens_seen": 256560, |
| "step": 1685 |
| }, |
| { |
| "epoch": 9.38888888888889, |
| "grad_norm": 0.19168297946453094, |
| "learning_rate": 3.1960809565912794e-05, |
| "loss": 0.2341, |
| "num_input_tokens_seen": 257312, |
| "step": 1690 |
| }, |
| { |
| "epoch": 9.416666666666666, |
| "grad_norm": 0.11040922999382019, |
| "learning_rate": 3.1844317685109354e-05, |
| "loss": 0.2256, |
| "num_input_tokens_seen": 258048, |
| "step": 1695 |
| }, |
| { |
| "epoch": 9.444444444444445, |
| "grad_norm": 0.07201294600963593, |
| "learning_rate": 3.1727664932831394e-05, |
| "loss": 0.222, |
| "num_input_tokens_seen": 258816, |
| "step": 1700 |
| }, |
| { |
| "epoch": 9.472222222222221, |
| "grad_norm": 0.062040749937295914, |
| "learning_rate": 3.161085405093006e-05, |
| "loss": 0.2195, |
| "num_input_tokens_seen": 259600, |
| "step": 1705 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 0.20329245924949646, |
| "learning_rate": 3.149388778497323e-05, |
| "loss": 0.2417, |
| "num_input_tokens_seen": 260384, |
| "step": 1710 |
| }, |
| { |
| "epoch": 9.527777777777779, |
| "grad_norm": 0.13889265060424805, |
| "learning_rate": 3.137676888418099e-05, |
| "loss": 0.2158, |
| "num_input_tokens_seen": 261152, |
| "step": 1715 |
| }, |
| { |
| "epoch": 9.555555555555555, |
| "grad_norm": 0.25366413593292236, |
| "learning_rate": 3.125950010136104e-05, |
| "loss": 0.2201, |
| "num_input_tokens_seen": 261920, |
| "step": 1720 |
| }, |
| { |
| "epoch": 9.583333333333334, |
| "grad_norm": 0.2369338572025299, |
| "learning_rate": 3.114208419284391e-05, |
| "loss": 0.2369, |
| "num_input_tokens_seen": 262720, |
| "step": 1725 |
| }, |
| { |
| "epoch": 9.61111111111111, |
| "grad_norm": 0.1263570785522461, |
| "learning_rate": 3.102452391841828e-05, |
| "loss": 0.2208, |
| "num_input_tokens_seen": 263488, |
| "step": 1730 |
| }, |
| { |
| "epoch": 9.63888888888889, |
| "grad_norm": 0.18681450188159943, |
| "learning_rate": 3.090682204126604e-05, |
| "loss": 0.2471, |
| "num_input_tokens_seen": 264256, |
| "step": 1735 |
| }, |
| { |
| "epoch": 9.666666666666666, |
| "grad_norm": 0.09681422263383865, |
| "learning_rate": 3.078898132789735e-05, |
| "loss": 0.2201, |
| "num_input_tokens_seen": 265024, |
| "step": 1740 |
| }, |
| { |
| "epoch": 9.694444444444445, |
| "grad_norm": 0.23831580579280853, |
| "learning_rate": 3.0671004548085675e-05, |
| "loss": 0.2455, |
| "num_input_tokens_seen": 265776, |
| "step": 1745 |
| }, |
| { |
| "epoch": 9.722222222222221, |
| "grad_norm": 0.057987093925476074, |
| "learning_rate": 3.0552894474802584e-05, |
| "loss": 0.2518, |
| "num_input_tokens_seen": 266528, |
| "step": 1750 |
| }, |
| { |
| "epoch": 9.75, |
| "grad_norm": 0.11228296905755997, |
| "learning_rate": 3.043465388415267e-05, |
| "loss": 0.2213, |
| "num_input_tokens_seen": 267312, |
| "step": 1755 |
| }, |
| { |
| "epoch": 9.777777777777779, |
| "grad_norm": 0.24755965173244476, |
| "learning_rate": 3.0316285555308233e-05, |
| "loss": 0.2194, |
| "num_input_tokens_seen": 268080, |
| "step": 1760 |
| }, |
| { |
| "epoch": 9.805555555555555, |
| "grad_norm": 0.12314503639936447, |
| "learning_rate": 3.0197792270443982e-05, |
| "loss": 0.243, |
| "num_input_tokens_seen": 268880, |
| "step": 1765 |
| }, |
| { |
| "epoch": 9.833333333333334, |
| "grad_norm": 0.0753416046500206, |
| "learning_rate": 3.0079176814671656e-05, |
| "loss": 0.2476, |
| "num_input_tokens_seen": 269648, |
| "step": 1770 |
| }, |
| { |
| "epoch": 9.86111111111111, |
| "grad_norm": 0.2044837325811386, |
| "learning_rate": 2.9960441975974534e-05, |
| "loss": 0.2283, |
| "num_input_tokens_seen": 270400, |
| "step": 1775 |
| }, |
| { |
| "epoch": 9.88888888888889, |
| "grad_norm": 0.17016810178756714, |
| "learning_rate": 2.9841590545141906e-05, |
| "loss": 0.2242, |
| "num_input_tokens_seen": 271152, |
| "step": 1780 |
| }, |
| { |
| "epoch": 9.916666666666666, |
| "grad_norm": 0.06336583197116852, |
| "learning_rate": 2.9722625315703512e-05, |
| "loss": 0.2243, |
| "num_input_tokens_seen": 271920, |
| "step": 1785 |
| }, |
| { |
| "epoch": 9.944444444444445, |
| "grad_norm": 0.16640453040599823, |
| "learning_rate": 2.9603549083863847e-05, |
| "loss": 0.231, |
| "num_input_tokens_seen": 272672, |
| "step": 1790 |
| }, |
| { |
| "epoch": 9.972222222222221, |
| "grad_norm": 0.1795589029788971, |
| "learning_rate": 2.9484364648436437e-05, |
| "loss": 0.2261, |
| "num_input_tokens_seen": 273440, |
| "step": 1795 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.16841265559196472, |
| "learning_rate": 2.9365074810778094e-05, |
| "loss": 0.2165, |
| "num_input_tokens_seen": 274208, |
| "step": 1800 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.2445867955684662, |
| "eval_runtime": 0.8599, |
| "eval_samples_per_second": 46.519, |
| "eval_steps_per_second": 23.26, |
| "num_input_tokens_seen": 274208, |
| "step": 1800 |
| }, |
| { |
| "epoch": 10.027777777777779, |
| "grad_norm": 0.2173292487859726, |
| "learning_rate": 2.9245682374723016e-05, |
| "loss": 0.2469, |
| "num_input_tokens_seen": 274976, |
| "step": 1805 |
| }, |
| { |
| "epoch": 10.055555555555555, |
| "grad_norm": 0.19157367944717407, |
| "learning_rate": 2.9126190146516942e-05, |
| "loss": 0.2426, |
| "num_input_tokens_seen": 275760, |
| "step": 1810 |
| }, |
| { |
| "epoch": 10.083333333333334, |
| "grad_norm": 0.21250499784946442, |
| "learning_rate": 2.9006600934751145e-05, |
| "loss": 0.2244, |
| "num_input_tokens_seen": 276544, |
| "step": 1815 |
| }, |
| { |
| "epoch": 10.11111111111111, |
| "grad_norm": 0.1177288368344307, |
| "learning_rate": 2.888691755029642e-05, |
| "loss": 0.2185, |
| "num_input_tokens_seen": 277296, |
| "step": 1820 |
| }, |
| { |
| "epoch": 10.13888888888889, |
| "grad_norm": 0.11683861166238785, |
| "learning_rate": 2.876714280623708e-05, |
| "loss": 0.2103, |
| "num_input_tokens_seen": 278048, |
| "step": 1825 |
| }, |
| { |
| "epoch": 10.166666666666666, |
| "grad_norm": 0.2500019073486328, |
| "learning_rate": 2.8647279517804754e-05, |
| "loss": 0.2485, |
| "num_input_tokens_seen": 278832, |
| "step": 1830 |
| }, |
| { |
| "epoch": 10.194444444444445, |
| "grad_norm": 0.2172568142414093, |
| "learning_rate": 2.8527330502312248e-05, |
| "loss": 0.2321, |
| "num_input_tokens_seen": 279584, |
| "step": 1835 |
| }, |
| { |
| "epoch": 10.222222222222221, |
| "grad_norm": 0.12808550894260406, |
| "learning_rate": 2.8407298579087365e-05, |
| "loss": 0.2312, |
| "num_input_tokens_seen": 280368, |
| "step": 1840 |
| }, |
| { |
| "epoch": 10.25, |
| "grad_norm": 0.12107470631599426, |
| "learning_rate": 2.8287186569406566e-05, |
| "loss": 0.2119, |
| "num_input_tokens_seen": 281136, |
| "step": 1845 |
| }, |
| { |
| "epoch": 10.277777777777779, |
| "grad_norm": 0.23338744044303894, |
| "learning_rate": 2.816699729642871e-05, |
| "loss": 0.2219, |
| "num_input_tokens_seen": 281872, |
| "step": 1850 |
| }, |
| { |
| "epoch": 10.305555555555555, |
| "grad_norm": 0.2628016173839569, |
| "learning_rate": 2.8046733585128687e-05, |
| "loss": 0.2355, |
| "num_input_tokens_seen": 282640, |
| "step": 1855 |
| }, |
| { |
| "epoch": 10.333333333333334, |
| "grad_norm": 0.24145862460136414, |
| "learning_rate": 2.792639826223101e-05, |
| "loss": 0.2409, |
| "num_input_tokens_seen": 283376, |
| "step": 1860 |
| }, |
| { |
| "epoch": 10.36111111111111, |
| "grad_norm": 0.1407560408115387, |
| "learning_rate": 2.7805994156143376e-05, |
| "loss": 0.2361, |
| "num_input_tokens_seen": 284112, |
| "step": 1865 |
| }, |
| { |
| "epoch": 10.38888888888889, |
| "grad_norm": 0.22101657092571259, |
| "learning_rate": 2.7685524096890185e-05, |
| "loss": 0.2294, |
| "num_input_tokens_seen": 284816, |
| "step": 1870 |
| }, |
| { |
| "epoch": 10.416666666666666, |
| "grad_norm": 0.11544955521821976, |
| "learning_rate": 2.756499091604603e-05, |
| "loss": 0.2308, |
| "num_input_tokens_seen": 285600, |
| "step": 1875 |
| }, |
| { |
| "epoch": 10.444444444444445, |
| "grad_norm": 0.14251400530338287, |
| "learning_rate": 2.744439744666915e-05, |
| "loss": 0.2352, |
| "num_input_tokens_seen": 286368, |
| "step": 1880 |
| }, |
| { |
| "epoch": 10.472222222222221, |
| "grad_norm": 0.21146194636821747, |
| "learning_rate": 2.732374652323481e-05, |
| "loss": 0.2143, |
| "num_input_tokens_seen": 287136, |
| "step": 1885 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 0.1281888633966446, |
| "learning_rate": 2.72030409815687e-05, |
| "loss": 0.2166, |
| "num_input_tokens_seen": 287920, |
| "step": 1890 |
| }, |
| { |
| "epoch": 10.527777777777779, |
| "grad_norm": 0.2592181861400604, |
| "learning_rate": 2.7082283658780288e-05, |
| "loss": 0.2402, |
| "num_input_tokens_seen": 288672, |
| "step": 1895 |
| }, |
| { |
| "epoch": 10.555555555555555, |
| "grad_norm": 0.31524962186813354, |
| "learning_rate": 2.6961477393196126e-05, |
| "loss": 0.2449, |
| "num_input_tokens_seen": 289424, |
| "step": 1900 |
| }, |
| { |
| "epoch": 10.583333333333334, |
| "grad_norm": 0.17323945462703705, |
| "learning_rate": 2.684062502429312e-05, |
| "loss": 0.2517, |
| "num_input_tokens_seen": 290192, |
| "step": 1905 |
| }, |
| { |
| "epoch": 10.61111111111111, |
| "grad_norm": 0.223087340593338, |
| "learning_rate": 2.6719729392631826e-05, |
| "loss": 0.2138, |
| "num_input_tokens_seen": 290928, |
| "step": 1910 |
| }, |
| { |
| "epoch": 10.63888888888889, |
| "grad_norm": 0.16650070250034332, |
| "learning_rate": 2.659879333978964e-05, |
| "loss": 0.232, |
| "num_input_tokens_seen": 291696, |
| "step": 1915 |
| }, |
| { |
| "epoch": 10.666666666666666, |
| "grad_norm": 0.22007060050964355, |
| "learning_rate": 2.6477819708294064e-05, |
| "loss": 0.2265, |
| "num_input_tokens_seen": 292432, |
| "step": 1920 |
| }, |
| { |
| "epoch": 10.694444444444445, |
| "grad_norm": 0.32412680983543396, |
| "learning_rate": 2.635681134155585e-05, |
| "loss": 0.2418, |
| "num_input_tokens_seen": 293200, |
| "step": 1925 |
| }, |
| { |
| "epoch": 10.722222222222221, |
| "grad_norm": 0.22895711660385132, |
| "learning_rate": 2.623577108380215e-05, |
| "loss": 0.2208, |
| "num_input_tokens_seen": 293952, |
| "step": 1930 |
| }, |
| { |
| "epoch": 10.75, |
| "grad_norm": 0.29938873648643494, |
| "learning_rate": 2.6114701780009753e-05, |
| "loss": 0.2194, |
| "num_input_tokens_seen": 294736, |
| "step": 1935 |
| }, |
| { |
| "epoch": 10.777777777777779, |
| "grad_norm": 0.2544272243976593, |
| "learning_rate": 2.5993606275838117e-05, |
| "loss": 0.2111, |
| "num_input_tokens_seen": 295504, |
| "step": 1940 |
| }, |
| { |
| "epoch": 10.805555555555555, |
| "grad_norm": 0.22686506807804108, |
| "learning_rate": 2.587248741756253e-05, |
| "loss": 0.2199, |
| "num_input_tokens_seen": 296288, |
| "step": 1945 |
| }, |
| { |
| "epoch": 10.833333333333334, |
| "grad_norm": 0.2908959686756134, |
| "learning_rate": 2.5751348052007206e-05, |
| "loss": 0.2162, |
| "num_input_tokens_seen": 297040, |
| "step": 1950 |
| }, |
| { |
| "epoch": 10.86111111111111, |
| "grad_norm": 0.25148704648017883, |
| "learning_rate": 2.5630191026478368e-05, |
| "loss": 0.2349, |
| "num_input_tokens_seen": 297776, |
| "step": 1955 |
| }, |
| { |
| "epoch": 10.88888888888889, |
| "grad_norm": 0.32410892844200134, |
| "learning_rate": 2.5509019188697343e-05, |
| "loss": 0.215, |
| "num_input_tokens_seen": 298528, |
| "step": 1960 |
| }, |
| { |
| "epoch": 10.916666666666666, |
| "grad_norm": 0.40616366267204285, |
| "learning_rate": 2.5387835386733584e-05, |
| "loss": 0.2247, |
| "num_input_tokens_seen": 299296, |
| "step": 1965 |
| }, |
| { |
| "epoch": 10.944444444444445, |
| "grad_norm": 0.3647058606147766, |
| "learning_rate": 2.5266642468937766e-05, |
| "loss": 0.2378, |
| "num_input_tokens_seen": 300080, |
| "step": 1970 |
| }, |
| { |
| "epoch": 10.972222222222221, |
| "grad_norm": 0.29364150762557983, |
| "learning_rate": 2.5145443283874848e-05, |
| "loss": 0.2442, |
| "num_input_tokens_seen": 300848, |
| "step": 1975 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.4087861478328705, |
| "learning_rate": 2.5024240680257055e-05, |
| "loss": 0.254, |
| "num_input_tokens_seen": 301600, |
| "step": 1980 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.2517322301864624, |
| "eval_runtime": 0.8833, |
| "eval_samples_per_second": 45.287, |
| "eval_steps_per_second": 22.643, |
| "num_input_tokens_seen": 301600, |
| "step": 1980 |
| }, |
| { |
| "epoch": 11.027777777777779, |
| "grad_norm": 0.415340781211853, |
| "learning_rate": 2.4903037506876997e-05, |
| "loss": 0.2302, |
| "num_input_tokens_seen": 302320, |
| "step": 1985 |
| }, |
| { |
| "epoch": 11.055555555555555, |
| "grad_norm": 0.4484613537788391, |
| "learning_rate": 2.4781836612540657e-05, |
| "loss": 0.2435, |
| "num_input_tokens_seen": 303072, |
| "step": 1990 |
| }, |
| { |
| "epoch": 11.083333333333334, |
| "grad_norm": 0.26110631227493286, |
| "learning_rate": 2.4660640846000453e-05, |
| "loss": 0.2242, |
| "num_input_tokens_seen": 303840, |
| "step": 1995 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.14913207292556763, |
| "learning_rate": 2.4539453055888297e-05, |
| "loss": 0.2396, |
| "num_input_tokens_seen": 304576, |
| "step": 2000 |
| }, |
| { |
| "epoch": 11.13888888888889, |
| "grad_norm": 0.20138798654079437, |
| "learning_rate": 2.4418276090648596e-05, |
| "loss": 0.218, |
| "num_input_tokens_seen": 305344, |
| "step": 2005 |
| }, |
| { |
| "epoch": 11.166666666666666, |
| "grad_norm": 0.3144391179084778, |
| "learning_rate": 2.4297112798471326e-05, |
| "loss": 0.2182, |
| "num_input_tokens_seen": 306080, |
| "step": 2010 |
| }, |
| { |
| "epoch": 11.194444444444445, |
| "grad_norm": 0.21388863027095795, |
| "learning_rate": 2.4175966027225107e-05, |
| "loss": 0.2423, |
| "num_input_tokens_seen": 306832, |
| "step": 2015 |
| }, |
| { |
| "epoch": 11.222222222222221, |
| "grad_norm": 0.25984007120132446, |
| "learning_rate": 2.405483862439023e-05, |
| "loss": 0.2111, |
| "num_input_tokens_seen": 307632, |
| "step": 2020 |
| }, |
| { |
| "epoch": 11.25, |
| "grad_norm": 0.3656483292579651, |
| "learning_rate": 2.3933733436991732e-05, |
| "loss": 0.2297, |
| "num_input_tokens_seen": 308384, |
| "step": 2025 |
| }, |
| { |
| "epoch": 11.277777777777779, |
| "grad_norm": 0.3722660541534424, |
| "learning_rate": 2.381265331153252e-05, |
| "loss": 0.2238, |
| "num_input_tokens_seen": 309152, |
| "step": 2030 |
| }, |
| { |
| "epoch": 11.305555555555555, |
| "grad_norm": 0.5844541788101196, |
| "learning_rate": 2.3691601093926404e-05, |
| "loss": 0.225, |
| "num_input_tokens_seen": 309904, |
| "step": 2035 |
| }, |
| { |
| "epoch": 11.333333333333334, |
| "grad_norm": 0.5075754523277283, |
| "learning_rate": 2.3570579629431267e-05, |
| "loss": 0.2381, |
| "num_input_tokens_seen": 310672, |
| "step": 2040 |
| }, |
| { |
| "epoch": 11.36111111111111, |
| "grad_norm": 0.3695644736289978, |
| "learning_rate": 2.344959176258212e-05, |
| "loss": 0.2563, |
| "num_input_tokens_seen": 311440, |
| "step": 2045 |
| }, |
| { |
| "epoch": 11.38888888888889, |
| "grad_norm": 0.5124101638793945, |
| "learning_rate": 2.3328640337124326e-05, |
| "loss": 0.1948, |
| "num_input_tokens_seen": 312240, |
| "step": 2050 |
| }, |
| { |
| "epoch": 11.416666666666666, |
| "grad_norm": 0.31323179602622986, |
| "learning_rate": 2.3207728195946688e-05, |
| "loss": 0.2268, |
| "num_input_tokens_seen": 312944, |
| "step": 2055 |
| }, |
| { |
| "epoch": 11.444444444444445, |
| "grad_norm": 0.2486533373594284, |
| "learning_rate": 2.3086858181014653e-05, |
| "loss": 0.2032, |
| "num_input_tokens_seen": 313712, |
| "step": 2060 |
| }, |
| { |
| "epoch": 11.472222222222221, |
| "grad_norm": 0.5734115242958069, |
| "learning_rate": 2.2966033133303545e-05, |
| "loss": 0.2351, |
| "num_input_tokens_seen": 314448, |
| "step": 2065 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 0.42716243863105774, |
| "learning_rate": 2.2845255892731733e-05, |
| "loss": 0.2103, |
| "num_input_tokens_seen": 315200, |
| "step": 2070 |
| }, |
| { |
| "epoch": 11.527777777777779, |
| "grad_norm": 0.4750475287437439, |
| "learning_rate": 2.2724529298093915e-05, |
| "loss": 0.2522, |
| "num_input_tokens_seen": 315968, |
| "step": 2075 |
| }, |
| { |
| "epoch": 11.555555555555555, |
| "grad_norm": 0.2699427604675293, |
| "learning_rate": 2.26038561869944e-05, |
| "loss": 0.2137, |
| "num_input_tokens_seen": 316720, |
| "step": 2080 |
| }, |
| { |
| "epoch": 11.583333333333334, |
| "grad_norm": 0.3729088306427002, |
| "learning_rate": 2.248323939578039e-05, |
| "loss": 0.2354, |
| "num_input_tokens_seen": 317488, |
| "step": 2085 |
| }, |
| { |
| "epoch": 11.61111111111111, |
| "grad_norm": 0.44618797302246094, |
| "learning_rate": 2.2362681759475307e-05, |
| "loss": 0.2655, |
| "num_input_tokens_seen": 318208, |
| "step": 2090 |
| }, |
| { |
| "epoch": 11.63888888888889, |
| "grad_norm": 0.3804112374782562, |
| "learning_rate": 2.2242186111712208e-05, |
| "loss": 0.2315, |
| "num_input_tokens_seen": 318944, |
| "step": 2095 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 0.18952308595180511, |
| "learning_rate": 2.212175528466712e-05, |
| "loss": 0.2254, |
| "num_input_tokens_seen": 319712, |
| "step": 2100 |
| }, |
| { |
| "epoch": 11.694444444444445, |
| "grad_norm": 0.16058814525604248, |
| "learning_rate": 2.2001392108992504e-05, |
| "loss": 0.2436, |
| "num_input_tokens_seen": 320480, |
| "step": 2105 |
| }, |
| { |
| "epoch": 11.722222222222221, |
| "grad_norm": 0.2892865836620331, |
| "learning_rate": 2.1881099413750733e-05, |
| "loss": 0.2182, |
| "num_input_tokens_seen": 321296, |
| "step": 2110 |
| }, |
| { |
| "epoch": 11.75, |
| "grad_norm": 0.22166113555431366, |
| "learning_rate": 2.1760880026347562e-05, |
| "loss": 0.2188, |
| "num_input_tokens_seen": 322064, |
| "step": 2115 |
| }, |
| { |
| "epoch": 11.777777777777779, |
| "grad_norm": 0.19897451996803284, |
| "learning_rate": 2.16407367724657e-05, |
| "loss": 0.2411, |
| "num_input_tokens_seen": 322832, |
| "step": 2120 |
| }, |
| { |
| "epoch": 11.805555555555555, |
| "grad_norm": 0.1611793488264084, |
| "learning_rate": 2.1520672475998373e-05, |
| "loss": 0.2344, |
| "num_input_tokens_seen": 323600, |
| "step": 2125 |
| }, |
| { |
| "epoch": 11.833333333333334, |
| "grad_norm": 0.155122309923172, |
| "learning_rate": 2.140068995898297e-05, |
| "loss": 0.2243, |
| "num_input_tokens_seen": 324384, |
| "step": 2130 |
| }, |
| { |
| "epoch": 11.86111111111111, |
| "grad_norm": 0.23747070133686066, |
| "learning_rate": 2.1280792041534714e-05, |
| "loss": 0.2198, |
| "num_input_tokens_seen": 325152, |
| "step": 2135 |
| }, |
| { |
| "epoch": 11.88888888888889, |
| "grad_norm": 0.24674496054649353, |
| "learning_rate": 2.116098154178035e-05, |
| "loss": 0.2318, |
| "num_input_tokens_seen": 325904, |
| "step": 2140 |
| }, |
| { |
| "epoch": 11.916666666666666, |
| "grad_norm": 0.18954938650131226, |
| "learning_rate": 2.1041261275791933e-05, |
| "loss": 0.2125, |
| "num_input_tokens_seen": 326672, |
| "step": 2145 |
| }, |
| { |
| "epoch": 11.944444444444445, |
| "grad_norm": 0.4533662497997284, |
| "learning_rate": 2.092163405752063e-05, |
| "loss": 0.2239, |
| "num_input_tokens_seen": 327440, |
| "step": 2150 |
| }, |
| { |
| "epoch": 11.972222222222221, |
| "grad_norm": 0.438764750957489, |
| "learning_rate": 2.0802102698730574e-05, |
| "loss": 0.2081, |
| "num_input_tokens_seen": 328224, |
| "step": 2155 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.2310199737548828, |
| "learning_rate": 2.0682670008932785e-05, |
| "loss": 0.2522, |
| "num_input_tokens_seen": 328976, |
| "step": 2160 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.24894733726978302, |
| "eval_runtime": 0.8684, |
| "eval_samples_per_second": 46.062, |
| "eval_steps_per_second": 23.031, |
| "num_input_tokens_seen": 328976, |
| "step": 2160 |
| }, |
| { |
| "epoch": 12.027777777777779, |
| "grad_norm": 0.3067687749862671, |
| "learning_rate": 2.0563338795319123e-05, |
| "loss": 0.2306, |
| "num_input_tokens_seen": 329760, |
| "step": 2165 |
| }, |
| { |
| "epoch": 12.055555555555555, |
| "grad_norm": 0.20359157025814056, |
| "learning_rate": 2.0444111862696314e-05, |
| "loss": 0.2389, |
| "num_input_tokens_seen": 330528, |
| "step": 2170 |
| }, |
| { |
| "epoch": 12.083333333333334, |
| "grad_norm": 0.3316972255706787, |
| "learning_rate": 2.032499201342003e-05, |
| "loss": 0.2192, |
| "num_input_tokens_seen": 331312, |
| "step": 2175 |
| }, |
| { |
| "epoch": 12.11111111111111, |
| "grad_norm": 0.34185460209846497, |
| "learning_rate": 2.020598204732901e-05, |
| "loss": 0.2054, |
| "num_input_tokens_seen": 332064, |
| "step": 2180 |
| }, |
| { |
| "epoch": 12.13888888888889, |
| "grad_norm": 0.4968796968460083, |
| "learning_rate": 2.0087084761679245e-05, |
| "loss": 0.2019, |
| "num_input_tokens_seen": 332816, |
| "step": 2185 |
| }, |
| { |
| "epoch": 12.166666666666666, |
| "grad_norm": 0.5224512219429016, |
| "learning_rate": 1.996830295107827e-05, |
| "loss": 0.2533, |
| "num_input_tokens_seen": 333584, |
| "step": 2190 |
| }, |
| { |
| "epoch": 12.194444444444445, |
| "grad_norm": 0.6830374002456665, |
| "learning_rate": 1.9849639407419423e-05, |
| "loss": 0.2338, |
| "num_input_tokens_seen": 334336, |
| "step": 2195 |
| }, |
| { |
| "epoch": 12.222222222222221, |
| "grad_norm": 0.5021493434906006, |
| "learning_rate": 1.973109691981627e-05, |
| "loss": 0.1989, |
| "num_input_tokens_seen": 335104, |
| "step": 2200 |
| }, |
| { |
| "epoch": 12.25, |
| "grad_norm": 0.4624042510986328, |
| "learning_rate": 1.9612678274537005e-05, |
| "loss": 0.204, |
| "num_input_tokens_seen": 335904, |
| "step": 2205 |
| }, |
| { |
| "epoch": 12.277777777777779, |
| "grad_norm": 0.9172624945640564, |
| "learning_rate": 1.9494386254939e-05, |
| "loss": 0.2095, |
| "num_input_tokens_seen": 336704, |
| "step": 2210 |
| }, |
| { |
| "epoch": 12.305555555555555, |
| "grad_norm": 0.578337550163269, |
| "learning_rate": 1.937622364140338e-05, |
| "loss": 0.2207, |
| "num_input_tokens_seen": 337456, |
| "step": 2215 |
| }, |
| { |
| "epoch": 12.333333333333334, |
| "grad_norm": 0.48846668004989624, |
| "learning_rate": 1.925819321126964e-05, |
| "loss": 0.2118, |
| "num_input_tokens_seen": 338208, |
| "step": 2220 |
| }, |
| { |
| "epoch": 12.36111111111111, |
| "grad_norm": 0.5584977865219116, |
| "learning_rate": 1.9140297738770385e-05, |
| "loss": 0.2486, |
| "num_input_tokens_seen": 338976, |
| "step": 2225 |
| }, |
| { |
| "epoch": 12.38888888888889, |
| "grad_norm": 0.42385029792785645, |
| "learning_rate": 1.9022539994966147e-05, |
| "loss": 0.2217, |
| "num_input_tokens_seen": 339712, |
| "step": 2230 |
| }, |
| { |
| "epoch": 12.416666666666666, |
| "grad_norm": 0.3373255431652069, |
| "learning_rate": 1.8904922747680204e-05, |
| "loss": 0.2387, |
| "num_input_tokens_seen": 340464, |
| "step": 2235 |
| }, |
| { |
| "epoch": 12.444444444444445, |
| "grad_norm": 0.5355724692344666, |
| "learning_rate": 1.8787448761433556e-05, |
| "loss": 0.228, |
| "num_input_tokens_seen": 341232, |
| "step": 2240 |
| }, |
| { |
| "epoch": 12.472222222222221, |
| "grad_norm": 0.6969017386436462, |
| "learning_rate": 1.8670120797379958e-05, |
| "loss": 0.2385, |
| "num_input_tokens_seen": 342000, |
| "step": 2245 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.4944399297237396, |
| "learning_rate": 1.8552941613240983e-05, |
| "loss": 0.2027, |
| "num_input_tokens_seen": 342752, |
| "step": 2250 |
| }, |
| { |
| "epoch": 12.527777777777779, |
| "grad_norm": 0.5233724117279053, |
| "learning_rate": 1.8435913963241226e-05, |
| "loss": 0.219, |
| "num_input_tokens_seen": 343504, |
| "step": 2255 |
| }, |
| { |
| "epoch": 12.555555555555555, |
| "grad_norm": 0.5676577687263489, |
| "learning_rate": 1.831904059804358e-05, |
| "loss": 0.226, |
| "num_input_tokens_seen": 344240, |
| "step": 2260 |
| }, |
| { |
| "epoch": 12.583333333333334, |
| "grad_norm": 0.4965709447860718, |
| "learning_rate": 1.8202324264684544e-05, |
| "loss": 0.196, |
| "num_input_tokens_seen": 345008, |
| "step": 2265 |
| }, |
| { |
| "epoch": 12.61111111111111, |
| "grad_norm": 0.5146750211715698, |
| "learning_rate": 1.8085767706509712e-05, |
| "loss": 0.1933, |
| "num_input_tokens_seen": 345776, |
| "step": 2270 |
| }, |
| { |
| "epoch": 12.63888888888889, |
| "grad_norm": 0.6066842675209045, |
| "learning_rate": 1.7969373663109234e-05, |
| "loss": 0.2546, |
| "num_input_tokens_seen": 346544, |
| "step": 2275 |
| }, |
| { |
| "epoch": 12.666666666666666, |
| "grad_norm": 1.1421144008636475, |
| "learning_rate": 1.7853144870253458e-05, |
| "loss": 0.2309, |
| "num_input_tokens_seen": 347312, |
| "step": 2280 |
| }, |
| { |
| "epoch": 12.694444444444445, |
| "grad_norm": 0.7533566355705261, |
| "learning_rate": 1.7737084059828637e-05, |
| "loss": 0.257, |
| "num_input_tokens_seen": 348064, |
| "step": 2285 |
| }, |
| { |
| "epoch": 12.722222222222221, |
| "grad_norm": 0.49484172463417053, |
| "learning_rate": 1.7621193959772657e-05, |
| "loss": 0.201, |
| "num_input_tokens_seen": 348800, |
| "step": 2290 |
| }, |
| { |
| "epoch": 12.75, |
| "grad_norm": 0.5660438537597656, |
| "learning_rate": 1.750547729401101e-05, |
| "loss": 0.2252, |
| "num_input_tokens_seen": 349552, |
| "step": 2295 |
| }, |
| { |
| "epoch": 12.777777777777779, |
| "grad_norm": 0.6149576902389526, |
| "learning_rate": 1.7389936782392695e-05, |
| "loss": 0.2122, |
| "num_input_tokens_seen": 350304, |
| "step": 2300 |
| }, |
| { |
| "epoch": 12.805555555555555, |
| "grad_norm": 0.9000388979911804, |
| "learning_rate": 1.7274575140626318e-05, |
| "loss": 0.2483, |
| "num_input_tokens_seen": 351072, |
| "step": 2305 |
| }, |
| { |
| "epoch": 12.833333333333334, |
| "grad_norm": 0.9429511427879333, |
| "learning_rate": 1.7159395080216273e-05, |
| "loss": 0.2592, |
| "num_input_tokens_seen": 351856, |
| "step": 2310 |
| }, |
| { |
| "epoch": 12.86111111111111, |
| "grad_norm": 0.7645384669303894, |
| "learning_rate": 1.7044399308398983e-05, |
| "loss": 0.216, |
| "num_input_tokens_seen": 352624, |
| "step": 2315 |
| }, |
| { |
| "epoch": 12.88888888888889, |
| "grad_norm": 1.0084342956542969, |
| "learning_rate": 1.692959052807928e-05, |
| "loss": 0.2183, |
| "num_input_tokens_seen": 353376, |
| "step": 2320 |
| }, |
| { |
| "epoch": 12.916666666666666, |
| "grad_norm": 0.5593501329421997, |
| "learning_rate": 1.681497143776689e-05, |
| "loss": 0.2226, |
| "num_input_tokens_seen": 354112, |
| "step": 2325 |
| }, |
| { |
| "epoch": 12.944444444444445, |
| "grad_norm": 0.6755273938179016, |
| "learning_rate": 1.670054473151298e-05, |
| "loss": 0.2377, |
| "num_input_tokens_seen": 354880, |
| "step": 2330 |
| }, |
| { |
| "epoch": 12.972222222222221, |
| "grad_norm": 0.7064863443374634, |
| "learning_rate": 1.658631309884684e-05, |
| "loss": 0.2067, |
| "num_input_tokens_seen": 355632, |
| "step": 2335 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.5242209434509277, |
| "learning_rate": 1.6472279224712702e-05, |
| "loss": 0.2228, |
| "num_input_tokens_seen": 356400, |
| "step": 2340 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.2544581890106201, |
| "eval_runtime": 0.8689, |
| "eval_samples_per_second": 46.034, |
| "eval_steps_per_second": 23.017, |
| "num_input_tokens_seen": 356400, |
| "step": 2340 |
| }, |
| { |
| "epoch": 13.027777777777779, |
| "grad_norm": 0.9090105891227722, |
| "learning_rate": 1.6358445789406584e-05, |
| "loss": 0.2227, |
| "num_input_tokens_seen": 357136, |
| "step": 2345 |
| }, |
| { |
| "epoch": 13.055555555555555, |
| "grad_norm": 0.6356475949287415, |
| "learning_rate": 1.6244815468513315e-05, |
| "loss": 0.2572, |
| "num_input_tokens_seen": 357872, |
| "step": 2350 |
| }, |
| { |
| "epoch": 13.083333333333334, |
| "grad_norm": 0.7765060067176819, |
| "learning_rate": 1.6131390932843648e-05, |
| "loss": 0.1878, |
| "num_input_tokens_seen": 358608, |
| "step": 2355 |
| }, |
| { |
| "epoch": 13.11111111111111, |
| "grad_norm": 0.7711853384971619, |
| "learning_rate": 1.6018174848371494e-05, |
| "loss": 0.2009, |
| "num_input_tokens_seen": 359408, |
| "step": 2360 |
| }, |
| { |
| "epoch": 13.13888888888889, |
| "grad_norm": 0.5515719652175903, |
| "learning_rate": 1.5905169876171223e-05, |
| "loss": 0.2151, |
| "num_input_tokens_seen": 360176, |
| "step": 2365 |
| }, |
| { |
| "epoch": 13.166666666666666, |
| "grad_norm": 0.7772662043571472, |
| "learning_rate": 1.579237867235514e-05, |
| "loss": 0.1831, |
| "num_input_tokens_seen": 360928, |
| "step": 2370 |
| }, |
| { |
| "epoch": 13.194444444444445, |
| "grad_norm": 0.9589880704879761, |
| "learning_rate": 1.567980388801109e-05, |
| "loss": 0.2382, |
| "num_input_tokens_seen": 361680, |
| "step": 2375 |
| }, |
| { |
| "epoch": 13.222222222222221, |
| "grad_norm": 0.742833137512207, |
| "learning_rate": 1.556744816914008e-05, |
| "loss": 0.2503, |
| "num_input_tokens_seen": 362448, |
| "step": 2380 |
| }, |
| { |
| "epoch": 13.25, |
| "grad_norm": 0.5612350702285767, |
| "learning_rate": 1.5455314156594124e-05, |
| "loss": 0.23, |
| "num_input_tokens_seen": 363216, |
| "step": 2385 |
| }, |
| { |
| "epoch": 13.277777777777779, |
| "grad_norm": 0.9716349244117737, |
| "learning_rate": 1.534340448601418e-05, |
| "loss": 0.2322, |
| "num_input_tokens_seen": 363984, |
| "step": 2390 |
| }, |
| { |
| "epoch": 13.305555555555555, |
| "grad_norm": 0.6680596470832825, |
| "learning_rate": 1.523172178776816e-05, |
| "loss": 0.2185, |
| "num_input_tokens_seen": 364736, |
| "step": 2395 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.775281548500061, |
| "learning_rate": 1.512026868688915e-05, |
| "loss": 0.2205, |
| "num_input_tokens_seen": 365456, |
| "step": 2400 |
| }, |
| { |
| "epoch": 13.36111111111111, |
| "grad_norm": 0.6668827533721924, |
| "learning_rate": 1.5009047803013699e-05, |
| "loss": 0.2149, |
| "num_input_tokens_seen": 366240, |
| "step": 2405 |
| }, |
| { |
| "epoch": 13.38888888888889, |
| "grad_norm": 1.0453615188598633, |
| "learning_rate": 1.4898061750320212e-05, |
| "loss": 0.2087, |
| "num_input_tokens_seen": 367024, |
| "step": 2410 |
| }, |
| { |
| "epoch": 13.416666666666666, |
| "grad_norm": 1.2332998514175415, |
| "learning_rate": 1.4787313137467546e-05, |
| "loss": 0.2069, |
| "num_input_tokens_seen": 367792, |
| "step": 2415 |
| }, |
| { |
| "epoch": 13.444444444444445, |
| "grad_norm": 0.6259872913360596, |
| "learning_rate": 1.4676804567533687e-05, |
| "loss": 0.222, |
| "num_input_tokens_seen": 368592, |
| "step": 2420 |
| }, |
| { |
| "epoch": 13.472222222222221, |
| "grad_norm": 1.6646252870559692, |
| "learning_rate": 1.4566538637954554e-05, |
| "loss": 0.2413, |
| "num_input_tokens_seen": 369328, |
| "step": 2425 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 1.4683263301849365, |
| "learning_rate": 1.4456517940462949e-05, |
| "loss": 0.1956, |
| "num_input_tokens_seen": 370064, |
| "step": 2430 |
| }, |
| { |
| "epoch": 13.527777777777779, |
| "grad_norm": 0.7791321873664856, |
| "learning_rate": 1.4346745061027644e-05, |
| "loss": 0.1904, |
| "num_input_tokens_seen": 370832, |
| "step": 2435 |
| }, |
| { |
| "epoch": 13.555555555555555, |
| "grad_norm": 1.082039713859558, |
| "learning_rate": 1.4237222579792618e-05, |
| "loss": 0.2108, |
| "num_input_tokens_seen": 371568, |
| "step": 2440 |
| }, |
| { |
| "epoch": 13.583333333333334, |
| "grad_norm": 0.8041364550590515, |
| "learning_rate": 1.4127953071016383e-05, |
| "loss": 0.2039, |
| "num_input_tokens_seen": 372320, |
| "step": 2445 |
| }, |
| { |
| "epoch": 13.61111111111111, |
| "grad_norm": 0.7636337280273438, |
| "learning_rate": 1.4018939103011472e-05, |
| "loss": 0.1884, |
| "num_input_tokens_seen": 373072, |
| "step": 2450 |
| }, |
| { |
| "epoch": 13.63888888888889, |
| "grad_norm": 1.480279564857483, |
| "learning_rate": 1.3910183238084112e-05, |
| "loss": 0.2047, |
| "num_input_tokens_seen": 373840, |
| "step": 2455 |
| }, |
| { |
| "epoch": 13.666666666666666, |
| "grad_norm": 1.4362431764602661, |
| "learning_rate": 1.3801688032473958e-05, |
| "loss": 0.2926, |
| "num_input_tokens_seen": 374624, |
| "step": 2460 |
| }, |
| { |
| "epoch": 13.694444444444445, |
| "grad_norm": 0.8362478613853455, |
| "learning_rate": 1.369345603629406e-05, |
| "loss": 0.1942, |
| "num_input_tokens_seen": 375392, |
| "step": 2465 |
| }, |
| { |
| "epoch": 13.722222222222221, |
| "grad_norm": 0.9436230063438416, |
| "learning_rate": 1.3585489793470862e-05, |
| "loss": 0.2216, |
| "num_input_tokens_seen": 376144, |
| "step": 2470 |
| }, |
| { |
| "epoch": 13.75, |
| "grad_norm": 1.2598055601119995, |
| "learning_rate": 1.3477791841684451e-05, |
| "loss": 0.1824, |
| "num_input_tokens_seen": 376880, |
| "step": 2475 |
| }, |
| { |
| "epoch": 13.777777777777779, |
| "grad_norm": 1.0539710521697998, |
| "learning_rate": 1.337036471230889e-05, |
| "loss": 0.1747, |
| "num_input_tokens_seen": 377664, |
| "step": 2480 |
| }, |
| { |
| "epoch": 13.805555555555555, |
| "grad_norm": 1.3532763719558716, |
| "learning_rate": 1.3263210930352737e-05, |
| "loss": 0.1954, |
| "num_input_tokens_seen": 378464, |
| "step": 2485 |
| }, |
| { |
| "epoch": 13.833333333333334, |
| "grad_norm": 1.3453116416931152, |
| "learning_rate": 1.3156333014399674e-05, |
| "loss": 0.2218, |
| "num_input_tokens_seen": 379216, |
| "step": 2490 |
| }, |
| { |
| "epoch": 13.86111111111111, |
| "grad_norm": 1.9826656579971313, |
| "learning_rate": 1.3049733476549352e-05, |
| "loss": 0.2116, |
| "num_input_tokens_seen": 379952, |
| "step": 2495 |
| }, |
| { |
| "epoch": 13.88888888888889, |
| "grad_norm": 1.7242130041122437, |
| "learning_rate": 1.2943414822358285e-05, |
| "loss": 0.1947, |
| "num_input_tokens_seen": 380720, |
| "step": 2500 |
| }, |
| { |
| "epoch": 13.916666666666666, |
| "grad_norm": 1.9099639654159546, |
| "learning_rate": 1.2837379550781003e-05, |
| "loss": 0.2132, |
| "num_input_tokens_seen": 381504, |
| "step": 2505 |
| }, |
| { |
| "epoch": 13.944444444444445, |
| "grad_norm": 2.161792516708374, |
| "learning_rate": 1.2731630154111296e-05, |
| "loss": 0.2268, |
| "num_input_tokens_seen": 382272, |
| "step": 2510 |
| }, |
| { |
| "epoch": 13.972222222222221, |
| "grad_norm": 2.0257785320281982, |
| "learning_rate": 1.262616911792365e-05, |
| "loss": 0.2125, |
| "num_input_tokens_seen": 383056, |
| "step": 2515 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 1.314621090888977, |
| "learning_rate": 1.2520998921014792e-05, |
| "loss": 0.1836, |
| "num_input_tokens_seen": 383808, |
| "step": 2520 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.26544028520584106, |
| "eval_runtime": 0.8657, |
| "eval_samples_per_second": 46.206, |
| "eval_steps_per_second": 23.103, |
| "num_input_tokens_seen": 383808, |
| "step": 2520 |
| }, |
| { |
| "epoch": 14.027777777777779, |
| "grad_norm": 1.272783875465393, |
| "learning_rate": 1.2416122035345507e-05, |
| "loss": 0.1602, |
| "num_input_tokens_seen": 384576, |
| "step": 2525 |
| }, |
| { |
| "epoch": 14.055555555555555, |
| "grad_norm": 1.6169074773788452, |
| "learning_rate": 1.2311540925982403e-05, |
| "loss": 0.1931, |
| "num_input_tokens_seen": 385344, |
| "step": 2530 |
| }, |
| { |
| "epoch": 14.083333333333334, |
| "grad_norm": 1.8734256029129028, |
| "learning_rate": 1.2207258051040099e-05, |
| "loss": 0.1959, |
| "num_input_tokens_seen": 386096, |
| "step": 2535 |
| }, |
| { |
| "epoch": 14.11111111111111, |
| "grad_norm": 1.183911919593811, |
| "learning_rate": 1.2103275861623378e-05, |
| "loss": 0.161, |
| "num_input_tokens_seen": 386880, |
| "step": 2540 |
| }, |
| { |
| "epoch": 14.13888888888889, |
| "grad_norm": 1.1330690383911133, |
| "learning_rate": 1.1999596801769616e-05, |
| "loss": 0.1154, |
| "num_input_tokens_seen": 387696, |
| "step": 2545 |
| }, |
| { |
| "epoch": 14.166666666666666, |
| "grad_norm": 1.8644895553588867, |
| "learning_rate": 1.189622330839129e-05, |
| "loss": 0.2176, |
| "num_input_tokens_seen": 388480, |
| "step": 2550 |
| }, |
| { |
| "epoch": 14.194444444444445, |
| "grad_norm": 2.0677993297576904, |
| "learning_rate": 1.179315781121874e-05, |
| "loss": 0.1774, |
| "num_input_tokens_seen": 389248, |
| "step": 2555 |
| }, |
| { |
| "epoch": 14.222222222222221, |
| "grad_norm": 2.278595209121704, |
| "learning_rate": 1.1690402732743042e-05, |
| "loss": 0.1644, |
| "num_input_tokens_seen": 390016, |
| "step": 2560 |
| }, |
| { |
| "epoch": 14.25, |
| "grad_norm": 2.7160439491271973, |
| "learning_rate": 1.158796048815906e-05, |
| "loss": 0.1784, |
| "num_input_tokens_seen": 390832, |
| "step": 2565 |
| }, |
| { |
| "epoch": 14.277777777777779, |
| "grad_norm": 2.1027543544769287, |
| "learning_rate": 1.1485833485308702e-05, |
| "loss": 0.1768, |
| "num_input_tokens_seen": 391568, |
| "step": 2570 |
| }, |
| { |
| "epoch": 14.305555555555555, |
| "grad_norm": 2.1385934352874756, |
| "learning_rate": 1.1384024124624324e-05, |
| "loss": 0.2043, |
| "num_input_tokens_seen": 392352, |
| "step": 2575 |
| }, |
| { |
| "epoch": 14.333333333333334, |
| "grad_norm": 2.0394463539123535, |
| "learning_rate": 1.1282534799072272e-05, |
| "loss": 0.1632, |
| "num_input_tokens_seen": 393104, |
| "step": 2580 |
| }, |
| { |
| "epoch": 14.36111111111111, |
| "grad_norm": 3.263662815093994, |
| "learning_rate": 1.1181367894096684e-05, |
| "loss": 0.1768, |
| "num_input_tokens_seen": 393840, |
| "step": 2585 |
| }, |
| { |
| "epoch": 14.38888888888889, |
| "grad_norm": 2.817535400390625, |
| "learning_rate": 1.1080525787563393e-05, |
| "loss": 0.1862, |
| "num_input_tokens_seen": 394576, |
| "step": 2590 |
| }, |
| { |
| "epoch": 14.416666666666666, |
| "grad_norm": 3.808518648147583, |
| "learning_rate": 1.0980010849704036e-05, |
| "loss": 0.23, |
| "num_input_tokens_seen": 395312, |
| "step": 2595 |
| }, |
| { |
| "epoch": 14.444444444444445, |
| "grad_norm": 4.187227249145508, |
| "learning_rate": 1.0879825443060362e-05, |
| "loss": 0.1612, |
| "num_input_tokens_seen": 396048, |
| "step": 2600 |
| }, |
| { |
| "epoch": 14.472222222222221, |
| "grad_norm": 1.87286376953125, |
| "learning_rate": 1.0779971922428711e-05, |
| "loss": 0.1516, |
| "num_input_tokens_seen": 396800, |
| "step": 2605 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 3.5704548358917236, |
| "learning_rate": 1.0680452634804603e-05, |
| "loss": 0.1282, |
| "num_input_tokens_seen": 397568, |
| "step": 2610 |
| }, |
| { |
| "epoch": 14.527777777777779, |
| "grad_norm": 2.150921583175659, |
| "learning_rate": 1.0581269919327643e-05, |
| "loss": 0.1617, |
| "num_input_tokens_seen": 398352, |
| "step": 2615 |
| }, |
| { |
| "epoch": 14.555555555555555, |
| "grad_norm": 3.5931949615478516, |
| "learning_rate": 1.0482426107226507e-05, |
| "loss": 0.2883, |
| "num_input_tokens_seen": 399104, |
| "step": 2620 |
| }, |
| { |
| "epoch": 14.583333333333334, |
| "grad_norm": 1.6676691770553589, |
| "learning_rate": 1.0383923521764174e-05, |
| "loss": 0.1869, |
| "num_input_tokens_seen": 399872, |
| "step": 2625 |
| }, |
| { |
| "epoch": 14.61111111111111, |
| "grad_norm": 4.933192253112793, |
| "learning_rate": 1.0285764478183284e-05, |
| "loss": 0.1692, |
| "num_input_tokens_seen": 400608, |
| "step": 2630 |
| }, |
| { |
| "epoch": 14.63888888888889, |
| "grad_norm": 3.1867835521698, |
| "learning_rate": 1.0187951283651736e-05, |
| "loss": 0.22, |
| "num_input_tokens_seen": 401344, |
| "step": 2635 |
| }, |
| { |
| "epoch": 14.666666666666666, |
| "grad_norm": 3.876699209213257, |
| "learning_rate": 1.0090486237208463e-05, |
| "loss": 0.1891, |
| "num_input_tokens_seen": 402080, |
| "step": 2640 |
| }, |
| { |
| "epoch": 14.694444444444445, |
| "grad_norm": 5.679113388061523, |
| "learning_rate": 9.993371629709391e-06, |
| "loss": 0.1553, |
| "num_input_tokens_seen": 402848, |
| "step": 2645 |
| }, |
| { |
| "epoch": 14.722222222222221, |
| "grad_norm": 1.1258200407028198, |
| "learning_rate": 9.89660974377359e-06, |
| "loss": 0.1653, |
| "num_input_tokens_seen": 403632, |
| "step": 2650 |
| }, |
| { |
| "epoch": 14.75, |
| "grad_norm": 3.4495646953582764, |
| "learning_rate": 9.800202853729651e-06, |
| "loss": 0.1575, |
| "num_input_tokens_seen": 404416, |
| "step": 2655 |
| }, |
| { |
| "epoch": 14.777777777777779, |
| "grad_norm": 2.117347240447998, |
| "learning_rate": 9.704153225562171e-06, |
| "loss": 0.1467, |
| "num_input_tokens_seen": 405184, |
| "step": 2660 |
| }, |
| { |
| "epoch": 14.805555555555555, |
| "grad_norm": 2.9187674522399902, |
| "learning_rate": 9.608463116858542e-06, |
| "loss": 0.2817, |
| "num_input_tokens_seen": 405952, |
| "step": 2665 |
| }, |
| { |
| "epoch": 14.833333333333334, |
| "grad_norm": 3.9938457012176514, |
| "learning_rate": 9.51313477675588e-06, |
| "loss": 0.1923, |
| "num_input_tokens_seen": 406688, |
| "step": 2670 |
| }, |
| { |
| "epoch": 14.86111111111111, |
| "grad_norm": 5.589292049407959, |
| "learning_rate": 9.418170445888139e-06, |
| "loss": 0.1637, |
| "num_input_tokens_seen": 407440, |
| "step": 2675 |
| }, |
| { |
| "epoch": 14.88888888888889, |
| "grad_norm": 1.3871015310287476, |
| "learning_rate": 9.323572356333454e-06, |
| "loss": 0.1199, |
| "num_input_tokens_seen": 408192, |
| "step": 2680 |
| }, |
| { |
| "epoch": 14.916666666666666, |
| "grad_norm": 1.9564611911773682, |
| "learning_rate": 9.22934273156172e-06, |
| "loss": 0.1095, |
| "num_input_tokens_seen": 408944, |
| "step": 2685 |
| }, |
| { |
| "epoch": 14.944444444444445, |
| "grad_norm": 4.645902633666992, |
| "learning_rate": 9.135483786382262e-06, |
| "loss": 0.1473, |
| "num_input_tokens_seen": 409680, |
| "step": 2690 |
| }, |
| { |
| "epoch": 14.972222222222221, |
| "grad_norm": 4.054070949554443, |
| "learning_rate": 9.0419977268918e-06, |
| "loss": 0.2314, |
| "num_input_tokens_seen": 410464, |
| "step": 2695 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 3.3633830547332764, |
| "learning_rate": 8.948886750422636e-06, |
| "loss": 0.1791, |
| "num_input_tokens_seen": 411216, |
| "step": 2700 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.2790451645851135, |
| "eval_runtime": 0.87, |
| "eval_samples_per_second": 45.976, |
| "eval_steps_per_second": 22.988, |
| "num_input_tokens_seen": 411216, |
| "step": 2700 |
| }, |
| { |
| "epoch": 15.027777777777779, |
| "grad_norm": 1.7929856777191162, |
| "learning_rate": 8.856153045490948e-06, |
| "loss": 0.1193, |
| "num_input_tokens_seen": 411984, |
| "step": 2705 |
| }, |
| { |
| "epoch": 15.055555555555555, |
| "grad_norm": 2.6607961654663086, |
| "learning_rate": 8.763798791745411e-06, |
| "loss": 0.1421, |
| "num_input_tokens_seen": 412768, |
| "step": 2710 |
| }, |
| { |
| "epoch": 15.083333333333334, |
| "grad_norm": 2.3993401527404785, |
| "learning_rate": 8.671826159915907e-06, |
| "loss": 0.1301, |
| "num_input_tokens_seen": 413552, |
| "step": 2715 |
| }, |
| { |
| "epoch": 15.11111111111111, |
| "grad_norm": 4.58685302734375, |
| "learning_rate": 8.58023731176254e-06, |
| "loss": 0.1253, |
| "num_input_tokens_seen": 414288, |
| "step": 2720 |
| }, |
| { |
| "epoch": 15.13888888888889, |
| "grad_norm": 3.337277889251709, |
| "learning_rate": 8.489034400024812e-06, |
| "loss": 0.0808, |
| "num_input_tokens_seen": 415056, |
| "step": 2725 |
| }, |
| { |
| "epoch": 15.166666666666666, |
| "grad_norm": 1.4871830940246582, |
| "learning_rate": 8.39821956837102e-06, |
| "loss": 0.2168, |
| "num_input_tokens_seen": 415824, |
| "step": 2730 |
| }, |
| { |
| "epoch": 15.194444444444445, |
| "grad_norm": 4.174405574798584, |
| "learning_rate": 8.3077949513479e-06, |
| "loss": 0.1412, |
| "num_input_tokens_seen": 416592, |
| "step": 2735 |
| }, |
| { |
| "epoch": 15.222222222222221, |
| "grad_norm": 5.589694499969482, |
| "learning_rate": 8.217762674330413e-06, |
| "loss": 0.1869, |
| "num_input_tokens_seen": 417344, |
| "step": 2740 |
| }, |
| { |
| "epoch": 15.25, |
| "grad_norm": 3.3008785247802734, |
| "learning_rate": 8.128124853471814e-06, |
| "loss": 0.0887, |
| "num_input_tokens_seen": 418096, |
| "step": 2745 |
| }, |
| { |
| "epoch": 15.277777777777779, |
| "grad_norm": 5.699565410614014, |
| "learning_rate": 8.03888359565391e-06, |
| "loss": 0.1541, |
| "num_input_tokens_seen": 418816, |
| "step": 2750 |
| }, |
| { |
| "epoch": 15.305555555555555, |
| "grad_norm": 2.3659095764160156, |
| "learning_rate": 7.950040998437542e-06, |
| "loss": 0.218, |
| "num_input_tokens_seen": 419568, |
| "step": 2755 |
| }, |
| { |
| "epoch": 15.333333333333334, |
| "grad_norm": 1.412232518196106, |
| "learning_rate": 7.86159915001326e-06, |
| "loss": 0.0468, |
| "num_input_tokens_seen": 420336, |
| "step": 2760 |
| }, |
| { |
| "epoch": 15.36111111111111, |
| "grad_norm": 2.961951971054077, |
| "learning_rate": 7.7735601291523e-06, |
| "loss": 0.077, |
| "num_input_tokens_seen": 421088, |
| "step": 2765 |
| }, |
| { |
| "epoch": 15.38888888888889, |
| "grad_norm": 1.9995497465133667, |
| "learning_rate": 7.685926005157651e-06, |
| "loss": 0.0845, |
| "num_input_tokens_seen": 421872, |
| "step": 2770 |
| }, |
| { |
| "epoch": 15.416666666666666, |
| "grad_norm": 7.176272869110107, |
| "learning_rate": 7.598698837815449e-06, |
| "loss": 0.0801, |
| "num_input_tokens_seen": 422640, |
| "step": 2775 |
| }, |
| { |
| "epoch": 15.444444444444445, |
| "grad_norm": 11.84694766998291, |
| "learning_rate": 7.511880677346578e-06, |
| "loss": 0.1608, |
| "num_input_tokens_seen": 423392, |
| "step": 2780 |
| }, |
| { |
| "epoch": 15.472222222222221, |
| "grad_norm": 7.272697925567627, |
| "learning_rate": 7.4254735643584564e-06, |
| "loss": 0.1564, |
| "num_input_tokens_seen": 424160, |
| "step": 2785 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 8.075713157653809, |
| "learning_rate": 7.339479529797111e-06, |
| "loss": 0.0772, |
| "num_input_tokens_seen": 424928, |
| "step": 2790 |
| }, |
| { |
| "epoch": 15.527777777777779, |
| "grad_norm": 1.1608163118362427, |
| "learning_rate": 7.2539005948993825e-06, |
| "loss": 0.211, |
| "num_input_tokens_seen": 425664, |
| "step": 2795 |
| }, |
| { |
| "epoch": 15.555555555555555, |
| "grad_norm": 11.11883544921875, |
| "learning_rate": 7.168738771145464e-06, |
| "loss": 0.242, |
| "num_input_tokens_seen": 426416, |
| "step": 2800 |
| }, |
| { |
| "epoch": 15.583333333333334, |
| "grad_norm": 14.707919120788574, |
| "learning_rate": 7.083996060211607e-06, |
| "loss": 0.2303, |
| "num_input_tokens_seen": 427184, |
| "step": 2805 |
| }, |
| { |
| "epoch": 15.61111111111111, |
| "grad_norm": 6.5854973793029785, |
| "learning_rate": 6.9996744539230665e-06, |
| "loss": 0.054, |
| "num_input_tokens_seen": 427952, |
| "step": 2810 |
| }, |
| { |
| "epoch": 15.63888888888889, |
| "grad_norm": 2.7222070693969727, |
| "learning_rate": 6.9157759342072995e-06, |
| "loss": 0.0612, |
| "num_input_tokens_seen": 428720, |
| "step": 2815 |
| }, |
| { |
| "epoch": 15.666666666666666, |
| "grad_norm": 5.319427967071533, |
| "learning_rate": 6.832302473047384e-06, |
| "loss": 0.112, |
| "num_input_tokens_seen": 429472, |
| "step": 2820 |
| }, |
| { |
| "epoch": 15.694444444444445, |
| "grad_norm": 3.4745731353759766, |
| "learning_rate": 6.7492560324356355e-06, |
| "loss": 0.1822, |
| "num_input_tokens_seen": 430224, |
| "step": 2825 |
| }, |
| { |
| "epoch": 15.722222222222221, |
| "grad_norm": 1.6407183408737183, |
| "learning_rate": 6.666638564327532e-06, |
| "loss": 0.1439, |
| "num_input_tokens_seen": 430960, |
| "step": 2830 |
| }, |
| { |
| "epoch": 15.75, |
| "grad_norm": 9.03652572631836, |
| "learning_rate": 6.584452010595807e-06, |
| "loss": 0.1255, |
| "num_input_tokens_seen": 431776, |
| "step": 2835 |
| }, |
| { |
| "epoch": 15.777777777777779, |
| "grad_norm": 4.16492223739624, |
| "learning_rate": 6.502698302984811e-06, |
| "loss": 0.1593, |
| "num_input_tokens_seen": 432512, |
| "step": 2840 |
| }, |
| { |
| "epoch": 15.805555555555555, |
| "grad_norm": 1.4153860807418823, |
| "learning_rate": 6.421379363065142e-06, |
| "loss": 0.0567, |
| "num_input_tokens_seen": 433296, |
| "step": 2845 |
| }, |
| { |
| "epoch": 15.833333333333334, |
| "grad_norm": 0.6895349621772766, |
| "learning_rate": 6.340497102188425e-06, |
| "loss": 0.0864, |
| "num_input_tokens_seen": 434080, |
| "step": 2850 |
| }, |
| { |
| "epoch": 15.86111111111111, |
| "grad_norm": 3.851112127304077, |
| "learning_rate": 6.26005342144241e-06, |
| "loss": 0.0645, |
| "num_input_tokens_seen": 434832, |
| "step": 2855 |
| }, |
| { |
| "epoch": 15.88888888888889, |
| "grad_norm": 4.547049045562744, |
| "learning_rate": 6.180050211606303e-06, |
| "loss": 0.1454, |
| "num_input_tokens_seen": 435600, |
| "step": 2860 |
| }, |
| { |
| "epoch": 15.916666666666666, |
| "grad_norm": 0.9072476625442505, |
| "learning_rate": 6.100489353106304e-06, |
| "loss": 0.0997, |
| "num_input_tokens_seen": 436320, |
| "step": 2865 |
| }, |
| { |
| "epoch": 15.944444444444445, |
| "grad_norm": 2.3731799125671387, |
| "learning_rate": 6.021372715971437e-06, |
| "loss": 0.1351, |
| "num_input_tokens_seen": 437072, |
| "step": 2870 |
| }, |
| { |
| "epoch": 15.972222222222221, |
| "grad_norm": 3.2382946014404297, |
| "learning_rate": 5.942702159789554e-06, |
| "loss": 0.1, |
| "num_input_tokens_seen": 437824, |
| "step": 2875 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 6.139621257781982, |
| "learning_rate": 5.864479533663655e-06, |
| "loss": 0.1126, |
| "num_input_tokens_seen": 438592, |
| "step": 2880 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.3588094711303711, |
| "eval_runtime": 0.8659, |
| "eval_samples_per_second": 46.195, |
| "eval_steps_per_second": 23.098, |
| "num_input_tokens_seen": 438592, |
| "step": 2880 |
| }, |
| { |
| "epoch": 16.02777777777778, |
| "grad_norm": 1.339462161064148, |
| "learning_rate": 5.786706676168424e-06, |
| "loss": 0.0558, |
| "num_input_tokens_seen": 439360, |
| "step": 2885 |
| }, |
| { |
| "epoch": 16.055555555555557, |
| "grad_norm": 1.4482659101486206, |
| "learning_rate": 5.709385415307006e-06, |
| "loss": 0.0721, |
| "num_input_tokens_seen": 440144, |
| "step": 2890 |
| }, |
| { |
| "epoch": 16.083333333333332, |
| "grad_norm": 1.2527096271514893, |
| "learning_rate": 5.6325175684680374e-06, |
| "loss": 0.0536, |
| "num_input_tokens_seen": 440880, |
| "step": 2895 |
| }, |
| { |
| "epoch": 16.11111111111111, |
| "grad_norm": 0.44911694526672363, |
| "learning_rate": 5.556104942382964e-06, |
| "loss": 0.0441, |
| "num_input_tokens_seen": 441632, |
| "step": 2900 |
| }, |
| { |
| "epoch": 16.13888888888889, |
| "grad_norm": 2.401498556137085, |
| "learning_rate": 5.48014933308352e-06, |
| "loss": 0.1571, |
| "num_input_tokens_seen": 442384, |
| "step": 2905 |
| }, |
| { |
| "epoch": 16.166666666666668, |
| "grad_norm": 3.040595531463623, |
| "learning_rate": 5.404652525859552e-06, |
| "loss": 0.0203, |
| "num_input_tokens_seen": 443184, |
| "step": 2910 |
| }, |
| { |
| "epoch": 16.194444444444443, |
| "grad_norm": 0.29359209537506104, |
| "learning_rate": 5.329616295217046e-06, |
| "loss": 0.0661, |
| "num_input_tokens_seen": 443952, |
| "step": 2915 |
| }, |
| { |
| "epoch": 16.22222222222222, |
| "grad_norm": 2.6835484504699707, |
| "learning_rate": 5.2550424048364185e-06, |
| "loss": 0.0856, |
| "num_input_tokens_seen": 444720, |
| "step": 2920 |
| }, |
| { |
| "epoch": 16.25, |
| "grad_norm": 2.9343628883361816, |
| "learning_rate": 5.180932607531056e-06, |
| "loss": 0.053, |
| "num_input_tokens_seen": 445504, |
| "step": 2925 |
| }, |
| { |
| "epoch": 16.27777777777778, |
| "grad_norm": 1.0284254550933838, |
| "learning_rate": 5.107288645206149e-06, |
| "loss": 0.0551, |
| "num_input_tokens_seen": 446240, |
| "step": 2930 |
| }, |
| { |
| "epoch": 16.305555555555557, |
| "grad_norm": 3.6044394969940186, |
| "learning_rate": 5.034112248817685e-06, |
| "loss": 0.0884, |
| "num_input_tokens_seen": 446992, |
| "step": 2935 |
| }, |
| { |
| "epoch": 16.333333333333332, |
| "grad_norm": 0.7247748374938965, |
| "learning_rate": 4.961405138331826e-06, |
| "loss": 0.057, |
| "num_input_tokens_seen": 447760, |
| "step": 2940 |
| }, |
| { |
| "epoch": 16.36111111111111, |
| "grad_norm": 2.632530927658081, |
| "learning_rate": 4.88916902268445e-06, |
| "loss": 0.0696, |
| "num_input_tokens_seen": 448528, |
| "step": 2945 |
| }, |
| { |
| "epoch": 16.38888888888889, |
| "grad_norm": 0.6444573402404785, |
| "learning_rate": 4.817405599741004e-06, |
| "loss": 0.0385, |
| "num_input_tokens_seen": 449312, |
| "step": 2950 |
| }, |
| { |
| "epoch": 16.416666666666668, |
| "grad_norm": 17.892915725708008, |
| "learning_rate": 4.746116556256569e-06, |
| "loss": 0.1573, |
| "num_input_tokens_seen": 450064, |
| "step": 2955 |
| }, |
| { |
| "epoch": 16.444444444444443, |
| "grad_norm": 1.6689417362213135, |
| "learning_rate": 4.6753035678362314e-06, |
| "loss": 0.1832, |
| "num_input_tokens_seen": 450800, |
| "step": 2960 |
| }, |
| { |
| "epoch": 16.47222222222222, |
| "grad_norm": 0.3151831328868866, |
| "learning_rate": 4.604968298895703e-06, |
| "loss": 0.0241, |
| "num_input_tokens_seen": 451536, |
| "step": 2965 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 0.10785870254039764, |
| "learning_rate": 4.535112402622185e-06, |
| "loss": 0.0329, |
| "num_input_tokens_seen": 452320, |
| "step": 2970 |
| }, |
| { |
| "epoch": 16.52777777777778, |
| "grad_norm": 13.819798469543457, |
| "learning_rate": 4.465737520935517e-06, |
| "loss": 0.0773, |
| "num_input_tokens_seen": 453072, |
| "step": 2975 |
| }, |
| { |
| "epoch": 16.555555555555557, |
| "grad_norm": 11.992451667785645, |
| "learning_rate": 4.396845284449608e-06, |
| "loss": 0.095, |
| "num_input_tokens_seen": 453856, |
| "step": 2980 |
| }, |
| { |
| "epoch": 16.583333333333332, |
| "grad_norm": 1.3498435020446777, |
| "learning_rate": 4.328437312434067e-06, |
| "loss": 0.0995, |
| "num_input_tokens_seen": 454624, |
| "step": 2985 |
| }, |
| { |
| "epoch": 16.61111111111111, |
| "grad_norm": 1.9748262166976929, |
| "learning_rate": 4.2605152127761675e-06, |
| "loss": 0.0575, |
| "num_input_tokens_seen": 455360, |
| "step": 2990 |
| }, |
| { |
| "epoch": 16.63888888888889, |
| "grad_norm": 0.5040590763092041, |
| "learning_rate": 4.19308058194306e-06, |
| "loss": 0.0243, |
| "num_input_tokens_seen": 456096, |
| "step": 2995 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 15.647078514099121, |
| "learning_rate": 4.126135004944231e-06, |
| "loss": 0.0482, |
| "num_input_tokens_seen": 456848, |
| "step": 3000 |
| }, |
| { |
| "epoch": 16.694444444444443, |
| "grad_norm": 13.681388854980469, |
| "learning_rate": 4.059680055294266e-06, |
| "loss": 0.1086, |
| "num_input_tokens_seen": 457616, |
| "step": 3005 |
| }, |
| { |
| "epoch": 16.72222222222222, |
| "grad_norm": 1.3664271831512451, |
| "learning_rate": 3.993717294975863e-06, |
| "loss": 0.0971, |
| "num_input_tokens_seen": 458384, |
| "step": 3010 |
| }, |
| { |
| "epoch": 16.75, |
| "grad_norm": 0.569031834602356, |
| "learning_rate": 3.92824827440309e-06, |
| "loss": 0.0227, |
| "num_input_tokens_seen": 459152, |
| "step": 3015 |
| }, |
| { |
| "epoch": 16.77777777777778, |
| "grad_norm": 0.708084762096405, |
| "learning_rate": 3.863274532384981e-06, |
| "loss": 0.0432, |
| "num_input_tokens_seen": 459920, |
| "step": 3020 |
| }, |
| { |
| "epoch": 16.805555555555557, |
| "grad_norm": 0.2141667604446411, |
| "learning_rate": 3.798797596089351e-06, |
| "loss": 0.127, |
| "num_input_tokens_seen": 460672, |
| "step": 3025 |
| }, |
| { |
| "epoch": 16.833333333333332, |
| "grad_norm": 0.05808110535144806, |
| "learning_rate": 3.73481898100691e-06, |
| "loss": 0.0333, |
| "num_input_tokens_seen": 461472, |
| "step": 3030 |
| }, |
| { |
| "epoch": 16.86111111111111, |
| "grad_norm": 34.014705657958984, |
| "learning_rate": 3.6713401909156204e-06, |
| "loss": 0.1195, |
| "num_input_tokens_seen": 462224, |
| "step": 3035 |
| }, |
| { |
| "epoch": 16.88888888888889, |
| "grad_norm": 6.969618320465088, |
| "learning_rate": 3.608362717845376e-06, |
| "loss": 0.1465, |
| "num_input_tokens_seen": 462976, |
| "step": 3040 |
| }, |
| { |
| "epoch": 16.916666666666668, |
| "grad_norm": 27.208003997802734, |
| "learning_rate": 3.5458880420429135e-06, |
| "loss": 0.3196, |
| "num_input_tokens_seen": 463712, |
| "step": 3045 |
| }, |
| { |
| "epoch": 16.944444444444443, |
| "grad_norm": 23.051929473876953, |
| "learning_rate": 3.4839176319370394e-06, |
| "loss": 0.0664, |
| "num_input_tokens_seen": 464480, |
| "step": 3050 |
| }, |
| { |
| "epoch": 16.97222222222222, |
| "grad_norm": 23.996009826660156, |
| "learning_rate": 3.4224529441040904e-06, |
| "loss": 0.1817, |
| "num_input_tokens_seen": 465248, |
| "step": 3055 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 7.2588791847229, |
| "learning_rate": 3.3614954232337374e-06, |
| "loss": 0.021, |
| "num_input_tokens_seen": 465984, |
| "step": 3060 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.48010167479515076, |
| "eval_runtime": 0.8596, |
| "eval_samples_per_second": 46.535, |
| "eval_steps_per_second": 23.267, |
| "num_input_tokens_seen": 465984, |
| "step": 3060 |
| }, |
| { |
| "epoch": 17.02777777777778, |
| "grad_norm": 0.5085855722427368, |
| "learning_rate": 3.3010465020949818e-06, |
| "loss": 0.0262, |
| "num_input_tokens_seen": 466768, |
| "step": 3065 |
| }, |
| { |
| "epoch": 17.055555555555557, |
| "grad_norm": 0.024025170132517815, |
| "learning_rate": 3.2411076015025075e-06, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 467568, |
| "step": 3070 |
| }, |
| { |
| "epoch": 17.083333333333332, |
| "grad_norm": 0.26111242175102234, |
| "learning_rate": 3.1816801302832848e-06, |
| "loss": 0.0135, |
| "num_input_tokens_seen": 468352, |
| "step": 3075 |
| }, |
| { |
| "epoch": 17.11111111111111, |
| "grad_norm": 2.5764732360839844, |
| "learning_rate": 3.1227654852434454e-06, |
| "loss": 0.0125, |
| "num_input_tokens_seen": 469120, |
| "step": 3080 |
| }, |
| { |
| "epoch": 17.13888888888889, |
| "grad_norm": 29.602319717407227, |
| "learning_rate": 3.0643650511354484e-06, |
| "loss": 0.1227, |
| "num_input_tokens_seen": 469888, |
| "step": 3085 |
| }, |
| { |
| "epoch": 17.166666666666668, |
| "grad_norm": 0.05500046908855438, |
| "learning_rate": 3.006480200625572e-06, |
| "loss": 0.0542, |
| "num_input_tokens_seen": 470688, |
| "step": 3090 |
| }, |
| { |
| "epoch": 17.194444444444443, |
| "grad_norm": 1.0007386207580566, |
| "learning_rate": 2.949112294261591e-06, |
| "loss": 0.0108, |
| "num_input_tokens_seen": 471472, |
| "step": 3095 |
| }, |
| { |
| "epoch": 17.22222222222222, |
| "grad_norm": 0.1278330683708191, |
| "learning_rate": 2.89226268044083e-06, |
| "loss": 0.0025, |
| "num_input_tokens_seen": 472208, |
| "step": 3100 |
| }, |
| { |
| "epoch": 17.25, |
| "grad_norm": 1.1112349033355713, |
| "learning_rate": 2.8359326953784737e-06, |
| "loss": 0.015, |
| "num_input_tokens_seen": 472960, |
| "step": 3105 |
| }, |
| { |
| "epoch": 17.27777777777778, |
| "grad_norm": 0.07920917123556137, |
| "learning_rate": 2.780123663076142e-06, |
| "loss": 0.0053, |
| "num_input_tokens_seen": 473744, |
| "step": 3110 |
| }, |
| { |
| "epoch": 17.305555555555557, |
| "grad_norm": 2.448739767074585, |
| "learning_rate": 2.7248368952908053e-06, |
| "loss": 0.0203, |
| "num_input_tokens_seen": 474544, |
| "step": 3115 |
| }, |
| { |
| "epoch": 17.333333333333332, |
| "grad_norm": 0.1579727977514267, |
| "learning_rate": 2.670073691503902e-06, |
| "loss": 0.0181, |
| "num_input_tokens_seen": 475280, |
| "step": 3120 |
| }, |
| { |
| "epoch": 17.36111111111111, |
| "grad_norm": 0.3010871112346649, |
| "learning_rate": 2.6158353388908293e-06, |
| "loss": 0.0453, |
| "num_input_tokens_seen": 476048, |
| "step": 3125 |
| }, |
| { |
| "epoch": 17.38888888888889, |
| "grad_norm": 31.98306655883789, |
| "learning_rate": 2.5621231122906873e-06, |
| "loss": 0.0406, |
| "num_input_tokens_seen": 476800, |
| "step": 3130 |
| }, |
| { |
| "epoch": 17.416666666666668, |
| "grad_norm": 2.9089503288269043, |
| "learning_rate": 2.5089382741762925e-06, |
| "loss": 0.0167, |
| "num_input_tokens_seen": 477568, |
| "step": 3135 |
| }, |
| { |
| "epoch": 17.444444444444443, |
| "grad_norm": 4.048463821411133, |
| "learning_rate": 2.4562820746245386e-06, |
| "loss": 0.0783, |
| "num_input_tokens_seen": 478288, |
| "step": 3140 |
| }, |
| { |
| "epoch": 17.47222222222222, |
| "grad_norm": 6.972721576690674, |
| "learning_rate": 2.4041557512869878e-06, |
| "loss": 0.0294, |
| "num_input_tokens_seen": 479056, |
| "step": 3145 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 14.101398468017578, |
| "learning_rate": 2.3525605293607784e-06, |
| "loss": 0.0499, |
| "num_input_tokens_seen": 479824, |
| "step": 3150 |
| }, |
| { |
| "epoch": 17.52777777777778, |
| "grad_norm": 0.31221017241477966, |
| "learning_rate": 2.3014976215598503e-06, |
| "loss": 0.1516, |
| "num_input_tokens_seen": 480560, |
| "step": 3155 |
| }, |
| { |
| "epoch": 17.555555555555557, |
| "grad_norm": 0.8303629159927368, |
| "learning_rate": 2.2509682280864224e-06, |
| "loss": 0.0756, |
| "num_input_tokens_seen": 481360, |
| "step": 3160 |
| }, |
| { |
| "epoch": 17.583333333333332, |
| "grad_norm": 0.35569193959236145, |
| "learning_rate": 2.2009735366027795e-06, |
| "loss": 0.0031, |
| "num_input_tokens_seen": 482112, |
| "step": 3165 |
| }, |
| { |
| "epoch": 17.61111111111111, |
| "grad_norm": 0.22250580787658691, |
| "learning_rate": 2.151514722203385e-06, |
| "loss": 0.0528, |
| "num_input_tokens_seen": 482880, |
| "step": 3170 |
| }, |
| { |
| "epoch": 17.63888888888889, |
| "grad_norm": 0.6709349155426025, |
| "learning_rate": 2.1025929473872274e-06, |
| "loss": 0.0307, |
| "num_input_tokens_seen": 483632, |
| "step": 3175 |
| }, |
| { |
| "epoch": 17.666666666666668, |
| "grad_norm": 1.0654515027999878, |
| "learning_rate": 2.0542093620305042e-06, |
| "loss": 0.0148, |
| "num_input_tokens_seen": 484400, |
| "step": 3180 |
| }, |
| { |
| "epoch": 17.694444444444443, |
| "grad_norm": 10.747461318969727, |
| "learning_rate": 2.0063651033596143e-06, |
| "loss": 0.0472, |
| "num_input_tokens_seen": 485152, |
| "step": 3185 |
| }, |
| { |
| "epoch": 17.72222222222222, |
| "grad_norm": 0.02796984277665615, |
| "learning_rate": 1.9590612959244055e-06, |
| "loss": 0.0115, |
| "num_input_tokens_seen": 485904, |
| "step": 3190 |
| }, |
| { |
| "epoch": 17.75, |
| "grad_norm": 0.09192727506160736, |
| "learning_rate": 1.912299051571764e-06, |
| "loss": 0.1136, |
| "num_input_tokens_seen": 486672, |
| "step": 3195 |
| }, |
| { |
| "epoch": 17.77777777777778, |
| "grad_norm": 0.13285285234451294, |
| "learning_rate": 1.8660794694194573e-06, |
| "loss": 0.2426, |
| "num_input_tokens_seen": 487440, |
| "step": 3200 |
| }, |
| { |
| "epoch": 17.805555555555557, |
| "grad_norm": 0.1345347911119461, |
| "learning_rate": 1.8204036358303173e-06, |
| "loss": 0.0049, |
| "num_input_tokens_seen": 488176, |
| "step": 3205 |
| }, |
| { |
| "epoch": 17.833333333333332, |
| "grad_norm": 5.462226390838623, |
| "learning_rate": 1.775272624386695e-06, |
| "loss": 0.0107, |
| "num_input_tokens_seen": 488944, |
| "step": 3210 |
| }, |
| { |
| "epoch": 17.86111111111111, |
| "grad_norm": 15.519821166992188, |
| "learning_rate": 1.7306874958652408e-06, |
| "loss": 0.038, |
| "num_input_tokens_seen": 489680, |
| "step": 3215 |
| }, |
| { |
| "epoch": 17.88888888888889, |
| "grad_norm": 3.9195749759674072, |
| "learning_rate": 1.686649298211951e-06, |
| "loss": 0.0462, |
| "num_input_tokens_seen": 490432, |
| "step": 3220 |
| }, |
| { |
| "epoch": 17.916666666666668, |
| "grad_norm": 24.604074478149414, |
| "learning_rate": 1.643159066517566e-06, |
| "loss": 0.0269, |
| "num_input_tokens_seen": 491184, |
| "step": 3225 |
| }, |
| { |
| "epoch": 17.944444444444443, |
| "grad_norm": 21.248931884765625, |
| "learning_rate": 1.6002178229932107e-06, |
| "loss": 0.0796, |
| "num_input_tokens_seen": 491952, |
| "step": 3230 |
| }, |
| { |
| "epoch": 17.97222222222222, |
| "grad_norm": 0.939923107624054, |
| "learning_rate": 1.5578265769463806e-06, |
| "loss": 0.0934, |
| "num_input_tokens_seen": 492720, |
| "step": 3235 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 4.183718204498291, |
| "learning_rate": 1.5159863247572236e-06, |
| "loss": 0.0091, |
| "num_input_tokens_seen": 493488, |
| "step": 3240 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.5633367300033569, |
| "eval_runtime": 0.8943, |
| "eval_samples_per_second": 44.726, |
| "eval_steps_per_second": 22.363, |
| "num_input_tokens_seen": 493488, |
| "step": 3240 |
| }, |
| { |
| "epoch": 18.02777777777778, |
| "grad_norm": 0.040560852736234665, |
| "learning_rate": 1.4746980498551112e-06, |
| "loss": 0.0059, |
| "num_input_tokens_seen": 494224, |
| "step": 3245 |
| }, |
| { |
| "epoch": 18.055555555555557, |
| "grad_norm": 7.204439640045166, |
| "learning_rate": 1.4339627226955392e-06, |
| "loss": 0.0083, |
| "num_input_tokens_seen": 494960, |
| "step": 3250 |
| }, |
| { |
| "epoch": 18.083333333333332, |
| "grad_norm": 0.05959023907780647, |
| "learning_rate": 1.3937813007373013e-06, |
| "loss": 0.0044, |
| "num_input_tokens_seen": 495712, |
| "step": 3255 |
| }, |
| { |
| "epoch": 18.11111111111111, |
| "grad_norm": 2.2561721801757812, |
| "learning_rate": 1.354154728419979e-06, |
| "loss": 0.0092, |
| "num_input_tokens_seen": 496480, |
| "step": 3260 |
| }, |
| { |
| "epoch": 18.13888888888889, |
| "grad_norm": 11.598490715026855, |
| "learning_rate": 1.31508393714177e-06, |
| "loss": 0.0193, |
| "num_input_tokens_seen": 497216, |
| "step": 3265 |
| }, |
| { |
| "epoch": 18.166666666666668, |
| "grad_norm": 2.7422096729278564, |
| "learning_rate": 1.276569845237574e-06, |
| "loss": 0.0046, |
| "num_input_tokens_seen": 497968, |
| "step": 3270 |
| }, |
| { |
| "epoch": 18.194444444444443, |
| "grad_norm": 0.027248982340097427, |
| "learning_rate": 1.2386133579574189e-06, |
| "loss": 0.0107, |
| "num_input_tokens_seen": 498704, |
| "step": 3275 |
| }, |
| { |
| "epoch": 18.22222222222222, |
| "grad_norm": 2.369640350341797, |
| "learning_rate": 1.2012153674451715e-06, |
| "loss": 0.1142, |
| "num_input_tokens_seen": 499440, |
| "step": 3280 |
| }, |
| { |
| "epoch": 18.25, |
| "grad_norm": 0.20769014954566956, |
| "learning_rate": 1.1643767527175857e-06, |
| "loss": 0.0073, |
| "num_input_tokens_seen": 500176, |
| "step": 3285 |
| }, |
| { |
| "epoch": 18.27777777777778, |
| "grad_norm": 0.37721478939056396, |
| "learning_rate": 1.1280983796436245e-06, |
| "loss": 0.1195, |
| "num_input_tokens_seen": 500928, |
| "step": 3290 |
| }, |
| { |
| "epoch": 18.305555555555557, |
| "grad_norm": 0.7713701128959656, |
| "learning_rate": 1.0923811009241142e-06, |
| "loss": 0.019, |
| "num_input_tokens_seen": 501696, |
| "step": 3295 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 8.40517807006836, |
| "learning_rate": 1.0572257560717086e-06, |
| "loss": 0.0174, |
| "num_input_tokens_seen": 502480, |
| "step": 3300 |
| }, |
| { |
| "epoch": 18.36111111111111, |
| "grad_norm": 0.6811085939407349, |
| "learning_rate": 1.0226331713911546e-06, |
| "loss": 0.0214, |
| "num_input_tokens_seen": 503264, |
| "step": 3305 |
| }, |
| { |
| "epoch": 18.38888888888889, |
| "grad_norm": 37.946937561035156, |
| "learning_rate": 9.886041599598606e-07, |
| "loss": 0.1155, |
| "num_input_tokens_seen": 504000, |
| "step": 3310 |
| }, |
| { |
| "epoch": 18.416666666666668, |
| "grad_norm": 1.640047311782837, |
| "learning_rate": 9.551395216087944e-07, |
| "loss": 0.0446, |
| "num_input_tokens_seen": 504736, |
| "step": 3315 |
| }, |
| { |
| "epoch": 18.444444444444443, |
| "grad_norm": 0.6278148889541626, |
| "learning_rate": 9.222400429036854e-07, |
| "loss": 0.006, |
| "num_input_tokens_seen": 505504, |
| "step": 3320 |
| }, |
| { |
| "epoch": 18.47222222222222, |
| "grad_norm": 1.218808650970459, |
| "learning_rate": 8.899064971265276e-07, |
| "loss": 0.0058, |
| "num_input_tokens_seen": 506256, |
| "step": 3325 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 0.3586307764053345, |
| "learning_rate": 8.581396442574135e-07, |
| "loss": 0.0033, |
| "num_input_tokens_seen": 507024, |
| "step": 3330 |
| }, |
| { |
| "epoch": 18.52777777777778, |
| "grad_norm": 0.11738912761211395, |
| "learning_rate": 8.269402309566743e-07, |
| "loss": 0.0016, |
| "num_input_tokens_seen": 507808, |
| "step": 3335 |
| }, |
| { |
| "epoch": 18.555555555555557, |
| "grad_norm": 0.5308789610862732, |
| "learning_rate": 7.963089905473092e-07, |
| "loss": 0.0013, |
| "num_input_tokens_seen": 508560, |
| "step": 3340 |
| }, |
| { |
| "epoch": 18.583333333333332, |
| "grad_norm": 34.2119140625, |
| "learning_rate": 7.662466429977699e-07, |
| "loss": 0.0493, |
| "num_input_tokens_seen": 509312, |
| "step": 3345 |
| }, |
| { |
| "epoch": 18.61111111111111, |
| "grad_norm": 0.14879710972309113, |
| "learning_rate": 7.367538949050345e-07, |
| "loss": 0.0152, |
| "num_input_tokens_seen": 510064, |
| "step": 3350 |
| }, |
| { |
| "epoch": 18.63888888888889, |
| "grad_norm": 12.18697452545166, |
| "learning_rate": 7.078314394779961e-07, |
| "loss": 0.0157, |
| "num_input_tokens_seen": 510832, |
| "step": 3355 |
| }, |
| { |
| "epoch": 18.666666666666668, |
| "grad_norm": 0.2048877626657486, |
| "learning_rate": 6.794799565211646e-07, |
| "loss": 0.0163, |
| "num_input_tokens_seen": 511616, |
| "step": 3360 |
| }, |
| { |
| "epoch": 18.694444444444443, |
| "grad_norm": 0.5249136686325073, |
| "learning_rate": 6.517001124186989e-07, |
| "loss": 0.0062, |
| "num_input_tokens_seen": 512400, |
| "step": 3365 |
| }, |
| { |
| "epoch": 18.72222222222222, |
| "grad_norm": 0.056645072996616364, |
| "learning_rate": 6.244925601187363e-07, |
| "loss": 0.0011, |
| "num_input_tokens_seen": 513152, |
| "step": 3370 |
| }, |
| { |
| "epoch": 18.75, |
| "grad_norm": 0.7716118693351746, |
| "learning_rate": 5.978579391180461e-07, |
| "loss": 0.0056, |
| "num_input_tokens_seen": 513888, |
| "step": 3375 |
| }, |
| { |
| "epoch": 18.77777777777778, |
| "grad_norm": 0.3327917754650116, |
| "learning_rate": 5.717968754469977e-07, |
| "loss": 0.0016, |
| "num_input_tokens_seen": 514688, |
| "step": 3380 |
| }, |
| { |
| "epoch": 18.805555555555557, |
| "grad_norm": 2.8160390853881836, |
| "learning_rate": 5.463099816548579e-07, |
| "loss": 0.0102, |
| "num_input_tokens_seen": 515440, |
| "step": 3385 |
| }, |
| { |
| "epoch": 18.833333333333332, |
| "grad_norm": 0.10132934898138046, |
| "learning_rate": 5.213978567953775e-07, |
| "loss": 0.0067, |
| "num_input_tokens_seen": 516224, |
| "step": 3390 |
| }, |
| { |
| "epoch": 18.86111111111111, |
| "grad_norm": 1.5919185876846313, |
| "learning_rate": 4.970610864127173e-07, |
| "loss": 0.0061, |
| "num_input_tokens_seen": 516992, |
| "step": 3395 |
| }, |
| { |
| "epoch": 18.88888888888889, |
| "grad_norm": 1.101412057876587, |
| "learning_rate": 4.7330024252768555e-07, |
| "loss": 0.0037, |
| "num_input_tokens_seen": 517792, |
| "step": 3400 |
| }, |
| { |
| "epoch": 18.916666666666668, |
| "grad_norm": 12.524344444274902, |
| "learning_rate": 4.5011588362429134e-07, |
| "loss": 0.1769, |
| "num_input_tokens_seen": 518528, |
| "step": 3405 |
| }, |
| { |
| "epoch": 18.944444444444443, |
| "grad_norm": 0.13883286714553833, |
| "learning_rate": 4.2750855463662143e-07, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 519296, |
| "step": 3410 |
| }, |
| { |
| "epoch": 18.97222222222222, |
| "grad_norm": 0.5751574635505676, |
| "learning_rate": 4.05478786936031e-07, |
| "loss": 0.0077, |
| "num_input_tokens_seen": 520064, |
| "step": 3415 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.2156560719013214, |
| "learning_rate": 3.8402709831865113e-07, |
| "loss": 0.0818, |
| "num_input_tokens_seen": 520816, |
| "step": 3420 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.5927966833114624, |
| "eval_runtime": 0.8667, |
| "eval_samples_per_second": 46.152, |
| "eval_steps_per_second": 23.076, |
| "num_input_tokens_seen": 520816, |
| "step": 3420 |
| }, |
| { |
| "epoch": 19.02777777777778, |
| "grad_norm": 0.09093061834573746, |
| "learning_rate": 3.6315399299321484e-07, |
| "loss": 0.0027, |
| "num_input_tokens_seen": 521584, |
| "step": 3425 |
| }, |
| { |
| "epoch": 19.055555555555557, |
| "grad_norm": 0.22736388444900513, |
| "learning_rate": 3.428599615692141e-07, |
| "loss": 0.1626, |
| "num_input_tokens_seen": 522368, |
| "step": 3430 |
| }, |
| { |
| "epoch": 19.083333333333332, |
| "grad_norm": 0.39018547534942627, |
| "learning_rate": 3.2314548104537545e-07, |
| "loss": 0.0023, |
| "num_input_tokens_seen": 523168, |
| "step": 3435 |
| }, |
| { |
| "epoch": 19.11111111111111, |
| "grad_norm": 18.854568481445312, |
| "learning_rate": 3.040110147984221e-07, |
| "loss": 0.0252, |
| "num_input_tokens_seen": 523936, |
| "step": 3440 |
| }, |
| { |
| "epoch": 19.13888888888889, |
| "grad_norm": 1.008324384689331, |
| "learning_rate": 2.8545701257221e-07, |
| "loss": 0.0513, |
| "num_input_tokens_seen": 524704, |
| "step": 3445 |
| }, |
| { |
| "epoch": 19.166666666666668, |
| "grad_norm": 1.5237793922424316, |
| "learning_rate": 2.674839104671367e-07, |
| "loss": 0.0035, |
| "num_input_tokens_seen": 525440, |
| "step": 3450 |
| }, |
| { |
| "epoch": 19.194444444444443, |
| "grad_norm": 7.896567344665527, |
| "learning_rate": 2.5009213092991034e-07, |
| "loss": 0.009, |
| "num_input_tokens_seen": 526192, |
| "step": 3455 |
| }, |
| { |
| "epoch": 19.22222222222222, |
| "grad_norm": 1.473685383796692, |
| "learning_rate": 2.3328208274359942e-07, |
| "loss": 0.0075, |
| "num_input_tokens_seen": 526944, |
| "step": 3460 |
| }, |
| { |
| "epoch": 19.25, |
| "grad_norm": 0.9116981029510498, |
| "learning_rate": 2.170541610180432e-07, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 527696, |
| "step": 3465 |
| }, |
| { |
| "epoch": 19.27777777777778, |
| "grad_norm": 0.41270574927330017, |
| "learning_rate": 2.014087471805509e-07, |
| "loss": 0.0029, |
| "num_input_tokens_seen": 528480, |
| "step": 3470 |
| }, |
| { |
| "epoch": 19.305555555555557, |
| "grad_norm": 0.1433163583278656, |
| "learning_rate": 1.8634620896695043e-07, |
| "loss": 0.0072, |
| "num_input_tokens_seen": 529248, |
| "step": 3475 |
| }, |
| { |
| "epoch": 19.333333333333332, |
| "grad_norm": 0.15605846047401428, |
| "learning_rate": 1.7186690041292586e-07, |
| "loss": 0.0018, |
| "num_input_tokens_seen": 530016, |
| "step": 3480 |
| }, |
| { |
| "epoch": 19.36111111111111, |
| "grad_norm": 0.1406908482313156, |
| "learning_rate": 1.5797116184571304e-07, |
| "loss": 0.0104, |
| "num_input_tokens_seen": 530768, |
| "step": 3485 |
| }, |
| { |
| "epoch": 19.38888888888889, |
| "grad_norm": 1.887284278869629, |
| "learning_rate": 1.4465931987609482e-07, |
| "loss": 0.004, |
| "num_input_tokens_seen": 531520, |
| "step": 3490 |
| }, |
| { |
| "epoch": 19.416666666666668, |
| "grad_norm": 0.1923336386680603, |
| "learning_rate": 1.319316873907267e-07, |
| "loss": 0.0393, |
| "num_input_tokens_seen": 532256, |
| "step": 3495 |
| }, |
| { |
| "epoch": 19.444444444444443, |
| "grad_norm": 5.755624294281006, |
| "learning_rate": 1.1978856354477595e-07, |
| "loss": 0.0097, |
| "num_input_tokens_seen": 533008, |
| "step": 3500 |
| }, |
| { |
| "epoch": 19.47222222222222, |
| "grad_norm": 1.6134008169174194, |
| "learning_rate": 1.0823023375489127e-07, |
| "loss": 0.0223, |
| "num_input_tokens_seen": 533760, |
| "step": 3505 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 0.05306548252701759, |
| "learning_rate": 9.725696969249965e-08, |
| "loss": 0.0183, |
| "num_input_tokens_seen": 534512, |
| "step": 3510 |
| }, |
| { |
| "epoch": 19.52777777777778, |
| "grad_norm": 34.357643127441406, |
| "learning_rate": 8.686902927741991e-08, |
| "loss": 0.0611, |
| "num_input_tokens_seen": 535296, |
| "step": 3515 |
| }, |
| { |
| "epoch": 19.555555555555557, |
| "grad_norm": 0.7180253267288208, |
| "learning_rate": 7.706665667180091e-08, |
| "loss": 0.0374, |
| "num_input_tokens_seen": 536080, |
| "step": 3520 |
| }, |
| { |
| "epoch": 19.583333333333332, |
| "grad_norm": 0.3030802011489868, |
| "learning_rate": 6.785008227437329e-08, |
| "loss": 0.001, |
| "num_input_tokens_seen": 536832, |
| "step": 3525 |
| }, |
| { |
| "epoch": 19.61111111111111, |
| "grad_norm": 0.009636443108320236, |
| "learning_rate": 5.921952271504827e-08, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 537584, |
| "step": 3530 |
| }, |
| { |
| "epoch": 19.63888888888889, |
| "grad_norm": 0.7338316440582275, |
| "learning_rate": 5.117518084981621e-08, |
| "loss": 0.0076, |
| "num_input_tokens_seen": 538320, |
| "step": 3535 |
| }, |
| { |
| "epoch": 19.666666666666668, |
| "grad_norm": 7.76223611831665, |
| "learning_rate": 4.371724575597535e-08, |
| "loss": 0.0131, |
| "num_input_tokens_seen": 539072, |
| "step": 3540 |
| }, |
| { |
| "epoch": 19.694444444444443, |
| "grad_norm": 6.488776206970215, |
| "learning_rate": 3.684589272771044e-08, |
| "loss": 0.0141, |
| "num_input_tokens_seen": 539824, |
| "step": 3545 |
| }, |
| { |
| "epoch": 19.72222222222222, |
| "grad_norm": 0.19823488593101501, |
| "learning_rate": 3.056128327193486e-08, |
| "loss": 0.0029, |
| "num_input_tokens_seen": 540592, |
| "step": 3550 |
| }, |
| { |
| "epoch": 19.75, |
| "grad_norm": 23.785985946655273, |
| "learning_rate": 2.486356510453258e-08, |
| "loss": 0.0548, |
| "num_input_tokens_seen": 541392, |
| "step": 3555 |
| }, |
| { |
| "epoch": 19.77777777777778, |
| "grad_norm": 0.09434127807617188, |
| "learning_rate": 1.975287214685817e-08, |
| "loss": 0.0028, |
| "num_input_tokens_seen": 542160, |
| "step": 3560 |
| }, |
| { |
| "epoch": 19.805555555555557, |
| "grad_norm": 0.27645689249038696, |
| "learning_rate": 1.522932452260595e-08, |
| "loss": 0.1098, |
| "num_input_tokens_seen": 542912, |
| "step": 3565 |
| }, |
| { |
| "epoch": 19.833333333333332, |
| "grad_norm": 0.12851421535015106, |
| "learning_rate": 1.1293028554978935e-08, |
| "loss": 0.0012, |
| "num_input_tokens_seen": 543648, |
| "step": 3570 |
| }, |
| { |
| "epoch": 19.86111111111111, |
| "grad_norm": 0.3019014000892639, |
| "learning_rate": 7.944076764190845e-09, |
| "loss": 0.0038, |
| "num_input_tokens_seen": 544432, |
| "step": 3575 |
| }, |
| { |
| "epoch": 19.88888888888889, |
| "grad_norm": 2.531794786453247, |
| "learning_rate": 5.182547865290044e-09, |
| "loss": 0.0207, |
| "num_input_tokens_seen": 545216, |
| "step": 3580 |
| }, |
| { |
| "epoch": 19.916666666666668, |
| "grad_norm": 0.03003126010298729, |
| "learning_rate": 3.008506766313812e-09, |
| "loss": 0.0033, |
| "num_input_tokens_seen": 545952, |
| "step": 3585 |
| }, |
| { |
| "epoch": 19.944444444444443, |
| "grad_norm": 0.3342243432998657, |
| "learning_rate": 1.4220045667645566e-09, |
| "loss": 0.0011, |
| "num_input_tokens_seen": 546720, |
| "step": 3590 |
| }, |
| { |
| "epoch": 19.97222222222222, |
| "grad_norm": 0.189296156167984, |
| "learning_rate": 4.2307855639411865e-10, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 547488, |
| "step": 3595 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.32087770104408264, |
| "learning_rate": 1.1752214348903501e-11, |
| "loss": 0.0025, |
| "num_input_tokens_seen": 548240, |
| "step": 3600 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.6011862754821777, |
| "eval_runtime": 0.8651, |
| "eval_samples_per_second": 46.239, |
| "eval_steps_per_second": 23.119, |
| "num_input_tokens_seen": 548240, |
| "step": 3600 |
| }, |
| { |
| "epoch": 20.0, |
| "num_input_tokens_seen": 548240, |
| "step": 3600, |
| "total_flos": 2.468699941306368e+16, |
| "train_loss": 0.2379362821903649, |
| "train_runtime": 353.13, |
| "train_samples_per_second": 20.389, |
| "train_steps_per_second": 10.195 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3600, |
| "num_input_tokens_seen": 548240, |
| "num_train_epochs": 20, |
| "save_steps": 180, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.468699941306368e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|