{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2, "eval_steps": 100, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.9806057829409838, "epoch": 0.016, "grad_norm": 157.0, "learning_rate": 1.8e-07, "loss": 21.7295, "mean_token_accuracy": 0.695003604888916, "num_tokens": 280941.0, "step": 10 }, { "entropy": 1.0097382467240095, "epoch": 0.032, "grad_norm": 171.0, "learning_rate": 3.8e-07, "loss": 21.935, "mean_token_accuracy": 0.6911116372793913, "num_tokens": 558056.0, "step": 20 }, { "entropy": 1.0059957668185233, "epoch": 0.048, "grad_norm": 146.0, "learning_rate": 5.800000000000001e-07, "loss": 21.4243, "mean_token_accuracy": 0.6972350142896175, "num_tokens": 836753.0, "step": 30 }, { "entropy": 1.0606450594961643, "epoch": 0.064, "grad_norm": 129.0, "learning_rate": 7.8e-07, "loss": 22.2256, "mean_token_accuracy": 0.6895487096160651, "num_tokens": 1126446.0, "step": 40 }, { "entropy": 1.1183118436485529, "epoch": 0.08, "grad_norm": 99.5, "learning_rate": 9.800000000000001e-07, "loss": 21.2808, "mean_token_accuracy": 0.6964452721178531, "num_tokens": 1413596.0, "step": 50 }, { "entropy": 1.1463136691600084, "epoch": 0.096, "grad_norm": 83.0, "learning_rate": 1.1800000000000001e-06, "loss": 20.5501, "mean_token_accuracy": 0.7033276192843914, "num_tokens": 1701193.0, "step": 60 }, { "entropy": 1.1966488853096962, "epoch": 0.112, "grad_norm": 75.5, "learning_rate": 1.3800000000000001e-06, "loss": 20.5984, "mean_token_accuracy": 0.7012545391917229, "num_tokens": 1979232.0, "step": 70 }, { "entropy": 1.2089795324951411, "epoch": 0.128, "grad_norm": 69.0, "learning_rate": 1.5800000000000001e-06, "loss": 20.2753, "mean_token_accuracy": 0.7075193412601948, "num_tokens": 2274177.0, "step": 80 }, { "entropy": 1.1860636565834284, "epoch": 0.144, "grad_norm": 70.0, "learning_rate": 1.7800000000000001e-06, "loss": 19.7829, "mean_token_accuracy": 0.710378497838974, "num_tokens": 2548445.0, "step": 90 }, { "entropy": 1.1838629063218833, "epoch": 0.16, "grad_norm": 85.5, "learning_rate": 1.98e-06, "loss": 19.4179, "mean_token_accuracy": 0.7149847097694874, "num_tokens": 2824418.0, "step": 100 }, { "epoch": 0.16, "eval_biology_entropy": 1.474690469264984, "eval_biology_loss": 1.3576592206954956, "eval_biology_mean_token_accuracy": 0.6709716210365295, "eval_biology_num_tokens": 2824418.0, "eval_biology_runtime": 22.204, "eval_biology_samples_per_second": 22.518, "eval_biology_steps_per_second": 5.63, "step": 100 }, { "epoch": 0.16, "eval_chemistry_entropy": 1.247537253856659, "eval_chemistry_loss": 1.1488269567489624, "eval_chemistry_mean_token_accuracy": 0.7154391117095947, "eval_chemistry_num_tokens": 2824418.0, "eval_chemistry_runtime": 26.8594, "eval_chemistry_samples_per_second": 18.615, "eval_chemistry_steps_per_second": 4.654, "step": 100 }, { "epoch": 0.16, "eval_math_entropy": 1.2081865873336792, "eval_math_loss": 1.2300448417663574, "eval_math_mean_token_accuracy": 0.710663827419281, "eval_math_num_tokens": 2824418.0, "eval_math_runtime": 27.5187, "eval_math_samples_per_second": 18.169, "eval_math_steps_per_second": 4.542, "step": 100 }, { "epoch": 0.16, "eval_cyber_entropy": 3.049607857465744, "eval_cyber_loss": 3.3160624504089355, "eval_cyber_mean_token_accuracy": 0.4259996695816517, "eval_cyber_num_tokens": 2824418.0, "eval_cyber_runtime": 26.1305, "eval_cyber_samples_per_second": 15.193, "eval_cyber_steps_per_second": 3.827, "step": 100 }, { "entropy": 1.1945379309356212, "epoch": 0.176, "grad_norm": 78.5, "learning_rate": 2.1800000000000003e-06, "loss": 19.3659, "mean_token_accuracy": 0.7157268539071083, "num_tokens": 3110313.0, "step": 110 }, { "entropy": 1.1879339709877967, "epoch": 0.192, "grad_norm": 62.5, "learning_rate": 2.38e-06, "loss": 19.2957, "mean_token_accuracy": 0.7150671608746052, "num_tokens": 3394170.0, "step": 120 }, { "entropy": 1.1658376209437846, "epoch": 0.208, "grad_norm": 64.5, "learning_rate": 2.5800000000000003e-06, "loss": 18.9243, "mean_token_accuracy": 0.7201551966369152, "num_tokens": 3673600.0, "step": 130 }, { "entropy": 1.1712732832878827, "epoch": 0.224, "grad_norm": 62.25, "learning_rate": 2.7800000000000005e-06, "loss": 18.8985, "mean_token_accuracy": 0.7195578265935183, "num_tokens": 3953732.0, "step": 140 }, { "entropy": 1.1372143357992173, "epoch": 0.24, "grad_norm": 59.25, "learning_rate": 2.9800000000000003e-06, "loss": 18.3388, "mean_token_accuracy": 0.7276442721486092, "num_tokens": 4243655.0, "step": 150 }, { "entropy": 1.1075575590133666, "epoch": 0.256, "grad_norm": 55.5, "learning_rate": 3.1800000000000005e-06, "loss": 17.7317, "mean_token_accuracy": 0.7332122329622507, "num_tokens": 4531471.0, "step": 160 }, { "entropy": 1.1275247156620025, "epoch": 0.272, "grad_norm": 53.0, "learning_rate": 3.3800000000000007e-06, "loss": 18.2164, "mean_token_accuracy": 0.7298861864954234, "num_tokens": 4810284.0, "step": 170 }, { "entropy": 1.1071187134832143, "epoch": 0.288, "grad_norm": 49.25, "learning_rate": 3.58e-06, "loss": 17.7625, "mean_token_accuracy": 0.7337470225989818, "num_tokens": 5095104.0, "step": 180 }, { "entropy": 1.1132069051265716, "epoch": 0.304, "grad_norm": 54.25, "learning_rate": 3.7800000000000002e-06, "loss": 17.7962, "mean_token_accuracy": 0.7336070898920297, "num_tokens": 5383732.0, "step": 190 }, { "entropy": 1.0761282254010438, "epoch": 0.32, "grad_norm": 51.0, "learning_rate": 3.980000000000001e-06, "loss": 17.3026, "mean_token_accuracy": 0.7393171060830355, "num_tokens": 5676334.0, "step": 200 }, { "epoch": 0.32, "eval_biology_entropy": 1.4338086099624634, "eval_biology_loss": 1.3705029487609863, "eval_biology_mean_token_accuracy": 0.6675603828430176, "eval_biology_num_tokens": 5676334.0, "eval_biology_runtime": 22.0314, "eval_biology_samples_per_second": 22.695, "eval_biology_steps_per_second": 5.674, "step": 200 }, { "epoch": 0.32, "eval_chemistry_entropy": 1.1902209458351136, "eval_chemistry_loss": 1.1403467655181885, "eval_chemistry_mean_token_accuracy": 0.7157996978759765, "eval_chemistry_num_tokens": 5676334.0, "eval_chemistry_runtime": 26.8655, "eval_chemistry_samples_per_second": 18.611, "eval_chemistry_steps_per_second": 4.653, "step": 200 }, { "epoch": 0.32, "eval_math_entropy": 1.1017972359657286, "eval_math_loss": 1.1044487953186035, "eval_math_mean_token_accuracy": 0.7332207527160645, "eval_math_num_tokens": 5676334.0, "eval_math_runtime": 27.5365, "eval_math_samples_per_second": 18.158, "eval_math_steps_per_second": 4.539, "step": 200 }, { "epoch": 0.32, "eval_cyber_entropy": 2.916593015193939, "eval_cyber_loss": 3.2743771076202393, "eval_cyber_mean_token_accuracy": 0.4284310387074947, "eval_cyber_num_tokens": 5676334.0, "eval_cyber_runtime": 26.1262, "eval_cyber_samples_per_second": 15.195, "eval_cyber_steps_per_second": 3.828, "step": 200 }, { "entropy": 1.0972535338252782, "epoch": 0.336, "grad_norm": 48.0, "learning_rate": 4.18e-06, "loss": 17.6967, "mean_token_accuracy": 0.7339770793914795, "num_tokens": 5958480.0, "step": 210 }, { "entropy": 1.0661637954413892, "epoch": 0.352, "grad_norm": 52.0, "learning_rate": 4.38e-06, "loss": 17.0451, "mean_token_accuracy": 0.7420999370515347, "num_tokens": 6242161.0, "step": 220 }, { "entropy": 1.0574691709131003, "epoch": 0.368, "grad_norm": 43.75, "learning_rate": 4.58e-06, "loss": 17.0476, "mean_token_accuracy": 0.7413399379700423, "num_tokens": 6523679.0, "step": 230 }, { "entropy": 1.0383559666574, "epoch": 0.384, "grad_norm": 52.75, "learning_rate": 4.78e-06, "loss": 16.6413, "mean_token_accuracy": 0.7455471660941839, "num_tokens": 6810978.0, "step": 240 }, { "entropy": 1.0844071809202434, "epoch": 0.4, "grad_norm": 54.25, "learning_rate": 4.980000000000001e-06, "loss": 17.5456, "mean_token_accuracy": 0.7374324273318053, "num_tokens": 7096903.0, "step": 250 }, { "entropy": 1.0462343256920577, "epoch": 0.416, "grad_norm": 46.25, "learning_rate": 5.18e-06, "loss": 16.7333, "mean_token_accuracy": 0.7437998823821544, "num_tokens": 7377181.0, "step": 260 }, { "entropy": 1.0309648185968399, "epoch": 0.432, "grad_norm": 53.5, "learning_rate": 5.380000000000001e-06, "loss": 16.3635, "mean_token_accuracy": 0.7494884602725506, "num_tokens": 7650523.0, "step": 270 }, { "entropy": 1.011606451496482, "epoch": 0.448, "grad_norm": 55.5, "learning_rate": 5.580000000000001e-06, "loss": 16.2941, "mean_token_accuracy": 0.7498481426388025, "num_tokens": 7936788.0, "step": 280 }, { "entropy": 1.0099118243902923, "epoch": 0.464, "grad_norm": 46.25, "learning_rate": 5.78e-06, "loss": 16.2711, "mean_token_accuracy": 0.7521415069699288, "num_tokens": 8223147.0, "step": 290 }, { "entropy": 1.0334131706506013, "epoch": 0.48, "grad_norm": 43.25, "learning_rate": 5.98e-06, "loss": 16.4655, "mean_token_accuracy": 0.748864620923996, "num_tokens": 8506030.0, "step": 300 }, { "epoch": 0.48, "eval_biology_entropy": 1.422005220413208, "eval_biology_loss": 1.3784860372543335, "eval_biology_mean_token_accuracy": 0.6655148763656616, "eval_biology_num_tokens": 8506030.0, "eval_biology_runtime": 22.0223, "eval_biology_samples_per_second": 22.704, "eval_biology_steps_per_second": 5.676, "step": 300 }, { "epoch": 0.48, "eval_chemistry_entropy": 1.1605761876106262, "eval_chemistry_loss": 1.1291848421096802, "eval_chemistry_mean_token_accuracy": 0.7179730429649352, "eval_chemistry_num_tokens": 8506030.0, "eval_chemistry_runtime": 26.8904, "eval_chemistry_samples_per_second": 18.594, "eval_chemistry_steps_per_second": 4.648, "step": 300 }, { "epoch": 0.48, "eval_math_entropy": 1.0216443300247193, "eval_math_loss": 1.0201935768127441, "eval_math_mean_token_accuracy": 0.749815523147583, "eval_math_num_tokens": 8506030.0, "eval_math_runtime": 27.5335, "eval_math_samples_per_second": 18.16, "eval_math_steps_per_second": 4.54, "step": 300 }, { "epoch": 0.48, "eval_cyber_entropy": 2.940396952629089, "eval_cyber_loss": 3.153137683868408, "eval_cyber_mean_token_accuracy": 0.4395553506910801, "eval_cyber_num_tokens": 8506030.0, "eval_cyber_runtime": 26.1452, "eval_cyber_samples_per_second": 15.184, "eval_cyber_steps_per_second": 3.825, "step": 300 }, { "entropy": 0.9862126674503088, "epoch": 0.496, "grad_norm": 56.25, "learning_rate": 6.18e-06, "loss": 15.7919, "mean_token_accuracy": 0.7557632889598608, "num_tokens": 8788726.0, "step": 310 }, { "entropy": 0.9811401419341564, "epoch": 0.512, "grad_norm": 46.25, "learning_rate": 6.380000000000001e-06, "loss": 15.7454, "mean_token_accuracy": 0.7576492365449667, "num_tokens": 9078039.0, "step": 320 }, { "entropy": 0.9647420089691877, "epoch": 0.528, "grad_norm": 44.5, "learning_rate": 6.5800000000000005e-06, "loss": 15.4619, "mean_token_accuracy": 0.7620009411126375, "num_tokens": 9373860.0, "step": 330 }, { "entropy": 0.9784351203590631, "epoch": 0.544, "grad_norm": 51.75, "learning_rate": 6.780000000000001e-06, "loss": 15.6775, "mean_token_accuracy": 0.7592026349157095, "num_tokens": 9660940.0, "step": 340 }, { "entropy": 0.9894711822271347, "epoch": 0.56, "grad_norm": 62.25, "learning_rate": 6.98e-06, "loss": 15.8268, "mean_token_accuracy": 0.754706758633256, "num_tokens": 9932302.0, "step": 350 }, { "entropy": 0.9637547507882118, "epoch": 0.576, "grad_norm": 45.0, "learning_rate": 7.180000000000001e-06, "loss": 15.4676, "mean_token_accuracy": 0.7605381533503532, "num_tokens": 10215462.0, "step": 360 }, { "entropy": 0.9582533340901136, "epoch": 0.592, "grad_norm": 42.5, "learning_rate": 7.3800000000000005e-06, "loss": 15.343, "mean_token_accuracy": 0.7618525486439467, "num_tokens": 10504396.0, "step": 370 }, { "entropy": 0.9651506002992392, "epoch": 0.608, "grad_norm": 42.75, "learning_rate": 7.58e-06, "loss": 15.4775, "mean_token_accuracy": 0.7616991735994816, "num_tokens": 10793126.0, "step": 380 }, { "entropy": 0.951299836859107, "epoch": 0.624, "grad_norm": 46.0, "learning_rate": 7.78e-06, "loss": 15.2327, "mean_token_accuracy": 0.7628108691424131, "num_tokens": 11081768.0, "step": 390 }, { "entropy": 0.9370664428919554, "epoch": 0.64, "grad_norm": 45.75, "learning_rate": 7.980000000000002e-06, "loss": 14.9056, "mean_token_accuracy": 0.7668638564646244, "num_tokens": 11370320.0, "step": 400 }, { "epoch": 0.64, "eval_biology_entropy": 1.4101261868476869, "eval_biology_loss": 1.385290503501892, "eval_biology_mean_token_accuracy": 0.6643283500671386, "eval_biology_num_tokens": 11370320.0, "eval_biology_runtime": 22.0311, "eval_biology_samples_per_second": 22.695, "eval_biology_steps_per_second": 5.674, "step": 400 }, { "epoch": 0.64, "eval_chemistry_entropy": 1.134839651107788, "eval_chemistry_loss": 1.117846965789795, "eval_chemistry_mean_token_accuracy": 0.721014030456543, "eval_chemistry_num_tokens": 11370320.0, "eval_chemistry_runtime": 26.9193, "eval_chemistry_samples_per_second": 18.574, "eval_chemistry_steps_per_second": 4.644, "step": 400 }, { "epoch": 0.64, "eval_math_entropy": 0.9476065578460693, "eval_math_loss": 0.9578045606613159, "eval_math_mean_token_accuracy": 0.760874231338501, "eval_math_num_tokens": 11370320.0, "eval_math_runtime": 27.5198, "eval_math_samples_per_second": 18.169, "eval_math_steps_per_second": 4.542, "step": 400 }, { "epoch": 0.64, "eval_cyber_entropy": 2.8283654510974885, "eval_cyber_loss": 3.0569260120391846, "eval_cyber_mean_token_accuracy": 0.4431245893239975, "eval_cyber_num_tokens": 11370320.0, "eval_cyber_runtime": 26.384, "eval_cyber_samples_per_second": 15.047, "eval_cyber_steps_per_second": 3.79, "step": 400 }, { "entropy": 0.9207184508442878, "epoch": 0.656, "grad_norm": 44.5, "learning_rate": 8.18e-06, "loss": 14.748, "mean_token_accuracy": 0.7685097701847553, "num_tokens": 11657835.0, "step": 410 }, { "entropy": 0.9384444292634726, "epoch": 0.672, "grad_norm": 44.5, "learning_rate": 8.380000000000001e-06, "loss": 15.0013, "mean_token_accuracy": 0.7654238797724247, "num_tokens": 11949262.0, "step": 420 }, { "entropy": 0.9148579228669405, "epoch": 0.688, "grad_norm": 35.75, "learning_rate": 8.580000000000001e-06, "loss": 14.6923, "mean_token_accuracy": 0.7695376992225647, "num_tokens": 12227640.0, "step": 430 }, { "entropy": 0.9156919397413731, "epoch": 0.704, "grad_norm": 36.5, "learning_rate": 8.78e-06, "loss": 14.6672, "mean_token_accuracy": 0.7705338027328252, "num_tokens": 12516641.0, "step": 440 }, { "entropy": 0.9278485044836998, "epoch": 0.72, "grad_norm": 42.5, "learning_rate": 8.98e-06, "loss": 14.8435, "mean_token_accuracy": 0.7673114899545908, "num_tokens": 12793343.0, "step": 450 }, { "entropy": 0.9052219696342945, "epoch": 0.736, "grad_norm": 41.0, "learning_rate": 9.180000000000002e-06, "loss": 14.5171, "mean_token_accuracy": 0.7729556966573, "num_tokens": 13077981.0, "step": 460 }, { "entropy": 0.8888643320649863, "epoch": 0.752, "grad_norm": 41.25, "learning_rate": 9.38e-06, "loss": 14.1497, "mean_token_accuracy": 0.776068452000618, "num_tokens": 13358957.0, "step": 470 }, { "entropy": 0.8620530396699906, "epoch": 0.768, "grad_norm": 42.5, "learning_rate": 9.58e-06, "loss": 13.834, "mean_token_accuracy": 0.7819891981780529, "num_tokens": 13653412.0, "step": 480 }, { "entropy": 0.9176285572350025, "epoch": 0.784, "grad_norm": 39.0, "learning_rate": 9.780000000000001e-06, "loss": 14.6357, "mean_token_accuracy": 0.7709478087723255, "num_tokens": 13940856.0, "step": 490 }, { "entropy": 0.9128546692430973, "epoch": 0.8, "grad_norm": 40.25, "learning_rate": 9.980000000000001e-06, "loss": 14.5874, "mean_token_accuracy": 0.7713334109634161, "num_tokens": 14230754.0, "step": 500 }, { "epoch": 0.8, "eval_biology_entropy": 1.4446520280838013, "eval_biology_loss": 1.392669439315796, "eval_biology_mean_token_accuracy": 0.6637401723861694, "eval_biology_num_tokens": 14230754.0, "eval_biology_runtime": 22.2391, "eval_biology_samples_per_second": 22.483, "eval_biology_steps_per_second": 5.621, "step": 500 }, { "epoch": 0.8, "eval_chemistry_entropy": 1.1566239352226257, "eval_chemistry_loss": 1.1047961711883545, "eval_chemistry_mean_token_accuracy": 0.7233014287948608, "eval_chemistry_num_tokens": 14230754.0, "eval_chemistry_runtime": 26.8042, "eval_chemistry_samples_per_second": 18.654, "eval_chemistry_steps_per_second": 4.663, "step": 500 }, { "epoch": 0.8, "eval_math_entropy": 0.9341866765022278, "eval_math_loss": 0.9057817459106445, "eval_math_mean_token_accuracy": 0.7704657621383667, "eval_math_num_tokens": 14230754.0, "eval_math_runtime": 27.5093, "eval_math_samples_per_second": 18.176, "eval_math_steps_per_second": 4.544, "step": 500 }, { "epoch": 0.8, "eval_cyber_entropy": 2.902767553329468, "eval_cyber_loss": 2.947915554046631, "eval_cyber_mean_token_accuracy": 0.45555895671248436, "eval_cyber_num_tokens": 14230754.0, "eval_cyber_runtime": 26.1612, "eval_cyber_samples_per_second": 15.175, "eval_cyber_steps_per_second": 3.822, "step": 500 }, { "entropy": 0.8841698631644249, "epoch": 0.816, "grad_norm": 38.0, "learning_rate": 1.018e-05, "loss": 14.1477, "mean_token_accuracy": 0.7752781912684441, "num_tokens": 14519893.0, "step": 510 }, { "entropy": 0.875461632013321, "epoch": 0.832, "grad_norm": 39.0, "learning_rate": 1.038e-05, "loss": 14.0361, "mean_token_accuracy": 0.7779335591942071, "num_tokens": 14805088.0, "step": 520 }, { "entropy": 0.899658627063036, "epoch": 0.848, "grad_norm": 37.25, "learning_rate": 1.0580000000000002e-05, "loss": 14.3925, "mean_token_accuracy": 0.7728543490171432, "num_tokens": 15086306.0, "step": 530 }, { "entropy": 0.8889949310570955, "epoch": 0.864, "grad_norm": 41.0, "learning_rate": 1.0780000000000002e-05, "loss": 14.1314, "mean_token_accuracy": 0.7759746141731739, "num_tokens": 15370985.0, "step": 540 }, { "entropy": 0.8936371214687824, "epoch": 0.88, "grad_norm": 36.5, "learning_rate": 1.0980000000000002e-05, "loss": 14.2807, "mean_token_accuracy": 0.7754444174468518, "num_tokens": 15653836.0, "step": 550 }, { "entropy": 0.8719608142971993, "epoch": 0.896, "grad_norm": 34.75, "learning_rate": 1.1180000000000001e-05, "loss": 13.9767, "mean_token_accuracy": 0.7785432428121567, "num_tokens": 15932179.0, "step": 560 }, { "entropy": 0.8601628458127379, "epoch": 0.912, "grad_norm": 35.25, "learning_rate": 1.138e-05, "loss": 13.7333, "mean_token_accuracy": 0.77998266518116, "num_tokens": 16219842.0, "step": 570 }, { "entropy": 0.848052042350173, "epoch": 0.928, "grad_norm": 34.25, "learning_rate": 1.1580000000000001e-05, "loss": 13.5598, "mean_token_accuracy": 0.783201026916504, "num_tokens": 16499842.0, "step": 580 }, { "entropy": 0.8647568510845304, "epoch": 0.944, "grad_norm": 98.5, "learning_rate": 1.178e-05, "loss": 13.9513, "mean_token_accuracy": 0.7791629247367382, "num_tokens": 16781882.0, "step": 590 }, { "entropy": 0.8681454580277205, "epoch": 0.96, "grad_norm": 28.375, "learning_rate": 1.198e-05, "loss": 13.8619, "mean_token_accuracy": 0.7806598395109177, "num_tokens": 17067407.0, "step": 600 }, { "epoch": 0.96, "eval_biology_entropy": 1.403841501235962, "eval_biology_loss": 1.3980714082717896, "eval_biology_mean_token_accuracy": 0.6626365647315979, "eval_biology_num_tokens": 17067407.0, "eval_biology_runtime": 22.2675, "eval_biology_samples_per_second": 22.454, "eval_biology_steps_per_second": 5.614, "step": 600 }, { "epoch": 0.96, "eval_chemistry_entropy": 1.099705493927002, "eval_chemistry_loss": 1.0918222665786743, "eval_chemistry_mean_token_accuracy": 0.7260931057929992, "eval_chemistry_num_tokens": 17067407.0, "eval_chemistry_runtime": 26.9078, "eval_chemistry_samples_per_second": 18.582, "eval_chemistry_steps_per_second": 4.645, "step": 600 }, { "epoch": 0.96, "eval_math_entropy": 0.8664147562980652, "eval_math_loss": 0.8665754795074463, "eval_math_mean_token_accuracy": 0.7780344748497009, "eval_math_num_tokens": 17067407.0, "eval_math_runtime": 27.5569, "eval_math_samples_per_second": 18.144, "eval_math_steps_per_second": 4.536, "step": 600 }, { "epoch": 0.96, "eval_cyber_entropy": 2.6499598491191865, "eval_cyber_loss": 2.8601412773132324, "eval_cyber_mean_token_accuracy": 0.4642623996734619, "eval_cyber_num_tokens": 17067407.0, "eval_cyber_runtime": 26.1765, "eval_cyber_samples_per_second": 15.166, "eval_cyber_steps_per_second": 3.82, "step": 600 }, { "entropy": 0.8494564741849899, "epoch": 0.976, "grad_norm": 40.25, "learning_rate": 1.218e-05, "loss": 13.6376, "mean_token_accuracy": 0.7825958080589771, "num_tokens": 17350994.0, "step": 610 }, { "entropy": 0.8730685204267502, "epoch": 0.992, "grad_norm": 38.75, "learning_rate": 1.2380000000000002e-05, "loss": 13.8595, "mean_token_accuracy": 0.7786977473646403, "num_tokens": 17637514.0, "step": 620 }, { "entropy": 0.8468878531828523, "epoch": 1.008, "grad_norm": 32.5, "learning_rate": 1.2580000000000002e-05, "loss": 13.534, "mean_token_accuracy": 0.7822502862662077, "num_tokens": 17926570.0, "step": 630 }, { "entropy": 0.8290399981662631, "epoch": 1.024, "grad_norm": 29.375, "learning_rate": 1.2780000000000001e-05, "loss": 13.2779, "mean_token_accuracy": 0.7864516761153937, "num_tokens": 18207652.0, "step": 640 }, { "entropy": 0.8298395985737443, "epoch": 1.04, "grad_norm": 31.0, "learning_rate": 1.2980000000000001e-05, "loss": 13.1281, "mean_token_accuracy": 0.7878676626831294, "num_tokens": 18484931.0, "step": 650 }, { "entropy": 0.8254991695284843, "epoch": 1.056, "grad_norm": 49.0, "learning_rate": 1.3180000000000001e-05, "loss": 13.2747, "mean_token_accuracy": 0.7866876818239689, "num_tokens": 18773457.0, "step": 660 }, { "entropy": 0.8410865612328052, "epoch": 1.072, "grad_norm": 38.75, "learning_rate": 1.3380000000000002e-05, "loss": 13.3101, "mean_token_accuracy": 0.7853217396885157, "num_tokens": 19055365.0, "step": 670 }, { "entropy": 0.8255538143217563, "epoch": 1.088, "grad_norm": 28.875, "learning_rate": 1.3580000000000002e-05, "loss": 13.2174, "mean_token_accuracy": 0.7872007485479117, "num_tokens": 19345730.0, "step": 680 }, { "entropy": 0.823124579153955, "epoch": 1.104, "grad_norm": 29.375, "learning_rate": 1.378e-05, "loss": 13.1696, "mean_token_accuracy": 0.7877223126590251, "num_tokens": 19637390.0, "step": 690 }, { "entropy": 0.8028364922851324, "epoch": 1.12, "grad_norm": 34.5, "learning_rate": 1.398e-05, "loss": 12.7597, "mean_token_accuracy": 0.792793495580554, "num_tokens": 19923914.0, "step": 700 }, { "epoch": 1.12, "eval_biology_entropy": 1.3986766724586486, "eval_biology_loss": 1.407199740409851, "eval_biology_mean_token_accuracy": 0.6613863172531128, "eval_biology_num_tokens": 19923914.0, "eval_biology_runtime": 21.9946, "eval_biology_samples_per_second": 22.733, "eval_biology_steps_per_second": 5.683, "step": 700 }, { "epoch": 1.12, "eval_chemistry_entropy": 1.0769947800636293, "eval_chemistry_loss": 1.0871174335479736, "eval_chemistry_mean_token_accuracy": 0.7282235732078552, "eval_chemistry_num_tokens": 19923914.0, "eval_chemistry_runtime": 26.8846, "eval_chemistry_samples_per_second": 18.598, "eval_chemistry_steps_per_second": 4.65, "step": 700 }, { "epoch": 1.12, "eval_math_entropy": 0.8362808737754822, "eval_math_loss": 0.8373622894287109, "eval_math_mean_token_accuracy": 0.7839989976882935, "eval_math_num_tokens": 19923914.0, "eval_math_runtime": 27.4992, "eval_math_samples_per_second": 18.182, "eval_math_steps_per_second": 4.546, "step": 700 }, { "epoch": 1.12, "eval_cyber_entropy": 2.5681421542167664, "eval_cyber_loss": 2.8721120357513428, "eval_cyber_mean_token_accuracy": 0.4651792038977146, "eval_cyber_num_tokens": 19923914.0, "eval_cyber_runtime": 26.1144, "eval_cyber_samples_per_second": 15.202, "eval_cyber_steps_per_second": 3.829, "step": 700 }, { "entropy": 0.8095206459984183, "epoch": 1.1360000000000001, "grad_norm": 38.5, "learning_rate": 1.418e-05, "loss": 12.8823, "mean_token_accuracy": 0.7904491990804672, "num_tokens": 20201892.0, "step": 710 }, { "entropy": 0.8196006739512086, "epoch": 1.152, "grad_norm": 30.75, "learning_rate": 1.4380000000000001e-05, "loss": 13.0652, "mean_token_accuracy": 0.7910903133451939, "num_tokens": 20490282.0, "step": 720 }, { "entropy": 0.8046272564679384, "epoch": 1.168, "grad_norm": 29.5, "learning_rate": 1.4580000000000001e-05, "loss": 12.8751, "mean_token_accuracy": 0.7915120176970959, "num_tokens": 20785786.0, "step": 730 }, { "entropy": 0.8037027461454272, "epoch": 1.184, "grad_norm": 30.5, "learning_rate": 1.478e-05, "loss": 12.8554, "mean_token_accuracy": 0.7912269696593285, "num_tokens": 21074205.0, "step": 740 }, { "entropy": 0.79942841604352, "epoch": 1.2, "grad_norm": 30.75, "learning_rate": 1.498e-05, "loss": 12.7343, "mean_token_accuracy": 0.7923291265964508, "num_tokens": 21369159.0, "step": 750 }, { "entropy": 0.807464637234807, "epoch": 1.216, "grad_norm": 35.0, "learning_rate": 1.5180000000000002e-05, "loss": 12.8367, "mean_token_accuracy": 0.7913754984736443, "num_tokens": 21649178.0, "step": 760 }, { "entropy": 0.7876615423709155, "epoch": 1.232, "grad_norm": 30.25, "learning_rate": 1.5380000000000002e-05, "loss": 12.556, "mean_token_accuracy": 0.7947213523089885, "num_tokens": 21930239.0, "step": 770 }, { "entropy": 0.7889078231528401, "epoch": 1.248, "grad_norm": 29.75, "learning_rate": 1.5580000000000003e-05, "loss": 12.5585, "mean_token_accuracy": 0.7938403252512216, "num_tokens": 22216387.0, "step": 780 }, { "entropy": 0.8203166201710701, "epoch": 1.264, "grad_norm": 30.0, "learning_rate": 1.578e-05, "loss": 13.0401, "mean_token_accuracy": 0.7889407943934202, "num_tokens": 22501002.0, "step": 790 }, { "entropy": 0.7915117274969816, "epoch": 1.28, "grad_norm": 33.0, "learning_rate": 1.5980000000000003e-05, "loss": 12.6967, "mean_token_accuracy": 0.7933882053941488, "num_tokens": 22779682.0, "step": 800 }, { "epoch": 1.28, "eval_biology_entropy": 1.4146036610603332, "eval_biology_loss": 1.413214921951294, "eval_biology_mean_token_accuracy": 0.659837914943695, "eval_biology_num_tokens": 22779682.0, "eval_biology_runtime": 22.0253, "eval_biology_samples_per_second": 22.701, "eval_biology_steps_per_second": 5.675, "step": 800 }, { "epoch": 1.28, "eval_chemistry_entropy": 1.0714722080230712, "eval_chemistry_loss": 1.0812031030654907, "eval_chemistry_mean_token_accuracy": 0.7291303877830505, "eval_chemistry_num_tokens": 22779682.0, "eval_chemistry_runtime": 26.8892, "eval_chemistry_samples_per_second": 18.595, "eval_chemistry_steps_per_second": 4.649, "step": 800 }, { "epoch": 1.28, "eval_math_entropy": 0.8192751173973083, "eval_math_loss": 0.8122938275337219, "eval_math_mean_token_accuracy": 0.789606306552887, "eval_math_num_tokens": 22779682.0, "eval_math_runtime": 27.5274, "eval_math_samples_per_second": 18.164, "eval_math_steps_per_second": 4.541, "step": 800 }, { "epoch": 1.28, "eval_cyber_entropy": 2.5670096004009246, "eval_cyber_loss": 2.8539652824401855, "eval_cyber_mean_token_accuracy": 0.46533648878335954, "eval_cyber_num_tokens": 22779682.0, "eval_cyber_runtime": 26.1723, "eval_cyber_samples_per_second": 15.169, "eval_cyber_steps_per_second": 3.821, "step": 800 }, { "entropy": 0.786592660844326, "epoch": 1.296, "grad_norm": 35.5, "learning_rate": 1.618e-05, "loss": 12.5036, "mean_token_accuracy": 0.7954838387668133, "num_tokens": 23057744.0, "step": 810 }, { "entropy": 0.7977361943572759, "epoch": 1.312, "grad_norm": 34.5, "learning_rate": 1.638e-05, "loss": 12.7511, "mean_token_accuracy": 0.7935790359973908, "num_tokens": 23344644.0, "step": 820 }, { "entropy": 0.8038571482524276, "epoch": 1.328, "grad_norm": 38.75, "learning_rate": 1.658e-05, "loss": 12.847, "mean_token_accuracy": 0.7909597154706717, "num_tokens": 23622405.0, "step": 830 }, { "entropy": 0.781531005539, "epoch": 1.3439999999999999, "grad_norm": 28.75, "learning_rate": 1.6780000000000002e-05, "loss": 12.4551, "mean_token_accuracy": 0.795590429380536, "num_tokens": 23899771.0, "step": 840 }, { "entropy": 0.7783096175640821, "epoch": 1.3599999999999999, "grad_norm": 27.0, "learning_rate": 1.698e-05, "loss": 12.4462, "mean_token_accuracy": 0.7967745348811149, "num_tokens": 24187023.0, "step": 850 }, { "entropy": 0.8302321504801512, "epoch": 1.376, "grad_norm": 30.625, "learning_rate": 1.718e-05, "loss": 13.2594, "mean_token_accuracy": 0.7850385505706072, "num_tokens": 24466132.0, "step": 860 }, { "entropy": 0.7808034917339682, "epoch": 1.392, "grad_norm": 34.5, "learning_rate": 1.7380000000000003e-05, "loss": 12.4747, "mean_token_accuracy": 0.7949298892170191, "num_tokens": 24748043.0, "step": 870 }, { "entropy": 0.7715026669204235, "epoch": 1.408, "grad_norm": 36.0, "learning_rate": 1.758e-05, "loss": 12.3399, "mean_token_accuracy": 0.7984749253839254, "num_tokens": 25036674.0, "step": 880 }, { "entropy": 0.7645593881607056, "epoch": 1.424, "grad_norm": 27.25, "learning_rate": 1.7780000000000003e-05, "loss": 12.1973, "mean_token_accuracy": 0.7993213057518005, "num_tokens": 25324579.0, "step": 890 }, { "entropy": 0.7820997565984726, "epoch": 1.44, "grad_norm": 33.0, "learning_rate": 1.798e-05, "loss": 12.5051, "mean_token_accuracy": 0.7951443370431661, "num_tokens": 25606824.0, "step": 900 }, { "epoch": 1.44, "eval_biology_entropy": 1.3808940649032593, "eval_biology_loss": 1.4220765829086304, "eval_biology_mean_token_accuracy": 0.6588086094856263, "eval_biology_num_tokens": 25606824.0, "eval_biology_runtime": 22.0118, "eval_biology_samples_per_second": 22.715, "eval_biology_steps_per_second": 5.679, "step": 900 }, { "epoch": 1.44, "eval_chemistry_entropy": 1.0482762174606324, "eval_chemistry_loss": 1.073889136314392, "eval_chemistry_mean_token_accuracy": 0.7307762913703918, "eval_chemistry_num_tokens": 25606824.0, "eval_chemistry_runtime": 26.8657, "eval_chemistry_samples_per_second": 18.611, "eval_chemistry_steps_per_second": 4.653, "step": 900 }, { "epoch": 1.44, "eval_math_entropy": 0.7846709032058716, "eval_math_loss": 0.7932249903678894, "eval_math_mean_token_accuracy": 0.792679114818573, "eval_math_num_tokens": 25606824.0, "eval_math_runtime": 27.514, "eval_math_samples_per_second": 18.173, "eval_math_steps_per_second": 4.543, "step": 900 }, { "epoch": 1.44, "eval_cyber_entropy": 2.5483840811252594, "eval_cyber_loss": 2.8718831539154053, "eval_cyber_mean_token_accuracy": 0.4638554835319519, "eval_cyber_num_tokens": 25606824.0, "eval_cyber_runtime": 26.216, "eval_cyber_samples_per_second": 15.143, "eval_cyber_steps_per_second": 3.814, "step": 900 }, { "entropy": 0.7723285494372248, "epoch": 1.456, "grad_norm": 28.625, "learning_rate": 1.8180000000000002e-05, "loss": 12.303, "mean_token_accuracy": 0.7964108034968376, "num_tokens": 25886396.0, "step": 910 }, { "entropy": 0.7762986140325665, "epoch": 1.472, "grad_norm": 28.875, "learning_rate": 1.8380000000000004e-05, "loss": 12.4134, "mean_token_accuracy": 0.7955747056752444, "num_tokens": 26163618.0, "step": 920 }, { "entropy": 0.7938198037445545, "epoch": 1.488, "grad_norm": 29.625, "learning_rate": 1.858e-05, "loss": 12.75, "mean_token_accuracy": 0.7917455974966288, "num_tokens": 26438338.0, "step": 930 }, { "entropy": 0.7594615155830979, "epoch": 1.504, "grad_norm": 34.0, "learning_rate": 1.878e-05, "loss": 12.14, "mean_token_accuracy": 0.8009778898209333, "num_tokens": 26729255.0, "step": 940 }, { "entropy": 0.7861603863537312, "epoch": 1.52, "grad_norm": 27.125, "learning_rate": 1.898e-05, "loss": 12.4626, "mean_token_accuracy": 0.7956234533339739, "num_tokens": 27017935.0, "step": 950 }, { "entropy": 0.7631909586489201, "epoch": 1.536, "grad_norm": 24.625, "learning_rate": 1.918e-05, "loss": 12.1955, "mean_token_accuracy": 0.7989141892641782, "num_tokens": 27306339.0, "step": 960 }, { "entropy": 0.7708934009075165, "epoch": 1.552, "grad_norm": 27.75, "learning_rate": 1.938e-05, "loss": 12.1963, "mean_token_accuracy": 0.7984277427196502, "num_tokens": 27591959.0, "step": 970 }, { "entropy": 0.7459486592561007, "epoch": 1.568, "grad_norm": 28.625, "learning_rate": 1.9580000000000002e-05, "loss": 11.9228, "mean_token_accuracy": 0.8039638720452785, "num_tokens": 27884398.0, "step": 980 }, { "entropy": 0.7573445823043585, "epoch": 1.584, "grad_norm": 27.0, "learning_rate": 1.978e-05, "loss": 12.0883, "mean_token_accuracy": 0.8011246718466282, "num_tokens": 28171274.0, "step": 990 }, { "entropy": 0.7612122105434537, "epoch": 1.6, "grad_norm": 27.5, "learning_rate": 1.9980000000000002e-05, "loss": 12.0981, "mean_token_accuracy": 0.7986274570226669, "num_tokens": 28457624.0, "step": 1000 }, { "epoch": 1.6, "eval_biology_entropy": 1.3829385170936583, "eval_biology_loss": 1.4260554313659668, "eval_biology_mean_token_accuracy": 0.6589009766578674, "eval_biology_num_tokens": 28457624.0, "eval_biology_runtime": 22.024, "eval_biology_samples_per_second": 22.703, "eval_biology_steps_per_second": 5.676, "step": 1000 }, { "epoch": 1.6, "eval_chemistry_entropy": 1.0377137541770936, "eval_chemistry_loss": 1.0700007677078247, "eval_chemistry_mean_token_accuracy": 0.7318583874702453, "eval_chemistry_num_tokens": 28457624.0, "eval_chemistry_runtime": 26.9114, "eval_chemistry_samples_per_second": 18.579, "eval_chemistry_steps_per_second": 4.645, "step": 1000 }, { "epoch": 1.6, "eval_math_entropy": 0.775481684923172, "eval_math_loss": 0.7736496329307556, "eval_math_mean_token_accuracy": 0.7959285154342651, "eval_math_num_tokens": 28457624.0, "eval_math_runtime": 27.53, "eval_math_samples_per_second": 18.162, "eval_math_steps_per_second": 4.541, "step": 1000 }, { "epoch": 1.6, "eval_cyber_entropy": 2.5157601726055145, "eval_cyber_loss": 2.8350298404693604, "eval_cyber_mean_token_accuracy": 0.4679137858748436, "eval_cyber_num_tokens": 28457624.0, "eval_cyber_runtime": 26.1345, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.826, "step": 1000 }, { "entropy": 0.7650468161329627, "epoch": 1.616, "grad_norm": 24.875, "learning_rate": 1.9980000000000002e-05, "loss": 12.1883, "mean_token_accuracy": 0.798528803884983, "num_tokens": 28743099.0, "step": 1010 }, { "entropy": 0.7713425377383828, "epoch": 1.6320000000000001, "grad_norm": 25.75, "learning_rate": 1.995777777777778e-05, "loss": 12.2948, "mean_token_accuracy": 0.7963836405426263, "num_tokens": 29017297.0, "step": 1020 }, { "entropy": 0.7499153949320316, "epoch": 1.6480000000000001, "grad_norm": 25.75, "learning_rate": 1.9935555555555557e-05, "loss": 11.9856, "mean_token_accuracy": 0.803160610422492, "num_tokens": 29303707.0, "step": 1030 }, { "entropy": 0.7566261947154999, "epoch": 1.6640000000000001, "grad_norm": 25.0, "learning_rate": 1.9913333333333335e-05, "loss": 12.034, "mean_token_accuracy": 0.7999875675886869, "num_tokens": 29597156.0, "step": 1040 }, { "entropy": 0.7669804213568568, "epoch": 1.6800000000000002, "grad_norm": 24.25, "learning_rate": 1.9891111111111112e-05, "loss": 12.2025, "mean_token_accuracy": 0.7990686308592558, "num_tokens": 29883879.0, "step": 1050 }, { "entropy": 0.7553620956838131, "epoch": 1.696, "grad_norm": 28.125, "learning_rate": 1.986888888888889e-05, "loss": 12.1827, "mean_token_accuracy": 0.8000101692974567, "num_tokens": 30165760.0, "step": 1060 }, { "entropy": 0.7463795414194465, "epoch": 1.712, "grad_norm": 22.375, "learning_rate": 1.9846666666666668e-05, "loss": 11.9561, "mean_token_accuracy": 0.8028988271951676, "num_tokens": 30460367.0, "step": 1070 }, { "entropy": 0.7401833109557628, "epoch": 1.728, "grad_norm": 27.375, "learning_rate": 1.9824444444444445e-05, "loss": 11.7133, "mean_token_accuracy": 0.8052776392549277, "num_tokens": 30739137.0, "step": 1080 }, { "entropy": 0.7436290748417378, "epoch": 1.744, "grad_norm": 27.375, "learning_rate": 1.9802222222222226e-05, "loss": 11.8806, "mean_token_accuracy": 0.8036583166569471, "num_tokens": 31022663.0, "step": 1090 }, { "entropy": 0.7478637570515275, "epoch": 1.76, "grad_norm": 25.25, "learning_rate": 1.978e-05, "loss": 11.9202, "mean_token_accuracy": 0.8017146904021502, "num_tokens": 31306494.0, "step": 1100 }, { "epoch": 1.76, "eval_biology_entropy": 1.3845259475708007, "eval_biology_loss": 1.4283864498138428, "eval_biology_mean_token_accuracy": 0.657891107082367, "eval_biology_num_tokens": 31306494.0, "eval_biology_runtime": 21.9927, "eval_biology_samples_per_second": 22.735, "eval_biology_steps_per_second": 5.684, "step": 1100 }, { "epoch": 1.76, "eval_chemistry_entropy": 1.0231492972373963, "eval_chemistry_loss": 1.063183307647705, "eval_chemistry_mean_token_accuracy": 0.7330445971488952, "eval_chemistry_num_tokens": 31306494.0, "eval_chemistry_runtime": 26.8519, "eval_chemistry_samples_per_second": 18.621, "eval_chemistry_steps_per_second": 4.655, "step": 1100 }, { "epoch": 1.76, "eval_math_entropy": 0.7524698441028594, "eval_math_loss": 0.7613377571105957, "eval_math_mean_token_accuracy": 0.798446418762207, "eval_math_num_tokens": 31306494.0, "eval_math_runtime": 27.5284, "eval_math_samples_per_second": 18.163, "eval_math_steps_per_second": 4.541, "step": 1100 }, { "epoch": 1.76, "eval_cyber_entropy": 2.3844272685050965, "eval_cyber_loss": 2.8584094047546387, "eval_cyber_mean_token_accuracy": 0.47087193533778193, "eval_cyber_num_tokens": 31306494.0, "eval_cyber_runtime": 26.2154, "eval_cyber_samples_per_second": 15.144, "eval_cyber_steps_per_second": 3.815, "step": 1100 }, { "entropy": 0.7506109833717346, "epoch": 1.776, "grad_norm": 22.875, "learning_rate": 1.975777777777778e-05, "loss": 11.957, "mean_token_accuracy": 0.803263409435749, "num_tokens": 31595542.0, "step": 1110 }, { "entropy": 0.7545284632593393, "epoch": 1.792, "grad_norm": 25.25, "learning_rate": 1.9735555555555556e-05, "loss": 12.055, "mean_token_accuracy": 0.8008169520646333, "num_tokens": 31881189.0, "step": 1120 }, { "entropy": 0.7454792723059654, "epoch": 1.808, "grad_norm": 22.625, "learning_rate": 1.9713333333333337e-05, "loss": 11.8818, "mean_token_accuracy": 0.8028201397508383, "num_tokens": 32164196.0, "step": 1130 }, { "entropy": 0.7103133289143443, "epoch": 1.8239999999999998, "grad_norm": 23.625, "learning_rate": 1.969111111111111e-05, "loss": 11.3018, "mean_token_accuracy": 0.8097421944141387, "num_tokens": 32441530.0, "step": 1140 }, { "entropy": 0.7296694969758392, "epoch": 1.8399999999999999, "grad_norm": 23.125, "learning_rate": 1.9668888888888892e-05, "loss": 11.722, "mean_token_accuracy": 0.8063848353922367, "num_tokens": 32723145.0, "step": 1150 }, { "entropy": 0.746064018085599, "epoch": 1.8559999999999999, "grad_norm": 26.375, "learning_rate": 1.9646666666666666e-05, "loss": 11.9198, "mean_token_accuracy": 0.8034628454595805, "num_tokens": 33011263.0, "step": 1160 }, { "entropy": 0.7246854526922106, "epoch": 1.8719999999999999, "grad_norm": 25.125, "learning_rate": 1.9624444444444447e-05, "loss": 11.6702, "mean_token_accuracy": 0.8065225839614868, "num_tokens": 33298921.0, "step": 1170 }, { "entropy": 0.7422073289752007, "epoch": 1.888, "grad_norm": 33.0, "learning_rate": 1.9602222222222225e-05, "loss": 11.8223, "mean_token_accuracy": 0.8029078282415867, "num_tokens": 33576243.0, "step": 1180 }, { "entropy": 0.7377389714121818, "epoch": 1.904, "grad_norm": 21.875, "learning_rate": 1.9580000000000002e-05, "loss": 11.7626, "mean_token_accuracy": 0.803905576467514, "num_tokens": 33850968.0, "step": 1190 }, { "entropy": 0.720432554371655, "epoch": 1.92, "grad_norm": 24.125, "learning_rate": 1.955777777777778e-05, "loss": 11.4648, "mean_token_accuracy": 0.8074128460139036, "num_tokens": 34128558.0, "step": 1200 }, { "epoch": 1.92, "eval_biology_entropy": 1.426067009449005, "eval_biology_loss": 1.4298174381256104, "eval_biology_mean_token_accuracy": 0.6572316522598267, "eval_biology_num_tokens": 34128558.0, "eval_biology_runtime": 21.9751, "eval_biology_samples_per_second": 22.753, "eval_biology_steps_per_second": 5.688, "step": 1200 }, { "epoch": 1.92, "eval_chemistry_entropy": 1.052424753189087, "eval_chemistry_loss": 1.058487057685852, "eval_chemistry_mean_token_accuracy": 0.7344707479476928, "eval_chemistry_num_tokens": 34128558.0, "eval_chemistry_runtime": 26.8451, "eval_chemistry_samples_per_second": 18.625, "eval_chemistry_steps_per_second": 4.656, "step": 1200 }, { "epoch": 1.92, "eval_math_entropy": 0.7641568143367767, "eval_math_loss": 0.7467027306556702, "eval_math_mean_token_accuracy": 0.8014610476493835, "eval_math_num_tokens": 34128558.0, "eval_math_runtime": 27.5216, "eval_math_samples_per_second": 18.168, "eval_math_steps_per_second": 4.542, "step": 1200 }, { "epoch": 1.92, "eval_cyber_entropy": 2.4904652881622313, "eval_cyber_loss": 2.6954379081726074, "eval_cyber_mean_token_accuracy": 0.4830169627070427, "eval_cyber_num_tokens": 34128558.0, "eval_cyber_runtime": 26.1923, "eval_cyber_samples_per_second": 15.157, "eval_cyber_steps_per_second": 3.818, "step": 1200 }, { "entropy": 0.731533533334732, "epoch": 1.936, "grad_norm": 25.125, "learning_rate": 1.9535555555555557e-05, "loss": 11.6804, "mean_token_accuracy": 0.8052534744143486, "num_tokens": 34408056.0, "step": 1210 }, { "entropy": 0.7303263584151864, "epoch": 1.952, "grad_norm": 23.375, "learning_rate": 1.9513333333333335e-05, "loss": 11.5676, "mean_token_accuracy": 0.8064503286033868, "num_tokens": 34684679.0, "step": 1220 }, { "entropy": 0.7569911142811179, "epoch": 1.968, "grad_norm": 25.375, "learning_rate": 1.9491111111111113e-05, "loss": 12.1009, "mean_token_accuracy": 0.8005167040973902, "num_tokens": 34971038.0, "step": 1230 }, { "entropy": 0.7218442076817155, "epoch": 1.984, "grad_norm": 23.375, "learning_rate": 1.946888888888889e-05, "loss": 11.5014, "mean_token_accuracy": 0.808349072188139, "num_tokens": 35262281.0, "step": 1240 }, { "entropy": 0.7173755820840597, "epoch": 2.0, "grad_norm": 24.125, "learning_rate": 1.9446666666666668e-05, "loss": 11.4742, "mean_token_accuracy": 0.8086670659482479, "num_tokens": 35560864.0, "step": 1250 }, { "entropy": 0.6936481088399887, "epoch": 2.016, "grad_norm": 23.125, "learning_rate": 1.9424444444444446e-05, "loss": 10.8847, "mean_token_accuracy": 0.8153551481664181, "num_tokens": 35846704.0, "step": 1260 }, { "entropy": 0.6694988587871193, "epoch": 2.032, "grad_norm": 22.5, "learning_rate": 1.9402222222222223e-05, "loss": 10.711, "mean_token_accuracy": 0.816493459790945, "num_tokens": 36128775.0, "step": 1270 }, { "entropy": 0.6576118635013699, "epoch": 2.048, "grad_norm": 23.25, "learning_rate": 1.938e-05, "loss": 10.4997, "mean_token_accuracy": 0.8206516925245524, "num_tokens": 36419504.0, "step": 1280 }, { "entropy": 0.6648308178409934, "epoch": 2.064, "grad_norm": 22.375, "learning_rate": 1.935777777777778e-05, "loss": 10.5449, "mean_token_accuracy": 0.8189927719533443, "num_tokens": 36706816.0, "step": 1290 }, { "entropy": 0.6633218213915825, "epoch": 2.08, "grad_norm": 24.125, "learning_rate": 1.9335555555555556e-05, "loss": 10.5216, "mean_token_accuracy": 0.8182109944522381, "num_tokens": 36988475.0, "step": 1300 }, { "epoch": 2.08, "eval_biology_entropy": 1.240901198387146, "eval_biology_loss": 1.4575155973434448, "eval_biology_mean_token_accuracy": 0.6538263387680053, "eval_biology_num_tokens": 36988475.0, "eval_biology_runtime": 22.0164, "eval_biology_samples_per_second": 22.71, "eval_biology_steps_per_second": 5.678, "step": 1300 }, { "epoch": 2.08, "eval_chemistry_entropy": 0.9047480673789978, "eval_chemistry_loss": 1.077072262763977, "eval_chemistry_mean_token_accuracy": 0.7334306511878967, "eval_chemistry_num_tokens": 36988475.0, "eval_chemistry_runtime": 26.8608, "eval_chemistry_samples_per_second": 18.614, "eval_chemistry_steps_per_second": 4.654, "step": 1300 }, { "epoch": 2.08, "eval_math_entropy": 0.6772888927459717, "eval_math_loss": 0.7423775792121887, "eval_math_mean_token_accuracy": 0.8029003148078918, "eval_math_num_tokens": 36988475.0, "eval_math_runtime": 27.5039, "eval_math_samples_per_second": 18.179, "eval_math_steps_per_second": 4.545, "step": 1300 }, { "epoch": 2.08, "eval_cyber_entropy": 2.240402947664261, "eval_cyber_loss": 2.8395872116088867, "eval_cyber_mean_token_accuracy": 0.4778119161725044, "eval_cyber_num_tokens": 36988475.0, "eval_cyber_runtime": 26.1457, "eval_cyber_samples_per_second": 15.184, "eval_cyber_steps_per_second": 3.825, "step": 1300 }, { "entropy": 0.6708741160109639, "epoch": 2.096, "grad_norm": 22.375, "learning_rate": 1.9313333333333334e-05, "loss": 10.6881, "mean_token_accuracy": 0.8182591505348682, "num_tokens": 37270131.0, "step": 1310 }, { "entropy": 0.6532387970015406, "epoch": 2.112, "grad_norm": 24.0, "learning_rate": 1.9291111111111115e-05, "loss": 10.4792, "mean_token_accuracy": 0.8189583510160446, "num_tokens": 37563537.0, "step": 1320 }, { "entropy": 0.6555240735411644, "epoch": 2.128, "grad_norm": 22.375, "learning_rate": 1.926888888888889e-05, "loss": 10.369, "mean_token_accuracy": 0.8218111298978329, "num_tokens": 37843959.0, "step": 1330 }, { "entropy": 0.6665401035919786, "epoch": 2.144, "grad_norm": 24.25, "learning_rate": 1.924666666666667e-05, "loss": 10.5665, "mean_token_accuracy": 0.8194981347769499, "num_tokens": 38133092.0, "step": 1340 }, { "entropy": 0.6592796456068755, "epoch": 2.16, "grad_norm": 21.5, "learning_rate": 1.9224444444444444e-05, "loss": 10.5062, "mean_token_accuracy": 0.8200013760477305, "num_tokens": 38421229.0, "step": 1350 }, { "entropy": 0.639416103810072, "epoch": 2.176, "grad_norm": 23.0, "learning_rate": 1.9202222222222225e-05, "loss": 10.1779, "mean_token_accuracy": 0.8243647638708353, "num_tokens": 38708043.0, "step": 1360 }, { "entropy": 0.667258214391768, "epoch": 2.192, "grad_norm": 23.375, "learning_rate": 1.918e-05, "loss": 10.6186, "mean_token_accuracy": 0.8171255987137556, "num_tokens": 38996930.0, "step": 1370 }, { "entropy": 0.6627653013914824, "epoch": 2.208, "grad_norm": 23.75, "learning_rate": 1.915777777777778e-05, "loss": 10.6227, "mean_token_accuracy": 0.818722078576684, "num_tokens": 39279481.0, "step": 1380 }, { "entropy": 0.6583162900060415, "epoch": 2.224, "grad_norm": 24.0, "learning_rate": 1.9135555555555555e-05, "loss": 10.4429, "mean_token_accuracy": 0.8204963516443968, "num_tokens": 39569030.0, "step": 1390 }, { "entropy": 0.6620556140318513, "epoch": 2.24, "grad_norm": 23.625, "learning_rate": 1.9113333333333336e-05, "loss": 10.591, "mean_token_accuracy": 0.8184640970081091, "num_tokens": 39854873.0, "step": 1400 }, { "epoch": 2.24, "eval_biology_entropy": 1.1904019894599915, "eval_biology_loss": 1.473069667816162, "eval_biology_mean_token_accuracy": 0.6529810581207275, "eval_biology_num_tokens": 39854873.0, "eval_biology_runtime": 22.0119, "eval_biology_samples_per_second": 22.715, "eval_biology_steps_per_second": 5.679, "step": 1400 }, { "epoch": 2.24, "eval_chemistry_entropy": 0.8868081021308899, "eval_chemistry_loss": 1.0847948789596558, "eval_chemistry_mean_token_accuracy": 0.7322362198829651, "eval_chemistry_num_tokens": 39854873.0, "eval_chemistry_runtime": 27.1533, "eval_chemistry_samples_per_second": 18.414, "eval_chemistry_steps_per_second": 4.603, "step": 1400 }, { "epoch": 2.24, "eval_math_entropy": 0.6937474160194397, "eval_math_loss": 0.7352772951126099, "eval_math_mean_token_accuracy": 0.8037003560066223, "eval_math_num_tokens": 39854873.0, "eval_math_runtime": 27.5567, "eval_math_samples_per_second": 18.144, "eval_math_steps_per_second": 4.536, "step": 1400 }, { "epoch": 2.24, "eval_cyber_entropy": 2.280633035302162, "eval_cyber_loss": 2.848487615585327, "eval_cyber_mean_token_accuracy": 0.46778192803263663, "eval_cyber_num_tokens": 39854873.0, "eval_cyber_runtime": 26.2399, "eval_cyber_samples_per_second": 15.13, "eval_cyber_steps_per_second": 3.811, "step": 1400 }, { "entropy": 0.6573172532021999, "epoch": 2.2560000000000002, "grad_norm": 22.75, "learning_rate": 1.9091111111111113e-05, "loss": 10.4628, "mean_token_accuracy": 0.8198426373302936, "num_tokens": 40141190.0, "step": 1410 }, { "entropy": 0.6748596677556634, "epoch": 2.2720000000000002, "grad_norm": 26.25, "learning_rate": 1.906888888888889e-05, "loss": 10.7759, "mean_token_accuracy": 0.8157621681690216, "num_tokens": 40415203.0, "step": 1420 }, { "entropy": 0.6660139387473464, "epoch": 2.288, "grad_norm": 25.0, "learning_rate": 1.904666666666667e-05, "loss": 10.5722, "mean_token_accuracy": 0.8170963436365127, "num_tokens": 40702393.0, "step": 1430 }, { "entropy": 0.6447202865034342, "epoch": 2.304, "grad_norm": 24.875, "learning_rate": 1.9024444444444446e-05, "loss": 10.2772, "mean_token_accuracy": 0.8228711977601051, "num_tokens": 40982775.0, "step": 1440 }, { "entropy": 0.667175211571157, "epoch": 2.32, "grad_norm": 25.375, "learning_rate": 1.9002222222222224e-05, "loss": 10.6322, "mean_token_accuracy": 0.817449289560318, "num_tokens": 41263356.0, "step": 1450 }, { "entropy": 0.6582919212058187, "epoch": 2.336, "grad_norm": 24.625, "learning_rate": 1.898e-05, "loss": 10.5061, "mean_token_accuracy": 0.8195808235555887, "num_tokens": 41545235.0, "step": 1460 }, { "entropy": 0.683755399286747, "epoch": 2.352, "grad_norm": 24.25, "learning_rate": 1.895777777777778e-05, "loss": 10.8267, "mean_token_accuracy": 0.8143463153392076, "num_tokens": 41833417.0, "step": 1470 }, { "entropy": 0.6577698297798633, "epoch": 2.368, "grad_norm": 24.625, "learning_rate": 1.8935555555555556e-05, "loss": 10.5268, "mean_token_accuracy": 0.8191198598593473, "num_tokens": 42117030.0, "step": 1480 }, { "entropy": 0.6793028621003032, "epoch": 2.384, "grad_norm": 28.25, "learning_rate": 1.8913333333333334e-05, "loss": 10.7829, "mean_token_accuracy": 0.8163190931081772, "num_tokens": 42410990.0, "step": 1490 }, { "entropy": 0.6641744881868362, "epoch": 2.4, "grad_norm": 24.5, "learning_rate": 1.8891111111111115e-05, "loss": 10.5965, "mean_token_accuracy": 0.8189876776188612, "num_tokens": 42691890.0, "step": 1500 }, { "epoch": 2.4, "eval_biology_entropy": 1.2293707489967347, "eval_biology_loss": 1.4714155197143555, "eval_biology_mean_token_accuracy": 0.6514331855773926, "eval_biology_num_tokens": 42691890.0, "eval_biology_runtime": 21.9938, "eval_biology_samples_per_second": 22.734, "eval_biology_steps_per_second": 5.683, "step": 1500 }, { "epoch": 2.4, "eval_chemistry_entropy": 0.9099567327499389, "eval_chemistry_loss": 1.0838559865951538, "eval_chemistry_mean_token_accuracy": 0.7319689979553222, "eval_chemistry_num_tokens": 42691890.0, "eval_chemistry_runtime": 26.8774, "eval_chemistry_samples_per_second": 18.603, "eval_chemistry_steps_per_second": 4.651, "step": 1500 }, { "epoch": 2.4, "eval_math_entropy": 0.6841141791343689, "eval_math_loss": 0.726224958896637, "eval_math_mean_token_accuracy": 0.8050775575637817, "eval_math_num_tokens": 42691890.0, "eval_math_runtime": 27.5383, "eval_math_samples_per_second": 18.157, "eval_math_steps_per_second": 4.539, "step": 1500 }, { "epoch": 2.4, "eval_cyber_entropy": 2.1712605100870133, "eval_cyber_loss": 2.742515802383423, "eval_cyber_mean_token_accuracy": 0.48678219854831695, "eval_cyber_num_tokens": 42691890.0, "eval_cyber_runtime": 26.1725, "eval_cyber_samples_per_second": 15.169, "eval_cyber_steps_per_second": 3.821, "step": 1500 }, { "entropy": 0.681360544078052, "epoch": 2.416, "grad_norm": 23.875, "learning_rate": 1.886888888888889e-05, "loss": 10.8018, "mean_token_accuracy": 0.8153177864849568, "num_tokens": 42971588.0, "step": 1510 }, { "entropy": 0.6504227627068758, "epoch": 2.432, "grad_norm": 23.625, "learning_rate": 1.884666666666667e-05, "loss": 10.4029, "mean_token_accuracy": 0.8209474917501212, "num_tokens": 43253821.0, "step": 1520 }, { "entropy": 0.6501767633482813, "epoch": 2.448, "grad_norm": 24.375, "learning_rate": 1.8824444444444445e-05, "loss": 10.3415, "mean_token_accuracy": 0.8225077040493488, "num_tokens": 43550902.0, "step": 1530 }, { "entropy": 0.6623956672847271, "epoch": 2.464, "grad_norm": 24.125, "learning_rate": 1.8802222222222226e-05, "loss": 10.5246, "mean_token_accuracy": 0.8181775715202093, "num_tokens": 43844259.0, "step": 1540 }, { "entropy": 0.6859173832461238, "epoch": 2.48, "grad_norm": 23.125, "learning_rate": 1.878e-05, "loss": 10.9239, "mean_token_accuracy": 0.8137104224413634, "num_tokens": 44115701.0, "step": 1550 }, { "entropy": 0.6711945479735733, "epoch": 2.496, "grad_norm": 21.875, "learning_rate": 1.875777777777778e-05, "loss": 10.6685, "mean_token_accuracy": 0.8173221621662379, "num_tokens": 44405520.0, "step": 1560 }, { "entropy": 0.6624481493607164, "epoch": 2.512, "grad_norm": 23.375, "learning_rate": 1.873555555555556e-05, "loss": 10.4945, "mean_token_accuracy": 0.8196087624877691, "num_tokens": 44686477.0, "step": 1570 }, { "entropy": 0.6599110793322325, "epoch": 2.528, "grad_norm": 22.75, "learning_rate": 1.8713333333333336e-05, "loss": 10.4873, "mean_token_accuracy": 0.8193403802812099, "num_tokens": 44969760.0, "step": 1580 }, { "entropy": 0.6503490032628179, "epoch": 2.544, "grad_norm": 22.375, "learning_rate": 1.8691111111111114e-05, "loss": 10.4441, "mean_token_accuracy": 0.8206405211240053, "num_tokens": 45255326.0, "step": 1590 }, { "entropy": 0.6536685146391392, "epoch": 2.56, "grad_norm": 22.875, "learning_rate": 1.866888888888889e-05, "loss": 10.4091, "mean_token_accuracy": 0.8202110458165407, "num_tokens": 45532525.0, "step": 1600 }, { "epoch": 2.56, "eval_biology_entropy": 1.2134844155311584, "eval_biology_loss": 1.478155255317688, "eval_biology_mean_token_accuracy": 0.6511134657859802, "eval_biology_num_tokens": 45532525.0, "eval_biology_runtime": 21.9488, "eval_biology_samples_per_second": 22.78, "eval_biology_steps_per_second": 5.695, "step": 1600 }, { "epoch": 2.56, "eval_chemistry_entropy": 0.8961781821250916, "eval_chemistry_loss": 1.0829113721847534, "eval_chemistry_mean_token_accuracy": 0.7327736926078796, "eval_chemistry_num_tokens": 45532525.0, "eval_chemistry_runtime": 26.8437, "eval_chemistry_samples_per_second": 18.626, "eval_chemistry_steps_per_second": 4.657, "step": 1600 }, { "epoch": 2.56, "eval_math_entropy": 0.6742748956680298, "eval_math_loss": 0.7170487642288208, "eval_math_mean_token_accuracy": 0.8080672206878662, "eval_math_num_tokens": 45532525.0, "eval_math_runtime": 27.5073, "eval_math_samples_per_second": 18.177, "eval_math_steps_per_second": 4.544, "step": 1600 }, { "epoch": 2.56, "eval_cyber_entropy": 2.198235506415367, "eval_cyber_loss": 2.8126513957977295, "eval_cyber_mean_token_accuracy": 0.4754572454094887, "eval_cyber_num_tokens": 45532525.0, "eval_cyber_runtime": 26.3904, "eval_cyber_samples_per_second": 15.043, "eval_cyber_steps_per_second": 3.789, "step": 1600 }, { "entropy": 0.659245578199625, "epoch": 2.576, "grad_norm": 23.125, "learning_rate": 1.864666666666667e-05, "loss": 10.4939, "mean_token_accuracy": 0.8194193851202727, "num_tokens": 45817478.0, "step": 1610 }, { "entropy": 0.6425925368443132, "epoch": 2.592, "grad_norm": 22.375, "learning_rate": 1.8624444444444446e-05, "loss": 10.2816, "mean_token_accuracy": 0.8222486432641745, "num_tokens": 46109575.0, "step": 1620 }, { "entropy": 0.6715872915461659, "epoch": 2.608, "grad_norm": 25.25, "learning_rate": 1.8602222222222224e-05, "loss": 10.6569, "mean_token_accuracy": 0.8173692885786294, "num_tokens": 46391461.0, "step": 1630 }, { "entropy": 0.6294447083026171, "epoch": 2.624, "grad_norm": 22.5, "learning_rate": 1.858e-05, "loss": 10.0396, "mean_token_accuracy": 0.8279551289975643, "num_tokens": 46683117.0, "step": 1640 }, { "entropy": 0.6628140497952699, "epoch": 2.64, "grad_norm": 23.375, "learning_rate": 1.855777777777778e-05, "loss": 10.5068, "mean_token_accuracy": 0.8184260647743941, "num_tokens": 46965534.0, "step": 1650 }, { "entropy": 0.6376811485737562, "epoch": 2.656, "grad_norm": 23.0, "learning_rate": 1.8535555555555557e-05, "loss": 10.0941, "mean_token_accuracy": 0.8235533174127341, "num_tokens": 47245852.0, "step": 1660 }, { "entropy": 0.6615891676396132, "epoch": 2.672, "grad_norm": 24.625, "learning_rate": 1.8513333333333335e-05, "loss": 10.5681, "mean_token_accuracy": 0.8179556384682656, "num_tokens": 47524916.0, "step": 1670 }, { "entropy": 0.6560400146991014, "epoch": 2.6879999999999997, "grad_norm": 22.0, "learning_rate": 1.8491111111111112e-05, "loss": 10.4122, "mean_token_accuracy": 0.820205406472087, "num_tokens": 47807131.0, "step": 1680 }, { "entropy": 0.6595821080729365, "epoch": 2.7039999999999997, "grad_norm": 23.875, "learning_rate": 1.846888888888889e-05, "loss": 10.5383, "mean_token_accuracy": 0.8188040845096112, "num_tokens": 48099654.0, "step": 1690 }, { "entropy": 0.6555765904486179, "epoch": 2.7199999999999998, "grad_norm": 24.125, "learning_rate": 1.8446666666666667e-05, "loss": 10.3768, "mean_token_accuracy": 0.8199018821120262, "num_tokens": 48375019.0, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_biology_entropy": 1.1961839065551758, "eval_biology_loss": 1.474403977394104, "eval_biology_mean_token_accuracy": 0.6521351528167725, "eval_biology_num_tokens": 48375019.0, "eval_biology_runtime": 22.2564, "eval_biology_samples_per_second": 22.465, "eval_biology_steps_per_second": 5.616, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_chemistry_entropy": 0.8893180413246154, "eval_chemistry_loss": 1.0786951780319214, "eval_chemistry_mean_token_accuracy": 0.7333630976676941, "eval_chemistry_num_tokens": 48375019.0, "eval_chemistry_runtime": 26.8892, "eval_chemistry_samples_per_second": 18.595, "eval_chemistry_steps_per_second": 4.649, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_math_entropy": 0.6758732006549836, "eval_math_loss": 0.7107370495796204, "eval_math_mean_token_accuracy": 0.8093916850090027, "eval_math_num_tokens": 48375019.0, "eval_math_runtime": 27.5333, "eval_math_samples_per_second": 18.16, "eval_math_steps_per_second": 4.54, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_cyber_entropy": 2.2349812412261962, "eval_cyber_loss": 2.779714822769165, "eval_cyber_mean_token_accuracy": 0.4819659352302551, "eval_cyber_num_tokens": 48375019.0, "eval_cyber_runtime": 26.1417, "eval_cyber_samples_per_second": 15.186, "eval_cyber_steps_per_second": 3.825, "step": 1700 }, { "entropy": 0.6559532942250371, "epoch": 2.7359999999999998, "grad_norm": 24.0, "learning_rate": 1.842444444444445e-05, "loss": 10.4594, "mean_token_accuracy": 0.8196242570877075, "num_tokens": 48659284.0, "step": 1710 }, { "entropy": 0.6569495009258389, "epoch": 2.752, "grad_norm": 22.25, "learning_rate": 1.8402222222222223e-05, "loss": 10.4467, "mean_token_accuracy": 0.8201438017189503, "num_tokens": 48943804.0, "step": 1720 }, { "entropy": 0.6529736818745733, "epoch": 2.768, "grad_norm": 21.875, "learning_rate": 1.8380000000000004e-05, "loss": 10.4356, "mean_token_accuracy": 0.8206765007227659, "num_tokens": 49230939.0, "step": 1730 }, { "entropy": 0.6720895063132047, "epoch": 2.784, "grad_norm": 23.0, "learning_rate": 1.8357777777777778e-05, "loss": 10.6977, "mean_token_accuracy": 0.8159397479146719, "num_tokens": 49504425.0, "step": 1740 }, { "entropy": 0.6508316185325385, "epoch": 2.8, "grad_norm": 23.125, "learning_rate": 1.833555555555556e-05, "loss": 10.351, "mean_token_accuracy": 0.8222934223711491, "num_tokens": 49782661.0, "step": 1750 }, { "entropy": 0.6627168050035834, "epoch": 2.816, "grad_norm": 24.5, "learning_rate": 1.8313333333333333e-05, "loss": 10.445, "mean_token_accuracy": 0.8191927138715982, "num_tokens": 50073632.0, "step": 1760 }, { "entropy": 0.6389238258823753, "epoch": 2.832, "grad_norm": 25.875, "learning_rate": 1.8291111111111114e-05, "loss": 10.2761, "mean_token_accuracy": 0.8222359851002693, "num_tokens": 50356964.0, "step": 1770 }, { "entropy": 0.6667038291692734, "epoch": 2.848, "grad_norm": 21.375, "learning_rate": 1.8268888888888888e-05, "loss": 10.586, "mean_token_accuracy": 0.818210769072175, "num_tokens": 50644535.0, "step": 1780 }, { "entropy": 0.6509249521419406, "epoch": 2.864, "grad_norm": 24.125, "learning_rate": 1.824666666666667e-05, "loss": 10.4338, "mean_token_accuracy": 0.8203214287757874, "num_tokens": 50925653.0, "step": 1790 }, { "entropy": 0.6507569069042802, "epoch": 2.88, "grad_norm": 23.625, "learning_rate": 1.8224444444444447e-05, "loss": 10.285, "mean_token_accuracy": 0.8213449958711863, "num_tokens": 51204374.0, "step": 1800 }, { "epoch": 2.88, "eval_biology_entropy": 1.1794821062088012, "eval_biology_loss": 1.4819698333740234, "eval_biology_mean_token_accuracy": 0.6517698068618775, "eval_biology_num_tokens": 51204374.0, "eval_biology_runtime": 21.9924, "eval_biology_samples_per_second": 22.735, "eval_biology_steps_per_second": 5.684, "step": 1800 }, { "epoch": 2.88, "eval_chemistry_entropy": 0.8722342405319213, "eval_chemistry_loss": 1.0784211158752441, "eval_chemistry_mean_token_accuracy": 0.7344747610092163, "eval_chemistry_num_tokens": 51204374.0, "eval_chemistry_runtime": 26.8909, "eval_chemistry_samples_per_second": 18.594, "eval_chemistry_steps_per_second": 4.648, "step": 1800 }, { "epoch": 2.88, "eval_math_entropy": 0.6604897229671478, "eval_math_loss": 0.7050039768218994, "eval_math_mean_token_accuracy": 0.8106719055175782, "eval_math_num_tokens": 51204374.0, "eval_math_runtime": 27.5351, "eval_math_samples_per_second": 18.159, "eval_math_steps_per_second": 4.54, "step": 1800 }, { "epoch": 2.88, "eval_cyber_entropy": 2.112373055815697, "eval_cyber_loss": 2.8215885162353516, "eval_cyber_mean_token_accuracy": 0.4802186432480812, "eval_cyber_num_tokens": 51204374.0, "eval_cyber_runtime": 26.1331, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.827, "step": 1800 }, { "entropy": 0.6363504879176617, "epoch": 2.896, "grad_norm": 23.75, "learning_rate": 1.8202222222222225e-05, "loss": 10.224, "mean_token_accuracy": 0.8235703807324171, "num_tokens": 51483944.0, "step": 1810 }, { "entropy": 0.6513262124732137, "epoch": 2.912, "grad_norm": 23.0, "learning_rate": 1.8180000000000002e-05, "loss": 10.3132, "mean_token_accuracy": 0.8217961758375167, "num_tokens": 51765755.0, "step": 1820 }, { "entropy": 0.6529291735962033, "epoch": 2.928, "grad_norm": 23.25, "learning_rate": 1.815777777777778e-05, "loss": 10.3766, "mean_token_accuracy": 0.8222961116582155, "num_tokens": 52056379.0, "step": 1830 }, { "entropy": 0.6346785051748156, "epoch": 2.944, "grad_norm": 23.75, "learning_rate": 1.8135555555555557e-05, "loss": 10.14, "mean_token_accuracy": 0.8229989748448133, "num_tokens": 52346232.0, "step": 1840 }, { "entropy": 0.6526656987145543, "epoch": 2.96, "grad_norm": 21.75, "learning_rate": 1.8113333333333335e-05, "loss": 10.38, "mean_token_accuracy": 0.8202067915350199, "num_tokens": 52633789.0, "step": 1850 }, { "entropy": 0.6502787992358208, "epoch": 2.976, "grad_norm": 21.625, "learning_rate": 1.8091111111111113e-05, "loss": 10.2748, "mean_token_accuracy": 0.8214363507926464, "num_tokens": 52911755.0, "step": 1860 }, { "entropy": 0.6417823160067201, "epoch": 2.992, "grad_norm": 22.875, "learning_rate": 1.806888888888889e-05, "loss": 10.2309, "mean_token_accuracy": 0.8228288643062115, "num_tokens": 53198176.0, "step": 1870 }, { "entropy": 0.6243049314245581, "epoch": 3.008, "grad_norm": 26.25, "learning_rate": 1.8046666666666668e-05, "loss": 9.7241, "mean_token_accuracy": 0.8291641604155302, "num_tokens": 53481893.0, "step": 1880 }, { "entropy": 0.5638323642313481, "epoch": 3.024, "grad_norm": 27.125, "learning_rate": 1.8024444444444445e-05, "loss": 8.9824, "mean_token_accuracy": 0.8390695653855801, "num_tokens": 53771717.0, "step": 1890 }, { "entropy": 0.558561889640987, "epoch": 3.04, "grad_norm": 24.125, "learning_rate": 1.8002222222222223e-05, "loss": 8.9038, "mean_token_accuracy": 0.8408384509384632, "num_tokens": 54058045.0, "step": 1900 }, { "epoch": 3.04, "eval_biology_entropy": 1.0243187880516051, "eval_biology_loss": 1.581600546836853, "eval_biology_mean_token_accuracy": 0.6445312175750733, "eval_biology_num_tokens": 54058045.0, "eval_biology_runtime": 21.9531, "eval_biology_samples_per_second": 22.776, "eval_biology_steps_per_second": 5.694, "step": 1900 }, { "epoch": 3.04, "eval_chemistry_entropy": 0.7532488117218018, "eval_chemistry_loss": 1.1571515798568726, "eval_chemistry_mean_token_accuracy": 0.7285859537124634, "eval_chemistry_num_tokens": 54058045.0, "eval_chemistry_runtime": 26.8231, "eval_chemistry_samples_per_second": 18.641, "eval_chemistry_steps_per_second": 4.66, "step": 1900 }, { "epoch": 3.04, "eval_math_entropy": 0.5930595288276672, "eval_math_loss": 0.7225678563117981, "eval_math_mean_token_accuracy": 0.8094966850280761, "eval_math_num_tokens": 54058045.0, "eval_math_runtime": 27.497, "eval_math_samples_per_second": 18.184, "eval_math_steps_per_second": 4.546, "step": 1900 }, { "epoch": 3.04, "eval_cyber_entropy": 1.8411164230108261, "eval_cyber_loss": 3.063901424407959, "eval_cyber_mean_token_accuracy": 0.47065704002976416, "eval_cyber_num_tokens": 54058045.0, "eval_cyber_runtime": 26.1343, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.826, "step": 1900 }, { "entropy": 0.552313212864101, "epoch": 3.056, "grad_norm": 26.625, "learning_rate": 1.798e-05, "loss": 8.7715, "mean_token_accuracy": 0.8414296887814998, "num_tokens": 54334332.0, "step": 1910 }, { "entropy": 0.5570558808743954, "epoch": 3.072, "grad_norm": 27.125, "learning_rate": 1.7957777777777778e-05, "loss": 8.7638, "mean_token_accuracy": 0.8418575689196587, "num_tokens": 54624543.0, "step": 1920 }, { "entropy": 0.5351565392687917, "epoch": 3.088, "grad_norm": 27.625, "learning_rate": 1.7935555555555556e-05, "loss": 8.5286, "mean_token_accuracy": 0.8457726195454598, "num_tokens": 54907550.0, "step": 1930 }, { "entropy": 0.5519297284074127, "epoch": 3.104, "grad_norm": 27.75, "learning_rate": 1.7913333333333337e-05, "loss": 8.7504, "mean_token_accuracy": 0.8423502463847399, "num_tokens": 55190959.0, "step": 1940 }, { "entropy": 0.544937571324408, "epoch": 3.12, "grad_norm": 27.0, "learning_rate": 1.789111111111111e-05, "loss": 8.6451, "mean_token_accuracy": 0.8444420550018549, "num_tokens": 55481635.0, "step": 1950 }, { "entropy": 0.546911165677011, "epoch": 3.136, "grad_norm": 26.75, "learning_rate": 1.7868888888888892e-05, "loss": 8.6997, "mean_token_accuracy": 0.8431835647672414, "num_tokens": 55769010.0, "step": 1960 }, { "entropy": 0.5542461348697543, "epoch": 3.152, "grad_norm": 26.5, "learning_rate": 1.7846666666666666e-05, "loss": 8.7996, "mean_token_accuracy": 0.842128399387002, "num_tokens": 56053160.0, "step": 1970 }, { "entropy": 0.5676137331873179, "epoch": 3.168, "grad_norm": 26.75, "learning_rate": 1.7824444444444447e-05, "loss": 8.9893, "mean_token_accuracy": 0.8386933848261833, "num_tokens": 56337066.0, "step": 1980 }, { "entropy": 0.5574962265789509, "epoch": 3.184, "grad_norm": 30.0, "learning_rate": 1.780222222222222e-05, "loss": 8.8126, "mean_token_accuracy": 0.8404616348445415, "num_tokens": 56618899.0, "step": 1990 }, { "entropy": 0.5407627185806632, "epoch": 3.2, "grad_norm": 26.0, "learning_rate": 1.7780000000000003e-05, "loss": 8.6062, "mean_token_accuracy": 0.8444454524666071, "num_tokens": 56910071.0, "step": 2000 }, { "epoch": 3.2, "eval_biology_entropy": 0.9433750138282776, "eval_biology_loss": 1.6609779596328735, "eval_biology_mean_token_accuracy": 0.6404439859390259, "eval_biology_num_tokens": 56910071.0, "eval_biology_runtime": 21.9804, "eval_biology_samples_per_second": 22.748, "eval_biology_steps_per_second": 5.687, "step": 2000 }, { "epoch": 3.2, "eval_chemistry_entropy": 0.697946096420288, "eval_chemistry_loss": 1.214890718460083, "eval_chemistry_mean_token_accuracy": 0.7252082509994506, "eval_chemistry_num_tokens": 56910071.0, "eval_chemistry_runtime": 26.8518, "eval_chemistry_samples_per_second": 18.621, "eval_chemistry_steps_per_second": 4.655, "step": 2000 }, { "epoch": 3.2, "eval_math_entropy": 0.5889736828804016, "eval_math_loss": 0.72825688123703, "eval_math_mean_token_accuracy": 0.809436321735382, "eval_math_num_tokens": 56910071.0, "eval_math_runtime": 27.524, "eval_math_samples_per_second": 18.166, "eval_math_steps_per_second": 4.541, "step": 2000 }, { "epoch": 3.2, "eval_cyber_entropy": 1.8025889378786086, "eval_cyber_loss": 3.0566282272338867, "eval_cyber_mean_token_accuracy": 0.46734614998102186, "eval_cyber_num_tokens": 56910071.0, "eval_cyber_runtime": 26.2014, "eval_cyber_samples_per_second": 15.152, "eval_cyber_steps_per_second": 3.817, "step": 2000 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.791878293573609e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }