{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.8, "eval_steps": 100, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.9806057829409838, "epoch": 0.016, "grad_norm": 157.0, "learning_rate": 1.8e-07, "loss": 21.7295, "mean_token_accuracy": 0.695003604888916, "num_tokens": 280941.0, "step": 10 }, { "entropy": 1.0097382467240095, "epoch": 0.032, "grad_norm": 171.0, "learning_rate": 3.8e-07, "loss": 21.935, "mean_token_accuracy": 0.6911116372793913, "num_tokens": 558056.0, "step": 20 }, { "entropy": 1.0059957668185233, "epoch": 0.048, "grad_norm": 146.0, "learning_rate": 5.800000000000001e-07, "loss": 21.4243, "mean_token_accuracy": 0.6972350142896175, "num_tokens": 836753.0, "step": 30 }, { "entropy": 1.0606450594961643, "epoch": 0.064, "grad_norm": 129.0, "learning_rate": 7.8e-07, "loss": 22.2256, "mean_token_accuracy": 0.6895487096160651, "num_tokens": 1126446.0, "step": 40 }, { "entropy": 1.1183118436485529, "epoch": 0.08, "grad_norm": 99.5, "learning_rate": 9.800000000000001e-07, "loss": 21.2808, "mean_token_accuracy": 0.6964452721178531, "num_tokens": 1413596.0, "step": 50 }, { "entropy": 1.1463136691600084, "epoch": 0.096, "grad_norm": 83.0, "learning_rate": 1.1800000000000001e-06, "loss": 20.5501, "mean_token_accuracy": 0.7033276192843914, "num_tokens": 1701193.0, "step": 60 }, { "entropy": 1.1966488853096962, "epoch": 0.112, "grad_norm": 75.5, "learning_rate": 1.3800000000000001e-06, "loss": 20.5984, "mean_token_accuracy": 0.7012545391917229, "num_tokens": 1979232.0, "step": 70 }, { "entropy": 1.2089795324951411, "epoch": 0.128, "grad_norm": 69.0, "learning_rate": 1.5800000000000001e-06, "loss": 20.2753, "mean_token_accuracy": 0.7075193412601948, "num_tokens": 2274177.0, "step": 80 }, { "entropy": 1.1860636565834284, "epoch": 0.144, "grad_norm": 70.0, "learning_rate": 1.7800000000000001e-06, "loss": 19.7829, "mean_token_accuracy": 0.710378497838974, "num_tokens": 2548445.0, "step": 90 }, { "entropy": 1.1838629063218833, "epoch": 0.16, "grad_norm": 85.5, "learning_rate": 1.98e-06, "loss": 19.4179, "mean_token_accuracy": 0.7149847097694874, "num_tokens": 2824418.0, "step": 100 }, { "epoch": 0.16, "eval_biology_entropy": 1.474690469264984, "eval_biology_loss": 1.3576592206954956, "eval_biology_mean_token_accuracy": 0.6709716210365295, "eval_biology_num_tokens": 2824418.0, "eval_biology_runtime": 22.204, "eval_biology_samples_per_second": 22.518, "eval_biology_steps_per_second": 5.63, "step": 100 }, { "epoch": 0.16, "eval_chemistry_entropy": 1.247537253856659, "eval_chemistry_loss": 1.1488269567489624, "eval_chemistry_mean_token_accuracy": 0.7154391117095947, "eval_chemistry_num_tokens": 2824418.0, "eval_chemistry_runtime": 26.8594, "eval_chemistry_samples_per_second": 18.615, "eval_chemistry_steps_per_second": 4.654, "step": 100 }, { "epoch": 0.16, "eval_math_entropy": 1.2081865873336792, "eval_math_loss": 1.2300448417663574, "eval_math_mean_token_accuracy": 0.710663827419281, "eval_math_num_tokens": 2824418.0, "eval_math_runtime": 27.5187, "eval_math_samples_per_second": 18.169, "eval_math_steps_per_second": 4.542, "step": 100 }, { "epoch": 0.16, "eval_cyber_entropy": 3.049607857465744, "eval_cyber_loss": 3.3160624504089355, "eval_cyber_mean_token_accuracy": 0.4259996695816517, "eval_cyber_num_tokens": 2824418.0, "eval_cyber_runtime": 26.1305, "eval_cyber_samples_per_second": 15.193, "eval_cyber_steps_per_second": 3.827, "step": 100 }, { "entropy": 1.1945379309356212, "epoch": 0.176, "grad_norm": 78.5, "learning_rate": 2.1800000000000003e-06, "loss": 19.3659, "mean_token_accuracy": 0.7157268539071083, "num_tokens": 3110313.0, "step": 110 }, { "entropy": 1.1879339709877967, "epoch": 0.192, "grad_norm": 62.5, "learning_rate": 2.38e-06, "loss": 19.2957, "mean_token_accuracy": 0.7150671608746052, "num_tokens": 3394170.0, "step": 120 }, { "entropy": 1.1658376209437846, "epoch": 0.208, "grad_norm": 64.5, "learning_rate": 2.5800000000000003e-06, "loss": 18.9243, "mean_token_accuracy": 0.7201551966369152, "num_tokens": 3673600.0, "step": 130 }, { "entropy": 1.1712732832878827, "epoch": 0.224, "grad_norm": 62.25, "learning_rate": 2.7800000000000005e-06, "loss": 18.8985, "mean_token_accuracy": 0.7195578265935183, "num_tokens": 3953732.0, "step": 140 }, { "entropy": 1.1372143357992173, "epoch": 0.24, "grad_norm": 59.25, "learning_rate": 2.9800000000000003e-06, "loss": 18.3388, "mean_token_accuracy": 0.7276442721486092, "num_tokens": 4243655.0, "step": 150 }, { "entropy": 1.1075575590133666, "epoch": 0.256, "grad_norm": 55.5, "learning_rate": 3.1800000000000005e-06, "loss": 17.7317, "mean_token_accuracy": 0.7332122329622507, "num_tokens": 4531471.0, "step": 160 }, { "entropy": 1.1275247156620025, "epoch": 0.272, "grad_norm": 53.0, "learning_rate": 3.3800000000000007e-06, "loss": 18.2164, "mean_token_accuracy": 0.7298861864954234, "num_tokens": 4810284.0, "step": 170 }, { "entropy": 1.1071187134832143, "epoch": 0.288, "grad_norm": 49.25, "learning_rate": 3.58e-06, "loss": 17.7625, "mean_token_accuracy": 0.7337470225989818, "num_tokens": 5095104.0, "step": 180 }, { "entropy": 1.1132069051265716, "epoch": 0.304, "grad_norm": 54.25, "learning_rate": 3.7800000000000002e-06, "loss": 17.7962, "mean_token_accuracy": 0.7336070898920297, "num_tokens": 5383732.0, "step": 190 }, { "entropy": 1.0761282254010438, "epoch": 0.32, "grad_norm": 51.0, "learning_rate": 3.980000000000001e-06, "loss": 17.3026, "mean_token_accuracy": 0.7393171060830355, "num_tokens": 5676334.0, "step": 200 }, { "epoch": 0.32, "eval_biology_entropy": 1.4338086099624634, "eval_biology_loss": 1.3705029487609863, "eval_biology_mean_token_accuracy": 0.6675603828430176, "eval_biology_num_tokens": 5676334.0, "eval_biology_runtime": 22.0314, "eval_biology_samples_per_second": 22.695, "eval_biology_steps_per_second": 5.674, "step": 200 }, { "epoch": 0.32, "eval_chemistry_entropy": 1.1902209458351136, "eval_chemistry_loss": 1.1403467655181885, "eval_chemistry_mean_token_accuracy": 0.7157996978759765, "eval_chemistry_num_tokens": 5676334.0, "eval_chemistry_runtime": 26.8655, "eval_chemistry_samples_per_second": 18.611, "eval_chemistry_steps_per_second": 4.653, "step": 200 }, { "epoch": 0.32, "eval_math_entropy": 1.1017972359657286, "eval_math_loss": 1.1044487953186035, "eval_math_mean_token_accuracy": 0.7332207527160645, "eval_math_num_tokens": 5676334.0, "eval_math_runtime": 27.5365, "eval_math_samples_per_second": 18.158, "eval_math_steps_per_second": 4.539, "step": 200 }, { "epoch": 0.32, "eval_cyber_entropy": 2.916593015193939, "eval_cyber_loss": 3.2743771076202393, "eval_cyber_mean_token_accuracy": 0.4284310387074947, "eval_cyber_num_tokens": 5676334.0, "eval_cyber_runtime": 26.1262, "eval_cyber_samples_per_second": 15.195, "eval_cyber_steps_per_second": 3.828, "step": 200 }, { "entropy": 1.0972535338252782, "epoch": 0.336, "grad_norm": 48.0, "learning_rate": 4.18e-06, "loss": 17.6967, "mean_token_accuracy": 0.7339770793914795, "num_tokens": 5958480.0, "step": 210 }, { "entropy": 1.0661637954413892, "epoch": 0.352, "grad_norm": 52.0, "learning_rate": 4.38e-06, "loss": 17.0451, "mean_token_accuracy": 0.7420999370515347, "num_tokens": 6242161.0, "step": 220 }, { "entropy": 1.0574691709131003, "epoch": 0.368, "grad_norm": 43.75, "learning_rate": 4.58e-06, "loss": 17.0476, "mean_token_accuracy": 0.7413399379700423, "num_tokens": 6523679.0, "step": 230 }, { "entropy": 1.0383559666574, "epoch": 0.384, "grad_norm": 52.75, "learning_rate": 4.78e-06, "loss": 16.6413, "mean_token_accuracy": 0.7455471660941839, "num_tokens": 6810978.0, "step": 240 }, { "entropy": 1.0844071809202434, "epoch": 0.4, "grad_norm": 54.25, "learning_rate": 4.980000000000001e-06, "loss": 17.5456, "mean_token_accuracy": 0.7374324273318053, "num_tokens": 7096903.0, "step": 250 }, { "entropy": 1.0462343256920577, "epoch": 0.416, "grad_norm": 46.25, "learning_rate": 5.18e-06, "loss": 16.7333, "mean_token_accuracy": 0.7437998823821544, "num_tokens": 7377181.0, "step": 260 }, { "entropy": 1.0309648185968399, "epoch": 0.432, "grad_norm": 53.5, "learning_rate": 5.380000000000001e-06, "loss": 16.3635, "mean_token_accuracy": 0.7494884602725506, "num_tokens": 7650523.0, "step": 270 }, { "entropy": 1.011606451496482, "epoch": 0.448, "grad_norm": 55.5, "learning_rate": 5.580000000000001e-06, "loss": 16.2941, "mean_token_accuracy": 0.7498481426388025, "num_tokens": 7936788.0, "step": 280 }, { "entropy": 1.0099118243902923, "epoch": 0.464, "grad_norm": 46.25, "learning_rate": 5.78e-06, "loss": 16.2711, "mean_token_accuracy": 0.7521415069699288, "num_tokens": 8223147.0, "step": 290 }, { "entropy": 1.0334131706506013, "epoch": 0.48, "grad_norm": 43.25, "learning_rate": 5.98e-06, "loss": 16.4655, "mean_token_accuracy": 0.748864620923996, "num_tokens": 8506030.0, "step": 300 }, { "epoch": 0.48, "eval_biology_entropy": 1.422005220413208, "eval_biology_loss": 1.3784860372543335, "eval_biology_mean_token_accuracy": 0.6655148763656616, "eval_biology_num_tokens": 8506030.0, "eval_biology_runtime": 22.0223, "eval_biology_samples_per_second": 22.704, "eval_biology_steps_per_second": 5.676, "step": 300 }, { "epoch": 0.48, "eval_chemistry_entropy": 1.1605761876106262, "eval_chemistry_loss": 1.1291848421096802, "eval_chemistry_mean_token_accuracy": 0.7179730429649352, "eval_chemistry_num_tokens": 8506030.0, "eval_chemistry_runtime": 26.8904, "eval_chemistry_samples_per_second": 18.594, "eval_chemistry_steps_per_second": 4.648, "step": 300 }, { "epoch": 0.48, "eval_math_entropy": 1.0216443300247193, "eval_math_loss": 1.0201935768127441, "eval_math_mean_token_accuracy": 0.749815523147583, "eval_math_num_tokens": 8506030.0, "eval_math_runtime": 27.5335, "eval_math_samples_per_second": 18.16, "eval_math_steps_per_second": 4.54, "step": 300 }, { "epoch": 0.48, "eval_cyber_entropy": 2.940396952629089, "eval_cyber_loss": 3.153137683868408, "eval_cyber_mean_token_accuracy": 0.4395553506910801, "eval_cyber_num_tokens": 8506030.0, "eval_cyber_runtime": 26.1452, "eval_cyber_samples_per_second": 15.184, "eval_cyber_steps_per_second": 3.825, "step": 300 }, { "entropy": 0.9862126674503088, "epoch": 0.496, "grad_norm": 56.25, "learning_rate": 6.18e-06, "loss": 15.7919, "mean_token_accuracy": 0.7557632889598608, "num_tokens": 8788726.0, "step": 310 }, { "entropy": 0.9811401419341564, "epoch": 0.512, "grad_norm": 46.25, "learning_rate": 6.380000000000001e-06, "loss": 15.7454, "mean_token_accuracy": 0.7576492365449667, "num_tokens": 9078039.0, "step": 320 }, { "entropy": 0.9647420089691877, "epoch": 0.528, "grad_norm": 44.5, "learning_rate": 6.5800000000000005e-06, "loss": 15.4619, "mean_token_accuracy": 0.7620009411126375, "num_tokens": 9373860.0, "step": 330 }, { "entropy": 0.9784351203590631, "epoch": 0.544, "grad_norm": 51.75, "learning_rate": 6.780000000000001e-06, "loss": 15.6775, "mean_token_accuracy": 0.7592026349157095, "num_tokens": 9660940.0, "step": 340 }, { "entropy": 0.9894711822271347, "epoch": 0.56, "grad_norm": 62.25, "learning_rate": 6.98e-06, "loss": 15.8268, "mean_token_accuracy": 0.754706758633256, "num_tokens": 9932302.0, "step": 350 }, { "entropy": 0.9637547507882118, "epoch": 0.576, "grad_norm": 45.0, "learning_rate": 7.180000000000001e-06, "loss": 15.4676, "mean_token_accuracy": 0.7605381533503532, "num_tokens": 10215462.0, "step": 360 }, { "entropy": 0.9582533340901136, "epoch": 0.592, "grad_norm": 42.5, "learning_rate": 7.3800000000000005e-06, "loss": 15.343, "mean_token_accuracy": 0.7618525486439467, "num_tokens": 10504396.0, "step": 370 }, { "entropy": 0.9651506002992392, "epoch": 0.608, "grad_norm": 42.75, "learning_rate": 7.58e-06, "loss": 15.4775, "mean_token_accuracy": 0.7616991735994816, "num_tokens": 10793126.0, "step": 380 }, { "entropy": 0.951299836859107, "epoch": 0.624, "grad_norm": 46.0, "learning_rate": 7.78e-06, "loss": 15.2327, "mean_token_accuracy": 0.7628108691424131, "num_tokens": 11081768.0, "step": 390 }, { "entropy": 0.9370664428919554, "epoch": 0.64, "grad_norm": 45.75, "learning_rate": 7.980000000000002e-06, "loss": 14.9056, "mean_token_accuracy": 0.7668638564646244, "num_tokens": 11370320.0, "step": 400 }, { "epoch": 0.64, "eval_biology_entropy": 1.4101261868476869, "eval_biology_loss": 1.385290503501892, "eval_biology_mean_token_accuracy": 0.6643283500671386, "eval_biology_num_tokens": 11370320.0, "eval_biology_runtime": 22.0311, "eval_biology_samples_per_second": 22.695, "eval_biology_steps_per_second": 5.674, "step": 400 }, { "epoch": 0.64, "eval_chemistry_entropy": 1.134839651107788, "eval_chemistry_loss": 1.117846965789795, "eval_chemistry_mean_token_accuracy": 0.721014030456543, "eval_chemistry_num_tokens": 11370320.0, "eval_chemistry_runtime": 26.9193, "eval_chemistry_samples_per_second": 18.574, "eval_chemistry_steps_per_second": 4.644, "step": 400 }, { "epoch": 0.64, "eval_math_entropy": 0.9476065578460693, "eval_math_loss": 0.9578045606613159, "eval_math_mean_token_accuracy": 0.760874231338501, "eval_math_num_tokens": 11370320.0, "eval_math_runtime": 27.5198, "eval_math_samples_per_second": 18.169, "eval_math_steps_per_second": 4.542, "step": 400 }, { "epoch": 0.64, "eval_cyber_entropy": 2.8283654510974885, "eval_cyber_loss": 3.0569260120391846, "eval_cyber_mean_token_accuracy": 0.4431245893239975, "eval_cyber_num_tokens": 11370320.0, "eval_cyber_runtime": 26.384, "eval_cyber_samples_per_second": 15.047, "eval_cyber_steps_per_second": 3.79, "step": 400 }, { "entropy": 0.9207184508442878, "epoch": 0.656, "grad_norm": 44.5, "learning_rate": 8.18e-06, "loss": 14.748, "mean_token_accuracy": 0.7685097701847553, "num_tokens": 11657835.0, "step": 410 }, { "entropy": 0.9384444292634726, "epoch": 0.672, "grad_norm": 44.5, "learning_rate": 8.380000000000001e-06, "loss": 15.0013, "mean_token_accuracy": 0.7654238797724247, "num_tokens": 11949262.0, "step": 420 }, { "entropy": 0.9148579228669405, "epoch": 0.688, "grad_norm": 35.75, "learning_rate": 8.580000000000001e-06, "loss": 14.6923, "mean_token_accuracy": 0.7695376992225647, "num_tokens": 12227640.0, "step": 430 }, { "entropy": 0.9156919397413731, "epoch": 0.704, "grad_norm": 36.5, "learning_rate": 8.78e-06, "loss": 14.6672, "mean_token_accuracy": 0.7705338027328252, "num_tokens": 12516641.0, "step": 440 }, { "entropy": 0.9278485044836998, "epoch": 0.72, "grad_norm": 42.5, "learning_rate": 8.98e-06, "loss": 14.8435, "mean_token_accuracy": 0.7673114899545908, "num_tokens": 12793343.0, "step": 450 }, { "entropy": 0.9052219696342945, "epoch": 0.736, "grad_norm": 41.0, "learning_rate": 9.180000000000002e-06, "loss": 14.5171, "mean_token_accuracy": 0.7729556966573, "num_tokens": 13077981.0, "step": 460 }, { "entropy": 0.8888643320649863, "epoch": 0.752, "grad_norm": 41.25, "learning_rate": 9.38e-06, "loss": 14.1497, "mean_token_accuracy": 0.776068452000618, "num_tokens": 13358957.0, "step": 470 }, { "entropy": 0.8620530396699906, "epoch": 0.768, "grad_norm": 42.5, "learning_rate": 9.58e-06, "loss": 13.834, "mean_token_accuracy": 0.7819891981780529, "num_tokens": 13653412.0, "step": 480 }, { "entropy": 0.9176285572350025, "epoch": 0.784, "grad_norm": 39.0, "learning_rate": 9.780000000000001e-06, "loss": 14.6357, "mean_token_accuracy": 0.7709478087723255, "num_tokens": 13940856.0, "step": 490 }, { "entropy": 0.9128546692430973, "epoch": 0.8, "grad_norm": 40.25, "learning_rate": 9.980000000000001e-06, "loss": 14.5874, "mean_token_accuracy": 0.7713334109634161, "num_tokens": 14230754.0, "step": 500 }, { "epoch": 0.8, "eval_biology_entropy": 1.4446520280838013, "eval_biology_loss": 1.392669439315796, "eval_biology_mean_token_accuracy": 0.6637401723861694, "eval_biology_num_tokens": 14230754.0, "eval_biology_runtime": 22.2391, "eval_biology_samples_per_second": 22.483, "eval_biology_steps_per_second": 5.621, "step": 500 }, { "epoch": 0.8, "eval_chemistry_entropy": 1.1566239352226257, "eval_chemistry_loss": 1.1047961711883545, "eval_chemistry_mean_token_accuracy": 0.7233014287948608, "eval_chemistry_num_tokens": 14230754.0, "eval_chemistry_runtime": 26.8042, "eval_chemistry_samples_per_second": 18.654, "eval_chemistry_steps_per_second": 4.663, "step": 500 }, { "epoch": 0.8, "eval_math_entropy": 0.9341866765022278, "eval_math_loss": 0.9057817459106445, "eval_math_mean_token_accuracy": 0.7704657621383667, "eval_math_num_tokens": 14230754.0, "eval_math_runtime": 27.5093, "eval_math_samples_per_second": 18.176, "eval_math_steps_per_second": 4.544, "step": 500 }, { "epoch": 0.8, "eval_cyber_entropy": 2.902767553329468, "eval_cyber_loss": 2.947915554046631, "eval_cyber_mean_token_accuracy": 0.45555895671248436, "eval_cyber_num_tokens": 14230754.0, "eval_cyber_runtime": 26.1612, "eval_cyber_samples_per_second": 15.175, "eval_cyber_steps_per_second": 3.822, "step": 500 }, { "entropy": 0.8841698631644249, "epoch": 0.816, "grad_norm": 38.0, "learning_rate": 1.018e-05, "loss": 14.1477, "mean_token_accuracy": 0.7752781912684441, "num_tokens": 14519893.0, "step": 510 }, { "entropy": 0.875461632013321, "epoch": 0.832, "grad_norm": 39.0, "learning_rate": 1.038e-05, "loss": 14.0361, "mean_token_accuracy": 0.7779335591942071, "num_tokens": 14805088.0, "step": 520 }, { "entropy": 0.899658627063036, "epoch": 0.848, "grad_norm": 37.25, "learning_rate": 1.0580000000000002e-05, "loss": 14.3925, "mean_token_accuracy": 0.7728543490171432, "num_tokens": 15086306.0, "step": 530 }, { "entropy": 0.8889949310570955, "epoch": 0.864, "grad_norm": 41.0, "learning_rate": 1.0780000000000002e-05, "loss": 14.1314, "mean_token_accuracy": 0.7759746141731739, "num_tokens": 15370985.0, "step": 540 }, { "entropy": 0.8936371214687824, "epoch": 0.88, "grad_norm": 36.5, "learning_rate": 1.0980000000000002e-05, "loss": 14.2807, "mean_token_accuracy": 0.7754444174468518, "num_tokens": 15653836.0, "step": 550 }, { "entropy": 0.8719608142971993, "epoch": 0.896, "grad_norm": 34.75, "learning_rate": 1.1180000000000001e-05, "loss": 13.9767, "mean_token_accuracy": 0.7785432428121567, "num_tokens": 15932179.0, "step": 560 }, { "entropy": 0.8601628458127379, "epoch": 0.912, "grad_norm": 35.25, "learning_rate": 1.138e-05, "loss": 13.7333, "mean_token_accuracy": 0.77998266518116, "num_tokens": 16219842.0, "step": 570 }, { "entropy": 0.848052042350173, "epoch": 0.928, "grad_norm": 34.25, "learning_rate": 1.1580000000000001e-05, "loss": 13.5598, "mean_token_accuracy": 0.783201026916504, "num_tokens": 16499842.0, "step": 580 }, { "entropy": 0.8647568510845304, "epoch": 0.944, "grad_norm": 98.5, "learning_rate": 1.178e-05, "loss": 13.9513, "mean_token_accuracy": 0.7791629247367382, "num_tokens": 16781882.0, "step": 590 }, { "entropy": 0.8681454580277205, "epoch": 0.96, "grad_norm": 28.375, "learning_rate": 1.198e-05, "loss": 13.8619, "mean_token_accuracy": 0.7806598395109177, "num_tokens": 17067407.0, "step": 600 }, { "epoch": 0.96, "eval_biology_entropy": 1.403841501235962, "eval_biology_loss": 1.3980714082717896, "eval_biology_mean_token_accuracy": 0.6626365647315979, "eval_biology_num_tokens": 17067407.0, "eval_biology_runtime": 22.2675, "eval_biology_samples_per_second": 22.454, "eval_biology_steps_per_second": 5.614, "step": 600 }, { "epoch": 0.96, "eval_chemistry_entropy": 1.099705493927002, "eval_chemistry_loss": 1.0918222665786743, "eval_chemistry_mean_token_accuracy": 0.7260931057929992, "eval_chemistry_num_tokens": 17067407.0, "eval_chemistry_runtime": 26.9078, "eval_chemistry_samples_per_second": 18.582, "eval_chemistry_steps_per_second": 4.645, "step": 600 }, { "epoch": 0.96, "eval_math_entropy": 0.8664147562980652, "eval_math_loss": 0.8665754795074463, "eval_math_mean_token_accuracy": 0.7780344748497009, "eval_math_num_tokens": 17067407.0, "eval_math_runtime": 27.5569, "eval_math_samples_per_second": 18.144, "eval_math_steps_per_second": 4.536, "step": 600 }, { "epoch": 0.96, "eval_cyber_entropy": 2.6499598491191865, "eval_cyber_loss": 2.8601412773132324, "eval_cyber_mean_token_accuracy": 0.4642623996734619, "eval_cyber_num_tokens": 17067407.0, "eval_cyber_runtime": 26.1765, "eval_cyber_samples_per_second": 15.166, "eval_cyber_steps_per_second": 3.82, "step": 600 }, { "entropy": 0.8494564741849899, "epoch": 0.976, "grad_norm": 40.25, "learning_rate": 1.218e-05, "loss": 13.6376, "mean_token_accuracy": 0.7825958080589771, "num_tokens": 17350994.0, "step": 610 }, { "entropy": 0.8730685204267502, "epoch": 0.992, "grad_norm": 38.75, "learning_rate": 1.2380000000000002e-05, "loss": 13.8595, "mean_token_accuracy": 0.7786977473646403, "num_tokens": 17637514.0, "step": 620 }, { "entropy": 0.8468878531828523, "epoch": 1.008, "grad_norm": 32.5, "learning_rate": 1.2580000000000002e-05, "loss": 13.534, "mean_token_accuracy": 0.7822502862662077, "num_tokens": 17926570.0, "step": 630 }, { "entropy": 0.8290399981662631, "epoch": 1.024, "grad_norm": 29.375, "learning_rate": 1.2780000000000001e-05, "loss": 13.2779, "mean_token_accuracy": 0.7864516761153937, "num_tokens": 18207652.0, "step": 640 }, { "entropy": 0.8298395985737443, "epoch": 1.04, "grad_norm": 31.0, "learning_rate": 1.2980000000000001e-05, "loss": 13.1281, "mean_token_accuracy": 0.7878676626831294, "num_tokens": 18484931.0, "step": 650 }, { "entropy": 0.8254991695284843, "epoch": 1.056, "grad_norm": 49.0, "learning_rate": 1.3180000000000001e-05, "loss": 13.2747, "mean_token_accuracy": 0.7866876818239689, "num_tokens": 18773457.0, "step": 660 }, { "entropy": 0.8410865612328052, "epoch": 1.072, "grad_norm": 38.75, "learning_rate": 1.3380000000000002e-05, "loss": 13.3101, "mean_token_accuracy": 0.7853217396885157, "num_tokens": 19055365.0, "step": 670 }, { "entropy": 0.8255538143217563, "epoch": 1.088, "grad_norm": 28.875, "learning_rate": 1.3580000000000002e-05, "loss": 13.2174, "mean_token_accuracy": 0.7872007485479117, "num_tokens": 19345730.0, "step": 680 }, { "entropy": 0.823124579153955, "epoch": 1.104, "grad_norm": 29.375, "learning_rate": 1.378e-05, "loss": 13.1696, "mean_token_accuracy": 0.7877223126590251, "num_tokens": 19637390.0, "step": 690 }, { "entropy": 0.8028364922851324, "epoch": 1.12, "grad_norm": 34.5, "learning_rate": 1.398e-05, "loss": 12.7597, "mean_token_accuracy": 0.792793495580554, "num_tokens": 19923914.0, "step": 700 }, { "epoch": 1.12, "eval_biology_entropy": 1.3986766724586486, "eval_biology_loss": 1.407199740409851, "eval_biology_mean_token_accuracy": 0.6613863172531128, "eval_biology_num_tokens": 19923914.0, "eval_biology_runtime": 21.9946, "eval_biology_samples_per_second": 22.733, "eval_biology_steps_per_second": 5.683, "step": 700 }, { "epoch": 1.12, "eval_chemistry_entropy": 1.0769947800636293, "eval_chemistry_loss": 1.0871174335479736, "eval_chemistry_mean_token_accuracy": 0.7282235732078552, "eval_chemistry_num_tokens": 19923914.0, "eval_chemistry_runtime": 26.8846, "eval_chemistry_samples_per_second": 18.598, "eval_chemistry_steps_per_second": 4.65, "step": 700 }, { "epoch": 1.12, "eval_math_entropy": 0.8362808737754822, "eval_math_loss": 0.8373622894287109, "eval_math_mean_token_accuracy": 0.7839989976882935, "eval_math_num_tokens": 19923914.0, "eval_math_runtime": 27.4992, "eval_math_samples_per_second": 18.182, "eval_math_steps_per_second": 4.546, "step": 700 }, { "epoch": 1.12, "eval_cyber_entropy": 2.5681421542167664, "eval_cyber_loss": 2.8721120357513428, "eval_cyber_mean_token_accuracy": 0.4651792038977146, "eval_cyber_num_tokens": 19923914.0, "eval_cyber_runtime": 26.1144, "eval_cyber_samples_per_second": 15.202, "eval_cyber_steps_per_second": 3.829, "step": 700 }, { "entropy": 0.8095206459984183, "epoch": 1.1360000000000001, "grad_norm": 38.5, "learning_rate": 1.418e-05, "loss": 12.8823, "mean_token_accuracy": 0.7904491990804672, "num_tokens": 20201892.0, "step": 710 }, { "entropy": 0.8196006739512086, "epoch": 1.152, "grad_norm": 30.75, "learning_rate": 1.4380000000000001e-05, "loss": 13.0652, "mean_token_accuracy": 0.7910903133451939, "num_tokens": 20490282.0, "step": 720 }, { "entropy": 0.8046272564679384, "epoch": 1.168, "grad_norm": 29.5, "learning_rate": 1.4580000000000001e-05, "loss": 12.8751, "mean_token_accuracy": 0.7915120176970959, "num_tokens": 20785786.0, "step": 730 }, { "entropy": 0.8037027461454272, "epoch": 1.184, "grad_norm": 30.5, "learning_rate": 1.478e-05, "loss": 12.8554, "mean_token_accuracy": 0.7912269696593285, "num_tokens": 21074205.0, "step": 740 }, { "entropy": 0.79942841604352, "epoch": 1.2, "grad_norm": 30.75, "learning_rate": 1.498e-05, "loss": 12.7343, "mean_token_accuracy": 0.7923291265964508, "num_tokens": 21369159.0, "step": 750 }, { "entropy": 0.807464637234807, "epoch": 1.216, "grad_norm": 35.0, "learning_rate": 1.5180000000000002e-05, "loss": 12.8367, "mean_token_accuracy": 0.7913754984736443, "num_tokens": 21649178.0, "step": 760 }, { "entropy": 0.7876615423709155, "epoch": 1.232, "grad_norm": 30.25, "learning_rate": 1.5380000000000002e-05, "loss": 12.556, "mean_token_accuracy": 0.7947213523089885, "num_tokens": 21930239.0, "step": 770 }, { "entropy": 0.7889078231528401, "epoch": 1.248, "grad_norm": 29.75, "learning_rate": 1.5580000000000003e-05, "loss": 12.5585, "mean_token_accuracy": 0.7938403252512216, "num_tokens": 22216387.0, "step": 780 }, { "entropy": 0.8203166201710701, "epoch": 1.264, "grad_norm": 30.0, "learning_rate": 1.578e-05, "loss": 13.0401, "mean_token_accuracy": 0.7889407943934202, "num_tokens": 22501002.0, "step": 790 }, { "entropy": 0.7915117274969816, "epoch": 1.28, "grad_norm": 33.0, "learning_rate": 1.5980000000000003e-05, "loss": 12.6967, "mean_token_accuracy": 0.7933882053941488, "num_tokens": 22779682.0, "step": 800 }, { "epoch": 1.28, "eval_biology_entropy": 1.4146036610603332, "eval_biology_loss": 1.413214921951294, "eval_biology_mean_token_accuracy": 0.659837914943695, "eval_biology_num_tokens": 22779682.0, "eval_biology_runtime": 22.0253, "eval_biology_samples_per_second": 22.701, "eval_biology_steps_per_second": 5.675, "step": 800 }, { "epoch": 1.28, "eval_chemistry_entropy": 1.0714722080230712, "eval_chemistry_loss": 1.0812031030654907, "eval_chemistry_mean_token_accuracy": 0.7291303877830505, "eval_chemistry_num_tokens": 22779682.0, "eval_chemistry_runtime": 26.8892, "eval_chemistry_samples_per_second": 18.595, "eval_chemistry_steps_per_second": 4.649, "step": 800 }, { "epoch": 1.28, "eval_math_entropy": 0.8192751173973083, "eval_math_loss": 0.8122938275337219, "eval_math_mean_token_accuracy": 0.789606306552887, "eval_math_num_tokens": 22779682.0, "eval_math_runtime": 27.5274, "eval_math_samples_per_second": 18.164, "eval_math_steps_per_second": 4.541, "step": 800 }, { "epoch": 1.28, "eval_cyber_entropy": 2.5670096004009246, "eval_cyber_loss": 2.8539652824401855, "eval_cyber_mean_token_accuracy": 0.46533648878335954, "eval_cyber_num_tokens": 22779682.0, "eval_cyber_runtime": 26.1723, "eval_cyber_samples_per_second": 15.169, "eval_cyber_steps_per_second": 3.821, "step": 800 }, { "entropy": 0.786592660844326, "epoch": 1.296, "grad_norm": 35.5, "learning_rate": 1.618e-05, "loss": 12.5036, "mean_token_accuracy": 0.7954838387668133, "num_tokens": 23057744.0, "step": 810 }, { "entropy": 0.7977361943572759, "epoch": 1.312, "grad_norm": 34.5, "learning_rate": 1.638e-05, "loss": 12.7511, "mean_token_accuracy": 0.7935790359973908, "num_tokens": 23344644.0, "step": 820 }, { "entropy": 0.8038571482524276, "epoch": 1.328, "grad_norm": 38.75, "learning_rate": 1.658e-05, "loss": 12.847, "mean_token_accuracy": 0.7909597154706717, "num_tokens": 23622405.0, "step": 830 }, { "entropy": 0.781531005539, "epoch": 1.3439999999999999, "grad_norm": 28.75, "learning_rate": 1.6780000000000002e-05, "loss": 12.4551, "mean_token_accuracy": 0.795590429380536, "num_tokens": 23899771.0, "step": 840 }, { "entropy": 0.7783096175640821, "epoch": 1.3599999999999999, "grad_norm": 27.0, "learning_rate": 1.698e-05, "loss": 12.4462, "mean_token_accuracy": 0.7967745348811149, "num_tokens": 24187023.0, "step": 850 }, { "entropy": 0.8302321504801512, "epoch": 1.376, "grad_norm": 30.625, "learning_rate": 1.718e-05, "loss": 13.2594, "mean_token_accuracy": 0.7850385505706072, "num_tokens": 24466132.0, "step": 860 }, { "entropy": 0.7808034917339682, "epoch": 1.392, "grad_norm": 34.5, "learning_rate": 1.7380000000000003e-05, "loss": 12.4747, "mean_token_accuracy": 0.7949298892170191, "num_tokens": 24748043.0, "step": 870 }, { "entropy": 0.7715026669204235, "epoch": 1.408, "grad_norm": 36.0, "learning_rate": 1.758e-05, "loss": 12.3399, "mean_token_accuracy": 0.7984749253839254, "num_tokens": 25036674.0, "step": 880 }, { "entropy": 0.7645593881607056, "epoch": 1.424, "grad_norm": 27.25, "learning_rate": 1.7780000000000003e-05, "loss": 12.1973, "mean_token_accuracy": 0.7993213057518005, "num_tokens": 25324579.0, "step": 890 }, { "entropy": 0.7820997565984726, "epoch": 1.44, "grad_norm": 33.0, "learning_rate": 1.798e-05, "loss": 12.5051, "mean_token_accuracy": 0.7951443370431661, "num_tokens": 25606824.0, "step": 900 }, { "epoch": 1.44, "eval_biology_entropy": 1.3808940649032593, "eval_biology_loss": 1.4220765829086304, "eval_biology_mean_token_accuracy": 0.6588086094856263, "eval_biology_num_tokens": 25606824.0, "eval_biology_runtime": 22.0118, "eval_biology_samples_per_second": 22.715, "eval_biology_steps_per_second": 5.679, "step": 900 }, { "epoch": 1.44, "eval_chemistry_entropy": 1.0482762174606324, "eval_chemistry_loss": 1.073889136314392, "eval_chemistry_mean_token_accuracy": 0.7307762913703918, "eval_chemistry_num_tokens": 25606824.0, "eval_chemistry_runtime": 26.8657, "eval_chemistry_samples_per_second": 18.611, "eval_chemistry_steps_per_second": 4.653, "step": 900 }, { "epoch": 1.44, "eval_math_entropy": 0.7846709032058716, "eval_math_loss": 0.7932249903678894, "eval_math_mean_token_accuracy": 0.792679114818573, "eval_math_num_tokens": 25606824.0, "eval_math_runtime": 27.514, "eval_math_samples_per_second": 18.173, "eval_math_steps_per_second": 4.543, "step": 900 }, { "epoch": 1.44, "eval_cyber_entropy": 2.5483840811252594, "eval_cyber_loss": 2.8718831539154053, "eval_cyber_mean_token_accuracy": 0.4638554835319519, "eval_cyber_num_tokens": 25606824.0, "eval_cyber_runtime": 26.216, "eval_cyber_samples_per_second": 15.143, "eval_cyber_steps_per_second": 3.814, "step": 900 }, { "entropy": 0.7723285494372248, "epoch": 1.456, "grad_norm": 28.625, "learning_rate": 1.8180000000000002e-05, "loss": 12.303, "mean_token_accuracy": 0.7964108034968376, "num_tokens": 25886396.0, "step": 910 }, { "entropy": 0.7762986140325665, "epoch": 1.472, "grad_norm": 28.875, "learning_rate": 1.8380000000000004e-05, "loss": 12.4134, "mean_token_accuracy": 0.7955747056752444, "num_tokens": 26163618.0, "step": 920 }, { "entropy": 0.7938198037445545, "epoch": 1.488, "grad_norm": 29.625, "learning_rate": 1.858e-05, "loss": 12.75, "mean_token_accuracy": 0.7917455974966288, "num_tokens": 26438338.0, "step": 930 }, { "entropy": 0.7594615155830979, "epoch": 1.504, "grad_norm": 34.0, "learning_rate": 1.878e-05, "loss": 12.14, "mean_token_accuracy": 0.8009778898209333, "num_tokens": 26729255.0, "step": 940 }, { "entropy": 0.7861603863537312, "epoch": 1.52, "grad_norm": 27.125, "learning_rate": 1.898e-05, "loss": 12.4626, "mean_token_accuracy": 0.7956234533339739, "num_tokens": 27017935.0, "step": 950 }, { "entropy": 0.7631909586489201, "epoch": 1.536, "grad_norm": 24.625, "learning_rate": 1.918e-05, "loss": 12.1955, "mean_token_accuracy": 0.7989141892641782, "num_tokens": 27306339.0, "step": 960 }, { "entropy": 0.7708934009075165, "epoch": 1.552, "grad_norm": 27.75, "learning_rate": 1.938e-05, "loss": 12.1963, "mean_token_accuracy": 0.7984277427196502, "num_tokens": 27591959.0, "step": 970 }, { "entropy": 0.7459486592561007, "epoch": 1.568, "grad_norm": 28.625, "learning_rate": 1.9580000000000002e-05, "loss": 11.9228, "mean_token_accuracy": 0.8039638720452785, "num_tokens": 27884398.0, "step": 980 }, { "entropy": 0.7573445823043585, "epoch": 1.584, "grad_norm": 27.0, "learning_rate": 1.978e-05, "loss": 12.0883, "mean_token_accuracy": 0.8011246718466282, "num_tokens": 28171274.0, "step": 990 }, { "entropy": 0.7612122105434537, "epoch": 1.6, "grad_norm": 27.5, "learning_rate": 1.9980000000000002e-05, "loss": 12.0981, "mean_token_accuracy": 0.7986274570226669, "num_tokens": 28457624.0, "step": 1000 }, { "epoch": 1.6, "eval_biology_entropy": 1.3829385170936583, "eval_biology_loss": 1.4260554313659668, "eval_biology_mean_token_accuracy": 0.6589009766578674, "eval_biology_num_tokens": 28457624.0, "eval_biology_runtime": 22.024, "eval_biology_samples_per_second": 22.703, "eval_biology_steps_per_second": 5.676, "step": 1000 }, { "epoch": 1.6, "eval_chemistry_entropy": 1.0377137541770936, "eval_chemistry_loss": 1.0700007677078247, "eval_chemistry_mean_token_accuracy": 0.7318583874702453, "eval_chemistry_num_tokens": 28457624.0, "eval_chemistry_runtime": 26.9114, "eval_chemistry_samples_per_second": 18.579, "eval_chemistry_steps_per_second": 4.645, "step": 1000 }, { "epoch": 1.6, "eval_math_entropy": 0.775481684923172, "eval_math_loss": 0.7736496329307556, "eval_math_mean_token_accuracy": 0.7959285154342651, "eval_math_num_tokens": 28457624.0, "eval_math_runtime": 27.53, "eval_math_samples_per_second": 18.162, "eval_math_steps_per_second": 4.541, "step": 1000 }, { "epoch": 1.6, "eval_cyber_entropy": 2.5157601726055145, "eval_cyber_loss": 2.8350298404693604, "eval_cyber_mean_token_accuracy": 0.4679137858748436, "eval_cyber_num_tokens": 28457624.0, "eval_cyber_runtime": 26.1345, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.826, "step": 1000 }, { "entropy": 0.7650468161329627, "epoch": 1.616, "grad_norm": 24.875, "learning_rate": 1.9980000000000002e-05, "loss": 12.1883, "mean_token_accuracy": 0.798528803884983, "num_tokens": 28743099.0, "step": 1010 }, { "entropy": 0.7713425377383828, "epoch": 1.6320000000000001, "grad_norm": 25.75, "learning_rate": 1.995777777777778e-05, "loss": 12.2948, "mean_token_accuracy": 0.7963836405426263, "num_tokens": 29017297.0, "step": 1020 }, { "entropy": 0.7499153949320316, "epoch": 1.6480000000000001, "grad_norm": 25.75, "learning_rate": 1.9935555555555557e-05, "loss": 11.9856, "mean_token_accuracy": 0.803160610422492, "num_tokens": 29303707.0, "step": 1030 }, { "entropy": 0.7566261947154999, "epoch": 1.6640000000000001, "grad_norm": 25.0, "learning_rate": 1.9913333333333335e-05, "loss": 12.034, "mean_token_accuracy": 0.7999875675886869, "num_tokens": 29597156.0, "step": 1040 }, { "entropy": 0.7669804213568568, "epoch": 1.6800000000000002, "grad_norm": 24.25, "learning_rate": 1.9891111111111112e-05, "loss": 12.2025, "mean_token_accuracy": 0.7990686308592558, "num_tokens": 29883879.0, "step": 1050 }, { "entropy": 0.7553620956838131, "epoch": 1.696, "grad_norm": 28.125, "learning_rate": 1.986888888888889e-05, "loss": 12.1827, "mean_token_accuracy": 0.8000101692974567, "num_tokens": 30165760.0, "step": 1060 }, { "entropy": 0.7463795414194465, "epoch": 1.712, "grad_norm": 22.375, "learning_rate": 1.9846666666666668e-05, "loss": 11.9561, "mean_token_accuracy": 0.8028988271951676, "num_tokens": 30460367.0, "step": 1070 }, { "entropy": 0.7401833109557628, "epoch": 1.728, "grad_norm": 27.375, "learning_rate": 1.9824444444444445e-05, "loss": 11.7133, "mean_token_accuracy": 0.8052776392549277, "num_tokens": 30739137.0, "step": 1080 }, { "entropy": 0.7436290748417378, "epoch": 1.744, "grad_norm": 27.375, "learning_rate": 1.9802222222222226e-05, "loss": 11.8806, "mean_token_accuracy": 0.8036583166569471, "num_tokens": 31022663.0, "step": 1090 }, { "entropy": 0.7478637570515275, "epoch": 1.76, "grad_norm": 25.25, "learning_rate": 1.978e-05, "loss": 11.9202, "mean_token_accuracy": 0.8017146904021502, "num_tokens": 31306494.0, "step": 1100 }, { "epoch": 1.76, "eval_biology_entropy": 1.3845259475708007, "eval_biology_loss": 1.4283864498138428, "eval_biology_mean_token_accuracy": 0.657891107082367, "eval_biology_num_tokens": 31306494.0, "eval_biology_runtime": 21.9927, "eval_biology_samples_per_second": 22.735, "eval_biology_steps_per_second": 5.684, "step": 1100 }, { "epoch": 1.76, "eval_chemistry_entropy": 1.0231492972373963, "eval_chemistry_loss": 1.063183307647705, "eval_chemistry_mean_token_accuracy": 0.7330445971488952, "eval_chemistry_num_tokens": 31306494.0, "eval_chemistry_runtime": 26.8519, "eval_chemistry_samples_per_second": 18.621, "eval_chemistry_steps_per_second": 4.655, "step": 1100 }, { "epoch": 1.76, "eval_math_entropy": 0.7524698441028594, "eval_math_loss": 0.7613377571105957, "eval_math_mean_token_accuracy": 0.798446418762207, "eval_math_num_tokens": 31306494.0, "eval_math_runtime": 27.5284, "eval_math_samples_per_second": 18.163, "eval_math_steps_per_second": 4.541, "step": 1100 }, { "epoch": 1.76, "eval_cyber_entropy": 2.3844272685050965, "eval_cyber_loss": 2.8584094047546387, "eval_cyber_mean_token_accuracy": 0.47087193533778193, "eval_cyber_num_tokens": 31306494.0, "eval_cyber_runtime": 26.2154, "eval_cyber_samples_per_second": 15.144, "eval_cyber_steps_per_second": 3.815, "step": 1100 }, { "entropy": 0.7506109833717346, "epoch": 1.776, "grad_norm": 22.875, "learning_rate": 1.975777777777778e-05, "loss": 11.957, "mean_token_accuracy": 0.803263409435749, "num_tokens": 31595542.0, "step": 1110 }, { "entropy": 0.7545284632593393, "epoch": 1.792, "grad_norm": 25.25, "learning_rate": 1.9735555555555556e-05, "loss": 12.055, "mean_token_accuracy": 0.8008169520646333, "num_tokens": 31881189.0, "step": 1120 }, { "entropy": 0.7454792723059654, "epoch": 1.808, "grad_norm": 22.625, "learning_rate": 1.9713333333333337e-05, "loss": 11.8818, "mean_token_accuracy": 0.8028201397508383, "num_tokens": 32164196.0, "step": 1130 }, { "entropy": 0.7103133289143443, "epoch": 1.8239999999999998, "grad_norm": 23.625, "learning_rate": 1.969111111111111e-05, "loss": 11.3018, "mean_token_accuracy": 0.8097421944141387, "num_tokens": 32441530.0, "step": 1140 }, { "entropy": 0.7296694969758392, "epoch": 1.8399999999999999, "grad_norm": 23.125, "learning_rate": 1.9668888888888892e-05, "loss": 11.722, "mean_token_accuracy": 0.8063848353922367, "num_tokens": 32723145.0, "step": 1150 }, { "entropy": 0.746064018085599, "epoch": 1.8559999999999999, "grad_norm": 26.375, "learning_rate": 1.9646666666666666e-05, "loss": 11.9198, "mean_token_accuracy": 0.8034628454595805, "num_tokens": 33011263.0, "step": 1160 }, { "entropy": 0.7246854526922106, "epoch": 1.8719999999999999, "grad_norm": 25.125, "learning_rate": 1.9624444444444447e-05, "loss": 11.6702, "mean_token_accuracy": 0.8065225839614868, "num_tokens": 33298921.0, "step": 1170 }, { "entropy": 0.7422073289752007, "epoch": 1.888, "grad_norm": 33.0, "learning_rate": 1.9602222222222225e-05, "loss": 11.8223, "mean_token_accuracy": 0.8029078282415867, "num_tokens": 33576243.0, "step": 1180 }, { "entropy": 0.7377389714121818, "epoch": 1.904, "grad_norm": 21.875, "learning_rate": 1.9580000000000002e-05, "loss": 11.7626, "mean_token_accuracy": 0.803905576467514, "num_tokens": 33850968.0, "step": 1190 }, { "entropy": 0.720432554371655, "epoch": 1.92, "grad_norm": 24.125, "learning_rate": 1.955777777777778e-05, "loss": 11.4648, "mean_token_accuracy": 0.8074128460139036, "num_tokens": 34128558.0, "step": 1200 }, { "epoch": 1.92, "eval_biology_entropy": 1.426067009449005, "eval_biology_loss": 1.4298174381256104, "eval_biology_mean_token_accuracy": 0.6572316522598267, "eval_biology_num_tokens": 34128558.0, "eval_biology_runtime": 21.9751, "eval_biology_samples_per_second": 22.753, "eval_biology_steps_per_second": 5.688, "step": 1200 }, { "epoch": 1.92, "eval_chemistry_entropy": 1.052424753189087, "eval_chemistry_loss": 1.058487057685852, "eval_chemistry_mean_token_accuracy": 0.7344707479476928, "eval_chemistry_num_tokens": 34128558.0, "eval_chemistry_runtime": 26.8451, "eval_chemistry_samples_per_second": 18.625, "eval_chemistry_steps_per_second": 4.656, "step": 1200 }, { "epoch": 1.92, "eval_math_entropy": 0.7641568143367767, "eval_math_loss": 0.7467027306556702, "eval_math_mean_token_accuracy": 0.8014610476493835, "eval_math_num_tokens": 34128558.0, "eval_math_runtime": 27.5216, "eval_math_samples_per_second": 18.168, "eval_math_steps_per_second": 4.542, "step": 1200 }, { "epoch": 1.92, "eval_cyber_entropy": 2.4904652881622313, "eval_cyber_loss": 2.6954379081726074, "eval_cyber_mean_token_accuracy": 0.4830169627070427, "eval_cyber_num_tokens": 34128558.0, "eval_cyber_runtime": 26.1923, "eval_cyber_samples_per_second": 15.157, "eval_cyber_steps_per_second": 3.818, "step": 1200 }, { "entropy": 0.731533533334732, "epoch": 1.936, "grad_norm": 25.125, "learning_rate": 1.9535555555555557e-05, "loss": 11.6804, "mean_token_accuracy": 0.8052534744143486, "num_tokens": 34408056.0, "step": 1210 }, { "entropy": 0.7303263584151864, "epoch": 1.952, "grad_norm": 23.375, "learning_rate": 1.9513333333333335e-05, "loss": 11.5676, "mean_token_accuracy": 0.8064503286033868, "num_tokens": 34684679.0, "step": 1220 }, { "entropy": 0.7569911142811179, "epoch": 1.968, "grad_norm": 25.375, "learning_rate": 1.9491111111111113e-05, "loss": 12.1009, "mean_token_accuracy": 0.8005167040973902, "num_tokens": 34971038.0, "step": 1230 }, { "entropy": 0.7218442076817155, "epoch": 1.984, "grad_norm": 23.375, "learning_rate": 1.946888888888889e-05, "loss": 11.5014, "mean_token_accuracy": 0.808349072188139, "num_tokens": 35262281.0, "step": 1240 }, { "entropy": 0.7173755820840597, "epoch": 2.0, "grad_norm": 24.125, "learning_rate": 1.9446666666666668e-05, "loss": 11.4742, "mean_token_accuracy": 0.8086670659482479, "num_tokens": 35560864.0, "step": 1250 }, { "entropy": 0.6936481088399887, "epoch": 2.016, "grad_norm": 23.125, "learning_rate": 1.9424444444444446e-05, "loss": 10.8847, "mean_token_accuracy": 0.8153551481664181, "num_tokens": 35846704.0, "step": 1260 }, { "entropy": 0.6694988587871193, "epoch": 2.032, "grad_norm": 22.5, "learning_rate": 1.9402222222222223e-05, "loss": 10.711, "mean_token_accuracy": 0.816493459790945, "num_tokens": 36128775.0, "step": 1270 }, { "entropy": 0.6576118635013699, "epoch": 2.048, "grad_norm": 23.25, "learning_rate": 1.938e-05, "loss": 10.4997, "mean_token_accuracy": 0.8206516925245524, "num_tokens": 36419504.0, "step": 1280 }, { "entropy": 0.6648308178409934, "epoch": 2.064, "grad_norm": 22.375, "learning_rate": 1.935777777777778e-05, "loss": 10.5449, "mean_token_accuracy": 0.8189927719533443, "num_tokens": 36706816.0, "step": 1290 }, { "entropy": 0.6633218213915825, "epoch": 2.08, "grad_norm": 24.125, "learning_rate": 1.9335555555555556e-05, "loss": 10.5216, "mean_token_accuracy": 0.8182109944522381, "num_tokens": 36988475.0, "step": 1300 }, { "epoch": 2.08, "eval_biology_entropy": 1.240901198387146, "eval_biology_loss": 1.4575155973434448, "eval_biology_mean_token_accuracy": 0.6538263387680053, "eval_biology_num_tokens": 36988475.0, "eval_biology_runtime": 22.0164, "eval_biology_samples_per_second": 22.71, "eval_biology_steps_per_second": 5.678, "step": 1300 }, { "epoch": 2.08, "eval_chemistry_entropy": 0.9047480673789978, "eval_chemistry_loss": 1.077072262763977, "eval_chemistry_mean_token_accuracy": 0.7334306511878967, "eval_chemistry_num_tokens": 36988475.0, "eval_chemistry_runtime": 26.8608, "eval_chemistry_samples_per_second": 18.614, "eval_chemistry_steps_per_second": 4.654, "step": 1300 }, { "epoch": 2.08, "eval_math_entropy": 0.6772888927459717, "eval_math_loss": 0.7423775792121887, "eval_math_mean_token_accuracy": 0.8029003148078918, "eval_math_num_tokens": 36988475.0, "eval_math_runtime": 27.5039, "eval_math_samples_per_second": 18.179, "eval_math_steps_per_second": 4.545, "step": 1300 }, { "epoch": 2.08, "eval_cyber_entropy": 2.240402947664261, "eval_cyber_loss": 2.8395872116088867, "eval_cyber_mean_token_accuracy": 0.4778119161725044, "eval_cyber_num_tokens": 36988475.0, "eval_cyber_runtime": 26.1457, "eval_cyber_samples_per_second": 15.184, "eval_cyber_steps_per_second": 3.825, "step": 1300 }, { "entropy": 0.6708741160109639, "epoch": 2.096, "grad_norm": 22.375, "learning_rate": 1.9313333333333334e-05, "loss": 10.6881, "mean_token_accuracy": 0.8182591505348682, "num_tokens": 37270131.0, "step": 1310 }, { "entropy": 0.6532387970015406, "epoch": 2.112, "grad_norm": 24.0, "learning_rate": 1.9291111111111115e-05, "loss": 10.4792, "mean_token_accuracy": 0.8189583510160446, "num_tokens": 37563537.0, "step": 1320 }, { "entropy": 0.6555240735411644, "epoch": 2.128, "grad_norm": 22.375, "learning_rate": 1.926888888888889e-05, "loss": 10.369, "mean_token_accuracy": 0.8218111298978329, "num_tokens": 37843959.0, "step": 1330 }, { "entropy": 0.6665401035919786, "epoch": 2.144, "grad_norm": 24.25, "learning_rate": 1.924666666666667e-05, "loss": 10.5665, "mean_token_accuracy": 0.8194981347769499, "num_tokens": 38133092.0, "step": 1340 }, { "entropy": 0.6592796456068755, "epoch": 2.16, "grad_norm": 21.5, "learning_rate": 1.9224444444444444e-05, "loss": 10.5062, "mean_token_accuracy": 0.8200013760477305, "num_tokens": 38421229.0, "step": 1350 }, { "entropy": 0.639416103810072, "epoch": 2.176, "grad_norm": 23.0, "learning_rate": 1.9202222222222225e-05, "loss": 10.1779, "mean_token_accuracy": 0.8243647638708353, "num_tokens": 38708043.0, "step": 1360 }, { "entropy": 0.667258214391768, "epoch": 2.192, "grad_norm": 23.375, "learning_rate": 1.918e-05, "loss": 10.6186, "mean_token_accuracy": 0.8171255987137556, "num_tokens": 38996930.0, "step": 1370 }, { "entropy": 0.6627653013914824, "epoch": 2.208, "grad_norm": 23.75, "learning_rate": 1.915777777777778e-05, "loss": 10.6227, "mean_token_accuracy": 0.818722078576684, "num_tokens": 39279481.0, "step": 1380 }, { "entropy": 0.6583162900060415, "epoch": 2.224, "grad_norm": 24.0, "learning_rate": 1.9135555555555555e-05, "loss": 10.4429, "mean_token_accuracy": 0.8204963516443968, "num_tokens": 39569030.0, "step": 1390 }, { "entropy": 0.6620556140318513, "epoch": 2.24, "grad_norm": 23.625, "learning_rate": 1.9113333333333336e-05, "loss": 10.591, "mean_token_accuracy": 0.8184640970081091, "num_tokens": 39854873.0, "step": 1400 }, { "epoch": 2.24, "eval_biology_entropy": 1.1904019894599915, "eval_biology_loss": 1.473069667816162, "eval_biology_mean_token_accuracy": 0.6529810581207275, "eval_biology_num_tokens": 39854873.0, "eval_biology_runtime": 22.0119, "eval_biology_samples_per_second": 22.715, "eval_biology_steps_per_second": 5.679, "step": 1400 }, { "epoch": 2.24, "eval_chemistry_entropy": 0.8868081021308899, "eval_chemistry_loss": 1.0847948789596558, "eval_chemistry_mean_token_accuracy": 0.7322362198829651, "eval_chemistry_num_tokens": 39854873.0, "eval_chemistry_runtime": 27.1533, "eval_chemistry_samples_per_second": 18.414, "eval_chemistry_steps_per_second": 4.603, "step": 1400 }, { "epoch": 2.24, "eval_math_entropy": 0.6937474160194397, "eval_math_loss": 0.7352772951126099, "eval_math_mean_token_accuracy": 0.8037003560066223, "eval_math_num_tokens": 39854873.0, "eval_math_runtime": 27.5567, "eval_math_samples_per_second": 18.144, "eval_math_steps_per_second": 4.536, "step": 1400 }, { "epoch": 2.24, "eval_cyber_entropy": 2.280633035302162, "eval_cyber_loss": 2.848487615585327, "eval_cyber_mean_token_accuracy": 0.46778192803263663, "eval_cyber_num_tokens": 39854873.0, "eval_cyber_runtime": 26.2399, "eval_cyber_samples_per_second": 15.13, "eval_cyber_steps_per_second": 3.811, "step": 1400 }, { "entropy": 0.6573172532021999, "epoch": 2.2560000000000002, "grad_norm": 22.75, "learning_rate": 1.9091111111111113e-05, "loss": 10.4628, "mean_token_accuracy": 0.8198426373302936, "num_tokens": 40141190.0, "step": 1410 }, { "entropy": 0.6748596677556634, "epoch": 2.2720000000000002, "grad_norm": 26.25, "learning_rate": 1.906888888888889e-05, "loss": 10.7759, "mean_token_accuracy": 0.8157621681690216, "num_tokens": 40415203.0, "step": 1420 }, { "entropy": 0.6660139387473464, "epoch": 2.288, "grad_norm": 25.0, "learning_rate": 1.904666666666667e-05, "loss": 10.5722, "mean_token_accuracy": 0.8170963436365127, "num_tokens": 40702393.0, "step": 1430 }, { "entropy": 0.6447202865034342, "epoch": 2.304, "grad_norm": 24.875, "learning_rate": 1.9024444444444446e-05, "loss": 10.2772, "mean_token_accuracy": 0.8228711977601051, "num_tokens": 40982775.0, "step": 1440 }, { "entropy": 0.667175211571157, "epoch": 2.32, "grad_norm": 25.375, "learning_rate": 1.9002222222222224e-05, "loss": 10.6322, "mean_token_accuracy": 0.817449289560318, "num_tokens": 41263356.0, "step": 1450 }, { "entropy": 0.6582919212058187, "epoch": 2.336, "grad_norm": 24.625, "learning_rate": 1.898e-05, "loss": 10.5061, "mean_token_accuracy": 0.8195808235555887, "num_tokens": 41545235.0, "step": 1460 }, { "entropy": 0.683755399286747, "epoch": 2.352, "grad_norm": 24.25, "learning_rate": 1.895777777777778e-05, "loss": 10.8267, "mean_token_accuracy": 0.8143463153392076, "num_tokens": 41833417.0, "step": 1470 }, { "entropy": 0.6577698297798633, "epoch": 2.368, "grad_norm": 24.625, "learning_rate": 1.8935555555555556e-05, "loss": 10.5268, "mean_token_accuracy": 0.8191198598593473, "num_tokens": 42117030.0, "step": 1480 }, { "entropy": 0.6793028621003032, "epoch": 2.384, "grad_norm": 28.25, "learning_rate": 1.8913333333333334e-05, "loss": 10.7829, "mean_token_accuracy": 0.8163190931081772, "num_tokens": 42410990.0, "step": 1490 }, { "entropy": 0.6641744881868362, "epoch": 2.4, "grad_norm": 24.5, "learning_rate": 1.8891111111111115e-05, "loss": 10.5965, "mean_token_accuracy": 0.8189876776188612, "num_tokens": 42691890.0, "step": 1500 }, { "epoch": 2.4, "eval_biology_entropy": 1.2293707489967347, "eval_biology_loss": 1.4714155197143555, "eval_biology_mean_token_accuracy": 0.6514331855773926, "eval_biology_num_tokens": 42691890.0, "eval_biology_runtime": 21.9938, "eval_biology_samples_per_second": 22.734, "eval_biology_steps_per_second": 5.683, "step": 1500 }, { "epoch": 2.4, "eval_chemistry_entropy": 0.9099567327499389, "eval_chemistry_loss": 1.0838559865951538, "eval_chemistry_mean_token_accuracy": 0.7319689979553222, "eval_chemistry_num_tokens": 42691890.0, "eval_chemistry_runtime": 26.8774, "eval_chemistry_samples_per_second": 18.603, "eval_chemistry_steps_per_second": 4.651, "step": 1500 }, { "epoch": 2.4, "eval_math_entropy": 0.6841141791343689, "eval_math_loss": 0.726224958896637, "eval_math_mean_token_accuracy": 0.8050775575637817, "eval_math_num_tokens": 42691890.0, "eval_math_runtime": 27.5383, "eval_math_samples_per_second": 18.157, "eval_math_steps_per_second": 4.539, "step": 1500 }, { "epoch": 2.4, "eval_cyber_entropy": 2.1712605100870133, "eval_cyber_loss": 2.742515802383423, "eval_cyber_mean_token_accuracy": 0.48678219854831695, "eval_cyber_num_tokens": 42691890.0, "eval_cyber_runtime": 26.1725, "eval_cyber_samples_per_second": 15.169, "eval_cyber_steps_per_second": 3.821, "step": 1500 }, { "entropy": 0.681360544078052, "epoch": 2.416, "grad_norm": 23.875, "learning_rate": 1.886888888888889e-05, "loss": 10.8018, "mean_token_accuracy": 0.8153177864849568, "num_tokens": 42971588.0, "step": 1510 }, { "entropy": 0.6504227627068758, "epoch": 2.432, "grad_norm": 23.625, "learning_rate": 1.884666666666667e-05, "loss": 10.4029, "mean_token_accuracy": 0.8209474917501212, "num_tokens": 43253821.0, "step": 1520 }, { "entropy": 0.6501767633482813, "epoch": 2.448, "grad_norm": 24.375, "learning_rate": 1.8824444444444445e-05, "loss": 10.3415, "mean_token_accuracy": 0.8225077040493488, "num_tokens": 43550902.0, "step": 1530 }, { "entropy": 0.6623956672847271, "epoch": 2.464, "grad_norm": 24.125, "learning_rate": 1.8802222222222226e-05, "loss": 10.5246, "mean_token_accuracy": 0.8181775715202093, "num_tokens": 43844259.0, "step": 1540 }, { "entropy": 0.6859173832461238, "epoch": 2.48, "grad_norm": 23.125, "learning_rate": 1.878e-05, "loss": 10.9239, "mean_token_accuracy": 0.8137104224413634, "num_tokens": 44115701.0, "step": 1550 }, { "entropy": 0.6711945479735733, "epoch": 2.496, "grad_norm": 21.875, "learning_rate": 1.875777777777778e-05, "loss": 10.6685, "mean_token_accuracy": 0.8173221621662379, "num_tokens": 44405520.0, "step": 1560 }, { "entropy": 0.6624481493607164, "epoch": 2.512, "grad_norm": 23.375, "learning_rate": 1.873555555555556e-05, "loss": 10.4945, "mean_token_accuracy": 0.8196087624877691, "num_tokens": 44686477.0, "step": 1570 }, { "entropy": 0.6599110793322325, "epoch": 2.528, "grad_norm": 22.75, "learning_rate": 1.8713333333333336e-05, "loss": 10.4873, "mean_token_accuracy": 0.8193403802812099, "num_tokens": 44969760.0, "step": 1580 }, { "entropy": 0.6503490032628179, "epoch": 2.544, "grad_norm": 22.375, "learning_rate": 1.8691111111111114e-05, "loss": 10.4441, "mean_token_accuracy": 0.8206405211240053, "num_tokens": 45255326.0, "step": 1590 }, { "entropy": 0.6536685146391392, "epoch": 2.56, "grad_norm": 22.875, "learning_rate": 1.866888888888889e-05, "loss": 10.4091, "mean_token_accuracy": 0.8202110458165407, "num_tokens": 45532525.0, "step": 1600 }, { "epoch": 2.56, "eval_biology_entropy": 1.2134844155311584, "eval_biology_loss": 1.478155255317688, "eval_biology_mean_token_accuracy": 0.6511134657859802, "eval_biology_num_tokens": 45532525.0, "eval_biology_runtime": 21.9488, "eval_biology_samples_per_second": 22.78, "eval_biology_steps_per_second": 5.695, "step": 1600 }, { "epoch": 2.56, "eval_chemistry_entropy": 0.8961781821250916, "eval_chemistry_loss": 1.0829113721847534, "eval_chemistry_mean_token_accuracy": 0.7327736926078796, "eval_chemistry_num_tokens": 45532525.0, "eval_chemistry_runtime": 26.8437, "eval_chemistry_samples_per_second": 18.626, "eval_chemistry_steps_per_second": 4.657, "step": 1600 }, { "epoch": 2.56, "eval_math_entropy": 0.6742748956680298, "eval_math_loss": 0.7170487642288208, "eval_math_mean_token_accuracy": 0.8080672206878662, "eval_math_num_tokens": 45532525.0, "eval_math_runtime": 27.5073, "eval_math_samples_per_second": 18.177, "eval_math_steps_per_second": 4.544, "step": 1600 }, { "epoch": 2.56, "eval_cyber_entropy": 2.198235506415367, "eval_cyber_loss": 2.8126513957977295, "eval_cyber_mean_token_accuracy": 0.4754572454094887, "eval_cyber_num_tokens": 45532525.0, "eval_cyber_runtime": 26.3904, "eval_cyber_samples_per_second": 15.043, "eval_cyber_steps_per_second": 3.789, "step": 1600 }, { "entropy": 0.659245578199625, "epoch": 2.576, "grad_norm": 23.125, "learning_rate": 1.864666666666667e-05, "loss": 10.4939, "mean_token_accuracy": 0.8194193851202727, "num_tokens": 45817478.0, "step": 1610 }, { "entropy": 0.6425925368443132, "epoch": 2.592, "grad_norm": 22.375, "learning_rate": 1.8624444444444446e-05, "loss": 10.2816, "mean_token_accuracy": 0.8222486432641745, "num_tokens": 46109575.0, "step": 1620 }, { "entropy": 0.6715872915461659, "epoch": 2.608, "grad_norm": 25.25, "learning_rate": 1.8602222222222224e-05, "loss": 10.6569, "mean_token_accuracy": 0.8173692885786294, "num_tokens": 46391461.0, "step": 1630 }, { "entropy": 0.6294447083026171, "epoch": 2.624, "grad_norm": 22.5, "learning_rate": 1.858e-05, "loss": 10.0396, "mean_token_accuracy": 0.8279551289975643, "num_tokens": 46683117.0, "step": 1640 }, { "entropy": 0.6628140497952699, "epoch": 2.64, "grad_norm": 23.375, "learning_rate": 1.855777777777778e-05, "loss": 10.5068, "mean_token_accuracy": 0.8184260647743941, "num_tokens": 46965534.0, "step": 1650 }, { "entropy": 0.6376811485737562, "epoch": 2.656, "grad_norm": 23.0, "learning_rate": 1.8535555555555557e-05, "loss": 10.0941, "mean_token_accuracy": 0.8235533174127341, "num_tokens": 47245852.0, "step": 1660 }, { "entropy": 0.6615891676396132, "epoch": 2.672, "grad_norm": 24.625, "learning_rate": 1.8513333333333335e-05, "loss": 10.5681, "mean_token_accuracy": 0.8179556384682656, "num_tokens": 47524916.0, "step": 1670 }, { "entropy": 0.6560400146991014, "epoch": 2.6879999999999997, "grad_norm": 22.0, "learning_rate": 1.8491111111111112e-05, "loss": 10.4122, "mean_token_accuracy": 0.820205406472087, "num_tokens": 47807131.0, "step": 1680 }, { "entropy": 0.6595821080729365, "epoch": 2.7039999999999997, "grad_norm": 23.875, "learning_rate": 1.846888888888889e-05, "loss": 10.5383, "mean_token_accuracy": 0.8188040845096112, "num_tokens": 48099654.0, "step": 1690 }, { "entropy": 0.6555765904486179, "epoch": 2.7199999999999998, "grad_norm": 24.125, "learning_rate": 1.8446666666666667e-05, "loss": 10.3768, "mean_token_accuracy": 0.8199018821120262, "num_tokens": 48375019.0, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_biology_entropy": 1.1961839065551758, "eval_biology_loss": 1.474403977394104, "eval_biology_mean_token_accuracy": 0.6521351528167725, "eval_biology_num_tokens": 48375019.0, "eval_biology_runtime": 22.2564, "eval_biology_samples_per_second": 22.465, "eval_biology_steps_per_second": 5.616, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_chemistry_entropy": 0.8893180413246154, "eval_chemistry_loss": 1.0786951780319214, "eval_chemistry_mean_token_accuracy": 0.7333630976676941, "eval_chemistry_num_tokens": 48375019.0, "eval_chemistry_runtime": 26.8892, "eval_chemistry_samples_per_second": 18.595, "eval_chemistry_steps_per_second": 4.649, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_math_entropy": 0.6758732006549836, "eval_math_loss": 0.7107370495796204, "eval_math_mean_token_accuracy": 0.8093916850090027, "eval_math_num_tokens": 48375019.0, "eval_math_runtime": 27.5333, "eval_math_samples_per_second": 18.16, "eval_math_steps_per_second": 4.54, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_cyber_entropy": 2.2349812412261962, "eval_cyber_loss": 2.779714822769165, "eval_cyber_mean_token_accuracy": 0.4819659352302551, "eval_cyber_num_tokens": 48375019.0, "eval_cyber_runtime": 26.1417, "eval_cyber_samples_per_second": 15.186, "eval_cyber_steps_per_second": 3.825, "step": 1700 }, { "entropy": 0.6559532942250371, "epoch": 2.7359999999999998, "grad_norm": 24.0, "learning_rate": 1.842444444444445e-05, "loss": 10.4594, "mean_token_accuracy": 0.8196242570877075, "num_tokens": 48659284.0, "step": 1710 }, { "entropy": 0.6569495009258389, "epoch": 2.752, "grad_norm": 22.25, "learning_rate": 1.8402222222222223e-05, "loss": 10.4467, "mean_token_accuracy": 0.8201438017189503, "num_tokens": 48943804.0, "step": 1720 }, { "entropy": 0.6529736818745733, "epoch": 2.768, "grad_norm": 21.875, "learning_rate": 1.8380000000000004e-05, "loss": 10.4356, "mean_token_accuracy": 0.8206765007227659, "num_tokens": 49230939.0, "step": 1730 }, { "entropy": 0.6720895063132047, "epoch": 2.784, "grad_norm": 23.0, "learning_rate": 1.8357777777777778e-05, "loss": 10.6977, "mean_token_accuracy": 0.8159397479146719, "num_tokens": 49504425.0, "step": 1740 }, { "entropy": 0.6508316185325385, "epoch": 2.8, "grad_norm": 23.125, "learning_rate": 1.833555555555556e-05, "loss": 10.351, "mean_token_accuracy": 0.8222934223711491, "num_tokens": 49782661.0, "step": 1750 }, { "entropy": 0.6627168050035834, "epoch": 2.816, "grad_norm": 24.5, "learning_rate": 1.8313333333333333e-05, "loss": 10.445, "mean_token_accuracy": 0.8191927138715982, "num_tokens": 50073632.0, "step": 1760 }, { "entropy": 0.6389238258823753, "epoch": 2.832, "grad_norm": 25.875, "learning_rate": 1.8291111111111114e-05, "loss": 10.2761, "mean_token_accuracy": 0.8222359851002693, "num_tokens": 50356964.0, "step": 1770 }, { "entropy": 0.6667038291692734, "epoch": 2.848, "grad_norm": 21.375, "learning_rate": 1.8268888888888888e-05, "loss": 10.586, "mean_token_accuracy": 0.818210769072175, "num_tokens": 50644535.0, "step": 1780 }, { "entropy": 0.6509249521419406, "epoch": 2.864, "grad_norm": 24.125, "learning_rate": 1.824666666666667e-05, "loss": 10.4338, "mean_token_accuracy": 0.8203214287757874, "num_tokens": 50925653.0, "step": 1790 }, { "entropy": 0.6507569069042802, "epoch": 2.88, "grad_norm": 23.625, "learning_rate": 1.8224444444444447e-05, "loss": 10.285, "mean_token_accuracy": 0.8213449958711863, "num_tokens": 51204374.0, "step": 1800 }, { "epoch": 2.88, "eval_biology_entropy": 1.1794821062088012, "eval_biology_loss": 1.4819698333740234, "eval_biology_mean_token_accuracy": 0.6517698068618775, "eval_biology_num_tokens": 51204374.0, "eval_biology_runtime": 21.9924, "eval_biology_samples_per_second": 22.735, "eval_biology_steps_per_second": 5.684, "step": 1800 }, { "epoch": 2.88, "eval_chemistry_entropy": 0.8722342405319213, "eval_chemistry_loss": 1.0784211158752441, "eval_chemistry_mean_token_accuracy": 0.7344747610092163, "eval_chemistry_num_tokens": 51204374.0, "eval_chemistry_runtime": 26.8909, "eval_chemistry_samples_per_second": 18.594, "eval_chemistry_steps_per_second": 4.648, "step": 1800 }, { "epoch": 2.88, "eval_math_entropy": 0.6604897229671478, "eval_math_loss": 0.7050039768218994, "eval_math_mean_token_accuracy": 0.8106719055175782, "eval_math_num_tokens": 51204374.0, "eval_math_runtime": 27.5351, "eval_math_samples_per_second": 18.159, "eval_math_steps_per_second": 4.54, "step": 1800 }, { "epoch": 2.88, "eval_cyber_entropy": 2.112373055815697, "eval_cyber_loss": 2.8215885162353516, "eval_cyber_mean_token_accuracy": 0.4802186432480812, "eval_cyber_num_tokens": 51204374.0, "eval_cyber_runtime": 26.1331, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.827, "step": 1800 }, { "entropy": 0.6363504879176617, "epoch": 2.896, "grad_norm": 23.75, "learning_rate": 1.8202222222222225e-05, "loss": 10.224, "mean_token_accuracy": 0.8235703807324171, "num_tokens": 51483944.0, "step": 1810 }, { "entropy": 0.6513262124732137, "epoch": 2.912, "grad_norm": 23.0, "learning_rate": 1.8180000000000002e-05, "loss": 10.3132, "mean_token_accuracy": 0.8217961758375167, "num_tokens": 51765755.0, "step": 1820 }, { "entropy": 0.6529291735962033, "epoch": 2.928, "grad_norm": 23.25, "learning_rate": 1.815777777777778e-05, "loss": 10.3766, "mean_token_accuracy": 0.8222961116582155, "num_tokens": 52056379.0, "step": 1830 }, { "entropy": 0.6346785051748156, "epoch": 2.944, "grad_norm": 23.75, "learning_rate": 1.8135555555555557e-05, "loss": 10.14, "mean_token_accuracy": 0.8229989748448133, "num_tokens": 52346232.0, "step": 1840 }, { "entropy": 0.6526656987145543, "epoch": 2.96, "grad_norm": 21.75, "learning_rate": 1.8113333333333335e-05, "loss": 10.38, "mean_token_accuracy": 0.8202067915350199, "num_tokens": 52633789.0, "step": 1850 }, { "entropy": 0.6502787992358208, "epoch": 2.976, "grad_norm": 21.625, "learning_rate": 1.8091111111111113e-05, "loss": 10.2748, "mean_token_accuracy": 0.8214363507926464, "num_tokens": 52911755.0, "step": 1860 }, { "entropy": 0.6417823160067201, "epoch": 2.992, "grad_norm": 22.875, "learning_rate": 1.806888888888889e-05, "loss": 10.2309, "mean_token_accuracy": 0.8228288643062115, "num_tokens": 53198176.0, "step": 1870 }, { "entropy": 0.6243049314245581, "epoch": 3.008, "grad_norm": 26.25, "learning_rate": 1.8046666666666668e-05, "loss": 9.7241, "mean_token_accuracy": 0.8291641604155302, "num_tokens": 53481893.0, "step": 1880 }, { "entropy": 0.5638323642313481, "epoch": 3.024, "grad_norm": 27.125, "learning_rate": 1.8024444444444445e-05, "loss": 8.9824, "mean_token_accuracy": 0.8390695653855801, "num_tokens": 53771717.0, "step": 1890 }, { "entropy": 0.558561889640987, "epoch": 3.04, "grad_norm": 24.125, "learning_rate": 1.8002222222222223e-05, "loss": 8.9038, "mean_token_accuracy": 0.8408384509384632, "num_tokens": 54058045.0, "step": 1900 }, { "epoch": 3.04, "eval_biology_entropy": 1.0243187880516051, "eval_biology_loss": 1.581600546836853, "eval_biology_mean_token_accuracy": 0.6445312175750733, "eval_biology_num_tokens": 54058045.0, "eval_biology_runtime": 21.9531, "eval_biology_samples_per_second": 22.776, "eval_biology_steps_per_second": 5.694, "step": 1900 }, { "epoch": 3.04, "eval_chemistry_entropy": 0.7532488117218018, "eval_chemistry_loss": 1.1571515798568726, "eval_chemistry_mean_token_accuracy": 0.7285859537124634, "eval_chemistry_num_tokens": 54058045.0, "eval_chemistry_runtime": 26.8231, "eval_chemistry_samples_per_second": 18.641, "eval_chemistry_steps_per_second": 4.66, "step": 1900 }, { "epoch": 3.04, "eval_math_entropy": 0.5930595288276672, "eval_math_loss": 0.7225678563117981, "eval_math_mean_token_accuracy": 0.8094966850280761, "eval_math_num_tokens": 54058045.0, "eval_math_runtime": 27.497, "eval_math_samples_per_second": 18.184, "eval_math_steps_per_second": 4.546, "step": 1900 }, { "epoch": 3.04, "eval_cyber_entropy": 1.8411164230108261, "eval_cyber_loss": 3.063901424407959, "eval_cyber_mean_token_accuracy": 0.47065704002976416, "eval_cyber_num_tokens": 54058045.0, "eval_cyber_runtime": 26.1343, "eval_cyber_samples_per_second": 15.191, "eval_cyber_steps_per_second": 3.826, "step": 1900 }, { "entropy": 0.552313212864101, "epoch": 3.056, "grad_norm": 26.625, "learning_rate": 1.798e-05, "loss": 8.7715, "mean_token_accuracy": 0.8414296887814998, "num_tokens": 54334332.0, "step": 1910 }, { "entropy": 0.5570558808743954, "epoch": 3.072, "grad_norm": 27.125, "learning_rate": 1.7957777777777778e-05, "loss": 8.7638, "mean_token_accuracy": 0.8418575689196587, "num_tokens": 54624543.0, "step": 1920 }, { "entropy": 0.5351565392687917, "epoch": 3.088, "grad_norm": 27.625, "learning_rate": 1.7935555555555556e-05, "loss": 8.5286, "mean_token_accuracy": 0.8457726195454598, "num_tokens": 54907550.0, "step": 1930 }, { "entropy": 0.5519297284074127, "epoch": 3.104, "grad_norm": 27.75, "learning_rate": 1.7913333333333337e-05, "loss": 8.7504, "mean_token_accuracy": 0.8423502463847399, "num_tokens": 55190959.0, "step": 1940 }, { "entropy": 0.544937571324408, "epoch": 3.12, "grad_norm": 27.0, "learning_rate": 1.789111111111111e-05, "loss": 8.6451, "mean_token_accuracy": 0.8444420550018549, "num_tokens": 55481635.0, "step": 1950 }, { "entropy": 0.546911165677011, "epoch": 3.136, "grad_norm": 26.75, "learning_rate": 1.7868888888888892e-05, "loss": 8.6997, "mean_token_accuracy": 0.8431835647672414, "num_tokens": 55769010.0, "step": 1960 }, { "entropy": 0.5542461348697543, "epoch": 3.152, "grad_norm": 26.5, "learning_rate": 1.7846666666666666e-05, "loss": 8.7996, "mean_token_accuracy": 0.842128399387002, "num_tokens": 56053160.0, "step": 1970 }, { "entropy": 0.5676137331873179, "epoch": 3.168, "grad_norm": 26.75, "learning_rate": 1.7824444444444447e-05, "loss": 8.9893, "mean_token_accuracy": 0.8386933848261833, "num_tokens": 56337066.0, "step": 1980 }, { "entropy": 0.5574962265789509, "epoch": 3.184, "grad_norm": 30.0, "learning_rate": 1.780222222222222e-05, "loss": 8.8126, "mean_token_accuracy": 0.8404616348445415, "num_tokens": 56618899.0, "step": 1990 }, { "entropy": 0.5407627185806632, "epoch": 3.2, "grad_norm": 26.0, "learning_rate": 1.7780000000000003e-05, "loss": 8.6062, "mean_token_accuracy": 0.8444454524666071, "num_tokens": 56910071.0, "step": 2000 }, { "epoch": 3.2, "eval_biology_entropy": 0.9433750138282776, "eval_biology_loss": 1.6609779596328735, "eval_biology_mean_token_accuracy": 0.6404439859390259, "eval_biology_num_tokens": 56910071.0, "eval_biology_runtime": 21.9804, "eval_biology_samples_per_second": 22.748, "eval_biology_steps_per_second": 5.687, "step": 2000 }, { "epoch": 3.2, "eval_chemistry_entropy": 0.697946096420288, "eval_chemistry_loss": 1.214890718460083, "eval_chemistry_mean_token_accuracy": 0.7252082509994506, "eval_chemistry_num_tokens": 56910071.0, "eval_chemistry_runtime": 26.8518, "eval_chemistry_samples_per_second": 18.621, "eval_chemistry_steps_per_second": 4.655, "step": 2000 }, { "epoch": 3.2, "eval_math_entropy": 0.5889736828804016, "eval_math_loss": 0.72825688123703, "eval_math_mean_token_accuracy": 0.809436321735382, "eval_math_num_tokens": 56910071.0, "eval_math_runtime": 27.524, "eval_math_samples_per_second": 18.166, "eval_math_steps_per_second": 4.541, "step": 2000 }, { "epoch": 3.2, "eval_cyber_entropy": 1.8025889378786086, "eval_cyber_loss": 3.0566282272338867, "eval_cyber_mean_token_accuracy": 0.46734614998102186, "eval_cyber_num_tokens": 56910071.0, "eval_cyber_runtime": 26.2014, "eval_cyber_samples_per_second": 15.152, "eval_cyber_steps_per_second": 3.817, "step": 2000 }, { "entropy": 0.5716043025255203, "epoch": 3.216, "grad_norm": 26.875, "learning_rate": 1.7757777777777777e-05, "loss": 9.0642, "mean_token_accuracy": 0.8372103173285723, "num_tokens": 57190265.0, "step": 2010 }, { "entropy": 0.5565309930592776, "epoch": 3.232, "grad_norm": 28.875, "learning_rate": 1.7735555555555558e-05, "loss": 8.8558, "mean_token_accuracy": 0.841176188737154, "num_tokens": 57471355.0, "step": 2020 }, { "entropy": 0.5314720202237367, "epoch": 3.248, "grad_norm": 29.5, "learning_rate": 1.7713333333333335e-05, "loss": 8.4627, "mean_token_accuracy": 0.8469300638884306, "num_tokens": 57749030.0, "step": 2030 }, { "entropy": 0.5424867495894432, "epoch": 3.2640000000000002, "grad_norm": 29.375, "learning_rate": 1.7691111111111113e-05, "loss": 8.5914, "mean_token_accuracy": 0.8446186445653439, "num_tokens": 58027567.0, "step": 2040 }, { "entropy": 0.5520286228507757, "epoch": 3.2800000000000002, "grad_norm": 26.125, "learning_rate": 1.766888888888889e-05, "loss": 8.8087, "mean_token_accuracy": 0.8401213694363833, "num_tokens": 58304346.0, "step": 2050 }, { "entropy": 0.5688488876447082, "epoch": 3.296, "grad_norm": 29.375, "learning_rate": 1.7646666666666668e-05, "loss": 9.0084, "mean_token_accuracy": 0.8387346632778645, "num_tokens": 58579831.0, "step": 2060 }, { "entropy": 0.5622111067175866, "epoch": 3.312, "grad_norm": 28.375, "learning_rate": 1.7624444444444446e-05, "loss": 8.8982, "mean_token_accuracy": 0.839327049255371, "num_tokens": 58858426.0, "step": 2070 }, { "entropy": 0.5476189518347383, "epoch": 3.328, "grad_norm": 26.875, "learning_rate": 1.7602222222222223e-05, "loss": 8.7054, "mean_token_accuracy": 0.8415604490786791, "num_tokens": 59142949.0, "step": 2080 }, { "entropy": 0.5446038300171494, "epoch": 3.344, "grad_norm": 28.375, "learning_rate": 1.758e-05, "loss": 8.7213, "mean_token_accuracy": 0.8430991288274526, "num_tokens": 59424654.0, "step": 2090 }, { "entropy": 0.587455852329731, "epoch": 3.36, "grad_norm": 30.125, "learning_rate": 1.755777777777778e-05, "loss": 9.3345, "mean_token_accuracy": 0.8330791313201189, "num_tokens": 59704876.0, "step": 2100 }, { "epoch": 3.36, "eval_biology_entropy": 0.9355038318634034, "eval_biology_loss": 1.6756757497787476, "eval_biology_mean_token_accuracy": 0.639706241607666, "eval_biology_num_tokens": 59704876.0, "eval_biology_runtime": 21.9848, "eval_biology_samples_per_second": 22.743, "eval_biology_steps_per_second": 5.686, "step": 2100 }, { "epoch": 3.36, "eval_chemistry_entropy": 0.6868885807991028, "eval_chemistry_loss": 1.2236160039901733, "eval_chemistry_mean_token_accuracy": 0.7258326210975647, "eval_chemistry_num_tokens": 59704876.0, "eval_chemistry_runtime": 26.8704, "eval_chemistry_samples_per_second": 18.608, "eval_chemistry_steps_per_second": 4.652, "step": 2100 }, { "epoch": 3.36, "eval_math_entropy": 0.573245190858841, "eval_math_loss": 0.7296195030212402, "eval_math_mean_token_accuracy": 0.8088682608604432, "eval_math_num_tokens": 59704876.0, "eval_math_runtime": 27.5381, "eval_math_samples_per_second": 18.157, "eval_math_steps_per_second": 4.539, "step": 2100 }, { "epoch": 3.36, "eval_cyber_entropy": 1.81190150141716, "eval_cyber_loss": 3.099759817123413, "eval_cyber_mean_token_accuracy": 0.46466209664940833, "eval_cyber_num_tokens": 59704876.0, "eval_cyber_runtime": 26.1391, "eval_cyber_samples_per_second": 15.188, "eval_cyber_steps_per_second": 3.826, "step": 2100 }, { "entropy": 0.5630033634603023, "epoch": 3.376, "grad_norm": 26.375, "learning_rate": 1.7535555555555556e-05, "loss": 8.9955, "mean_token_accuracy": 0.8384801875799894, "num_tokens": 59988211.0, "step": 2110 }, { "entropy": 0.541259053349495, "epoch": 3.392, "grad_norm": 28.75, "learning_rate": 1.7513333333333334e-05, "loss": 8.5612, "mean_token_accuracy": 0.8447215680032969, "num_tokens": 60276606.0, "step": 2120 }, { "entropy": 0.5392770981416106, "epoch": 3.408, "grad_norm": 27.0, "learning_rate": 1.749111111111111e-05, "loss": 8.564, "mean_token_accuracy": 0.844776964187622, "num_tokens": 60560228.0, "step": 2130 }, { "entropy": 0.556757252663374, "epoch": 3.424, "grad_norm": 30.0, "learning_rate": 1.746888888888889e-05, "loss": 8.8468, "mean_token_accuracy": 0.8407265573740006, "num_tokens": 60845893.0, "step": 2140 }, { "entropy": 0.5446679562330246, "epoch": 3.44, "grad_norm": 29.0, "learning_rate": 1.7446666666666667e-05, "loss": 8.7112, "mean_token_accuracy": 0.8428826056420803, "num_tokens": 61126410.0, "step": 2150 }, { "entropy": 0.5627840265631676, "epoch": 3.456, "grad_norm": 27.375, "learning_rate": 1.7424444444444444e-05, "loss": 8.8644, "mean_token_accuracy": 0.8408064149320126, "num_tokens": 61406501.0, "step": 2160 }, { "entropy": 0.5712036414071917, "epoch": 3.472, "grad_norm": 26.375, "learning_rate": 1.7402222222222222e-05, "loss": 9.1553, "mean_token_accuracy": 0.8347680065780878, "num_tokens": 61693412.0, "step": 2170 }, { "entropy": 0.551654289662838, "epoch": 3.488, "grad_norm": 27.875, "learning_rate": 1.7380000000000003e-05, "loss": 8.7349, "mean_token_accuracy": 0.8419005110859871, "num_tokens": 61977303.0, "step": 2180 }, { "entropy": 0.5576195014640689, "epoch": 3.504, "grad_norm": 28.125, "learning_rate": 1.735777777777778e-05, "loss": 8.8143, "mean_token_accuracy": 0.8407981790602207, "num_tokens": 62265256.0, "step": 2190 }, { "entropy": 0.549780029989779, "epoch": 3.52, "grad_norm": 28.0, "learning_rate": 1.7335555555555558e-05, "loss": 8.7531, "mean_token_accuracy": 0.8421756789088249, "num_tokens": 62554970.0, "step": 2200 }, { "epoch": 3.52, "eval_biology_entropy": 0.9303290410041809, "eval_biology_loss": 1.678992748260498, "eval_biology_mean_token_accuracy": 0.6394612488746643, "eval_biology_num_tokens": 62554970.0, "eval_biology_runtime": 22.0195, "eval_biology_samples_per_second": 22.707, "eval_biology_steps_per_second": 5.677, "step": 2200 }, { "epoch": 3.52, "eval_chemistry_entropy": 0.6897460157871247, "eval_chemistry_loss": 1.2221547365188599, "eval_chemistry_mean_token_accuracy": 0.7248724555969238, "eval_chemistry_num_tokens": 62554970.0, "eval_chemistry_runtime": 26.8786, "eval_chemistry_samples_per_second": 18.602, "eval_chemistry_steps_per_second": 4.651, "step": 2200 }, { "epoch": 3.52, "eval_math_entropy": 0.5768633635044098, "eval_math_loss": 0.7227700352668762, "eval_math_mean_token_accuracy": 0.8104258437156677, "eval_math_num_tokens": 62554970.0, "eval_math_runtime": 27.517, "eval_math_samples_per_second": 18.171, "eval_math_steps_per_second": 4.543, "step": 2200 }, { "epoch": 3.52, "eval_cyber_entropy": 1.8477186000347137, "eval_cyber_loss": 2.9685397148132324, "eval_cyber_mean_token_accuracy": 0.47293118715286253, "eval_cyber_num_tokens": 62554970.0, "eval_cyber_runtime": 26.1406, "eval_cyber_samples_per_second": 15.187, "eval_cyber_steps_per_second": 3.825, "step": 2200 }, { "entropy": 0.5575708149001002, "epoch": 3.536, "grad_norm": 29.25, "learning_rate": 1.7313333333333336e-05, "loss": 8.8357, "mean_token_accuracy": 0.839980386570096, "num_tokens": 62837055.0, "step": 2210 }, { "entropy": 0.5485213872045278, "epoch": 3.552, "grad_norm": 27.125, "learning_rate": 1.7291111111111113e-05, "loss": 8.702, "mean_token_accuracy": 0.8431675769388676, "num_tokens": 63129767.0, "step": 2220 }, { "entropy": 0.542373401671648, "epoch": 3.568, "grad_norm": 26.25, "learning_rate": 1.726888888888889e-05, "loss": 8.5862, "mean_token_accuracy": 0.8438587125390768, "num_tokens": 63424239.0, "step": 2230 }, { "entropy": 0.5598640512675047, "epoch": 3.584, "grad_norm": 29.25, "learning_rate": 1.724666666666667e-05, "loss": 8.8446, "mean_token_accuracy": 0.8416846722364426, "num_tokens": 63704243.0, "step": 2240 }, { "entropy": 0.5350055942311883, "epoch": 3.6, "grad_norm": 28.25, "learning_rate": 1.7224444444444446e-05, "loss": 8.4988, "mean_token_accuracy": 0.8462501149624586, "num_tokens": 63994604.0, "step": 2250 }, { "entropy": 0.5622709069401026, "epoch": 3.616, "grad_norm": 29.75, "learning_rate": 1.7202222222222224e-05, "loss": 8.9258, "mean_token_accuracy": 0.8391084551811219, "num_tokens": 64280032.0, "step": 2260 }, { "entropy": 0.5400569776073099, "epoch": 3.632, "grad_norm": 28.125, "learning_rate": 1.718e-05, "loss": 8.6102, "mean_token_accuracy": 0.843450180068612, "num_tokens": 64560379.0, "step": 2270 }, { "entropy": 0.5330126417800785, "epoch": 3.648, "grad_norm": 26.25, "learning_rate": 1.715777777777778e-05, "loss": 8.4519, "mean_token_accuracy": 0.8483009889721871, "num_tokens": 64843196.0, "step": 2280 }, { "entropy": 0.5732263812795282, "epoch": 3.664, "grad_norm": 27.125, "learning_rate": 1.7135555555555557e-05, "loss": 9.1082, "mean_token_accuracy": 0.836763308942318, "num_tokens": 65126428.0, "step": 2290 }, { "entropy": 0.5515871224924922, "epoch": 3.68, "grad_norm": 27.0, "learning_rate": 1.7113333333333334e-05, "loss": 8.763, "mean_token_accuracy": 0.8418748516589403, "num_tokens": 65411914.0, "step": 2300 }, { "epoch": 3.68, "eval_biology_entropy": 0.9110841789245605, "eval_biology_loss": 1.711810827255249, "eval_biology_mean_token_accuracy": 0.6378892912864685, "eval_biology_num_tokens": 65411914.0, "eval_biology_runtime": 21.9867, "eval_biology_samples_per_second": 22.741, "eval_biology_steps_per_second": 5.685, "step": 2300 }, { "epoch": 3.68, "eval_chemistry_entropy": 0.6862595796585083, "eval_chemistry_loss": 1.2406351566314697, "eval_chemistry_mean_token_accuracy": 0.7238731870651245, "eval_chemistry_num_tokens": 65411914.0, "eval_chemistry_runtime": 26.8587, "eval_chemistry_samples_per_second": 18.616, "eval_chemistry_steps_per_second": 4.654, "step": 2300 }, { "epoch": 3.68, "eval_math_entropy": 0.5814973845481872, "eval_math_loss": 0.7214853167533875, "eval_math_mean_token_accuracy": 0.8104983367919922, "eval_math_num_tokens": 65411914.0, "eval_math_runtime": 27.5274, "eval_math_samples_per_second": 18.164, "eval_math_steps_per_second": 4.541, "step": 2300 }, { "epoch": 3.68, "eval_cyber_entropy": 1.762675429582596, "eval_cyber_loss": 3.1234960556030273, "eval_cyber_mean_token_accuracy": 0.46845712706446646, "eval_cyber_num_tokens": 65411914.0, "eval_cyber_runtime": 26.126, "eval_cyber_samples_per_second": 15.196, "eval_cyber_steps_per_second": 3.828, "step": 2300 }, { "entropy": 0.5569826386868953, "epoch": 3.6959999999999997, "grad_norm": 28.625, "learning_rate": 1.7091111111111112e-05, "loss": 8.8689, "mean_token_accuracy": 0.8401270765811205, "num_tokens": 65696757.0, "step": 2310 }, { "entropy": 0.5444023845717311, "epoch": 3.7119999999999997, "grad_norm": 29.75, "learning_rate": 1.706888888888889e-05, "loss": 8.6977, "mean_token_accuracy": 0.8423458002507687, "num_tokens": 65975610.0, "step": 2320 }, { "entropy": 0.5523361021652817, "epoch": 3.7279999999999998, "grad_norm": 29.375, "learning_rate": 1.704666666666667e-05, "loss": 8.7284, "mean_token_accuracy": 0.8420968912541866, "num_tokens": 66257358.0, "step": 2330 }, { "entropy": 0.5530321348458529, "epoch": 3.7439999999999998, "grad_norm": 28.875, "learning_rate": 1.7024444444444445e-05, "loss": 8.7557, "mean_token_accuracy": 0.8413629118353129, "num_tokens": 66543693.0, "step": 2340 }, { "entropy": 0.5565059429034591, "epoch": 3.76, "grad_norm": 27.5, "learning_rate": 1.7002222222222226e-05, "loss": 8.8299, "mean_token_accuracy": 0.8407174117863179, "num_tokens": 66825970.0, "step": 2350 }, { "entropy": 0.5809329712763429, "epoch": 3.776, "grad_norm": 28.125, "learning_rate": 1.698e-05, "loss": 9.2537, "mean_token_accuracy": 0.8345801163464784, "num_tokens": 67115478.0, "step": 2360 }, { "entropy": 0.5309191320091486, "epoch": 3.792, "grad_norm": 30.0, "learning_rate": 1.695777777777778e-05, "loss": 8.4481, "mean_token_accuracy": 0.8472430635243654, "num_tokens": 67405183.0, "step": 2370 }, { "entropy": 0.5616891149431467, "epoch": 3.808, "grad_norm": 28.5, "learning_rate": 1.6935555555555555e-05, "loss": 8.9228, "mean_token_accuracy": 0.8399017956107855, "num_tokens": 67688611.0, "step": 2380 }, { "entropy": 0.5439900878816843, "epoch": 3.824, "grad_norm": 27.625, "learning_rate": 1.6913333333333336e-05, "loss": 8.6428, "mean_token_accuracy": 0.8438364278525115, "num_tokens": 67974438.0, "step": 2390 }, { "entropy": 0.5522285526618361, "epoch": 3.84, "grad_norm": 28.875, "learning_rate": 1.689111111111111e-05, "loss": 8.7781, "mean_token_accuracy": 0.8404338449239731, "num_tokens": 68258661.0, "step": 2400 }, { "epoch": 3.84, "eval_biology_entropy": 0.9036206231117249, "eval_biology_loss": 1.7120203971862793, "eval_biology_mean_token_accuracy": 0.6382904124259948, "eval_biology_num_tokens": 68258661.0, "eval_biology_runtime": 21.9483, "eval_biology_samples_per_second": 22.781, "eval_biology_steps_per_second": 5.695, "step": 2400 }, { "epoch": 3.84, "eval_chemistry_entropy": 0.6759644203186035, "eval_chemistry_loss": 1.241569995880127, "eval_chemistry_mean_token_accuracy": 0.7237686243057251, "eval_chemistry_num_tokens": 68258661.0, "eval_chemistry_runtime": 27.1346, "eval_chemistry_samples_per_second": 18.427, "eval_chemistry_steps_per_second": 4.607, "step": 2400 }, { "epoch": 3.84, "eval_math_entropy": 0.576783682346344, "eval_math_loss": 0.71806401014328, "eval_math_mean_token_accuracy": 0.8112592906951904, "eval_math_num_tokens": 68258661.0, "eval_math_runtime": 27.5576, "eval_math_samples_per_second": 18.144, "eval_math_steps_per_second": 4.536, "step": 2400 }, { "epoch": 3.84, "eval_cyber_entropy": 1.9050254535675049, "eval_cyber_loss": 3.006852626800537, "eval_cyber_mean_token_accuracy": 0.4733030904829502, "eval_cyber_num_tokens": 68258661.0, "eval_cyber_runtime": 26.1325, "eval_cyber_samples_per_second": 15.192, "eval_cyber_steps_per_second": 3.827, "step": 2400 }, { "entropy": 0.5429382309317589, "epoch": 3.856, "grad_norm": 28.0, "learning_rate": 1.686888888888889e-05, "loss": 8.6071, "mean_token_accuracy": 0.8441234320402146, "num_tokens": 68541830.0, "step": 2410 }, { "entropy": 0.5582559602335095, "epoch": 3.872, "grad_norm": 28.25, "learning_rate": 1.684666666666667e-05, "loss": 8.8791, "mean_token_accuracy": 0.8397030271589756, "num_tokens": 68832764.0, "step": 2420 }, { "entropy": 0.5590548772364855, "epoch": 3.888, "grad_norm": 29.0, "learning_rate": 1.6824444444444447e-05, "loss": 8.9216, "mean_token_accuracy": 0.839621964097023, "num_tokens": 69117158.0, "step": 2430 }, { "entropy": 0.5586038419976831, "epoch": 3.904, "grad_norm": 28.375, "learning_rate": 1.6802222222222224e-05, "loss": 8.9175, "mean_token_accuracy": 0.8399001814424991, "num_tokens": 69406954.0, "step": 2440 }, { "entropy": 0.5413944207131862, "epoch": 3.92, "grad_norm": 29.0, "learning_rate": 1.6780000000000002e-05, "loss": 8.591, "mean_token_accuracy": 0.8445574171841145, "num_tokens": 69682187.0, "step": 2450 }, { "entropy": 0.5603434385731816, "epoch": 3.936, "grad_norm": 26.75, "learning_rate": 1.675777777777778e-05, "loss": 8.8984, "mean_token_accuracy": 0.8396794117987156, "num_tokens": 69977882.0, "step": 2460 }, { "entropy": 0.5525805668905377, "epoch": 3.952, "grad_norm": 25.5, "learning_rate": 1.6735555555555557e-05, "loss": 8.8119, "mean_token_accuracy": 0.8408492799848318, "num_tokens": 70259269.0, "step": 2470 }, { "entropy": 0.5666335012763739, "epoch": 3.968, "grad_norm": 28.0, "learning_rate": 1.6713333333333335e-05, "loss": 8.9832, "mean_token_accuracy": 0.838411795720458, "num_tokens": 70542461.0, "step": 2480 }, { "entropy": 0.5467899790033698, "epoch": 3.984, "grad_norm": 28.5, "learning_rate": 1.6691111111111112e-05, "loss": 8.6674, "mean_token_accuracy": 0.8445790354162455, "num_tokens": 70842100.0, "step": 2490 }, { "entropy": 0.5574308915063739, "epoch": 4.0, "grad_norm": 29.375, "learning_rate": 1.666888888888889e-05, "loss": 8.8622, "mean_token_accuracy": 0.8396205045282841, "num_tokens": 71121728.0, "step": 2500 }, { "epoch": 4.0, "eval_biology_entropy": 0.9111711072921753, "eval_biology_loss": 1.7112033367156982, "eval_biology_mean_token_accuracy": 0.6378541564941407, "eval_biology_num_tokens": 71121728.0, "eval_biology_runtime": 21.9796, "eval_biology_samples_per_second": 22.748, "eval_biology_steps_per_second": 5.687, "step": 2500 }, { "epoch": 4.0, "eval_chemistry_entropy": 0.6879493939876556, "eval_chemistry_loss": 1.2359343767166138, "eval_chemistry_mean_token_accuracy": 0.7238914380073548, "eval_chemistry_num_tokens": 71121728.0, "eval_chemistry_runtime": 26.8785, "eval_chemistry_samples_per_second": 18.602, "eval_chemistry_steps_per_second": 4.651, "step": 2500 }, { "epoch": 4.0, "eval_math_entropy": 0.5773937346935272, "eval_math_loss": 0.7136553525924683, "eval_math_mean_token_accuracy": 0.8121636147499085, "eval_math_num_tokens": 71121728.0, "eval_math_runtime": 27.5384, "eval_math_samples_per_second": 18.156, "eval_math_steps_per_second": 4.539, "step": 2500 }, { "epoch": 4.0, "eval_cyber_entropy": 1.8897177493572235, "eval_cyber_loss": 3.045076847076416, "eval_cyber_mean_token_accuracy": 0.47337317317724226, "eval_cyber_num_tokens": 71121728.0, "eval_cyber_runtime": 26.2014, "eval_cyber_samples_per_second": 15.152, "eval_cyber_steps_per_second": 3.817, "step": 2500 }, { "entropy": 0.45908909011632204, "epoch": 4.016, "grad_norm": 39.75, "learning_rate": 1.6646666666666668e-05, "loss": 7.2254, "mean_token_accuracy": 0.8667656052857637, "num_tokens": 71414459.0, "step": 2510 }, { "entropy": 0.45149051323533057, "epoch": 4.032, "grad_norm": 40.25, "learning_rate": 1.6624444444444445e-05, "loss": 7.0382, "mean_token_accuracy": 0.868442352488637, "num_tokens": 71701594.0, "step": 2520 }, { "entropy": 0.43811416905373335, "epoch": 4.048, "grad_norm": 32.5, "learning_rate": 1.6602222222222223e-05, "loss": 6.8687, "mean_token_accuracy": 0.8728650230914354, "num_tokens": 71987050.0, "step": 2530 }, { "entropy": 0.4328246159479022, "epoch": 4.064, "grad_norm": 39.0, "learning_rate": 1.658e-05, "loss": 6.8133, "mean_token_accuracy": 0.8725668497383594, "num_tokens": 72265332.0, "step": 2540 }, { "entropy": 0.4209323078393936, "epoch": 4.08, "grad_norm": 36.0, "learning_rate": 1.6557777777777778e-05, "loss": 6.614, "mean_token_accuracy": 0.8763341084122658, "num_tokens": 72554734.0, "step": 2550 }, { "entropy": 0.4248308201320469, "epoch": 4.096, "grad_norm": 38.75, "learning_rate": 1.6535555555555556e-05, "loss": 6.7057, "mean_token_accuracy": 0.8734285194426775, "num_tokens": 72837683.0, "step": 2560 }, { "entropy": 0.4242420919239521, "epoch": 4.112, "grad_norm": 35.5, "learning_rate": 1.6513333333333333e-05, "loss": 6.6544, "mean_token_accuracy": 0.8756396573036909, "num_tokens": 73127027.0, "step": 2570 }, { "entropy": 0.42103672623634336, "epoch": 4.128, "grad_norm": 36.75, "learning_rate": 1.6491111111111114e-05, "loss": 6.6412, "mean_token_accuracy": 0.8757976401597262, "num_tokens": 73408259.0, "step": 2580 }, { "entropy": 0.4266476449556649, "epoch": 4.144, "grad_norm": 37.75, "learning_rate": 1.646888888888889e-05, "loss": 6.711, "mean_token_accuracy": 0.8733838576823473, "num_tokens": 73696460.0, "step": 2590 }, { "entropy": 0.4382867323234677, "epoch": 4.16, "grad_norm": 35.0, "learning_rate": 1.644666666666667e-05, "loss": 6.8821, "mean_token_accuracy": 0.8716344017535448, "num_tokens": 73991298.0, "step": 2600 }, { "epoch": 4.16, "eval_biology_entropy": 0.6756652309894562, "eval_biology_loss": 2.1495590209960938, "eval_biology_mean_token_accuracy": 0.6227014427185058, "eval_biology_num_tokens": 73991298.0, "eval_biology_runtime": 22.263, "eval_biology_samples_per_second": 22.459, "eval_biology_steps_per_second": 5.615, "step": 2600 }, { "epoch": 4.16, "eval_chemistry_entropy": 0.5040421216487885, "eval_chemistry_loss": 1.586193561553955, "eval_chemistry_mean_token_accuracy": 0.711522789478302, "eval_chemistry_num_tokens": 73991298.0, "eval_chemistry_runtime": 26.9429, "eval_chemistry_samples_per_second": 18.558, "eval_chemistry_steps_per_second": 4.639, "step": 2600 }, { "epoch": 4.16, "eval_math_entropy": 0.473518746137619, "eval_math_loss": 0.8168261647224426, "eval_math_mean_token_accuracy": 0.8053260369300842, "eval_math_num_tokens": 73991298.0, "eval_math_runtime": 27.5993, "eval_math_samples_per_second": 18.116, "eval_math_steps_per_second": 4.529, "step": 2600 }, { "epoch": 4.16, "eval_cyber_entropy": 1.4892040437459946, "eval_cyber_loss": 3.6366336345672607, "eval_cyber_mean_token_accuracy": 0.4521488405764103, "eval_cyber_num_tokens": 73991298.0, "eval_cyber_runtime": 26.2013, "eval_cyber_samples_per_second": 15.152, "eval_cyber_steps_per_second": 3.817, "step": 2600 }, { "entropy": 0.4261194946244359, "epoch": 4.176, "grad_norm": 38.0, "learning_rate": 1.6424444444444444e-05, "loss": 6.7312, "mean_token_accuracy": 0.8729683205485343, "num_tokens": 74271332.0, "step": 2610 }, { "entropy": 0.4226545256562531, "epoch": 4.192, "grad_norm": 36.25, "learning_rate": 1.6402222222222225e-05, "loss": 6.675, "mean_token_accuracy": 0.8749130304902792, "num_tokens": 74553659.0, "step": 2620 }, { "entropy": 0.4246135802939534, "epoch": 4.208, "grad_norm": 37.0, "learning_rate": 1.638e-05, "loss": 6.6911, "mean_token_accuracy": 0.8739365771412849, "num_tokens": 74829647.0, "step": 2630 }, { "entropy": 0.4290175061672926, "epoch": 4.224, "grad_norm": 35.25, "learning_rate": 1.635777777777778e-05, "loss": 6.7558, "mean_token_accuracy": 0.8724367342889309, "num_tokens": 75116156.0, "step": 2640 }, { "entropy": 0.439176782220602, "epoch": 4.24, "grad_norm": 35.75, "learning_rate": 1.6335555555555558e-05, "loss": 6.9537, "mean_token_accuracy": 0.8710915297269821, "num_tokens": 75405879.0, "step": 2650 }, { "entropy": 0.4284585755318403, "epoch": 4.256, "grad_norm": 36.0, "learning_rate": 1.6313333333333335e-05, "loss": 6.7606, "mean_token_accuracy": 0.8736028768122196, "num_tokens": 75697735.0, "step": 2660 }, { "entropy": 0.4218846418894827, "epoch": 4.272, "grad_norm": 38.25, "learning_rate": 1.6291111111111113e-05, "loss": 6.689, "mean_token_accuracy": 0.8744386442005634, "num_tokens": 75983249.0, "step": 2670 }, { "entropy": 0.42769992016255853, "epoch": 4.288, "grad_norm": 38.75, "learning_rate": 1.626888888888889e-05, "loss": 6.7453, "mean_token_accuracy": 0.8734827786684036, "num_tokens": 76272093.0, "step": 2680 }, { "entropy": 0.4388342473655939, "epoch": 4.304, "grad_norm": 33.75, "learning_rate": 1.6246666666666668e-05, "loss": 6.9342, "mean_token_accuracy": 0.8703750927001238, "num_tokens": 76560966.0, "step": 2690 }, { "entropy": 0.4267837518826127, "epoch": 4.32, "grad_norm": 35.0, "learning_rate": 1.6224444444444446e-05, "loss": 6.7733, "mean_token_accuracy": 0.8728781186044217, "num_tokens": 76853759.0, "step": 2700 }, { "epoch": 4.32, "eval_biology_entropy": 0.6632317819595337, "eval_biology_loss": 2.1799333095550537, "eval_biology_mean_token_accuracy": 0.6217645144462586, "eval_biology_num_tokens": 76853759.0, "eval_biology_runtime": 22.0118, "eval_biology_samples_per_second": 22.715, "eval_biology_steps_per_second": 5.679, "step": 2700 }, { "epoch": 4.32, "eval_chemistry_entropy": 0.5027244560718537, "eval_chemistry_loss": 1.600482702255249, "eval_chemistry_mean_token_accuracy": 0.711266414642334, "eval_chemistry_num_tokens": 76853759.0, "eval_chemistry_runtime": 26.9684, "eval_chemistry_samples_per_second": 18.54, "eval_chemistry_steps_per_second": 4.635, "step": 2700 }, { "epoch": 4.32, "eval_math_entropy": 0.48136704444885253, "eval_math_loss": 0.8135265111923218, "eval_math_mean_token_accuracy": 0.8057692317962647, "eval_math_num_tokens": 76853759.0, "eval_math_runtime": 27.5551, "eval_math_samples_per_second": 18.145, "eval_math_steps_per_second": 4.536, "step": 2700 }, { "epoch": 4.32, "eval_cyber_entropy": 1.5068341046571732, "eval_cyber_loss": 3.602221965789795, "eval_cyber_mean_token_accuracy": 0.4545079267024994, "eval_cyber_num_tokens": 76853759.0, "eval_cyber_runtime": 26.1942, "eval_cyber_samples_per_second": 15.156, "eval_cyber_steps_per_second": 3.818, "step": 2700 }, { "entropy": 0.4217254978604615, "epoch": 4.336, "grad_norm": 38.25, "learning_rate": 1.6202222222222223e-05, "loss": 6.6901, "mean_token_accuracy": 0.8739183027297258, "num_tokens": 77137443.0, "step": 2710 }, { "entropy": 0.44381179139018057, "epoch": 4.352, "grad_norm": 37.0, "learning_rate": 1.618e-05, "loss": 6.9456, "mean_token_accuracy": 0.8700962405651808, "num_tokens": 77427862.0, "step": 2720 }, { "entropy": 0.4365043812431395, "epoch": 4.368, "grad_norm": 42.25, "learning_rate": 1.615777777777778e-05, "loss": 6.8688, "mean_token_accuracy": 0.8719002742320299, "num_tokens": 77713442.0, "step": 2730 }, { "entropy": 0.43601095052435995, "epoch": 4.384, "grad_norm": 38.75, "learning_rate": 1.6135555555555556e-05, "loss": 6.9251, "mean_token_accuracy": 0.8705421075224876, "num_tokens": 77996972.0, "step": 2740 }, { "entropy": 0.44800137598067524, "epoch": 4.4, "grad_norm": 39.0, "learning_rate": 1.6113333333333334e-05, "loss": 7.0521, "mean_token_accuracy": 0.8675072330981493, "num_tokens": 78282462.0, "step": 2750 }, { "entropy": 0.4330807910300791, "epoch": 4.416, "grad_norm": 39.25, "learning_rate": 1.609111111111111e-05, "loss": 6.8347, "mean_token_accuracy": 0.8713963177055121, "num_tokens": 78560764.0, "step": 2760 }, { "entropy": 0.43517380207777023, "epoch": 4.432, "grad_norm": 39.75, "learning_rate": 1.606888888888889e-05, "loss": 6.9061, "mean_token_accuracy": 0.8708390913903713, "num_tokens": 78846224.0, "step": 2770 }, { "entropy": 0.43355529764667156, "epoch": 4.448, "grad_norm": 38.0, "learning_rate": 1.6046666666666667e-05, "loss": 6.8363, "mean_token_accuracy": 0.8721863590180874, "num_tokens": 79124292.0, "step": 2780 }, { "entropy": 0.4412316353060305, "epoch": 4.464, "grad_norm": 40.25, "learning_rate": 1.6024444444444444e-05, "loss": 7.0196, "mean_token_accuracy": 0.8690844859927893, "num_tokens": 79411317.0, "step": 2790 }, { "entropy": 0.4366396741941571, "epoch": 4.48, "grad_norm": 37.0, "learning_rate": 1.6002222222222222e-05, "loss": 6.8841, "mean_token_accuracy": 0.8708409104496241, "num_tokens": 79688857.0, "step": 2800 }, { "epoch": 4.48, "eval_biology_entropy": 0.6629302854537964, "eval_biology_loss": 2.1800971031188965, "eval_biology_mean_token_accuracy": 0.6212873640060425, "eval_biology_num_tokens": 79688857.0, "eval_biology_runtime": 22.0132, "eval_biology_samples_per_second": 22.714, "eval_biology_steps_per_second": 5.678, "step": 2800 }, { "epoch": 4.48, "eval_chemistry_entropy": 0.4996016290187836, "eval_chemistry_loss": 1.5948313474655151, "eval_chemistry_mean_token_accuracy": 0.7118293223381043, "eval_chemistry_num_tokens": 79688857.0, "eval_chemistry_runtime": 26.8881, "eval_chemistry_samples_per_second": 18.596, "eval_chemistry_steps_per_second": 4.649, "step": 2800 }, { "epoch": 4.48, "eval_math_entropy": 0.4721082389354706, "eval_math_loss": 0.8117740750312805, "eval_math_mean_token_accuracy": 0.8063090562820434, "eval_math_num_tokens": 79688857.0, "eval_math_runtime": 27.5436, "eval_math_samples_per_second": 18.153, "eval_math_steps_per_second": 4.538, "step": 2800 }, { "epoch": 4.48, "eval_cyber_entropy": 1.481595308482647, "eval_cyber_loss": 3.662889242172241, "eval_cyber_mean_token_accuracy": 0.45651811435818673, "eval_cyber_num_tokens": 79688857.0, "eval_cyber_runtime": 26.1581, "eval_cyber_samples_per_second": 15.177, "eval_cyber_steps_per_second": 3.823, "step": 2800 }, { "entropy": 0.4506692230701447, "epoch": 4.496, "grad_norm": 40.0, "learning_rate": 1.5980000000000003e-05, "loss": 7.1385, "mean_token_accuracy": 0.8673205204308033, "num_tokens": 79967298.0, "step": 2810 }, { "entropy": 0.43053146908059714, "epoch": 4.5120000000000005, "grad_norm": 35.75, "learning_rate": 1.5957777777777777e-05, "loss": 6.8069, "mean_token_accuracy": 0.8720490086823702, "num_tokens": 80253719.0, "step": 2820 }, { "entropy": 0.43672142643481493, "epoch": 4.5280000000000005, "grad_norm": 38.25, "learning_rate": 1.5935555555555558e-05, "loss": 6.8917, "mean_token_accuracy": 0.8700784627348185, "num_tokens": 80542942.0, "step": 2830 }, { "entropy": 0.43880522809922695, "epoch": 4.5440000000000005, "grad_norm": 39.5, "learning_rate": 1.5913333333333332e-05, "loss": 6.9503, "mean_token_accuracy": 0.8696860957890749, "num_tokens": 80828165.0, "step": 2840 }, { "entropy": 0.42719278000295163, "epoch": 4.5600000000000005, "grad_norm": 39.0, "learning_rate": 1.5891111111111113e-05, "loss": 6.7468, "mean_token_accuracy": 0.8730927098542451, "num_tokens": 81105894.0, "step": 2850 }, { "entropy": 0.4342146283015609, "epoch": 4.576, "grad_norm": 37.5, "learning_rate": 1.5868888888888888e-05, "loss": 6.9072, "mean_token_accuracy": 0.8706136908382177, "num_tokens": 81385419.0, "step": 2860 }, { "entropy": 0.4446837269701064, "epoch": 4.592, "grad_norm": 39.25, "learning_rate": 1.584666666666667e-05, "loss": 7.0316, "mean_token_accuracy": 0.869326951727271, "num_tokens": 81662404.0, "step": 2870 }, { "entropy": 0.4390052566304803, "epoch": 4.608, "grad_norm": 40.5, "learning_rate": 1.5824444444444446e-05, "loss": 6.9327, "mean_token_accuracy": 0.8691639252007007, "num_tokens": 81939896.0, "step": 2880 }, { "entropy": 0.4321968453936279, "epoch": 4.624, "grad_norm": 40.25, "learning_rate": 1.5802222222222224e-05, "loss": 6.8333, "mean_token_accuracy": 0.8713271267712116, "num_tokens": 82221249.0, "step": 2890 }, { "entropy": 0.4334391921758652, "epoch": 4.64, "grad_norm": 38.75, "learning_rate": 1.578e-05, "loss": 6.8606, "mean_token_accuracy": 0.8711547385901213, "num_tokens": 82499622.0, "step": 2900 }, { "epoch": 4.64, "eval_biology_entropy": 0.6608269560337067, "eval_biology_loss": 2.1894521713256836, "eval_biology_mean_token_accuracy": 0.6219791312217713, "eval_biology_num_tokens": 82499622.0, "eval_biology_runtime": 21.9909, "eval_biology_samples_per_second": 22.737, "eval_biology_steps_per_second": 5.684, "step": 2900 }, { "epoch": 4.64, "eval_chemistry_entropy": 0.5002575476169586, "eval_chemistry_loss": 1.6044608354568481, "eval_chemistry_mean_token_accuracy": 0.7112648162841797, "eval_chemistry_num_tokens": 82499622.0, "eval_chemistry_runtime": 26.8647, "eval_chemistry_samples_per_second": 18.612, "eval_chemistry_steps_per_second": 4.653, "step": 2900 }, { "epoch": 4.64, "eval_math_entropy": 0.4693549032211304, "eval_math_loss": 0.8120828866958618, "eval_math_mean_token_accuracy": 0.8071069555282593, "eval_math_num_tokens": 82499622.0, "eval_math_runtime": 27.522, "eval_math_samples_per_second": 18.167, "eval_math_steps_per_second": 4.542, "step": 2900 }, { "epoch": 4.64, "eval_cyber_entropy": 1.459328743815422, "eval_cyber_loss": 3.653904438018799, "eval_cyber_mean_token_accuracy": 0.45832002356648444, "eval_cyber_num_tokens": 82499622.0, "eval_cyber_runtime": 26.1555, "eval_cyber_samples_per_second": 15.178, "eval_cyber_steps_per_second": 3.823, "step": 2900 }, { "entropy": 0.43671272285282614, "epoch": 4.656, "grad_norm": 37.75, "learning_rate": 1.575777777777778e-05, "loss": 6.8917, "mean_token_accuracy": 0.8711845677345991, "num_tokens": 82777584.0, "step": 2910 }, { "entropy": 0.4397907822392881, "epoch": 4.672, "grad_norm": 37.0, "learning_rate": 1.5735555555555557e-05, "loss": 6.9582, "mean_token_accuracy": 0.8693775832653046, "num_tokens": 83056613.0, "step": 2920 }, { "entropy": 0.4401230952702463, "epoch": 4.688, "grad_norm": 37.25, "learning_rate": 1.5713333333333334e-05, "loss": 6.9897, "mean_token_accuracy": 0.8693465922027827, "num_tokens": 83344785.0, "step": 2930 }, { "entropy": 0.44647922869771717, "epoch": 4.704, "grad_norm": 37.5, "learning_rate": 1.5691111111111112e-05, "loss": 7.0419, "mean_token_accuracy": 0.8678850371390581, "num_tokens": 83630789.0, "step": 2940 }, { "entropy": 0.4565312635153532, "epoch": 4.72, "grad_norm": 37.25, "learning_rate": 1.5668888888888893e-05, "loss": 7.2012, "mean_token_accuracy": 0.8670194402337075, "num_tokens": 83924170.0, "step": 2950 }, { "entropy": 0.44184935493394734, "epoch": 4.736, "grad_norm": 39.25, "learning_rate": 1.5646666666666667e-05, "loss": 7.0097, "mean_token_accuracy": 0.8680846456438303, "num_tokens": 84194101.0, "step": 2960 }, { "entropy": 0.4245687565766275, "epoch": 4.752, "grad_norm": 37.25, "learning_rate": 1.5624444444444448e-05, "loss": 6.7265, "mean_token_accuracy": 0.8729787264019251, "num_tokens": 84477757.0, "step": 2970 }, { "entropy": 0.44007188118994234, "epoch": 4.768, "grad_norm": 35.0, "learning_rate": 1.5602222222222222e-05, "loss": 6.9734, "mean_token_accuracy": 0.8695344276726246, "num_tokens": 84767944.0, "step": 2980 }, { "entropy": 0.44919343683868646, "epoch": 4.784, "grad_norm": 38.25, "learning_rate": 1.5580000000000003e-05, "loss": 7.0871, "mean_token_accuracy": 0.867193479090929, "num_tokens": 85061700.0, "step": 2990 }, { "entropy": 0.4345102840103209, "epoch": 4.8, "grad_norm": 36.0, "learning_rate": 1.5557777777777778e-05, "loss": 6.8912, "mean_token_accuracy": 0.8710467349737883, "num_tokens": 85347473.0, "step": 3000 }, { "epoch": 4.8, "eval_biology_entropy": 0.6513788692951202, "eval_biology_loss": 2.2116551399230957, "eval_biology_mean_token_accuracy": 0.621419846534729, "eval_biology_num_tokens": 85347473.0, "eval_biology_runtime": 22.0073, "eval_biology_samples_per_second": 22.72, "eval_biology_steps_per_second": 5.68, "step": 3000 }, { "epoch": 4.8, "eval_chemistry_entropy": 0.4972455530166626, "eval_chemistry_loss": 1.6100175380706787, "eval_chemistry_mean_token_accuracy": 0.7110178966522217, "eval_chemistry_num_tokens": 85347473.0, "eval_chemistry_runtime": 26.9129, "eval_chemistry_samples_per_second": 18.578, "eval_chemistry_steps_per_second": 4.645, "step": 3000 }, { "epoch": 4.8, "eval_math_entropy": 0.4730637536048889, "eval_math_loss": 0.809832751750946, "eval_math_mean_token_accuracy": 0.806609429359436, "eval_math_num_tokens": 85347473.0, "eval_math_runtime": 27.5512, "eval_math_samples_per_second": 18.148, "eval_math_steps_per_second": 4.537, "step": 3000 }, { "epoch": 4.8, "eval_cyber_entropy": 1.4424864780902862, "eval_cyber_loss": 3.720710277557373, "eval_cyber_mean_token_accuracy": 0.45222695633769033, "eval_cyber_num_tokens": 85347473.0, "eval_cyber_runtime": 26.2183, "eval_cyber_samples_per_second": 15.142, "eval_cyber_steps_per_second": 3.814, "step": 3000 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.687212073540851e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }