{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8901746967842439, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.8410565290600062, "epoch": 0.017803493935684877, "grad_norm": 41.25, "learning_rate": 1.8e-07, "loss": 14.231, "mean_token_accuracy": 0.766436281055212, "num_tokens": 257189.0, "step": 10 }, { "entropy": 0.8121558612212538, "epoch": 0.035606987871369754, "grad_norm": 43.25, "learning_rate": 3.8e-07, "loss": 13.7028, "mean_token_accuracy": 0.7704860582947731, "num_tokens": 528285.0, "step": 20 }, { "entropy": 0.8321617901325226, "epoch": 0.053410481807054634, "grad_norm": 38.75, "learning_rate": 5.800000000000001e-07, "loss": 14.0203, "mean_token_accuracy": 0.7678851690143347, "num_tokens": 801635.0, "step": 30 }, { "entropy": 0.8524315822869539, "epoch": 0.07121397574273951, "grad_norm": 37.75, "learning_rate": 7.8e-07, "loss": 14.3071, "mean_token_accuracy": 0.7648834150284529, "num_tokens": 1060812.0, "step": 40 }, { "entropy": 0.8403167355805635, "epoch": 0.08901746967842439, "grad_norm": 34.5, "learning_rate": 9.800000000000001e-07, "loss": 14.0049, "mean_token_accuracy": 0.7667405389249324, "num_tokens": 1327380.0, "step": 50 }, { "entropy": 0.8425804980099201, "epoch": 0.10682096361410927, "grad_norm": 34.25, "learning_rate": 1.1800000000000001e-06, "loss": 13.9802, "mean_token_accuracy": 0.7680465068668128, "num_tokens": 1597405.0, "step": 60 }, { "entropy": 0.8755916632711888, "epoch": 0.12462445754979415, "grad_norm": 32.25, "learning_rate": 1.3800000000000001e-06, "loss": 14.4557, "mean_token_accuracy": 0.7614609662443399, "num_tokens": 1859684.0, "step": 70 }, { "entropy": 0.8682003870606423, "epoch": 0.14242795148547902, "grad_norm": 30.375, "learning_rate": 1.5800000000000001e-06, "loss": 14.1174, "mean_token_accuracy": 0.7653359699994325, "num_tokens": 2118068.0, "step": 80 }, { "entropy": 0.8360268581658602, "epoch": 0.1602314454211639, "grad_norm": 28.375, "learning_rate": 1.7800000000000001e-06, "loss": 13.6773, "mean_token_accuracy": 0.7724899325519801, "num_tokens": 2388824.0, "step": 90 }, { "entropy": 0.8634540975093842, "epoch": 0.17803493935684878, "grad_norm": 34.0, "learning_rate": 1.98e-06, "loss": 14.0689, "mean_token_accuracy": 0.7672080259770155, "num_tokens": 2644330.0, "step": 100 }, { "epoch": 0.17803493935684878, "eval_biology_entropy": 1.128741333961487, "eval_biology_loss": 1.1601769924163818, "eval_biology_mean_token_accuracy": 0.7042496418952942, "eval_biology_num_tokens": 2644330.0, "eval_biology_runtime": 54.4222, "eval_biology_samples_per_second": 9.187, "eval_biology_steps_per_second": 2.297, "step": 100 }, { "epoch": 0.17803493935684878, "eval_chemistry_entropy": 0.8544474849700928, "eval_chemistry_loss": 0.8545747995376587, "eval_chemistry_mean_token_accuracy": 0.7713311352729797, "eval_chemistry_num_tokens": 2644330.0, "eval_chemistry_runtime": 65.3258, "eval_chemistry_samples_per_second": 7.654, "eval_chemistry_steps_per_second": 1.913, "step": 100 }, { "entropy": 0.8453936260193586, "epoch": 0.19583843329253367, "grad_norm": 29.5, "learning_rate": 2.1800000000000003e-06, "loss": 13.6681, "mean_token_accuracy": 0.7730784911662341, "num_tokens": 2913700.0, "step": 110 }, { "entropy": 0.8460511896759272, "epoch": 0.21364192722821854, "grad_norm": 27.625, "learning_rate": 2.38e-06, "loss": 13.728, "mean_token_accuracy": 0.7713178683072328, "num_tokens": 3185255.0, "step": 120 }, { "entropy": 0.8403830077499151, "epoch": 0.2314454211639034, "grad_norm": 28.25, "learning_rate": 2.5800000000000003e-06, "loss": 13.5642, "mean_token_accuracy": 0.7730850588530302, "num_tokens": 3454750.0, "step": 130 }, { "entropy": 0.8515023712068797, "epoch": 0.2492489150995883, "grad_norm": 30.0, "learning_rate": 2.7800000000000005e-06, "loss": 13.8032, "mean_token_accuracy": 0.7721988521516323, "num_tokens": 3719113.0, "step": 140 }, { "entropy": 0.8474330805242062, "epoch": 0.26705240903527316, "grad_norm": 27.375, "learning_rate": 2.9800000000000003e-06, "loss": 13.728, "mean_token_accuracy": 0.7698215767741203, "num_tokens": 3990505.0, "step": 150 }, { "entropy": 0.8292176539078355, "epoch": 0.28485590297095803, "grad_norm": 24.625, "learning_rate": 3.1800000000000005e-06, "loss": 13.3391, "mean_token_accuracy": 0.776393149420619, "num_tokens": 4267403.0, "step": 160 }, { "entropy": 0.8280835278332234, "epoch": 0.30265939690664295, "grad_norm": 29.875, "learning_rate": 3.3800000000000007e-06, "loss": 13.3175, "mean_token_accuracy": 0.7770637154579163, "num_tokens": 4535458.0, "step": 170 }, { "entropy": 0.842758315615356, "epoch": 0.3204628908423278, "grad_norm": 27.5, "learning_rate": 3.58e-06, "loss": 13.6062, "mean_token_accuracy": 0.7731371156871318, "num_tokens": 4796815.0, "step": 180 }, { "entropy": 0.8247860476374627, "epoch": 0.3382663847780127, "grad_norm": 22.75, "learning_rate": 3.7800000000000002e-06, "loss": 13.2445, "mean_token_accuracy": 0.7772165313363075, "num_tokens": 5066948.0, "step": 190 }, { "entropy": 0.8495354067534209, "epoch": 0.35606987871369755, "grad_norm": 26.125, "learning_rate": 3.980000000000001e-06, "loss": 13.7137, "mean_token_accuracy": 0.7696468211710453, "num_tokens": 5324751.0, "step": 200 }, { "epoch": 0.35606987871369755, "eval_biology_entropy": 1.1391102423667907, "eval_biology_loss": 1.1645851135253906, "eval_biology_mean_token_accuracy": 0.7036963219642639, "eval_biology_num_tokens": 5324751.0, "eval_biology_runtime": 47.5522, "eval_biology_samples_per_second": 10.515, "eval_biology_steps_per_second": 2.629, "step": 200 }, { "epoch": 0.35606987871369755, "eval_chemistry_entropy": 0.8357059621810913, "eval_chemistry_loss": 0.8271888494491577, "eval_chemistry_mean_token_accuracy": 0.7765080814361572, "eval_chemistry_num_tokens": 5324751.0, "eval_chemistry_runtime": 58.2133, "eval_chemistry_samples_per_second": 8.589, "eval_chemistry_steps_per_second": 2.147, "step": 200 }, { "entropy": 0.8138596788048744, "epoch": 0.3738733726493824, "grad_norm": 27.875, "learning_rate": 4.18e-06, "loss": 13.0337, "mean_token_accuracy": 0.7797718059271574, "num_tokens": 5585508.0, "step": 210 }, { "entropy": 0.8215312957763672, "epoch": 0.39167686658506734, "grad_norm": 25.75, "learning_rate": 4.38e-06, "loss": 13.2224, "mean_token_accuracy": 0.7784637857228518, "num_tokens": 5848889.0, "step": 220 }, { "entropy": 0.8100055737420917, "epoch": 0.4094803605207522, "grad_norm": 27.625, "learning_rate": 4.58e-06, "loss": 13.0714, "mean_token_accuracy": 0.7809950839728117, "num_tokens": 6114855.0, "step": 230 }, { "entropy": 0.8117449183017016, "epoch": 0.4272838544564371, "grad_norm": 25.875, "learning_rate": 4.78e-06, "loss": 13.0914, "mean_token_accuracy": 0.7797230206429958, "num_tokens": 6378152.0, "step": 240 }, { "entropy": 0.8300687098875642, "epoch": 0.44508734839212194, "grad_norm": 24.5, "learning_rate": 4.980000000000001e-06, "loss": 13.3199, "mean_token_accuracy": 0.7753712415695191, "num_tokens": 6637273.0, "step": 250 }, { "entropy": 0.8000802919268608, "epoch": 0.4628908423278068, "grad_norm": 23.75, "learning_rate": 5.18e-06, "loss": 12.8911, "mean_token_accuracy": 0.7822641927748919, "num_tokens": 6896684.0, "step": 260 }, { "entropy": 0.8173866732046008, "epoch": 0.48069433626349173, "grad_norm": 25.75, "learning_rate": 5.380000000000001e-06, "loss": 13.1839, "mean_token_accuracy": 0.7791078709065914, "num_tokens": 7166608.0, "step": 270 }, { "entropy": 0.7851757485419512, "epoch": 0.4984978301991766, "grad_norm": 23.125, "learning_rate": 5.580000000000001e-06, "loss": 12.5647, "mean_token_accuracy": 0.7872234936803579, "num_tokens": 7444923.0, "step": 280 }, { "entropy": 0.8196133345365524, "epoch": 0.5163013241348615, "grad_norm": 23.75, "learning_rate": 5.78e-06, "loss": 13.195, "mean_token_accuracy": 0.7780409008264542, "num_tokens": 7706502.0, "step": 290 }, { "entropy": 0.8108824253082275, "epoch": 0.5341048180705463, "grad_norm": 25.75, "learning_rate": 5.98e-06, "loss": 13.1243, "mean_token_accuracy": 0.7791608296334743, "num_tokens": 7969704.0, "step": 300 }, { "epoch": 0.5341048180705463, "eval_biology_entropy": 1.1140506463050843, "eval_biology_loss": 1.1709669828414917, "eval_biology_mean_token_accuracy": 0.7033902740478516, "eval_biology_num_tokens": 7969704.0, "eval_biology_runtime": 47.6418, "eval_biology_samples_per_second": 10.495, "eval_biology_steps_per_second": 2.624, "step": 300 }, { "epoch": 0.5341048180705463, "eval_chemistry_entropy": 0.7940924577713012, "eval_chemistry_loss": 0.8012509942054749, "eval_chemistry_mean_token_accuracy": 0.7820386853218079, "eval_chemistry_num_tokens": 7969704.0, "eval_chemistry_runtime": 59.4475, "eval_chemistry_samples_per_second": 8.411, "eval_chemistry_steps_per_second": 2.103, "step": 300 }, { "entropy": 0.7919084688648581, "epoch": 0.5519083120062312, "grad_norm": 24.375, "learning_rate": 6.18e-06, "loss": 12.7303, "mean_token_accuracy": 0.7834540419280529, "num_tokens": 8242162.0, "step": 310 }, { "entropy": 0.8003328915685415, "epoch": 0.5697118059419161, "grad_norm": 22.875, "learning_rate": 6.380000000000001e-06, "loss": 12.8333, "mean_token_accuracy": 0.7824427511543035, "num_tokens": 8497852.0, "step": 320 }, { "entropy": 0.7982260027900339, "epoch": 0.587515299877601, "grad_norm": 28.0, "learning_rate": 6.5800000000000005e-06, "loss": 12.8827, "mean_token_accuracy": 0.7826927099376917, "num_tokens": 8757753.0, "step": 330 }, { "entropy": 0.7888958260416985, "epoch": 0.6053187938132859, "grad_norm": 25.125, "learning_rate": 6.780000000000001e-06, "loss": 12.7464, "mean_token_accuracy": 0.7863177515566349, "num_tokens": 9024677.0, "step": 340 }, { "entropy": 0.7913781819865108, "epoch": 0.6231222877489707, "grad_norm": 26.0, "learning_rate": 6.98e-06, "loss": 12.7471, "mean_token_accuracy": 0.7842035111039877, "num_tokens": 9291760.0, "step": 350 }, { "entropy": 0.7765169985592365, "epoch": 0.6409257816846556, "grad_norm": 22.0, "learning_rate": 7.180000000000001e-06, "loss": 12.5378, "mean_token_accuracy": 0.7859202962368727, "num_tokens": 9561091.0, "step": 360 }, { "entropy": 0.7792716162279248, "epoch": 0.6587292756203404, "grad_norm": 25.0, "learning_rate": 7.3800000000000005e-06, "loss": 12.4956, "mean_token_accuracy": 0.7880564954131841, "num_tokens": 9827272.0, "step": 370 }, { "entropy": 0.758336128294468, "epoch": 0.6765327695560254, "grad_norm": 25.0, "learning_rate": 7.58e-06, "loss": 12.1732, "mean_token_accuracy": 0.7918921418488025, "num_tokens": 10096065.0, "step": 380 }, { "entropy": 0.7533971995115281, "epoch": 0.6943362634917103, "grad_norm": 24.375, "learning_rate": 7.78e-06, "loss": 12.1132, "mean_token_accuracy": 0.7922064792364836, "num_tokens": 10364601.0, "step": 390 }, { "entropy": 0.7620638139545918, "epoch": 0.7121397574273951, "grad_norm": 23.625, "learning_rate": 7.980000000000002e-06, "loss": 12.3379, "mean_token_accuracy": 0.7897147350013256, "num_tokens": 10633325.0, "step": 400 }, { "epoch": 0.7121397574273951, "eval_biology_entropy": 1.1209784712791442, "eval_biology_loss": 1.175487995147705, "eval_biology_mean_token_accuracy": 0.7023502192497253, "eval_biology_num_tokens": 10633325.0, "eval_biology_runtime": 45.7349, "eval_biology_samples_per_second": 10.933, "eval_biology_steps_per_second": 2.733, "step": 400 }, { "epoch": 0.7121397574273951, "eval_chemistry_entropy": 0.7846209690570831, "eval_chemistry_loss": 0.7767007946968079, "eval_chemistry_mean_token_accuracy": 0.7876848134994506, "eval_chemistry_num_tokens": 10633325.0, "eval_chemistry_runtime": 56.279, "eval_chemistry_samples_per_second": 8.884, "eval_chemistry_steps_per_second": 2.221, "step": 400 }, { "entropy": 0.7557655736804009, "epoch": 0.72994325136308, "grad_norm": 25.0, "learning_rate": 8.18e-06, "loss": 12.1754, "mean_token_accuracy": 0.7914514016360045, "num_tokens": 10897916.0, "step": 410 }, { "entropy": 0.7626162808388471, "epoch": 0.7477467452987648, "grad_norm": 22.375, "learning_rate": 8.380000000000001e-06, "loss": 12.1991, "mean_token_accuracy": 0.7916438620537519, "num_tokens": 11165356.0, "step": 420 }, { "entropy": 0.7673018729314208, "epoch": 0.7655502392344498, "grad_norm": 24.375, "learning_rate": 8.580000000000001e-06, "loss": 12.3987, "mean_token_accuracy": 0.7888187035918236, "num_tokens": 11436799.0, "step": 430 }, { "entropy": 0.7744929634034634, "epoch": 0.7833537331701347, "grad_norm": 24.125, "learning_rate": 8.78e-06, "loss": 12.4708, "mean_token_accuracy": 0.787474300712347, "num_tokens": 11703496.0, "step": 440 }, { "entropy": 0.7655632747337222, "epoch": 0.8011572271058195, "grad_norm": 24.875, "learning_rate": 8.98e-06, "loss": 12.299, "mean_token_accuracy": 0.7900604665279388, "num_tokens": 11965530.0, "step": 450 }, { "entropy": 0.7661685338243842, "epoch": 0.8189607210415044, "grad_norm": 23.0, "learning_rate": 9.180000000000002e-06, "loss": 12.3101, "mean_token_accuracy": 0.7891276117414237, "num_tokens": 12224427.0, "step": 460 }, { "entropy": 0.7463042287155985, "epoch": 0.8367642149771892, "grad_norm": 24.125, "learning_rate": 9.38e-06, "loss": 12.0346, "mean_token_accuracy": 0.7945169288665056, "num_tokens": 12509124.0, "step": 470 }, { "entropy": 0.753023486584425, "epoch": 0.8545677089128741, "grad_norm": 21.25, "learning_rate": 9.58e-06, "loss": 12.0236, "mean_token_accuracy": 0.7931863989681005, "num_tokens": 12778408.0, "step": 480 }, { "entropy": 0.7310503415763379, "epoch": 0.8723712028485591, "grad_norm": 22.25, "learning_rate": 9.780000000000001e-06, "loss": 11.8094, "mean_token_accuracy": 0.7964685469865799, "num_tokens": 13046473.0, "step": 490 }, { "entropy": 0.7645759535953403, "epoch": 0.8901746967842439, "grad_norm": 22.375, "learning_rate": 9.980000000000001e-06, "loss": 12.2645, "mean_token_accuracy": 0.7906601417809724, "num_tokens": 13301659.0, "step": 500 }, { "epoch": 0.8901746967842439, "eval_biology_entropy": 1.092752426624298, "eval_biology_loss": 1.1775906085968018, "eval_biology_mean_token_accuracy": 0.7025688862800599, "eval_biology_num_tokens": 13301659.0, "eval_biology_runtime": 268.8833, "eval_biology_samples_per_second": 1.86, "eval_biology_steps_per_second": 0.465, "step": 500 }, { "epoch": 0.8901746967842439, "eval_chemistry_entropy": 0.7384080934524536, "eval_chemistry_loss": 0.7555699944496155, "eval_chemistry_mean_token_accuracy": 0.7920897974967956, "eval_chemistry_num_tokens": 13301659.0, "eval_chemistry_runtime": 450.4029, "eval_chemistry_samples_per_second": 1.11, "eval_chemistry_steps_per_second": 0.278, "step": 500 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 18, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3874307456966482e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }