{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22255605630668224, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.8066153490915895, "epoch": 0.004451121126133645, "grad_norm": 107.5, "learning_rate": 1.8e-07, "loss": 16.3591, "mean_token_accuracy": 0.7447933696210385, "num_tokens": 63133.0, "step": 10 }, { "entropy": 0.8401238698512316, "epoch": 0.00890224225226729, "grad_norm": 103.5, "learning_rate": 3.8e-07, "loss": 17.2936, "mean_token_accuracy": 0.7330440735444427, "num_tokens": 128968.0, "step": 20 }, { "entropy": 0.828661117143929, "epoch": 0.013353363378400934, "grad_norm": 96.5, "learning_rate": 5.800000000000001e-07, "loss": 16.5809, "mean_token_accuracy": 0.7381731692701579, "num_tokens": 193314.0, "step": 30 }, { "entropy": 0.8600894263014197, "epoch": 0.01780448450453458, "grad_norm": 96.0, "learning_rate": 7.8e-07, "loss": 17.7444, "mean_token_accuracy": 0.7324823886156082, "num_tokens": 257189.0, "step": 40 }, { "entropy": 0.8017022017389536, "epoch": 0.022255605630668224, "grad_norm": 84.5, "learning_rate": 9.800000000000001e-07, "loss": 15.9063, "mean_token_accuracy": 0.7455223135650157, "num_tokens": 324224.0, "step": 50 }, { "entropy": 0.846268966794014, "epoch": 0.026706726756801868, "grad_norm": 78.5, "learning_rate": 1.1800000000000001e-06, "loss": 16.6006, "mean_token_accuracy": 0.7411292420700193, "num_tokens": 394654.0, "step": 60 }, { "entropy": 0.8181243563070894, "epoch": 0.031157847882935515, "grad_norm": 76.5, "learning_rate": 1.3800000000000001e-06, "loss": 15.9846, "mean_token_accuracy": 0.746401545777917, "num_tokens": 461658.0, "step": 70 }, { "entropy": 0.8314600124955177, "epoch": 0.03560896900906916, "grad_norm": 75.0, "learning_rate": 1.5800000000000001e-06, "loss": 15.8227, "mean_token_accuracy": 0.7472479769960045, "num_tokens": 528285.0, "step": 80 }, { "entropy": 0.8807666478678584, "epoch": 0.04006009013520281, "grad_norm": 72.0, "learning_rate": 1.7800000000000001e-06, "loss": 16.6834, "mean_token_accuracy": 0.7396679904311896, "num_tokens": 595336.0, "step": 90 }, { "entropy": 0.8991396678611636, "epoch": 0.04451121126133645, "grad_norm": 79.5, "learning_rate": 1.98e-06, "loss": 16.3968, "mean_token_accuracy": 0.7396075185388327, "num_tokens": 663126.0, "step": 100 }, { "epoch": 0.04451121126133645, "eval_biology_entropy": 1.1188154411315918, "eval_biology_loss": 1.2691835165023804, "eval_biology_mean_token_accuracy": 0.6881385813951493, "eval_biology_num_tokens": 663126.0, "eval_biology_runtime": 40.4565, "eval_biology_samples_per_second": 12.359, "eval_biology_steps_per_second": 12.359, "step": 100 }, { "epoch": 0.04451121126133645, "eval_chemistry_entropy": 0.8804921235442161, "eval_chemistry_loss": 1.0042469501495361, "eval_chemistry_mean_token_accuracy": 0.7444319971203804, "eval_chemistry_num_tokens": 663126.0, "eval_chemistry_runtime": 46.3585, "eval_chemistry_samples_per_second": 10.786, "eval_chemistry_steps_per_second": 10.786, "step": 100 }, { "entropy": 0.8988691195845604, "epoch": 0.048962332387470095, "grad_norm": 100.5, "learning_rate": 2.1800000000000003e-06, "loss": 16.3063, "mean_token_accuracy": 0.7414613764733076, "num_tokens": 731060.0, "step": 110 }, { "entropy": 0.8918870648369193, "epoch": 0.053413453513603736, "grad_norm": 77.5, "learning_rate": 2.38e-06, "loss": 15.8311, "mean_token_accuracy": 0.7483173958957196, "num_tokens": 801635.0, "step": 120 }, { "entropy": 0.9162682231515646, "epoch": 0.05786457463973738, "grad_norm": 66.0, "learning_rate": 2.5800000000000003e-06, "loss": 16.1464, "mean_token_accuracy": 0.7448406910523773, "num_tokens": 867260.0, "step": 130 }, { "entropy": 0.9749668512493372, "epoch": 0.06231569576587103, "grad_norm": 79.5, "learning_rate": 2.7800000000000005e-06, "loss": 16.9562, "mean_token_accuracy": 0.7369577366858721, "num_tokens": 931344.0, "step": 140 }, { "entropy": 0.939477625861764, "epoch": 0.06676681689200467, "grad_norm": 56.75, "learning_rate": 2.9800000000000003e-06, "loss": 15.7327, "mean_token_accuracy": 0.7471803797408938, "num_tokens": 993586.0, "step": 150 }, { "entropy": 0.9718907386064529, "epoch": 0.07121793801813832, "grad_norm": 53.75, "learning_rate": 3.1800000000000005e-06, "loss": 16.2036, "mean_token_accuracy": 0.7392314806580543, "num_tokens": 1060812.0, "step": 160 }, { "entropy": 0.9165311623364687, "epoch": 0.07566905914427197, "grad_norm": 63.25, "learning_rate": 3.3800000000000007e-06, "loss": 15.2657, "mean_token_accuracy": 0.7518215283751488, "num_tokens": 1131832.0, "step": 170 }, { "entropy": 0.9465073021128774, "epoch": 0.08012018027040561, "grad_norm": 50.25, "learning_rate": 3.58e-06, "loss": 15.5603, "mean_token_accuracy": 0.7473610159009695, "num_tokens": 1200650.0, "step": 180 }, { "entropy": 0.9450380651280283, "epoch": 0.08457130139653925, "grad_norm": 54.0, "learning_rate": 3.7800000000000002e-06, "loss": 15.56, "mean_token_accuracy": 0.748077143356204, "num_tokens": 1265107.0, "step": 190 }, { "entropy": 0.9630168141797185, "epoch": 0.0890224225226729, "grad_norm": 58.75, "learning_rate": 3.980000000000001e-06, "loss": 15.5029, "mean_token_accuracy": 0.7486971555277705, "num_tokens": 1327380.0, "step": 200 }, { "epoch": 0.0890224225226729, "eval_biology_entropy": 1.19531831908226, "eval_biology_loss": 1.2584387063980103, "eval_biology_mean_token_accuracy": 0.6882349443435669, "eval_biology_num_tokens": 1327380.0, "eval_biology_runtime": 40.8783, "eval_biology_samples_per_second": 12.231, "eval_biology_steps_per_second": 12.231, "step": 200 }, { "epoch": 0.0890224225226729, "eval_chemistry_entropy": 0.9437598274946213, "eval_chemistry_loss": 0.9655953645706177, "eval_chemistry_mean_token_accuracy": 0.7501006088852883, "eval_chemistry_num_tokens": 1327380.0, "eval_chemistry_runtime": 46.6023, "eval_chemistry_samples_per_second": 10.729, "eval_chemistry_steps_per_second": 10.729, "step": 200 }, { "entropy": 0.9782480053603649, "epoch": 0.09347354364880654, "grad_norm": 64.0, "learning_rate": 4.18e-06, "loss": 15.9821, "mean_token_accuracy": 0.7436690799891948, "num_tokens": 1393379.0, "step": 210 }, { "entropy": 0.9402019061148167, "epoch": 0.09792466477494019, "grad_norm": 54.0, "learning_rate": 4.38e-06, "loss": 15.3729, "mean_token_accuracy": 0.7516820874065161, "num_tokens": 1460130.0, "step": 220 }, { "entropy": 0.9247835712507367, "epoch": 0.10237578590107384, "grad_norm": 54.5, "learning_rate": 4.58e-06, "loss": 15.0731, "mean_token_accuracy": 0.7559274602681398, "num_tokens": 1529183.0, "step": 230 }, { "entropy": 0.9673028320074082, "epoch": 0.10682690702720747, "grad_norm": 71.5, "learning_rate": 4.78e-06, "loss": 15.731, "mean_token_accuracy": 0.7473661951720715, "num_tokens": 1597405.0, "step": 240 }, { "entropy": 0.9974556604400278, "epoch": 0.11127802815334112, "grad_norm": 55.0, "learning_rate": 4.980000000000001e-06, "loss": 15.8962, "mean_token_accuracy": 0.7434635870158672, "num_tokens": 1661767.0, "step": 250 }, { "entropy": 0.985609365440905, "epoch": 0.11572914927947477, "grad_norm": 50.75, "learning_rate": 5.18e-06, "loss": 16.0267, "mean_token_accuracy": 0.7446854375302792, "num_tokens": 1728207.0, "step": 260 }, { "entropy": 0.9456103699281811, "epoch": 0.12018027040560841, "grad_norm": 58.0, "learning_rate": 5.380000000000001e-06, "loss": 15.3377, "mean_token_accuracy": 0.750850186496973, "num_tokens": 1796055.0, "step": 270 }, { "entropy": 0.9541573049500585, "epoch": 0.12463139153174206, "grad_norm": 56.5, "learning_rate": 5.580000000000001e-06, "loss": 15.2532, "mean_token_accuracy": 0.7530262626707553, "num_tokens": 1859684.0, "step": 280 }, { "entropy": 0.9941559780389071, "epoch": 0.1290825126578757, "grad_norm": 59.25, "learning_rate": 5.78e-06, "loss": 15.94, "mean_token_accuracy": 0.745009395852685, "num_tokens": 1921704.0, "step": 290 }, { "entropy": 0.981252990104258, "epoch": 0.13353363378400934, "grad_norm": 56.75, "learning_rate": 5.98e-06, "loss": 15.8943, "mean_token_accuracy": 0.7449573867022992, "num_tokens": 1985766.0, "step": 300 }, { "epoch": 0.13353363378400934, "eval_biology_entropy": 1.2063790675401687, "eval_biology_loss": 1.2592713832855225, "eval_biology_mean_token_accuracy": 0.6874204781055451, "eval_biology_num_tokens": 1985766.0, "eval_biology_runtime": 40.5061, "eval_biology_samples_per_second": 12.344, "eval_biology_steps_per_second": 12.344, "step": 300 }, { "epoch": 0.13353363378400934, "eval_chemistry_entropy": 0.9332761432528496, "eval_chemistry_loss": 0.9383891820907593, "eval_chemistry_mean_token_accuracy": 0.7543213546276093, "eval_chemistry_num_tokens": 1985766.0, "eval_chemistry_runtime": 46.2485, "eval_chemistry_samples_per_second": 10.811, "eval_chemistry_steps_per_second": 10.811, "step": 300 }, { "entropy": 0.9657653540372848, "epoch": 0.137984754910143, "grad_norm": 66.5, "learning_rate": 6.18e-06, "loss": 15.5291, "mean_token_accuracy": 0.7452911786735058, "num_tokens": 2049041.0, "step": 310 }, { "entropy": 0.922235100530088, "epoch": 0.14243587603627664, "grad_norm": 49.75, "learning_rate": 6.380000000000001e-06, "loss": 14.7109, "mean_token_accuracy": 0.75825478695333, "num_tokens": 2118068.0, "step": 320 }, { "entropy": 0.9044711474329233, "epoch": 0.14688699716241027, "grad_norm": 60.25, "learning_rate": 6.5800000000000005e-06, "loss": 14.5687, "mean_token_accuracy": 0.7618395145982504, "num_tokens": 2186387.0, "step": 330 }, { "entropy": 0.946225673891604, "epoch": 0.15133811828854393, "grad_norm": 52.5, "learning_rate": 6.780000000000001e-06, "loss": 15.1959, "mean_token_accuracy": 0.7535316452383996, "num_tokens": 2252650.0, "step": 340 }, { "entropy": 0.9036338411271572, "epoch": 0.15578923941467757, "grad_norm": 57.75, "learning_rate": 6.98e-06, "loss": 14.5854, "mean_token_accuracy": 0.7611672822386026, "num_tokens": 2320358.0, "step": 350 }, { "entropy": 0.9015818448737264, "epoch": 0.16024036054081123, "grad_norm": 49.5, "learning_rate": 7.180000000000001e-06, "loss": 14.5381, "mean_token_accuracy": 0.7606555309146643, "num_tokens": 2388824.0, "step": 360 }, { "entropy": 0.8864203749224544, "epoch": 0.16469148166694486, "grad_norm": 49.25, "learning_rate": 7.3800000000000005e-06, "loss": 14.1936, "mean_token_accuracy": 0.7665066320449114, "num_tokens": 2456144.0, "step": 370 }, { "entropy": 0.9866490814834833, "epoch": 0.1691426027930785, "grad_norm": 49.5, "learning_rate": 7.58e-06, "loss": 15.8412, "mean_token_accuracy": 0.7478409979492426, "num_tokens": 2515325.0, "step": 380 }, { "entropy": 0.9080646676942706, "epoch": 0.17359372391921216, "grad_norm": 48.25, "learning_rate": 7.78e-06, "loss": 14.5343, "mean_token_accuracy": 0.7594566397368908, "num_tokens": 2580490.0, "step": 390 }, { "entropy": 0.9095059128478169, "epoch": 0.1780448450453458, "grad_norm": 43.5, "learning_rate": 7.980000000000002e-06, "loss": 14.6647, "mean_token_accuracy": 0.7607249341905117, "num_tokens": 2644330.0, "step": 400 }, { "epoch": 0.1780448450453458, "eval_biology_entropy": 1.1971934199333192, "eval_biology_loss": 1.2638347148895264, "eval_biology_mean_token_accuracy": 0.6877701328396797, "eval_biology_num_tokens": 2644330.0, "eval_biology_runtime": 39.8022, "eval_biology_samples_per_second": 12.562, "eval_biology_steps_per_second": 12.562, "step": 400 }, { "epoch": 0.1780448450453458, "eval_chemistry_entropy": 0.8957328157424926, "eval_chemistry_loss": 0.9148933291435242, "eval_chemistry_mean_token_accuracy": 0.7592848987579346, "eval_chemistry_num_tokens": 2644330.0, "eval_chemistry_runtime": 46.2334, "eval_chemistry_samples_per_second": 10.815, "eval_chemistry_steps_per_second": 10.815, "step": 400 }, { "entropy": 0.8526191784068942, "epoch": 0.18249596617147945, "grad_norm": 50.0, "learning_rate": 8.18e-06, "loss": 13.7182, "mean_token_accuracy": 0.7745671790093184, "num_tokens": 2713234.0, "step": 410 }, { "entropy": 0.932603782787919, "epoch": 0.18694708729761308, "grad_norm": 47.5, "learning_rate": 8.380000000000001e-06, "loss": 15.028, "mean_token_accuracy": 0.7569812458008528, "num_tokens": 2783261.0, "step": 420 }, { "entropy": 0.9143912255764007, "epoch": 0.19139820842374672, "grad_norm": 43.0, "learning_rate": 8.580000000000001e-06, "loss": 14.7724, "mean_token_accuracy": 0.7596961252391339, "num_tokens": 2850170.0, "step": 430 }, { "entropy": 0.9113649705424904, "epoch": 0.19584932954988038, "grad_norm": 58.5, "learning_rate": 8.78e-06, "loss": 14.6432, "mean_token_accuracy": 0.760087676718831, "num_tokens": 2913700.0, "step": 440 }, { "entropy": 0.9072460785508156, "epoch": 0.200300450676014, "grad_norm": 46.5, "learning_rate": 8.98e-06, "loss": 14.609, "mean_token_accuracy": 0.7596194025129079, "num_tokens": 2981212.0, "step": 450 }, { "entropy": 0.8693920068442822, "epoch": 0.20475157180214767, "grad_norm": 53.25, "learning_rate": 9.180000000000002e-06, "loss": 13.9505, "mean_token_accuracy": 0.771946213953197, "num_tokens": 3048973.0, "step": 460 }, { "entropy": 0.9109398307278752, "epoch": 0.2092026929282813, "grad_norm": 47.25, "learning_rate": 9.38e-06, "loss": 14.614, "mean_token_accuracy": 0.7599313069134951, "num_tokens": 3117033.0, "step": 470 }, { "entropy": 0.8936059167608619, "epoch": 0.21365381405441494, "grad_norm": 50.25, "learning_rate": 9.58e-06, "loss": 14.4662, "mean_token_accuracy": 0.7608913701027632, "num_tokens": 3185255.0, "step": 480 }, { "entropy": 0.9031545946374535, "epoch": 0.2181049351805486, "grad_norm": 51.5, "learning_rate": 9.780000000000001e-06, "loss": 14.5197, "mean_token_accuracy": 0.7608289115130902, "num_tokens": 3253121.0, "step": 490 }, { "entropy": 0.8450184227898717, "epoch": 0.22255605630668224, "grad_norm": 49.5, "learning_rate": 9.980000000000001e-06, "loss": 13.4565, "mean_token_accuracy": 0.7740382503718137, "num_tokens": 3322823.0, "step": 500 }, { "epoch": 0.22255605630668224, "eval_biology_entropy": 1.2025449865460396, "eval_biology_loss": 1.267388939857483, "eval_biology_mean_token_accuracy": 0.6874357106685638, "eval_biology_num_tokens": 3322823.0, "eval_biology_runtime": 40.0827, "eval_biology_samples_per_second": 12.474, "eval_biology_steps_per_second": 12.474, "step": 500 }, { "epoch": 0.22255605630668224, "eval_chemistry_entropy": 0.8795891938209534, "eval_chemistry_loss": 0.8957814574241638, "eval_chemistry_mean_token_accuracy": 0.7629851229190826, "eval_chemistry_num_tokens": 3322823.0, "eval_chemistry_runtime": 46.3478, "eval_chemistry_samples_per_second": 10.788, "eval_chemistry_steps_per_second": 10.788, "step": 500 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2280979081801686e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }