{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.890224225226729, "eval_steps": 100, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.8066153490915895, "epoch": 0.004451121126133645, "grad_norm": 107.5, "learning_rate": 1.8e-07, "loss": 16.3591, "mean_token_accuracy": 0.7447933696210385, "num_tokens": 63133.0, "step": 10 }, { "entropy": 0.8401238698512316, "epoch": 0.00890224225226729, "grad_norm": 103.5, "learning_rate": 3.8e-07, "loss": 17.2936, "mean_token_accuracy": 0.7330440735444427, "num_tokens": 128968.0, "step": 20 }, { "entropy": 0.828661117143929, "epoch": 0.013353363378400934, "grad_norm": 96.5, "learning_rate": 5.800000000000001e-07, "loss": 16.5809, "mean_token_accuracy": 0.7381731692701579, "num_tokens": 193314.0, "step": 30 }, { "entropy": 0.8600894263014197, "epoch": 0.01780448450453458, "grad_norm": 96.0, "learning_rate": 7.8e-07, "loss": 17.7444, "mean_token_accuracy": 0.7324823886156082, "num_tokens": 257189.0, "step": 40 }, { "entropy": 0.8017022017389536, "epoch": 0.022255605630668224, "grad_norm": 84.5, "learning_rate": 9.800000000000001e-07, "loss": 15.9063, "mean_token_accuracy": 0.7455223135650157, "num_tokens": 324224.0, "step": 50 }, { "entropy": 0.846268966794014, "epoch": 0.026706726756801868, "grad_norm": 78.5, "learning_rate": 1.1800000000000001e-06, "loss": 16.6006, "mean_token_accuracy": 0.7411292420700193, "num_tokens": 394654.0, "step": 60 }, { "entropy": 0.8181243563070894, "epoch": 0.031157847882935515, "grad_norm": 76.5, "learning_rate": 1.3800000000000001e-06, "loss": 15.9846, "mean_token_accuracy": 0.746401545777917, "num_tokens": 461658.0, "step": 70 }, { "entropy": 0.8314600124955177, "epoch": 0.03560896900906916, "grad_norm": 75.0, "learning_rate": 1.5800000000000001e-06, "loss": 15.8227, "mean_token_accuracy": 0.7472479769960045, "num_tokens": 528285.0, "step": 80 }, { "entropy": 0.8807666478678584, "epoch": 0.04006009013520281, "grad_norm": 72.0, "learning_rate": 1.7800000000000001e-06, "loss": 16.6834, "mean_token_accuracy": 0.7396679904311896, "num_tokens": 595336.0, "step": 90 }, { "entropy": 0.8991396678611636, "epoch": 0.04451121126133645, "grad_norm": 79.5, "learning_rate": 1.98e-06, "loss": 16.3968, "mean_token_accuracy": 0.7396075185388327, "num_tokens": 663126.0, "step": 100 }, { "epoch": 0.04451121126133645, "eval_biology_entropy": 1.1188154411315918, "eval_biology_loss": 1.2691835165023804, "eval_biology_mean_token_accuracy": 0.6881385813951493, "eval_biology_num_tokens": 663126.0, "eval_biology_runtime": 40.4565, "eval_biology_samples_per_second": 12.359, "eval_biology_steps_per_second": 12.359, "step": 100 }, { "epoch": 0.04451121126133645, "eval_chemistry_entropy": 0.8804921235442161, "eval_chemistry_loss": 1.0042469501495361, "eval_chemistry_mean_token_accuracy": 0.7444319971203804, "eval_chemistry_num_tokens": 663126.0, "eval_chemistry_runtime": 46.3585, "eval_chemistry_samples_per_second": 10.786, "eval_chemistry_steps_per_second": 10.786, "step": 100 }, { "entropy": 0.8988691195845604, "epoch": 0.048962332387470095, "grad_norm": 100.5, "learning_rate": 2.1800000000000003e-06, "loss": 16.3063, "mean_token_accuracy": 0.7414613764733076, "num_tokens": 731060.0, "step": 110 }, { "entropy": 0.8918870648369193, "epoch": 0.053413453513603736, "grad_norm": 77.5, "learning_rate": 2.38e-06, "loss": 15.8311, "mean_token_accuracy": 0.7483173958957196, "num_tokens": 801635.0, "step": 120 }, { "entropy": 0.9162682231515646, "epoch": 0.05786457463973738, "grad_norm": 66.0, "learning_rate": 2.5800000000000003e-06, "loss": 16.1464, "mean_token_accuracy": 0.7448406910523773, "num_tokens": 867260.0, "step": 130 }, { "entropy": 0.9749668512493372, "epoch": 0.06231569576587103, "grad_norm": 79.5, "learning_rate": 2.7800000000000005e-06, "loss": 16.9562, "mean_token_accuracy": 0.7369577366858721, "num_tokens": 931344.0, "step": 140 }, { "entropy": 0.939477625861764, "epoch": 0.06676681689200467, "grad_norm": 56.75, "learning_rate": 2.9800000000000003e-06, "loss": 15.7327, "mean_token_accuracy": 0.7471803797408938, "num_tokens": 993586.0, "step": 150 }, { "entropy": 0.9718907386064529, "epoch": 0.07121793801813832, "grad_norm": 53.75, "learning_rate": 3.1800000000000005e-06, "loss": 16.2036, "mean_token_accuracy": 0.7392314806580543, "num_tokens": 1060812.0, "step": 160 }, { "entropy": 0.9165311623364687, "epoch": 0.07566905914427197, "grad_norm": 63.25, "learning_rate": 3.3800000000000007e-06, "loss": 15.2657, "mean_token_accuracy": 0.7518215283751488, "num_tokens": 1131832.0, "step": 170 }, { "entropy": 0.9465073021128774, "epoch": 0.08012018027040561, "grad_norm": 50.25, "learning_rate": 3.58e-06, "loss": 15.5603, "mean_token_accuracy": 0.7473610159009695, "num_tokens": 1200650.0, "step": 180 }, { "entropy": 0.9450380651280283, "epoch": 0.08457130139653925, "grad_norm": 54.0, "learning_rate": 3.7800000000000002e-06, "loss": 15.56, "mean_token_accuracy": 0.748077143356204, "num_tokens": 1265107.0, "step": 190 }, { "entropy": 0.9630168141797185, "epoch": 0.0890224225226729, "grad_norm": 58.75, "learning_rate": 3.980000000000001e-06, "loss": 15.5029, "mean_token_accuracy": 0.7486971555277705, "num_tokens": 1327380.0, "step": 200 }, { "epoch": 0.0890224225226729, "eval_biology_entropy": 1.19531831908226, "eval_biology_loss": 1.2584387063980103, "eval_biology_mean_token_accuracy": 0.6882349443435669, "eval_biology_num_tokens": 1327380.0, "eval_biology_runtime": 40.8783, "eval_biology_samples_per_second": 12.231, "eval_biology_steps_per_second": 12.231, "step": 200 }, { "epoch": 0.0890224225226729, "eval_chemistry_entropy": 0.9437598274946213, "eval_chemistry_loss": 0.9655953645706177, "eval_chemistry_mean_token_accuracy": 0.7501006088852883, "eval_chemistry_num_tokens": 1327380.0, "eval_chemistry_runtime": 46.6023, "eval_chemistry_samples_per_second": 10.729, "eval_chemistry_steps_per_second": 10.729, "step": 200 }, { "entropy": 0.9782480053603649, "epoch": 0.09347354364880654, "grad_norm": 64.0, "learning_rate": 4.18e-06, "loss": 15.9821, "mean_token_accuracy": 0.7436690799891948, "num_tokens": 1393379.0, "step": 210 }, { "entropy": 0.9402019061148167, "epoch": 0.09792466477494019, "grad_norm": 54.0, "learning_rate": 4.38e-06, "loss": 15.3729, "mean_token_accuracy": 0.7516820874065161, "num_tokens": 1460130.0, "step": 220 }, { "entropy": 0.9247835712507367, "epoch": 0.10237578590107384, "grad_norm": 54.5, "learning_rate": 4.58e-06, "loss": 15.0731, "mean_token_accuracy": 0.7559274602681398, "num_tokens": 1529183.0, "step": 230 }, { "entropy": 0.9673028320074082, "epoch": 0.10682690702720747, "grad_norm": 71.5, "learning_rate": 4.78e-06, "loss": 15.731, "mean_token_accuracy": 0.7473661951720715, "num_tokens": 1597405.0, "step": 240 }, { "entropy": 0.9974556604400278, "epoch": 0.11127802815334112, "grad_norm": 55.0, "learning_rate": 4.980000000000001e-06, "loss": 15.8962, "mean_token_accuracy": 0.7434635870158672, "num_tokens": 1661767.0, "step": 250 }, { "entropy": 0.985609365440905, "epoch": 0.11572914927947477, "grad_norm": 50.75, "learning_rate": 5.18e-06, "loss": 16.0267, "mean_token_accuracy": 0.7446854375302792, "num_tokens": 1728207.0, "step": 260 }, { "entropy": 0.9456103699281811, "epoch": 0.12018027040560841, "grad_norm": 58.0, "learning_rate": 5.380000000000001e-06, "loss": 15.3377, "mean_token_accuracy": 0.750850186496973, "num_tokens": 1796055.0, "step": 270 }, { "entropy": 0.9541573049500585, "epoch": 0.12463139153174206, "grad_norm": 56.5, "learning_rate": 5.580000000000001e-06, "loss": 15.2532, "mean_token_accuracy": 0.7530262626707553, "num_tokens": 1859684.0, "step": 280 }, { "entropy": 0.9941559780389071, "epoch": 0.1290825126578757, "grad_norm": 59.25, "learning_rate": 5.78e-06, "loss": 15.94, "mean_token_accuracy": 0.745009395852685, "num_tokens": 1921704.0, "step": 290 }, { "entropy": 0.981252990104258, "epoch": 0.13353363378400934, "grad_norm": 56.75, "learning_rate": 5.98e-06, "loss": 15.8943, "mean_token_accuracy": 0.7449573867022992, "num_tokens": 1985766.0, "step": 300 }, { "epoch": 0.13353363378400934, "eval_biology_entropy": 1.2063790675401687, "eval_biology_loss": 1.2592713832855225, "eval_biology_mean_token_accuracy": 0.6874204781055451, "eval_biology_num_tokens": 1985766.0, "eval_biology_runtime": 40.5061, "eval_biology_samples_per_second": 12.344, "eval_biology_steps_per_second": 12.344, "step": 300 }, { "epoch": 0.13353363378400934, "eval_chemistry_entropy": 0.9332761432528496, "eval_chemistry_loss": 0.9383891820907593, "eval_chemistry_mean_token_accuracy": 0.7543213546276093, "eval_chemistry_num_tokens": 1985766.0, "eval_chemistry_runtime": 46.2485, "eval_chemistry_samples_per_second": 10.811, "eval_chemistry_steps_per_second": 10.811, "step": 300 }, { "entropy": 0.9657653540372848, "epoch": 0.137984754910143, "grad_norm": 66.5, "learning_rate": 6.18e-06, "loss": 15.5291, "mean_token_accuracy": 0.7452911786735058, "num_tokens": 2049041.0, "step": 310 }, { "entropy": 0.922235100530088, "epoch": 0.14243587603627664, "grad_norm": 49.75, "learning_rate": 6.380000000000001e-06, "loss": 14.7109, "mean_token_accuracy": 0.75825478695333, "num_tokens": 2118068.0, "step": 320 }, { "entropy": 0.9044711474329233, "epoch": 0.14688699716241027, "grad_norm": 60.25, "learning_rate": 6.5800000000000005e-06, "loss": 14.5687, "mean_token_accuracy": 0.7618395145982504, "num_tokens": 2186387.0, "step": 330 }, { "entropy": 0.946225673891604, "epoch": 0.15133811828854393, "grad_norm": 52.5, "learning_rate": 6.780000000000001e-06, "loss": 15.1959, "mean_token_accuracy": 0.7535316452383996, "num_tokens": 2252650.0, "step": 340 }, { "entropy": 0.9036338411271572, "epoch": 0.15578923941467757, "grad_norm": 57.75, "learning_rate": 6.98e-06, "loss": 14.5854, "mean_token_accuracy": 0.7611672822386026, "num_tokens": 2320358.0, "step": 350 }, { "entropy": 0.9015818448737264, "epoch": 0.16024036054081123, "grad_norm": 49.5, "learning_rate": 7.180000000000001e-06, "loss": 14.5381, "mean_token_accuracy": 0.7606555309146643, "num_tokens": 2388824.0, "step": 360 }, { "entropy": 0.8864203749224544, "epoch": 0.16469148166694486, "grad_norm": 49.25, "learning_rate": 7.3800000000000005e-06, "loss": 14.1936, "mean_token_accuracy": 0.7665066320449114, "num_tokens": 2456144.0, "step": 370 }, { "entropy": 0.9866490814834833, "epoch": 0.1691426027930785, "grad_norm": 49.5, "learning_rate": 7.58e-06, "loss": 15.8412, "mean_token_accuracy": 0.7478409979492426, "num_tokens": 2515325.0, "step": 380 }, { "entropy": 0.9080646676942706, "epoch": 0.17359372391921216, "grad_norm": 48.25, "learning_rate": 7.78e-06, "loss": 14.5343, "mean_token_accuracy": 0.7594566397368908, "num_tokens": 2580490.0, "step": 390 }, { "entropy": 0.9095059128478169, "epoch": 0.1780448450453458, "grad_norm": 43.5, "learning_rate": 7.980000000000002e-06, "loss": 14.6647, "mean_token_accuracy": 0.7607249341905117, "num_tokens": 2644330.0, "step": 400 }, { "epoch": 0.1780448450453458, "eval_biology_entropy": 1.1971934199333192, "eval_biology_loss": 1.2638347148895264, "eval_biology_mean_token_accuracy": 0.6877701328396797, "eval_biology_num_tokens": 2644330.0, "eval_biology_runtime": 39.8022, "eval_biology_samples_per_second": 12.562, "eval_biology_steps_per_second": 12.562, "step": 400 }, { "epoch": 0.1780448450453458, "eval_chemistry_entropy": 0.8957328157424926, "eval_chemistry_loss": 0.9148933291435242, "eval_chemistry_mean_token_accuracy": 0.7592848987579346, "eval_chemistry_num_tokens": 2644330.0, "eval_chemistry_runtime": 46.2334, "eval_chemistry_samples_per_second": 10.815, "eval_chemistry_steps_per_second": 10.815, "step": 400 }, { "entropy": 0.8526191784068942, "epoch": 0.18249596617147945, "grad_norm": 50.0, "learning_rate": 8.18e-06, "loss": 13.7182, "mean_token_accuracy": 0.7745671790093184, "num_tokens": 2713234.0, "step": 410 }, { "entropy": 0.932603782787919, "epoch": 0.18694708729761308, "grad_norm": 47.5, "learning_rate": 8.380000000000001e-06, "loss": 15.028, "mean_token_accuracy": 0.7569812458008528, "num_tokens": 2783261.0, "step": 420 }, { "entropy": 0.9143912255764007, "epoch": 0.19139820842374672, "grad_norm": 43.0, "learning_rate": 8.580000000000001e-06, "loss": 14.7724, "mean_token_accuracy": 0.7596961252391339, "num_tokens": 2850170.0, "step": 430 }, { "entropy": 0.9113649705424904, "epoch": 0.19584932954988038, "grad_norm": 58.5, "learning_rate": 8.78e-06, "loss": 14.6432, "mean_token_accuracy": 0.760087676718831, "num_tokens": 2913700.0, "step": 440 }, { "entropy": 0.9072460785508156, "epoch": 0.200300450676014, "grad_norm": 46.5, "learning_rate": 8.98e-06, "loss": 14.609, "mean_token_accuracy": 0.7596194025129079, "num_tokens": 2981212.0, "step": 450 }, { "entropy": 0.8693920068442822, "epoch": 0.20475157180214767, "grad_norm": 53.25, "learning_rate": 9.180000000000002e-06, "loss": 13.9505, "mean_token_accuracy": 0.771946213953197, "num_tokens": 3048973.0, "step": 460 }, { "entropy": 0.9109398307278752, "epoch": 0.2092026929282813, "grad_norm": 47.25, "learning_rate": 9.38e-06, "loss": 14.614, "mean_token_accuracy": 0.7599313069134951, "num_tokens": 3117033.0, "step": 470 }, { "entropy": 0.8936059167608619, "epoch": 0.21365381405441494, "grad_norm": 50.25, "learning_rate": 9.58e-06, "loss": 14.4662, "mean_token_accuracy": 0.7608913701027632, "num_tokens": 3185255.0, "step": 480 }, { "entropy": 0.9031545946374535, "epoch": 0.2181049351805486, "grad_norm": 51.5, "learning_rate": 9.780000000000001e-06, "loss": 14.5197, "mean_token_accuracy": 0.7608289115130902, "num_tokens": 3253121.0, "step": 490 }, { "entropy": 0.8450184227898717, "epoch": 0.22255605630668224, "grad_norm": 49.5, "learning_rate": 9.980000000000001e-06, "loss": 13.4565, "mean_token_accuracy": 0.7740382503718137, "num_tokens": 3322823.0, "step": 500 }, { "epoch": 0.22255605630668224, "eval_biology_entropy": 1.2025449865460396, "eval_biology_loss": 1.267388939857483, "eval_biology_mean_token_accuracy": 0.6874357106685638, "eval_biology_num_tokens": 3322823.0, "eval_biology_runtime": 40.0827, "eval_biology_samples_per_second": 12.474, "eval_biology_steps_per_second": 12.474, "step": 500 }, { "epoch": 0.22255605630668224, "eval_chemistry_entropy": 0.8795891938209534, "eval_chemistry_loss": 0.8957814574241638, "eval_chemistry_mean_token_accuracy": 0.7629851229190826, "eval_chemistry_num_tokens": 3322823.0, "eval_chemistry_runtime": 46.3478, "eval_chemistry_samples_per_second": 10.788, "eval_chemistry_steps_per_second": 10.788, "step": 500 }, { "entropy": 0.8574318964034319, "epoch": 0.2270071774328159, "grad_norm": 45.25, "learning_rate": 1.018e-05, "loss": 13.9042, "mean_token_accuracy": 0.7678481444716454, "num_tokens": 3393393.0, "step": 510 }, { "entropy": 0.9288356432691216, "epoch": 0.23145829855894953, "grad_norm": 57.25, "learning_rate": 1.038e-05, "loss": 15.193, "mean_token_accuracy": 0.7506348451599478, "num_tokens": 3454750.0, "step": 520 }, { "entropy": 0.8564269673079252, "epoch": 0.2359094196850832, "grad_norm": 50.75, "learning_rate": 1.0580000000000002e-05, "loss": 13.7644, "mean_token_accuracy": 0.7751363463699817, "num_tokens": 3526914.0, "step": 530 }, { "entropy": 0.93802858479321, "epoch": 0.24036054081121683, "grad_norm": 45.0, "learning_rate": 1.0780000000000002e-05, "loss": 15.0494, "mean_token_accuracy": 0.7559149663895368, "num_tokens": 3589449.0, "step": 540 }, { "entropy": 0.9012869004160166, "epoch": 0.24481166193735046, "grad_norm": 50.25, "learning_rate": 1.0980000000000002e-05, "loss": 14.2344, "mean_token_accuracy": 0.7642816316336394, "num_tokens": 3655092.0, "step": 550 }, { "entropy": 0.8946880368515849, "epoch": 0.24926278306348412, "grad_norm": 57.25, "learning_rate": 1.1180000000000001e-05, "loss": 14.5832, "mean_token_accuracy": 0.7632339514791966, "num_tokens": 3719113.0, "step": 560 }, { "entropy": 0.8879899585619568, "epoch": 0.25371390418961776, "grad_norm": 44.5, "learning_rate": 1.138e-05, "loss": 14.3692, "mean_token_accuracy": 0.7596106130629778, "num_tokens": 3785282.0, "step": 570 }, { "entropy": 0.9068781601265072, "epoch": 0.2581650253157514, "grad_norm": 42.25, "learning_rate": 1.1580000000000001e-05, "loss": 14.6096, "mean_token_accuracy": 0.761234056390822, "num_tokens": 3852276.0, "step": 580 }, { "entropy": 0.8276193620637059, "epoch": 0.262616146441885, "grad_norm": 43.25, "learning_rate": 1.178e-05, "loss": 13.3892, "mean_token_accuracy": 0.7755540499463678, "num_tokens": 3925649.0, "step": 590 }, { "entropy": 0.8920110030099749, "epoch": 0.2670672675680187, "grad_norm": 52.25, "learning_rate": 1.198e-05, "loss": 14.0197, "mean_token_accuracy": 0.766492671892047, "num_tokens": 3990505.0, "step": 600 }, { "epoch": 0.2670672675680187, "eval_biology_entropy": 1.2123423200249672, "eval_biology_loss": 1.2729928493499756, "eval_biology_mean_token_accuracy": 0.6870606996417046, "eval_biology_num_tokens": 3990505.0, "eval_biology_runtime": 39.7983, "eval_biology_samples_per_second": 12.563, "eval_biology_steps_per_second": 12.563, "step": 600 }, { "epoch": 0.2670672675680187, "eval_chemistry_entropy": 0.8659857953190804, "eval_chemistry_loss": 0.8781383037567139, "eval_chemistry_mean_token_accuracy": 0.7664026654362679, "eval_chemistry_num_tokens": 3990505.0, "eval_chemistry_runtime": 46.1775, "eval_chemistry_samples_per_second": 10.828, "eval_chemistry_steps_per_second": 10.828, "step": 600 }, { "entropy": 0.8550863016396761, "epoch": 0.27151838869415235, "grad_norm": 51.25, "learning_rate": 1.218e-05, "loss": 13.8181, "mean_token_accuracy": 0.7682087656110526, "num_tokens": 4060657.0, "step": 610 }, { "entropy": 0.8537623688578606, "epoch": 0.275969509820286, "grad_norm": 42.25, "learning_rate": 1.2380000000000002e-05, "loss": 13.6601, "mean_token_accuracy": 0.772542554140091, "num_tokens": 4133119.0, "step": 620 }, { "entropy": 0.8621461872011423, "epoch": 0.2804206309464196, "grad_norm": 49.25, "learning_rate": 1.2580000000000002e-05, "loss": 13.9865, "mean_token_accuracy": 0.7680005200207234, "num_tokens": 4200051.0, "step": 630 }, { "entropy": 0.9106066713109613, "epoch": 0.2848717520725533, "grad_norm": 49.25, "learning_rate": 1.2780000000000001e-05, "loss": 14.717, "mean_token_accuracy": 0.7637291874736547, "num_tokens": 4267403.0, "step": 640 }, { "entropy": 0.868153141438961, "epoch": 0.28932287319868694, "grad_norm": 52.25, "learning_rate": 1.2980000000000001e-05, "loss": 13.8988, "mean_token_accuracy": 0.7680647127330303, "num_tokens": 4333268.0, "step": 650 }, { "entropy": 0.8451825473457575, "epoch": 0.29377399432482054, "grad_norm": 53.5, "learning_rate": 1.3180000000000001e-05, "loss": 13.5394, "mean_token_accuracy": 0.7747031616047024, "num_tokens": 4400242.0, "step": 660 }, { "entropy": 0.8826779069378972, "epoch": 0.2982251154509542, "grad_norm": 49.75, "learning_rate": 1.3380000000000002e-05, "loss": 14.0787, "mean_token_accuracy": 0.7660945057868958, "num_tokens": 4464714.0, "step": 670 }, { "entropy": 0.8183048281818628, "epoch": 0.30267623657708786, "grad_norm": 47.75, "learning_rate": 1.3580000000000002e-05, "loss": 13.4564, "mean_token_accuracy": 0.7773742496967315, "num_tokens": 4535458.0, "step": 680 }, { "entropy": 0.8536316430196166, "epoch": 0.3071273577032215, "grad_norm": 48.25, "learning_rate": 1.378e-05, "loss": 13.552, "mean_token_accuracy": 0.7754087567329406, "num_tokens": 4599099.0, "step": 690 }, { "entropy": 0.816833440028131, "epoch": 0.31157847882935513, "grad_norm": 39.0, "learning_rate": 1.398e-05, "loss": 13.1791, "mean_token_accuracy": 0.777547013387084, "num_tokens": 4662903.0, "step": 700 }, { "epoch": 0.31157847882935513, "eval_biology_entropy": 1.2325178788900375, "eval_biology_loss": 1.278289556503296, "eval_biology_mean_token_accuracy": 0.686549211382866, "eval_biology_num_tokens": 4662903.0, "eval_biology_runtime": 39.6837, "eval_biology_samples_per_second": 12.6, "eval_biology_steps_per_second": 12.6, "step": 700 }, { "epoch": 0.31157847882935513, "eval_chemistry_entropy": 0.870070047557354, "eval_chemistry_loss": 0.8674882650375366, "eval_chemistry_mean_token_accuracy": 0.7685190732479096, "eval_chemistry_num_tokens": 4662903.0, "eval_chemistry_runtime": 46.0896, "eval_chemistry_samples_per_second": 10.848, "eval_chemistry_steps_per_second": 10.848, "step": 700 }, { "entropy": 0.8818355791270733, "epoch": 0.3160295999554888, "grad_norm": 54.0, "learning_rate": 1.418e-05, "loss": 14.2266, "mean_token_accuracy": 0.7657015427947045, "num_tokens": 4729166.0, "step": 710 }, { "entropy": 0.910587764903903, "epoch": 0.32048072108162245, "grad_norm": 54.5, "learning_rate": 1.4380000000000001e-05, "loss": 14.7617, "mean_token_accuracy": 0.7591140177100897, "num_tokens": 4796815.0, "step": 720 }, { "entropy": 0.8099086729809641, "epoch": 0.32493184220775606, "grad_norm": 51.25, "learning_rate": 1.4580000000000001e-05, "loss": 12.9674, "mean_token_accuracy": 0.7837777521461249, "num_tokens": 4865172.0, "step": 730 }, { "entropy": 0.8765029039233923, "epoch": 0.3293829633338897, "grad_norm": 44.25, "learning_rate": 1.478e-05, "loss": 14.0705, "mean_token_accuracy": 0.7659166298806668, "num_tokens": 4932671.0, "step": 740 }, { "entropy": 0.8784225210547447, "epoch": 0.3338340844600234, "grad_norm": 44.0, "learning_rate": 1.498e-05, "loss": 14.1774, "mean_token_accuracy": 0.7669123791158199, "num_tokens": 4998710.0, "step": 750 }, { "entropy": 0.8316720003262162, "epoch": 0.338285205586157, "grad_norm": 42.75, "learning_rate": 1.5180000000000002e-05, "loss": 13.3239, "mean_token_accuracy": 0.7748636573553085, "num_tokens": 5066948.0, "step": 760 }, { "entropy": 0.8594924572855234, "epoch": 0.34273632671229065, "grad_norm": 49.5, "learning_rate": 1.5380000000000002e-05, "loss": 13.8153, "mean_token_accuracy": 0.7711022242903709, "num_tokens": 5129950.0, "step": 770 }, { "entropy": 0.895938608981669, "epoch": 0.3471874478384243, "grad_norm": 48.25, "learning_rate": 1.5580000000000003e-05, "loss": 14.4493, "mean_token_accuracy": 0.7591051306575537, "num_tokens": 5193519.0, "step": 780 }, { "entropy": 0.8509046232327819, "epoch": 0.351638568964558, "grad_norm": 56.5, "learning_rate": 1.578e-05, "loss": 13.6439, "mean_token_accuracy": 0.7696408761665225, "num_tokens": 5256503.0, "step": 790 }, { "entropy": 0.9008656185120344, "epoch": 0.3560896900906916, "grad_norm": 48.75, "learning_rate": 1.5980000000000003e-05, "loss": 14.7305, "mean_token_accuracy": 0.7615581404417753, "num_tokens": 5324751.0, "step": 800 }, { "epoch": 0.3560896900906916, "eval_biology_entropy": 1.267573108136654, "eval_biology_loss": 1.2815055847167969, "eval_biology_mean_token_accuracy": 0.6854563910365105, "eval_biology_num_tokens": 5324751.0, "eval_biology_runtime": 39.7889, "eval_biology_samples_per_second": 12.566, "eval_biology_steps_per_second": 12.566, "step": 800 }, { "epoch": 0.3560896900906916, "eval_chemistry_entropy": 0.8897559930682182, "eval_chemistry_loss": 0.8585976362228394, "eval_chemistry_mean_token_accuracy": 0.7700544927716255, "eval_chemistry_num_tokens": 5324751.0, "eval_chemistry_runtime": 45.9968, "eval_chemistry_samples_per_second": 10.87, "eval_chemistry_steps_per_second": 10.87, "step": 800 }, { "entropy": 0.7946027474477887, "epoch": 0.36054081121682524, "grad_norm": 50.0, "learning_rate": 1.618e-05, "loss": 12.8437, "mean_token_accuracy": 0.7842005740851163, "num_tokens": 5391664.0, "step": 810 }, { "entropy": 0.8090571435168386, "epoch": 0.3649919323429589, "grad_norm": 45.0, "learning_rate": 1.638e-05, "loss": 12.7173, "mean_token_accuracy": 0.7849415507167578, "num_tokens": 5458519.0, "step": 820 }, { "entropy": 0.8408348582684994, "epoch": 0.3694430534690925, "grad_norm": 46.0, "learning_rate": 1.658e-05, "loss": 13.9973, "mean_token_accuracy": 0.7693964328616858, "num_tokens": 5523391.0, "step": 830 }, { "entropy": 0.8412857724353671, "epoch": 0.37389417459522617, "grad_norm": 44.0, "learning_rate": 1.6780000000000002e-05, "loss": 13.3081, "mean_token_accuracy": 0.7760034879669547, "num_tokens": 5585508.0, "step": 840 }, { "entropy": 0.836153868213296, "epoch": 0.37834529572135983, "grad_norm": 48.5, "learning_rate": 1.698e-05, "loss": 13.6747, "mean_token_accuracy": 0.775155283510685, "num_tokens": 5650755.0, "step": 850 }, { "entropy": 0.8424218002706766, "epoch": 0.38279641684749344, "grad_norm": 45.0, "learning_rate": 1.718e-05, "loss": 13.4803, "mean_token_accuracy": 0.7742194497957826, "num_tokens": 5716321.0, "step": 860 }, { "entropy": 0.883666661940515, "epoch": 0.3872475379736271, "grad_norm": 50.75, "learning_rate": 1.7380000000000003e-05, "loss": 14.1247, "mean_token_accuracy": 0.7642681807279587, "num_tokens": 5782698.0, "step": 870 }, { "entropy": 0.8131563207134604, "epoch": 0.39169865909976076, "grad_norm": 39.0, "learning_rate": 1.758e-05, "loss": 13.2211, "mean_token_accuracy": 0.7782290887087584, "num_tokens": 5848889.0, "step": 880 }, { "entropy": 0.8608601313084364, "epoch": 0.3961497802258944, "grad_norm": 44.0, "learning_rate": 1.7780000000000003e-05, "loss": 13.9079, "mean_token_accuracy": 0.7717852048575878, "num_tokens": 5912919.0, "step": 890 }, { "entropy": 0.8315491866320371, "epoch": 0.400600901352028, "grad_norm": 42.25, "learning_rate": 1.798e-05, "loss": 13.4828, "mean_token_accuracy": 0.7756550934165716, "num_tokens": 5980229.0, "step": 900 }, { "epoch": 0.400600901352028, "eval_biology_entropy": 1.2335028433203696, "eval_biology_loss": 1.2846676111221313, "eval_biology_mean_token_accuracy": 0.6853172712922097, "eval_biology_num_tokens": 5980229.0, "eval_biology_runtime": 39.7136, "eval_biology_samples_per_second": 12.59, "eval_biology_steps_per_second": 12.59, "step": 900 }, { "epoch": 0.400600901352028, "eval_chemistry_entropy": 0.8548561576008796, "eval_chemistry_loss": 0.8478842973709106, "eval_chemistry_mean_token_accuracy": 0.773112800002098, "eval_chemistry_num_tokens": 5980229.0, "eval_chemistry_runtime": 45.9488, "eval_chemistry_samples_per_second": 10.882, "eval_chemistry_steps_per_second": 10.882, "step": 900 }, { "entropy": 0.7898269753903151, "epoch": 0.4050520224781617, "grad_norm": 40.25, "learning_rate": 1.8180000000000002e-05, "loss": 12.7209, "mean_token_accuracy": 0.784660654142499, "num_tokens": 6048508.0, "step": 910 }, { "entropy": 0.8143722828477621, "epoch": 0.40950314360429535, "grad_norm": 44.5, "learning_rate": 1.8380000000000004e-05, "loss": 13.2591, "mean_token_accuracy": 0.7791141759604215, "num_tokens": 6114855.0, "step": 920 }, { "entropy": 0.8494924793019891, "epoch": 0.41395426473042896, "grad_norm": 40.0, "learning_rate": 1.858e-05, "loss": 13.506, "mean_token_accuracy": 0.776913444697857, "num_tokens": 6179050.0, "step": 930 }, { "entropy": 0.8335931519046426, "epoch": 0.4184053858565626, "grad_norm": 45.25, "learning_rate": 1.878e-05, "loss": 13.6217, "mean_token_accuracy": 0.7724651444703341, "num_tokens": 6244987.0, "step": 940 }, { "entropy": 0.8118610519915819, "epoch": 0.4228565069826963, "grad_norm": 49.75, "learning_rate": 1.898e-05, "loss": 13.0423, "mean_token_accuracy": 0.7783929593861103, "num_tokens": 6310799.0, "step": 950 }, { "entropy": 0.8028015844523907, "epoch": 0.4273076281088299, "grad_norm": 48.25, "learning_rate": 1.918e-05, "loss": 13.0412, "mean_token_accuracy": 0.778607621230185, "num_tokens": 6378152.0, "step": 960 }, { "entropy": 0.8370830919593573, "epoch": 0.43175874923496355, "grad_norm": 44.75, "learning_rate": 1.938e-05, "loss": 13.2798, "mean_token_accuracy": 0.7766136281192303, "num_tokens": 6442491.0, "step": 970 }, { "entropy": 0.862270618416369, "epoch": 0.4362098703610972, "grad_norm": 39.5, "learning_rate": 1.9580000000000002e-05, "loss": 14.0144, "mean_token_accuracy": 0.7675615277141332, "num_tokens": 6510231.0, "step": 980 }, { "entropy": 0.8360511595383286, "epoch": 0.44066099148723087, "grad_norm": 48.0, "learning_rate": 1.978e-05, "loss": 13.4815, "mean_token_accuracy": 0.7760949255898595, "num_tokens": 6572858.0, "step": 990 }, { "entropy": 0.8388594528660178, "epoch": 0.4451121126133645, "grad_norm": 43.25, "learning_rate": 1.9980000000000002e-05, "loss": 13.6609, "mean_token_accuracy": 0.7700441874563694, "num_tokens": 6637273.0, "step": 1000 }, { "epoch": 0.4451121126133645, "eval_biology_entropy": 1.187324990749359, "eval_biology_loss": 1.2962696552276611, "eval_biology_mean_token_accuracy": 0.6844774860739707, "eval_biology_num_tokens": 6637273.0, "eval_biology_runtime": 39.733, "eval_biology_samples_per_second": 12.584, "eval_biology_steps_per_second": 12.584, "step": 1000 }, { "epoch": 0.4451121126133645, "eval_chemistry_entropy": 0.8146472455263137, "eval_chemistry_loss": 0.8452854156494141, "eval_chemistry_mean_token_accuracy": 0.7733621709942817, "eval_chemistry_num_tokens": 6637273.0, "eval_chemistry_runtime": 46.1309, "eval_chemistry_samples_per_second": 10.839, "eval_chemistry_steps_per_second": 10.839, "step": 1000 }, { "entropy": 0.8283287117257714, "epoch": 0.44956323373949814, "grad_norm": 45.25, "learning_rate": 1.9980000000000002e-05, "loss": 13.2785, "mean_token_accuracy": 0.7738786302506924, "num_tokens": 6700799.0, "step": 1010 }, { "entropy": 0.8240121186710894, "epoch": 0.4540143548656318, "grad_norm": 43.0, "learning_rate": 1.995777777777778e-05, "loss": 13.5714, "mean_token_accuracy": 0.7778675271198153, "num_tokens": 6765798.0, "step": 1020 }, { "entropy": 0.8027254937216639, "epoch": 0.4584654759917654, "grad_norm": 45.0, "learning_rate": 1.9935555555555557e-05, "loss": 12.958, "mean_token_accuracy": 0.7836722049862146, "num_tokens": 6832774.0, "step": 1030 }, { "entropy": 0.8506452234461903, "epoch": 0.46291659711789906, "grad_norm": 40.75, "learning_rate": 1.9913333333333335e-05, "loss": 13.6169, "mean_token_accuracy": 0.7709769554436207, "num_tokens": 6896684.0, "step": 1040 }, { "entropy": 0.7970458004623652, "epoch": 0.4673677182440327, "grad_norm": 49.5, "learning_rate": 1.9891111111111112e-05, "loss": 13.1299, "mean_token_accuracy": 0.7849638484418392, "num_tokens": 6964423.0, "step": 1050 }, { "entropy": 0.811302705295384, "epoch": 0.4718188393701664, "grad_norm": 37.5, "learning_rate": 1.986888888888889e-05, "loss": 13.1097, "mean_token_accuracy": 0.7798706620931626, "num_tokens": 7033417.0, "step": 1060 }, { "entropy": 0.8476884752511978, "epoch": 0.4762699604963, "grad_norm": 44.5, "learning_rate": 1.9846666666666668e-05, "loss": 13.6585, "mean_token_accuracy": 0.7750785838812589, "num_tokens": 7101274.0, "step": 1070 }, { "entropy": 0.8404533293098211, "epoch": 0.48072108162243365, "grad_norm": 39.5, "learning_rate": 1.9824444444444445e-05, "loss": 13.6161, "mean_token_accuracy": 0.7751947242766619, "num_tokens": 7166608.0, "step": 1080 }, { "entropy": 0.7973418578505516, "epoch": 0.4851722027485673, "grad_norm": 58.5, "learning_rate": 1.9802222222222226e-05, "loss": 12.8893, "mean_token_accuracy": 0.7859396133571863, "num_tokens": 7240958.0, "step": 1090 }, { "entropy": 0.8283490337431431, "epoch": 0.4896233238747009, "grad_norm": 43.0, "learning_rate": 1.978e-05, "loss": 13.2474, "mean_token_accuracy": 0.7790504809468984, "num_tokens": 7311968.0, "step": 1100 }, { "epoch": 0.4896233238747009, "eval_biology_entropy": 1.2187969796061515, "eval_biology_loss": 1.297374963760376, "eval_biology_mean_token_accuracy": 0.6832944719195366, "eval_biology_num_tokens": 7311968.0, "eval_biology_runtime": 40.0896, "eval_biology_samples_per_second": 12.472, "eval_biology_steps_per_second": 12.472, "step": 1100 }, { "epoch": 0.4896233238747009, "eval_chemistry_entropy": 0.8075549347996712, "eval_chemistry_loss": 0.8330864310264587, "eval_chemistry_mean_token_accuracy": 0.7758210087418557, "eval_chemistry_num_tokens": 7311968.0, "eval_chemistry_runtime": 46.2026, "eval_chemistry_samples_per_second": 10.822, "eval_chemistry_steps_per_second": 10.822, "step": 1100 }, { "entropy": 0.8035251742228866, "epoch": 0.4940744450008346, "grad_norm": 42.25, "learning_rate": 1.975777777777778e-05, "loss": 13.0881, "mean_token_accuracy": 0.7785759992897511, "num_tokens": 7378961.0, "step": 1110 }, { "entropy": 0.7911803729832172, "epoch": 0.49852556612696824, "grad_norm": 47.0, "learning_rate": 1.9735555555555556e-05, "loss": 12.8175, "mean_token_accuracy": 0.7857681257650256, "num_tokens": 7444923.0, "step": 1120 }, { "entropy": 0.8341826571151614, "epoch": 0.5029766872531019, "grad_norm": 42.0, "learning_rate": 1.9713333333333337e-05, "loss": 13.2652, "mean_token_accuracy": 0.7798049133270979, "num_tokens": 7509885.0, "step": 1130 }, { "entropy": 0.8292949998751282, "epoch": 0.5074278083792355, "grad_norm": 42.25, "learning_rate": 1.969111111111111e-05, "loss": 13.6776, "mean_token_accuracy": 0.7743924837559462, "num_tokens": 7576456.0, "step": 1140 }, { "entropy": 0.867630060762167, "epoch": 0.5118789295053692, "grad_norm": 44.25, "learning_rate": 1.9668888888888892e-05, "loss": 13.874, "mean_token_accuracy": 0.7730787601321936, "num_tokens": 7641821.0, "step": 1150 }, { "entropy": 0.800755001604557, "epoch": 0.5163300506315028, "grad_norm": 47.0, "learning_rate": 1.9646666666666666e-05, "loss": 12.939, "mean_token_accuracy": 0.7809992711991072, "num_tokens": 7706502.0, "step": 1160 }, { "entropy": 0.8834998097270728, "epoch": 0.5207811717576365, "grad_norm": 48.0, "learning_rate": 1.9624444444444447e-05, "loss": 14.3515, "mean_token_accuracy": 0.7673096172511578, "num_tokens": 7772353.0, "step": 1170 }, { "entropy": 0.8068421924486756, "epoch": 0.52523229288377, "grad_norm": 38.25, "learning_rate": 1.9602222222222225e-05, "loss": 13.219, "mean_token_accuracy": 0.7816231641918421, "num_tokens": 7838516.0, "step": 1180 }, { "entropy": 0.8032166380435228, "epoch": 0.5296834140099037, "grad_norm": 52.25, "learning_rate": 1.9580000000000002e-05, "loss": 12.8455, "mean_token_accuracy": 0.7809245727956295, "num_tokens": 7902568.0, "step": 1190 }, { "entropy": 0.8334995551034808, "epoch": 0.5341345351360374, "grad_norm": 42.25, "learning_rate": 1.955777777777778e-05, "loss": 13.3201, "mean_token_accuracy": 0.7768194541335106, "num_tokens": 7969704.0, "step": 1200 }, { "epoch": 0.5341345351360374, "eval_biology_entropy": 1.2117234426736831, "eval_biology_loss": 1.300293207168579, "eval_biology_mean_token_accuracy": 0.6825520681738854, "eval_biology_num_tokens": 7969704.0, "eval_biology_runtime": 39.4328, "eval_biology_samples_per_second": 12.68, "eval_biology_steps_per_second": 12.68, "step": 1200 }, { "epoch": 0.5341345351360374, "eval_chemistry_entropy": 0.7991210364103317, "eval_chemistry_loss": 0.8259859681129456, "eval_chemistry_mean_token_accuracy": 0.7772398797273636, "eval_chemistry_num_tokens": 7969704.0, "eval_chemistry_runtime": 45.8311, "eval_chemistry_samples_per_second": 10.91, "eval_chemistry_steps_per_second": 10.91, "step": 1200 }, { "entropy": 0.7765231873840094, "epoch": 0.538585656262171, "grad_norm": 41.25, "learning_rate": 1.9535555555555557e-05, "loss": 12.6868, "mean_token_accuracy": 0.7813247825950385, "num_tokens": 8034765.0, "step": 1210 }, { "entropy": 0.8038391519337893, "epoch": 0.5430367773883047, "grad_norm": 35.5, "learning_rate": 1.9513333333333335e-05, "loss": 13.2505, "mean_token_accuracy": 0.7795963916927576, "num_tokens": 8103869.0, "step": 1220 }, { "entropy": 0.7666595270857215, "epoch": 0.5474878985144384, "grad_norm": 41.25, "learning_rate": 1.9491111111111113e-05, "loss": 12.3886, "mean_token_accuracy": 0.7900463610887527, "num_tokens": 8173386.0, "step": 1230 }, { "entropy": 0.8262818416580557, "epoch": 0.551939019640572, "grad_norm": 37.0, "learning_rate": 1.946888888888889e-05, "loss": 13.2345, "mean_token_accuracy": 0.7755838381126523, "num_tokens": 8242162.0, "step": 1240 }, { "entropy": 0.8054397076368331, "epoch": 0.5563901407667056, "grad_norm": 48.5, "learning_rate": 1.9446666666666668e-05, "loss": 13.0502, "mean_token_accuracy": 0.7804440699517727, "num_tokens": 8308919.0, "step": 1250 }, { "entropy": 0.811793964356184, "epoch": 0.5608412618928392, "grad_norm": 49.25, "learning_rate": 1.9424444444444446e-05, "loss": 13.3739, "mean_token_accuracy": 0.7758540976792574, "num_tokens": 8365617.0, "step": 1260 }, { "entropy": 0.8213885102421046, "epoch": 0.5652923830189729, "grad_norm": 37.5, "learning_rate": 1.9402222222222223e-05, "loss": 13.2316, "mean_token_accuracy": 0.7780250526964665, "num_tokens": 8430728.0, "step": 1270 }, { "entropy": 0.7969466263428331, "epoch": 0.5697435041451065, "grad_norm": 34.25, "learning_rate": 1.938e-05, "loss": 12.6971, "mean_token_accuracy": 0.7868836035951972, "num_tokens": 8497852.0, "step": 1280 }, { "entropy": 0.8166826661676169, "epoch": 0.5741946252712402, "grad_norm": 43.75, "learning_rate": 1.935777777777778e-05, "loss": 13.4076, "mean_token_accuracy": 0.7757051605731249, "num_tokens": 8558952.0, "step": 1290 }, { "entropy": 0.8129263132810592, "epoch": 0.5786457463973739, "grad_norm": 44.5, "learning_rate": 1.9335555555555556e-05, "loss": 13.1537, "mean_token_accuracy": 0.7816494394093751, "num_tokens": 8622868.0, "step": 1300 }, { "epoch": 0.5786457463973739, "eval_biology_entropy": 1.2590930373072624, "eval_biology_loss": 1.3004297018051147, "eval_biology_mean_token_accuracy": 0.6824918667078018, "eval_biology_num_tokens": 8622868.0, "eval_biology_runtime": 39.5147, "eval_biology_samples_per_second": 12.654, "eval_biology_steps_per_second": 12.654, "step": 1300 }, { "epoch": 0.5786457463973739, "eval_chemistry_entropy": 0.8134206305742264, "eval_chemistry_loss": 0.8170297741889954, "eval_chemistry_mean_token_accuracy": 0.7793359256386757, "eval_chemistry_num_tokens": 8622868.0, "eval_chemistry_runtime": 45.8445, "eval_chemistry_samples_per_second": 10.906, "eval_chemistry_steps_per_second": 10.906, "step": 1300 }, { "entropy": 0.7748439759016037, "epoch": 0.5830968675235075, "grad_norm": 40.25, "learning_rate": 1.9313333333333334e-05, "loss": 12.585, "mean_token_accuracy": 0.7868875458836555, "num_tokens": 8694510.0, "step": 1310 }, { "entropy": 0.8616355959326029, "epoch": 0.5875479886496411, "grad_norm": 49.25, "learning_rate": 1.9291111111111115e-05, "loss": 14.0344, "mean_token_accuracy": 0.766650452464819, "num_tokens": 8757753.0, "step": 1320 }, { "entropy": 0.8312898099422454, "epoch": 0.5919991097757747, "grad_norm": 44.0, "learning_rate": 1.926888888888889e-05, "loss": 13.3145, "mean_token_accuracy": 0.7796449743211269, "num_tokens": 8823153.0, "step": 1330 }, { "entropy": 0.8332564871758222, "epoch": 0.5964502309019084, "grad_norm": 41.5, "learning_rate": 1.924666666666667e-05, "loss": 13.599, "mean_token_accuracy": 0.7727281775325536, "num_tokens": 8886401.0, "step": 1340 }, { "entropy": 0.8084427203983069, "epoch": 0.6009013520280421, "grad_norm": 36.75, "learning_rate": 1.9224444444444444e-05, "loss": 12.8022, "mean_token_accuracy": 0.7834546566009521, "num_tokens": 8958530.0, "step": 1350 }, { "entropy": 0.8060288658365607, "epoch": 0.6053524731541757, "grad_norm": 41.0, "learning_rate": 1.9202222222222225e-05, "loss": 13.1094, "mean_token_accuracy": 0.7809524293988943, "num_tokens": 9024677.0, "step": 1360 }, { "entropy": 0.8401720520108938, "epoch": 0.6098035942803094, "grad_norm": 40.75, "learning_rate": 1.918e-05, "loss": 13.5138, "mean_token_accuracy": 0.7706755470484495, "num_tokens": 9090280.0, "step": 1370 }, { "entropy": 0.798151072487235, "epoch": 0.614254715406443, "grad_norm": 43.5, "learning_rate": 1.915777777777778e-05, "loss": 12.9648, "mean_token_accuracy": 0.7831501496955753, "num_tokens": 9153771.0, "step": 1380 }, { "entropy": 0.7643128799274563, "epoch": 0.6187058365325766, "grad_norm": 39.25, "learning_rate": 1.9135555555555555e-05, "loss": 12.2452, "mean_token_accuracy": 0.7925934199243784, "num_tokens": 9227117.0, "step": 1390 }, { "entropy": 0.8085228271782399, "epoch": 0.6231569576587103, "grad_norm": 43.0, "learning_rate": 1.9113333333333336e-05, "loss": 13.2475, "mean_token_accuracy": 0.7788113884627819, "num_tokens": 9291760.0, "step": 1400 }, { "epoch": 0.6231569576587103, "eval_biology_entropy": 1.2567389221787453, "eval_biology_loss": 1.3026176691055298, "eval_biology_mean_token_accuracy": 0.6818688949346542, "eval_biology_num_tokens": 9291760.0, "eval_biology_runtime": 39.2644, "eval_biology_samples_per_second": 12.734, "eval_biology_steps_per_second": 12.734, "step": 1400 }, { "epoch": 0.6231569576587103, "eval_chemistry_entropy": 0.8002244250178338, "eval_chemistry_loss": 0.8070799708366394, "eval_chemistry_mean_token_accuracy": 0.7815486862063408, "eval_chemistry_num_tokens": 9291760.0, "eval_chemistry_runtime": 55.9364, "eval_chemistry_samples_per_second": 8.939, "eval_chemistry_steps_per_second": 8.939, "step": 1400 }, { "entropy": 0.856862205825746, "epoch": 0.6276080787848439, "grad_norm": 43.75, "learning_rate": 1.9091111111111113e-05, "loss": 13.4277, "mean_token_accuracy": 0.7707107689231634, "num_tokens": 9357561.0, "step": 1410 }, { "entropy": 0.7735245639458299, "epoch": 0.6320591999109776, "grad_norm": 48.0, "learning_rate": 1.906888888888889e-05, "loss": 12.943, "mean_token_accuracy": 0.7830769792199135, "num_tokens": 9425022.0, "step": 1420 }, { "entropy": 0.8000387817621231, "epoch": 0.6365103210371112, "grad_norm": 49.0, "learning_rate": 1.904666666666667e-05, "loss": 12.637, "mean_token_accuracy": 0.7861681949347258, "num_tokens": 9492028.0, "step": 1430 }, { "entropy": 0.7522478165104985, "epoch": 0.6409614421632449, "grad_norm": 42.0, "learning_rate": 1.9024444444444446e-05, "loss": 12.2016, "mean_token_accuracy": 0.7882154919207096, "num_tokens": 9561091.0, "step": 1440 }, { "entropy": 0.7836794227361679, "epoch": 0.6454125632893785, "grad_norm": 51.75, "learning_rate": 1.9002222222222224e-05, "loss": 12.744, "mean_token_accuracy": 0.7859560146927833, "num_tokens": 9628383.0, "step": 1450 }, { "entropy": 0.7407985650002956, "epoch": 0.6498636844155121, "grad_norm": 34.0, "learning_rate": 1.898e-05, "loss": 11.8696, "mean_token_accuracy": 0.7983961008489132, "num_tokens": 9695723.0, "step": 1460 }, { "entropy": 0.7636277657002211, "epoch": 0.6543148055416458, "grad_norm": 38.75, "learning_rate": 1.895777777777778e-05, "loss": 12.4201, "mean_token_accuracy": 0.7915539544075727, "num_tokens": 9763393.0, "step": 1470 }, { "entropy": 0.8254500133916736, "epoch": 0.6587659266677794, "grad_norm": 38.25, "learning_rate": 1.8935555555555556e-05, "loss": 13.0763, "mean_token_accuracy": 0.7825288005173207, "num_tokens": 9827272.0, "step": 1480 }, { "entropy": 0.7628985194489359, "epoch": 0.6632170477939131, "grad_norm": 35.75, "learning_rate": 1.8913333333333334e-05, "loss": 12.4827, "mean_token_accuracy": 0.7848137805238367, "num_tokens": 9892418.0, "step": 1490 }, { "entropy": 0.7648764431476593, "epoch": 0.6676681689200468, "grad_norm": 40.25, "learning_rate": 1.8891111111111115e-05, "loss": 12.3863, "mean_token_accuracy": 0.7885230954736471, "num_tokens": 9955369.0, "step": 1500 }, { "epoch": 0.6676681689200468, "eval_biology_entropy": 1.232845685839653, "eval_biology_loss": 1.299937129020691, "eval_biology_mean_token_accuracy": 0.6833411865234374, "eval_biology_num_tokens": 9955369.0, "eval_biology_runtime": 39.6042, "eval_biology_samples_per_second": 12.625, "eval_biology_steps_per_second": 12.625, "step": 1500 }, { "epoch": 0.6676681689200468, "eval_chemistry_entropy": 0.7862835813760758, "eval_chemistry_loss": 0.7996346354484558, "eval_chemistry_mean_token_accuracy": 0.7832771391272545, "eval_chemistry_num_tokens": 9955369.0, "eval_chemistry_runtime": 46.138, "eval_chemistry_samples_per_second": 10.837, "eval_chemistry_steps_per_second": 10.837, "step": 1500 }, { "entropy": 0.7809047346934677, "epoch": 0.6721192900461804, "grad_norm": 41.25, "learning_rate": 1.886888888888889e-05, "loss": 12.5052, "mean_token_accuracy": 0.7869493119418621, "num_tokens": 10026298.0, "step": 1510 }, { "entropy": 0.7423510169610381, "epoch": 0.676570411172314, "grad_norm": 36.0, "learning_rate": 1.884666666666667e-05, "loss": 11.9187, "mean_token_accuracy": 0.7974143566563725, "num_tokens": 10096065.0, "step": 1520 }, { "entropy": 0.771459529362619, "epoch": 0.6810215322984476, "grad_norm": 40.5, "learning_rate": 1.8824444444444445e-05, "loss": 12.7462, "mean_token_accuracy": 0.7863322600722313, "num_tokens": 10160745.0, "step": 1530 }, { "entropy": 0.7509715856052935, "epoch": 0.6854726534245813, "grad_norm": 35.75, "learning_rate": 1.8802222222222226e-05, "loss": 11.927, "mean_token_accuracy": 0.7947466436773538, "num_tokens": 10233806.0, "step": 1540 }, { "entropy": 0.7742771266028285, "epoch": 0.689923774550715, "grad_norm": 42.5, "learning_rate": 1.878e-05, "loss": 12.5918, "mean_token_accuracy": 0.7863378578796982, "num_tokens": 10298845.0, "step": 1550 }, { "entropy": 0.7730063889175653, "epoch": 0.6943748956768486, "grad_norm": 38.25, "learning_rate": 1.875777777777778e-05, "loss": 12.4047, "mean_token_accuracy": 0.7885195638984441, "num_tokens": 10364601.0, "step": 1560 }, { "entropy": 0.7708892775699496, "epoch": 0.6988260168029823, "grad_norm": 40.75, "learning_rate": 1.873555555555556e-05, "loss": 12.6199, "mean_token_accuracy": 0.7856390193104744, "num_tokens": 10434433.0, "step": 1570 }, { "entropy": 0.7958233149722218, "epoch": 0.703277137929116, "grad_norm": 43.75, "learning_rate": 1.8713333333333336e-05, "loss": 12.8595, "mean_token_accuracy": 0.7833300601691008, "num_tokens": 10500891.0, "step": 1580 }, { "entropy": 0.7902805691584944, "epoch": 0.7077282590552495, "grad_norm": 40.0, "learning_rate": 1.8691111111111114e-05, "loss": 12.8921, "mean_token_accuracy": 0.7834575500339269, "num_tokens": 10567784.0, "step": 1590 }, { "entropy": 0.7975674813613296, "epoch": 0.7121793801813832, "grad_norm": 38.5, "learning_rate": 1.866888888888889e-05, "loss": 12.4954, "mean_token_accuracy": 0.7869599737226963, "num_tokens": 10633325.0, "step": 1600 }, { "epoch": 0.7121793801813832, "eval_biology_entropy": 1.2419219986200332, "eval_biology_loss": 1.3013286590576172, "eval_biology_mean_token_accuracy": 0.6823853524923325, "eval_biology_num_tokens": 10633325.0, "eval_biology_runtime": 40.8294, "eval_biology_samples_per_second": 12.246, "eval_biology_steps_per_second": 12.246, "step": 1600 }, { "epoch": 0.7121793801813832, "eval_chemistry_entropy": 0.7956894148588181, "eval_chemistry_loss": 0.7937653660774231, "eval_chemistry_mean_token_accuracy": 0.7838373360037804, "eval_chemistry_num_tokens": 10633325.0, "eval_chemistry_runtime": 46.4829, "eval_chemistry_samples_per_second": 10.757, "eval_chemistry_steps_per_second": 10.757, "step": 1600 }, { "entropy": 0.7231507489457727, "epoch": 0.7166305013075168, "grad_norm": 44.25, "learning_rate": 1.864666666666667e-05, "loss": 11.7926, "mean_token_accuracy": 0.7977068889886141, "num_tokens": 10699737.0, "step": 1610 }, { "entropy": 0.7011674824170768, "epoch": 0.7210816224336505, "grad_norm": 47.25, "learning_rate": 1.8624444444444446e-05, "loss": 11.2514, "mean_token_accuracy": 0.8032249186187983, "num_tokens": 10764319.0, "step": 1620 }, { "entropy": 0.7829309536144138, "epoch": 0.7255327435597841, "grad_norm": 37.0, "learning_rate": 1.8602222222222224e-05, "loss": 12.7194, "mean_token_accuracy": 0.7839431796222925, "num_tokens": 10831617.0, "step": 1630 }, { "entropy": 0.7741082075983285, "epoch": 0.7299838646859178, "grad_norm": 44.0, "learning_rate": 1.858e-05, "loss": 12.5457, "mean_token_accuracy": 0.7877275109291076, "num_tokens": 10897916.0, "step": 1640 }, { "entropy": 0.7683334495872259, "epoch": 0.7344349858120514, "grad_norm": 40.0, "learning_rate": 1.855777777777778e-05, "loss": 12.3848, "mean_token_accuracy": 0.7887299537658692, "num_tokens": 10965483.0, "step": 1650 }, { "entropy": 0.7886540442705154, "epoch": 0.738886106938185, "grad_norm": 37.5, "learning_rate": 1.8535555555555557e-05, "loss": 12.7194, "mean_token_accuracy": 0.7842936536297203, "num_tokens": 11030485.0, "step": 1660 }, { "entropy": 0.7601794632151723, "epoch": 0.7433372280643187, "grad_norm": 42.5, "learning_rate": 1.8513333333333335e-05, "loss": 12.345, "mean_token_accuracy": 0.789710770919919, "num_tokens": 11097108.0, "step": 1670 }, { "entropy": 0.7141751017421484, "epoch": 0.7477883491904523, "grad_norm": 40.5, "learning_rate": 1.8491111111111112e-05, "loss": 11.629, "mean_token_accuracy": 0.8011402323842048, "num_tokens": 11165356.0, "step": 1680 }, { "entropy": 0.7918755512684583, "epoch": 0.752239470316586, "grad_norm": 36.0, "learning_rate": 1.846888888888889e-05, "loss": 12.6792, "mean_token_accuracy": 0.7855523183941842, "num_tokens": 11234429.0, "step": 1690 }, { "entropy": 0.7715185107663274, "epoch": 0.7566905914427197, "grad_norm": 40.75, "learning_rate": 1.8446666666666667e-05, "loss": 12.3163, "mean_token_accuracy": 0.7902058430016041, "num_tokens": 11299223.0, "step": 1700 }, { "epoch": 0.7566905914427197, "eval_biology_entropy": 1.2339779297113418, "eval_biology_loss": 1.3050655126571655, "eval_biology_mean_token_accuracy": 0.6822574281096458, "eval_biology_num_tokens": 11299223.0, "eval_biology_runtime": 39.9731, "eval_biology_samples_per_second": 12.508, "eval_biology_steps_per_second": 12.508, "step": 1700 }, { "epoch": 0.7566905914427197, "eval_chemistry_entropy": 0.7556659379005433, "eval_chemistry_loss": 0.7893115282058716, "eval_chemistry_mean_token_accuracy": 0.7856479023098946, "eval_chemistry_num_tokens": 11299223.0, "eval_chemistry_runtime": 48.0592, "eval_chemistry_samples_per_second": 10.404, "eval_chemistry_steps_per_second": 10.404, "step": 1700 }, { "entropy": 0.7491960693150759, "epoch": 0.7611417125688533, "grad_norm": 37.5, "learning_rate": 1.842444444444445e-05, "loss": 12.221, "mean_token_accuracy": 0.7910046689212322, "num_tokens": 11371200.0, "step": 1710 }, { "entropy": 0.7779044238850474, "epoch": 0.7655928336949869, "grad_norm": 36.0, "learning_rate": 1.8402222222222223e-05, "loss": 12.6537, "mean_token_accuracy": 0.7857811234891414, "num_tokens": 11436799.0, "step": 1720 }, { "entropy": 0.7611921314150095, "epoch": 0.7700439548211205, "grad_norm": 46.25, "learning_rate": 1.8380000000000004e-05, "loss": 12.1793, "mean_token_accuracy": 0.7942970298230648, "num_tokens": 11502965.0, "step": 1730 }, { "entropy": 0.7916558500379324, "epoch": 0.7744950759472542, "grad_norm": 39.5, "learning_rate": 1.8357777777777778e-05, "loss": 12.728, "mean_token_accuracy": 0.7856421928852797, "num_tokens": 11567422.0, "step": 1740 }, { "entropy": 0.7364842056296765, "epoch": 0.7789461970733879, "grad_norm": 30.625, "learning_rate": 1.833555555555556e-05, "loss": 12.1674, "mean_token_accuracy": 0.791620584949851, "num_tokens": 11637076.0, "step": 1750 }, { "entropy": 0.8320787468925118, "epoch": 0.7833973181995215, "grad_norm": 38.0, "learning_rate": 1.8313333333333333e-05, "loss": 13.1941, "mean_token_accuracy": 0.7748327614739537, "num_tokens": 11703496.0, "step": 1760 }, { "entropy": 0.7116687665693462, "epoch": 0.7878484393256552, "grad_norm": 29.625, "learning_rate": 1.8291111111111114e-05, "loss": 11.5027, "mean_token_accuracy": 0.8015623264014721, "num_tokens": 11772773.0, "step": 1770 }, { "entropy": 0.7382532864809036, "epoch": 0.7922995604517888, "grad_norm": 37.25, "learning_rate": 1.8268888888888888e-05, "loss": 12.107, "mean_token_accuracy": 0.7927792508155107, "num_tokens": 11840317.0, "step": 1780 }, { "entropy": 0.8406919915229082, "epoch": 0.7967506815779224, "grad_norm": 40.75, "learning_rate": 1.824666666666667e-05, "loss": 13.4841, "mean_token_accuracy": 0.7751987297087908, "num_tokens": 11903376.0, "step": 1790 }, { "entropy": 0.7729794921353459, "epoch": 0.801201802704056, "grad_norm": 41.25, "learning_rate": 1.8224444444444447e-05, "loss": 12.3404, "mean_token_accuracy": 0.789885114133358, "num_tokens": 11965530.0, "step": 1800 }, { "epoch": 0.801201802704056, "eval_biology_entropy": 1.238894326388836, "eval_biology_loss": 1.3023967742919922, "eval_biology_mean_token_accuracy": 0.6827202830314636, "eval_biology_num_tokens": 11965530.0, "eval_biology_runtime": 39.8685, "eval_biology_samples_per_second": 12.541, "eval_biology_steps_per_second": 12.541, "step": 1800 }, { "epoch": 0.801201802704056, "eval_chemistry_entropy": 0.7590603602528572, "eval_chemistry_loss": 0.7831795811653137, "eval_chemistry_mean_token_accuracy": 0.7866261592507362, "eval_chemistry_num_tokens": 11965530.0, "eval_chemistry_runtime": 48.0982, "eval_chemistry_samples_per_second": 10.395, "eval_chemistry_steps_per_second": 10.395, "step": 1800 }, { "entropy": 0.7708059819415212, "epoch": 0.8056529238301897, "grad_norm": 43.25, "learning_rate": 1.8202222222222225e-05, "loss": 12.5294, "mean_token_accuracy": 0.7862987028434872, "num_tokens": 12031493.0, "step": 1810 }, { "entropy": 0.7691075187176466, "epoch": 0.8101040449563234, "grad_norm": 34.0, "learning_rate": 1.8180000000000002e-05, "loss": 12.552, "mean_token_accuracy": 0.7872704153880477, "num_tokens": 12097917.0, "step": 1820 }, { "entropy": 0.7835509760305286, "epoch": 0.814555166082457, "grad_norm": 35.25, "learning_rate": 1.815777777777778e-05, "loss": 12.581, "mean_token_accuracy": 0.7855123173445463, "num_tokens": 12163029.0, "step": 1830 }, { "entropy": 0.7693919812329113, "epoch": 0.8190062872085907, "grad_norm": 40.25, "learning_rate": 1.8135555555555557e-05, "loss": 12.5332, "mean_token_accuracy": 0.7862321555614471, "num_tokens": 12224427.0, "step": 1840 }, { "entropy": 0.8024251624941826, "epoch": 0.8234574083347244, "grad_norm": 36.5, "learning_rate": 1.8113333333333335e-05, "loss": 12.7152, "mean_token_accuracy": 0.7867181565612554, "num_tokens": 12291917.0, "step": 1850 }, { "entropy": 0.7431695537641645, "epoch": 0.8279085294608579, "grad_norm": 37.5, "learning_rate": 1.8091111111111113e-05, "loss": 12.067, "mean_token_accuracy": 0.7929942118003964, "num_tokens": 12365332.0, "step": 1860 }, { "entropy": 0.7609774840995669, "epoch": 0.8323596505869916, "grad_norm": 32.25, "learning_rate": 1.806888888888889e-05, "loss": 12.307, "mean_token_accuracy": 0.7898187723010779, "num_tokens": 12441423.0, "step": 1870 }, { "entropy": 0.7366092208772897, "epoch": 0.8368107717131252, "grad_norm": 43.0, "learning_rate": 1.8046666666666668e-05, "loss": 11.6312, "mean_token_accuracy": 0.8000296927988529, "num_tokens": 12509124.0, "step": 1880 }, { "entropy": 0.7049085019156337, "epoch": 0.8412618928392589, "grad_norm": 34.5, "learning_rate": 1.8024444444444445e-05, "loss": 11.6414, "mean_token_accuracy": 0.7974906180053949, "num_tokens": 12575355.0, "step": 1890 }, { "entropy": 0.7814744580537081, "epoch": 0.8457130139653926, "grad_norm": 44.25, "learning_rate": 1.8002222222222223e-05, "loss": 12.6335, "mean_token_accuracy": 0.7851035960018635, "num_tokens": 12642651.0, "step": 1900 }, { "epoch": 0.8457130139653926, "eval_biology_entropy": 1.2500024722218515, "eval_biology_loss": 1.3012299537658691, "eval_biology_mean_token_accuracy": 0.6823599907159805, "eval_biology_num_tokens": 12642651.0, "eval_biology_runtime": 39.4244, "eval_biology_samples_per_second": 12.682, "eval_biology_steps_per_second": 12.682, "step": 1900 }, { "epoch": 0.8457130139653926, "eval_chemistry_entropy": 0.7712779935002327, "eval_chemistry_loss": 0.7782201170921326, "eval_chemistry_mean_token_accuracy": 0.787936452627182, "eval_chemistry_num_tokens": 12642651.0, "eval_chemistry_runtime": 45.8268, "eval_chemistry_samples_per_second": 10.911, "eval_chemistry_steps_per_second": 10.911, "step": 1900 }, { "entropy": 0.7286117426119745, "epoch": 0.8501641350915262, "grad_norm": 38.0, "learning_rate": 1.798e-05, "loss": 11.5153, "mean_token_accuracy": 0.8004030931741, "num_tokens": 12710065.0, "step": 1910 }, { "entropy": 0.7636802634224296, "epoch": 0.8546152562176598, "grad_norm": 41.75, "learning_rate": 1.7957777777777778e-05, "loss": 12.3297, "mean_token_accuracy": 0.7917378932237625, "num_tokens": 12778408.0, "step": 1920 }, { "entropy": 0.7151092055253685, "epoch": 0.8590663773437934, "grad_norm": 44.75, "learning_rate": 1.7935555555555556e-05, "loss": 11.6627, "mean_token_accuracy": 0.7980649210512638, "num_tokens": 12845949.0, "step": 1930 }, { "entropy": 0.7645737134851516, "epoch": 0.8635174984699271, "grad_norm": 41.25, "learning_rate": 1.7913333333333337e-05, "loss": 12.1823, "mean_token_accuracy": 0.7940845835953951, "num_tokens": 12910630.0, "step": 1940 }, { "entropy": 0.7626768484711647, "epoch": 0.8679686195960608, "grad_norm": 33.5, "learning_rate": 1.789111111111111e-05, "loss": 12.4, "mean_token_accuracy": 0.7878372304141521, "num_tokens": 12975379.0, "step": 1950 }, { "entropy": 0.728480844758451, "epoch": 0.8724197407221944, "grad_norm": 34.25, "learning_rate": 1.7868888888888892e-05, "loss": 11.683, "mean_token_accuracy": 0.7970448518171906, "num_tokens": 13046473.0, "step": 1960 }, { "entropy": 0.8084357729181647, "epoch": 0.8768708618483281, "grad_norm": 36.75, "learning_rate": 1.7846666666666666e-05, "loss": 13.1812, "mean_token_accuracy": 0.780071578361094, "num_tokens": 13105019.0, "step": 1970 }, { "entropy": 0.8157536951825023, "epoch": 0.8813219829744617, "grad_norm": 39.0, "learning_rate": 1.7824444444444447e-05, "loss": 12.8778, "mean_token_accuracy": 0.7852013517171145, "num_tokens": 13167768.0, "step": 1980 }, { "entropy": 0.747572572156787, "epoch": 0.8857731041005953, "grad_norm": 35.75, "learning_rate": 1.780222222222222e-05, "loss": 11.9019, "mean_token_accuracy": 0.7958117298781872, "num_tokens": 13235899.0, "step": 1990 }, { "entropy": 0.7473404568620026, "epoch": 0.890224225226729, "grad_norm": 33.25, "learning_rate": 1.7780000000000003e-05, "loss": 12.0581, "mean_token_accuracy": 0.7940714538097382, "num_tokens": 13301659.0, "step": 2000 }, { "epoch": 0.890224225226729, "eval_biology_entropy": 1.2476614614725112, "eval_biology_loss": 1.3003644943237305, "eval_biology_mean_token_accuracy": 0.682231693148613, "eval_biology_num_tokens": 13301659.0, "eval_biology_runtime": 39.7204, "eval_biology_samples_per_second": 12.588, "eval_biology_steps_per_second": 12.588, "step": 2000 }, { "epoch": 0.890224225226729, "eval_chemistry_entropy": 0.7678308563828469, "eval_chemistry_loss": 0.7745650410652161, "eval_chemistry_mean_token_accuracy": 0.7882890626788139, "eval_chemistry_num_tokens": 13301659.0, "eval_chemistry_runtime": 46.1368, "eval_chemistry_samples_per_second": 10.837, "eval_chemistry_steps_per_second": 10.837, "step": 2000 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.919343158882048e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }