{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5577254771045457, "eval_steps": 100, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.8066153490915895, "epoch": 0.004451121126133645, "grad_norm": 107.5, "learning_rate": 1.8e-07, "loss": 16.3591, "mean_token_accuracy": 0.7447933696210385, "num_tokens": 63133.0, "step": 10 }, { "entropy": 0.8401238698512316, "epoch": 0.00890224225226729, "grad_norm": 103.5, "learning_rate": 3.8e-07, "loss": 17.2936, "mean_token_accuracy": 0.7330440735444427, "num_tokens": 128968.0, "step": 20 }, { "entropy": 0.828661117143929, "epoch": 0.013353363378400934, "grad_norm": 96.5, "learning_rate": 5.800000000000001e-07, "loss": 16.5809, "mean_token_accuracy": 0.7381731692701579, "num_tokens": 193314.0, "step": 30 }, { "entropy": 0.8600894263014197, "epoch": 0.01780448450453458, "grad_norm": 96.0, "learning_rate": 7.8e-07, "loss": 17.7444, "mean_token_accuracy": 0.7324823886156082, "num_tokens": 257189.0, "step": 40 }, { "entropy": 0.8017022017389536, "epoch": 0.022255605630668224, "grad_norm": 84.5, "learning_rate": 9.800000000000001e-07, "loss": 15.9063, "mean_token_accuracy": 0.7455223135650157, "num_tokens": 324224.0, "step": 50 }, { "entropy": 0.846268966794014, "epoch": 0.026706726756801868, "grad_norm": 78.5, "learning_rate": 1.1800000000000001e-06, "loss": 16.6006, "mean_token_accuracy": 0.7411292420700193, "num_tokens": 394654.0, "step": 60 }, { "entropy": 0.8181243563070894, "epoch": 0.031157847882935515, "grad_norm": 76.5, "learning_rate": 1.3800000000000001e-06, "loss": 15.9846, "mean_token_accuracy": 0.746401545777917, "num_tokens": 461658.0, "step": 70 }, { "entropy": 0.8314600124955177, "epoch": 0.03560896900906916, "grad_norm": 75.0, "learning_rate": 1.5800000000000001e-06, "loss": 15.8227, "mean_token_accuracy": 0.7472479769960045, "num_tokens": 528285.0, "step": 80 }, { "entropy": 0.8807666478678584, "epoch": 0.04006009013520281, "grad_norm": 72.0, "learning_rate": 1.7800000000000001e-06, "loss": 16.6834, "mean_token_accuracy": 0.7396679904311896, "num_tokens": 595336.0, "step": 90 }, { "entropy": 0.8991396678611636, "epoch": 0.04451121126133645, "grad_norm": 79.5, "learning_rate": 1.98e-06, "loss": 16.3968, "mean_token_accuracy": 0.7396075185388327, "num_tokens": 663126.0, "step": 100 }, { "epoch": 0.04451121126133645, "eval_biology_entropy": 1.1188154411315918, "eval_biology_loss": 1.2691835165023804, "eval_biology_mean_token_accuracy": 0.6881385813951493, "eval_biology_num_tokens": 663126.0, "eval_biology_runtime": 40.4565, "eval_biology_samples_per_second": 12.359, "eval_biology_steps_per_second": 12.359, "step": 100 }, { "epoch": 0.04451121126133645, "eval_chemistry_entropy": 0.8804921235442161, "eval_chemistry_loss": 1.0042469501495361, "eval_chemistry_mean_token_accuracy": 0.7444319971203804, "eval_chemistry_num_tokens": 663126.0, "eval_chemistry_runtime": 46.3585, "eval_chemistry_samples_per_second": 10.786, "eval_chemistry_steps_per_second": 10.786, "step": 100 }, { "entropy": 0.8988691195845604, "epoch": 0.048962332387470095, "grad_norm": 100.5, "learning_rate": 2.1800000000000003e-06, "loss": 16.3063, "mean_token_accuracy": 0.7414613764733076, "num_tokens": 731060.0, "step": 110 }, { "entropy": 0.8918870648369193, "epoch": 0.053413453513603736, "grad_norm": 77.5, "learning_rate": 2.38e-06, "loss": 15.8311, "mean_token_accuracy": 0.7483173958957196, "num_tokens": 801635.0, "step": 120 }, { "entropy": 0.9162682231515646, "epoch": 0.05786457463973738, "grad_norm": 66.0, "learning_rate": 2.5800000000000003e-06, "loss": 16.1464, "mean_token_accuracy": 0.7448406910523773, "num_tokens": 867260.0, "step": 130 }, { "entropy": 0.9749668512493372, "epoch": 0.06231569576587103, "grad_norm": 79.5, "learning_rate": 2.7800000000000005e-06, "loss": 16.9562, "mean_token_accuracy": 0.7369577366858721, "num_tokens": 931344.0, "step": 140 }, { "entropy": 0.939477625861764, "epoch": 0.06676681689200467, "grad_norm": 56.75, "learning_rate": 2.9800000000000003e-06, "loss": 15.7327, "mean_token_accuracy": 0.7471803797408938, "num_tokens": 993586.0, "step": 150 }, { "entropy": 0.9718907386064529, "epoch": 0.07121793801813832, "grad_norm": 53.75, "learning_rate": 3.1800000000000005e-06, "loss": 16.2036, "mean_token_accuracy": 0.7392314806580543, "num_tokens": 1060812.0, "step": 160 }, { "entropy": 0.9165311623364687, "epoch": 0.07566905914427197, "grad_norm": 63.25, "learning_rate": 3.3800000000000007e-06, "loss": 15.2657, "mean_token_accuracy": 0.7518215283751488, "num_tokens": 1131832.0, "step": 170 }, { "entropy": 0.9465073021128774, "epoch": 0.08012018027040561, "grad_norm": 50.25, "learning_rate": 3.58e-06, "loss": 15.5603, "mean_token_accuracy": 0.7473610159009695, "num_tokens": 1200650.0, "step": 180 }, { "entropy": 0.9450380651280283, "epoch": 0.08457130139653925, "grad_norm": 54.0, "learning_rate": 3.7800000000000002e-06, "loss": 15.56, "mean_token_accuracy": 0.748077143356204, "num_tokens": 1265107.0, "step": 190 }, { "entropy": 0.9630168141797185, "epoch": 0.0890224225226729, "grad_norm": 58.75, "learning_rate": 3.980000000000001e-06, "loss": 15.5029, "mean_token_accuracy": 0.7486971555277705, "num_tokens": 1327380.0, "step": 200 }, { "epoch": 0.0890224225226729, "eval_biology_entropy": 1.19531831908226, "eval_biology_loss": 1.2584387063980103, "eval_biology_mean_token_accuracy": 0.6882349443435669, "eval_biology_num_tokens": 1327380.0, "eval_biology_runtime": 40.8783, "eval_biology_samples_per_second": 12.231, "eval_biology_steps_per_second": 12.231, "step": 200 }, { "epoch": 0.0890224225226729, "eval_chemistry_entropy": 0.9437598274946213, "eval_chemistry_loss": 0.9655953645706177, "eval_chemistry_mean_token_accuracy": 0.7501006088852883, "eval_chemistry_num_tokens": 1327380.0, "eval_chemistry_runtime": 46.6023, "eval_chemistry_samples_per_second": 10.729, "eval_chemistry_steps_per_second": 10.729, "step": 200 }, { "entropy": 0.9782480053603649, "epoch": 0.09347354364880654, "grad_norm": 64.0, "learning_rate": 4.18e-06, "loss": 15.9821, "mean_token_accuracy": 0.7436690799891948, "num_tokens": 1393379.0, "step": 210 }, { "entropy": 0.9402019061148167, "epoch": 0.09792466477494019, "grad_norm": 54.0, "learning_rate": 4.38e-06, "loss": 15.3729, "mean_token_accuracy": 0.7516820874065161, "num_tokens": 1460130.0, "step": 220 }, { "entropy": 0.9247835712507367, "epoch": 0.10237578590107384, "grad_norm": 54.5, "learning_rate": 4.58e-06, "loss": 15.0731, "mean_token_accuracy": 0.7559274602681398, "num_tokens": 1529183.0, "step": 230 }, { "entropy": 0.9673028320074082, "epoch": 0.10682690702720747, "grad_norm": 71.5, "learning_rate": 4.78e-06, "loss": 15.731, "mean_token_accuracy": 0.7473661951720715, "num_tokens": 1597405.0, "step": 240 }, { "entropy": 0.9974556604400278, "epoch": 0.11127802815334112, "grad_norm": 55.0, "learning_rate": 4.980000000000001e-06, "loss": 15.8962, "mean_token_accuracy": 0.7434635870158672, "num_tokens": 1661767.0, "step": 250 }, { "entropy": 0.985609365440905, "epoch": 0.11572914927947477, "grad_norm": 50.75, "learning_rate": 5.18e-06, "loss": 16.0267, "mean_token_accuracy": 0.7446854375302792, "num_tokens": 1728207.0, "step": 260 }, { "entropy": 0.9456103699281811, "epoch": 0.12018027040560841, "grad_norm": 58.0, "learning_rate": 5.380000000000001e-06, "loss": 15.3377, "mean_token_accuracy": 0.750850186496973, "num_tokens": 1796055.0, "step": 270 }, { "entropy": 0.9541573049500585, "epoch": 0.12463139153174206, "grad_norm": 56.5, "learning_rate": 5.580000000000001e-06, "loss": 15.2532, "mean_token_accuracy": 0.7530262626707553, "num_tokens": 1859684.0, "step": 280 }, { "entropy": 0.9941559780389071, "epoch": 0.1290825126578757, "grad_norm": 59.25, "learning_rate": 5.78e-06, "loss": 15.94, "mean_token_accuracy": 0.745009395852685, "num_tokens": 1921704.0, "step": 290 }, { "entropy": 0.981252990104258, "epoch": 0.13353363378400934, "grad_norm": 56.75, "learning_rate": 5.98e-06, "loss": 15.8943, "mean_token_accuracy": 0.7449573867022992, "num_tokens": 1985766.0, "step": 300 }, { "epoch": 0.13353363378400934, "eval_biology_entropy": 1.2063790675401687, "eval_biology_loss": 1.2592713832855225, "eval_biology_mean_token_accuracy": 0.6874204781055451, "eval_biology_num_tokens": 1985766.0, "eval_biology_runtime": 40.5061, "eval_biology_samples_per_second": 12.344, "eval_biology_steps_per_second": 12.344, "step": 300 }, { "epoch": 0.13353363378400934, "eval_chemistry_entropy": 0.9332761432528496, "eval_chemistry_loss": 0.9383891820907593, "eval_chemistry_mean_token_accuracy": 0.7543213546276093, "eval_chemistry_num_tokens": 1985766.0, "eval_chemistry_runtime": 46.2485, "eval_chemistry_samples_per_second": 10.811, "eval_chemistry_steps_per_second": 10.811, "step": 300 }, { "entropy": 0.9657653540372848, "epoch": 0.137984754910143, "grad_norm": 66.5, "learning_rate": 6.18e-06, "loss": 15.5291, "mean_token_accuracy": 0.7452911786735058, "num_tokens": 2049041.0, "step": 310 }, { "entropy": 0.922235100530088, "epoch": 0.14243587603627664, "grad_norm": 49.75, "learning_rate": 6.380000000000001e-06, "loss": 14.7109, "mean_token_accuracy": 0.75825478695333, "num_tokens": 2118068.0, "step": 320 }, { "entropy": 0.9044711474329233, "epoch": 0.14688699716241027, "grad_norm": 60.25, "learning_rate": 6.5800000000000005e-06, "loss": 14.5687, "mean_token_accuracy": 0.7618395145982504, "num_tokens": 2186387.0, "step": 330 }, { "entropy": 0.946225673891604, "epoch": 0.15133811828854393, "grad_norm": 52.5, "learning_rate": 6.780000000000001e-06, "loss": 15.1959, "mean_token_accuracy": 0.7535316452383996, "num_tokens": 2252650.0, "step": 340 }, { "entropy": 0.9036338411271572, "epoch": 0.15578923941467757, "grad_norm": 57.75, "learning_rate": 6.98e-06, "loss": 14.5854, "mean_token_accuracy": 0.7611672822386026, "num_tokens": 2320358.0, "step": 350 }, { "entropy": 0.9015818448737264, "epoch": 0.16024036054081123, "grad_norm": 49.5, "learning_rate": 7.180000000000001e-06, "loss": 14.5381, "mean_token_accuracy": 0.7606555309146643, "num_tokens": 2388824.0, "step": 360 }, { "entropy": 0.8864203749224544, "epoch": 0.16469148166694486, "grad_norm": 49.25, "learning_rate": 7.3800000000000005e-06, "loss": 14.1936, "mean_token_accuracy": 0.7665066320449114, "num_tokens": 2456144.0, "step": 370 }, { "entropy": 0.9866490814834833, "epoch": 0.1691426027930785, "grad_norm": 49.5, "learning_rate": 7.58e-06, "loss": 15.8412, "mean_token_accuracy": 0.7478409979492426, "num_tokens": 2515325.0, "step": 380 }, { "entropy": 0.9080646676942706, "epoch": 0.17359372391921216, "grad_norm": 48.25, "learning_rate": 7.78e-06, "loss": 14.5343, "mean_token_accuracy": 0.7594566397368908, "num_tokens": 2580490.0, "step": 390 }, { "entropy": 0.9095059128478169, "epoch": 0.1780448450453458, "grad_norm": 43.5, "learning_rate": 7.980000000000002e-06, "loss": 14.6647, "mean_token_accuracy": 0.7607249341905117, "num_tokens": 2644330.0, "step": 400 }, { "epoch": 0.1780448450453458, "eval_biology_entropy": 1.1971934199333192, "eval_biology_loss": 1.2638347148895264, "eval_biology_mean_token_accuracy": 0.6877701328396797, "eval_biology_num_tokens": 2644330.0, "eval_biology_runtime": 39.8022, "eval_biology_samples_per_second": 12.562, "eval_biology_steps_per_second": 12.562, "step": 400 }, { "epoch": 0.1780448450453458, "eval_chemistry_entropy": 0.8957328157424926, "eval_chemistry_loss": 0.9148933291435242, "eval_chemistry_mean_token_accuracy": 0.7592848987579346, "eval_chemistry_num_tokens": 2644330.0, "eval_chemistry_runtime": 46.2334, "eval_chemistry_samples_per_second": 10.815, "eval_chemistry_steps_per_second": 10.815, "step": 400 }, { "entropy": 0.8526191784068942, "epoch": 0.18249596617147945, "grad_norm": 50.0, "learning_rate": 8.18e-06, "loss": 13.7182, "mean_token_accuracy": 0.7745671790093184, "num_tokens": 2713234.0, "step": 410 }, { "entropy": 0.932603782787919, "epoch": 0.18694708729761308, "grad_norm": 47.5, "learning_rate": 8.380000000000001e-06, "loss": 15.028, "mean_token_accuracy": 0.7569812458008528, "num_tokens": 2783261.0, "step": 420 }, { "entropy": 0.9143912255764007, "epoch": 0.19139820842374672, "grad_norm": 43.0, "learning_rate": 8.580000000000001e-06, "loss": 14.7724, "mean_token_accuracy": 0.7596961252391339, "num_tokens": 2850170.0, "step": 430 }, { "entropy": 0.9113649705424904, "epoch": 0.19584932954988038, "grad_norm": 58.5, "learning_rate": 8.78e-06, "loss": 14.6432, "mean_token_accuracy": 0.760087676718831, "num_tokens": 2913700.0, "step": 440 }, { "entropy": 0.9072460785508156, "epoch": 0.200300450676014, "grad_norm": 46.5, "learning_rate": 8.98e-06, "loss": 14.609, "mean_token_accuracy": 0.7596194025129079, "num_tokens": 2981212.0, "step": 450 }, { "entropy": 0.8693920068442822, "epoch": 0.20475157180214767, "grad_norm": 53.25, "learning_rate": 9.180000000000002e-06, "loss": 13.9505, "mean_token_accuracy": 0.771946213953197, "num_tokens": 3048973.0, "step": 460 }, { "entropy": 0.9109398307278752, "epoch": 0.2092026929282813, "grad_norm": 47.25, "learning_rate": 9.38e-06, "loss": 14.614, "mean_token_accuracy": 0.7599313069134951, "num_tokens": 3117033.0, "step": 470 }, { "entropy": 0.8936059167608619, "epoch": 0.21365381405441494, "grad_norm": 50.25, "learning_rate": 9.58e-06, "loss": 14.4662, "mean_token_accuracy": 0.7608913701027632, "num_tokens": 3185255.0, "step": 480 }, { "entropy": 0.9031545946374535, "epoch": 0.2181049351805486, "grad_norm": 51.5, "learning_rate": 9.780000000000001e-06, "loss": 14.5197, "mean_token_accuracy": 0.7608289115130902, "num_tokens": 3253121.0, "step": 490 }, { "entropy": 0.8450184227898717, "epoch": 0.22255605630668224, "grad_norm": 49.5, "learning_rate": 9.980000000000001e-06, "loss": 13.4565, "mean_token_accuracy": 0.7740382503718137, "num_tokens": 3322823.0, "step": 500 }, { "epoch": 0.22255605630668224, "eval_biology_entropy": 1.2025449865460396, "eval_biology_loss": 1.267388939857483, "eval_biology_mean_token_accuracy": 0.6874357106685638, "eval_biology_num_tokens": 3322823.0, "eval_biology_runtime": 40.0827, "eval_biology_samples_per_second": 12.474, "eval_biology_steps_per_second": 12.474, "step": 500 }, { "epoch": 0.22255605630668224, "eval_chemistry_entropy": 0.8795891938209534, "eval_chemistry_loss": 0.8957814574241638, "eval_chemistry_mean_token_accuracy": 0.7629851229190826, "eval_chemistry_num_tokens": 3322823.0, "eval_chemistry_runtime": 46.3478, "eval_chemistry_samples_per_second": 10.788, "eval_chemistry_steps_per_second": 10.788, "step": 500 }, { "entropy": 0.8574318964034319, "epoch": 0.2270071774328159, "grad_norm": 45.25, "learning_rate": 1.018e-05, "loss": 13.9042, "mean_token_accuracy": 0.7678481444716454, "num_tokens": 3393393.0, "step": 510 }, { "entropy": 0.9288356432691216, "epoch": 0.23145829855894953, "grad_norm": 57.25, "learning_rate": 1.038e-05, "loss": 15.193, "mean_token_accuracy": 0.7506348451599478, "num_tokens": 3454750.0, "step": 520 }, { "entropy": 0.8564269673079252, "epoch": 0.2359094196850832, "grad_norm": 50.75, "learning_rate": 1.0580000000000002e-05, "loss": 13.7644, "mean_token_accuracy": 0.7751363463699817, "num_tokens": 3526914.0, "step": 530 }, { "entropy": 0.93802858479321, "epoch": 0.24036054081121683, "grad_norm": 45.0, "learning_rate": 1.0780000000000002e-05, "loss": 15.0494, "mean_token_accuracy": 0.7559149663895368, "num_tokens": 3589449.0, "step": 540 }, { "entropy": 0.9012869004160166, "epoch": 0.24481166193735046, "grad_norm": 50.25, "learning_rate": 1.0980000000000002e-05, "loss": 14.2344, "mean_token_accuracy": 0.7642816316336394, "num_tokens": 3655092.0, "step": 550 }, { "entropy": 0.8946880368515849, "epoch": 0.24926278306348412, "grad_norm": 57.25, "learning_rate": 1.1180000000000001e-05, "loss": 14.5832, "mean_token_accuracy": 0.7632339514791966, "num_tokens": 3719113.0, "step": 560 }, { "entropy": 0.8879899585619568, "epoch": 0.25371390418961776, "grad_norm": 44.5, "learning_rate": 1.138e-05, "loss": 14.3692, "mean_token_accuracy": 0.7596106130629778, "num_tokens": 3785282.0, "step": 570 }, { "entropy": 0.9068781601265072, "epoch": 0.2581650253157514, "grad_norm": 42.25, "learning_rate": 1.1580000000000001e-05, "loss": 14.6096, "mean_token_accuracy": 0.761234056390822, "num_tokens": 3852276.0, "step": 580 }, { "entropy": 0.8276193620637059, "epoch": 0.262616146441885, "grad_norm": 43.25, "learning_rate": 1.178e-05, "loss": 13.3892, "mean_token_accuracy": 0.7755540499463678, "num_tokens": 3925649.0, "step": 590 }, { "entropy": 0.8920110030099749, "epoch": 0.2670672675680187, "grad_norm": 52.25, "learning_rate": 1.198e-05, "loss": 14.0197, "mean_token_accuracy": 0.766492671892047, "num_tokens": 3990505.0, "step": 600 }, { "epoch": 0.2670672675680187, "eval_biology_entropy": 1.2123423200249672, "eval_biology_loss": 1.2729928493499756, "eval_biology_mean_token_accuracy": 0.6870606996417046, "eval_biology_num_tokens": 3990505.0, "eval_biology_runtime": 39.7983, "eval_biology_samples_per_second": 12.563, "eval_biology_steps_per_second": 12.563, "step": 600 }, { "epoch": 0.2670672675680187, "eval_chemistry_entropy": 0.8659857953190804, "eval_chemistry_loss": 0.8781383037567139, "eval_chemistry_mean_token_accuracy": 0.7664026654362679, "eval_chemistry_num_tokens": 3990505.0, "eval_chemistry_runtime": 46.1775, "eval_chemistry_samples_per_second": 10.828, "eval_chemistry_steps_per_second": 10.828, "step": 600 }, { "entropy": 0.8550863016396761, "epoch": 0.27151838869415235, "grad_norm": 51.25, "learning_rate": 1.218e-05, "loss": 13.8181, "mean_token_accuracy": 0.7682087656110526, "num_tokens": 4060657.0, "step": 610 }, { "entropy": 0.8537623688578606, "epoch": 0.275969509820286, "grad_norm": 42.25, "learning_rate": 1.2380000000000002e-05, "loss": 13.6601, "mean_token_accuracy": 0.772542554140091, "num_tokens": 4133119.0, "step": 620 }, { "entropy": 0.8621461872011423, "epoch": 0.2804206309464196, "grad_norm": 49.25, "learning_rate": 1.2580000000000002e-05, "loss": 13.9865, "mean_token_accuracy": 0.7680005200207234, "num_tokens": 4200051.0, "step": 630 }, { "entropy": 0.9106066713109613, "epoch": 0.2848717520725533, "grad_norm": 49.25, "learning_rate": 1.2780000000000001e-05, "loss": 14.717, "mean_token_accuracy": 0.7637291874736547, "num_tokens": 4267403.0, "step": 640 }, { "entropy": 0.868153141438961, "epoch": 0.28932287319868694, "grad_norm": 52.25, "learning_rate": 1.2980000000000001e-05, "loss": 13.8988, "mean_token_accuracy": 0.7680647127330303, "num_tokens": 4333268.0, "step": 650 }, { "entropy": 0.8451825473457575, "epoch": 0.29377399432482054, "grad_norm": 53.5, "learning_rate": 1.3180000000000001e-05, "loss": 13.5394, "mean_token_accuracy": 0.7747031616047024, "num_tokens": 4400242.0, "step": 660 }, { "entropy": 0.8826779069378972, "epoch": 0.2982251154509542, "grad_norm": 49.75, "learning_rate": 1.3380000000000002e-05, "loss": 14.0787, "mean_token_accuracy": 0.7660945057868958, "num_tokens": 4464714.0, "step": 670 }, { "entropy": 0.8183048281818628, "epoch": 0.30267623657708786, "grad_norm": 47.75, "learning_rate": 1.3580000000000002e-05, "loss": 13.4564, "mean_token_accuracy": 0.7773742496967315, "num_tokens": 4535458.0, "step": 680 }, { "entropy": 0.8536316430196166, "epoch": 0.3071273577032215, "grad_norm": 48.25, "learning_rate": 1.378e-05, "loss": 13.552, "mean_token_accuracy": 0.7754087567329406, "num_tokens": 4599099.0, "step": 690 }, { "entropy": 0.816833440028131, "epoch": 0.31157847882935513, "grad_norm": 39.0, "learning_rate": 1.398e-05, "loss": 13.1791, "mean_token_accuracy": 0.777547013387084, "num_tokens": 4662903.0, "step": 700 }, { "epoch": 0.31157847882935513, "eval_biology_entropy": 1.2325178788900375, "eval_biology_loss": 1.278289556503296, "eval_biology_mean_token_accuracy": 0.686549211382866, "eval_biology_num_tokens": 4662903.0, "eval_biology_runtime": 39.6837, "eval_biology_samples_per_second": 12.6, "eval_biology_steps_per_second": 12.6, "step": 700 }, { "epoch": 0.31157847882935513, "eval_chemistry_entropy": 0.870070047557354, "eval_chemistry_loss": 0.8674882650375366, "eval_chemistry_mean_token_accuracy": 0.7685190732479096, "eval_chemistry_num_tokens": 4662903.0, "eval_chemistry_runtime": 46.0896, "eval_chemistry_samples_per_second": 10.848, "eval_chemistry_steps_per_second": 10.848, "step": 700 }, { "entropy": 0.8818355791270733, "epoch": 0.3160295999554888, "grad_norm": 54.0, "learning_rate": 1.418e-05, "loss": 14.2266, "mean_token_accuracy": 0.7657015427947045, "num_tokens": 4729166.0, "step": 710 }, { "entropy": 0.910587764903903, "epoch": 0.32048072108162245, "grad_norm": 54.5, "learning_rate": 1.4380000000000001e-05, "loss": 14.7617, "mean_token_accuracy": 0.7591140177100897, "num_tokens": 4796815.0, "step": 720 }, { "entropy": 0.8099086729809641, "epoch": 0.32493184220775606, "grad_norm": 51.25, "learning_rate": 1.4580000000000001e-05, "loss": 12.9674, "mean_token_accuracy": 0.7837777521461249, "num_tokens": 4865172.0, "step": 730 }, { "entropy": 0.8765029039233923, "epoch": 0.3293829633338897, "grad_norm": 44.25, "learning_rate": 1.478e-05, "loss": 14.0705, "mean_token_accuracy": 0.7659166298806668, "num_tokens": 4932671.0, "step": 740 }, { "entropy": 0.8784225210547447, "epoch": 0.3338340844600234, "grad_norm": 44.0, "learning_rate": 1.498e-05, "loss": 14.1774, "mean_token_accuracy": 0.7669123791158199, "num_tokens": 4998710.0, "step": 750 }, { "entropy": 0.8316720003262162, "epoch": 0.338285205586157, "grad_norm": 42.75, "learning_rate": 1.5180000000000002e-05, "loss": 13.3239, "mean_token_accuracy": 0.7748636573553085, "num_tokens": 5066948.0, "step": 760 }, { "entropy": 0.8594924572855234, "epoch": 0.34273632671229065, "grad_norm": 49.5, "learning_rate": 1.5380000000000002e-05, "loss": 13.8153, "mean_token_accuracy": 0.7711022242903709, "num_tokens": 5129950.0, "step": 770 }, { "entropy": 0.895938608981669, "epoch": 0.3471874478384243, "grad_norm": 48.25, "learning_rate": 1.5580000000000003e-05, "loss": 14.4493, "mean_token_accuracy": 0.7591051306575537, "num_tokens": 5193519.0, "step": 780 }, { "entropy": 0.8509046232327819, "epoch": 0.351638568964558, "grad_norm": 56.5, "learning_rate": 1.578e-05, "loss": 13.6439, "mean_token_accuracy": 0.7696408761665225, "num_tokens": 5256503.0, "step": 790 }, { "entropy": 0.9008656185120344, "epoch": 0.3560896900906916, "grad_norm": 48.75, "learning_rate": 1.5980000000000003e-05, "loss": 14.7305, "mean_token_accuracy": 0.7615581404417753, "num_tokens": 5324751.0, "step": 800 }, { "epoch": 0.3560896900906916, "eval_biology_entropy": 1.267573108136654, "eval_biology_loss": 1.2815055847167969, "eval_biology_mean_token_accuracy": 0.6854563910365105, "eval_biology_num_tokens": 5324751.0, "eval_biology_runtime": 39.7889, "eval_biology_samples_per_second": 12.566, "eval_biology_steps_per_second": 12.566, "step": 800 }, { "epoch": 0.3560896900906916, "eval_chemistry_entropy": 0.8897559930682182, "eval_chemistry_loss": 0.8585976362228394, "eval_chemistry_mean_token_accuracy": 0.7700544927716255, "eval_chemistry_num_tokens": 5324751.0, "eval_chemistry_runtime": 45.9968, "eval_chemistry_samples_per_second": 10.87, "eval_chemistry_steps_per_second": 10.87, "step": 800 }, { "entropy": 0.7946027474477887, "epoch": 0.36054081121682524, "grad_norm": 50.0, "learning_rate": 1.618e-05, "loss": 12.8437, "mean_token_accuracy": 0.7842005740851163, "num_tokens": 5391664.0, "step": 810 }, { "entropy": 0.8090571435168386, "epoch": 0.3649919323429589, "grad_norm": 45.0, "learning_rate": 1.638e-05, "loss": 12.7173, "mean_token_accuracy": 0.7849415507167578, "num_tokens": 5458519.0, "step": 820 }, { "entropy": 0.8408348582684994, "epoch": 0.3694430534690925, "grad_norm": 46.0, "learning_rate": 1.658e-05, "loss": 13.9973, "mean_token_accuracy": 0.7693964328616858, "num_tokens": 5523391.0, "step": 830 }, { "entropy": 0.8412857724353671, "epoch": 0.37389417459522617, "grad_norm": 44.0, "learning_rate": 1.6780000000000002e-05, "loss": 13.3081, "mean_token_accuracy": 0.7760034879669547, "num_tokens": 5585508.0, "step": 840 }, { "entropy": 0.836153868213296, "epoch": 0.37834529572135983, "grad_norm": 48.5, "learning_rate": 1.698e-05, "loss": 13.6747, "mean_token_accuracy": 0.775155283510685, "num_tokens": 5650755.0, "step": 850 }, { "entropy": 0.8424218002706766, "epoch": 0.38279641684749344, "grad_norm": 45.0, "learning_rate": 1.718e-05, "loss": 13.4803, "mean_token_accuracy": 0.7742194497957826, "num_tokens": 5716321.0, "step": 860 }, { "entropy": 0.883666661940515, "epoch": 0.3872475379736271, "grad_norm": 50.75, "learning_rate": 1.7380000000000003e-05, "loss": 14.1247, "mean_token_accuracy": 0.7642681807279587, "num_tokens": 5782698.0, "step": 870 }, { "entropy": 0.8131563207134604, "epoch": 0.39169865909976076, "grad_norm": 39.0, "learning_rate": 1.758e-05, "loss": 13.2211, "mean_token_accuracy": 0.7782290887087584, "num_tokens": 5848889.0, "step": 880 }, { "entropy": 0.8608601313084364, "epoch": 0.3961497802258944, "grad_norm": 44.0, "learning_rate": 1.7780000000000003e-05, "loss": 13.9079, "mean_token_accuracy": 0.7717852048575878, "num_tokens": 5912919.0, "step": 890 }, { "entropy": 0.8315491866320371, "epoch": 0.400600901352028, "grad_norm": 42.25, "learning_rate": 1.798e-05, "loss": 13.4828, "mean_token_accuracy": 0.7756550934165716, "num_tokens": 5980229.0, "step": 900 }, { "epoch": 0.400600901352028, "eval_biology_entropy": 1.2335028433203696, "eval_biology_loss": 1.2846676111221313, "eval_biology_mean_token_accuracy": 0.6853172712922097, "eval_biology_num_tokens": 5980229.0, "eval_biology_runtime": 39.7136, "eval_biology_samples_per_second": 12.59, "eval_biology_steps_per_second": 12.59, "step": 900 }, { "epoch": 0.400600901352028, "eval_chemistry_entropy": 0.8548561576008796, "eval_chemistry_loss": 0.8478842973709106, "eval_chemistry_mean_token_accuracy": 0.773112800002098, "eval_chemistry_num_tokens": 5980229.0, "eval_chemistry_runtime": 45.9488, "eval_chemistry_samples_per_second": 10.882, "eval_chemistry_steps_per_second": 10.882, "step": 900 }, { "entropy": 0.7898269753903151, "epoch": 0.4050520224781617, "grad_norm": 40.25, "learning_rate": 1.8180000000000002e-05, "loss": 12.7209, "mean_token_accuracy": 0.784660654142499, "num_tokens": 6048508.0, "step": 910 }, { "entropy": 0.8143722828477621, "epoch": 0.40950314360429535, "grad_norm": 44.5, "learning_rate": 1.8380000000000004e-05, "loss": 13.2591, "mean_token_accuracy": 0.7791141759604215, "num_tokens": 6114855.0, "step": 920 }, { "entropy": 0.8494924793019891, "epoch": 0.41395426473042896, "grad_norm": 40.0, "learning_rate": 1.858e-05, "loss": 13.506, "mean_token_accuracy": 0.776913444697857, "num_tokens": 6179050.0, "step": 930 }, { "entropy": 0.8335931519046426, "epoch": 0.4184053858565626, "grad_norm": 45.25, "learning_rate": 1.878e-05, "loss": 13.6217, "mean_token_accuracy": 0.7724651444703341, "num_tokens": 6244987.0, "step": 940 }, { "entropy": 0.8118610519915819, "epoch": 0.4228565069826963, "grad_norm": 49.75, "learning_rate": 1.898e-05, "loss": 13.0423, "mean_token_accuracy": 0.7783929593861103, "num_tokens": 6310799.0, "step": 950 }, { "entropy": 0.8028015844523907, "epoch": 0.4273076281088299, "grad_norm": 48.25, "learning_rate": 1.918e-05, "loss": 13.0412, "mean_token_accuracy": 0.778607621230185, "num_tokens": 6378152.0, "step": 960 }, { "entropy": 0.8370830919593573, "epoch": 0.43175874923496355, "grad_norm": 44.75, "learning_rate": 1.938e-05, "loss": 13.2798, "mean_token_accuracy": 0.7766136281192303, "num_tokens": 6442491.0, "step": 970 }, { "entropy": 0.862270618416369, "epoch": 0.4362098703610972, "grad_norm": 39.5, "learning_rate": 1.9580000000000002e-05, "loss": 14.0144, "mean_token_accuracy": 0.7675615277141332, "num_tokens": 6510231.0, "step": 980 }, { "entropy": 0.8360511595383286, "epoch": 0.44066099148723087, "grad_norm": 48.0, "learning_rate": 1.978e-05, "loss": 13.4815, "mean_token_accuracy": 0.7760949255898595, "num_tokens": 6572858.0, "step": 990 }, { "entropy": 0.8388594528660178, "epoch": 0.4451121126133645, "grad_norm": 43.25, "learning_rate": 1.9980000000000002e-05, "loss": 13.6609, "mean_token_accuracy": 0.7700441874563694, "num_tokens": 6637273.0, "step": 1000 }, { "epoch": 0.4451121126133645, "eval_biology_entropy": 1.187324990749359, "eval_biology_loss": 1.2962696552276611, "eval_biology_mean_token_accuracy": 0.6844774860739707, "eval_biology_num_tokens": 6637273.0, "eval_biology_runtime": 39.733, "eval_biology_samples_per_second": 12.584, "eval_biology_steps_per_second": 12.584, "step": 1000 }, { "epoch": 0.4451121126133645, "eval_chemistry_entropy": 0.8146472455263137, "eval_chemistry_loss": 0.8452854156494141, "eval_chemistry_mean_token_accuracy": 0.7733621709942817, "eval_chemistry_num_tokens": 6637273.0, "eval_chemistry_runtime": 46.1309, "eval_chemistry_samples_per_second": 10.839, "eval_chemistry_steps_per_second": 10.839, "step": 1000 }, { "entropy": 0.8283287117257714, "epoch": 0.44956323373949814, "grad_norm": 45.25, "learning_rate": 1.9980000000000002e-05, "loss": 13.2785, "mean_token_accuracy": 0.7738786302506924, "num_tokens": 6700799.0, "step": 1010 }, { "entropy": 0.8240121186710894, "epoch": 0.4540143548656318, "grad_norm": 43.0, "learning_rate": 1.995777777777778e-05, "loss": 13.5714, "mean_token_accuracy": 0.7778675271198153, "num_tokens": 6765798.0, "step": 1020 }, { "entropy": 0.8027254937216639, "epoch": 0.4584654759917654, "grad_norm": 45.0, "learning_rate": 1.9935555555555557e-05, "loss": 12.958, "mean_token_accuracy": 0.7836722049862146, "num_tokens": 6832774.0, "step": 1030 }, { "entropy": 0.8506452234461903, "epoch": 0.46291659711789906, "grad_norm": 40.75, "learning_rate": 1.9913333333333335e-05, "loss": 13.6169, "mean_token_accuracy": 0.7709769554436207, "num_tokens": 6896684.0, "step": 1040 }, { "entropy": 0.7970458004623652, "epoch": 0.4673677182440327, "grad_norm": 49.5, "learning_rate": 1.9891111111111112e-05, "loss": 13.1299, "mean_token_accuracy": 0.7849638484418392, "num_tokens": 6964423.0, "step": 1050 }, { "entropy": 0.811302705295384, "epoch": 0.4718188393701664, "grad_norm": 37.5, "learning_rate": 1.986888888888889e-05, "loss": 13.1097, "mean_token_accuracy": 0.7798706620931626, "num_tokens": 7033417.0, "step": 1060 }, { "entropy": 0.8476884752511978, "epoch": 0.4762699604963, "grad_norm": 44.5, "learning_rate": 1.9846666666666668e-05, "loss": 13.6585, "mean_token_accuracy": 0.7750785838812589, "num_tokens": 7101274.0, "step": 1070 }, { "entropy": 0.8404533293098211, "epoch": 0.48072108162243365, "grad_norm": 39.5, "learning_rate": 1.9824444444444445e-05, "loss": 13.6161, "mean_token_accuracy": 0.7751947242766619, "num_tokens": 7166608.0, "step": 1080 }, { "entropy": 0.7973418578505516, "epoch": 0.4851722027485673, "grad_norm": 58.5, "learning_rate": 1.9802222222222226e-05, "loss": 12.8893, "mean_token_accuracy": 0.7859396133571863, "num_tokens": 7240958.0, "step": 1090 }, { "entropy": 0.8283490337431431, "epoch": 0.4896233238747009, "grad_norm": 43.0, "learning_rate": 1.978e-05, "loss": 13.2474, "mean_token_accuracy": 0.7790504809468984, "num_tokens": 7311968.0, "step": 1100 }, { "epoch": 0.4896233238747009, "eval_biology_entropy": 1.2187969796061515, "eval_biology_loss": 1.297374963760376, "eval_biology_mean_token_accuracy": 0.6832944719195366, "eval_biology_num_tokens": 7311968.0, "eval_biology_runtime": 40.0896, "eval_biology_samples_per_second": 12.472, "eval_biology_steps_per_second": 12.472, "step": 1100 }, { "epoch": 0.4896233238747009, "eval_chemistry_entropy": 0.8075549347996712, "eval_chemistry_loss": 0.8330864310264587, "eval_chemistry_mean_token_accuracy": 0.7758210087418557, "eval_chemistry_num_tokens": 7311968.0, "eval_chemistry_runtime": 46.2026, "eval_chemistry_samples_per_second": 10.822, "eval_chemistry_steps_per_second": 10.822, "step": 1100 }, { "entropy": 0.8035251742228866, "epoch": 0.4940744450008346, "grad_norm": 42.25, "learning_rate": 1.975777777777778e-05, "loss": 13.0881, "mean_token_accuracy": 0.7785759992897511, "num_tokens": 7378961.0, "step": 1110 }, { "entropy": 0.7911803729832172, "epoch": 0.49852556612696824, "grad_norm": 47.0, "learning_rate": 1.9735555555555556e-05, "loss": 12.8175, "mean_token_accuracy": 0.7857681257650256, "num_tokens": 7444923.0, "step": 1120 }, { "entropy": 0.8341826571151614, "epoch": 0.5029766872531019, "grad_norm": 42.0, "learning_rate": 1.9713333333333337e-05, "loss": 13.2652, "mean_token_accuracy": 0.7798049133270979, "num_tokens": 7509885.0, "step": 1130 }, { "entropy": 0.8292949998751282, "epoch": 0.5074278083792355, "grad_norm": 42.25, "learning_rate": 1.969111111111111e-05, "loss": 13.6776, "mean_token_accuracy": 0.7743924837559462, "num_tokens": 7576456.0, "step": 1140 }, { "entropy": 0.867630060762167, "epoch": 0.5118789295053692, "grad_norm": 44.25, "learning_rate": 1.9668888888888892e-05, "loss": 13.874, "mean_token_accuracy": 0.7730787601321936, "num_tokens": 7641821.0, "step": 1150 }, { "entropy": 0.800755001604557, "epoch": 0.5163300506315028, "grad_norm": 47.0, "learning_rate": 1.9646666666666666e-05, "loss": 12.939, "mean_token_accuracy": 0.7809992711991072, "num_tokens": 7706502.0, "step": 1160 }, { "entropy": 0.8834998097270728, "epoch": 0.5207811717576365, "grad_norm": 48.0, "learning_rate": 1.9624444444444447e-05, "loss": 14.3515, "mean_token_accuracy": 0.7673096172511578, "num_tokens": 7772353.0, "step": 1170 }, { "entropy": 0.8068421924486756, "epoch": 0.52523229288377, "grad_norm": 38.25, "learning_rate": 1.9602222222222225e-05, "loss": 13.219, "mean_token_accuracy": 0.7816231641918421, "num_tokens": 7838516.0, "step": 1180 }, { "entropy": 0.8032166380435228, "epoch": 0.5296834140099037, "grad_norm": 52.25, "learning_rate": 1.9580000000000002e-05, "loss": 12.8455, "mean_token_accuracy": 0.7809245727956295, "num_tokens": 7902568.0, "step": 1190 }, { "entropy": 0.8334995551034808, "epoch": 0.5341345351360374, "grad_norm": 42.25, "learning_rate": 1.955777777777778e-05, "loss": 13.3201, "mean_token_accuracy": 0.7768194541335106, "num_tokens": 7969704.0, "step": 1200 }, { "epoch": 0.5341345351360374, "eval_biology_entropy": 1.2117234426736831, "eval_biology_loss": 1.300293207168579, "eval_biology_mean_token_accuracy": 0.6825520681738854, "eval_biology_num_tokens": 7969704.0, "eval_biology_runtime": 39.4328, "eval_biology_samples_per_second": 12.68, "eval_biology_steps_per_second": 12.68, "step": 1200 }, { "epoch": 0.5341345351360374, "eval_chemistry_entropy": 0.7991210364103317, "eval_chemistry_loss": 0.8259859681129456, "eval_chemistry_mean_token_accuracy": 0.7772398797273636, "eval_chemistry_num_tokens": 7969704.0, "eval_chemistry_runtime": 45.8311, "eval_chemistry_samples_per_second": 10.91, "eval_chemistry_steps_per_second": 10.91, "step": 1200 }, { "entropy": 0.7765231873840094, "epoch": 0.538585656262171, "grad_norm": 41.25, "learning_rate": 1.9535555555555557e-05, "loss": 12.6868, "mean_token_accuracy": 0.7813247825950385, "num_tokens": 8034765.0, "step": 1210 }, { "entropy": 0.8038391519337893, "epoch": 0.5430367773883047, "grad_norm": 35.5, "learning_rate": 1.9513333333333335e-05, "loss": 13.2505, "mean_token_accuracy": 0.7795963916927576, "num_tokens": 8103869.0, "step": 1220 }, { "entropy": 0.7666595270857215, "epoch": 0.5474878985144384, "grad_norm": 41.25, "learning_rate": 1.9491111111111113e-05, "loss": 12.3886, "mean_token_accuracy": 0.7900463610887527, "num_tokens": 8173386.0, "step": 1230 }, { "entropy": 0.8262818416580557, "epoch": 0.551939019640572, "grad_norm": 37.0, "learning_rate": 1.946888888888889e-05, "loss": 13.2345, "mean_token_accuracy": 0.7755838381126523, "num_tokens": 8242162.0, "step": 1240 }, { "entropy": 0.8054397076368331, "epoch": 0.5563901407667056, "grad_norm": 48.5, "learning_rate": 1.9446666666666668e-05, "loss": 13.0502, "mean_token_accuracy": 0.7804440699517727, "num_tokens": 8308919.0, "step": 1250 }, { "entropy": 0.811793964356184, "epoch": 0.5608412618928392, "grad_norm": 49.25, "learning_rate": 1.9424444444444446e-05, "loss": 13.3739, "mean_token_accuracy": 0.7758540976792574, "num_tokens": 8365617.0, "step": 1260 }, { "entropy": 0.8213885102421046, "epoch": 0.5652923830189729, "grad_norm": 37.5, "learning_rate": 1.9402222222222223e-05, "loss": 13.2316, "mean_token_accuracy": 0.7780250526964665, "num_tokens": 8430728.0, "step": 1270 }, { "entropy": 0.7969466263428331, "epoch": 0.5697435041451065, "grad_norm": 34.25, "learning_rate": 1.938e-05, "loss": 12.6971, "mean_token_accuracy": 0.7868836035951972, "num_tokens": 8497852.0, "step": 1280 }, { "entropy": 0.8166826661676169, "epoch": 0.5741946252712402, "grad_norm": 43.75, "learning_rate": 1.935777777777778e-05, "loss": 13.4076, "mean_token_accuracy": 0.7757051605731249, "num_tokens": 8558952.0, "step": 1290 }, { "entropy": 0.8129263132810592, "epoch": 0.5786457463973739, "grad_norm": 44.5, "learning_rate": 1.9335555555555556e-05, "loss": 13.1537, "mean_token_accuracy": 0.7816494394093751, "num_tokens": 8622868.0, "step": 1300 }, { "epoch": 0.5786457463973739, "eval_biology_entropy": 1.2590930373072624, "eval_biology_loss": 1.3004297018051147, "eval_biology_mean_token_accuracy": 0.6824918667078018, "eval_biology_num_tokens": 8622868.0, "eval_biology_runtime": 39.5147, "eval_biology_samples_per_second": 12.654, "eval_biology_steps_per_second": 12.654, "step": 1300 }, { "epoch": 0.5786457463973739, "eval_chemistry_entropy": 0.8134206305742264, "eval_chemistry_loss": 0.8170297741889954, "eval_chemistry_mean_token_accuracy": 0.7793359256386757, "eval_chemistry_num_tokens": 8622868.0, "eval_chemistry_runtime": 45.8445, "eval_chemistry_samples_per_second": 10.906, "eval_chemistry_steps_per_second": 10.906, "step": 1300 }, { "entropy": 0.7748439759016037, "epoch": 0.5830968675235075, "grad_norm": 40.25, "learning_rate": 1.9313333333333334e-05, "loss": 12.585, "mean_token_accuracy": 0.7868875458836555, "num_tokens": 8694510.0, "step": 1310 }, { "entropy": 0.8616355959326029, "epoch": 0.5875479886496411, "grad_norm": 49.25, "learning_rate": 1.9291111111111115e-05, "loss": 14.0344, "mean_token_accuracy": 0.766650452464819, "num_tokens": 8757753.0, "step": 1320 }, { "entropy": 0.8312898099422454, "epoch": 0.5919991097757747, "grad_norm": 44.0, "learning_rate": 1.926888888888889e-05, "loss": 13.3145, "mean_token_accuracy": 0.7796449743211269, "num_tokens": 8823153.0, "step": 1330 }, { "entropy": 0.8332564871758222, "epoch": 0.5964502309019084, "grad_norm": 41.5, "learning_rate": 1.924666666666667e-05, "loss": 13.599, "mean_token_accuracy": 0.7727281775325536, "num_tokens": 8886401.0, "step": 1340 }, { "entropy": 0.8084427203983069, "epoch": 0.6009013520280421, "grad_norm": 36.75, "learning_rate": 1.9224444444444444e-05, "loss": 12.8022, "mean_token_accuracy": 0.7834546566009521, "num_tokens": 8958530.0, "step": 1350 }, { "entropy": 0.8060288658365607, "epoch": 0.6053524731541757, "grad_norm": 41.0, "learning_rate": 1.9202222222222225e-05, "loss": 13.1094, "mean_token_accuracy": 0.7809524293988943, "num_tokens": 9024677.0, "step": 1360 }, { "entropy": 0.8401720520108938, "epoch": 0.6098035942803094, "grad_norm": 40.75, "learning_rate": 1.918e-05, "loss": 13.5138, "mean_token_accuracy": 0.7706755470484495, "num_tokens": 9090280.0, "step": 1370 }, { "entropy": 0.798151072487235, "epoch": 0.614254715406443, "grad_norm": 43.5, "learning_rate": 1.915777777777778e-05, "loss": 12.9648, "mean_token_accuracy": 0.7831501496955753, "num_tokens": 9153771.0, "step": 1380 }, { "entropy": 0.7643128799274563, "epoch": 0.6187058365325766, "grad_norm": 39.25, "learning_rate": 1.9135555555555555e-05, "loss": 12.2452, "mean_token_accuracy": 0.7925934199243784, "num_tokens": 9227117.0, "step": 1390 }, { "entropy": 0.8085228271782399, "epoch": 0.6231569576587103, "grad_norm": 43.0, "learning_rate": 1.9113333333333336e-05, "loss": 13.2475, "mean_token_accuracy": 0.7788113884627819, "num_tokens": 9291760.0, "step": 1400 }, { "epoch": 0.6231569576587103, "eval_biology_entropy": 1.2567389221787453, "eval_biology_loss": 1.3026176691055298, "eval_biology_mean_token_accuracy": 0.6818688949346542, "eval_biology_num_tokens": 9291760.0, "eval_biology_runtime": 39.2644, "eval_biology_samples_per_second": 12.734, "eval_biology_steps_per_second": 12.734, "step": 1400 }, { "epoch": 0.6231569576587103, "eval_chemistry_entropy": 0.8002244250178338, "eval_chemistry_loss": 0.8070799708366394, "eval_chemistry_mean_token_accuracy": 0.7815486862063408, "eval_chemistry_num_tokens": 9291760.0, "eval_chemistry_runtime": 55.9364, "eval_chemistry_samples_per_second": 8.939, "eval_chemistry_steps_per_second": 8.939, "step": 1400 }, { "entropy": 0.856862205825746, "epoch": 0.6276080787848439, "grad_norm": 43.75, "learning_rate": 1.9091111111111113e-05, "loss": 13.4277, "mean_token_accuracy": 0.7707107689231634, "num_tokens": 9357561.0, "step": 1410 }, { "entropy": 0.7735245639458299, "epoch": 0.6320591999109776, "grad_norm": 48.0, "learning_rate": 1.906888888888889e-05, "loss": 12.943, "mean_token_accuracy": 0.7830769792199135, "num_tokens": 9425022.0, "step": 1420 }, { "entropy": 0.8000387817621231, "epoch": 0.6365103210371112, "grad_norm": 49.0, "learning_rate": 1.904666666666667e-05, "loss": 12.637, "mean_token_accuracy": 0.7861681949347258, "num_tokens": 9492028.0, "step": 1430 }, { "entropy": 0.7522478165104985, "epoch": 0.6409614421632449, "grad_norm": 42.0, "learning_rate": 1.9024444444444446e-05, "loss": 12.2016, "mean_token_accuracy": 0.7882154919207096, "num_tokens": 9561091.0, "step": 1440 }, { "entropy": 0.7836794227361679, "epoch": 0.6454125632893785, "grad_norm": 51.75, "learning_rate": 1.9002222222222224e-05, "loss": 12.744, "mean_token_accuracy": 0.7859560146927833, "num_tokens": 9628383.0, "step": 1450 }, { "entropy": 0.7407985650002956, "epoch": 0.6498636844155121, "grad_norm": 34.0, "learning_rate": 1.898e-05, "loss": 11.8696, "mean_token_accuracy": 0.7983961008489132, "num_tokens": 9695723.0, "step": 1460 }, { "entropy": 0.7636277657002211, "epoch": 0.6543148055416458, "grad_norm": 38.75, "learning_rate": 1.895777777777778e-05, "loss": 12.4201, "mean_token_accuracy": 0.7915539544075727, "num_tokens": 9763393.0, "step": 1470 }, { "entropy": 0.8254500133916736, "epoch": 0.6587659266677794, "grad_norm": 38.25, "learning_rate": 1.8935555555555556e-05, "loss": 13.0763, "mean_token_accuracy": 0.7825288005173207, "num_tokens": 9827272.0, "step": 1480 }, { "entropy": 0.7628985194489359, "epoch": 0.6632170477939131, "grad_norm": 35.75, "learning_rate": 1.8913333333333334e-05, "loss": 12.4827, "mean_token_accuracy": 0.7848137805238367, "num_tokens": 9892418.0, "step": 1490 }, { "entropy": 0.7648764431476593, "epoch": 0.6676681689200468, "grad_norm": 40.25, "learning_rate": 1.8891111111111115e-05, "loss": 12.3863, "mean_token_accuracy": 0.7885230954736471, "num_tokens": 9955369.0, "step": 1500 }, { "epoch": 0.6676681689200468, "eval_biology_entropy": 1.232845685839653, "eval_biology_loss": 1.299937129020691, "eval_biology_mean_token_accuracy": 0.6833411865234374, "eval_biology_num_tokens": 9955369.0, "eval_biology_runtime": 39.6042, "eval_biology_samples_per_second": 12.625, "eval_biology_steps_per_second": 12.625, "step": 1500 }, { "epoch": 0.6676681689200468, "eval_chemistry_entropy": 0.7862835813760758, "eval_chemistry_loss": 0.7996346354484558, "eval_chemistry_mean_token_accuracy": 0.7832771391272545, "eval_chemistry_num_tokens": 9955369.0, "eval_chemistry_runtime": 46.138, "eval_chemistry_samples_per_second": 10.837, "eval_chemistry_steps_per_second": 10.837, "step": 1500 }, { "entropy": 0.7809047346934677, "epoch": 0.6721192900461804, "grad_norm": 41.25, "learning_rate": 1.886888888888889e-05, "loss": 12.5052, "mean_token_accuracy": 0.7869493119418621, "num_tokens": 10026298.0, "step": 1510 }, { "entropy": 0.7423510169610381, "epoch": 0.676570411172314, "grad_norm": 36.0, "learning_rate": 1.884666666666667e-05, "loss": 11.9187, "mean_token_accuracy": 0.7974143566563725, "num_tokens": 10096065.0, "step": 1520 }, { "entropy": 0.771459529362619, "epoch": 0.6810215322984476, "grad_norm": 40.5, "learning_rate": 1.8824444444444445e-05, "loss": 12.7462, "mean_token_accuracy": 0.7863322600722313, "num_tokens": 10160745.0, "step": 1530 }, { "entropy": 0.7509715856052935, "epoch": 0.6854726534245813, "grad_norm": 35.75, "learning_rate": 1.8802222222222226e-05, "loss": 11.927, "mean_token_accuracy": 0.7947466436773538, "num_tokens": 10233806.0, "step": 1540 }, { "entropy": 0.7742771266028285, "epoch": 0.689923774550715, "grad_norm": 42.5, "learning_rate": 1.878e-05, "loss": 12.5918, "mean_token_accuracy": 0.7863378578796982, "num_tokens": 10298845.0, "step": 1550 }, { "entropy": 0.7730063889175653, "epoch": 0.6943748956768486, "grad_norm": 38.25, "learning_rate": 1.875777777777778e-05, "loss": 12.4047, "mean_token_accuracy": 0.7885195638984441, "num_tokens": 10364601.0, "step": 1560 }, { "entropy": 0.7708892775699496, "epoch": 0.6988260168029823, "grad_norm": 40.75, "learning_rate": 1.873555555555556e-05, "loss": 12.6199, "mean_token_accuracy": 0.7856390193104744, "num_tokens": 10434433.0, "step": 1570 }, { "entropy": 0.7958233149722218, "epoch": 0.703277137929116, "grad_norm": 43.75, "learning_rate": 1.8713333333333336e-05, "loss": 12.8595, "mean_token_accuracy": 0.7833300601691008, "num_tokens": 10500891.0, "step": 1580 }, { "entropy": 0.7902805691584944, "epoch": 0.7077282590552495, "grad_norm": 40.0, "learning_rate": 1.8691111111111114e-05, "loss": 12.8921, "mean_token_accuracy": 0.7834575500339269, "num_tokens": 10567784.0, "step": 1590 }, { "entropy": 0.7975674813613296, "epoch": 0.7121793801813832, "grad_norm": 38.5, "learning_rate": 1.866888888888889e-05, "loss": 12.4954, "mean_token_accuracy": 0.7869599737226963, "num_tokens": 10633325.0, "step": 1600 }, { "epoch": 0.7121793801813832, "eval_biology_entropy": 1.2419219986200332, "eval_biology_loss": 1.3013286590576172, "eval_biology_mean_token_accuracy": 0.6823853524923325, "eval_biology_num_tokens": 10633325.0, "eval_biology_runtime": 40.8294, "eval_biology_samples_per_second": 12.246, "eval_biology_steps_per_second": 12.246, "step": 1600 }, { "epoch": 0.7121793801813832, "eval_chemistry_entropy": 0.7956894148588181, "eval_chemistry_loss": 0.7937653660774231, "eval_chemistry_mean_token_accuracy": 0.7838373360037804, "eval_chemistry_num_tokens": 10633325.0, "eval_chemistry_runtime": 46.4829, "eval_chemistry_samples_per_second": 10.757, "eval_chemistry_steps_per_second": 10.757, "step": 1600 }, { "entropy": 0.7231507489457727, "epoch": 0.7166305013075168, "grad_norm": 44.25, "learning_rate": 1.864666666666667e-05, "loss": 11.7926, "mean_token_accuracy": 0.7977068889886141, "num_tokens": 10699737.0, "step": 1610 }, { "entropy": 0.7011674824170768, "epoch": 0.7210816224336505, "grad_norm": 47.25, "learning_rate": 1.8624444444444446e-05, "loss": 11.2514, "mean_token_accuracy": 0.8032249186187983, "num_tokens": 10764319.0, "step": 1620 }, { "entropy": 0.7829309536144138, "epoch": 0.7255327435597841, "grad_norm": 37.0, "learning_rate": 1.8602222222222224e-05, "loss": 12.7194, "mean_token_accuracy": 0.7839431796222925, "num_tokens": 10831617.0, "step": 1630 }, { "entropy": 0.7741082075983285, "epoch": 0.7299838646859178, "grad_norm": 44.0, "learning_rate": 1.858e-05, "loss": 12.5457, "mean_token_accuracy": 0.7877275109291076, "num_tokens": 10897916.0, "step": 1640 }, { "entropy": 0.7683334495872259, "epoch": 0.7344349858120514, "grad_norm": 40.0, "learning_rate": 1.855777777777778e-05, "loss": 12.3848, "mean_token_accuracy": 0.7887299537658692, "num_tokens": 10965483.0, "step": 1650 }, { "entropy": 0.7886540442705154, "epoch": 0.738886106938185, "grad_norm": 37.5, "learning_rate": 1.8535555555555557e-05, "loss": 12.7194, "mean_token_accuracy": 0.7842936536297203, "num_tokens": 11030485.0, "step": 1660 }, { "entropy": 0.7601794632151723, "epoch": 0.7433372280643187, "grad_norm": 42.5, "learning_rate": 1.8513333333333335e-05, "loss": 12.345, "mean_token_accuracy": 0.789710770919919, "num_tokens": 11097108.0, "step": 1670 }, { "entropy": 0.7141751017421484, "epoch": 0.7477883491904523, "grad_norm": 40.5, "learning_rate": 1.8491111111111112e-05, "loss": 11.629, "mean_token_accuracy": 0.8011402323842048, "num_tokens": 11165356.0, "step": 1680 }, { "entropy": 0.7918755512684583, "epoch": 0.752239470316586, "grad_norm": 36.0, "learning_rate": 1.846888888888889e-05, "loss": 12.6792, "mean_token_accuracy": 0.7855523183941842, "num_tokens": 11234429.0, "step": 1690 }, { "entropy": 0.7715185107663274, "epoch": 0.7566905914427197, "grad_norm": 40.75, "learning_rate": 1.8446666666666667e-05, "loss": 12.3163, "mean_token_accuracy": 0.7902058430016041, "num_tokens": 11299223.0, "step": 1700 }, { "epoch": 0.7566905914427197, "eval_biology_entropy": 1.2339779297113418, "eval_biology_loss": 1.3050655126571655, "eval_biology_mean_token_accuracy": 0.6822574281096458, "eval_biology_num_tokens": 11299223.0, "eval_biology_runtime": 39.9731, "eval_biology_samples_per_second": 12.508, "eval_biology_steps_per_second": 12.508, "step": 1700 }, { "epoch": 0.7566905914427197, "eval_chemistry_entropy": 0.7556659379005433, "eval_chemistry_loss": 0.7893115282058716, "eval_chemistry_mean_token_accuracy": 0.7856479023098946, "eval_chemistry_num_tokens": 11299223.0, "eval_chemistry_runtime": 48.0592, "eval_chemistry_samples_per_second": 10.404, "eval_chemistry_steps_per_second": 10.404, "step": 1700 }, { "entropy": 0.7491960693150759, "epoch": 0.7611417125688533, "grad_norm": 37.5, "learning_rate": 1.842444444444445e-05, "loss": 12.221, "mean_token_accuracy": 0.7910046689212322, "num_tokens": 11371200.0, "step": 1710 }, { "entropy": 0.7779044238850474, "epoch": 0.7655928336949869, "grad_norm": 36.0, "learning_rate": 1.8402222222222223e-05, "loss": 12.6537, "mean_token_accuracy": 0.7857811234891414, "num_tokens": 11436799.0, "step": 1720 }, { "entropy": 0.7611921314150095, "epoch": 0.7700439548211205, "grad_norm": 46.25, "learning_rate": 1.8380000000000004e-05, "loss": 12.1793, "mean_token_accuracy": 0.7942970298230648, "num_tokens": 11502965.0, "step": 1730 }, { "entropy": 0.7916558500379324, "epoch": 0.7744950759472542, "grad_norm": 39.5, "learning_rate": 1.8357777777777778e-05, "loss": 12.728, "mean_token_accuracy": 0.7856421928852797, "num_tokens": 11567422.0, "step": 1740 }, { "entropy": 0.7364842056296765, "epoch": 0.7789461970733879, "grad_norm": 30.625, "learning_rate": 1.833555555555556e-05, "loss": 12.1674, "mean_token_accuracy": 0.791620584949851, "num_tokens": 11637076.0, "step": 1750 }, { "entropy": 0.8320787468925118, "epoch": 0.7833973181995215, "grad_norm": 38.0, "learning_rate": 1.8313333333333333e-05, "loss": 13.1941, "mean_token_accuracy": 0.7748327614739537, "num_tokens": 11703496.0, "step": 1760 }, { "entropy": 0.7116687665693462, "epoch": 0.7878484393256552, "grad_norm": 29.625, "learning_rate": 1.8291111111111114e-05, "loss": 11.5027, "mean_token_accuracy": 0.8015623264014721, "num_tokens": 11772773.0, "step": 1770 }, { "entropy": 0.7382532864809036, "epoch": 0.7922995604517888, "grad_norm": 37.25, "learning_rate": 1.8268888888888888e-05, "loss": 12.107, "mean_token_accuracy": 0.7927792508155107, "num_tokens": 11840317.0, "step": 1780 }, { "entropy": 0.8406919915229082, "epoch": 0.7967506815779224, "grad_norm": 40.75, "learning_rate": 1.824666666666667e-05, "loss": 13.4841, "mean_token_accuracy": 0.7751987297087908, "num_tokens": 11903376.0, "step": 1790 }, { "entropy": 0.7729794921353459, "epoch": 0.801201802704056, "grad_norm": 41.25, "learning_rate": 1.8224444444444447e-05, "loss": 12.3404, "mean_token_accuracy": 0.789885114133358, "num_tokens": 11965530.0, "step": 1800 }, { "epoch": 0.801201802704056, "eval_biology_entropy": 1.238894326388836, "eval_biology_loss": 1.3023967742919922, "eval_biology_mean_token_accuracy": 0.6827202830314636, "eval_biology_num_tokens": 11965530.0, "eval_biology_runtime": 39.8685, "eval_biology_samples_per_second": 12.541, "eval_biology_steps_per_second": 12.541, "step": 1800 }, { "epoch": 0.801201802704056, "eval_chemistry_entropy": 0.7590603602528572, "eval_chemistry_loss": 0.7831795811653137, "eval_chemistry_mean_token_accuracy": 0.7866261592507362, "eval_chemistry_num_tokens": 11965530.0, "eval_chemistry_runtime": 48.0982, "eval_chemistry_samples_per_second": 10.395, "eval_chemistry_steps_per_second": 10.395, "step": 1800 }, { "entropy": 0.7708059819415212, "epoch": 0.8056529238301897, "grad_norm": 43.25, "learning_rate": 1.8202222222222225e-05, "loss": 12.5294, "mean_token_accuracy": 0.7862987028434872, "num_tokens": 12031493.0, "step": 1810 }, { "entropy": 0.7691075187176466, "epoch": 0.8101040449563234, "grad_norm": 34.0, "learning_rate": 1.8180000000000002e-05, "loss": 12.552, "mean_token_accuracy": 0.7872704153880477, "num_tokens": 12097917.0, "step": 1820 }, { "entropy": 0.7835509760305286, "epoch": 0.814555166082457, "grad_norm": 35.25, "learning_rate": 1.815777777777778e-05, "loss": 12.581, "mean_token_accuracy": 0.7855123173445463, "num_tokens": 12163029.0, "step": 1830 }, { "entropy": 0.7693919812329113, "epoch": 0.8190062872085907, "grad_norm": 40.25, "learning_rate": 1.8135555555555557e-05, "loss": 12.5332, "mean_token_accuracy": 0.7862321555614471, "num_tokens": 12224427.0, "step": 1840 }, { "entropy": 0.8024251624941826, "epoch": 0.8234574083347244, "grad_norm": 36.5, "learning_rate": 1.8113333333333335e-05, "loss": 12.7152, "mean_token_accuracy": 0.7867181565612554, "num_tokens": 12291917.0, "step": 1850 }, { "entropy": 0.7431695537641645, "epoch": 0.8279085294608579, "grad_norm": 37.5, "learning_rate": 1.8091111111111113e-05, "loss": 12.067, "mean_token_accuracy": 0.7929942118003964, "num_tokens": 12365332.0, "step": 1860 }, { "entropy": 0.7609774840995669, "epoch": 0.8323596505869916, "grad_norm": 32.25, "learning_rate": 1.806888888888889e-05, "loss": 12.307, "mean_token_accuracy": 0.7898187723010779, "num_tokens": 12441423.0, "step": 1870 }, { "entropy": 0.7366092208772897, "epoch": 0.8368107717131252, "grad_norm": 43.0, "learning_rate": 1.8046666666666668e-05, "loss": 11.6312, "mean_token_accuracy": 0.8000296927988529, "num_tokens": 12509124.0, "step": 1880 }, { "entropy": 0.7049085019156337, "epoch": 0.8412618928392589, "grad_norm": 34.5, "learning_rate": 1.8024444444444445e-05, "loss": 11.6414, "mean_token_accuracy": 0.7974906180053949, "num_tokens": 12575355.0, "step": 1890 }, { "entropy": 0.7814744580537081, "epoch": 0.8457130139653926, "grad_norm": 44.25, "learning_rate": 1.8002222222222223e-05, "loss": 12.6335, "mean_token_accuracy": 0.7851035960018635, "num_tokens": 12642651.0, "step": 1900 }, { "epoch": 0.8457130139653926, "eval_biology_entropy": 1.2500024722218515, "eval_biology_loss": 1.3012299537658691, "eval_biology_mean_token_accuracy": 0.6823599907159805, "eval_biology_num_tokens": 12642651.0, "eval_biology_runtime": 39.4244, "eval_biology_samples_per_second": 12.682, "eval_biology_steps_per_second": 12.682, "step": 1900 }, { "epoch": 0.8457130139653926, "eval_chemistry_entropy": 0.7712779935002327, "eval_chemistry_loss": 0.7782201170921326, "eval_chemistry_mean_token_accuracy": 0.787936452627182, "eval_chemistry_num_tokens": 12642651.0, "eval_chemistry_runtime": 45.8268, "eval_chemistry_samples_per_second": 10.911, "eval_chemistry_steps_per_second": 10.911, "step": 1900 }, { "entropy": 0.7286117426119745, "epoch": 0.8501641350915262, "grad_norm": 38.0, "learning_rate": 1.798e-05, "loss": 11.5153, "mean_token_accuracy": 0.8004030931741, "num_tokens": 12710065.0, "step": 1910 }, { "entropy": 0.7636802634224296, "epoch": 0.8546152562176598, "grad_norm": 41.75, "learning_rate": 1.7957777777777778e-05, "loss": 12.3297, "mean_token_accuracy": 0.7917378932237625, "num_tokens": 12778408.0, "step": 1920 }, { "entropy": 0.7151092055253685, "epoch": 0.8590663773437934, "grad_norm": 44.75, "learning_rate": 1.7935555555555556e-05, "loss": 11.6627, "mean_token_accuracy": 0.7980649210512638, "num_tokens": 12845949.0, "step": 1930 }, { "entropy": 0.7645737134851516, "epoch": 0.8635174984699271, "grad_norm": 41.25, "learning_rate": 1.7913333333333337e-05, "loss": 12.1823, "mean_token_accuracy": 0.7940845835953951, "num_tokens": 12910630.0, "step": 1940 }, { "entropy": 0.7626768484711647, "epoch": 0.8679686195960608, "grad_norm": 33.5, "learning_rate": 1.789111111111111e-05, "loss": 12.4, "mean_token_accuracy": 0.7878372304141521, "num_tokens": 12975379.0, "step": 1950 }, { "entropy": 0.728480844758451, "epoch": 0.8724197407221944, "grad_norm": 34.25, "learning_rate": 1.7868888888888892e-05, "loss": 11.683, "mean_token_accuracy": 0.7970448518171906, "num_tokens": 13046473.0, "step": 1960 }, { "entropy": 0.8084357729181647, "epoch": 0.8768708618483281, "grad_norm": 36.75, "learning_rate": 1.7846666666666666e-05, "loss": 13.1812, "mean_token_accuracy": 0.780071578361094, "num_tokens": 13105019.0, "step": 1970 }, { "entropy": 0.8157536951825023, "epoch": 0.8813219829744617, "grad_norm": 39.0, "learning_rate": 1.7824444444444447e-05, "loss": 12.8778, "mean_token_accuracy": 0.7852013517171145, "num_tokens": 13167768.0, "step": 1980 }, { "entropy": 0.747572572156787, "epoch": 0.8857731041005953, "grad_norm": 35.75, "learning_rate": 1.780222222222222e-05, "loss": 11.9019, "mean_token_accuracy": 0.7958117298781872, "num_tokens": 13235899.0, "step": 1990 }, { "entropy": 0.7473404568620026, "epoch": 0.890224225226729, "grad_norm": 33.25, "learning_rate": 1.7780000000000003e-05, "loss": 12.0581, "mean_token_accuracy": 0.7940714538097382, "num_tokens": 13301659.0, "step": 2000 }, { "epoch": 0.890224225226729, "eval_biology_entropy": 1.2476614614725112, "eval_biology_loss": 1.3003644943237305, "eval_biology_mean_token_accuracy": 0.682231693148613, "eval_biology_num_tokens": 13301659.0, "eval_biology_runtime": 39.7204, "eval_biology_samples_per_second": 12.588, "eval_biology_steps_per_second": 12.588, "step": 2000 }, { "epoch": 0.890224225226729, "eval_chemistry_entropy": 0.7678308563828469, "eval_chemistry_loss": 0.7745650410652161, "eval_chemistry_mean_token_accuracy": 0.7882890626788139, "eval_chemistry_num_tokens": 13301659.0, "eval_chemistry_runtime": 46.1368, "eval_chemistry_samples_per_second": 10.837, "eval_chemistry_steps_per_second": 10.837, "step": 2000 }, { "entropy": 0.7491364620625973, "epoch": 0.8946753463528626, "grad_norm": 40.75, "learning_rate": 1.7757777777777777e-05, "loss": 12.0785, "mean_token_accuracy": 0.7944943491369486, "num_tokens": 13368880.0, "step": 2010 }, { "entropy": 0.7931207174435257, "epoch": 0.8991264674789963, "grad_norm": 43.25, "learning_rate": 1.7735555555555558e-05, "loss": 12.6523, "mean_token_accuracy": 0.78772840090096, "num_tokens": 13434364.0, "step": 2020 }, { "entropy": 0.7574795215390623, "epoch": 0.9035775886051299, "grad_norm": 37.75, "learning_rate": 1.7713333333333335e-05, "loss": 12.3514, "mean_token_accuracy": 0.7918346397578716, "num_tokens": 13499871.0, "step": 2030 }, { "entropy": 0.7294158147647977, "epoch": 0.9080287097312636, "grad_norm": 34.25, "learning_rate": 1.7691111111111113e-05, "loss": 11.6435, "mean_token_accuracy": 0.8019150290638208, "num_tokens": 13570986.0, "step": 2040 }, { "entropy": 0.6758658250793814, "epoch": 0.9124798308573973, "grad_norm": 36.0, "learning_rate": 1.766888888888889e-05, "loss": 10.8889, "mean_token_accuracy": 0.8116297330707312, "num_tokens": 13639400.0, "step": 2050 }, { "entropy": 0.7913496998138726, "epoch": 0.9169309519835308, "grad_norm": 38.25, "learning_rate": 1.7646666666666668e-05, "loss": 12.8282, "mean_token_accuracy": 0.7836458418518305, "num_tokens": 13706343.0, "step": 2060 }, { "entropy": 0.8106502434238791, "epoch": 0.9213820731096645, "grad_norm": 44.5, "learning_rate": 1.7624444444444446e-05, "loss": 13.0573, "mean_token_accuracy": 0.7821768958121538, "num_tokens": 13772145.0, "step": 2070 }, { "entropy": 0.7930893866345287, "epoch": 0.9258331942357981, "grad_norm": 30.0, "learning_rate": 1.7602222222222223e-05, "loss": 12.7334, "mean_token_accuracy": 0.7857527777552604, "num_tokens": 13833901.0, "step": 2080 }, { "entropy": 0.7608680401928722, "epoch": 0.9302843153619318, "grad_norm": 43.25, "learning_rate": 1.758e-05, "loss": 12.2845, "mean_token_accuracy": 0.7906877096742392, "num_tokens": 13890922.0, "step": 2090 }, { "entropy": 0.7530267771333456, "epoch": 0.9347354364880655, "grad_norm": 38.5, "learning_rate": 1.755777777777778e-05, "loss": 12.1729, "mean_token_accuracy": 0.791902843117714, "num_tokens": 13954258.0, "step": 2100 }, { "epoch": 0.9347354364880655, "eval_biology_entropy": 1.2157604062557221, "eval_biology_loss": 1.3008348941802979, "eval_biology_mean_token_accuracy": 0.682707477748394, "eval_biology_num_tokens": 13954258.0, "eval_biology_runtime": 40.7733, "eval_biology_samples_per_second": 12.263, "eval_biology_steps_per_second": 12.263, "step": 2100 }, { "epoch": 0.9347354364880655, "eval_chemistry_entropy": 0.7506245082020759, "eval_chemistry_loss": 0.7717900276184082, "eval_chemistry_mean_token_accuracy": 0.7892645244002342, "eval_chemistry_num_tokens": 13954258.0, "eval_chemistry_runtime": 46.2743, "eval_chemistry_samples_per_second": 10.805, "eval_chemistry_steps_per_second": 10.805, "step": 2100 }, { "entropy": 0.7269412121735513, "epoch": 0.9391865576141991, "grad_norm": 43.0, "learning_rate": 1.7535555555555556e-05, "loss": 11.6708, "mean_token_accuracy": 0.8014364942908287, "num_tokens": 14024422.0, "step": 2110 }, { "entropy": 0.7339699132367968, "epoch": 0.9436376787403328, "grad_norm": 45.25, "learning_rate": 1.7513333333333334e-05, "loss": 11.8606, "mean_token_accuracy": 0.7975826554000378, "num_tokens": 14093888.0, "step": 2120 }, { "entropy": 0.7686109783127903, "epoch": 0.9480887998664663, "grad_norm": 42.75, "learning_rate": 1.749111111111111e-05, "loss": 12.4624, "mean_token_accuracy": 0.7877364981919527, "num_tokens": 14160983.0, "step": 2130 }, { "entropy": 0.7372720196843148, "epoch": 0.9525399209926, "grad_norm": 38.5, "learning_rate": 1.746888888888889e-05, "loss": 12.0564, "mean_token_accuracy": 0.7951818112283945, "num_tokens": 14222626.0, "step": 2140 }, { "entropy": 0.7825360232032835, "epoch": 0.9569910421187336, "grad_norm": 39.5, "learning_rate": 1.7446666666666667e-05, "loss": 12.5673, "mean_token_accuracy": 0.7859856501221657, "num_tokens": 14287730.0, "step": 2150 }, { "entropy": 0.7773288476280868, "epoch": 0.9614421632448673, "grad_norm": 39.0, "learning_rate": 1.7424444444444444e-05, "loss": 12.4443, "mean_token_accuracy": 0.7885882891714573, "num_tokens": 14354918.0, "step": 2160 }, { "entropy": 0.7363769317045807, "epoch": 0.965893284371001, "grad_norm": 44.25, "learning_rate": 1.7402222222222222e-05, "loss": 11.9357, "mean_token_accuracy": 0.7947400573641061, "num_tokens": 14427338.0, "step": 2170 }, { "entropy": 0.7489111572504044, "epoch": 0.9703444054971346, "grad_norm": 37.0, "learning_rate": 1.7380000000000003e-05, "loss": 12.1624, "mean_token_accuracy": 0.7900368690490722, "num_tokens": 14493349.0, "step": 2180 }, { "entropy": 0.7617103135213256, "epoch": 0.9747955266232682, "grad_norm": 37.5, "learning_rate": 1.735777777777778e-05, "loss": 12.1613, "mean_token_accuracy": 0.7906056232750416, "num_tokens": 14555091.0, "step": 2190 }, { "entropy": 0.725032649282366, "epoch": 0.9792466477494018, "grad_norm": 33.5, "learning_rate": 1.7335555555555558e-05, "loss": 11.5895, "mean_token_accuracy": 0.8007797665894032, "num_tokens": 14620244.0, "step": 2200 }, { "epoch": 0.9792466477494018, "eval_biology_entropy": 1.2365998299121856, "eval_biology_loss": 1.303519606590271, "eval_biology_mean_token_accuracy": 0.6815042721629143, "eval_biology_num_tokens": 14620244.0, "eval_biology_runtime": 40.1978, "eval_biology_samples_per_second": 12.438, "eval_biology_steps_per_second": 12.438, "step": 2200 }, { "epoch": 0.9792466477494018, "eval_chemistry_entropy": 0.7446092162430287, "eval_chemistry_loss": 0.7655045390129089, "eval_chemistry_mean_token_accuracy": 0.7904723164439201, "eval_chemistry_num_tokens": 14620244.0, "eval_chemistry_runtime": 46.0753, "eval_chemistry_samples_per_second": 10.852, "eval_chemistry_steps_per_second": 10.852, "step": 2200 }, { "entropy": 0.7139286000281573, "epoch": 0.9836977688755355, "grad_norm": 36.5, "learning_rate": 1.7313333333333336e-05, "loss": 11.5837, "mean_token_accuracy": 0.8020144417881966, "num_tokens": 14688111.0, "step": 2210 }, { "entropy": 0.7395884351804852, "epoch": 0.9881488900016692, "grad_norm": 31.25, "learning_rate": 1.7291111111111113e-05, "loss": 11.9123, "mean_token_accuracy": 0.7961550422012806, "num_tokens": 14760951.0, "step": 2220 }, { "entropy": 0.7878218747675418, "epoch": 0.9926000111278028, "grad_norm": 36.5, "learning_rate": 1.726888888888889e-05, "loss": 12.7326, "mean_token_accuracy": 0.7862138673663139, "num_tokens": 14823141.0, "step": 2230 }, { "entropy": 0.7608288100920617, "epoch": 0.9970511322539365, "grad_norm": 37.0, "learning_rate": 1.724666666666667e-05, "loss": 12.2585, "mean_token_accuracy": 0.7904601756483316, "num_tokens": 14886593.0, "step": 2240 }, { "entropy": 0.7384239917064642, "epoch": 1.00133533633784, "grad_norm": 46.25, "learning_rate": 1.7224444444444446e-05, "loss": 11.0289, "mean_token_accuracy": 0.8005193413852097, "num_tokens": 14951602.0, "step": 2250 }, { "entropy": 0.6310809502378106, "epoch": 1.0057864574639737, "grad_norm": 39.75, "learning_rate": 1.7202222222222224e-05, "loss": 10.1315, "mean_token_accuracy": 0.8204233139753342, "num_tokens": 15016941.0, "step": 2260 }, { "entropy": 0.6455286235548556, "epoch": 1.0102375785901074, "grad_norm": 38.25, "learning_rate": 1.718e-05, "loss": 10.3133, "mean_token_accuracy": 0.8160859376192093, "num_tokens": 15083858.0, "step": 2270 }, { "entropy": 0.5831745907664299, "epoch": 1.014688699716241, "grad_norm": 50.25, "learning_rate": 1.715777777777778e-05, "loss": 9.3342, "mean_token_accuracy": 0.8300177838653326, "num_tokens": 15152865.0, "step": 2280 }, { "entropy": 0.6471488554030657, "epoch": 1.0191398208423748, "grad_norm": 43.75, "learning_rate": 1.7135555555555557e-05, "loss": 10.2835, "mean_token_accuracy": 0.8149789605289698, "num_tokens": 15218564.0, "step": 2290 }, { "entropy": 0.6528089676983655, "epoch": 1.0235909419685083, "grad_norm": 44.25, "learning_rate": 1.7113333333333334e-05, "loss": 10.3278, "mean_token_accuracy": 0.8157306212931872, "num_tokens": 15281217.0, "step": 2300 }, { "epoch": 1.0235909419685083, "eval_biology_entropy": 1.0225402714014054, "eval_biology_loss": 1.3434255123138428, "eval_biology_mean_token_accuracy": 0.6797452400922775, "eval_biology_num_tokens": 15281217.0, "eval_biology_runtime": 39.5295, "eval_biology_samples_per_second": 12.649, "eval_biology_steps_per_second": 12.649, "step": 2300 }, { "epoch": 1.0235909419685083, "eval_chemistry_entropy": 0.6383010303974151, "eval_chemistry_loss": 0.783981204032898, "eval_chemistry_mean_token_accuracy": 0.7899075618982315, "eval_chemistry_num_tokens": 15281217.0, "eval_chemistry_runtime": 45.8468, "eval_chemistry_samples_per_second": 10.906, "eval_chemistry_steps_per_second": 10.906, "step": 2300 }, { "entropy": 0.6117025960236788, "epoch": 1.0280420630946419, "grad_norm": 41.75, "learning_rate": 1.7091111111111112e-05, "loss": 9.897, "mean_token_accuracy": 0.8194273430854082, "num_tokens": 15346128.0, "step": 2310 }, { "entropy": 0.6535170486196875, "epoch": 1.0324931842207756, "grad_norm": 33.5, "learning_rate": 1.706888888888889e-05, "loss": 10.4595, "mean_token_accuracy": 0.8142324227839708, "num_tokens": 15412606.0, "step": 2320 }, { "entropy": 0.6200952081009745, "epoch": 1.0369443053469092, "grad_norm": 41.75, "learning_rate": 1.704666666666667e-05, "loss": 10.0173, "mean_token_accuracy": 0.8197212640196085, "num_tokens": 15484636.0, "step": 2330 }, { "entropy": 0.5985450498759747, "epoch": 1.041395426473043, "grad_norm": 47.5, "learning_rate": 1.7024444444444445e-05, "loss": 9.4372, "mean_token_accuracy": 0.8322823897004128, "num_tokens": 15556398.0, "step": 2340 }, { "entropy": 0.6191821810789406, "epoch": 1.0458465475991765, "grad_norm": 50.25, "learning_rate": 1.7002222222222226e-05, "loss": 9.9681, "mean_token_accuracy": 0.8229743007570505, "num_tokens": 15624248.0, "step": 2350 }, { "entropy": 0.6431950107216835, "epoch": 1.0502976687253103, "grad_norm": 43.25, "learning_rate": 1.698e-05, "loss": 10.2769, "mean_token_accuracy": 0.8181428145617247, "num_tokens": 15689184.0, "step": 2360 }, { "entropy": 0.6467125362716615, "epoch": 1.0547487898514438, "grad_norm": 51.0, "learning_rate": 1.695777777777778e-05, "loss": 10.3969, "mean_token_accuracy": 0.8166089791804552, "num_tokens": 15754370.0, "step": 2370 }, { "entropy": 0.6156461857259273, "epoch": 1.0591999109775774, "grad_norm": 36.5, "learning_rate": 1.6935555555555555e-05, "loss": 10.0446, "mean_token_accuracy": 0.8230110257863998, "num_tokens": 15824975.0, "step": 2380 }, { "entropy": 0.6369591388851404, "epoch": 1.0636510321037111, "grad_norm": 45.75, "learning_rate": 1.6913333333333336e-05, "loss": 10.2334, "mean_token_accuracy": 0.8198270745575428, "num_tokens": 15886149.0, "step": 2390 }, { "entropy": 0.6227695440873504, "epoch": 1.0681021532298447, "grad_norm": 41.5, "learning_rate": 1.689111111111111e-05, "loss": 9.9398, "mean_token_accuracy": 0.821424588188529, "num_tokens": 15956560.0, "step": 2400 }, { "epoch": 1.0681021532298447, "eval_biology_entropy": 1.005448550403118, "eval_biology_loss": 1.3492766618728638, "eval_biology_mean_token_accuracy": 0.6789008138775825, "eval_biology_num_tokens": 15956560.0, "eval_biology_runtime": 39.9693, "eval_biology_samples_per_second": 12.51, "eval_biology_steps_per_second": 12.51, "step": 2400 }, { "epoch": 1.0681021532298447, "eval_chemistry_entropy": 0.624100355386734, "eval_chemistry_loss": 0.7797554731369019, "eval_chemistry_mean_token_accuracy": 0.7903617503643036, "eval_chemistry_num_tokens": 15956560.0, "eval_chemistry_runtime": 46.1352, "eval_chemistry_samples_per_second": 10.838, "eval_chemistry_steps_per_second": 10.838, "step": 2400 }, { "entropy": 0.6266451105475426, "epoch": 1.0725532743559785, "grad_norm": 39.25, "learning_rate": 1.686888888888889e-05, "loss": 10.219, "mean_token_accuracy": 0.8190864365547895, "num_tokens": 16025633.0, "step": 2410 }, { "entropy": 0.6389865916222334, "epoch": 1.077004395482112, "grad_norm": 55.5, "learning_rate": 1.684666666666667e-05, "loss": 10.0901, "mean_token_accuracy": 0.8181588027626276, "num_tokens": 16094535.0, "step": 2420 }, { "entropy": 0.6265366824343801, "epoch": 1.0814555166082458, "grad_norm": 44.5, "learning_rate": 1.6824444444444447e-05, "loss": 10.2274, "mean_token_accuracy": 0.8162553690373897, "num_tokens": 16159781.0, "step": 2430 }, { "entropy": 0.6317514531314373, "epoch": 1.0859066377343793, "grad_norm": 41.5, "learning_rate": 1.6802222222222224e-05, "loss": 10.0703, "mean_token_accuracy": 0.8204928413033485, "num_tokens": 16227695.0, "step": 2440 }, { "entropy": 0.6408463085070253, "epoch": 1.090357758860513, "grad_norm": 52.0, "learning_rate": 1.6780000000000002e-05, "loss": 10.1971, "mean_token_accuracy": 0.8168174952268601, "num_tokens": 16293211.0, "step": 2450 }, { "entropy": 0.624163047131151, "epoch": 1.0948088799866467, "grad_norm": 43.5, "learning_rate": 1.675777777777778e-05, "loss": 10.1139, "mean_token_accuracy": 0.8228958930820227, "num_tokens": 16361111.0, "step": 2460 }, { "entropy": 0.6617415506392718, "epoch": 1.0992600011127802, "grad_norm": 43.5, "learning_rate": 1.6735555555555557e-05, "loss": 10.5201, "mean_token_accuracy": 0.8147166967391968, "num_tokens": 16424587.0, "step": 2470 }, { "entropy": 0.6115672853775322, "epoch": 1.103711122238914, "grad_norm": 41.75, "learning_rate": 1.6713333333333335e-05, "loss": 9.9204, "mean_token_accuracy": 0.824297409504652, "num_tokens": 16495082.0, "step": 2480 }, { "entropy": 0.6393632598221302, "epoch": 1.1081622433650475, "grad_norm": 47.0, "learning_rate": 1.6691111111111112e-05, "loss": 10.2437, "mean_token_accuracy": 0.8169457126408816, "num_tokens": 16557596.0, "step": 2490 }, { "entropy": 0.6181258006952703, "epoch": 1.1126133644911813, "grad_norm": 33.25, "learning_rate": 1.666888888888889e-05, "loss": 10.0825, "mean_token_accuracy": 0.8184908539056778, "num_tokens": 16625999.0, "step": 2500 }, { "epoch": 1.1126133644911813, "eval_biology_entropy": 1.0345592698454857, "eval_biology_loss": 1.3405461311340332, "eval_biology_mean_token_accuracy": 0.6796377742290497, "eval_biology_num_tokens": 16625999.0, "eval_biology_runtime": 39.4133, "eval_biology_samples_per_second": 12.686, "eval_biology_steps_per_second": 12.686, "step": 2500 }, { "epoch": 1.1126133644911813, "eval_chemistry_entropy": 0.6612432144582272, "eval_chemistry_loss": 0.7703909277915955, "eval_chemistry_mean_token_accuracy": 0.7913251945972443, "eval_chemistry_num_tokens": 16625999.0, "eval_chemistry_runtime": 45.9075, "eval_chemistry_samples_per_second": 10.891, "eval_chemistry_steps_per_second": 10.891, "step": 2500 }, { "entropy": 0.6133400545455515, "epoch": 1.1170644856173149, "grad_norm": 40.0, "learning_rate": 1.6646666666666668e-05, "loss": 9.8698, "mean_token_accuracy": 0.8211657289415598, "num_tokens": 16697896.0, "step": 2510 }, { "entropy": 0.7041636547073722, "epoch": 1.1215156067434484, "grad_norm": 35.75, "learning_rate": 1.6624444444444445e-05, "loss": 11.1892, "mean_token_accuracy": 0.8056971203535795, "num_tokens": 16763539.0, "step": 2520 }, { "entropy": 0.630071850027889, "epoch": 1.1259667278695822, "grad_norm": 46.25, "learning_rate": 1.6602222222222223e-05, "loss": 10.0465, "mean_token_accuracy": 0.8180604916065931, "num_tokens": 16829702.0, "step": 2530 }, { "entropy": 0.5941652920097112, "epoch": 1.1304178489957157, "grad_norm": 33.25, "learning_rate": 1.658e-05, "loss": 9.7027, "mean_token_accuracy": 0.8244458321481943, "num_tokens": 16899120.0, "step": 2540 }, { "entropy": 0.617455589864403, "epoch": 1.1348689701218495, "grad_norm": 45.5, "learning_rate": 1.6557777777777778e-05, "loss": 9.9557, "mean_token_accuracy": 0.8236034225672484, "num_tokens": 16964702.0, "step": 2550 }, { "entropy": 0.627051792666316, "epoch": 1.139320091247983, "grad_norm": 56.5, "learning_rate": 1.6535555555555556e-05, "loss": 10.1063, "mean_token_accuracy": 0.8185560010373593, "num_tokens": 17028582.0, "step": 2560 }, { "entropy": 0.6038470237515867, "epoch": 1.1437712123741166, "grad_norm": 40.5, "learning_rate": 1.6513333333333333e-05, "loss": 9.6457, "mean_token_accuracy": 0.8267611656337976, "num_tokens": 17094349.0, "step": 2570 }, { "entropy": 0.6520879605785013, "epoch": 1.1482223335002504, "grad_norm": 52.75, "learning_rate": 1.6491111111111114e-05, "loss": 10.5436, "mean_token_accuracy": 0.8138282421976328, "num_tokens": 17161099.0, "step": 2580 }, { "entropy": 0.6288375724107027, "epoch": 1.152673454626384, "grad_norm": 43.5, "learning_rate": 1.646888888888889e-05, "loss": 9.9975, "mean_token_accuracy": 0.8210668105632066, "num_tokens": 17226592.0, "step": 2590 }, { "entropy": 0.6409570111893117, "epoch": 1.1571245757525177, "grad_norm": 39.75, "learning_rate": 1.644666666666667e-05, "loss": 10.4495, "mean_token_accuracy": 0.812485882639885, "num_tokens": 17293076.0, "step": 2600 }, { "epoch": 1.1571245757525177, "eval_biology_entropy": 1.0119045050740243, "eval_biology_loss": 1.3503239154815674, "eval_biology_mean_token_accuracy": 0.679217532992363, "eval_biology_num_tokens": 17293076.0, "eval_biology_runtime": 39.781, "eval_biology_samples_per_second": 12.569, "eval_biology_steps_per_second": 12.569, "step": 2600 }, { "epoch": 1.1571245757525177, "eval_chemistry_entropy": 0.6560586480796338, "eval_chemistry_loss": 0.7683631777763367, "eval_chemistry_mean_token_accuracy": 0.7914816659092904, "eval_chemistry_num_tokens": 17293076.0, "eval_chemistry_runtime": 45.9922, "eval_chemistry_samples_per_second": 10.871, "eval_chemistry_steps_per_second": 10.871, "step": 2600 }, { "entropy": 0.6387675725854933, "epoch": 1.1615756968786513, "grad_norm": 49.25, "learning_rate": 1.6424444444444444e-05, "loss": 10.4108, "mean_token_accuracy": 0.8148495044559241, "num_tokens": 17360241.0, "step": 2610 }, { "entropy": 0.6907255406491458, "epoch": 1.166026818004785, "grad_norm": 51.5, "learning_rate": 1.6402222222222225e-05, "loss": 10.8962, "mean_token_accuracy": 0.8070711381733418, "num_tokens": 17424840.0, "step": 2620 }, { "entropy": 0.6278068142943084, "epoch": 1.1704779391309186, "grad_norm": 43.25, "learning_rate": 1.638e-05, "loss": 10.2459, "mean_token_accuracy": 0.8145625352859497, "num_tokens": 17493054.0, "step": 2630 }, { "entropy": 0.6715679660439491, "epoch": 1.1749290602570523, "grad_norm": 50.75, "learning_rate": 1.635777777777778e-05, "loss": 10.5983, "mean_token_accuracy": 0.8121216475963593, "num_tokens": 17557844.0, "step": 2640 }, { "entropy": 0.6241919027641416, "epoch": 1.179380181383186, "grad_norm": 47.25, "learning_rate": 1.6335555555555558e-05, "loss": 10.2909, "mean_token_accuracy": 0.8178541362285614, "num_tokens": 17627019.0, "step": 2650 }, { "entropy": 0.6669848646968604, "epoch": 1.1838313025093194, "grad_norm": 38.5, "learning_rate": 1.6313333333333335e-05, "loss": 10.5679, "mean_token_accuracy": 0.8156964424997568, "num_tokens": 17692635.0, "step": 2660 }, { "entropy": 0.6501018116250634, "epoch": 1.1882824236354532, "grad_norm": 33.5, "learning_rate": 1.6291111111111113e-05, "loss": 10.3085, "mean_token_accuracy": 0.8137432295829058, "num_tokens": 17760815.0, "step": 2670 }, { "entropy": 0.6186877446249127, "epoch": 1.1927335447615868, "grad_norm": 45.75, "learning_rate": 1.626888888888889e-05, "loss": 10.2952, "mean_token_accuracy": 0.8170873422175646, "num_tokens": 17826556.0, "step": 2680 }, { "entropy": 0.6414951920509339, "epoch": 1.1971846658877205, "grad_norm": 39.5, "learning_rate": 1.6246666666666668e-05, "loss": 10.042, "mean_token_accuracy": 0.8235870473086834, "num_tokens": 17890493.0, "step": 2690 }, { "entropy": 0.596523373760283, "epoch": 1.201635787013854, "grad_norm": 50.75, "learning_rate": 1.6224444444444446e-05, "loss": 9.6696, "mean_token_accuracy": 0.824431487172842, "num_tokens": 17955924.0, "step": 2700 }, { "epoch": 1.201635787013854, "eval_biology_entropy": 1.0259322304725647, "eval_biology_loss": 1.3572481870651245, "eval_biology_mean_token_accuracy": 0.6786714745163918, "eval_biology_num_tokens": 17955924.0, "eval_biology_runtime": 40.8952, "eval_biology_samples_per_second": 12.226, "eval_biology_steps_per_second": 12.226, "step": 2700 }, { "epoch": 1.201635787013854, "eval_chemistry_entropy": 0.6431379403471946, "eval_chemistry_loss": 0.7709676027297974, "eval_chemistry_mean_token_accuracy": 0.7921776163578034, "eval_chemistry_num_tokens": 17955924.0, "eval_chemistry_runtime": 46.6921, "eval_chemistry_samples_per_second": 10.708, "eval_chemistry_steps_per_second": 10.708, "step": 2700 }, { "entropy": 0.6200942347757519, "epoch": 1.2060869081399876, "grad_norm": 47.0, "learning_rate": 1.6202222222222223e-05, "loss": 9.7551, "mean_token_accuracy": 0.8228203389793635, "num_tokens": 18024547.0, "step": 2710 }, { "entropy": 0.6334728031419218, "epoch": 1.2105380292661214, "grad_norm": 46.75, "learning_rate": 1.618e-05, "loss": 10.2127, "mean_token_accuracy": 0.818845646083355, "num_tokens": 18093886.0, "step": 2720 }, { "entropy": 0.6153763468377292, "epoch": 1.214989150392255, "grad_norm": 65.5, "learning_rate": 1.615777777777778e-05, "loss": 10.1314, "mean_token_accuracy": 0.819073748216033, "num_tokens": 18166010.0, "step": 2730 }, { "entropy": 0.6560159401036799, "epoch": 1.2194402715183887, "grad_norm": 44.5, "learning_rate": 1.6135555555555556e-05, "loss": 10.1563, "mean_token_accuracy": 0.8201773680746556, "num_tokens": 18236214.0, "step": 2740 }, { "entropy": 0.596173535194248, "epoch": 1.2238913926445223, "grad_norm": 38.25, "learning_rate": 1.6113333333333334e-05, "loss": 9.8183, "mean_token_accuracy": 0.8231377601623535, "num_tokens": 18300911.0, "step": 2750 }, { "entropy": 0.6799774877727032, "epoch": 1.228342513770656, "grad_norm": 50.75, "learning_rate": 1.609111111111111e-05, "loss": 10.8835, "mean_token_accuracy": 0.8077580634504556, "num_tokens": 18361378.0, "step": 2760 }, { "entropy": 0.6651931522414088, "epoch": 1.2327936348967896, "grad_norm": 46.25, "learning_rate": 1.606888888888889e-05, "loss": 10.686, "mean_token_accuracy": 0.8114044293761253, "num_tokens": 18426528.0, "step": 2770 }, { "entropy": 0.5847625308670104, "epoch": 1.2372447560229234, "grad_norm": 42.25, "learning_rate": 1.6046666666666667e-05, "loss": 9.3837, "mean_token_accuracy": 0.8274062320590019, "num_tokens": 18495853.0, "step": 2780 }, { "entropy": 0.6139227262698114, "epoch": 1.241695877149057, "grad_norm": 42.0, "learning_rate": 1.6024444444444444e-05, "loss": 9.9497, "mean_token_accuracy": 0.819492531567812, "num_tokens": 18561871.0, "step": 2790 }, { "entropy": 0.6643421296030283, "epoch": 1.2461469982751905, "grad_norm": 44.25, "learning_rate": 1.6002222222222222e-05, "loss": 10.6891, "mean_token_accuracy": 0.8144296679645777, "num_tokens": 18632874.0, "step": 2800 }, { "epoch": 1.2461469982751905, "eval_biology_entropy": 1.0522388898134232, "eval_biology_loss": 1.3451586961746216, "eval_biology_mean_token_accuracy": 0.6789873982667923, "eval_biology_num_tokens": 18632874.0, "eval_biology_runtime": 39.9327, "eval_biology_samples_per_second": 12.521, "eval_biology_steps_per_second": 12.521, "step": 2800 }, { "epoch": 1.2461469982751905, "eval_chemistry_entropy": 0.6808124091923237, "eval_chemistry_loss": 0.7618144750595093, "eval_chemistry_mean_token_accuracy": 0.7930310650467872, "eval_chemistry_num_tokens": 18632874.0, "eval_chemistry_runtime": 48.6811, "eval_chemistry_samples_per_second": 10.271, "eval_chemistry_steps_per_second": 10.271, "step": 2800 }, { "entropy": 0.6313241773284972, "epoch": 1.2505981194013243, "grad_norm": 43.0, "learning_rate": 1.5980000000000003e-05, "loss": 10.171, "mean_token_accuracy": 0.8152998108416796, "num_tokens": 18705033.0, "step": 2810 }, { "entropy": 0.6695272358134389, "epoch": 1.2550492405274578, "grad_norm": 36.5, "learning_rate": 1.5957777777777777e-05, "loss": 10.6232, "mean_token_accuracy": 0.8120121419429779, "num_tokens": 18775027.0, "step": 2820 }, { "entropy": 0.6264405744150281, "epoch": 1.2595003616535916, "grad_norm": 50.5, "learning_rate": 1.5935555555555558e-05, "loss": 10.0142, "mean_token_accuracy": 0.820975198969245, "num_tokens": 18840707.0, "step": 2830 }, { "entropy": 0.6146207214333117, "epoch": 1.2639514827797251, "grad_norm": 39.25, "learning_rate": 1.5913333333333332e-05, "loss": 9.9342, "mean_token_accuracy": 0.8204426843672991, "num_tokens": 18907324.0, "step": 2840 }, { "entropy": 0.627062719874084, "epoch": 1.2684026039058587, "grad_norm": 36.75, "learning_rate": 1.5891111111111113e-05, "loss": 10.1149, "mean_token_accuracy": 0.8164357714354992, "num_tokens": 18980315.0, "step": 2850 }, { "entropy": 0.6521528273820877, "epoch": 1.2728537250319925, "grad_norm": 41.5, "learning_rate": 1.5868888888888888e-05, "loss": 10.7732, "mean_token_accuracy": 0.808639282360673, "num_tokens": 19047920.0, "step": 2860 }, { "entropy": 0.6662443988956511, "epoch": 1.277304846158126, "grad_norm": 36.25, "learning_rate": 1.584666666666667e-05, "loss": 10.3985, "mean_token_accuracy": 0.8146307349205018, "num_tokens": 19116973.0, "step": 2870 }, { "entropy": 0.6255686291493475, "epoch": 1.2817559672842598, "grad_norm": 38.25, "learning_rate": 1.5824444444444446e-05, "loss": 9.9838, "mean_token_accuracy": 0.8214154217392207, "num_tokens": 19180813.0, "step": 2880 }, { "entropy": 0.6181805668398738, "epoch": 1.2862070884103933, "grad_norm": 39.25, "learning_rate": 1.5802222222222224e-05, "loss": 10.0245, "mean_token_accuracy": 0.8187602888792753, "num_tokens": 19257345.0, "step": 2890 }, { "entropy": 0.6569024728611111, "epoch": 1.2906582095365269, "grad_norm": 45.5, "learning_rate": 1.578e-05, "loss": 10.6273, "mean_token_accuracy": 0.8127008739858865, "num_tokens": 19324641.0, "step": 2900 }, { "epoch": 1.2906582095365269, "eval_biology_entropy": 0.9905156311392784, "eval_biology_loss": 1.360024333000183, "eval_biology_mean_token_accuracy": 0.6785246793031693, "eval_biology_num_tokens": 19324641.0, "eval_biology_runtime": 39.8474, "eval_biology_samples_per_second": 12.548, "eval_biology_steps_per_second": 12.548, "step": 2900 }, { "epoch": 1.2906582095365269, "eval_chemistry_entropy": 0.653872075021267, "eval_chemistry_loss": 0.7638129591941833, "eval_chemistry_mean_token_accuracy": 0.7927864454388619, "eval_chemistry_num_tokens": 19324641.0, "eval_chemistry_runtime": 46.0595, "eval_chemistry_samples_per_second": 10.856, "eval_chemistry_steps_per_second": 10.856, "step": 2900 }, { "entropy": 0.6221754123456776, "epoch": 1.2951093306626607, "grad_norm": 43.25, "learning_rate": 1.575777777777778e-05, "loss": 9.9379, "mean_token_accuracy": 0.8217662025243044, "num_tokens": 19391043.0, "step": 2910 }, { "entropy": 0.5952364468015731, "epoch": 1.2995604517887944, "grad_norm": 47.5, "learning_rate": 1.5735555555555557e-05, "loss": 9.5499, "mean_token_accuracy": 0.8289531849324703, "num_tokens": 19461093.0, "step": 2920 }, { "entropy": 0.5797039135359228, "epoch": 1.304011572914928, "grad_norm": 47.0, "learning_rate": 1.5713333333333334e-05, "loss": 9.4762, "mean_token_accuracy": 0.8289642054587603, "num_tokens": 19527736.0, "step": 2930 }, { "entropy": 0.6462538072839379, "epoch": 1.3084626940410615, "grad_norm": 38.5, "learning_rate": 1.5691111111111112e-05, "loss": 10.1212, "mean_token_accuracy": 0.818146052211523, "num_tokens": 19595645.0, "step": 2940 }, { "entropy": 0.627075092215091, "epoch": 1.3129138151671953, "grad_norm": 36.0, "learning_rate": 1.5668888888888893e-05, "loss": 10.2374, "mean_token_accuracy": 0.8173820059746504, "num_tokens": 19660322.0, "step": 2950 }, { "entropy": 0.6532369766384363, "epoch": 1.3173649362933288, "grad_norm": 46.75, "learning_rate": 1.5646666666666667e-05, "loss": 10.5273, "mean_token_accuracy": 0.8114755034446717, "num_tokens": 19725399.0, "step": 2960 }, { "entropy": 0.6409816164523363, "epoch": 1.3218160574194626, "grad_norm": 49.5, "learning_rate": 1.5624444444444448e-05, "loss": 10.2742, "mean_token_accuracy": 0.8138007991015911, "num_tokens": 19791328.0, "step": 2970 }, { "entropy": 0.6157345019280911, "epoch": 1.3262671785455962, "grad_norm": 44.75, "learning_rate": 1.5602222222222222e-05, "loss": 9.7536, "mean_token_accuracy": 0.8266748197376728, "num_tokens": 19857430.0, "step": 2980 }, { "entropy": 0.6039329887367785, "epoch": 1.3307182996717297, "grad_norm": 48.5, "learning_rate": 1.5580000000000003e-05, "loss": 9.5546, "mean_token_accuracy": 0.8262176886200905, "num_tokens": 19922864.0, "step": 2990 }, { "entropy": 0.6228530476801097, "epoch": 1.3351694207978635, "grad_norm": 43.0, "learning_rate": 1.5557777777777778e-05, "loss": 10.3136, "mean_token_accuracy": 0.8165966145694256, "num_tokens": 19988673.0, "step": 3000 }, { "epoch": 1.3351694207978635, "eval_biology_entropy": 0.9787815891504288, "eval_biology_loss": 1.3644609451293945, "eval_biology_mean_token_accuracy": 0.6780717136263847, "eval_biology_num_tokens": 19988673.0, "eval_biology_runtime": 47.1618, "eval_biology_samples_per_second": 10.602, "eval_biology_steps_per_second": 10.602, "step": 3000 }, { "epoch": 1.3351694207978635, "eval_chemistry_entropy": 0.6456027861535549, "eval_chemistry_loss": 0.7624587416648865, "eval_chemistry_mean_token_accuracy": 0.7938998826146125, "eval_chemistry_num_tokens": 19988673.0, "eval_chemistry_runtime": 46.3754, "eval_chemistry_samples_per_second": 10.782, "eval_chemistry_steps_per_second": 10.782, "step": 3000 }, { "entropy": 0.637952480930835, "epoch": 1.339620541923997, "grad_norm": 54.5, "learning_rate": 1.553555555555556e-05, "loss": 10.3024, "mean_token_accuracy": 0.8173649627715349, "num_tokens": 20054087.0, "step": 3010 }, { "entropy": 0.628881346154958, "epoch": 1.3440716630501308, "grad_norm": 48.0, "learning_rate": 1.5513333333333333e-05, "loss": 10.0117, "mean_token_accuracy": 0.8185407467186451, "num_tokens": 20117738.0, "step": 3020 }, { "entropy": 0.6190263425931335, "epoch": 1.3485227841762644, "grad_norm": 32.25, "learning_rate": 1.5491111111111114e-05, "loss": 10.0847, "mean_token_accuracy": 0.8230381075292825, "num_tokens": 20180919.0, "step": 3030 }, { "entropy": 0.6395858994685113, "epoch": 1.352973905302398, "grad_norm": 43.5, "learning_rate": 1.546888888888889e-05, "loss": 10.187, "mean_token_accuracy": 0.8200148697942495, "num_tokens": 20248890.0, "step": 3040 }, { "entropy": 0.6512627801857889, "epoch": 1.3574250264285317, "grad_norm": 61.25, "learning_rate": 1.544666666666667e-05, "loss": 10.5447, "mean_token_accuracy": 0.8131675466895103, "num_tokens": 20312039.0, "step": 3050 }, { "entropy": 0.5914942998439073, "epoch": 1.3618761475546655, "grad_norm": 42.0, "learning_rate": 1.5424444444444447e-05, "loss": 9.4727, "mean_token_accuracy": 0.8284438151866198, "num_tokens": 20379865.0, "step": 3060 }, { "entropy": 0.6345633203163743, "epoch": 1.366327268680799, "grad_norm": 40.5, "learning_rate": 1.5402222222222224e-05, "loss": 10.2457, "mean_token_accuracy": 0.8185637548565865, "num_tokens": 20450098.0, "step": 3070 }, { "entropy": 0.628783920686692, "epoch": 1.3707783898069326, "grad_norm": 48.0, "learning_rate": 1.5380000000000002e-05, "loss": 9.9781, "mean_token_accuracy": 0.821869732439518, "num_tokens": 20515713.0, "step": 3080 }, { "entropy": 0.6853339564055204, "epoch": 1.3752295109330663, "grad_norm": 40.75, "learning_rate": 1.535777777777778e-05, "loss": 11.2465, "mean_token_accuracy": 0.8053449187427759, "num_tokens": 20577338.0, "step": 3090 }, { "entropy": 0.6173176297917962, "epoch": 1.3796806320591999, "grad_norm": 41.75, "learning_rate": 1.5335555555555557e-05, "loss": 9.7962, "mean_token_accuracy": 0.8228179760277271, "num_tokens": 20640374.0, "step": 3100 }, { "epoch": 1.3796806320591999, "eval_biology_entropy": 0.9570407208204269, "eval_biology_loss": 1.3728957176208496, "eval_biology_mean_token_accuracy": 0.6790110827684402, "eval_biology_num_tokens": 20640374.0, "eval_biology_runtime": 39.5831, "eval_biology_samples_per_second": 12.632, "eval_biology_steps_per_second": 12.632, "step": 3100 }, { "epoch": 1.3796806320591999, "eval_chemistry_entropy": 0.61797180467844, "eval_chemistry_loss": 0.7640883326530457, "eval_chemistry_mean_token_accuracy": 0.7947017440795898, "eval_chemistry_num_tokens": 20640374.0, "eval_chemistry_runtime": 54.027, "eval_chemistry_samples_per_second": 9.255, "eval_chemistry_steps_per_second": 9.255, "step": 3100 }, { "entropy": 0.628461142629385, "epoch": 1.3841317531853337, "grad_norm": 48.0, "learning_rate": 1.5313333333333335e-05, "loss": 10.1218, "mean_token_accuracy": 0.8193665962666273, "num_tokens": 20701599.0, "step": 3110 }, { "entropy": 0.644066066481173, "epoch": 1.3885828743114672, "grad_norm": 44.75, "learning_rate": 1.5291111111111112e-05, "loss": 10.3834, "mean_token_accuracy": 0.8160424407571554, "num_tokens": 20769790.0, "step": 3120 }, { "entropy": 0.6239330711774528, "epoch": 1.3930339954376008, "grad_norm": 32.5, "learning_rate": 1.526888888888889e-05, "loss": 10.0974, "mean_token_accuracy": 0.8212814599275589, "num_tokens": 20837901.0, "step": 3130 }, { "entropy": 0.6491018484346569, "epoch": 1.3974851165637345, "grad_norm": 44.75, "learning_rate": 1.5246666666666668e-05, "loss": 10.3364, "mean_token_accuracy": 0.8171543031930923, "num_tokens": 20900772.0, "step": 3140 }, { "entropy": 0.6686932277865708, "epoch": 1.401936237689868, "grad_norm": 47.75, "learning_rate": 1.5224444444444447e-05, "loss": 10.851, "mean_token_accuracy": 0.8075783431529999, "num_tokens": 20964368.0, "step": 3150 }, { "entropy": 0.6579232438467443, "epoch": 1.4063873588160019, "grad_norm": 39.5, "learning_rate": 1.5202222222222223e-05, "loss": 10.4135, "mean_token_accuracy": 0.8155898574739695, "num_tokens": 21032357.0, "step": 3160 }, { "entropy": 0.6374450953677296, "epoch": 1.4108384799421354, "grad_norm": 41.5, "learning_rate": 1.5180000000000002e-05, "loss": 10.5654, "mean_token_accuracy": 0.8148523326963186, "num_tokens": 21097884.0, "step": 3170 }, { "entropy": 0.6463477646932005, "epoch": 1.415289601068269, "grad_norm": 46.75, "learning_rate": 1.5157777777777778e-05, "loss": 10.2428, "mean_token_accuracy": 0.8172158479690552, "num_tokens": 21162515.0, "step": 3180 }, { "entropy": 0.5953550837934017, "epoch": 1.4197407221944027, "grad_norm": 47.5, "learning_rate": 1.5135555555555557e-05, "loss": 9.6312, "mean_token_accuracy": 0.8280521262437105, "num_tokens": 21230408.0, "step": 3190 }, { "entropy": 0.6325198461301624, "epoch": 1.4241918433205363, "grad_norm": 37.25, "learning_rate": 1.5113333333333335e-05, "loss": 10.1936, "mean_token_accuracy": 0.8156390845775604, "num_tokens": 21299050.0, "step": 3200 }, { "epoch": 1.4241918433205363, "eval_biology_entropy": 1.033722943663597, "eval_biology_loss": 1.3449995517730713, "eval_biology_mean_token_accuracy": 0.6797993869781495, "eval_biology_num_tokens": 21299050.0, "eval_biology_runtime": 39.1453, "eval_biology_samples_per_second": 12.773, "eval_biology_steps_per_second": 12.773, "step": 3200 }, { "epoch": 1.4241918433205363, "eval_chemistry_entropy": 0.6644897412657738, "eval_chemistry_loss": 0.7532535791397095, "eval_chemistry_mean_token_accuracy": 0.7947876628637314, "eval_chemistry_num_tokens": 21299050.0, "eval_chemistry_runtime": 45.7557, "eval_chemistry_samples_per_second": 10.928, "eval_chemistry_steps_per_second": 10.928, "step": 3200 }, { "entropy": 0.6288160899654031, "epoch": 1.42864296444667, "grad_norm": 62.5, "learning_rate": 1.5091111111111113e-05, "loss": 10.1249, "mean_token_accuracy": 0.816605107858777, "num_tokens": 21366237.0, "step": 3210 }, { "entropy": 0.6372722125612199, "epoch": 1.4330940855728036, "grad_norm": 36.0, "learning_rate": 1.506888888888889e-05, "loss": 10.3142, "mean_token_accuracy": 0.8178324706852436, "num_tokens": 21436423.0, "step": 3220 }, { "entropy": 0.6209446837194263, "epoch": 1.4375452066989374, "grad_norm": 39.0, "learning_rate": 1.5046666666666668e-05, "loss": 10.0209, "mean_token_accuracy": 0.819311347976327, "num_tokens": 21504658.0, "step": 3230 }, { "entropy": 0.6337612668983639, "epoch": 1.441996327825071, "grad_norm": 45.25, "learning_rate": 1.5024444444444445e-05, "loss": 10.228, "mean_token_accuracy": 0.8179344519972801, "num_tokens": 21569748.0, "step": 3240 }, { "entropy": 0.6490779631771147, "epoch": 1.4464474489512047, "grad_norm": 41.5, "learning_rate": 1.5002222222222223e-05, "loss": 10.3288, "mean_token_accuracy": 0.817498742416501, "num_tokens": 21630649.0, "step": 3250 }, { "entropy": 0.666816140897572, "epoch": 1.4508985700773382, "grad_norm": 53.5, "learning_rate": 1.498e-05, "loss": 10.8882, "mean_token_accuracy": 0.8089791681617499, "num_tokens": 21694065.0, "step": 3260 }, { "entropy": 0.5993673953227698, "epoch": 1.4553496912034718, "grad_norm": 44.25, "learning_rate": 1.495777777777778e-05, "loss": 9.6729, "mean_token_accuracy": 0.8277637314051389, "num_tokens": 21758494.0, "step": 3270 }, { "entropy": 0.6408176301978529, "epoch": 1.4598008123296056, "grad_norm": 59.5, "learning_rate": 1.4935555555555556e-05, "loss": 10.0971, "mean_token_accuracy": 0.8220336116850376, "num_tokens": 21822217.0, "step": 3280 }, { "entropy": 0.6017687612213194, "epoch": 1.4642519334557391, "grad_norm": 52.25, "learning_rate": 1.4913333333333335e-05, "loss": 9.8479, "mean_token_accuracy": 0.82330856397748, "num_tokens": 21887417.0, "step": 3290 }, { "entropy": 0.6159314071759582, "epoch": 1.468703054581873, "grad_norm": 42.0, "learning_rate": 1.4891111111111111e-05, "loss": 9.9203, "mean_token_accuracy": 0.8198327627032995, "num_tokens": 21954784.0, "step": 3300 }, { "epoch": 1.468703054581873, "eval_biology_entropy": 0.9997066405415534, "eval_biology_loss": 1.357595682144165, "eval_biology_mean_token_accuracy": 0.6786647012233734, "eval_biology_num_tokens": 21954784.0, "eval_biology_runtime": 39.4705, "eval_biology_samples_per_second": 12.668, "eval_biology_steps_per_second": 12.668, "step": 3300 }, { "epoch": 1.468703054581873, "eval_chemistry_entropy": 0.6462086058557034, "eval_chemistry_loss": 0.7550687193870544, "eval_chemistry_mean_token_accuracy": 0.7956333946585655, "eval_chemistry_num_tokens": 21954784.0, "eval_chemistry_runtime": 46.0448, "eval_chemistry_samples_per_second": 10.859, "eval_chemistry_steps_per_second": 10.859, "step": 3300 }, { "entropy": 0.655993225146085, "epoch": 1.4731541757080064, "grad_norm": 45.0, "learning_rate": 1.486888888888889e-05, "loss": 10.7125, "mean_token_accuracy": 0.8117992129176855, "num_tokens": 22022454.0, "step": 3310 }, { "entropy": 0.647513292171061, "epoch": 1.47760529683414, "grad_norm": 35.25, "learning_rate": 1.4846666666666666e-05, "loss": 10.201, "mean_token_accuracy": 0.8165667839348316, "num_tokens": 22090334.0, "step": 3320 }, { "entropy": 0.658707937411964, "epoch": 1.4820564179602738, "grad_norm": 45.75, "learning_rate": 1.4824444444444446e-05, "loss": 10.8132, "mean_token_accuracy": 0.8106453076004982, "num_tokens": 22157686.0, "step": 3330 }, { "entropy": 0.6491913768462837, "epoch": 1.4865075390864073, "grad_norm": 46.25, "learning_rate": 1.4802222222222225e-05, "loss": 10.1864, "mean_token_accuracy": 0.8190740462392568, "num_tokens": 22223473.0, "step": 3340 }, { "entropy": 0.5992923174053431, "epoch": 1.490958660212541, "grad_norm": 41.75, "learning_rate": 1.478e-05, "loss": 9.8765, "mean_token_accuracy": 0.8225152909755706, "num_tokens": 22289965.0, "step": 3350 }, { "entropy": 0.6485524808987975, "epoch": 1.4954097813386746, "grad_norm": 50.25, "learning_rate": 1.475777777777778e-05, "loss": 10.5707, "mean_token_accuracy": 0.8128155149519444, "num_tokens": 22357538.0, "step": 3360 }, { "entropy": 0.6507280296646059, "epoch": 1.4998609024648082, "grad_norm": 48.25, "learning_rate": 1.4735555555555556e-05, "loss": 10.4263, "mean_token_accuracy": 0.8157397713512182, "num_tokens": 22425667.0, "step": 3370 }, { "entropy": 0.6263371775858104, "epoch": 1.504312023590942, "grad_norm": 33.5, "learning_rate": 1.4713333333333335e-05, "loss": 10.0768, "mean_token_accuracy": 0.8203147105872631, "num_tokens": 22493127.0, "step": 3380 }, { "entropy": 0.6092419126071036, "epoch": 1.5087631447170757, "grad_norm": 54.0, "learning_rate": 1.4691111111111111e-05, "loss": 10.0084, "mean_token_accuracy": 0.8206193454563617, "num_tokens": 22556146.0, "step": 3390 }, { "entropy": 0.6266205563209951, "epoch": 1.5132142658432093, "grad_norm": 61.5, "learning_rate": 1.466888888888889e-05, "loss": 9.9577, "mean_token_accuracy": 0.8221445549279451, "num_tokens": 22620726.0, "step": 3400 }, { "epoch": 1.5132142658432093, "eval_biology_entropy": 0.9828049678802491, "eval_biology_loss": 1.367919921875, "eval_biology_mean_token_accuracy": 0.6777160669565201, "eval_biology_num_tokens": 22620726.0, "eval_biology_runtime": 40.6602, "eval_biology_samples_per_second": 12.297, "eval_biology_steps_per_second": 12.297, "step": 3400 }, { "epoch": 1.5132142658432093, "eval_chemistry_entropy": 0.6357372930645943, "eval_chemistry_loss": 0.7558674812316895, "eval_chemistry_mean_token_accuracy": 0.795305383682251, "eval_chemistry_num_tokens": 22620726.0, "eval_chemistry_runtime": 46.4387, "eval_chemistry_samples_per_second": 10.767, "eval_chemistry_steps_per_second": 10.767, "step": 3400 }, { "entropy": 0.6136901346035302, "epoch": 1.5176653869693428, "grad_norm": 36.5, "learning_rate": 1.4646666666666666e-05, "loss": 10.1771, "mean_token_accuracy": 0.8197428908199071, "num_tokens": 22692897.0, "step": 3410 }, { "entropy": 0.6342681768350303, "epoch": 1.5221165080954764, "grad_norm": 44.75, "learning_rate": 1.4624444444444446e-05, "loss": 9.8775, "mean_token_accuracy": 0.8207851707935333, "num_tokens": 22757148.0, "step": 3420 }, { "entropy": 0.6303547226823867, "epoch": 1.5265676292216102, "grad_norm": 44.0, "learning_rate": 1.4602222222222225e-05, "loss": 10.3339, "mean_token_accuracy": 0.8132365688681602, "num_tokens": 22821150.0, "step": 3430 }, { "entropy": 0.5872412131167948, "epoch": 1.531018750347744, "grad_norm": 39.25, "learning_rate": 1.4580000000000001e-05, "loss": 9.412, "mean_token_accuracy": 0.8288289237767458, "num_tokens": 22895186.0, "step": 3440 }, { "entropy": 0.6027078079991043, "epoch": 1.5354698714738775, "grad_norm": 49.0, "learning_rate": 1.455777777777778e-05, "loss": 9.5236, "mean_token_accuracy": 0.8259454619139432, "num_tokens": 22963108.0, "step": 3450 }, { "entropy": 0.6323892536573112, "epoch": 1.539920992600011, "grad_norm": 40.0, "learning_rate": 1.4535555555555556e-05, "loss": 10.2315, "mean_token_accuracy": 0.8185612123459578, "num_tokens": 23020006.0, "step": 3460 }, { "entropy": 0.6121256987564265, "epoch": 1.5443721137261448, "grad_norm": 47.0, "learning_rate": 1.4513333333333336e-05, "loss": 9.8335, "mean_token_accuracy": 0.8221838753670454, "num_tokens": 23084950.0, "step": 3470 }, { "entropy": 0.6412948790937663, "epoch": 1.5488232348522786, "grad_norm": 46.75, "learning_rate": 1.4491111111111111e-05, "loss": 10.0882, "mean_token_accuracy": 0.820423986017704, "num_tokens": 23145883.0, "step": 3480 }, { "entropy": 0.6132731148041785, "epoch": 1.5532743559784121, "grad_norm": 62.75, "learning_rate": 1.446888888888889e-05, "loss": 10.116, "mean_token_accuracy": 0.8188790610060096, "num_tokens": 23208622.0, "step": 3490 }, { "entropy": 0.6269778552465141, "epoch": 1.5577254771045457, "grad_norm": 53.0, "learning_rate": 1.4446666666666668e-05, "loss": 10.06, "mean_token_accuracy": 0.8190484814345836, "num_tokens": 23274053.0, "step": 3500 }, { "epoch": 1.5577254771045457, "eval_biology_entropy": 1.0056324085593225, "eval_biology_loss": 1.3652487993240356, "eval_biology_mean_token_accuracy": 0.6777867015600204, "eval_biology_num_tokens": 23274053.0, "eval_biology_runtime": 40.0262, "eval_biology_samples_per_second": 12.492, "eval_biology_steps_per_second": 12.492, "step": 3500 }, { "epoch": 1.5577254771045457, "eval_chemistry_entropy": 0.6524584084749222, "eval_chemistry_loss": 0.7518841028213501, "eval_chemistry_mean_token_accuracy": 0.7958220383524894, "eval_chemistry_num_tokens": 23274053.0, "eval_chemistry_runtime": 45.9609, "eval_chemistry_samples_per_second": 10.879, "eval_chemistry_steps_per_second": 10.879, "step": 3500 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.560626876730249e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }