ep1ux3vt / checkpoint-2000 /trainer_state.json
roonbug's picture
Upload folder using huggingface_hub
311028c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.2,
"eval_steps": 100,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 0.7168930960819125,
"epoch": 0.016,
"grad_norm": 73.5,
"learning_rate": 1.8e-07,
"loss": 15.0906,
"mean_token_accuracy": 0.7680471498519182,
"num_tokens": 280941.0,
"step": 10
},
{
"entropy": 0.741888347826898,
"epoch": 0.032,
"grad_norm": 78.0,
"learning_rate": 3.8e-07,
"loss": 15.4631,
"mean_token_accuracy": 0.7632880255579948,
"num_tokens": 558056.0,
"step": 20
},
{
"entropy": 0.7365243999287486,
"epoch": 0.048,
"grad_norm": 65.0,
"learning_rate": 5.800000000000001e-07,
"loss": 15.1017,
"mean_token_accuracy": 0.768239913508296,
"num_tokens": 836753.0,
"step": 30
},
{
"entropy": 0.7818019269034266,
"epoch": 0.064,
"grad_norm": 56.25,
"learning_rate": 7.8e-07,
"loss": 15.6995,
"mean_token_accuracy": 0.7602430328726768,
"num_tokens": 1126446.0,
"step": 40
},
{
"entropy": 0.8105136282742024,
"epoch": 0.08,
"grad_norm": 47.25,
"learning_rate": 9.800000000000001e-07,
"loss": 15.0321,
"mean_token_accuracy": 0.7664048440754414,
"num_tokens": 1413596.0,
"step": 50
},
{
"entropy": 0.8216251201927662,
"epoch": 0.096,
"grad_norm": 39.75,
"learning_rate": 1.1800000000000001e-06,
"loss": 14.6285,
"mean_token_accuracy": 0.7707466218620539,
"num_tokens": 1701193.0,
"step": 60
},
{
"entropy": 0.8549969043582678,
"epoch": 0.112,
"grad_norm": 39.0,
"learning_rate": 1.3800000000000001e-06,
"loss": 14.8679,
"mean_token_accuracy": 0.7673181220889091,
"num_tokens": 1979232.0,
"step": 70
},
{
"entropy": 0.8556341353803874,
"epoch": 0.128,
"grad_norm": 34.0,
"learning_rate": 1.5800000000000001e-06,
"loss": 14.6467,
"mean_token_accuracy": 0.7710235182195901,
"num_tokens": 2274177.0,
"step": 80
},
{
"entropy": 0.8478256281465292,
"epoch": 0.144,
"grad_norm": 31.0,
"learning_rate": 1.7800000000000001e-06,
"loss": 14.3261,
"mean_token_accuracy": 0.7732654966413974,
"num_tokens": 2548445.0,
"step": 90
},
{
"entropy": 0.8438362725079059,
"epoch": 0.16,
"grad_norm": 38.5,
"learning_rate": 1.98e-06,
"loss": 14.0318,
"mean_token_accuracy": 0.7773757755756379,
"num_tokens": 2824418.0,
"step": 100
},
{
"epoch": 0.16,
"eval_biology_entropy": 1.211377203464508,
"eval_biology_loss": 1.1644827127456665,
"eval_biology_mean_token_accuracy": 0.7046201548576355,
"eval_biology_num_tokens": 2824418.0,
"eval_biology_runtime": 20.5128,
"eval_biology_samples_per_second": 24.375,
"eval_biology_steps_per_second": 6.094,
"step": 100
},
{
"epoch": 0.16,
"eval_math_entropy": 0.875089626789093,
"eval_math_loss": 0.8965557217597961,
"eval_math_mean_token_accuracy": 0.7736486663818359,
"eval_math_num_tokens": 2824418.0,
"eval_math_runtime": 25.0963,
"eval_math_samples_per_second": 19.923,
"eval_math_steps_per_second": 4.981,
"step": 100
},
{
"entropy": 0.8555477414280176,
"epoch": 0.176,
"grad_norm": 47.75,
"learning_rate": 2.1800000000000003e-06,
"loss": 14.0356,
"mean_token_accuracy": 0.7775060940533877,
"num_tokens": 3110313.0,
"step": 110
},
{
"entropy": 0.8610258311033249,
"epoch": 0.192,
"grad_norm": 31.375,
"learning_rate": 2.38e-06,
"loss": 14.025,
"mean_token_accuracy": 0.7762446004897356,
"num_tokens": 3394170.0,
"step": 120
},
{
"entropy": 0.8509209487587214,
"epoch": 0.208,
"grad_norm": 30.0,
"learning_rate": 2.5800000000000003e-06,
"loss": 13.8558,
"mean_token_accuracy": 0.7793015491217374,
"num_tokens": 3673600.0,
"step": 130
},
{
"entropy": 0.8540813602507115,
"epoch": 0.224,
"grad_norm": 30.125,
"learning_rate": 2.7800000000000005e-06,
"loss": 13.8247,
"mean_token_accuracy": 0.7789989039301872,
"num_tokens": 3953732.0,
"step": 140
},
{
"entropy": 0.8311325689777732,
"epoch": 0.24,
"grad_norm": 30.25,
"learning_rate": 2.9800000000000003e-06,
"loss": 13.4688,
"mean_token_accuracy": 0.7846441507339478,
"num_tokens": 4243655.0,
"step": 150
},
{
"entropy": 0.8143093746155501,
"epoch": 0.256,
"grad_norm": 27.625,
"learning_rate": 3.1800000000000005e-06,
"loss": 13.0712,
"mean_token_accuracy": 0.7881167802959681,
"num_tokens": 4531471.0,
"step": 160
},
{
"entropy": 0.8313567344099283,
"epoch": 0.272,
"grad_norm": 24.0,
"learning_rate": 3.3800000000000007e-06,
"loss": 13.3645,
"mean_token_accuracy": 0.7852793108671904,
"num_tokens": 4810284.0,
"step": 170
},
{
"entropy": 0.8148340426385403,
"epoch": 0.288,
"grad_norm": 26.375,
"learning_rate": 3.58e-06,
"loss": 13.0548,
"mean_token_accuracy": 0.7882425185292959,
"num_tokens": 5095104.0,
"step": 180
},
{
"entropy": 0.8263534324243664,
"epoch": 0.304,
"grad_norm": 25.125,
"learning_rate": 3.7800000000000002e-06,
"loss": 13.2184,
"mean_token_accuracy": 0.7860081434249878,
"num_tokens": 5383732.0,
"step": 190
},
{
"entropy": 0.7958642322570085,
"epoch": 0.32,
"grad_norm": 23.75,
"learning_rate": 3.980000000000001e-06,
"loss": 12.7072,
"mean_token_accuracy": 0.7937722463160753,
"num_tokens": 5676334.0,
"step": 200
},
{
"epoch": 0.32,
"eval_biology_entropy": 1.1982407326698303,
"eval_biology_loss": 1.1808913946151733,
"eval_biology_mean_token_accuracy": 0.6998598065376281,
"eval_biology_num_tokens": 5676334.0,
"eval_biology_runtime": 19.7103,
"eval_biology_samples_per_second": 25.367,
"eval_biology_steps_per_second": 6.342,
"step": 200
},
{
"epoch": 0.32,
"eval_math_entropy": 0.8109114770889282,
"eval_math_loss": 0.822318971157074,
"eval_math_mean_token_accuracy": 0.7874419956207276,
"eval_math_num_tokens": 5676334.0,
"eval_math_runtime": 24.5627,
"eval_math_samples_per_second": 20.356,
"eval_math_steps_per_second": 5.089,
"step": 200
},
{
"entropy": 0.8209991015493869,
"epoch": 0.336,
"grad_norm": 23.375,
"learning_rate": 4.18e-06,
"loss": 13.1713,
"mean_token_accuracy": 0.78699039965868,
"num_tokens": 5958480.0,
"step": 210
},
{
"entropy": 0.7989038350060582,
"epoch": 0.352,
"grad_norm": 25.0,
"learning_rate": 4.38e-06,
"loss": 12.7482,
"mean_token_accuracy": 0.7930307753384114,
"num_tokens": 6242161.0,
"step": 220
},
{
"entropy": 0.7915343299508095,
"epoch": 0.368,
"grad_norm": 24.875,
"learning_rate": 4.58e-06,
"loss": 12.6757,
"mean_token_accuracy": 0.7925275303423405,
"num_tokens": 6523679.0,
"step": 230
},
{
"entropy": 0.7743825454264879,
"epoch": 0.384,
"grad_norm": 23.375,
"learning_rate": 4.78e-06,
"loss": 12.3704,
"mean_token_accuracy": 0.79692403934896,
"num_tokens": 6810978.0,
"step": 240
},
{
"entropy": 0.8141555316746235,
"epoch": 0.4,
"grad_norm": 24.125,
"learning_rate": 4.980000000000001e-06,
"loss": 13.0618,
"mean_token_accuracy": 0.7885617177933455,
"num_tokens": 7096903.0,
"step": 250
},
{
"entropy": 0.7800503006204963,
"epoch": 0.416,
"grad_norm": 22.25,
"learning_rate": 5.18e-06,
"loss": 12.498,
"mean_token_accuracy": 0.7941101636737585,
"num_tokens": 7377181.0,
"step": 260
},
{
"entropy": 0.7713520534336566,
"epoch": 0.432,
"grad_norm": 25.625,
"learning_rate": 5.380000000000001e-06,
"loss": 12.2429,
"mean_token_accuracy": 0.798516258224845,
"num_tokens": 7650523.0,
"step": 270
},
{
"entropy": 0.767449575662613,
"epoch": 0.448,
"grad_norm": 24.875,
"learning_rate": 5.580000000000001e-06,
"loss": 12.2843,
"mean_token_accuracy": 0.7973004225641489,
"num_tokens": 7936788.0,
"step": 280
},
{
"entropy": 0.7630951976403594,
"epoch": 0.464,
"grad_norm": 21.375,
"learning_rate": 5.78e-06,
"loss": 12.2689,
"mean_token_accuracy": 0.7984702557325363,
"num_tokens": 8223147.0,
"step": 290
},
{
"entropy": 0.7839587140828371,
"epoch": 0.48,
"grad_norm": 22.125,
"learning_rate": 5.98e-06,
"loss": 12.4483,
"mean_token_accuracy": 0.7962926685810089,
"num_tokens": 8506030.0,
"step": 300
},
{
"epoch": 0.48,
"eval_biology_entropy": 1.2097046246528627,
"eval_biology_loss": 1.1880755424499512,
"eval_biology_mean_token_accuracy": 0.6989095420837402,
"eval_biology_num_tokens": 8506030.0,
"eval_biology_runtime": 19.7532,
"eval_biology_samples_per_second": 25.312,
"eval_biology_steps_per_second": 6.328,
"step": 300
},
{
"epoch": 0.48,
"eval_math_entropy": 0.7799157240390777,
"eval_math_loss": 0.7781939506530762,
"eval_math_mean_token_accuracy": 0.7959079008102417,
"eval_math_num_tokens": 8506030.0,
"eval_math_runtime": 24.5905,
"eval_math_samples_per_second": 20.333,
"eval_math_steps_per_second": 5.083,
"step": 300
},
{
"entropy": 0.7540466286242008,
"epoch": 0.496,
"grad_norm": 24.375,
"learning_rate": 6.18e-06,
"loss": 12.0453,
"mean_token_accuracy": 0.8007366862148046,
"num_tokens": 8788726.0,
"step": 310
},
{
"entropy": 0.7493760107085109,
"epoch": 0.512,
"grad_norm": 24.0,
"learning_rate": 6.380000000000001e-06,
"loss": 11.9382,
"mean_token_accuracy": 0.8025602623820305,
"num_tokens": 9078039.0,
"step": 320
},
{
"entropy": 0.7366351887583733,
"epoch": 0.528,
"grad_norm": 23.0,
"learning_rate": 6.5800000000000005e-06,
"loss": 11.7509,
"mean_token_accuracy": 0.8059428248554468,
"num_tokens": 9373860.0,
"step": 330
},
{
"entropy": 0.7388057533651591,
"epoch": 0.544,
"grad_norm": 24.125,
"learning_rate": 6.780000000000001e-06,
"loss": 11.827,
"mean_token_accuracy": 0.8040345013141632,
"num_tokens": 9660940.0,
"step": 340
},
{
"entropy": 0.7601569008082152,
"epoch": 0.56,
"grad_norm": 26.625,
"learning_rate": 6.98e-06,
"loss": 12.1324,
"mean_token_accuracy": 0.797855831682682,
"num_tokens": 9932302.0,
"step": 350
},
{
"entropy": 0.7458819771185518,
"epoch": 0.576,
"grad_norm": 22.375,
"learning_rate": 7.180000000000001e-06,
"loss": 11.9451,
"mean_token_accuracy": 0.8013740532100201,
"num_tokens": 10215462.0,
"step": 360
},
{
"entropy": 0.7404385067522525,
"epoch": 0.592,
"grad_norm": 21.625,
"learning_rate": 7.3800000000000005e-06,
"loss": 11.8309,
"mean_token_accuracy": 0.802900119498372,
"num_tokens": 10504396.0,
"step": 370
},
{
"entropy": 0.7532710742205382,
"epoch": 0.608,
"grad_norm": 27.75,
"learning_rate": 7.58e-06,
"loss": 12.0281,
"mean_token_accuracy": 0.801684994623065,
"num_tokens": 10793126.0,
"step": 380
},
{
"entropy": 0.7331796364858747,
"epoch": 0.624,
"grad_norm": 21.25,
"learning_rate": 7.78e-06,
"loss": 11.697,
"mean_token_accuracy": 0.8045222193002701,
"num_tokens": 11081768.0,
"step": 390
},
{
"entropy": 0.7177777705714107,
"epoch": 0.64,
"grad_norm": 20.0,
"learning_rate": 7.980000000000002e-06,
"loss": 11.4977,
"mean_token_accuracy": 0.8074667323380709,
"num_tokens": 11370320.0,
"step": 400
},
{
"epoch": 0.64,
"eval_biology_entropy": 1.199187099456787,
"eval_biology_loss": 1.194938063621521,
"eval_biology_mean_token_accuracy": 0.6980597639083862,
"eval_biology_num_tokens": 11370320.0,
"eval_biology_runtime": 19.7518,
"eval_biology_samples_per_second": 25.314,
"eval_biology_steps_per_second": 6.329,
"step": 400
},
{
"epoch": 0.64,
"eval_math_entropy": 0.7331400663852692,
"eval_math_loss": 0.746539831161499,
"eval_math_mean_token_accuracy": 0.8015342946052552,
"eval_math_num_tokens": 11370320.0,
"eval_math_runtime": 24.6063,
"eval_math_samples_per_second": 20.32,
"eval_math_steps_per_second": 5.08,
"step": 400
},
{
"entropy": 0.7210552679374814,
"epoch": 0.656,
"grad_norm": 20.75,
"learning_rate": 8.18e-06,
"loss": 11.4447,
"mean_token_accuracy": 0.8082952104508877,
"num_tokens": 11657835.0,
"step": 410
},
{
"entropy": 0.7324020706117154,
"epoch": 0.672,
"grad_norm": 22.5,
"learning_rate": 8.380000000000001e-06,
"loss": 11.7039,
"mean_token_accuracy": 0.8040592070668936,
"num_tokens": 11949262.0,
"step": 420
},
{
"entropy": 0.7221599837765098,
"epoch": 0.688,
"grad_norm": 19.5,
"learning_rate": 8.580000000000001e-06,
"loss": 11.5232,
"mean_token_accuracy": 0.8065517093986273,
"num_tokens": 12227640.0,
"step": 430
},
{
"entropy": 0.7129955545067788,
"epoch": 0.704,
"grad_norm": 19.0,
"learning_rate": 8.78e-06,
"loss": 11.4179,
"mean_token_accuracy": 0.8087377645075321,
"num_tokens": 12516641.0,
"step": 440
},
{
"entropy": 0.7289297079667449,
"epoch": 0.72,
"grad_norm": 23.125,
"learning_rate": 8.98e-06,
"loss": 11.5855,
"mean_token_accuracy": 0.8067171189934015,
"num_tokens": 12793343.0,
"step": 450
},
{
"entropy": 0.7121220523491502,
"epoch": 0.736,
"grad_norm": 21.0,
"learning_rate": 9.180000000000002e-06,
"loss": 11.3422,
"mean_token_accuracy": 0.8094062607735395,
"num_tokens": 13077981.0,
"step": 460
},
{
"entropy": 0.6984126020222903,
"epoch": 0.752,
"grad_norm": 18.625,
"learning_rate": 9.38e-06,
"loss": 11.0848,
"mean_token_accuracy": 0.811941733583808,
"num_tokens": 13358957.0,
"step": 470
},
{
"entropy": 0.677340486086905,
"epoch": 0.768,
"grad_norm": 20.875,
"learning_rate": 9.58e-06,
"loss": 10.8164,
"mean_token_accuracy": 0.8181491158902645,
"num_tokens": 13653412.0,
"step": 480
},
{
"entropy": 0.717779103666544,
"epoch": 0.784,
"grad_norm": 20.875,
"learning_rate": 9.780000000000001e-06,
"loss": 11.5008,
"mean_token_accuracy": 0.8082873310893774,
"num_tokens": 13940856.0,
"step": 490
},
{
"entropy": 0.7195664433762431,
"epoch": 0.8,
"grad_norm": 18.625,
"learning_rate": 9.980000000000001e-06,
"loss": 11.4516,
"mean_token_accuracy": 0.8075944270938635,
"num_tokens": 14230754.0,
"step": 500
},
{
"epoch": 0.8,
"eval_biology_entropy": 1.2116008014678956,
"eval_biology_loss": 1.1978343725204468,
"eval_biology_mean_token_accuracy": 0.6975936050415039,
"eval_biology_num_tokens": 14230754.0,
"eval_biology_runtime": 19.7814,
"eval_biology_samples_per_second": 25.276,
"eval_biology_steps_per_second": 6.319,
"step": 500
},
{
"epoch": 0.8,
"eval_math_entropy": 0.7416743865013122,
"eval_math_loss": 0.7205922603607178,
"eval_math_mean_token_accuracy": 0.8071010875701904,
"eval_math_num_tokens": 14230754.0,
"eval_math_runtime": 24.6035,
"eval_math_samples_per_second": 20.322,
"eval_math_steps_per_second": 5.081,
"step": 500
},
{
"entropy": 0.7075521990656852,
"epoch": 0.816,
"grad_norm": 19.875,
"learning_rate": 1.018e-05,
"loss": 11.2548,
"mean_token_accuracy": 0.8095884408801794,
"num_tokens": 14519893.0,
"step": 510
},
{
"entropy": 0.6945433892309666,
"epoch": 0.832,
"grad_norm": 19.375,
"learning_rate": 1.038e-05,
"loss": 11.0849,
"mean_token_accuracy": 0.8136709745973348,
"num_tokens": 14805088.0,
"step": 520
},
{
"entropy": 0.7229658916592598,
"epoch": 0.848,
"grad_norm": 21.625,
"learning_rate": 1.0580000000000002e-05,
"loss": 11.4838,
"mean_token_accuracy": 0.8065179891884326,
"num_tokens": 15086306.0,
"step": 530
},
{
"entropy": 0.7076279081404209,
"epoch": 0.864,
"grad_norm": 18.25,
"learning_rate": 1.0780000000000002e-05,
"loss": 11.2928,
"mean_token_accuracy": 0.8091448776423931,
"num_tokens": 15370985.0,
"step": 540
},
{
"entropy": 0.7115106744691729,
"epoch": 0.88,
"grad_norm": 18.5,
"learning_rate": 1.0980000000000002e-05,
"loss": 11.3236,
"mean_token_accuracy": 0.8087493713945151,
"num_tokens": 15653836.0,
"step": 550
},
{
"entropy": 0.7007610065862536,
"epoch": 0.896,
"grad_norm": 19.25,
"learning_rate": 1.1180000000000001e-05,
"loss": 11.1858,
"mean_token_accuracy": 0.8117571648210287,
"num_tokens": 15932179.0,
"step": 560
},
{
"entropy": 0.689027976989746,
"epoch": 0.912,
"grad_norm": 19.0,
"learning_rate": 1.138e-05,
"loss": 11.0305,
"mean_token_accuracy": 0.8121056731790304,
"num_tokens": 16219842.0,
"step": 570
},
{
"entropy": 0.6829117186367512,
"epoch": 0.928,
"grad_norm": 17.75,
"learning_rate": 1.1580000000000001e-05,
"loss": 10.8991,
"mean_token_accuracy": 0.814700061455369,
"num_tokens": 16499842.0,
"step": 580
},
{
"entropy": 0.6976756127551198,
"epoch": 0.944,
"grad_norm": 33.5,
"learning_rate": 1.178e-05,
"loss": 11.1931,
"mean_token_accuracy": 0.8115118339657783,
"num_tokens": 16781882.0,
"step": 590
},
{
"entropy": 0.7033443799242377,
"epoch": 0.96,
"grad_norm": 16.75,
"learning_rate": 1.198e-05,
"loss": 11.1514,
"mean_token_accuracy": 0.8120843637734652,
"num_tokens": 17067407.0,
"step": 600
},
{
"epoch": 0.96,
"eval_biology_entropy": 1.186503161907196,
"eval_biology_loss": 1.2035058736801147,
"eval_biology_mean_token_accuracy": 0.6964959187507629,
"eval_biology_num_tokens": 17067407.0,
"eval_biology_runtime": 20.0132,
"eval_biology_samples_per_second": 24.983,
"eval_biology_steps_per_second": 6.246,
"step": 600
},
{
"epoch": 0.96,
"eval_math_entropy": 0.7170893518924714,
"eval_math_loss": 0.7009586095809937,
"eval_math_mean_token_accuracy": 0.8111450595855713,
"eval_math_num_tokens": 17067407.0,
"eval_math_runtime": 24.6137,
"eval_math_samples_per_second": 20.314,
"eval_math_steps_per_second": 5.078,
"step": 600
},
{
"entropy": 0.6821664813905954,
"epoch": 0.976,
"grad_norm": 22.0,
"learning_rate": 1.218e-05,
"loss": 10.9331,
"mean_token_accuracy": 0.8155146226286888,
"num_tokens": 17350994.0,
"step": 610
},
{
"entropy": 0.7061214720830321,
"epoch": 0.992,
"grad_norm": 21.375,
"learning_rate": 1.2380000000000002e-05,
"loss": 11.2138,
"mean_token_accuracy": 0.8097808599472046,
"num_tokens": 17637514.0,
"step": 620
},
{
"entropy": 0.688976364955306,
"epoch": 1.008,
"grad_norm": 18.75,
"learning_rate": 1.2580000000000002e-05,
"loss": 10.9803,
"mean_token_accuracy": 0.8123320799320937,
"num_tokens": 17926570.0,
"step": 630
},
{
"entropy": 0.66466862000525,
"epoch": 1.024,
"grad_norm": 17.625,
"learning_rate": 1.2780000000000001e-05,
"loss": 10.6644,
"mean_token_accuracy": 0.8173153955489397,
"num_tokens": 18207652.0,
"step": 640
},
{
"entropy": 0.6729175634682178,
"epoch": 1.04,
"grad_norm": 18.5,
"learning_rate": 1.2980000000000001e-05,
"loss": 10.5877,
"mean_token_accuracy": 0.8187220424413681,
"num_tokens": 18484931.0,
"step": 650
},
{
"entropy": 0.6688125738874078,
"epoch": 1.056,
"grad_norm": 30.0,
"learning_rate": 1.3180000000000001e-05,
"loss": 10.7513,
"mean_token_accuracy": 0.8168547667562962,
"num_tokens": 18773457.0,
"step": 660
},
{
"entropy": 0.677242561429739,
"epoch": 1.072,
"grad_norm": 21.0,
"learning_rate": 1.3380000000000002e-05,
"loss": 10.7823,
"mean_token_accuracy": 0.8151846762746573,
"num_tokens": 19055365.0,
"step": 670
},
{
"entropy": 0.6669268727302551,
"epoch": 1.088,
"grad_norm": 18.5,
"learning_rate": 1.3580000000000002e-05,
"loss": 10.6585,
"mean_token_accuracy": 0.8173960983753205,
"num_tokens": 19345730.0,
"step": 680
},
{
"entropy": 0.6672801608219743,
"epoch": 1.104,
"grad_norm": 16.5,
"learning_rate": 1.378e-05,
"loss": 10.6607,
"mean_token_accuracy": 0.8167301990091801,
"num_tokens": 19637390.0,
"step": 690
},
{
"entropy": 0.6486887495964766,
"epoch": 1.12,
"grad_norm": 17.375,
"learning_rate": 1.398e-05,
"loss": 10.3743,
"mean_token_accuracy": 0.8213667117059231,
"num_tokens": 19923914.0,
"step": 700
},
{
"epoch": 1.12,
"eval_biology_entropy": 1.1550931658744812,
"eval_biology_loss": 1.2134206295013428,
"eval_biology_mean_token_accuracy": 0.6951225929260254,
"eval_biology_num_tokens": 19923914.0,
"eval_biology_runtime": 19.789,
"eval_biology_samples_per_second": 25.267,
"eval_biology_steps_per_second": 6.317,
"step": 700
},
{
"epoch": 1.12,
"eval_math_entropy": 0.6779097893238067,
"eval_math_loss": 0.687623143196106,
"eval_math_mean_token_accuracy": 0.8132782921791076,
"eval_math_num_tokens": 19923914.0,
"eval_math_runtime": 24.5984,
"eval_math_samples_per_second": 20.327,
"eval_math_steps_per_second": 5.082,
"step": 700
},
{
"entropy": 0.6534717444330453,
"epoch": 1.1360000000000001,
"grad_norm": 18.625,
"learning_rate": 1.418e-05,
"loss": 10.4612,
"mean_token_accuracy": 0.8200784765183926,
"num_tokens": 20201892.0,
"step": 710
},
{
"entropy": 0.6626596201211215,
"epoch": 1.152,
"grad_norm": 17.375,
"learning_rate": 1.4380000000000001e-05,
"loss": 10.6065,
"mean_token_accuracy": 0.8195517498999834,
"num_tokens": 20490282.0,
"step": 720
},
{
"entropy": 0.6553794769570231,
"epoch": 1.168,
"grad_norm": 16.875,
"learning_rate": 1.4580000000000001e-05,
"loss": 10.4507,
"mean_token_accuracy": 0.8203363090753555,
"num_tokens": 20785786.0,
"step": 730
},
{
"entropy": 0.6596008328720927,
"epoch": 1.184,
"grad_norm": 16.75,
"learning_rate": 1.478e-05,
"loss": 10.4924,
"mean_token_accuracy": 0.8198904592543841,
"num_tokens": 21074205.0,
"step": 740
},
{
"entropy": 0.6442235017195344,
"epoch": 1.2,
"grad_norm": 17.75,
"learning_rate": 1.498e-05,
"loss": 10.3329,
"mean_token_accuracy": 0.8222478657960892,
"num_tokens": 21369159.0,
"step": 750
},
{
"entropy": 0.6584161130711437,
"epoch": 1.216,
"grad_norm": 17.75,
"learning_rate": 1.5180000000000002e-05,
"loss": 10.4952,
"mean_token_accuracy": 0.8194109592586756,
"num_tokens": 21649178.0,
"step": 760
},
{
"entropy": 0.6401060940697789,
"epoch": 1.232,
"grad_norm": 17.875,
"learning_rate": 1.5380000000000002e-05,
"loss": 10.2162,
"mean_token_accuracy": 0.8237158339470625,
"num_tokens": 21930239.0,
"step": 770
},
{
"entropy": 0.6497831366956234,
"epoch": 1.248,
"grad_norm": 17.375,
"learning_rate": 1.5580000000000003e-05,
"loss": 10.3038,
"mean_token_accuracy": 0.8218781109899282,
"num_tokens": 22216387.0,
"step": 780
},
{
"entropy": 0.6619962759315967,
"epoch": 1.264,
"grad_norm": 17.5,
"learning_rate": 1.578e-05,
"loss": 10.6292,
"mean_token_accuracy": 0.8176151167601347,
"num_tokens": 22501002.0,
"step": 790
},
{
"entropy": 0.6531268676742912,
"epoch": 1.28,
"grad_norm": 17.0,
"learning_rate": 1.5980000000000003e-05,
"loss": 10.4508,
"mean_token_accuracy": 0.8208171010017395,
"num_tokens": 22779682.0,
"step": 800
},
{
"epoch": 1.28,
"eval_biology_entropy": 1.195213288784027,
"eval_biology_loss": 1.2141798734664917,
"eval_biology_mean_token_accuracy": 0.6942030134201049,
"eval_biology_num_tokens": 22779682.0,
"eval_biology_runtime": 19.7104,
"eval_biology_samples_per_second": 25.367,
"eval_biology_steps_per_second": 6.342,
"step": 800
},
{
"epoch": 1.28,
"eval_math_entropy": 0.6831141312122345,
"eval_math_loss": 0.6743567585945129,
"eval_math_mean_token_accuracy": 0.8159358091354371,
"eval_math_num_tokens": 22779682.0,
"eval_math_runtime": 24.5582,
"eval_math_samples_per_second": 20.36,
"eval_math_steps_per_second": 5.09,
"step": 800
},
{
"entropy": 0.6429138701409102,
"epoch": 1.296,
"grad_norm": 17.125,
"learning_rate": 1.618e-05,
"loss": 10.2614,
"mean_token_accuracy": 0.8221869930624962,
"num_tokens": 23057744.0,
"step": 810
},
{
"entropy": 0.6462734818458558,
"epoch": 1.312,
"grad_norm": 20.75,
"learning_rate": 1.638e-05,
"loss": 10.4226,
"mean_token_accuracy": 0.8203614544123411,
"num_tokens": 23344644.0,
"step": 820
},
{
"entropy": 0.6614464454352855,
"epoch": 1.328,
"grad_norm": 17.75,
"learning_rate": 1.658e-05,
"loss": 10.5671,
"mean_token_accuracy": 0.8176218140870333,
"num_tokens": 23622405.0,
"step": 830
},
{
"entropy": 0.6431225946173071,
"epoch": 1.3439999999999999,
"grad_norm": 15.25,
"learning_rate": 1.6780000000000002e-05,
"loss": 10.2817,
"mean_token_accuracy": 0.8216937210410833,
"num_tokens": 23899771.0,
"step": 840
},
{
"entropy": 0.6422285752370953,
"epoch": 1.3599999999999999,
"grad_norm": 15.9375,
"learning_rate": 1.698e-05,
"loss": 10.2688,
"mean_token_accuracy": 0.8234549313783646,
"num_tokens": 24187023.0,
"step": 850
},
{
"entropy": 0.6847162164747715,
"epoch": 1.376,
"grad_norm": 17.25,
"learning_rate": 1.718e-05,
"loss": 10.9876,
"mean_token_accuracy": 0.81151960529387,
"num_tokens": 24466132.0,
"step": 860
},
{
"entropy": 0.6464430714026094,
"epoch": 1.392,
"grad_norm": 16.25,
"learning_rate": 1.7380000000000003e-05,
"loss": 10.3124,
"mean_token_accuracy": 0.821755214035511,
"num_tokens": 24748043.0,
"step": 870
},
{
"entropy": 0.6374656381085515,
"epoch": 1.408,
"grad_norm": 17.125,
"learning_rate": 1.758e-05,
"loss": 10.1759,
"mean_token_accuracy": 0.823258052393794,
"num_tokens": 25036674.0,
"step": 880
},
{
"entropy": 0.6316773502156139,
"epoch": 1.424,
"grad_norm": 16.375,
"learning_rate": 1.7780000000000003e-05,
"loss": 10.1508,
"mean_token_accuracy": 0.8241602942347527,
"num_tokens": 25324579.0,
"step": 890
},
{
"entropy": 0.6475198846310377,
"epoch": 1.44,
"grad_norm": 18.125,
"learning_rate": 1.798e-05,
"loss": 10.3668,
"mean_token_accuracy": 0.8207426533102989,
"num_tokens": 25606824.0,
"step": 900
},
{
"epoch": 1.44,
"eval_biology_entropy": 1.1267103943824768,
"eval_biology_loss": 1.223482370376587,
"eval_biology_mean_token_accuracy": 0.6937152419090271,
"eval_biology_num_tokens": 25606824.0,
"eval_biology_runtime": 19.7672,
"eval_biology_samples_per_second": 25.294,
"eval_biology_steps_per_second": 6.324,
"step": 900
},
{
"epoch": 1.44,
"eval_math_entropy": 0.65911474609375,
"eval_math_loss": 0.6649472713470459,
"eval_math_mean_token_accuracy": 0.8173848538398742,
"eval_math_num_tokens": 25606824.0,
"eval_math_runtime": 24.6125,
"eval_math_samples_per_second": 20.315,
"eval_math_steps_per_second": 5.079,
"step": 900
},
{
"entropy": 0.6390923649072647,
"epoch": 1.456,
"grad_norm": 16.375,
"learning_rate": 1.8180000000000002e-05,
"loss": 10.2031,
"mean_token_accuracy": 0.8223831083625555,
"num_tokens": 25886396.0,
"step": 910
},
{
"entropy": 0.641916100680828,
"epoch": 1.472,
"grad_norm": 15.4375,
"learning_rate": 1.8380000000000004e-05,
"loss": 10.3424,
"mean_token_accuracy": 0.8202576618641615,
"num_tokens": 26163618.0,
"step": 920
},
{
"entropy": 0.662255228124559,
"epoch": 1.488,
"grad_norm": 17.5,
"learning_rate": 1.858e-05,
"loss": 10.5833,
"mean_token_accuracy": 0.8174156688153744,
"num_tokens": 26438338.0,
"step": 930
},
{
"entropy": 0.6319910818710923,
"epoch": 1.504,
"grad_norm": 17.5,
"learning_rate": 1.878e-05,
"loss": 10.1189,
"mean_token_accuracy": 0.8249218709766865,
"num_tokens": 26729255.0,
"step": 940
},
{
"entropy": 0.6524576544761658,
"epoch": 1.52,
"grad_norm": 15.6875,
"learning_rate": 1.898e-05,
"loss": 10.3845,
"mean_token_accuracy": 0.8204564996063709,
"num_tokens": 27017935.0,
"step": 950
},
{
"entropy": 0.6368671843782068,
"epoch": 1.536,
"grad_norm": 15.3125,
"learning_rate": 1.918e-05,
"loss": 10.2034,
"mean_token_accuracy": 0.8228708405047656,
"num_tokens": 27306339.0,
"step": 960
},
{
"entropy": 0.6371303182095289,
"epoch": 1.552,
"grad_norm": 16.5,
"learning_rate": 1.938e-05,
"loss": 10.1556,
"mean_token_accuracy": 0.8237581226974726,
"num_tokens": 27591959.0,
"step": 970
},
{
"entropy": 0.6189220814034343,
"epoch": 1.568,
"grad_norm": 15.4375,
"learning_rate": 1.9580000000000002e-05,
"loss": 9.9336,
"mean_token_accuracy": 0.8277939360588789,
"num_tokens": 27884398.0,
"step": 980
},
{
"entropy": 0.6375723648816347,
"epoch": 1.584,
"grad_norm": 15.3125,
"learning_rate": 1.978e-05,
"loss": 10.116,
"mean_token_accuracy": 0.824637695401907,
"num_tokens": 28171274.0,
"step": 990
},
{
"entropy": 0.631741807423532,
"epoch": 1.6,
"grad_norm": 16.125,
"learning_rate": 1.9980000000000002e-05,
"loss": 10.1091,
"mean_token_accuracy": 0.8227341767400503,
"num_tokens": 28457624.0,
"step": 1000
},
{
"epoch": 1.6,
"eval_biology_entropy": 1.133832766532898,
"eval_biology_loss": 1.2268259525299072,
"eval_biology_mean_token_accuracy": 0.6920726819038391,
"eval_biology_num_tokens": 28457624.0,
"eval_biology_runtime": 19.738,
"eval_biology_samples_per_second": 25.332,
"eval_biology_steps_per_second": 6.333,
"step": 1000
},
{
"epoch": 1.6,
"eval_math_entropy": 0.6353513326644897,
"eval_math_loss": 0.6570390462875366,
"eval_math_mean_token_accuracy": 0.8185423817634583,
"eval_math_num_tokens": 28457624.0,
"eval_math_runtime": 24.6162,
"eval_math_samples_per_second": 20.312,
"eval_math_steps_per_second": 5.078,
"step": 1000
},
{
"entropy": 0.6374903971329331,
"epoch": 1.616,
"grad_norm": 16.125,
"learning_rate": 1.9980000000000002e-05,
"loss": 10.264,
"mean_token_accuracy": 0.8219246376305819,
"num_tokens": 28743099.0,
"step": 1010
},
{
"entropy": 0.6523007312789559,
"epoch": 1.6320000000000001,
"grad_norm": 15.375,
"learning_rate": 1.995777777777778e-05,
"loss": 10.326,
"mean_token_accuracy": 0.8201606553047895,
"num_tokens": 29017297.0,
"step": 1020
},
{
"entropy": 0.629386986978352,
"epoch": 1.6480000000000001,
"grad_norm": 14.6875,
"learning_rate": 1.9935555555555557e-05,
"loss": 10.0254,
"mean_token_accuracy": 0.8264750462025404,
"num_tokens": 29303707.0,
"step": 1030
},
{
"entropy": 0.6322049422189593,
"epoch": 1.6640000000000001,
"grad_norm": 16.625,
"learning_rate": 1.9913333333333335e-05,
"loss": 10.1151,
"mean_token_accuracy": 0.8231775060296058,
"num_tokens": 29597156.0,
"step": 1040
},
{
"entropy": 0.6406657313928008,
"epoch": 1.6800000000000002,
"grad_norm": 14.0,
"learning_rate": 1.9891111111111112e-05,
"loss": 10.2285,
"mean_token_accuracy": 0.8236899144947529,
"num_tokens": 29883879.0,
"step": 1050
},
{
"entropy": 0.6394492890685797,
"epoch": 1.696,
"grad_norm": 16.5,
"learning_rate": 1.986888888888889e-05,
"loss": 10.2443,
"mean_token_accuracy": 0.8218765918165445,
"num_tokens": 30165760.0,
"step": 1060
},
{
"entropy": 0.6265557751059532,
"epoch": 1.712,
"grad_norm": 13.6875,
"learning_rate": 1.9846666666666668e-05,
"loss": 10.0545,
"mean_token_accuracy": 0.82537433616817,
"num_tokens": 30460367.0,
"step": 1070
},
{
"entropy": 0.6222736675292253,
"epoch": 1.728,
"grad_norm": 16.375,
"learning_rate": 1.9824444444444445e-05,
"loss": 9.8823,
"mean_token_accuracy": 0.8277810603380203,
"num_tokens": 30739137.0,
"step": 1080
},
{
"entropy": 0.6292094394564629,
"epoch": 1.744,
"grad_norm": 15.375,
"learning_rate": 1.9802222222222226e-05,
"loss": 10.0169,
"mean_token_accuracy": 0.8262683913111687,
"num_tokens": 31022663.0,
"step": 1090
},
{
"entropy": 0.6284451805055141,
"epoch": 1.76,
"grad_norm": 15.9375,
"learning_rate": 1.978e-05,
"loss": 10.033,
"mean_token_accuracy": 0.8236148204654455,
"num_tokens": 31306494.0,
"step": 1100
},
{
"epoch": 1.76,
"eval_biology_entropy": 1.1596141772270203,
"eval_biology_loss": 1.2293517589569092,
"eval_biology_mean_token_accuracy": 0.6917372670173645,
"eval_biology_num_tokens": 31306494.0,
"eval_biology_runtime": 19.7887,
"eval_biology_samples_per_second": 25.267,
"eval_biology_steps_per_second": 6.317,
"step": 1100
},
{
"epoch": 1.76,
"eval_math_entropy": 0.6379002649784088,
"eval_math_loss": 0.6466652154922485,
"eval_math_mean_token_accuracy": 0.8209902768135071,
"eval_math_num_tokens": 31306494.0,
"eval_math_runtime": 24.6327,
"eval_math_samples_per_second": 20.298,
"eval_math_steps_per_second": 5.075,
"step": 1100
},
{
"entropy": 0.625352057442069,
"epoch": 1.776,
"grad_norm": 14.25,
"learning_rate": 1.975777777777778e-05,
"loss": 10.0185,
"mean_token_accuracy": 0.8257606349885463,
"num_tokens": 31595542.0,
"step": 1110
},
{
"entropy": 0.6339781129732728,
"epoch": 1.792,
"grad_norm": 15.0625,
"learning_rate": 1.9735555555555556e-05,
"loss": 10.206,
"mean_token_accuracy": 0.823058757558465,
"num_tokens": 31881189.0,
"step": 1120
},
{
"entropy": 0.6271994180977345,
"epoch": 1.808,
"grad_norm": 14.0625,
"learning_rate": 1.9713333333333337e-05,
"loss": 10.0454,
"mean_token_accuracy": 0.824696258828044,
"num_tokens": 32164196.0,
"step": 1130
},
{
"entropy": 0.6018361985683441,
"epoch": 1.8239999999999998,
"grad_norm": 15.25,
"learning_rate": 1.969111111111111e-05,
"loss": 9.6036,
"mean_token_accuracy": 0.8306465744972229,
"num_tokens": 32441530.0,
"step": 1140
},
{
"entropy": 0.6176456701010465,
"epoch": 1.8399999999999999,
"grad_norm": 14.75,
"learning_rate": 1.9668888888888892e-05,
"loss": 9.9561,
"mean_token_accuracy": 0.82696249820292,
"num_tokens": 32723145.0,
"step": 1150
},
{
"entropy": 0.6277465337887407,
"epoch": 1.8559999999999999,
"grad_norm": 15.1875,
"learning_rate": 1.9646666666666666e-05,
"loss": 10.0615,
"mean_token_accuracy": 0.8258139468729496,
"num_tokens": 33011263.0,
"step": 1160
},
{
"entropy": 0.6127156307920814,
"epoch": 1.8719999999999999,
"grad_norm": 15.75,
"learning_rate": 1.9624444444444447e-05,
"loss": 9.8454,
"mean_token_accuracy": 0.8283385183662176,
"num_tokens": 33298921.0,
"step": 1170
},
{
"entropy": 0.626422967761755,
"epoch": 1.888,
"grad_norm": 16.25,
"learning_rate": 1.9602222222222225e-05,
"loss": 10.0059,
"mean_token_accuracy": 0.8256520442664623,
"num_tokens": 33576243.0,
"step": 1180
},
{
"entropy": 0.6264065893366932,
"epoch": 1.904,
"grad_norm": 15.4375,
"learning_rate": 1.9580000000000002e-05,
"loss": 9.9977,
"mean_token_accuracy": 0.8253488805145025,
"num_tokens": 33850968.0,
"step": 1190
},
{
"entropy": 0.6095137868076563,
"epoch": 1.92,
"grad_norm": 14.9375,
"learning_rate": 1.955777777777778e-05,
"loss": 9.7316,
"mean_token_accuracy": 0.8285220514982938,
"num_tokens": 34128558.0,
"step": 1200
},
{
"epoch": 1.92,
"eval_biology_entropy": 1.182666036605835,
"eval_biology_loss": 1.2319380044937134,
"eval_biology_mean_token_accuracy": 0.6909053907394409,
"eval_biology_num_tokens": 34128558.0,
"eval_biology_runtime": 19.7849,
"eval_biology_samples_per_second": 25.272,
"eval_biology_steps_per_second": 6.318,
"step": 1200
},
{
"epoch": 1.92,
"eval_math_entropy": 0.649978009223938,
"eval_math_loss": 0.63917076587677,
"eval_math_mean_token_accuracy": 0.8233149046897889,
"eval_math_num_tokens": 34128558.0,
"eval_math_runtime": 24.6175,
"eval_math_samples_per_second": 20.311,
"eval_math_steps_per_second": 5.078,
"step": 1200
},
{
"entropy": 0.6194202324375511,
"epoch": 1.936,
"grad_norm": 16.25,
"learning_rate": 1.9535555555555557e-05,
"loss": 9.9383,
"mean_token_accuracy": 0.8252742733806372,
"num_tokens": 34408056.0,
"step": 1210
},
{
"entropy": 0.6192464983090759,
"epoch": 1.952,
"grad_norm": 14.625,
"learning_rate": 1.9513333333333335e-05,
"loss": 9.7913,
"mean_token_accuracy": 0.8282815985381603,
"num_tokens": 34684679.0,
"step": 1220
},
{
"entropy": 0.6408920273184776,
"epoch": 1.968,
"grad_norm": 16.875,
"learning_rate": 1.9491111111111113e-05,
"loss": 10.1921,
"mean_token_accuracy": 0.8224945243448019,
"num_tokens": 34971038.0,
"step": 1230
},
{
"entropy": 0.6087088288739324,
"epoch": 1.984,
"grad_norm": 14.25,
"learning_rate": 1.946888888888889e-05,
"loss": 9.7506,
"mean_token_accuracy": 0.8292552776634693,
"num_tokens": 35262281.0,
"step": 1240
},
{
"entropy": 0.6189011264592409,
"epoch": 2.0,
"grad_norm": 14.5625,
"learning_rate": 1.9446666666666668e-05,
"loss": 9.8391,
"mean_token_accuracy": 0.8280998166650534,
"num_tokens": 35560864.0,
"step": 1250
},
{
"entropy": 0.5718940345570445,
"epoch": 2.016,
"grad_norm": 17.875,
"learning_rate": 1.9424444444444446e-05,
"loss": 8.9755,
"mean_token_accuracy": 0.8381088264286518,
"num_tokens": 35846704.0,
"step": 1260
},
{
"entropy": 0.5528001293540001,
"epoch": 2.032,
"grad_norm": 17.375,
"learning_rate": 1.9402222222222223e-05,
"loss": 8.7959,
"mean_token_accuracy": 0.8400239538401365,
"num_tokens": 36128775.0,
"step": 1270
},
{
"entropy": 0.5431115614250303,
"epoch": 2.048,
"grad_norm": 16.875,
"learning_rate": 1.938e-05,
"loss": 8.6342,
"mean_token_accuracy": 0.8440989479422569,
"num_tokens": 36419504.0,
"step": 1280
},
{
"entropy": 0.5422856478020549,
"epoch": 2.064,
"grad_norm": 16.125,
"learning_rate": 1.935777777777778e-05,
"loss": 8.6381,
"mean_token_accuracy": 0.8434138212352991,
"num_tokens": 36706816.0,
"step": 1290
},
{
"entropy": 0.5466266760602594,
"epoch": 2.08,
"grad_norm": 17.25,
"learning_rate": 1.9335555555555556e-05,
"loss": 8.6792,
"mean_token_accuracy": 0.8412496495991946,
"num_tokens": 36988475.0,
"step": 1300
},
{
"epoch": 2.08,
"eval_biology_entropy": 0.9598336253166199,
"eval_biology_loss": 1.2842097282409668,
"eval_biology_mean_token_accuracy": 0.6866690034866333,
"eval_biology_num_tokens": 36988475.0,
"eval_biology_runtime": 19.8089,
"eval_biology_samples_per_second": 25.241,
"eval_biology_steps_per_second": 6.31,
"step": 1300
},
{
"epoch": 2.08,
"eval_math_entropy": 0.5634605071544647,
"eval_math_loss": 0.6450303792953491,
"eval_math_mean_token_accuracy": 0.823544692993164,
"eval_math_num_tokens": 36988475.0,
"eval_math_runtime": 24.6182,
"eval_math_samples_per_second": 20.31,
"eval_math_steps_per_second": 5.078,
"step": 1300
},
{
"entropy": 0.5461967477574945,
"epoch": 2.096,
"grad_norm": 16.25,
"learning_rate": 1.9313333333333334e-05,
"loss": 8.7832,
"mean_token_accuracy": 0.8421301823109388,
"num_tokens": 37270131.0,
"step": 1310
},
{
"entropy": 0.5379752703011036,
"epoch": 2.112,
"grad_norm": 17.125,
"learning_rate": 1.9291111111111115e-05,
"loss": 8.6125,
"mean_token_accuracy": 0.8422962158918381,
"num_tokens": 37563537.0,
"step": 1320
},
{
"entropy": 0.5374840356409549,
"epoch": 2.128,
"grad_norm": 16.75,
"learning_rate": 1.926888888888889e-05,
"loss": 8.4982,
"mean_token_accuracy": 0.8446350190788507,
"num_tokens": 37843959.0,
"step": 1330
},
{
"entropy": 0.5455164171755313,
"epoch": 2.144,
"grad_norm": 16.375,
"learning_rate": 1.924666666666667e-05,
"loss": 8.6663,
"mean_token_accuracy": 0.842664523050189,
"num_tokens": 38133092.0,
"step": 1340
},
{
"entropy": 0.5403652492910623,
"epoch": 2.16,
"grad_norm": 15.0625,
"learning_rate": 1.9224444444444444e-05,
"loss": 8.6681,
"mean_token_accuracy": 0.8432158157229424,
"num_tokens": 38421229.0,
"step": 1350
},
{
"entropy": 0.5242220051586628,
"epoch": 2.176,
"grad_norm": 16.875,
"learning_rate": 1.9202222222222225e-05,
"loss": 8.3559,
"mean_token_accuracy": 0.8481345418840647,
"num_tokens": 38708043.0,
"step": 1360
},
{
"entropy": 0.552289474569261,
"epoch": 2.192,
"grad_norm": 16.25,
"learning_rate": 1.918e-05,
"loss": 8.8236,
"mean_token_accuracy": 0.8398358784615993,
"num_tokens": 38996930.0,
"step": 1370
},
{
"entropy": 0.5456716753542423,
"epoch": 2.208,
"grad_norm": 18.625,
"learning_rate": 1.915777777777778e-05,
"loss": 8.7515,
"mean_token_accuracy": 0.8416260961443186,
"num_tokens": 39279481.0,
"step": 1380
},
{
"entropy": 0.5409996012225747,
"epoch": 2.224,
"grad_norm": 17.5,
"learning_rate": 1.9135555555555555e-05,
"loss": 8.6161,
"mean_token_accuracy": 0.843621500954032,
"num_tokens": 39569030.0,
"step": 1390
},
{
"entropy": 0.5462250377982855,
"epoch": 2.24,
"grad_norm": 16.5,
"learning_rate": 1.9113333333333336e-05,
"loss": 8.7545,
"mean_token_accuracy": 0.8413930989801883,
"num_tokens": 39854873.0,
"step": 1400
},
{
"epoch": 2.24,
"eval_biology_entropy": 0.9318458199501037,
"eval_biology_loss": 1.3069241046905518,
"eval_biology_mean_token_accuracy": 0.6855153131484986,
"eval_biology_num_tokens": 39854873.0,
"eval_biology_runtime": 19.7633,
"eval_biology_samples_per_second": 25.299,
"eval_biology_steps_per_second": 6.325,
"step": 1400
},
{
"epoch": 2.24,
"eval_math_entropy": 0.5731087529659271,
"eval_math_loss": 0.6433758735656738,
"eval_math_mean_token_accuracy": 0.8230452270507812,
"eval_math_num_tokens": 39854873.0,
"eval_math_runtime": 24.8539,
"eval_math_samples_per_second": 20.118,
"eval_math_steps_per_second": 5.029,
"step": 1400
},
{
"entropy": 0.543942479789257,
"epoch": 2.2560000000000002,
"grad_norm": 16.875,
"learning_rate": 1.9091111111111113e-05,
"loss": 8.6955,
"mean_token_accuracy": 0.8418730091303587,
"num_tokens": 40141190.0,
"step": 1410
},
{
"entropy": 0.5582456098869443,
"epoch": 2.2720000000000002,
"grad_norm": 19.625,
"learning_rate": 1.906888888888889e-05,
"loss": 8.8471,
"mean_token_accuracy": 0.8396125495433807,
"num_tokens": 40415203.0,
"step": 1420
},
{
"entropy": 0.5530563285574317,
"epoch": 2.288,
"grad_norm": 17.875,
"learning_rate": 1.904666666666667e-05,
"loss": 8.823,
"mean_token_accuracy": 0.8394552428275347,
"num_tokens": 40702393.0,
"step": 1430
},
{
"entropy": 0.5304178670048714,
"epoch": 2.304,
"grad_norm": 18.0,
"learning_rate": 1.9024444444444446e-05,
"loss": 8.4621,
"mean_token_accuracy": 0.8458537045866251,
"num_tokens": 40982775.0,
"step": 1440
},
{
"entropy": 0.5507002430036664,
"epoch": 2.32,
"grad_norm": 17.25,
"learning_rate": 1.9002222222222224e-05,
"loss": 8.778,
"mean_token_accuracy": 0.8414905358105897,
"num_tokens": 41263356.0,
"step": 1450
},
{
"entropy": 0.5461155388504266,
"epoch": 2.336,
"grad_norm": 16.375,
"learning_rate": 1.898e-05,
"loss": 8.7082,
"mean_token_accuracy": 0.8419744338840246,
"num_tokens": 41545235.0,
"step": 1460
},
{
"entropy": 0.5644198174588382,
"epoch": 2.352,
"grad_norm": 17.0,
"learning_rate": 1.895777777777778e-05,
"loss": 9.0111,
"mean_token_accuracy": 0.837278475239873,
"num_tokens": 41833417.0,
"step": 1470
},
{
"entropy": 0.5465062925592065,
"epoch": 2.368,
"grad_norm": 16.625,
"learning_rate": 1.8935555555555556e-05,
"loss": 8.7822,
"mean_token_accuracy": 0.8409049317240715,
"num_tokens": 42117030.0,
"step": 1480
},
{
"entropy": 0.5634627625346184,
"epoch": 2.384,
"grad_norm": 17.0,
"learning_rate": 1.8913333333333334e-05,
"loss": 8.9984,
"mean_token_accuracy": 0.8382249467074872,
"num_tokens": 42410990.0,
"step": 1490
},
{
"entropy": 0.5541804760694504,
"epoch": 2.4,
"grad_norm": 19.125,
"learning_rate": 1.8891111111111115e-05,
"loss": 8.8381,
"mean_token_accuracy": 0.8405785549432039,
"num_tokens": 42691890.0,
"step": 1500
},
{
"epoch": 2.4,
"eval_biology_entropy": 0.9389902620315552,
"eval_biology_loss": 1.3136844635009766,
"eval_biology_mean_token_accuracy": 0.6836875596046448,
"eval_biology_num_tokens": 42691890.0,
"eval_biology_runtime": 19.8044,
"eval_biology_samples_per_second": 25.247,
"eval_biology_steps_per_second": 6.312,
"step": 1500
},
{
"epoch": 2.4,
"eval_math_entropy": 0.5745205206871032,
"eval_math_loss": 0.6360605955123901,
"eval_math_mean_token_accuracy": 0.824403573513031,
"eval_math_num_tokens": 42691890.0,
"eval_math_runtime": 24.8829,
"eval_math_samples_per_second": 20.094,
"eval_math_steps_per_second": 5.024,
"step": 1500
},
{
"entropy": 0.5681238017976284,
"epoch": 2.416,
"grad_norm": 17.375,
"learning_rate": 1.886888888888889e-05,
"loss": 9.0358,
"mean_token_accuracy": 0.8371844127774238,
"num_tokens": 42971588.0,
"step": 1510
},
{
"entropy": 0.5386728642508387,
"epoch": 2.432,
"grad_norm": 17.875,
"learning_rate": 1.884666666666667e-05,
"loss": 8.6841,
"mean_token_accuracy": 0.8422587804496289,
"num_tokens": 43253821.0,
"step": 1520
},
{
"entropy": 0.5443267293274403,
"epoch": 2.448,
"grad_norm": 18.125,
"learning_rate": 1.8824444444444445e-05,
"loss": 8.67,
"mean_token_accuracy": 0.8435158774256706,
"num_tokens": 43550902.0,
"step": 1530
},
{
"entropy": 0.5498035730794072,
"epoch": 2.464,
"grad_norm": 18.0,
"learning_rate": 1.8802222222222226e-05,
"loss": 8.7914,
"mean_token_accuracy": 0.8404768038541078,
"num_tokens": 43844259.0,
"step": 1540
},
{
"entropy": 0.5709992805495858,
"epoch": 2.48,
"grad_norm": 17.5,
"learning_rate": 1.878e-05,
"loss": 9.1217,
"mean_token_accuracy": 0.8360334102064371,
"num_tokens": 44115701.0,
"step": 1550
},
{
"entropy": 0.5559496510773897,
"epoch": 2.496,
"grad_norm": 17.125,
"learning_rate": 1.875777777777778e-05,
"loss": 8.8877,
"mean_token_accuracy": 0.8398744653910398,
"num_tokens": 44405520.0,
"step": 1560
},
{
"entropy": 0.5509569091722369,
"epoch": 2.512,
"grad_norm": 16.875,
"learning_rate": 1.873555555555556e-05,
"loss": 8.7783,
"mean_token_accuracy": 0.8410256687551737,
"num_tokens": 44686477.0,
"step": 1570
},
{
"entropy": 0.5495854092761874,
"epoch": 2.528,
"grad_norm": 16.0,
"learning_rate": 1.8713333333333336e-05,
"loss": 8.7681,
"mean_token_accuracy": 0.8412394899874925,
"num_tokens": 44969760.0,
"step": 1580
},
{
"entropy": 0.5426954831928015,
"epoch": 2.544,
"grad_norm": 17.375,
"learning_rate": 1.8691111111111114e-05,
"loss": 8.7142,
"mean_token_accuracy": 0.8419138621538877,
"num_tokens": 45255326.0,
"step": 1590
},
{
"entropy": 0.5427656076848507,
"epoch": 2.56,
"grad_norm": 16.75,
"learning_rate": 1.866888888888889e-05,
"loss": 8.6495,
"mean_token_accuracy": 0.8414557803422212,
"num_tokens": 45532525.0,
"step": 1600
},
{
"epoch": 2.56,
"eval_biology_entropy": 0.9223047132492066,
"eval_biology_loss": 1.3209964036941528,
"eval_biology_mean_token_accuracy": 0.6842733683586121,
"eval_biology_num_tokens": 45532525.0,
"eval_biology_runtime": 19.7451,
"eval_biology_samples_per_second": 25.323,
"eval_biology_steps_per_second": 6.331,
"step": 1600
},
{
"epoch": 2.56,
"eval_math_entropy": 0.5608251221179962,
"eval_math_loss": 0.6343366503715515,
"eval_math_mean_token_accuracy": 0.8256231875419616,
"eval_math_num_tokens": 45532525.0,
"eval_math_runtime": 24.5926,
"eval_math_samples_per_second": 20.331,
"eval_math_steps_per_second": 5.083,
"step": 1600
},
{
"entropy": 0.5462278285995126,
"epoch": 2.576,
"grad_norm": 17.375,
"learning_rate": 1.864666666666667e-05,
"loss": 8.7304,
"mean_token_accuracy": 0.8419133082032204,
"num_tokens": 45817478.0,
"step": 1610
},
{
"entropy": 0.5377364344894886,
"epoch": 2.592,
"grad_norm": 16.125,
"learning_rate": 1.8624444444444446e-05,
"loss": 8.6128,
"mean_token_accuracy": 0.8431659761816264,
"num_tokens": 46109575.0,
"step": 1620
},
{
"entropy": 0.5603145483881236,
"epoch": 2.608,
"grad_norm": 17.625,
"learning_rate": 1.8602222222222224e-05,
"loss": 8.926,
"mean_token_accuracy": 0.8384825445711612,
"num_tokens": 46391461.0,
"step": 1630
},
{
"entropy": 0.5244756257161498,
"epoch": 2.624,
"grad_norm": 16.0,
"learning_rate": 1.858e-05,
"loss": 8.3548,
"mean_token_accuracy": 0.8485719878226519,
"num_tokens": 46683117.0,
"step": 1640
},
{
"entropy": 0.5536964586004615,
"epoch": 2.64,
"grad_norm": 17.375,
"learning_rate": 1.855777777777778e-05,
"loss": 8.8373,
"mean_token_accuracy": 0.839028225839138,
"num_tokens": 46965534.0,
"step": 1650
},
{
"entropy": 0.5332709014415741,
"epoch": 2.656,
"grad_norm": 16.75,
"learning_rate": 1.8535555555555557e-05,
"loss": 8.4657,
"mean_token_accuracy": 0.844694945588708,
"num_tokens": 47245852.0,
"step": 1660
},
{
"entropy": 0.5504178514704108,
"epoch": 2.672,
"grad_norm": 18.0,
"learning_rate": 1.8513333333333335e-05,
"loss": 8.8108,
"mean_token_accuracy": 0.8397566247731447,
"num_tokens": 47524916.0,
"step": 1670
},
{
"entropy": 0.5476151436567307,
"epoch": 2.6879999999999997,
"grad_norm": 15.5,
"learning_rate": 1.8491111111111112e-05,
"loss": 8.7263,
"mean_token_accuracy": 0.8412932168692351,
"num_tokens": 47807131.0,
"step": 1680
},
{
"entropy": 0.5529261413961649,
"epoch": 2.7039999999999997,
"grad_norm": 16.75,
"learning_rate": 1.846888888888889e-05,
"loss": 8.8394,
"mean_token_accuracy": 0.838797665014863,
"num_tokens": 48099654.0,
"step": 1690
},
{
"entropy": 0.5456200305372476,
"epoch": 2.7199999999999998,
"grad_norm": 17.625,
"learning_rate": 1.8446666666666667e-05,
"loss": 8.6682,
"mean_token_accuracy": 0.841873237863183,
"num_tokens": 48375019.0,
"step": 1700
},
{
"epoch": 2.7199999999999998,
"eval_biology_entropy": 0.9218110795021057,
"eval_biology_loss": 1.3180720806121826,
"eval_biology_mean_token_accuracy": 0.684489251613617,
"eval_biology_num_tokens": 48375019.0,
"eval_biology_runtime": 19.7671,
"eval_biology_samples_per_second": 25.295,
"eval_biology_steps_per_second": 6.324,
"step": 1700
},
{
"epoch": 2.7199999999999998,
"eval_math_entropy": 0.5653176684379577,
"eval_math_loss": 0.6293387413024902,
"eval_math_mean_token_accuracy": 0.8265204019546509,
"eval_math_num_tokens": 48375019.0,
"eval_math_runtime": 24.6242,
"eval_math_samples_per_second": 20.305,
"eval_math_steps_per_second": 5.076,
"step": 1700
},
{
"entropy": 0.5463435992598533,
"epoch": 2.7359999999999998,
"grad_norm": 17.875,
"learning_rate": 1.842444444444445e-05,
"loss": 8.7467,
"mean_token_accuracy": 0.8409269347786903,
"num_tokens": 48659284.0,
"step": 1710
},
{
"entropy": 0.5540915697813034,
"epoch": 2.752,
"grad_norm": 16.625,
"learning_rate": 1.8402222222222223e-05,
"loss": 8.8179,
"mean_token_accuracy": 0.8404988449066877,
"num_tokens": 48943804.0,
"step": 1720
},
{
"entropy": 0.5476498136296868,
"epoch": 2.768,
"grad_norm": 15.75,
"learning_rate": 1.8380000000000004e-05,
"loss": 8.8006,
"mean_token_accuracy": 0.841009271889925,
"num_tokens": 49230939.0,
"step": 1730
},
{
"entropy": 0.5640784077346325,
"epoch": 2.784,
"grad_norm": 17.0,
"learning_rate": 1.8357777777777778e-05,
"loss": 9.005,
"mean_token_accuracy": 0.8373467523604632,
"num_tokens": 49504425.0,
"step": 1740
},
{
"entropy": 0.5430868171155453,
"epoch": 2.8,
"grad_norm": 17.25,
"learning_rate": 1.833555555555556e-05,
"loss": 8.6415,
"mean_token_accuracy": 0.8431323904544115,
"num_tokens": 49782661.0,
"step": 1750
},
{
"entropy": 0.5524541085585952,
"epoch": 2.816,
"grad_norm": 17.0,
"learning_rate": 1.8313333333333333e-05,
"loss": 8.7568,
"mean_token_accuracy": 0.8402639802545309,
"num_tokens": 50073632.0,
"step": 1760
},
{
"entropy": 0.5354580119252205,
"epoch": 2.832,
"grad_norm": 17.875,
"learning_rate": 1.8291111111111114e-05,
"loss": 8.632,
"mean_token_accuracy": 0.8425567515194416,
"num_tokens": 50356964.0,
"step": 1770
},
{
"entropy": 0.5553580898791551,
"epoch": 2.848,
"grad_norm": 15.625,
"learning_rate": 1.8268888888888888e-05,
"loss": 8.8708,
"mean_token_accuracy": 0.8403212446719408,
"num_tokens": 50644535.0,
"step": 1780
},
{
"entropy": 0.5478905290365219,
"epoch": 2.864,
"grad_norm": 18.375,
"learning_rate": 1.824666666666667e-05,
"loss": 8.7886,
"mean_token_accuracy": 0.841050173342228,
"num_tokens": 50925653.0,
"step": 1790
},
{
"entropy": 0.5450881006196141,
"epoch": 2.88,
"grad_norm": 16.625,
"learning_rate": 1.8224444444444447e-05,
"loss": 8.6642,
"mean_token_accuracy": 0.84115383438766,
"num_tokens": 51204374.0,
"step": 1800
},
{
"epoch": 2.88,
"eval_biology_entropy": 0.9217254042625427,
"eval_biology_loss": 1.3202892541885376,
"eval_biology_mean_token_accuracy": 0.6841764874458313,
"eval_biology_num_tokens": 51204374.0,
"eval_biology_runtime": 19.813,
"eval_biology_samples_per_second": 25.236,
"eval_biology_steps_per_second": 6.309,
"step": 1800
},
{
"epoch": 2.88,
"eval_math_entropy": 0.5681756961345673,
"eval_math_loss": 0.6243875026702881,
"eval_math_mean_token_accuracy": 0.8276074986457824,
"eval_math_num_tokens": 51204374.0,
"eval_math_runtime": 24.6389,
"eval_math_samples_per_second": 20.293,
"eval_math_steps_per_second": 5.073,
"step": 1800
},
{
"entropy": 0.5379522321745753,
"epoch": 2.896,
"grad_norm": 17.75,
"learning_rate": 1.8202222222222225e-05,
"loss": 8.6271,
"mean_token_accuracy": 0.8437154974788428,
"num_tokens": 51483944.0,
"step": 1810
},
{
"entropy": 0.5471471425145864,
"epoch": 2.912,
"grad_norm": 17.0,
"learning_rate": 1.8180000000000002e-05,
"loss": 8.6769,
"mean_token_accuracy": 0.8418111637234688,
"num_tokens": 51765755.0,
"step": 1820
},
{
"entropy": 0.5508731028065086,
"epoch": 2.928,
"grad_norm": 17.0,
"learning_rate": 1.815777777777778e-05,
"loss": 8.7862,
"mean_token_accuracy": 0.8419726848602295,
"num_tokens": 52056379.0,
"step": 1830
},
{
"entropy": 0.5354436157271266,
"epoch": 2.944,
"grad_norm": 16.375,
"learning_rate": 1.8135555555555557e-05,
"loss": 8.5928,
"mean_token_accuracy": 0.8430151861160994,
"num_tokens": 52346232.0,
"step": 1840
},
{
"entropy": 0.5504492402076722,
"epoch": 2.96,
"grad_norm": 17.5,
"learning_rate": 1.8113333333333335e-05,
"loss": 8.7501,
"mean_token_accuracy": 0.8406570095568895,
"num_tokens": 52633789.0,
"step": 1850
},
{
"entropy": 0.5475983273237943,
"epoch": 2.976,
"grad_norm": 15.25,
"learning_rate": 1.8091111111111113e-05,
"loss": 8.714,
"mean_token_accuracy": 0.8408999726176262,
"num_tokens": 52911755.0,
"step": 1860
},
{
"entropy": 0.5406377092003822,
"epoch": 2.992,
"grad_norm": 16.0,
"learning_rate": 1.806888888888889e-05,
"loss": 8.6328,
"mean_token_accuracy": 0.8425012800842524,
"num_tokens": 53198176.0,
"step": 1870
},
{
"entropy": 0.5113964939489961,
"epoch": 3.008,
"grad_norm": 22.875,
"learning_rate": 1.8046666666666668e-05,
"loss": 7.822,
"mean_token_accuracy": 0.8548128705471754,
"num_tokens": 53481893.0,
"step": 1880
},
{
"entropy": 0.4152779897674918,
"epoch": 3.024,
"grad_norm": 22.0,
"learning_rate": 1.8024444444444445e-05,
"loss": 6.7546,
"mean_token_accuracy": 0.8709000959992409,
"num_tokens": 53771717.0,
"step": 1890
},
{
"entropy": 0.40918179890140893,
"epoch": 3.04,
"grad_norm": 22.75,
"learning_rate": 1.8002222222222223e-05,
"loss": 6.5585,
"mean_token_accuracy": 0.8754206687211991,
"num_tokens": 54058045.0,
"step": 1900
},
{
"epoch": 3.04,
"eval_biology_entropy": 0.6926028978824615,
"eval_biology_loss": 1.5856647491455078,
"eval_biology_mean_token_accuracy": 0.6686906161308288,
"eval_biology_num_tokens": 54058045.0,
"eval_biology_runtime": 19.7886,
"eval_biology_samples_per_second": 25.267,
"eval_biology_steps_per_second": 6.317,
"step": 1900
},
{
"epoch": 3.04,
"eval_math_entropy": 0.46198054814338685,
"eval_math_loss": 0.6858065724372864,
"eval_math_mean_token_accuracy": 0.822696931362152,
"eval_math_num_tokens": 54058045.0,
"eval_math_runtime": 24.6254,
"eval_math_samples_per_second": 20.304,
"eval_math_steps_per_second": 5.076,
"step": 1900
},
{
"entropy": 0.40489907208830117,
"epoch": 3.056,
"grad_norm": 25.25,
"learning_rate": 1.798e-05,
"loss": 6.3683,
"mean_token_accuracy": 0.8761902552098035,
"num_tokens": 54334332.0,
"step": 1910
},
{
"entropy": 0.40934212449938057,
"epoch": 3.072,
"grad_norm": 23.5,
"learning_rate": 1.7957777777777778e-05,
"loss": 6.4642,
"mean_token_accuracy": 0.8751021821051836,
"num_tokens": 54624543.0,
"step": 1920
},
{
"entropy": 0.39203624669462445,
"epoch": 3.088,
"grad_norm": 25.375,
"learning_rate": 1.7935555555555556e-05,
"loss": 6.2116,
"mean_token_accuracy": 0.8796712458133698,
"num_tokens": 54907550.0,
"step": 1930
},
{
"entropy": 0.40762526309117675,
"epoch": 3.104,
"grad_norm": 23.375,
"learning_rate": 1.7913333333333337e-05,
"loss": 6.4546,
"mean_token_accuracy": 0.8756711948662996,
"num_tokens": 55190959.0,
"step": 1940
},
{
"entropy": 0.39818487148731946,
"epoch": 3.12,
"grad_norm": 24.0,
"learning_rate": 1.789111111111111e-05,
"loss": 6.3256,
"mean_token_accuracy": 0.8780492424964905,
"num_tokens": 55481635.0,
"step": 1950
},
{
"entropy": 0.4032851942814887,
"epoch": 3.136,
"grad_norm": 24.5,
"learning_rate": 1.7868888888888892e-05,
"loss": 6.4233,
"mean_token_accuracy": 0.8760235741734504,
"num_tokens": 55769010.0,
"step": 1960
},
{
"entropy": 0.40782611249014733,
"epoch": 3.152,
"grad_norm": 23.25,
"learning_rate": 1.7846666666666666e-05,
"loss": 6.4685,
"mean_token_accuracy": 0.8753455895930529,
"num_tokens": 56053160.0,
"step": 1970
},
{
"entropy": 0.41834324020892383,
"epoch": 3.168,
"grad_norm": 24.75,
"learning_rate": 1.7824444444444447e-05,
"loss": 6.6597,
"mean_token_accuracy": 0.8723560575395822,
"num_tokens": 56337066.0,
"step": 1980
},
{
"entropy": 0.4165022653527558,
"epoch": 3.184,
"grad_norm": 27.375,
"learning_rate": 1.780222222222222e-05,
"loss": 6.5528,
"mean_token_accuracy": 0.8731590420007705,
"num_tokens": 56618899.0,
"step": 1990
},
{
"entropy": 0.40481978207826613,
"epoch": 3.2,
"grad_norm": 22.625,
"learning_rate": 1.7780000000000003e-05,
"loss": 6.4003,
"mean_token_accuracy": 0.8771062396466732,
"num_tokens": 56910071.0,
"step": 2000
},
{
"epoch": 3.2,
"eval_biology_entropy": 0.6016733210086822,
"eval_biology_loss": 1.7224782705307007,
"eval_biology_mean_token_accuracy": 0.6672822990417481,
"eval_biology_num_tokens": 56910071.0,
"eval_biology_runtime": 19.7656,
"eval_biology_samples_per_second": 25.297,
"eval_biology_steps_per_second": 6.324,
"step": 2000
},
{
"epoch": 3.2,
"eval_math_entropy": 0.4420904459953308,
"eval_math_loss": 0.6964770555496216,
"eval_math_mean_token_accuracy": 0.8225251660346985,
"eval_math_num_tokens": 56910071.0,
"eval_math_runtime": 24.578,
"eval_math_samples_per_second": 20.343,
"eval_math_steps_per_second": 5.086,
"step": 2000
}
],
"logging_steps": 10,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.791878293573609e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}