{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 2.085290367901325, "epoch": 0.008, "grad_norm": 0.0146484375, "learning_rate": 1.8e-07, "loss": 2.8737, "mean_token_accuracy": 0.48555898685008286, "num_tokens": 173886.0, "step": 10 }, { "entropy": 2.082674165070057, "epoch": 0.016, "grad_norm": 0.00750732421875, "learning_rate": 3.8e-07, "loss": 2.8766, "mean_token_accuracy": 0.48314503096044065, "num_tokens": 338352.0, "step": 20 }, { "entropy": 2.083346585184336, "epoch": 0.024, "grad_norm": 0.01116943359375, "learning_rate": 5.800000000000001e-07, "loss": 2.8206, "mean_token_accuracy": 0.4898203007876873, "num_tokens": 505613.0, "step": 30 }, { "entropy": 2.084573998302221, "epoch": 0.032, "grad_norm": 0.01361083984375, "learning_rate": 7.8e-07, "loss": 2.846, "mean_token_accuracy": 0.4879810094833374, "num_tokens": 671193.0, "step": 40 }, { "entropy": 2.112175610661507, "epoch": 0.04, "grad_norm": 0.0184326171875, "learning_rate": 9.800000000000001e-07, "loss": 2.8945, "mean_token_accuracy": 0.48235367350280284, "num_tokens": 835868.0, "step": 50 }, { "entropy": 2.004745631664991, "epoch": 0.048, "grad_norm": 0.01055908203125, "learning_rate": 1.1800000000000001e-06, "loss": 2.7943, "mean_token_accuracy": 0.4953182445839047, "num_tokens": 1001008.0, "step": 60 }, { "entropy": 2.0760968685150147, "epoch": 0.056, "grad_norm": 0.013427734375, "learning_rate": 1.3800000000000001e-06, "loss": 2.8689, "mean_token_accuracy": 0.48274643896147607, "num_tokens": 1166615.0, "step": 70 }, { "entropy": 2.1421599693596365, "epoch": 0.064, "grad_norm": 0.01226806640625, "learning_rate": 1.5800000000000001e-06, "loss": 2.927, "mean_token_accuracy": 0.47564963400363924, "num_tokens": 1331208.0, "step": 80 }, { "entropy": 2.0629068821668626, "epoch": 0.072, "grad_norm": 0.00994873046875, "learning_rate": 1.7800000000000001e-06, "loss": 2.8669, "mean_token_accuracy": 0.48655857108533385, "num_tokens": 1499449.0, "step": 90 }, { "entropy": 2.1223726868629456, "epoch": 0.08, "grad_norm": 0.0184326171875, "learning_rate": 1.98e-06, "loss": 2.8834, "mean_token_accuracy": 0.48028963077813386, "num_tokens": 1667532.0, "step": 100 }, { "epoch": 0.08, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 1667532.0, "eval_coding_runtime": 96.0471, "eval_coding_samples_per_second": 5.206, "eval_coding_steps_per_second": 2.603, "step": 100 }, { "epoch": 0.08, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 1667532.0, "eval_physics_runtime": 63.6847, "eval_physics_samples_per_second": 7.851, "eval_physics_steps_per_second": 3.926, "step": 100 }, { "entropy": 2.028460328280926, "epoch": 0.088, "grad_norm": 0.00732421875, "learning_rate": 2.1800000000000003e-06, "loss": 2.7998, "mean_token_accuracy": 0.4919602179899812, "num_tokens": 1835489.0, "step": 110 }, { "entropy": 2.0500536493957044, "epoch": 0.096, "grad_norm": 0.0166015625, "learning_rate": 2.38e-06, "loss": 2.843, "mean_token_accuracy": 0.486666096188128, "num_tokens": 2007176.0, "step": 120 }, { "entropy": 2.133950074762106, "epoch": 0.104, "grad_norm": 0.00897216796875, "learning_rate": 2.5800000000000003e-06, "loss": 2.908, "mean_token_accuracy": 0.47787463404238223, "num_tokens": 2171335.0, "step": 130 }, { "entropy": 2.0415130145847797, "epoch": 0.112, "grad_norm": 0.0125732421875, "learning_rate": 2.7800000000000005e-06, "loss": 2.8213, "mean_token_accuracy": 0.48844432439655067, "num_tokens": 2342259.0, "step": 140 }, { "entropy": 2.19760585129261, "epoch": 0.12, "grad_norm": 0.01519775390625, "learning_rate": 2.9800000000000003e-06, "loss": 2.9587, "mean_token_accuracy": 0.4719714432954788, "num_tokens": 2504279.0, "step": 150 }, { "entropy": 2.133862767368555, "epoch": 0.128, "grad_norm": 0.0101318359375, "learning_rate": 3.1800000000000005e-06, "loss": 2.895, "mean_token_accuracy": 0.48206167966127395, "num_tokens": 2663987.0, "step": 160 }, { "entropy": 2.1023842833936213, "epoch": 0.136, "grad_norm": 0.00836181640625, "learning_rate": 3.3800000000000007e-06, "loss": 2.8679, "mean_token_accuracy": 0.48314376436173917, "num_tokens": 2829720.0, "step": 170 }, { "entropy": 2.096611645817757, "epoch": 0.144, "grad_norm": 0.016845703125, "learning_rate": 3.58e-06, "loss": 2.882, "mean_token_accuracy": 0.4854999572038651, "num_tokens": 2997824.0, "step": 180 }, { "entropy": 2.104377479106188, "epoch": 0.152, "grad_norm": 0.00860595703125, "learning_rate": 3.7800000000000002e-06, "loss": 2.8908, "mean_token_accuracy": 0.48179021421819923, "num_tokens": 3159946.0, "step": 190 }, { "entropy": 2.091322401165962, "epoch": 0.16, "grad_norm": 0.00927734375, "learning_rate": 3.980000000000001e-06, "loss": 2.8428, "mean_token_accuracy": 0.4882024429738522, "num_tokens": 3330597.0, "step": 200 }, { "epoch": 0.16, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 3330597.0, "eval_coding_runtime": 95.6196, "eval_coding_samples_per_second": 5.229, "eval_coding_steps_per_second": 2.615, "step": 200 }, { "epoch": 0.16, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 3330597.0, "eval_physics_runtime": 63.6492, "eval_physics_samples_per_second": 7.856, "eval_physics_steps_per_second": 3.928, "step": 200 }, { "entropy": 2.142216607928276, "epoch": 0.168, "grad_norm": 0.00701904296875, "learning_rate": 4.18e-06, "loss": 2.9186, "mean_token_accuracy": 0.4779110599309206, "num_tokens": 3496104.0, "step": 210 }, { "entropy": 2.1577497258782388, "epoch": 0.176, "grad_norm": 0.010498046875, "learning_rate": 4.38e-06, "loss": 2.9356, "mean_token_accuracy": 0.4760912239551544, "num_tokens": 3658264.0, "step": 220 }, { "entropy": 2.1311790965497495, "epoch": 0.184, "grad_norm": 0.006317138671875, "learning_rate": 4.58e-06, "loss": 2.8804, "mean_token_accuracy": 0.48113634046167136, "num_tokens": 3821352.0, "step": 230 }, { "entropy": 2.110344947129488, "epoch": 0.192, "grad_norm": 0.018798828125, "learning_rate": 4.78e-06, "loss": 2.8458, "mean_token_accuracy": 0.4866057766601443, "num_tokens": 3995568.0, "step": 240 }, { "entropy": 2.1274008348584177, "epoch": 0.2, "grad_norm": 0.00970458984375, "learning_rate": 4.980000000000001e-06, "loss": 2.9325, "mean_token_accuracy": 0.47856651023030283, "num_tokens": 4160918.0, "step": 250 }, { "entropy": 2.0445496000349523, "epoch": 0.208, "grad_norm": 0.0072021484375, "learning_rate": 5.18e-06, "loss": 2.7895, "mean_token_accuracy": 0.49246302992105484, "num_tokens": 4321436.0, "step": 260 }, { "entropy": 2.118650445342064, "epoch": 0.216, "grad_norm": 0.0118408203125, "learning_rate": 5.380000000000001e-06, "loss": 2.831, "mean_token_accuracy": 0.48565028738230465, "num_tokens": 4482225.0, "step": 270 }, { "entropy": 2.0710766136646273, "epoch": 0.224, "grad_norm": 0.00823974609375, "learning_rate": 5.580000000000001e-06, "loss": 2.8524, "mean_token_accuracy": 0.4889204766601324, "num_tokens": 4648491.0, "step": 280 }, { "entropy": 2.0747790560126305, "epoch": 0.232, "grad_norm": 0.009033203125, "learning_rate": 5.78e-06, "loss": 2.8736, "mean_token_accuracy": 0.4836031161248684, "num_tokens": 4813449.0, "step": 290 }, { "entropy": 2.1116694904863835, "epoch": 0.24, "grad_norm": 0.01141357421875, "learning_rate": 5.98e-06, "loss": 2.8974, "mean_token_accuracy": 0.47815693244338037, "num_tokens": 4986175.0, "step": 300 }, { "epoch": 0.24, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 4986175.0, "eval_coding_runtime": 95.7299, "eval_coding_samples_per_second": 5.223, "eval_coding_steps_per_second": 2.612, "step": 300 }, { "epoch": 0.24, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 4986175.0, "eval_physics_runtime": 63.6784, "eval_physics_samples_per_second": 7.852, "eval_physics_steps_per_second": 3.926, "step": 300 }, { "entropy": 2.085676248371601, "epoch": 0.248, "grad_norm": 0.01513671875, "learning_rate": 6.18e-06, "loss": 2.8547, "mean_token_accuracy": 0.4871716873720288, "num_tokens": 5164741.0, "step": 310 }, { "entropy": 2.111249604821205, "epoch": 0.256, "grad_norm": 0.01324462890625, "learning_rate": 6.380000000000001e-06, "loss": 2.869, "mean_token_accuracy": 0.482149850204587, "num_tokens": 5329320.0, "step": 320 }, { "entropy": 2.0735500395298003, "epoch": 0.264, "grad_norm": 0.01300048828125, "learning_rate": 6.5800000000000005e-06, "loss": 2.815, "mean_token_accuracy": 0.4895795300602913, "num_tokens": 5497259.0, "step": 330 }, { "entropy": 2.1957527093589304, "epoch": 0.272, "grad_norm": 0.0155029296875, "learning_rate": 6.780000000000001e-06, "loss": 2.9296, "mean_token_accuracy": 0.4737250735983253, "num_tokens": 5658796.0, "step": 340 }, { "entropy": 2.0978702254593373, "epoch": 0.28, "grad_norm": 0.012939453125, "learning_rate": 6.98e-06, "loss": 2.9223, "mean_token_accuracy": 0.47742239478975534, "num_tokens": 5819027.0, "step": 350 }, { "entropy": 2.126638425886631, "epoch": 0.288, "grad_norm": 0.01104736328125, "learning_rate": 7.180000000000001e-06, "loss": 2.8822, "mean_token_accuracy": 0.48392471522092817, "num_tokens": 5980201.0, "step": 360 }, { "entropy": 2.1021372735500337, "epoch": 0.296, "grad_norm": 0.0208740234375, "learning_rate": 7.3800000000000005e-06, "loss": 2.8638, "mean_token_accuracy": 0.4845348121598363, "num_tokens": 6139830.0, "step": 370 }, { "entropy": 2.1106897443532944, "epoch": 0.304, "grad_norm": 0.00787353515625, "learning_rate": 7.58e-06, "loss": 2.8357, "mean_token_accuracy": 0.4834788400679827, "num_tokens": 6298858.0, "step": 380 }, { "entropy": 2.143523923307657, "epoch": 0.312, "grad_norm": 0.00958251953125, "learning_rate": 7.78e-06, "loss": 2.9236, "mean_token_accuracy": 0.47671638112515213, "num_tokens": 6456502.0, "step": 390 }, { "entropy": 2.1132052429020405, "epoch": 0.32, "grad_norm": 0.01336669921875, "learning_rate": 7.980000000000002e-06, "loss": 2.8795, "mean_token_accuracy": 0.48184755612164737, "num_tokens": 6622798.0, "step": 400 }, { "epoch": 0.32, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 6622798.0, "eval_coding_runtime": 95.588, "eval_coding_samples_per_second": 5.231, "eval_coding_steps_per_second": 2.615, "step": 400 }, { "epoch": 0.32, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 6622798.0, "eval_physics_runtime": 63.5621, "eval_physics_samples_per_second": 7.866, "eval_physics_steps_per_second": 3.933, "step": 400 }, { "entropy": 2.1359100699424745, "epoch": 0.328, "grad_norm": 0.01220703125, "learning_rate": 8.18e-06, "loss": 2.892, "mean_token_accuracy": 0.4762568075209856, "num_tokens": 6782985.0, "step": 410 }, { "entropy": 2.080799935385585, "epoch": 0.336, "grad_norm": 0.021484375, "learning_rate": 8.380000000000001e-06, "loss": 2.8705, "mean_token_accuracy": 0.4826640056446195, "num_tokens": 6953611.0, "step": 420 }, { "entropy": 2.0621030911803246, "epoch": 0.344, "grad_norm": 0.013427734375, "learning_rate": 8.580000000000001e-06, "loss": 2.777, "mean_token_accuracy": 0.48948597013950346, "num_tokens": 7121949.0, "step": 430 }, { "entropy": 2.0954283088445664, "epoch": 0.352, "grad_norm": 0.01080322265625, "learning_rate": 8.78e-06, "loss": 2.8529, "mean_token_accuracy": 0.48598715253174307, "num_tokens": 7290378.0, "step": 440 }, { "entropy": 2.1145325861871243, "epoch": 0.36, "grad_norm": 0.01611328125, "learning_rate": 8.98e-06, "loss": 2.864, "mean_token_accuracy": 0.48475142791867254, "num_tokens": 7452920.0, "step": 450 }, { "entropy": 2.0749227844178675, "epoch": 0.368, "grad_norm": 0.0255126953125, "learning_rate": 9.180000000000002e-06, "loss": 2.842, "mean_token_accuracy": 0.4888931142166257, "num_tokens": 7621082.0, "step": 460 }, { "entropy": 2.158674482256174, "epoch": 0.376, "grad_norm": 0.0189208984375, "learning_rate": 9.38e-06, "loss": 2.8965, "mean_token_accuracy": 0.47893171701580284, "num_tokens": 7787036.0, "step": 470 }, { "entropy": 2.172913119196892, "epoch": 0.384, "grad_norm": 0.0211181640625, "learning_rate": 9.58e-06, "loss": 2.9484, "mean_token_accuracy": 0.47502864580601456, "num_tokens": 7955570.0, "step": 480 }, { "entropy": 2.133708968013525, "epoch": 0.392, "grad_norm": 0.0089111328125, "learning_rate": 9.780000000000001e-06, "loss": 2.9115, "mean_token_accuracy": 0.481097811833024, "num_tokens": 8123527.0, "step": 490 }, { "entropy": 2.075808613747358, "epoch": 0.4, "grad_norm": 0.01458740234375, "learning_rate": 9.980000000000001e-06, "loss": 2.8509, "mean_token_accuracy": 0.4880217058584094, "num_tokens": 8291049.0, "step": 500 }, { "epoch": 0.4, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 8291049.0, "eval_coding_runtime": 95.8745, "eval_coding_samples_per_second": 5.215, "eval_coding_steps_per_second": 2.608, "step": 500 }, { "epoch": 0.4, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 8291049.0, "eval_physics_runtime": 63.2475, "eval_physics_samples_per_second": 7.905, "eval_physics_steps_per_second": 3.953, "step": 500 }, { "entropy": 2.0808891989290714, "epoch": 0.408, "grad_norm": 0.00933837890625, "learning_rate": 1.018e-05, "loss": 2.8602, "mean_token_accuracy": 0.48418706730008126, "num_tokens": 8459795.0, "step": 510 }, { "entropy": 2.108520568162203, "epoch": 0.416, "grad_norm": 0.01300048828125, "learning_rate": 1.038e-05, "loss": 2.8709, "mean_token_accuracy": 0.4807485779747367, "num_tokens": 8627310.0, "step": 520 }, { "entropy": 2.095228637009859, "epoch": 0.424, "grad_norm": 0.0186767578125, "learning_rate": 1.0580000000000002e-05, "loss": 2.8575, "mean_token_accuracy": 0.4873119568452239, "num_tokens": 8792574.0, "step": 530 }, { "entropy": 2.0733875431120397, "epoch": 0.432, "grad_norm": 0.0152587890625, "learning_rate": 1.0780000000000002e-05, "loss": 2.8746, "mean_token_accuracy": 0.48152947407215835, "num_tokens": 8958371.0, "step": 540 }, { "entropy": 2.0743850864470006, "epoch": 0.44, "grad_norm": 0.0142822265625, "learning_rate": 1.0980000000000002e-05, "loss": 2.8417, "mean_token_accuracy": 0.48614103216677906, "num_tokens": 9125564.0, "step": 550 }, { "entropy": 2.0296142108738424, "epoch": 0.448, "grad_norm": 0.00909423828125, "learning_rate": 1.1180000000000001e-05, "loss": 2.8092, "mean_token_accuracy": 0.49211322125047446, "num_tokens": 9294388.0, "step": 560 }, { "entropy": 2.042448101937771, "epoch": 0.456, "grad_norm": 0.00897216796875, "learning_rate": 1.138e-05, "loss": 2.8196, "mean_token_accuracy": 0.4887328814715147, "num_tokens": 9455428.0, "step": 570 }, { "entropy": 2.124661027640104, "epoch": 0.464, "grad_norm": 0.0172119140625, "learning_rate": 1.1580000000000001e-05, "loss": 2.8825, "mean_token_accuracy": 0.4844032688066363, "num_tokens": 9620366.0, "step": 580 }, { "entropy": 2.0920108027756212, "epoch": 0.472, "grad_norm": 0.01458740234375, "learning_rate": 1.178e-05, "loss": 2.8011, "mean_token_accuracy": 0.49246231019496917, "num_tokens": 9788925.0, "step": 590 }, { "entropy": 2.0895667865872385, "epoch": 0.48, "grad_norm": 0.007568359375, "learning_rate": 1.198e-05, "loss": 2.8784, "mean_token_accuracy": 0.4846471020951867, "num_tokens": 9955431.0, "step": 600 }, { "epoch": 0.48, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 9955431.0, "eval_coding_runtime": 95.6269, "eval_coding_samples_per_second": 5.229, "eval_coding_steps_per_second": 2.614, "step": 600 }, { "epoch": 0.48, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 9955431.0, "eval_physics_runtime": 63.6584, "eval_physics_samples_per_second": 7.854, "eval_physics_steps_per_second": 3.927, "step": 600 }, { "entropy": 2.1037806943058968, "epoch": 0.488, "grad_norm": 0.01434326171875, "learning_rate": 1.218e-05, "loss": 2.8897, "mean_token_accuracy": 0.48129043709486724, "num_tokens": 10119599.0, "step": 610 }, { "entropy": 2.1045577257871626, "epoch": 0.496, "grad_norm": 0.02294921875, "learning_rate": 1.2380000000000002e-05, "loss": 2.8986, "mean_token_accuracy": 0.4801080621778965, "num_tokens": 10289883.0, "step": 620 }, { "entropy": 2.1075518101453783, "epoch": 0.504, "grad_norm": 0.007354736328125, "learning_rate": 1.2580000000000002e-05, "loss": 2.8519, "mean_token_accuracy": 0.4833444032818079, "num_tokens": 10454683.0, "step": 630 }, { "entropy": 2.106730377674103, "epoch": 0.512, "grad_norm": 0.00823974609375, "learning_rate": 1.2780000000000001e-05, "loss": 2.9125, "mean_token_accuracy": 0.4791097413748503, "num_tokens": 10619468.0, "step": 640 }, { "entropy": 2.149070845544338, "epoch": 0.52, "grad_norm": 0.017822265625, "learning_rate": 1.2980000000000001e-05, "loss": 2.9383, "mean_token_accuracy": 0.4766066731885076, "num_tokens": 10781285.0, "step": 650 }, { "entropy": 2.109221810847521, "epoch": 0.528, "grad_norm": 0.01123046875, "learning_rate": 1.3180000000000001e-05, "loss": 2.8512, "mean_token_accuracy": 0.48691278155893086, "num_tokens": 10944904.0, "step": 660 }, { "entropy": 2.10501976236701, "epoch": 0.536, "grad_norm": 0.0079345703125, "learning_rate": 1.3380000000000002e-05, "loss": 2.8583, "mean_token_accuracy": 0.48396560084074736, "num_tokens": 11106538.0, "step": 670 }, { "entropy": 2.1422630779445173, "epoch": 0.544, "grad_norm": 0.013916015625, "learning_rate": 1.3580000000000002e-05, "loss": 2.9198, "mean_token_accuracy": 0.4756466532126069, "num_tokens": 11266409.0, "step": 680 }, { "entropy": 2.066490865498781, "epoch": 0.552, "grad_norm": 0.0120849609375, "learning_rate": 1.378e-05, "loss": 2.8749, "mean_token_accuracy": 0.48253467567265035, "num_tokens": 11438391.0, "step": 690 }, { "entropy": 2.1218110136687756, "epoch": 0.56, "grad_norm": 0.00811767578125, "learning_rate": 1.398e-05, "loss": 2.8685, "mean_token_accuracy": 0.4830782825127244, "num_tokens": 11605544.0, "step": 700 }, { "epoch": 0.56, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 11605544.0, "eval_coding_runtime": 95.6063, "eval_coding_samples_per_second": 5.23, "eval_coding_steps_per_second": 2.615, "step": 700 }, { "epoch": 0.56, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 11605544.0, "eval_physics_runtime": 63.652, "eval_physics_samples_per_second": 7.855, "eval_physics_steps_per_second": 3.928, "step": 700 }, { "entropy": 2.0374118871986866, "epoch": 0.568, "grad_norm": 0.0164794921875, "learning_rate": 1.418e-05, "loss": 2.8443, "mean_token_accuracy": 0.48784226551651955, "num_tokens": 11765430.0, "step": 710 }, { "entropy": 2.079664882272482, "epoch": 0.576, "grad_norm": 0.01336669921875, "learning_rate": 1.4380000000000001e-05, "loss": 2.8436, "mean_token_accuracy": 0.4859828842803836, "num_tokens": 11931715.0, "step": 720 }, { "entropy": 2.091735219210386, "epoch": 0.584, "grad_norm": 0.00836181640625, "learning_rate": 1.4580000000000001e-05, "loss": 2.891, "mean_token_accuracy": 0.4845219163224101, "num_tokens": 12093686.0, "step": 730 }, { "entropy": 2.092084125429392, "epoch": 0.592, "grad_norm": 0.01153564453125, "learning_rate": 1.478e-05, "loss": 2.8884, "mean_token_accuracy": 0.4797184634953737, "num_tokens": 12257243.0, "step": 740 }, { "entropy": 2.138279449939728, "epoch": 0.6, "grad_norm": 0.01043701171875, "learning_rate": 1.498e-05, "loss": 2.8937, "mean_token_accuracy": 0.4808414502069354, "num_tokens": 12422008.0, "step": 750 }, { "entropy": 2.025119251012802, "epoch": 0.608, "grad_norm": 0.01422119140625, "learning_rate": 1.5180000000000002e-05, "loss": 2.801, "mean_token_accuracy": 0.4929833130910993, "num_tokens": 12583154.0, "step": 760 }, { "entropy": 2.068293011188507, "epoch": 0.616, "grad_norm": 0.018798828125, "learning_rate": 1.5380000000000002e-05, "loss": 2.8332, "mean_token_accuracy": 0.4890221064910293, "num_tokens": 12746458.0, "step": 770 }, { "entropy": 2.1028603583574297, "epoch": 0.624, "grad_norm": 0.009033203125, "learning_rate": 1.5580000000000003e-05, "loss": 2.9238, "mean_token_accuracy": 0.4755176778882742, "num_tokens": 12905392.0, "step": 780 }, { "entropy": 2.103465436398983, "epoch": 0.632, "grad_norm": 0.012939453125, "learning_rate": 1.578e-05, "loss": 2.8914, "mean_token_accuracy": 0.48264262825250626, "num_tokens": 13071058.0, "step": 790 }, { "entropy": 2.107102530449629, "epoch": 0.64, "grad_norm": 0.01373291015625, "learning_rate": 1.5980000000000003e-05, "loss": 2.8937, "mean_token_accuracy": 0.4812551226466894, "num_tokens": 13232198.0, "step": 800 }, { "epoch": 0.64, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 13232198.0, "eval_coding_runtime": 95.5879, "eval_coding_samples_per_second": 5.231, "eval_coding_steps_per_second": 2.615, "step": 800 }, { "epoch": 0.64, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 13232198.0, "eval_physics_runtime": 63.5241, "eval_physics_samples_per_second": 7.871, "eval_physics_steps_per_second": 3.936, "step": 800 }, { "entropy": 2.1226571217179298, "epoch": 0.648, "grad_norm": 0.010009765625, "learning_rate": 1.618e-05, "loss": 2.8715, "mean_token_accuracy": 0.48374479077756405, "num_tokens": 13401064.0, "step": 810 }, { "entropy": 2.071519161760807, "epoch": 0.656, "grad_norm": 0.007476806640625, "learning_rate": 1.638e-05, "loss": 2.8209, "mean_token_accuracy": 0.48990428987890483, "num_tokens": 13575902.0, "step": 820 }, { "entropy": 2.1139444075524807, "epoch": 0.664, "grad_norm": 0.008544921875, "learning_rate": 1.658e-05, "loss": 2.9185, "mean_token_accuracy": 0.48024304024875164, "num_tokens": 13733192.0, "step": 830 }, { "entropy": 2.1097038336098195, "epoch": 0.672, "grad_norm": 0.0201416015625, "learning_rate": 1.6780000000000002e-05, "loss": 2.856, "mean_token_accuracy": 0.48414181135594847, "num_tokens": 13895997.0, "step": 840 }, { "entropy": 2.0350618422031403, "epoch": 0.68, "grad_norm": 0.007354736328125, "learning_rate": 1.698e-05, "loss": 2.8006, "mean_token_accuracy": 0.49249137472361326, "num_tokens": 14074579.0, "step": 850 }, { "entropy": 2.1360582523047924, "epoch": 0.688, "grad_norm": 0.012939453125, "learning_rate": 1.718e-05, "loss": 2.9135, "mean_token_accuracy": 0.48240375574678185, "num_tokens": 14234888.0, "step": 860 }, { "entropy": 2.0854096628725527, "epoch": 0.696, "grad_norm": 0.0159912109375, "learning_rate": 1.7380000000000003e-05, "loss": 2.8889, "mean_token_accuracy": 0.48132998365908863, "num_tokens": 14411150.0, "step": 870 }, { "entropy": 2.1598948813974856, "epoch": 0.704, "grad_norm": 0.0067138671875, "learning_rate": 1.758e-05, "loss": 2.941, "mean_token_accuracy": 0.47370197977870704, "num_tokens": 14567908.0, "step": 880 }, { "entropy": 2.1014511175453663, "epoch": 0.712, "grad_norm": 0.01458740234375, "learning_rate": 1.7780000000000003e-05, "loss": 2.8989, "mean_token_accuracy": 0.4840679431334138, "num_tokens": 14728975.0, "step": 890 }, { "entropy": 2.128746274113655, "epoch": 0.72, "grad_norm": 0.01361083984375, "learning_rate": 1.798e-05, "loss": 2.8664, "mean_token_accuracy": 0.4867586812004447, "num_tokens": 14882927.0, "step": 900 }, { "epoch": 0.72, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 14882927.0, "eval_coding_runtime": 95.8303, "eval_coding_samples_per_second": 5.218, "eval_coding_steps_per_second": 2.609, "step": 900 }, { "epoch": 0.72, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 14882927.0, "eval_physics_runtime": 63.6666, "eval_physics_samples_per_second": 7.853, "eval_physics_steps_per_second": 3.927, "step": 900 }, { "entropy": 2.0901576526463033, "epoch": 0.728, "grad_norm": 0.0067138671875, "learning_rate": 1.8180000000000002e-05, "loss": 2.8708, "mean_token_accuracy": 0.48403808698058126, "num_tokens": 15052770.0, "step": 910 }, { "entropy": 2.122075318545103, "epoch": 0.736, "grad_norm": 0.01129150390625, "learning_rate": 1.8380000000000004e-05, "loss": 2.8633, "mean_token_accuracy": 0.4852804895490408, "num_tokens": 15217342.0, "step": 920 }, { "entropy": 2.093779005110264, "epoch": 0.744, "grad_norm": 0.0079345703125, "learning_rate": 1.858e-05, "loss": 2.8451, "mean_token_accuracy": 0.48721388857811687, "num_tokens": 15385161.0, "step": 930 }, { "entropy": 2.131568420678377, "epoch": 0.752, "grad_norm": 0.0096435546875, "learning_rate": 1.878e-05, "loss": 2.9118, "mean_token_accuracy": 0.479784238897264, "num_tokens": 15547710.0, "step": 940 }, { "entropy": 2.18331568017602, "epoch": 0.76, "grad_norm": 0.007171630859375, "learning_rate": 1.898e-05, "loss": 2.9565, "mean_token_accuracy": 0.4732973873615265, "num_tokens": 15704494.0, "step": 950 }, { "entropy": 2.1320055924355983, "epoch": 0.768, "grad_norm": 0.01177978515625, "learning_rate": 1.918e-05, "loss": 2.8867, "mean_token_accuracy": 0.4821720516309142, "num_tokens": 15877177.0, "step": 960 }, { "entropy": 2.035961114615202, "epoch": 0.776, "grad_norm": 0.008544921875, "learning_rate": 1.938e-05, "loss": 2.8155, "mean_token_accuracy": 0.4874297235161066, "num_tokens": 16050530.0, "step": 970 }, { "entropy": 2.0721310496330263, "epoch": 0.784, "grad_norm": 0.01214599609375, "learning_rate": 1.9580000000000002e-05, "loss": 2.8311, "mean_token_accuracy": 0.4866502169519663, "num_tokens": 16219640.0, "step": 980 }, { "entropy": 2.1055771827697756, "epoch": 0.792, "grad_norm": 0.00927734375, "learning_rate": 1.978e-05, "loss": 2.8748, "mean_token_accuracy": 0.4852980706840754, "num_tokens": 16378820.0, "step": 990 }, { "entropy": 2.0726777322590353, "epoch": 0.8, "grad_norm": 0.01348876953125, "learning_rate": 1.9980000000000002e-05, "loss": 2.8588, "mean_token_accuracy": 0.4857082888484001, "num_tokens": 16548261.0, "step": 1000 }, { "epoch": 0.8, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 16548261.0, "eval_coding_runtime": 95.9331, "eval_coding_samples_per_second": 5.212, "eval_coding_steps_per_second": 2.606, "step": 1000 }, { "epoch": 0.8, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 16548261.0, "eval_physics_runtime": 63.1527, "eval_physics_samples_per_second": 7.917, "eval_physics_steps_per_second": 3.959, "step": 1000 }, { "entropy": 2.1674983091652393, "epoch": 0.808, "grad_norm": 0.0155029296875, "learning_rate": 1.9980000000000002e-05, "loss": 2.9179, "mean_token_accuracy": 0.4778887365013361, "num_tokens": 16711667.0, "step": 1010 }, { "entropy": 2.082773020863533, "epoch": 0.816, "grad_norm": 0.00738525390625, "learning_rate": 1.995777777777778e-05, "loss": 2.8647, "mean_token_accuracy": 0.48503445759415625, "num_tokens": 16870533.0, "step": 1020 }, { "entropy": 2.1195092521607877, "epoch": 0.824, "grad_norm": 0.0084228515625, "learning_rate": 1.9935555555555557e-05, "loss": 2.8734, "mean_token_accuracy": 0.4803088143467903, "num_tokens": 17035600.0, "step": 1030 }, { "entropy": 2.1006917230784894, "epoch": 0.832, "grad_norm": 0.01318359375, "learning_rate": 1.9913333333333335e-05, "loss": 2.8807, "mean_token_accuracy": 0.48227673936635257, "num_tokens": 17196371.0, "step": 1040 }, { "entropy": 2.1442659996449946, "epoch": 0.84, "grad_norm": 0.01171875, "learning_rate": 1.9891111111111112e-05, "loss": 2.891, "mean_token_accuracy": 0.48074845802038907, "num_tokens": 17356919.0, "step": 1050 }, { "entropy": 2.0704655081033705, "epoch": 0.848, "grad_norm": 0.0177001953125, "learning_rate": 1.986888888888889e-05, "loss": 2.8486, "mean_token_accuracy": 0.4880160320550203, "num_tokens": 17527066.0, "step": 1060 }, { "entropy": 2.125109201669693, "epoch": 0.856, "grad_norm": 0.00823974609375, "learning_rate": 1.9846666666666668e-05, "loss": 2.8795, "mean_token_accuracy": 0.4847541594877839, "num_tokens": 17689902.0, "step": 1070 }, { "entropy": 2.1320445723831654, "epoch": 0.864, "grad_norm": 0.0140380859375, "learning_rate": 1.9824444444444445e-05, "loss": 2.8809, "mean_token_accuracy": 0.480629575625062, "num_tokens": 17859605.0, "step": 1080 }, { "entropy": 2.1207774586975576, "epoch": 0.872, "grad_norm": 0.0140380859375, "learning_rate": 1.9802222222222226e-05, "loss": 2.8729, "mean_token_accuracy": 0.4811369523406029, "num_tokens": 18019924.0, "step": 1090 }, { "entropy": 2.1364660277962684, "epoch": 0.88, "grad_norm": 0.01361083984375, "learning_rate": 1.978e-05, "loss": 2.8681, "mean_token_accuracy": 0.4821885580196977, "num_tokens": 18187315.0, "step": 1100 }, { "epoch": 0.88, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 18187315.0, "eval_coding_runtime": 95.6457, "eval_coding_samples_per_second": 5.228, "eval_coding_steps_per_second": 2.614, "step": 1100 }, { "epoch": 0.88, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 18187315.0, "eval_physics_runtime": 63.6627, "eval_physics_samples_per_second": 7.854, "eval_physics_steps_per_second": 3.927, "step": 1100 }, { "entropy": 1.9653398767113686, "epoch": 0.888, "grad_norm": 0.009033203125, "learning_rate": 1.975777777777778e-05, "loss": 2.7728, "mean_token_accuracy": 0.4965917535126209, "num_tokens": 18366329.0, "step": 1110 }, { "entropy": 2.0933822274208067, "epoch": 0.896, "grad_norm": 0.0191650390625, "learning_rate": 1.9735555555555556e-05, "loss": 2.8728, "mean_token_accuracy": 0.4872217344120145, "num_tokens": 18536790.0, "step": 1120 }, { "entropy": 2.1074513986706735, "epoch": 0.904, "grad_norm": 0.007049560546875, "learning_rate": 1.9713333333333337e-05, "loss": 2.9176, "mean_token_accuracy": 0.48107887227088214, "num_tokens": 18701809.0, "step": 1130 }, { "entropy": 2.160377311706543, "epoch": 0.912, "grad_norm": 0.006683349609375, "learning_rate": 1.969111111111111e-05, "loss": 2.9384, "mean_token_accuracy": 0.4739804005250335, "num_tokens": 18869344.0, "step": 1140 }, { "entropy": 2.0753089264035225, "epoch": 0.92, "grad_norm": 0.01416015625, "learning_rate": 1.9668888888888892e-05, "loss": 2.882, "mean_token_accuracy": 0.48446453278884294, "num_tokens": 19033910.0, "step": 1150 }, { "entropy": 2.1365489259362223, "epoch": 0.928, "grad_norm": 0.006988525390625, "learning_rate": 1.9646666666666666e-05, "loss": 2.8854, "mean_token_accuracy": 0.4823284359648824, "num_tokens": 19199834.0, "step": 1160 }, { "entropy": 2.0710911326110364, "epoch": 0.936, "grad_norm": 0.007080078125, "learning_rate": 1.9624444444444447e-05, "loss": 2.8127, "mean_token_accuracy": 0.48797463029623034, "num_tokens": 19361703.0, "step": 1170 }, { "entropy": 2.116609390079975, "epoch": 0.944, "grad_norm": 0.007110595703125, "learning_rate": 1.9602222222222225e-05, "loss": 2.8751, "mean_token_accuracy": 0.48552187960594895, "num_tokens": 19525385.0, "step": 1180 }, { "entropy": 2.0949166089296343, "epoch": 0.952, "grad_norm": 0.03076171875, "learning_rate": 1.9580000000000002e-05, "loss": 2.8499, "mean_token_accuracy": 0.48639670219272374, "num_tokens": 19690715.0, "step": 1190 }, { "entropy": 2.1095651358366014, "epoch": 0.96, "grad_norm": 0.015380859375, "learning_rate": 1.955777777777778e-05, "loss": 2.8706, "mean_token_accuracy": 0.48429451026022435, "num_tokens": 19856973.0, "step": 1200 }, { "epoch": 0.96, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 19856973.0, "eval_coding_runtime": 95.6684, "eval_coding_samples_per_second": 5.226, "eval_coding_steps_per_second": 2.613, "step": 1200 }, { "epoch": 0.96, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 19856973.0, "eval_physics_runtime": 63.6635, "eval_physics_samples_per_second": 7.854, "eval_physics_steps_per_second": 3.927, "step": 1200 }, { "entropy": 2.0797565251588823, "epoch": 0.968, "grad_norm": 0.0091552734375, "learning_rate": 1.9535555555555557e-05, "loss": 2.8871, "mean_token_accuracy": 0.487212173268199, "num_tokens": 20021093.0, "step": 1210 }, { "entropy": 2.0395397424697874, "epoch": 0.976, "grad_norm": 0.01397705078125, "learning_rate": 1.9513333333333335e-05, "loss": 2.8149, "mean_token_accuracy": 0.4898043507710099, "num_tokens": 20190600.0, "step": 1220 }, { "entropy": 2.170710327476263, "epoch": 0.984, "grad_norm": 0.0123291015625, "learning_rate": 1.9491111111111113e-05, "loss": 2.9186, "mean_token_accuracy": 0.4756599869579077, "num_tokens": 20352255.0, "step": 1230 }, { "entropy": 2.074953277409077, "epoch": 0.992, "grad_norm": 0.01092529296875, "learning_rate": 1.946888888888889e-05, "loss": 2.8591, "mean_token_accuracy": 0.48683627638965843, "num_tokens": 20518014.0, "step": 1240 }, { "entropy": 2.1222646929323674, "epoch": 1.0, "grad_norm": 0.00909423828125, "learning_rate": 1.9446666666666668e-05, "loss": 2.8802, "mean_token_accuracy": 0.4825377007946372, "num_tokens": 20686804.0, "step": 1250 }, { "entropy": 2.071950024366379, "epoch": 1.008, "grad_norm": 0.01141357421875, "learning_rate": 1.9424444444444446e-05, "loss": 2.8335, "mean_token_accuracy": 0.4894521238282323, "num_tokens": 20859215.0, "step": 1260 }, { "entropy": 2.178350891917944, "epoch": 1.016, "grad_norm": 0.0086669921875, "learning_rate": 1.9402222222222223e-05, "loss": 2.9515, "mean_token_accuracy": 0.475351152010262, "num_tokens": 21019542.0, "step": 1270 }, { "entropy": 2.0342648893594744, "epoch": 1.024, "grad_norm": 0.01483154296875, "learning_rate": 1.938e-05, "loss": 2.8036, "mean_token_accuracy": 0.4928664604201913, "num_tokens": 21186658.0, "step": 1280 }, { "entropy": 2.0908240154385567, "epoch": 1.032, "grad_norm": 0.00897216796875, "learning_rate": 1.935777777777778e-05, "loss": 2.8868, "mean_token_accuracy": 0.483198188431561, "num_tokens": 21351715.0, "step": 1290 }, { "entropy": 2.1210865639150143, "epoch": 1.04, "grad_norm": 0.008544921875, "learning_rate": 1.9335555555555556e-05, "loss": 2.9071, "mean_token_accuracy": 0.47981371227651837, "num_tokens": 21517237.0, "step": 1300 }, { "epoch": 1.04, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 21517237.0, "eval_coding_runtime": 96.1911, "eval_coding_samples_per_second": 5.198, "eval_coding_steps_per_second": 2.599, "step": 1300 }, { "epoch": 1.04, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 21517237.0, "eval_physics_runtime": 63.9611, "eval_physics_samples_per_second": 7.817, "eval_physics_steps_per_second": 3.909, "step": 1300 }, { "entropy": 2.102020622044802, "epoch": 1.048, "grad_norm": 0.007415771484375, "learning_rate": 1.9313333333333334e-05, "loss": 2.8719, "mean_token_accuracy": 0.4847179941833019, "num_tokens": 21682780.0, "step": 1310 }, { "entropy": 2.045586694777012, "epoch": 1.056, "grad_norm": 0.0086669921875, "learning_rate": 1.9291111111111115e-05, "loss": 2.8189, "mean_token_accuracy": 0.48618846032768487, "num_tokens": 21852528.0, "step": 1320 }, { "entropy": 2.0868709199130535, "epoch": 1.064, "grad_norm": 0.01226806640625, "learning_rate": 1.926888888888889e-05, "loss": 2.8313, "mean_token_accuracy": 0.4878941228613257, "num_tokens": 22021514.0, "step": 1330 }, { "entropy": 2.111675312370062, "epoch": 1.072, "grad_norm": 0.00909423828125, "learning_rate": 1.924666666666667e-05, "loss": 2.889, "mean_token_accuracy": 0.4821753781288862, "num_tokens": 22192380.0, "step": 1340 }, { "entropy": 2.080042862892151, "epoch": 1.08, "grad_norm": 0.02294921875, "learning_rate": 1.9224444444444444e-05, "loss": 2.8401, "mean_token_accuracy": 0.4857699448242784, "num_tokens": 22356835.0, "step": 1350 }, { "entropy": 2.138657582551241, "epoch": 1.088, "grad_norm": 0.006927490234375, "learning_rate": 1.9202222222222225e-05, "loss": 2.9223, "mean_token_accuracy": 0.4771748060360551, "num_tokens": 22517971.0, "step": 1360 }, { "entropy": 2.1065418414771555, "epoch": 1.096, "grad_norm": 0.01422119140625, "learning_rate": 1.918e-05, "loss": 2.8768, "mean_token_accuracy": 0.4816119741648436, "num_tokens": 22681680.0, "step": 1370 }, { "entropy": 2.087523814290762, "epoch": 1.104, "grad_norm": 0.020751953125, "learning_rate": 1.915777777777778e-05, "loss": 2.8938, "mean_token_accuracy": 0.4846269652247429, "num_tokens": 22848694.0, "step": 1380 }, { "entropy": 2.1188573092222214, "epoch": 1.112, "grad_norm": 0.0087890625, "learning_rate": 1.9135555555555555e-05, "loss": 2.8744, "mean_token_accuracy": 0.48116844836622474, "num_tokens": 23016014.0, "step": 1390 }, { "entropy": 2.071516238898039, "epoch": 1.12, "grad_norm": 0.01544189453125, "learning_rate": 1.9113333333333336e-05, "loss": 2.8453, "mean_token_accuracy": 0.48592273648828266, "num_tokens": 23183889.0, "step": 1400 }, { "epoch": 1.12, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 23183889.0, "eval_coding_runtime": 96.0999, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.601, "step": 1400 }, { "epoch": 1.12, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 23183889.0, "eval_physics_runtime": 64.1646, "eval_physics_samples_per_second": 7.792, "eval_physics_steps_per_second": 3.896, "step": 1400 }, { "entropy": 2.0450177520513533, "epoch": 1.1280000000000001, "grad_norm": 0.00823974609375, "learning_rate": 1.9091111111111113e-05, "loss": 2.8314, "mean_token_accuracy": 0.4920926999300718, "num_tokens": 23354096.0, "step": 1410 }, { "entropy": 2.0993775948882103, "epoch": 1.1360000000000001, "grad_norm": 0.0146484375, "learning_rate": 1.906888888888889e-05, "loss": 2.8513, "mean_token_accuracy": 0.4838741825893521, "num_tokens": 23523903.0, "step": 1420 }, { "entropy": 2.0676057398319245, "epoch": 1.144, "grad_norm": 0.0135498046875, "learning_rate": 1.904666666666667e-05, "loss": 2.8661, "mean_token_accuracy": 0.4864560257643461, "num_tokens": 23684571.0, "step": 1430 }, { "entropy": 2.0999209016561506, "epoch": 1.152, "grad_norm": 0.00982666015625, "learning_rate": 1.9024444444444446e-05, "loss": 2.8681, "mean_token_accuracy": 0.4822270443663001, "num_tokens": 23848848.0, "step": 1440 }, { "entropy": 2.110722591727972, "epoch": 1.16, "grad_norm": 0.0086669921875, "learning_rate": 1.9002222222222224e-05, "loss": 2.871, "mean_token_accuracy": 0.48459589947015047, "num_tokens": 24011654.0, "step": 1450 }, { "entropy": 2.181078152358532, "epoch": 1.168, "grad_norm": 0.0098876953125, "learning_rate": 1.898e-05, "loss": 2.943, "mean_token_accuracy": 0.47790372706949713, "num_tokens": 24174589.0, "step": 1460 }, { "entropy": 2.134608647972345, "epoch": 1.176, "grad_norm": 0.0244140625, "learning_rate": 1.895777777777778e-05, "loss": 2.8963, "mean_token_accuracy": 0.4795195382088423, "num_tokens": 24336249.0, "step": 1470 }, { "entropy": 2.0590513460338116, "epoch": 1.184, "grad_norm": 0.025146484375, "learning_rate": 1.8935555555555556e-05, "loss": 2.8476, "mean_token_accuracy": 0.4868919890373945, "num_tokens": 24495981.0, "step": 1480 }, { "entropy": 2.094113527983427, "epoch": 1.192, "grad_norm": 0.021728515625, "learning_rate": 1.8913333333333334e-05, "loss": 2.8476, "mean_token_accuracy": 0.488334871083498, "num_tokens": 24658862.0, "step": 1490 }, { "entropy": 2.066910207271576, "epoch": 1.2, "grad_norm": 0.01214599609375, "learning_rate": 1.8891111111111115e-05, "loss": 2.8177, "mean_token_accuracy": 0.4844546576961875, "num_tokens": 24826245.0, "step": 1500 }, { "epoch": 1.2, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 24826245.0, "eval_coding_runtime": 95.975, "eval_coding_samples_per_second": 5.21, "eval_coding_steps_per_second": 2.605, "step": 1500 }, { "epoch": 1.2, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 24826245.0, "eval_physics_runtime": 63.6317, "eval_physics_samples_per_second": 7.858, "eval_physics_steps_per_second": 3.929, "step": 1500 }, { "entropy": 2.1044005908071997, "epoch": 1.208, "grad_norm": 0.01177978515625, "learning_rate": 1.886888888888889e-05, "loss": 2.8829, "mean_token_accuracy": 0.48166574332863094, "num_tokens": 24988821.0, "step": 1510 }, { "entropy": 2.1516220718622208, "epoch": 1.216, "grad_norm": 0.012451171875, "learning_rate": 1.884666666666667e-05, "loss": 2.9433, "mean_token_accuracy": 0.47657596562057736, "num_tokens": 25147775.0, "step": 1520 }, { "entropy": 2.1071897082030775, "epoch": 1.224, "grad_norm": 0.00860595703125, "learning_rate": 1.8824444444444445e-05, "loss": 2.8592, "mean_token_accuracy": 0.479968504793942, "num_tokens": 25312652.0, "step": 1530 }, { "entropy": 2.031100772321224, "epoch": 1.232, "grad_norm": 0.006103515625, "learning_rate": 1.8802222222222226e-05, "loss": 2.8016, "mean_token_accuracy": 0.4919817751273513, "num_tokens": 25480033.0, "step": 1540 }, { "entropy": 2.0882932603359223, "epoch": 1.24, "grad_norm": 0.017822265625, "learning_rate": 1.878e-05, "loss": 2.87, "mean_token_accuracy": 0.4867149593308568, "num_tokens": 25643821.0, "step": 1550 }, { "entropy": 2.1069738253951074, "epoch": 1.248, "grad_norm": 0.01275634765625, "learning_rate": 1.875777777777778e-05, "loss": 2.9072, "mean_token_accuracy": 0.48037715051323177, "num_tokens": 25811978.0, "step": 1560 }, { "entropy": 2.1235784739255905, "epoch": 1.256, "grad_norm": 0.0098876953125, "learning_rate": 1.873555555555556e-05, "loss": 2.8361, "mean_token_accuracy": 0.4876450767740607, "num_tokens": 25980217.0, "step": 1570 }, { "entropy": 2.07255465015769, "epoch": 1.264, "grad_norm": 0.0205078125, "learning_rate": 1.8713333333333336e-05, "loss": 2.8226, "mean_token_accuracy": 0.48956795781850815, "num_tokens": 26146299.0, "step": 1580 }, { "entropy": 2.083909762650728, "epoch": 1.272, "grad_norm": 0.006805419921875, "learning_rate": 1.8691111111111114e-05, "loss": 2.8296, "mean_token_accuracy": 0.48632726091891526, "num_tokens": 26315594.0, "step": 1590 }, { "entropy": 2.1811861269176007, "epoch": 1.28, "grad_norm": 0.017333984375, "learning_rate": 1.866888888888889e-05, "loss": 2.9711, "mean_token_accuracy": 0.47178333820775153, "num_tokens": 26478207.0, "step": 1600 }, { "epoch": 1.28, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 26478207.0, "eval_coding_runtime": 95.7162, "eval_coding_samples_per_second": 5.224, "eval_coding_steps_per_second": 2.612, "step": 1600 }, { "epoch": 1.28, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 26478207.0, "eval_physics_runtime": 63.6631, "eval_physics_samples_per_second": 7.854, "eval_physics_steps_per_second": 3.927, "step": 1600 }, { "entropy": 2.018047058582306, "epoch": 1.288, "grad_norm": 0.015869140625, "learning_rate": 1.864666666666667e-05, "loss": 2.7963, "mean_token_accuracy": 0.49348701871931555, "num_tokens": 26647567.0, "step": 1610 }, { "entropy": 2.1041888780891895, "epoch": 1.296, "grad_norm": 0.0081787109375, "learning_rate": 1.8624444444444446e-05, "loss": 2.8794, "mean_token_accuracy": 0.4833239272236824, "num_tokens": 26806477.0, "step": 1620 }, { "entropy": 2.0873175472021104, "epoch": 1.304, "grad_norm": 0.0169677734375, "learning_rate": 1.8602222222222224e-05, "loss": 2.8534, "mean_token_accuracy": 0.4844379436224699, "num_tokens": 26973124.0, "step": 1630 }, { "entropy": 2.184805364906788, "epoch": 1.312, "grad_norm": 0.0081787109375, "learning_rate": 1.858e-05, "loss": 2.9204, "mean_token_accuracy": 0.4806702647358179, "num_tokens": 27137528.0, "step": 1640 }, { "entropy": 2.1004837110638617, "epoch": 1.32, "grad_norm": 0.0189208984375, "learning_rate": 1.855777777777778e-05, "loss": 2.839, "mean_token_accuracy": 0.4855312593281269, "num_tokens": 27300986.0, "step": 1650 }, { "entropy": 2.1488297916948795, "epoch": 1.328, "grad_norm": 0.00677490234375, "learning_rate": 1.8535555555555557e-05, "loss": 2.876, "mean_token_accuracy": 0.48554224893450737, "num_tokens": 27465859.0, "step": 1660 }, { "entropy": 2.11965301707387, "epoch": 1.336, "grad_norm": 0.0089111328125, "learning_rate": 1.8513333333333335e-05, "loss": 2.909, "mean_token_accuracy": 0.4828299328684807, "num_tokens": 27627148.0, "step": 1670 }, { "entropy": 2.139040616154671, "epoch": 1.3439999999999999, "grad_norm": 0.01214599609375, "learning_rate": 1.8491111111111112e-05, "loss": 2.9133, "mean_token_accuracy": 0.4807636830955744, "num_tokens": 27795568.0, "step": 1680 }, { "entropy": 2.1508292332291603, "epoch": 1.3519999999999999, "grad_norm": 0.00848388671875, "learning_rate": 1.846888888888889e-05, "loss": 2.9009, "mean_token_accuracy": 0.48149400427937505, "num_tokens": 27958162.0, "step": 1690 }, { "entropy": 2.0931077137589456, "epoch": 1.3599999999999999, "grad_norm": 0.0201416015625, "learning_rate": 1.8446666666666667e-05, "loss": 2.8726, "mean_token_accuracy": 0.4807087674736977, "num_tokens": 28129988.0, "step": 1700 }, { "epoch": 1.3599999999999999, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 28129988.0, "eval_coding_runtime": 96.0975, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.602, "step": 1700 }, { "epoch": 1.3599999999999999, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 28129988.0, "eval_physics_runtime": 63.7443, "eval_physics_samples_per_second": 7.844, "eval_physics_steps_per_second": 3.922, "step": 1700 }, { "entropy": 2.0795333731919525, "epoch": 1.3679999999999999, "grad_norm": 0.0096435546875, "learning_rate": 1.842444444444445e-05, "loss": 2.8522, "mean_token_accuracy": 0.4887755334377289, "num_tokens": 28299534.0, "step": 1710 }, { "entropy": 2.141296311467886, "epoch": 1.376, "grad_norm": 0.0157470703125, "learning_rate": 1.8402222222222223e-05, "loss": 2.9271, "mean_token_accuracy": 0.4771733907982707, "num_tokens": 28465903.0, "step": 1720 }, { "entropy": 2.0781943283975126, "epoch": 1.384, "grad_norm": 0.01055908203125, "learning_rate": 1.8380000000000004e-05, "loss": 2.8574, "mean_token_accuracy": 0.4866001008078456, "num_tokens": 28639289.0, "step": 1730 }, { "entropy": 2.0997987784445287, "epoch": 1.392, "grad_norm": 0.02099609375, "learning_rate": 1.8357777777777778e-05, "loss": 2.8585, "mean_token_accuracy": 0.48320644851773975, "num_tokens": 28801554.0, "step": 1740 }, { "entropy": 2.150196324288845, "epoch": 1.4, "grad_norm": 0.007354736328125, "learning_rate": 1.833555555555556e-05, "loss": 2.9002, "mean_token_accuracy": 0.48246240708976984, "num_tokens": 28963885.0, "step": 1750 }, { "entropy": 2.084288237988949, "epoch": 1.408, "grad_norm": 0.01806640625, "learning_rate": 1.8313333333333333e-05, "loss": 2.8911, "mean_token_accuracy": 0.4849772537127137, "num_tokens": 29125114.0, "step": 1760 }, { "entropy": 2.1347289800643923, "epoch": 1.416, "grad_norm": 0.0120849609375, "learning_rate": 1.8291111111111114e-05, "loss": 2.9129, "mean_token_accuracy": 0.477182531170547, "num_tokens": 29285272.0, "step": 1770 }, { "entropy": 2.1285498954355715, "epoch": 1.424, "grad_norm": 0.018310546875, "learning_rate": 1.8268888888888888e-05, "loss": 2.9215, "mean_token_accuracy": 0.4772155126556754, "num_tokens": 29454334.0, "step": 1780 }, { "entropy": 2.0727533496916295, "epoch": 1.432, "grad_norm": 0.01336669921875, "learning_rate": 1.824666666666667e-05, "loss": 2.8803, "mean_token_accuracy": 0.4822785761207342, "num_tokens": 29616865.0, "step": 1790 }, { "entropy": 2.0606922268867494, "epoch": 1.44, "grad_norm": 0.00860595703125, "learning_rate": 1.8224444444444447e-05, "loss": 2.8232, "mean_token_accuracy": 0.48800144270062445, "num_tokens": 29789253.0, "step": 1800 }, { "epoch": 1.44, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 29789253.0, "eval_coding_runtime": 96.3208, "eval_coding_samples_per_second": 5.191, "eval_coding_steps_per_second": 2.595, "step": 1800 }, { "epoch": 1.44, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 29789253.0, "eval_physics_runtime": 63.9922, "eval_physics_samples_per_second": 7.813, "eval_physics_steps_per_second": 3.907, "step": 1800 }, { "entropy": 2.0618888407945635, "epoch": 1.448, "grad_norm": 0.007171630859375, "learning_rate": 1.8202222222222225e-05, "loss": 2.8573, "mean_token_accuracy": 0.4843838458880782, "num_tokens": 29953006.0, "step": 1810 }, { "entropy": 2.022880507260561, "epoch": 1.456, "grad_norm": 0.0194091796875, "learning_rate": 1.8180000000000002e-05, "loss": 2.8031, "mean_token_accuracy": 0.49401772283017636, "num_tokens": 30128576.0, "step": 1820 }, { "entropy": 2.0860828548669814, "epoch": 1.464, "grad_norm": 0.0294189453125, "learning_rate": 1.815777777777778e-05, "loss": 2.8756, "mean_token_accuracy": 0.4849246619269252, "num_tokens": 30288894.0, "step": 1830 }, { "entropy": 2.188418509066105, "epoch": 1.472, "grad_norm": 0.015869140625, "learning_rate": 1.8135555555555557e-05, "loss": 2.9527, "mean_token_accuracy": 0.4721253639087081, "num_tokens": 30452719.0, "step": 1840 }, { "entropy": 2.085013201087713, "epoch": 1.48, "grad_norm": 0.00946044921875, "learning_rate": 1.8113333333333335e-05, "loss": 2.8646, "mean_token_accuracy": 0.486043793335557, "num_tokens": 30620659.0, "step": 1850 }, { "entropy": 2.143638453632593, "epoch": 1.488, "grad_norm": 0.0108642578125, "learning_rate": 1.8091111111111113e-05, "loss": 2.9264, "mean_token_accuracy": 0.47975865937769413, "num_tokens": 30782343.0, "step": 1860 }, { "entropy": 2.110791251808405, "epoch": 1.496, "grad_norm": 0.00689697265625, "learning_rate": 1.806888888888889e-05, "loss": 2.8831, "mean_token_accuracy": 0.4842625314369798, "num_tokens": 30947703.0, "step": 1870 }, { "entropy": 2.1235111109912395, "epoch": 1.504, "grad_norm": 0.0181884765625, "learning_rate": 1.8046666666666668e-05, "loss": 2.8413, "mean_token_accuracy": 0.4860221529379487, "num_tokens": 31113238.0, "step": 1880 }, { "entropy": 2.0822810381650925, "epoch": 1.512, "grad_norm": 0.0125732421875, "learning_rate": 1.8024444444444445e-05, "loss": 2.864, "mean_token_accuracy": 0.48232704903930423, "num_tokens": 31283883.0, "step": 1890 }, { "entropy": 2.1699491038918497, "epoch": 1.52, "grad_norm": 0.0101318359375, "learning_rate": 1.8002222222222223e-05, "loss": 2.9505, "mean_token_accuracy": 0.4706884307786822, "num_tokens": 31444589.0, "step": 1900 }, { "epoch": 1.52, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 31444589.0, "eval_coding_runtime": 95.9376, "eval_coding_samples_per_second": 5.212, "eval_coding_steps_per_second": 2.606, "step": 1900 }, { "epoch": 1.52, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 31444589.0, "eval_physics_runtime": 63.8135, "eval_physics_samples_per_second": 7.835, "eval_physics_steps_per_second": 3.918, "step": 1900 }, { "entropy": 2.1199583016335963, "epoch": 1.528, "grad_norm": 0.007598876953125, "learning_rate": 1.798e-05, "loss": 2.8928, "mean_token_accuracy": 0.4802051903679967, "num_tokens": 31608952.0, "step": 1910 }, { "entropy": 2.175958313792944, "epoch": 1.536, "grad_norm": 0.0089111328125, "learning_rate": 1.7957777777777778e-05, "loss": 2.9365, "mean_token_accuracy": 0.4747030269354582, "num_tokens": 31774137.0, "step": 1920 }, { "entropy": 2.086700049042702, "epoch": 1.544, "grad_norm": 0.0113525390625, "learning_rate": 1.7935555555555556e-05, "loss": 2.8777, "mean_token_accuracy": 0.4859625767916441, "num_tokens": 31937927.0, "step": 1930 }, { "entropy": 2.1010241121053697, "epoch": 1.552, "grad_norm": 0.0068359375, "learning_rate": 1.7913333333333337e-05, "loss": 2.8916, "mean_token_accuracy": 0.4840227223932743, "num_tokens": 32101776.0, "step": 1940 }, { "entropy": 2.1249936796724795, "epoch": 1.56, "grad_norm": 0.01446533203125, "learning_rate": 1.789111111111111e-05, "loss": 2.8819, "mean_token_accuracy": 0.48242205642163755, "num_tokens": 32271213.0, "step": 1950 }, { "entropy": 2.040369462966919, "epoch": 1.568, "grad_norm": 0.0240478515625, "learning_rate": 1.7868888888888892e-05, "loss": 2.8209, "mean_token_accuracy": 0.48918427657335994, "num_tokens": 32428021.0, "step": 1960 }, { "entropy": 2.2181196875870226, "epoch": 1.576, "grad_norm": 0.0108642578125, "learning_rate": 1.7846666666666666e-05, "loss": 2.9794, "mean_token_accuracy": 0.4697700636461377, "num_tokens": 32585407.0, "step": 1970 }, { "entropy": 2.096325629204512, "epoch": 1.584, "grad_norm": 0.0196533203125, "learning_rate": 1.7824444444444447e-05, "loss": 2.8507, "mean_token_accuracy": 0.4877478454262018, "num_tokens": 32752735.0, "step": 1980 }, { "entropy": 2.1082000881433487, "epoch": 1.592, "grad_norm": 0.0079345703125, "learning_rate": 1.780222222222222e-05, "loss": 2.8789, "mean_token_accuracy": 0.482938090339303, "num_tokens": 32912519.0, "step": 1990 }, { "entropy": 2.139867788553238, "epoch": 1.6, "grad_norm": 0.01007080078125, "learning_rate": 1.7780000000000003e-05, "loss": 2.9136, "mean_token_accuracy": 0.4764457972720265, "num_tokens": 33075822.0, "step": 2000 }, { "epoch": 1.6, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 33075822.0, "eval_coding_runtime": 96.1948, "eval_coding_samples_per_second": 5.198, "eval_coding_steps_per_second": 2.599, "step": 2000 }, { "epoch": 1.6, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 33075822.0, "eval_physics_runtime": 63.6877, "eval_physics_samples_per_second": 7.851, "eval_physics_steps_per_second": 3.925, "step": 2000 }, { "entropy": 2.0446168780326843, "epoch": 1.608, "grad_norm": 0.00750732421875, "learning_rate": 1.7757777777777777e-05, "loss": 2.8161, "mean_token_accuracy": 0.49365365616977214, "num_tokens": 33250394.0, "step": 2010 }, { "entropy": 2.0819836497306823, "epoch": 1.616, "grad_norm": 0.023681640625, "learning_rate": 1.7735555555555558e-05, "loss": 2.8419, "mean_token_accuracy": 0.4879772139713168, "num_tokens": 33411850.0, "step": 2020 }, { "entropy": 2.041516558080912, "epoch": 1.624, "grad_norm": 0.01708984375, "learning_rate": 1.7713333333333335e-05, "loss": 2.7918, "mean_token_accuracy": 0.4918495800346136, "num_tokens": 33577978.0, "step": 2030 }, { "entropy": 2.1720810759812594, "epoch": 1.6320000000000001, "grad_norm": 0.016357421875, "learning_rate": 1.7691111111111113e-05, "loss": 2.93, "mean_token_accuracy": 0.47755391960963606, "num_tokens": 33739456.0, "step": 2040 }, { "entropy": 2.0735519513487817, "epoch": 1.6400000000000001, "grad_norm": 0.0087890625, "learning_rate": 1.766888888888889e-05, "loss": 2.8702, "mean_token_accuracy": 0.4865173580124974, "num_tokens": 33904537.0, "step": 2050 }, { "entropy": 2.049781569093466, "epoch": 1.6480000000000001, "grad_norm": 0.017822265625, "learning_rate": 1.7646666666666668e-05, "loss": 2.8312, "mean_token_accuracy": 0.4884044425562024, "num_tokens": 34072022.0, "step": 2060 }, { "entropy": 2.0407406061887743, "epoch": 1.6560000000000001, "grad_norm": 0.011962890625, "learning_rate": 1.7624444444444446e-05, "loss": 2.8257, "mean_token_accuracy": 0.48913145195692775, "num_tokens": 34236954.0, "step": 2070 }, { "entropy": 2.0800452411174772, "epoch": 1.6640000000000001, "grad_norm": 0.00634765625, "learning_rate": 1.7602222222222223e-05, "loss": 2.8431, "mean_token_accuracy": 0.4870519321411848, "num_tokens": 34398959.0, "step": 2080 }, { "entropy": 2.0950917959213258, "epoch": 1.6720000000000002, "grad_norm": 0.00811767578125, "learning_rate": 1.758e-05, "loss": 2.8336, "mean_token_accuracy": 0.4887974336743355, "num_tokens": 34564364.0, "step": 2090 }, { "entropy": 2.0823982164263724, "epoch": 1.6800000000000002, "grad_norm": 0.0123291015625, "learning_rate": 1.755777777777778e-05, "loss": 2.8866, "mean_token_accuracy": 0.4803567361086607, "num_tokens": 34732284.0, "step": 2100 }, { "epoch": 1.6800000000000002, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 34732284.0, "eval_coding_runtime": 96.096, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.602, "step": 2100 }, { "epoch": 1.6800000000000002, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 34732284.0, "eval_physics_runtime": 64.047, "eval_physics_samples_per_second": 7.807, "eval_physics_steps_per_second": 3.903, "step": 2100 }, { "entropy": 2.108163347095251, "epoch": 1.688, "grad_norm": 0.0255126953125, "learning_rate": 1.7535555555555556e-05, "loss": 2.8721, "mean_token_accuracy": 0.4795815791934729, "num_tokens": 34895374.0, "step": 2110 }, { "entropy": 2.094745473563671, "epoch": 1.696, "grad_norm": 0.01904296875, "learning_rate": 1.7513333333333334e-05, "loss": 2.8655, "mean_token_accuracy": 0.48117484580725434, "num_tokens": 35053712.0, "step": 2120 }, { "entropy": 2.1672711707651615, "epoch": 1.704, "grad_norm": 0.0238037109375, "learning_rate": 1.749111111111111e-05, "loss": 2.8962, "mean_token_accuracy": 0.47733605969697235, "num_tokens": 35216941.0, "step": 2130 }, { "entropy": 2.076349521800876, "epoch": 1.712, "grad_norm": 0.007171630859375, "learning_rate": 1.746888888888889e-05, "loss": 2.8409, "mean_token_accuracy": 0.48228554893285036, "num_tokens": 35393088.0, "step": 2140 }, { "entropy": 2.086632777005434, "epoch": 1.72, "grad_norm": 0.01416015625, "learning_rate": 1.7446666666666667e-05, "loss": 2.8509, "mean_token_accuracy": 0.48758579082787035, "num_tokens": 35559385.0, "step": 2150 }, { "entropy": 2.116019304841757, "epoch": 1.728, "grad_norm": 0.006744384765625, "learning_rate": 1.7424444444444444e-05, "loss": 2.8813, "mean_token_accuracy": 0.482978693023324, "num_tokens": 35729007.0, "step": 2160 }, { "entropy": 2.0990616902709007, "epoch": 1.736, "grad_norm": 0.0087890625, "learning_rate": 1.7402222222222222e-05, "loss": 2.8565, "mean_token_accuracy": 0.4873128758743405, "num_tokens": 35905199.0, "step": 2170 }, { "entropy": 2.0888281360268595, "epoch": 1.744, "grad_norm": 0.0086669921875, "learning_rate": 1.7380000000000003e-05, "loss": 2.8736, "mean_token_accuracy": 0.4849411312490702, "num_tokens": 36072489.0, "step": 2180 }, { "entropy": 2.1180737152695657, "epoch": 1.752, "grad_norm": 0.007720947265625, "learning_rate": 1.735777777777778e-05, "loss": 2.9049, "mean_token_accuracy": 0.4808773329481483, "num_tokens": 36240600.0, "step": 2190 }, { "entropy": 2.12109714448452, "epoch": 1.76, "grad_norm": 0.009765625, "learning_rate": 1.7335555555555558e-05, "loss": 2.8884, "mean_token_accuracy": 0.4813131859526038, "num_tokens": 36398285.0, "step": 2200 }, { "epoch": 1.76, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 36398285.0, "eval_coding_runtime": 96.2637, "eval_coding_samples_per_second": 5.194, "eval_coding_steps_per_second": 2.597, "step": 2200 }, { "epoch": 1.76, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 36398285.0, "eval_physics_runtime": 64.2412, "eval_physics_samples_per_second": 7.783, "eval_physics_steps_per_second": 3.892, "step": 2200 }, { "entropy": 2.155783172696829, "epoch": 1.768, "grad_norm": 0.00872802734375, "learning_rate": 1.7313333333333336e-05, "loss": 2.9112, "mean_token_accuracy": 0.4770881399512291, "num_tokens": 36559891.0, "step": 2210 }, { "entropy": 2.1637654595077036, "epoch": 1.776, "grad_norm": 0.015625, "learning_rate": 1.7291111111111113e-05, "loss": 2.9176, "mean_token_accuracy": 0.47640001866966486, "num_tokens": 36730479.0, "step": 2220 }, { "entropy": 2.0420110248029233, "epoch": 1.784, "grad_norm": 0.01531982421875, "learning_rate": 1.726888888888889e-05, "loss": 2.7908, "mean_token_accuracy": 0.49196922313421965, "num_tokens": 36897454.0, "step": 2230 }, { "entropy": 2.110601053386927, "epoch": 1.792, "grad_norm": 0.01129150390625, "learning_rate": 1.724666666666667e-05, "loss": 2.8796, "mean_token_accuracy": 0.4833569750189781, "num_tokens": 37057425.0, "step": 2240 }, { "entropy": 2.044808383285999, "epoch": 1.8, "grad_norm": 0.02099609375, "learning_rate": 1.7224444444444446e-05, "loss": 2.8188, "mean_token_accuracy": 0.489076080173254, "num_tokens": 37225440.0, "step": 2250 }, { "entropy": 2.0545787177979946, "epoch": 1.808, "grad_norm": 0.0186767578125, "learning_rate": 1.7202222222222224e-05, "loss": 2.7854, "mean_token_accuracy": 0.4944944025948644, "num_tokens": 37395449.0, "step": 2260 }, { "entropy": 2.108742152154446, "epoch": 1.8159999999999998, "grad_norm": 0.009033203125, "learning_rate": 1.718e-05, "loss": 2.8803, "mean_token_accuracy": 0.48428055848926305, "num_tokens": 37565463.0, "step": 2270 }, { "entropy": 2.066716094315052, "epoch": 1.8239999999999998, "grad_norm": 0.0076904296875, "learning_rate": 1.715777777777778e-05, "loss": 2.8542, "mean_token_accuracy": 0.487205614708364, "num_tokens": 37729535.0, "step": 2280 }, { "entropy": 2.115487913042307, "epoch": 1.8319999999999999, "grad_norm": 0.019775390625, "learning_rate": 1.7135555555555557e-05, "loss": 2.9002, "mean_token_accuracy": 0.48101999796926975, "num_tokens": 37888632.0, "step": 2290 }, { "entropy": 2.091165804117918, "epoch": 1.8399999999999999, "grad_norm": 0.00921630859375, "learning_rate": 1.7113333333333334e-05, "loss": 2.8162, "mean_token_accuracy": 0.4885393580421805, "num_tokens": 38057839.0, "step": 2300 }, { "epoch": 1.8399999999999999, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 38057839.0, "eval_coding_runtime": 96.1927, "eval_coding_samples_per_second": 5.198, "eval_coding_steps_per_second": 2.599, "step": 2300 }, { "epoch": 1.8399999999999999, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 38057839.0, "eval_physics_runtime": 63.966, "eval_physics_samples_per_second": 7.817, "eval_physics_steps_per_second": 3.908, "step": 2300 }, { "entropy": 2.0943633928894996, "epoch": 1.8479999999999999, "grad_norm": 0.012451171875, "learning_rate": 1.7091111111111112e-05, "loss": 2.8744, "mean_token_accuracy": 0.4854456914588809, "num_tokens": 38228321.0, "step": 2310 }, { "entropy": 2.1238606452941893, "epoch": 1.8559999999999999, "grad_norm": 0.01806640625, "learning_rate": 1.706888888888889e-05, "loss": 2.8687, "mean_token_accuracy": 0.4809235129505396, "num_tokens": 38398033.0, "step": 2320 }, { "entropy": 2.118824549764395, "epoch": 1.8639999999999999, "grad_norm": 0.007720947265625, "learning_rate": 1.704666666666667e-05, "loss": 2.8771, "mean_token_accuracy": 0.4786581486463547, "num_tokens": 38568933.0, "step": 2330 }, { "entropy": 2.1477391608059406, "epoch": 1.8719999999999999, "grad_norm": 0.0157470703125, "learning_rate": 1.7024444444444445e-05, "loss": 2.9297, "mean_token_accuracy": 0.4786318261176348, "num_tokens": 38729203.0, "step": 2340 }, { "entropy": 2.1359975136816503, "epoch": 1.88, "grad_norm": 0.00677490234375, "learning_rate": 1.7002222222222226e-05, "loss": 2.9197, "mean_token_accuracy": 0.47710791788995266, "num_tokens": 38895784.0, "step": 2350 }, { "entropy": 2.1358583688735964, "epoch": 1.888, "grad_norm": 0.018310546875, "learning_rate": 1.698e-05, "loss": 2.9045, "mean_token_accuracy": 0.47970328964293, "num_tokens": 39058791.0, "step": 2360 }, { "entropy": 2.1071565486490726, "epoch": 1.896, "grad_norm": 0.008056640625, "learning_rate": 1.695777777777778e-05, "loss": 2.8754, "mean_token_accuracy": 0.4833732729777694, "num_tokens": 39224201.0, "step": 2370 }, { "entropy": 2.1035716101527213, "epoch": 1.904, "grad_norm": 0.0091552734375, "learning_rate": 1.6935555555555555e-05, "loss": 2.8949, "mean_token_accuracy": 0.47980596460402014, "num_tokens": 39386170.0, "step": 2380 }, { "entropy": 2.1066678419709204, "epoch": 1.912, "grad_norm": 0.0213623046875, "learning_rate": 1.6913333333333336e-05, "loss": 2.8792, "mean_token_accuracy": 0.4800190070644021, "num_tokens": 39545061.0, "step": 2390 }, { "entropy": 2.1327901519834995, "epoch": 1.92, "grad_norm": 0.01446533203125, "learning_rate": 1.689111111111111e-05, "loss": 2.8996, "mean_token_accuracy": 0.4832611232995987, "num_tokens": 39710381.0, "step": 2400 }, { "epoch": 1.92, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 39710381.0, "eval_coding_runtime": 95.9878, "eval_coding_samples_per_second": 5.209, "eval_coding_steps_per_second": 2.604, "step": 2400 }, { "epoch": 1.92, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 39710381.0, "eval_physics_runtime": 63.8325, "eval_physics_samples_per_second": 7.833, "eval_physics_steps_per_second": 3.916, "step": 2400 }, { "entropy": 2.1592532977461816, "epoch": 1.928, "grad_norm": 0.00714111328125, "learning_rate": 1.686888888888889e-05, "loss": 2.9007, "mean_token_accuracy": 0.48193860817700623, "num_tokens": 39878408.0, "step": 2410 }, { "entropy": 2.0319110229611397, "epoch": 1.936, "grad_norm": 0.0159912109375, "learning_rate": 1.684666666666667e-05, "loss": 2.8144, "mean_token_accuracy": 0.49194381963461636, "num_tokens": 40043354.0, "step": 2420 }, { "entropy": 2.0290094792842863, "epoch": 1.944, "grad_norm": 0.0108642578125, "learning_rate": 1.6824444444444447e-05, "loss": 2.8173, "mean_token_accuracy": 0.490530570037663, "num_tokens": 40206607.0, "step": 2430 }, { "entropy": 2.001203689724207, "epoch": 1.952, "grad_norm": 0.0157470703125, "learning_rate": 1.6802222222222224e-05, "loss": 2.775, "mean_token_accuracy": 0.4963298514485359, "num_tokens": 40375790.0, "step": 2440 }, { "entropy": 2.078115231543779, "epoch": 1.96, "grad_norm": 0.01220703125, "learning_rate": 1.6780000000000002e-05, "loss": 2.8797, "mean_token_accuracy": 0.4827138794586062, "num_tokens": 40539856.0, "step": 2450 }, { "entropy": 2.1310867570340633, "epoch": 1.968, "grad_norm": 0.01446533203125, "learning_rate": 1.675777777777778e-05, "loss": 2.896, "mean_token_accuracy": 0.4785461273044348, "num_tokens": 40705451.0, "step": 2460 }, { "entropy": 2.1071126513183116, "epoch": 1.976, "grad_norm": 0.0184326171875, "learning_rate": 1.6735555555555557e-05, "loss": 2.8625, "mean_token_accuracy": 0.4837299410253763, "num_tokens": 40873446.0, "step": 2470 }, { "entropy": 2.1266873709857466, "epoch": 1.984, "grad_norm": 0.01470947265625, "learning_rate": 1.6713333333333335e-05, "loss": 2.9035, "mean_token_accuracy": 0.4816863130778074, "num_tokens": 41040313.0, "step": 2480 }, { "entropy": 2.0827384896576406, "epoch": 1.992, "grad_norm": 0.00640869140625, "learning_rate": 1.6691111111111112e-05, "loss": 2.8539, "mean_token_accuracy": 0.4890477364882827, "num_tokens": 41205940.0, "step": 2490 }, { "entropy": 2.1126816742122174, "epoch": 2.0, "grad_norm": 0.01556396484375, "learning_rate": 1.666888888888889e-05, "loss": 2.8749, "mean_token_accuracy": 0.48269832525402306, "num_tokens": 41373608.0, "step": 2500 }, { "epoch": 2.0, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 41373608.0, "eval_coding_runtime": 96.1355, "eval_coding_samples_per_second": 5.201, "eval_coding_steps_per_second": 2.6, "step": 2500 }, { "epoch": 2.0, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 41373608.0, "eval_physics_runtime": 63.6152, "eval_physics_samples_per_second": 7.86, "eval_physics_steps_per_second": 3.93, "step": 2500 }, { "entropy": 2.109695591032505, "epoch": 2.008, "grad_norm": 0.015625, "learning_rate": 1.6646666666666668e-05, "loss": 2.896, "mean_token_accuracy": 0.479962002299726, "num_tokens": 41541054.0, "step": 2510 }, { "entropy": 2.0923699125647546, "epoch": 2.016, "grad_norm": 0.00848388671875, "learning_rate": 1.6624444444444445e-05, "loss": 2.8569, "mean_token_accuracy": 0.48771672397851945, "num_tokens": 41708226.0, "step": 2520 }, { "entropy": 2.055105470865965, "epoch": 2.024, "grad_norm": 0.00982666015625, "learning_rate": 1.6602222222222223e-05, "loss": 2.8172, "mean_token_accuracy": 0.492337409965694, "num_tokens": 41874459.0, "step": 2530 }, { "entropy": 2.0631272219121457, "epoch": 2.032, "grad_norm": 0.009765625, "learning_rate": 1.658e-05, "loss": 2.8367, "mean_token_accuracy": 0.489038916118443, "num_tokens": 42045433.0, "step": 2540 }, { "entropy": 2.1083615384995937, "epoch": 2.04, "grad_norm": 0.01141357421875, "learning_rate": 1.6557777777777778e-05, "loss": 2.8541, "mean_token_accuracy": 0.4863636655732989, "num_tokens": 42208657.0, "step": 2550 }, { "entropy": 2.1512265764176846, "epoch": 2.048, "grad_norm": 0.010986328125, "learning_rate": 1.6535555555555556e-05, "loss": 2.9351, "mean_token_accuracy": 0.4792114529758692, "num_tokens": 42372694.0, "step": 2560 }, { "entropy": 2.0822809383273126, "epoch": 2.056, "grad_norm": 0.01007080078125, "learning_rate": 1.6513333333333333e-05, "loss": 2.8577, "mean_token_accuracy": 0.4852941369637847, "num_tokens": 42541126.0, "step": 2570 }, { "entropy": 2.078305044025183, "epoch": 2.064, "grad_norm": 0.01422119140625, "learning_rate": 1.6491111111111114e-05, "loss": 2.8323, "mean_token_accuracy": 0.48723302837461235, "num_tokens": 42710240.0, "step": 2580 }, { "entropy": 2.096334061771631, "epoch": 2.072, "grad_norm": 0.009765625, "learning_rate": 1.646888888888889e-05, "loss": 2.8596, "mean_token_accuracy": 0.48555029947310685, "num_tokens": 42874781.0, "step": 2590 }, { "entropy": 2.1294494703412057, "epoch": 2.08, "grad_norm": 0.0235595703125, "learning_rate": 1.644666666666667e-05, "loss": 2.918, "mean_token_accuracy": 0.4746216956526041, "num_tokens": 43037844.0, "step": 2600 }, { "epoch": 2.08, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 43037844.0, "eval_coding_runtime": 96.2051, "eval_coding_samples_per_second": 5.197, "eval_coding_steps_per_second": 2.599, "step": 2600 }, { "epoch": 2.08, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 43037844.0, "eval_physics_runtime": 64.2843, "eval_physics_samples_per_second": 7.778, "eval_physics_steps_per_second": 3.889, "step": 2600 }, { "entropy": 2.1377002507448197, "epoch": 2.088, "grad_norm": 0.0062255859375, "learning_rate": 1.6424444444444444e-05, "loss": 2.9047, "mean_token_accuracy": 0.47883504033088686, "num_tokens": 43202298.0, "step": 2610 }, { "entropy": 2.049978656321764, "epoch": 2.096, "grad_norm": 0.007171630859375, "learning_rate": 1.6402222222222225e-05, "loss": 2.8012, "mean_token_accuracy": 0.4908132331445813, "num_tokens": 43368675.0, "step": 2620 }, { "entropy": 2.0891571439802648, "epoch": 2.104, "grad_norm": 0.0211181640625, "learning_rate": 1.638e-05, "loss": 2.858, "mean_token_accuracy": 0.4863886846229434, "num_tokens": 43528580.0, "step": 2630 }, { "entropy": 2.084837993234396, "epoch": 2.112, "grad_norm": 0.00885009765625, "learning_rate": 1.635777777777778e-05, "loss": 2.8258, "mean_token_accuracy": 0.49133954383432865, "num_tokens": 43699544.0, "step": 2640 }, { "entropy": 2.1012887261807918, "epoch": 2.12, "grad_norm": 0.0191650390625, "learning_rate": 1.6335555555555558e-05, "loss": 2.8533, "mean_token_accuracy": 0.4835510091856122, "num_tokens": 43858502.0, "step": 2650 }, { "entropy": 2.126673661917448, "epoch": 2.128, "grad_norm": 0.00885009765625, "learning_rate": 1.6313333333333335e-05, "loss": 2.9162, "mean_token_accuracy": 0.48049711883068086, "num_tokens": 44012632.0, "step": 2660 }, { "entropy": 2.110631703585386, "epoch": 2.136, "grad_norm": 0.0084228515625, "learning_rate": 1.6291111111111113e-05, "loss": 2.8524, "mean_token_accuracy": 0.48574508912861347, "num_tokens": 44179301.0, "step": 2670 }, { "entropy": 2.0693719699978828, "epoch": 2.144, "grad_norm": 0.00933837890625, "learning_rate": 1.626888888888889e-05, "loss": 2.8489, "mean_token_accuracy": 0.486761187389493, "num_tokens": 44349732.0, "step": 2680 }, { "entropy": 2.1314582899212837, "epoch": 2.152, "grad_norm": 0.0064697265625, "learning_rate": 1.6246666666666668e-05, "loss": 2.8933, "mean_token_accuracy": 0.47988808769732716, "num_tokens": 44514865.0, "step": 2690 }, { "entropy": 2.146127165853977, "epoch": 2.16, "grad_norm": 0.0191650390625, "learning_rate": 1.6224444444444446e-05, "loss": 2.9227, "mean_token_accuracy": 0.47669907119125127, "num_tokens": 44680410.0, "step": 2700 }, { "epoch": 2.16, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 44680410.0, "eval_coding_runtime": 96.1226, "eval_coding_samples_per_second": 5.202, "eval_coding_steps_per_second": 2.601, "step": 2700 }, { "epoch": 2.16, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 44680410.0, "eval_physics_runtime": 63.8308, "eval_physics_samples_per_second": 7.833, "eval_physics_steps_per_second": 3.917, "step": 2700 }, { "entropy": 2.1261583000421522, "epoch": 2.168, "grad_norm": 0.009033203125, "learning_rate": 1.6202222222222223e-05, "loss": 2.8919, "mean_token_accuracy": 0.4785753648728132, "num_tokens": 44846515.0, "step": 2710 }, { "entropy": 2.0676582753658295, "epoch": 2.176, "grad_norm": 0.00927734375, "learning_rate": 1.618e-05, "loss": 2.8569, "mean_token_accuracy": 0.4861806998029351, "num_tokens": 45017922.0, "step": 2720 }, { "entropy": 2.1044729210436346, "epoch": 2.184, "grad_norm": 0.007293701171875, "learning_rate": 1.615777777777778e-05, "loss": 2.8779, "mean_token_accuracy": 0.48611082304269077, "num_tokens": 45181823.0, "step": 2730 }, { "entropy": 2.0987057738006114, "epoch": 2.192, "grad_norm": 0.014892578125, "learning_rate": 1.6135555555555556e-05, "loss": 2.8557, "mean_token_accuracy": 0.4868852591142058, "num_tokens": 45348179.0, "step": 2740 }, { "entropy": 2.093054236471653, "epoch": 2.2, "grad_norm": 0.008056640625, "learning_rate": 1.6113333333333334e-05, "loss": 2.8597, "mean_token_accuracy": 0.48510244842618705, "num_tokens": 45514208.0, "step": 2750 }, { "entropy": 2.070825570821762, "epoch": 2.208, "grad_norm": 0.0074462890625, "learning_rate": 1.609111111111111e-05, "loss": 2.8633, "mean_token_accuracy": 0.486143646389246, "num_tokens": 45678397.0, "step": 2760 }, { "entropy": 2.0762874506413938, "epoch": 2.216, "grad_norm": 0.01348876953125, "learning_rate": 1.606888888888889e-05, "loss": 2.8764, "mean_token_accuracy": 0.4872116198763251, "num_tokens": 45844632.0, "step": 2770 }, { "entropy": 2.0525411412119867, "epoch": 2.224, "grad_norm": 0.00848388671875, "learning_rate": 1.6046666666666667e-05, "loss": 2.8189, "mean_token_accuracy": 0.4916418811306357, "num_tokens": 46010052.0, "step": 2780 }, { "entropy": 2.0773925125598907, "epoch": 2.232, "grad_norm": 0.00738525390625, "learning_rate": 1.6024444444444444e-05, "loss": 2.8449, "mean_token_accuracy": 0.4843680987134576, "num_tokens": 46176730.0, "step": 2790 }, { "entropy": 2.1263207592070104, "epoch": 2.24, "grad_norm": 0.00982666015625, "learning_rate": 1.6002222222222222e-05, "loss": 2.918, "mean_token_accuracy": 0.476583500020206, "num_tokens": 46347116.0, "step": 2800 }, { "epoch": 2.24, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 46347116.0, "eval_coding_runtime": 95.9232, "eval_coding_samples_per_second": 5.213, "eval_coding_steps_per_second": 2.606, "step": 2800 }, { "epoch": 2.24, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 46347116.0, "eval_physics_runtime": 63.9756, "eval_physics_samples_per_second": 7.815, "eval_physics_steps_per_second": 3.908, "step": 2800 }, { "entropy": 2.1278377093374727, "epoch": 2.248, "grad_norm": 0.01080322265625, "learning_rate": 1.5980000000000003e-05, "loss": 2.9003, "mean_token_accuracy": 0.4784318573772907, "num_tokens": 46513696.0, "step": 2810 }, { "entropy": 2.128158251941204, "epoch": 2.2560000000000002, "grad_norm": 0.01312255859375, "learning_rate": 1.5957777777777777e-05, "loss": 2.8978, "mean_token_accuracy": 0.48350818566977977, "num_tokens": 46673074.0, "step": 2820 }, { "entropy": 2.07526678070426, "epoch": 2.2640000000000002, "grad_norm": 0.0081787109375, "learning_rate": 1.5935555555555558e-05, "loss": 2.8253, "mean_token_accuracy": 0.48843313101679087, "num_tokens": 46841474.0, "step": 2830 }, { "entropy": 2.098488190770149, "epoch": 2.2720000000000002, "grad_norm": 0.011962890625, "learning_rate": 1.5913333333333332e-05, "loss": 2.8562, "mean_token_accuracy": 0.4841760952025652, "num_tokens": 47009386.0, "step": 2840 }, { "entropy": 2.1084106832742693, "epoch": 2.2800000000000002, "grad_norm": 0.0140380859375, "learning_rate": 1.5891111111111113e-05, "loss": 2.8686, "mean_token_accuracy": 0.48397026900202034, "num_tokens": 47175754.0, "step": 2850 }, { "entropy": 2.165386701375246, "epoch": 2.288, "grad_norm": 0.0179443359375, "learning_rate": 1.5868888888888888e-05, "loss": 2.9623, "mean_token_accuracy": 0.47135423719882963, "num_tokens": 47344231.0, "step": 2860 }, { "entropy": 2.062075611203909, "epoch": 2.296, "grad_norm": 0.015869140625, "learning_rate": 1.584666666666667e-05, "loss": 2.8116, "mean_token_accuracy": 0.49171398133039473, "num_tokens": 47507052.0, "step": 2870 }, { "entropy": 2.126021772623062, "epoch": 2.304, "grad_norm": 0.006072998046875, "learning_rate": 1.5824444444444446e-05, "loss": 2.8995, "mean_token_accuracy": 0.482550716586411, "num_tokens": 47658392.0, "step": 2880 }, { "entropy": 2.0740631029009817, "epoch": 2.312, "grad_norm": 0.01104736328125, "learning_rate": 1.5802222222222224e-05, "loss": 2.8559, "mean_token_accuracy": 0.4810700535774231, "num_tokens": 47827802.0, "step": 2890 }, { "entropy": 2.1145732060074804, "epoch": 2.32, "grad_norm": 0.01116943359375, "learning_rate": 1.578e-05, "loss": 2.8727, "mean_token_accuracy": 0.4866561494767666, "num_tokens": 47995015.0, "step": 2900 }, { "epoch": 2.32, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 47995015.0, "eval_coding_runtime": 95.7936, "eval_coding_samples_per_second": 5.22, "eval_coding_steps_per_second": 2.61, "step": 2900 }, { "epoch": 2.32, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 47995015.0, "eval_physics_runtime": 64.2171, "eval_physics_samples_per_second": 7.786, "eval_physics_steps_per_second": 3.893, "step": 2900 }, { "entropy": 2.0761554703116416, "epoch": 2.328, "grad_norm": 0.01141357421875, "learning_rate": 1.575777777777778e-05, "loss": 2.8017, "mean_token_accuracy": 0.4926915055140853, "num_tokens": 48163933.0, "step": 2910 }, { "entropy": 2.0599563673138617, "epoch": 2.336, "grad_norm": 0.0076904296875, "learning_rate": 1.5735555555555557e-05, "loss": 2.852, "mean_token_accuracy": 0.4864024940878153, "num_tokens": 48336708.0, "step": 2920 }, { "entropy": 2.0758877620100975, "epoch": 2.344, "grad_norm": 0.00958251953125, "learning_rate": 1.5713333333333334e-05, "loss": 2.8046, "mean_token_accuracy": 0.4939649226143956, "num_tokens": 48501684.0, "step": 2930 }, { "entropy": 2.1009161733090878, "epoch": 2.352, "grad_norm": 0.01092529296875, "learning_rate": 1.5691111111111112e-05, "loss": 2.8784, "mean_token_accuracy": 0.48454554490745066, "num_tokens": 48665311.0, "step": 2940 }, { "entropy": 2.0380428835749624, "epoch": 2.36, "grad_norm": 0.01251220703125, "learning_rate": 1.5668888888888893e-05, "loss": 2.8256, "mean_token_accuracy": 0.49210381004959347, "num_tokens": 48842066.0, "step": 2950 }, { "entropy": 2.174724444001913, "epoch": 2.368, "grad_norm": 0.010498046875, "learning_rate": 1.5646666666666667e-05, "loss": 2.9176, "mean_token_accuracy": 0.476856386102736, "num_tokens": 49005334.0, "step": 2960 }, { "entropy": 2.1461206674575806, "epoch": 2.376, "grad_norm": 0.00750732421875, "learning_rate": 1.5624444444444448e-05, "loss": 2.9246, "mean_token_accuracy": 0.48218597918748857, "num_tokens": 49166287.0, "step": 2970 }, { "entropy": 2.1835753597319125, "epoch": 2.384, "grad_norm": 0.00634765625, "learning_rate": 1.5602222222222222e-05, "loss": 2.945, "mean_token_accuracy": 0.47245176918804643, "num_tokens": 49331936.0, "step": 2980 }, { "entropy": 2.1323873437941074, "epoch": 2.392, "grad_norm": 0.01556396484375, "learning_rate": 1.5580000000000003e-05, "loss": 2.9147, "mean_token_accuracy": 0.4816423388198018, "num_tokens": 49500119.0, "step": 2990 }, { "entropy": 2.0695614576339723, "epoch": 2.4, "grad_norm": 0.0078125, "learning_rate": 1.5557777777777778e-05, "loss": 2.8376, "mean_token_accuracy": 0.48881256151944397, "num_tokens": 49671048.0, "step": 3000 }, { "epoch": 2.4, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 49671048.0, "eval_coding_runtime": 95.7733, "eval_coding_samples_per_second": 5.221, "eval_coding_steps_per_second": 2.61, "step": 3000 }, { "epoch": 2.4, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 49671048.0, "eval_physics_runtime": 63.5578, "eval_physics_samples_per_second": 7.867, "eval_physics_steps_per_second": 3.933, "step": 3000 }, { "entropy": 2.048779007047415, "epoch": 2.408, "grad_norm": 0.0101318359375, "learning_rate": 1.553555555555556e-05, "loss": 2.8455, "mean_token_accuracy": 0.4872598424553871, "num_tokens": 49845815.0, "step": 3010 }, { "entropy": 2.053139239549637, "epoch": 2.416, "grad_norm": 0.0084228515625, "learning_rate": 1.5513333333333333e-05, "loss": 2.8561, "mean_token_accuracy": 0.4852673104032874, "num_tokens": 50018182.0, "step": 3020 }, { "entropy": 2.070374865829945, "epoch": 2.424, "grad_norm": 0.018798828125, "learning_rate": 1.5491111111111114e-05, "loss": 2.8782, "mean_token_accuracy": 0.4815656444057822, "num_tokens": 50176877.0, "step": 3030 }, { "entropy": 2.0859608858823777, "epoch": 2.432, "grad_norm": 0.0137939453125, "learning_rate": 1.546888888888889e-05, "loss": 2.8833, "mean_token_accuracy": 0.4850707145407796, "num_tokens": 50341187.0, "step": 3040 }, { "entropy": 2.13402646407485, "epoch": 2.44, "grad_norm": 0.006805419921875, "learning_rate": 1.544666666666667e-05, "loss": 2.8688, "mean_token_accuracy": 0.48171408735215665, "num_tokens": 50505772.0, "step": 3050 }, { "entropy": 2.146274097263813, "epoch": 2.448, "grad_norm": 0.00994873046875, "learning_rate": 1.5424444444444447e-05, "loss": 2.8915, "mean_token_accuracy": 0.48076413553208114, "num_tokens": 50667019.0, "step": 3060 }, { "entropy": 2.117669067531824, "epoch": 2.456, "grad_norm": 0.025634765625, "learning_rate": 1.5402222222222224e-05, "loss": 2.9298, "mean_token_accuracy": 0.4758553016930819, "num_tokens": 50829652.0, "step": 3070 }, { "entropy": 2.121635789424181, "epoch": 2.464, "grad_norm": 0.00750732421875, "learning_rate": 1.5380000000000002e-05, "loss": 2.8609, "mean_token_accuracy": 0.48166232127696273, "num_tokens": 50991113.0, "step": 3080 }, { "entropy": 2.1312516391277314, "epoch": 2.472, "grad_norm": 0.00799560546875, "learning_rate": 1.535777777777778e-05, "loss": 2.8987, "mean_token_accuracy": 0.4824211601167917, "num_tokens": 51159212.0, "step": 3090 }, { "entropy": 2.0914432048797607, "epoch": 2.48, "grad_norm": 0.00714111328125, "learning_rate": 1.5335555555555557e-05, "loss": 2.8976, "mean_token_accuracy": 0.47798026613891126, "num_tokens": 51319624.0, "step": 3100 }, { "epoch": 2.48, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 51319624.0, "eval_coding_runtime": 95.9767, "eval_coding_samples_per_second": 5.21, "eval_coding_steps_per_second": 2.605, "step": 3100 }, { "epoch": 2.48, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 51319624.0, "eval_physics_runtime": 63.679, "eval_physics_samples_per_second": 7.852, "eval_physics_steps_per_second": 3.926, "step": 3100 }, { "entropy": 2.136239843815565, "epoch": 2.488, "grad_norm": 0.007080078125, "learning_rate": 1.5313333333333335e-05, "loss": 2.894, "mean_token_accuracy": 0.48326832950115206, "num_tokens": 51488050.0, "step": 3110 }, { "entropy": 2.0875415176153185, "epoch": 2.496, "grad_norm": 0.01300048828125, "learning_rate": 1.5291111111111112e-05, "loss": 2.8606, "mean_token_accuracy": 0.4845199853181839, "num_tokens": 51654633.0, "step": 3120 }, { "entropy": 2.1257060185074805, "epoch": 2.504, "grad_norm": 0.0172119140625, "learning_rate": 1.526888888888889e-05, "loss": 2.8722, "mean_token_accuracy": 0.4818729363381863, "num_tokens": 51818044.0, "step": 3130 }, { "entropy": 2.0669797226786613, "epoch": 2.512, "grad_norm": 0.015625, "learning_rate": 1.5246666666666668e-05, "loss": 2.8189, "mean_token_accuracy": 0.48912298064678905, "num_tokens": 51980051.0, "step": 3140 }, { "entropy": 2.0928348623216153, "epoch": 2.52, "grad_norm": 0.01019287109375, "learning_rate": 1.5224444444444447e-05, "loss": 2.8648, "mean_token_accuracy": 0.4851804681122303, "num_tokens": 52146049.0, "step": 3150 }, { "entropy": 2.0820957243442537, "epoch": 2.528, "grad_norm": 0.00726318359375, "learning_rate": 1.5202222222222223e-05, "loss": 2.8607, "mean_token_accuracy": 0.48901640996336937, "num_tokens": 52316109.0, "step": 3160 }, { "entropy": 2.0938249841332435, "epoch": 2.536, "grad_norm": 0.0120849609375, "learning_rate": 1.5180000000000002e-05, "loss": 2.8368, "mean_token_accuracy": 0.4879815449938178, "num_tokens": 52475120.0, "step": 3170 }, { "entropy": 2.153928484022617, "epoch": 2.544, "grad_norm": 0.0145263671875, "learning_rate": 1.5157777777777778e-05, "loss": 2.9383, "mean_token_accuracy": 0.4752116585150361, "num_tokens": 52634528.0, "step": 3180 }, { "entropy": 2.090732706338167, "epoch": 2.552, "grad_norm": 0.00823974609375, "learning_rate": 1.5135555555555557e-05, "loss": 2.9029, "mean_token_accuracy": 0.48127470947802065, "num_tokens": 52797762.0, "step": 3190 }, { "entropy": 2.102490375936031, "epoch": 2.56, "grad_norm": 0.007537841796875, "learning_rate": 1.5113333333333335e-05, "loss": 2.8376, "mean_token_accuracy": 0.4870879840105772, "num_tokens": 52964035.0, "step": 3200 }, { "epoch": 2.56, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 52964035.0, "eval_coding_runtime": 96.1669, "eval_coding_samples_per_second": 5.199, "eval_coding_steps_per_second": 2.6, "step": 3200 }, { "epoch": 2.56, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 52964035.0, "eval_physics_runtime": 63.882, "eval_physics_samples_per_second": 7.827, "eval_physics_steps_per_second": 3.913, "step": 3200 }, { "entropy": 2.1070725575089453, "epoch": 2.568, "grad_norm": 0.01275634765625, "learning_rate": 1.5091111111111113e-05, "loss": 2.8546, "mean_token_accuracy": 0.4875574728474021, "num_tokens": 53127106.0, "step": 3210 }, { "entropy": 2.15907968506217, "epoch": 2.576, "grad_norm": 0.0157470703125, "learning_rate": 1.506888888888889e-05, "loss": 2.9158, "mean_token_accuracy": 0.477414826862514, "num_tokens": 53293345.0, "step": 3220 }, { "entropy": 2.132044891268015, "epoch": 2.584, "grad_norm": 0.01544189453125, "learning_rate": 1.5046666666666668e-05, "loss": 2.8982, "mean_token_accuracy": 0.4789698511362076, "num_tokens": 53454336.0, "step": 3230 }, { "entropy": 2.0777774840593337, "epoch": 2.592, "grad_norm": 0.02099609375, "learning_rate": 1.5024444444444445e-05, "loss": 2.8415, "mean_token_accuracy": 0.48330534659326074, "num_tokens": 53620896.0, "step": 3240 }, { "entropy": 2.0706359148025513, "epoch": 2.6, "grad_norm": 0.01519775390625, "learning_rate": 1.5002222222222223e-05, "loss": 2.84, "mean_token_accuracy": 0.4818432040512562, "num_tokens": 53784544.0, "step": 3250 }, { "entropy": 2.0699372351169587, "epoch": 2.608, "grad_norm": 0.00897216796875, "learning_rate": 1.498e-05, "loss": 2.8468, "mean_token_accuracy": 0.48775772508233783, "num_tokens": 53950989.0, "step": 3260 }, { "entropy": 2.0821156427264214, "epoch": 2.616, "grad_norm": 0.0150146484375, "learning_rate": 1.495777777777778e-05, "loss": 2.8652, "mean_token_accuracy": 0.48366980664432047, "num_tokens": 54116097.0, "step": 3270 }, { "entropy": 2.1099216148257254, "epoch": 2.624, "grad_norm": 0.00885009765625, "learning_rate": 1.4935555555555556e-05, "loss": 2.8907, "mean_token_accuracy": 0.4797281926497817, "num_tokens": 54282732.0, "step": 3280 }, { "entropy": 2.1004807710647584, "epoch": 2.632, "grad_norm": 0.01953125, "learning_rate": 1.4913333333333335e-05, "loss": 2.8485, "mean_token_accuracy": 0.48381266705691817, "num_tokens": 54442780.0, "step": 3290 }, { "entropy": 2.1202649302780627, "epoch": 2.64, "grad_norm": 0.008056640625, "learning_rate": 1.4891111111111111e-05, "loss": 2.8615, "mean_token_accuracy": 0.48509005140513184, "num_tokens": 54609748.0, "step": 3300 }, { "epoch": 2.64, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 54609748.0, "eval_coding_runtime": 96.1735, "eval_coding_samples_per_second": 5.199, "eval_coding_steps_per_second": 2.599, "step": 3300 }, { "epoch": 2.64, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 54609748.0, "eval_physics_runtime": 63.9233, "eval_physics_samples_per_second": 7.822, "eval_physics_steps_per_second": 3.911, "step": 3300 }, { "entropy": 2.122569767385721, "epoch": 2.648, "grad_norm": 0.0079345703125, "learning_rate": 1.486888888888889e-05, "loss": 2.8805, "mean_token_accuracy": 0.4804168799892068, "num_tokens": 54783193.0, "step": 3310 }, { "entropy": 2.163311902433634, "epoch": 2.656, "grad_norm": 0.009521484375, "learning_rate": 1.4846666666666666e-05, "loss": 2.9588, "mean_token_accuracy": 0.4717843746766448, "num_tokens": 54953583.0, "step": 3320 }, { "entropy": 2.131222031265497, "epoch": 2.664, "grad_norm": 0.0184326171875, "learning_rate": 1.4824444444444446e-05, "loss": 2.9142, "mean_token_accuracy": 0.47831583991646764, "num_tokens": 55117144.0, "step": 3330 }, { "entropy": 2.118143031746149, "epoch": 2.672, "grad_norm": 0.00970458984375, "learning_rate": 1.4802222222222225e-05, "loss": 2.9115, "mean_token_accuracy": 0.47941809725016354, "num_tokens": 55281945.0, "step": 3340 }, { "entropy": 2.0971298292279243, "epoch": 2.68, "grad_norm": 0.01483154296875, "learning_rate": 1.478e-05, "loss": 2.8689, "mean_token_accuracy": 0.48324434272944927, "num_tokens": 55448136.0, "step": 3350 }, { "entropy": 2.063984639942646, "epoch": 2.6879999999999997, "grad_norm": 0.009765625, "learning_rate": 1.475777777777778e-05, "loss": 2.8728, "mean_token_accuracy": 0.48411469515413047, "num_tokens": 55612391.0, "step": 3360 }, { "entropy": 2.0723772957921027, "epoch": 2.6959999999999997, "grad_norm": 0.0113525390625, "learning_rate": 1.4735555555555556e-05, "loss": 2.8513, "mean_token_accuracy": 0.4875372413545847, "num_tokens": 55785705.0, "step": 3370 }, { "entropy": 2.0496960267424584, "epoch": 2.7039999999999997, "grad_norm": 0.01397705078125, "learning_rate": 1.4713333333333335e-05, "loss": 2.8275, "mean_token_accuracy": 0.4882535219192505, "num_tokens": 55949791.0, "step": 3380 }, { "entropy": 2.086968547850847, "epoch": 2.7119999999999997, "grad_norm": 0.00787353515625, "learning_rate": 1.4691111111111111e-05, "loss": 2.8755, "mean_token_accuracy": 0.4829007627442479, "num_tokens": 56116561.0, "step": 3390 }, { "entropy": 2.1156416177749633, "epoch": 2.7199999999999998, "grad_norm": 0.00860595703125, "learning_rate": 1.466888888888889e-05, "loss": 2.8348, "mean_token_accuracy": 0.4874603316187859, "num_tokens": 56282204.0, "step": 3400 }, { "epoch": 2.7199999999999998, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 56282204.0, "eval_coding_runtime": 96.09, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.602, "step": 3400 }, { "epoch": 2.7199999999999998, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 56282204.0, "eval_physics_runtime": 63.9083, "eval_physics_samples_per_second": 7.824, "eval_physics_steps_per_second": 3.912, "step": 3400 }, { "entropy": 2.115247245877981, "epoch": 2.7279999999999998, "grad_norm": 0.0164794921875, "learning_rate": 1.4646666666666666e-05, "loss": 2.9011, "mean_token_accuracy": 0.48062342032790184, "num_tokens": 56441829.0, "step": 3410 }, { "entropy": 2.1556713484227656, "epoch": 2.7359999999999998, "grad_norm": 0.00811767578125, "learning_rate": 1.4624444444444446e-05, "loss": 2.9192, "mean_token_accuracy": 0.4784844558686018, "num_tokens": 56605478.0, "step": 3420 }, { "entropy": 2.0934521816670895, "epoch": 2.7439999999999998, "grad_norm": 0.020751953125, "learning_rate": 1.4602222222222225e-05, "loss": 2.8716, "mean_token_accuracy": 0.48414743393659593, "num_tokens": 56764430.0, "step": 3430 }, { "entropy": 2.0943986624479294, "epoch": 2.752, "grad_norm": 0.0087890625, "learning_rate": 1.4580000000000001e-05, "loss": 2.8474, "mean_token_accuracy": 0.48711673989892007, "num_tokens": 56935874.0, "step": 3440 }, { "entropy": 2.1071030378341673, "epoch": 2.76, "grad_norm": 0.00836181640625, "learning_rate": 1.455777777777778e-05, "loss": 2.8753, "mean_token_accuracy": 0.48405872862786054, "num_tokens": 57104270.0, "step": 3450 }, { "entropy": 2.062966075539589, "epoch": 2.768, "grad_norm": 0.00775146484375, "learning_rate": 1.4535555555555556e-05, "loss": 2.8382, "mean_token_accuracy": 0.4887760190293193, "num_tokens": 57270552.0, "step": 3460 }, { "entropy": 2.0992351293563845, "epoch": 2.776, "grad_norm": 0.006591796875, "learning_rate": 1.4513333333333336e-05, "loss": 2.865, "mean_token_accuracy": 0.4836490359157324, "num_tokens": 57434347.0, "step": 3470 }, { "entropy": 2.1229082576930525, "epoch": 2.784, "grad_norm": 0.01239013671875, "learning_rate": 1.4491111111111111e-05, "loss": 2.9044, "mean_token_accuracy": 0.47999848127365113, "num_tokens": 57602968.0, "step": 3480 }, { "entropy": 2.1613018564879893, "epoch": 2.792, "grad_norm": 0.01287841796875, "learning_rate": 1.446888888888889e-05, "loss": 2.9421, "mean_token_accuracy": 0.47603240981698036, "num_tokens": 57760844.0, "step": 3490 }, { "entropy": 2.184716771543026, "epoch": 2.8, "grad_norm": 0.01953125, "learning_rate": 1.4446666666666668e-05, "loss": 2.9371, "mean_token_accuracy": 0.47578484620898964, "num_tokens": 57925886.0, "step": 3500 }, { "epoch": 2.8, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 57925886.0, "eval_coding_runtime": 95.8176, "eval_coding_samples_per_second": 5.218, "eval_coding_steps_per_second": 2.609, "step": 3500 }, { "epoch": 2.8, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 57925886.0, "eval_physics_runtime": 63.2558, "eval_physics_samples_per_second": 7.904, "eval_physics_steps_per_second": 3.952, "step": 3500 }, { "entropy": 2.103351902216673, "epoch": 2.808, "grad_norm": 0.007415771484375, "learning_rate": 1.4424444444444446e-05, "loss": 2.8631, "mean_token_accuracy": 0.4846200209110975, "num_tokens": 58090738.0, "step": 3510 }, { "entropy": 2.0398203767836094, "epoch": 2.816, "grad_norm": 0.011474609375, "learning_rate": 1.4402222222222224e-05, "loss": 2.8505, "mean_token_accuracy": 0.4874549536034465, "num_tokens": 58262960.0, "step": 3520 }, { "entropy": 2.1221997044980525, "epoch": 2.824, "grad_norm": 0.0225830078125, "learning_rate": 1.4380000000000001e-05, "loss": 2.9097, "mean_token_accuracy": 0.47787830252200364, "num_tokens": 58419404.0, "step": 3530 }, { "entropy": 2.1137804381549357, "epoch": 2.832, "grad_norm": 0.016845703125, "learning_rate": 1.4357777777777779e-05, "loss": 2.8485, "mean_token_accuracy": 0.4841687433421612, "num_tokens": 58586324.0, "step": 3540 }, { "entropy": 2.141510935127735, "epoch": 2.84, "grad_norm": 0.01226806640625, "learning_rate": 1.4335555555555556e-05, "loss": 2.9084, "mean_token_accuracy": 0.4797965111210942, "num_tokens": 58753501.0, "step": 3550 }, { "entropy": 2.051307424902916, "epoch": 2.848, "grad_norm": 0.01129150390625, "learning_rate": 1.4313333333333334e-05, "loss": 2.8878, "mean_token_accuracy": 0.4822594778612256, "num_tokens": 58912949.0, "step": 3560 }, { "entropy": 2.07161338403821, "epoch": 2.856, "grad_norm": 0.0145263671875, "learning_rate": 1.4291111111111113e-05, "loss": 2.8904, "mean_token_accuracy": 0.48354612961411475, "num_tokens": 59082635.0, "step": 3570 }, { "entropy": 2.0775130078196526, "epoch": 2.864, "grad_norm": 0.0133056640625, "learning_rate": 1.426888888888889e-05, "loss": 2.8257, "mean_token_accuracy": 0.48698422852903606, "num_tokens": 59252494.0, "step": 3580 }, { "entropy": 2.098304682970047, "epoch": 2.872, "grad_norm": 0.0186767578125, "learning_rate": 1.4246666666666669e-05, "loss": 2.8601, "mean_token_accuracy": 0.48505131993442774, "num_tokens": 59415783.0, "step": 3590 }, { "entropy": 2.1120006777346134, "epoch": 2.88, "grad_norm": 0.00860595703125, "learning_rate": 1.4224444444444445e-05, "loss": 2.9037, "mean_token_accuracy": 0.47901488821953536, "num_tokens": 59574951.0, "step": 3600 }, { "epoch": 2.88, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 59574951.0, "eval_coding_runtime": 95.9323, "eval_coding_samples_per_second": 5.212, "eval_coding_steps_per_second": 2.606, "step": 3600 }, { "epoch": 2.88, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 59574951.0, "eval_physics_runtime": 63.9284, "eval_physics_samples_per_second": 7.821, "eval_physics_steps_per_second": 3.911, "step": 3600 }, { "entropy": 2.0330657340586185, "epoch": 2.888, "grad_norm": 0.006866455078125, "learning_rate": 1.4202222222222224e-05, "loss": 2.8195, "mean_token_accuracy": 0.4911785379052162, "num_tokens": 59742110.0, "step": 3610 }, { "entropy": 2.1034228295087813, "epoch": 2.896, "grad_norm": 0.01385498046875, "learning_rate": 1.418e-05, "loss": 2.9084, "mean_token_accuracy": 0.47603141590952874, "num_tokens": 59903274.0, "step": 3620 }, { "entropy": 2.0917329132556914, "epoch": 2.904, "grad_norm": 0.0115966796875, "learning_rate": 1.4157777777777779e-05, "loss": 2.8871, "mean_token_accuracy": 0.48319196961820127, "num_tokens": 60064633.0, "step": 3630 }, { "entropy": 2.1128187902271747, "epoch": 2.912, "grad_norm": 0.016845703125, "learning_rate": 1.4135555555555555e-05, "loss": 2.8765, "mean_token_accuracy": 0.4844615634530783, "num_tokens": 60234203.0, "step": 3640 }, { "entropy": 2.0910258486866953, "epoch": 2.92, "grad_norm": 0.025390625, "learning_rate": 1.4113333333333334e-05, "loss": 2.8796, "mean_token_accuracy": 0.48172583151608706, "num_tokens": 60392231.0, "step": 3650 }, { "entropy": 2.08348820656538, "epoch": 2.928, "grad_norm": 0.0133056640625, "learning_rate": 1.4091111111111114e-05, "loss": 2.8514, "mean_token_accuracy": 0.4873508451506495, "num_tokens": 60559123.0, "step": 3660 }, { "entropy": 2.125985510647297, "epoch": 2.936, "grad_norm": 0.0113525390625, "learning_rate": 1.406888888888889e-05, "loss": 2.8648, "mean_token_accuracy": 0.48422206435352566, "num_tokens": 60723827.0, "step": 3670 }, { "entropy": 2.055304131656885, "epoch": 2.944, "grad_norm": 0.01165771484375, "learning_rate": 1.4046666666666669e-05, "loss": 2.8182, "mean_token_accuracy": 0.4933670725673437, "num_tokens": 60893423.0, "step": 3680 }, { "entropy": 2.130630461126566, "epoch": 2.952, "grad_norm": 0.0146484375, "learning_rate": 1.4024444444444445e-05, "loss": 2.9199, "mean_token_accuracy": 0.47805153056979177, "num_tokens": 61061168.0, "step": 3690 }, { "entropy": 2.0945800617337227, "epoch": 2.96, "grad_norm": 0.00732421875, "learning_rate": 1.4002222222222224e-05, "loss": 2.8165, "mean_token_accuracy": 0.49219137877225877, "num_tokens": 61228692.0, "step": 3700 }, { "epoch": 2.96, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 61228692.0, "eval_coding_runtime": 96.0964, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.602, "step": 3700 }, { "epoch": 2.96, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 61228692.0, "eval_physics_runtime": 63.9227, "eval_physics_samples_per_second": 7.822, "eval_physics_steps_per_second": 3.911, "step": 3700 }, { "entropy": 2.108620697259903, "epoch": 2.968, "grad_norm": 0.012451171875, "learning_rate": 1.398e-05, "loss": 2.8812, "mean_token_accuracy": 0.4813636614009738, "num_tokens": 61391712.0, "step": 3710 }, { "entropy": 2.1721582867205145, "epoch": 2.976, "grad_norm": 0.01312255859375, "learning_rate": 1.395777777777778e-05, "loss": 2.8848, "mean_token_accuracy": 0.4803122842684388, "num_tokens": 61560852.0, "step": 3720 }, { "entropy": 2.070528745651245, "epoch": 2.984, "grad_norm": 0.0091552734375, "learning_rate": 1.3935555555555557e-05, "loss": 2.8335, "mean_token_accuracy": 0.48847730122506616, "num_tokens": 61730032.0, "step": 3730 }, { "entropy": 2.0762289818376303, "epoch": 2.992, "grad_norm": 0.00885009765625, "learning_rate": 1.3913333333333335e-05, "loss": 2.8461, "mean_token_accuracy": 0.4850864246487617, "num_tokens": 61898951.0, "step": 3740 }, { "entropy": 2.0912881292402745, "epoch": 3.0, "grad_norm": 0.009765625, "learning_rate": 1.3891111111111114e-05, "loss": 2.837, "mean_token_accuracy": 0.48912574239075185, "num_tokens": 62060412.0, "step": 3750 }, { "entropy": 2.0631627537310124, "epoch": 3.008, "grad_norm": 0.007568359375, "learning_rate": 1.386888888888889e-05, "loss": 2.8415, "mean_token_accuracy": 0.48977078720927236, "num_tokens": 62223875.0, "step": 3760 }, { "entropy": 2.096564035117626, "epoch": 3.016, "grad_norm": 0.0113525390625, "learning_rate": 1.3846666666666669e-05, "loss": 2.9114, "mean_token_accuracy": 0.4801721587777138, "num_tokens": 62384589.0, "step": 3770 }, { "entropy": 2.0905525363981723, "epoch": 3.024, "grad_norm": 0.012939453125, "learning_rate": 1.3824444444444445e-05, "loss": 2.8798, "mean_token_accuracy": 0.48243289571255443, "num_tokens": 62553166.0, "step": 3780 }, { "entropy": 2.0739366367459295, "epoch": 3.032, "grad_norm": 0.0106201171875, "learning_rate": 1.3802222222222224e-05, "loss": 2.8417, "mean_token_accuracy": 0.4866447998210788, "num_tokens": 62725683.0, "step": 3790 }, { "entropy": 2.123819203674793, "epoch": 3.04, "grad_norm": 0.0142822265625, "learning_rate": 1.378e-05, "loss": 2.8806, "mean_token_accuracy": 0.4823303589597344, "num_tokens": 62893113.0, "step": 3800 }, { "epoch": 3.04, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 62893113.0, "eval_coding_runtime": 96.0924, "eval_coding_samples_per_second": 5.203, "eval_coding_steps_per_second": 2.602, "step": 3800 }, { "epoch": 3.04, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 62893113.0, "eval_physics_runtime": 64.019, "eval_physics_samples_per_second": 7.81, "eval_physics_steps_per_second": 3.905, "step": 3800 }, { "entropy": 2.126609382778406, "epoch": 3.048, "grad_norm": 0.007598876953125, "learning_rate": 1.375777777777778e-05, "loss": 2.8639, "mean_token_accuracy": 0.483892584964633, "num_tokens": 63052246.0, "step": 3810 }, { "entropy": 2.10702953748405, "epoch": 3.056, "grad_norm": 0.00927734375, "learning_rate": 1.3735555555555557e-05, "loss": 2.9187, "mean_token_accuracy": 0.47841427810490134, "num_tokens": 63218088.0, "step": 3820 }, { "entropy": 2.0819733083248138, "epoch": 3.064, "grad_norm": 0.0203857421875, "learning_rate": 1.3713333333333335e-05, "loss": 2.8582, "mean_token_accuracy": 0.4865171581506729, "num_tokens": 63382267.0, "step": 3830 }, { "entropy": 2.1380323246121407, "epoch": 3.072, "grad_norm": 0.0146484375, "learning_rate": 1.3691111111111112e-05, "loss": 2.9185, "mean_token_accuracy": 0.47557642199099065, "num_tokens": 63551201.0, "step": 3840 }, { "entropy": 2.1633306980133056, "epoch": 3.08, "grad_norm": 0.013916015625, "learning_rate": 1.366888888888889e-05, "loss": 2.9549, "mean_token_accuracy": 0.4695492934435606, "num_tokens": 63712393.0, "step": 3850 }, { "entropy": 2.1138852924108504, "epoch": 3.088, "grad_norm": 0.012939453125, "learning_rate": 1.3646666666666668e-05, "loss": 2.8836, "mean_token_accuracy": 0.48224593289196493, "num_tokens": 63876362.0, "step": 3860 }, { "entropy": 2.065829519927502, "epoch": 3.096, "grad_norm": 0.0084228515625, "learning_rate": 1.3624444444444445e-05, "loss": 2.8553, "mean_token_accuracy": 0.48577627055346967, "num_tokens": 64048468.0, "step": 3870 }, { "entropy": 2.069417446106672, "epoch": 3.104, "grad_norm": 0.0177001953125, "learning_rate": 1.3602222222222223e-05, "loss": 2.8875, "mean_token_accuracy": 0.48573009874671697, "num_tokens": 64208171.0, "step": 3880 }, { "entropy": 2.0596209660172464, "epoch": 3.112, "grad_norm": 0.0157470703125, "learning_rate": 1.3580000000000002e-05, "loss": 2.8312, "mean_token_accuracy": 0.4890388745814562, "num_tokens": 64374523.0, "step": 3890 }, { "entropy": 2.1080878011882307, "epoch": 3.12, "grad_norm": 0.018798828125, "learning_rate": 1.3557777777777778e-05, "loss": 2.8625, "mean_token_accuracy": 0.48503108602017164, "num_tokens": 64544493.0, "step": 3900 }, { "epoch": 3.12, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 64544493.0, "eval_coding_runtime": 95.9386, "eval_coding_samples_per_second": 5.212, "eval_coding_steps_per_second": 2.606, "step": 3900 }, { "epoch": 3.12, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 64544493.0, "eval_physics_runtime": 63.7638, "eval_physics_samples_per_second": 7.841, "eval_physics_steps_per_second": 3.921, "step": 3900 }, { "entropy": 2.173774953186512, "epoch": 3.128, "grad_norm": 0.006378173828125, "learning_rate": 1.3535555555555557e-05, "loss": 2.9569, "mean_token_accuracy": 0.4744618350639939, "num_tokens": 64700791.0, "step": 3910 }, { "entropy": 2.0850121699273587, "epoch": 3.136, "grad_norm": 0.01806640625, "learning_rate": 1.3513333333333333e-05, "loss": 2.8541, "mean_token_accuracy": 0.48463378231972454, "num_tokens": 64872488.0, "step": 3920 }, { "entropy": 2.05157615467906, "epoch": 3.144, "grad_norm": 0.0120849609375, "learning_rate": 1.3491111111111113e-05, "loss": 2.8004, "mean_token_accuracy": 0.4967340124770999, "num_tokens": 65038809.0, "step": 3930 }, { "entropy": 2.1126288808882236, "epoch": 3.152, "grad_norm": 0.00787353515625, "learning_rate": 1.3468888888888888e-05, "loss": 2.8859, "mean_token_accuracy": 0.48235713597387075, "num_tokens": 65201124.0, "step": 3940 }, { "entropy": 2.1096727564930915, "epoch": 3.16, "grad_norm": 0.018798828125, "learning_rate": 1.3446666666666668e-05, "loss": 2.8681, "mean_token_accuracy": 0.48350467029958966, "num_tokens": 65362969.0, "step": 3950 }, { "entropy": 2.051737867295742, "epoch": 3.168, "grad_norm": 0.00982666015625, "learning_rate": 1.3424444444444447e-05, "loss": 2.8291, "mean_token_accuracy": 0.49016483388841153, "num_tokens": 65525868.0, "step": 3960 }, { "entropy": 2.045945072174072, "epoch": 3.176, "grad_norm": 0.0152587890625, "learning_rate": 1.3402222222222223e-05, "loss": 2.8157, "mean_token_accuracy": 0.49007453080266716, "num_tokens": 65699210.0, "step": 3970 }, { "entropy": 2.1140901155769827, "epoch": 3.184, "grad_norm": 0.0150146484375, "learning_rate": 1.3380000000000002e-05, "loss": 2.8657, "mean_token_accuracy": 0.48202042542397977, "num_tokens": 65861603.0, "step": 3980 }, { "entropy": 2.1720369420945644, "epoch": 3.192, "grad_norm": 0.0147705078125, "learning_rate": 1.3357777777777778e-05, "loss": 2.9377, "mean_token_accuracy": 0.4760190242901444, "num_tokens": 66029358.0, "step": 3990 }, { "entropy": 2.119066651165485, "epoch": 3.2, "grad_norm": 0.00921630859375, "learning_rate": 1.3335555555555558e-05, "loss": 2.8737, "mean_token_accuracy": 0.4842309094965458, "num_tokens": 66193675.0, "step": 4000 }, { "epoch": 3.2, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 66193675.0, "eval_coding_runtime": 96.2098, "eval_coding_samples_per_second": 5.197, "eval_coding_steps_per_second": 2.598, "step": 4000 }, { "epoch": 3.2, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 66193675.0, "eval_physics_runtime": 63.2033, "eval_physics_samples_per_second": 7.911, "eval_physics_steps_per_second": 3.955, "step": 4000 }, { "entropy": 2.0837896250188352, "epoch": 3.208, "grad_norm": 0.0067138671875, "learning_rate": 1.3313333333333333e-05, "loss": 2.8407, "mean_token_accuracy": 0.48807795345783234, "num_tokens": 66358798.0, "step": 4010 }, { "entropy": 2.0642104752361776, "epoch": 3.216, "grad_norm": 0.01068115234375, "learning_rate": 1.3291111111111113e-05, "loss": 2.8505, "mean_token_accuracy": 0.48781331330537797, "num_tokens": 66522835.0, "step": 4020 }, { "entropy": 2.152095118165016, "epoch": 3.224, "grad_norm": 0.010986328125, "learning_rate": 1.3268888888888889e-05, "loss": 2.8995, "mean_token_accuracy": 0.4772783122956753, "num_tokens": 66683941.0, "step": 4030 }, { "entropy": 2.158082576841116, "epoch": 3.232, "grad_norm": 0.01220703125, "learning_rate": 1.3246666666666668e-05, "loss": 2.9012, "mean_token_accuracy": 0.47906074915081265, "num_tokens": 66853693.0, "step": 4040 }, { "entropy": 2.0139299370348454, "epoch": 3.24, "grad_norm": 0.018798828125, "learning_rate": 1.3224444444444446e-05, "loss": 2.7856, "mean_token_accuracy": 0.496611525118351, "num_tokens": 67023270.0, "step": 4050 }, { "entropy": 2.047669659554958, "epoch": 3.248, "grad_norm": 0.0155029296875, "learning_rate": 1.3202222222222223e-05, "loss": 2.8252, "mean_token_accuracy": 0.49155114889144896, "num_tokens": 67192299.0, "step": 4060 }, { "entropy": 2.086980938911438, "epoch": 3.2560000000000002, "grad_norm": 0.0166015625, "learning_rate": 1.3180000000000001e-05, "loss": 2.8327, "mean_token_accuracy": 0.48563261032104493, "num_tokens": 67354858.0, "step": 4070 }, { "entropy": 2.0698634557425977, "epoch": 3.2640000000000002, "grad_norm": 0.0126953125, "learning_rate": 1.3157777777777778e-05, "loss": 2.8554, "mean_token_accuracy": 0.4893287578597665, "num_tokens": 67517068.0, "step": 4080 }, { "entropy": 2.117519376426935, "epoch": 3.2720000000000002, "grad_norm": 0.0135498046875, "learning_rate": 1.3135555555555558e-05, "loss": 2.8507, "mean_token_accuracy": 0.48337089773267505, "num_tokens": 67682839.0, "step": 4090 }, { "entropy": 2.08798957914114, "epoch": 3.2800000000000002, "grad_norm": 0.0155029296875, "learning_rate": 1.3113333333333334e-05, "loss": 2.8881, "mean_token_accuracy": 0.4841751741245389, "num_tokens": 67841380.0, "step": 4100 }, { "epoch": 3.2800000000000002, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 67841380.0, "eval_coding_runtime": 95.9623, "eval_coding_samples_per_second": 5.21, "eval_coding_steps_per_second": 2.605, "step": 4100 }, { "epoch": 3.2800000000000002, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 67841380.0, "eval_physics_runtime": 64.0377, "eval_physics_samples_per_second": 7.808, "eval_physics_steps_per_second": 3.904, "step": 4100 }, { "entropy": 2.106385115534067, "epoch": 3.288, "grad_norm": 0.00714111328125, "learning_rate": 1.3091111111111113e-05, "loss": 2.848, "mean_token_accuracy": 0.4901309922337532, "num_tokens": 68009796.0, "step": 4110 }, { "entropy": 2.091083314269781, "epoch": 3.296, "grad_norm": 0.0142822265625, "learning_rate": 1.306888888888889e-05, "loss": 2.9206, "mean_token_accuracy": 0.4773440737277269, "num_tokens": 68175287.0, "step": 4120 }, { "entropy": 2.1063731901347635, "epoch": 3.304, "grad_norm": 0.01416015625, "learning_rate": 1.3046666666666668e-05, "loss": 2.8551, "mean_token_accuracy": 0.4847688987851143, "num_tokens": 68343118.0, "step": 4130 }, { "entropy": 2.0561120234429837, "epoch": 3.312, "grad_norm": 0.01397705078125, "learning_rate": 1.3024444444444446e-05, "loss": 2.8074, "mean_token_accuracy": 0.48743151165544985, "num_tokens": 68513030.0, "step": 4140 }, { "entropy": 2.136690790951252, "epoch": 3.32, "grad_norm": 0.01190185546875, "learning_rate": 1.3002222222222223e-05, "loss": 2.9183, "mean_token_accuracy": 0.4794544292613864, "num_tokens": 68680267.0, "step": 4150 }, { "entropy": 2.1080239094793796, "epoch": 3.328, "grad_norm": 0.0101318359375, "learning_rate": 1.2980000000000001e-05, "loss": 2.8969, "mean_token_accuracy": 0.48164221588522194, "num_tokens": 68839883.0, "step": 4160 }, { "entropy": 2.1078054189682005, "epoch": 3.336, "grad_norm": 0.007537841796875, "learning_rate": 1.2957777777777779e-05, "loss": 2.9034, "mean_token_accuracy": 0.48090961929410697, "num_tokens": 69005121.0, "step": 4170 }, { "entropy": 2.0919542267918585, "epoch": 3.344, "grad_norm": 0.008544921875, "learning_rate": 1.2935555555555556e-05, "loss": 2.8543, "mean_token_accuracy": 0.48435192797333004, "num_tokens": 69173450.0, "step": 4180 }, { "entropy": 2.0624096959829332, "epoch": 3.352, "grad_norm": 0.021484375, "learning_rate": 1.2913333333333336e-05, "loss": 2.8425, "mean_token_accuracy": 0.48494853284209966, "num_tokens": 69343066.0, "step": 4190 }, { "entropy": 2.067618714272976, "epoch": 3.36, "grad_norm": 0.01177978515625, "learning_rate": 1.2891111111111112e-05, "loss": 2.8428, "mean_token_accuracy": 0.4874798431992531, "num_tokens": 69510512.0, "step": 4200 }, { "epoch": 3.36, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 69510512.0, "eval_coding_runtime": 96.2288, "eval_coding_samples_per_second": 5.196, "eval_coding_steps_per_second": 2.598, "step": 4200 }, { "epoch": 3.36, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 69510512.0, "eval_physics_runtime": 64.0247, "eval_physics_samples_per_second": 7.809, "eval_physics_steps_per_second": 3.905, "step": 4200 }, { "entropy": 2.0581852816045285, "epoch": 3.368, "grad_norm": 0.00909423828125, "learning_rate": 1.2868888888888891e-05, "loss": 2.8097, "mean_token_accuracy": 0.4904202425852418, "num_tokens": 69684041.0, "step": 4210 }, { "entropy": 2.0333582244813444, "epoch": 3.376, "grad_norm": 0.0078125, "learning_rate": 1.2846666666666667e-05, "loss": 2.8192, "mean_token_accuracy": 0.4936717139557004, "num_tokens": 69853992.0, "step": 4220 }, { "entropy": 2.168538150936365, "epoch": 3.384, "grad_norm": 0.0162353515625, "learning_rate": 1.2824444444444446e-05, "loss": 2.9049, "mean_token_accuracy": 0.4765908816829324, "num_tokens": 70018276.0, "step": 4230 }, { "entropy": 2.168168669193983, "epoch": 3.392, "grad_norm": 0.0108642578125, "learning_rate": 1.2802222222222222e-05, "loss": 2.9258, "mean_token_accuracy": 0.4788417614996433, "num_tokens": 70178388.0, "step": 4240 }, { "entropy": 2.0856957830488683, "epoch": 3.4, "grad_norm": 0.01318359375, "learning_rate": 1.2780000000000001e-05, "loss": 2.8073, "mean_token_accuracy": 0.4932461641728878, "num_tokens": 70341232.0, "step": 4250 }, { "entropy": 2.068350695818663, "epoch": 3.408, "grad_norm": 0.01031494140625, "learning_rate": 1.2757777777777777e-05, "loss": 2.8485, "mean_token_accuracy": 0.48950101863592865, "num_tokens": 70507903.0, "step": 4260 }, { "entropy": 2.110171654820442, "epoch": 3.416, "grad_norm": 0.01416015625, "learning_rate": 1.2735555555555557e-05, "loss": 2.848, "mean_token_accuracy": 0.4902174774557352, "num_tokens": 70668601.0, "step": 4270 }, { "entropy": 2.070227029919624, "epoch": 3.424, "grad_norm": 0.01251220703125, "learning_rate": 1.2713333333333336e-05, "loss": 2.8453, "mean_token_accuracy": 0.4847742343321443, "num_tokens": 70835982.0, "step": 4280 }, { "entropy": 2.0945966966450213, "epoch": 3.432, "grad_norm": 0.006561279296875, "learning_rate": 1.2691111111111112e-05, "loss": 2.8637, "mean_token_accuracy": 0.48577702064067124, "num_tokens": 71003177.0, "step": 4290 }, { "entropy": 2.1412690974771977, "epoch": 3.44, "grad_norm": 0.0081787109375, "learning_rate": 1.2668888888888891e-05, "loss": 2.9252, "mean_token_accuracy": 0.4771595584228635, "num_tokens": 71172338.0, "step": 4300 }, { "epoch": 3.44, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 71172338.0, "eval_coding_runtime": 96.2342, "eval_coding_samples_per_second": 5.196, "eval_coding_steps_per_second": 2.598, "step": 4300 }, { "epoch": 3.44, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 71172338.0, "eval_physics_runtime": 64.0711, "eval_physics_samples_per_second": 7.804, "eval_physics_steps_per_second": 3.902, "step": 4300 }, { "entropy": 2.109636855870485, "epoch": 3.448, "grad_norm": 0.00994873046875, "learning_rate": 1.2646666666666667e-05, "loss": 2.8686, "mean_token_accuracy": 0.48347108364105223, "num_tokens": 71341354.0, "step": 4310 }, { "entropy": 2.1264806307852266, "epoch": 3.456, "grad_norm": 0.00714111328125, "learning_rate": 1.2624444444444446e-05, "loss": 2.8904, "mean_token_accuracy": 0.48083948735147714, "num_tokens": 71506904.0, "step": 4320 }, { "entropy": 2.1252997659146784, "epoch": 3.464, "grad_norm": 0.018310546875, "learning_rate": 1.2602222222222222e-05, "loss": 2.8975, "mean_token_accuracy": 0.47978228740394113, "num_tokens": 71664931.0, "step": 4330 }, { "entropy": 2.1598515301942824, "epoch": 3.472, "grad_norm": 0.0118408203125, "learning_rate": 1.2580000000000002e-05, "loss": 2.9, "mean_token_accuracy": 0.4783101208508015, "num_tokens": 71830111.0, "step": 4340 }, { "entropy": 2.0683537125587463, "epoch": 3.48, "grad_norm": 0.0118408203125, "learning_rate": 1.2557777777777779e-05, "loss": 2.8126, "mean_token_accuracy": 0.4885444832965732, "num_tokens": 71995325.0, "step": 4350 }, { "entropy": 2.0984449461102486, "epoch": 3.488, "grad_norm": 0.0196533203125, "learning_rate": 1.2535555555555557e-05, "loss": 2.8636, "mean_token_accuracy": 0.48451535440981386, "num_tokens": 72162307.0, "step": 4360 }, { "entropy": 2.0970567755401133, "epoch": 3.496, "grad_norm": 0.0115966796875, "learning_rate": 1.2513333333333334e-05, "loss": 2.8887, "mean_token_accuracy": 0.4860412361100316, "num_tokens": 72325173.0, "step": 4370 }, { "entropy": 2.137071938067675, "epoch": 3.504, "grad_norm": 0.01409912109375, "learning_rate": 1.2491111111111112e-05, "loss": 2.9094, "mean_token_accuracy": 0.4761716028675437, "num_tokens": 72492324.0, "step": 4380 }, { "entropy": 2.0667936712503434, "epoch": 3.512, "grad_norm": 0.00830078125, "learning_rate": 1.246888888888889e-05, "loss": 2.8455, "mean_token_accuracy": 0.48641248401254417, "num_tokens": 72657433.0, "step": 4390 }, { "entropy": 2.107886756956577, "epoch": 3.52, "grad_norm": 0.0120849609375, "learning_rate": 1.2446666666666667e-05, "loss": 2.9048, "mean_token_accuracy": 0.48285721000283954, "num_tokens": 72814868.0, "step": 4400 }, { "epoch": 3.52, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 72814868.0, "eval_coding_runtime": 96.291, "eval_coding_samples_per_second": 5.193, "eval_coding_steps_per_second": 2.596, "step": 4400 }, { "epoch": 3.52, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 72814868.0, "eval_physics_runtime": 64.0553, "eval_physics_samples_per_second": 7.806, "eval_physics_steps_per_second": 3.903, "step": 4400 }, { "entropy": 2.0622735850512983, "epoch": 3.528, "grad_norm": 0.01153564453125, "learning_rate": 1.2424444444444445e-05, "loss": 2.867, "mean_token_accuracy": 0.4835289839655161, "num_tokens": 72981043.0, "step": 4410 }, { "entropy": 2.107982115447521, "epoch": 3.536, "grad_norm": 0.01171875, "learning_rate": 1.2402222222222222e-05, "loss": 2.8809, "mean_token_accuracy": 0.48394747320562603, "num_tokens": 73144676.0, "step": 4420 }, { "entropy": 2.113698351383209, "epoch": 3.544, "grad_norm": 0.00775146484375, "learning_rate": 1.2380000000000002e-05, "loss": 2.8634, "mean_token_accuracy": 0.48486993331462147, "num_tokens": 73309118.0, "step": 4430 }, { "entropy": 2.0850420869886874, "epoch": 3.552, "grad_norm": 0.01434326171875, "learning_rate": 1.235777777777778e-05, "loss": 2.8099, "mean_token_accuracy": 0.48757751416414974, "num_tokens": 73474150.0, "step": 4440 }, { "entropy": 2.0784714840352536, "epoch": 3.56, "grad_norm": 0.020751953125, "learning_rate": 1.2335555555555557e-05, "loss": 2.8407, "mean_token_accuracy": 0.4856953978538513, "num_tokens": 73641373.0, "step": 4450 }, { "entropy": 2.054875613749027, "epoch": 3.568, "grad_norm": 0.00836181640625, "learning_rate": 1.2313333333333335e-05, "loss": 2.8258, "mean_token_accuracy": 0.488472581282258, "num_tokens": 73814337.0, "step": 4460 }, { "entropy": 2.18665976151824, "epoch": 3.576, "grad_norm": 0.01361083984375, "learning_rate": 1.2291111111111112e-05, "loss": 2.9774, "mean_token_accuracy": 0.47147147078067064, "num_tokens": 73975753.0, "step": 4470 }, { "entropy": 2.1290567815303802, "epoch": 3.584, "grad_norm": 0.025146484375, "learning_rate": 1.226888888888889e-05, "loss": 2.8775, "mean_token_accuracy": 0.48378261309117077, "num_tokens": 74139916.0, "step": 4480 }, { "entropy": 2.046985600143671, "epoch": 3.592, "grad_norm": 0.01318359375, "learning_rate": 1.2246666666666667e-05, "loss": 2.7691, "mean_token_accuracy": 0.4934337234124541, "num_tokens": 74319118.0, "step": 4490 }, { "entropy": 2.143827559798956, "epoch": 3.6, "grad_norm": 0.00836181640625, "learning_rate": 1.2224444444444445e-05, "loss": 2.9267, "mean_token_accuracy": 0.47726704850792884, "num_tokens": 74481410.0, "step": 4500 }, { "epoch": 3.6, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 74481410.0, "eval_coding_runtime": 96.5913, "eval_coding_samples_per_second": 5.176, "eval_coding_steps_per_second": 2.588, "step": 4500 }, { "epoch": 3.6, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 74481410.0, "eval_physics_runtime": 63.9746, "eval_physics_samples_per_second": 7.816, "eval_physics_steps_per_second": 3.908, "step": 4500 }, { "entropy": 2.1184289254248143, "epoch": 3.608, "grad_norm": 0.01318359375, "learning_rate": 1.2202222222222224e-05, "loss": 2.8798, "mean_token_accuracy": 0.48143869936466216, "num_tokens": 74648297.0, "step": 4510 }, { "entropy": 2.02643828317523, "epoch": 3.616, "grad_norm": 0.0162353515625, "learning_rate": 1.218e-05, "loss": 2.8212, "mean_token_accuracy": 0.49379726257175205, "num_tokens": 74818399.0, "step": 4520 }, { "entropy": 2.068718433380127, "epoch": 3.624, "grad_norm": 0.01434326171875, "learning_rate": 1.215777777777778e-05, "loss": 2.8464, "mean_token_accuracy": 0.489543769787997, "num_tokens": 74986438.0, "step": 4530 }, { "entropy": 2.111594308167696, "epoch": 3.632, "grad_norm": 0.0179443359375, "learning_rate": 1.2135555555555556e-05, "loss": 2.8478, "mean_token_accuracy": 0.48541087526828053, "num_tokens": 75148445.0, "step": 4540 }, { "entropy": 2.127137565612793, "epoch": 3.64, "grad_norm": 0.01141357421875, "learning_rate": 1.2113333333333335e-05, "loss": 2.9224, "mean_token_accuracy": 0.47523438911885024, "num_tokens": 75309638.0, "step": 4550 }, { "entropy": 2.1118921637535095, "epoch": 3.648, "grad_norm": 0.02197265625, "learning_rate": 1.209111111111111e-05, "loss": 2.848, "mean_token_accuracy": 0.48891634624451397, "num_tokens": 75475448.0, "step": 4560 }, { "entropy": 2.140853127092123, "epoch": 3.656, "grad_norm": 0.01055908203125, "learning_rate": 1.206888888888889e-05, "loss": 2.8685, "mean_token_accuracy": 0.4791616892442107, "num_tokens": 75639974.0, "step": 4570 }, { "entropy": 2.0595723688602448, "epoch": 3.664, "grad_norm": 0.01165771484375, "learning_rate": 1.204666666666667e-05, "loss": 2.8164, "mean_token_accuracy": 0.4857981903478503, "num_tokens": 75811516.0, "step": 4580 }, { "entropy": 2.1883110869675875, "epoch": 3.672, "grad_norm": 0.01214599609375, "learning_rate": 1.2024444444444445e-05, "loss": 2.9329, "mean_token_accuracy": 0.47467537205666305, "num_tokens": 75970707.0, "step": 4590 }, { "entropy": 2.1031787514686586, "epoch": 3.68, "grad_norm": 0.00823974609375, "learning_rate": 1.2002222222222225e-05, "loss": 2.8995, "mean_token_accuracy": 0.4794537903741002, "num_tokens": 76131750.0, "step": 4600 }, { "epoch": 3.68, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 76131750.0, "eval_coding_runtime": 96.3114, "eval_coding_samples_per_second": 5.191, "eval_coding_steps_per_second": 2.596, "step": 4600 }, { "epoch": 3.68, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 76131750.0, "eval_physics_runtime": 64.1076, "eval_physics_samples_per_second": 7.799, "eval_physics_steps_per_second": 3.9, "step": 4600 }, { "entropy": 2.0964153692126275, "epoch": 3.6879999999999997, "grad_norm": 0.00958251953125, "learning_rate": 1.198e-05, "loss": 2.8864, "mean_token_accuracy": 0.48329670932143926, "num_tokens": 76298810.0, "step": 4610 }, { "entropy": 2.1300269193947314, "epoch": 3.6959999999999997, "grad_norm": 0.00830078125, "learning_rate": 1.195777777777778e-05, "loss": 2.9344, "mean_token_accuracy": 0.47541601955890656, "num_tokens": 76460225.0, "step": 4620 }, { "entropy": 2.0679371163249014, "epoch": 3.7039999999999997, "grad_norm": 0.0169677734375, "learning_rate": 1.1935555555555556e-05, "loss": 2.8278, "mean_token_accuracy": 0.491782989539206, "num_tokens": 76631291.0, "step": 4630 }, { "entropy": 2.1284476265311243, "epoch": 3.7119999999999997, "grad_norm": 0.00823974609375, "learning_rate": 1.1913333333333335e-05, "loss": 2.8983, "mean_token_accuracy": 0.47942620553076265, "num_tokens": 76793614.0, "step": 4640 }, { "entropy": 2.2345407247543334, "epoch": 3.7199999999999998, "grad_norm": 0.006591796875, "learning_rate": 1.1891111111111111e-05, "loss": 2.9871, "mean_token_accuracy": 0.4687922740355134, "num_tokens": 76956311.0, "step": 4650 }, { "entropy": 2.088286682218313, "epoch": 3.7279999999999998, "grad_norm": 0.00799560546875, "learning_rate": 1.186888888888889e-05, "loss": 2.8911, "mean_token_accuracy": 0.4833483690395951, "num_tokens": 77121009.0, "step": 4660 }, { "entropy": 2.1162419065833094, "epoch": 3.7359999999999998, "grad_norm": 0.0101318359375, "learning_rate": 1.1846666666666668e-05, "loss": 2.9061, "mean_token_accuracy": 0.4805751595646143, "num_tokens": 77285031.0, "step": 4670 }, { "entropy": 2.109429658949375, "epoch": 3.7439999999999998, "grad_norm": 0.01416015625, "learning_rate": 1.1824444444444445e-05, "loss": 2.8624, "mean_token_accuracy": 0.48205650951713325, "num_tokens": 77446311.0, "step": 4680 }, { "entropy": 2.145034124702215, "epoch": 3.752, "grad_norm": 0.01513671875, "learning_rate": 1.1802222222222223e-05, "loss": 2.9137, "mean_token_accuracy": 0.48031143862754105, "num_tokens": 77611656.0, "step": 4690 }, { "entropy": 2.0881564006209374, "epoch": 3.76, "grad_norm": 0.0091552734375, "learning_rate": 1.178e-05, "loss": 2.8644, "mean_token_accuracy": 0.4838464429602027, "num_tokens": 77781488.0, "step": 4700 }, { "epoch": 3.76, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 77781488.0, "eval_coding_runtime": 96.4001, "eval_coding_samples_per_second": 5.187, "eval_coding_steps_per_second": 2.593, "step": 4700 }, { "epoch": 3.76, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 77781488.0, "eval_physics_runtime": 64.2998, "eval_physics_samples_per_second": 7.776, "eval_physics_steps_per_second": 3.888, "step": 4700 }, { "entropy": 2.149661224335432, "epoch": 3.768, "grad_norm": 0.00958251953125, "learning_rate": 1.1757777777777778e-05, "loss": 2.9199, "mean_token_accuracy": 0.4811420265585184, "num_tokens": 77946801.0, "step": 4710 }, { "entropy": 2.117289527505636, "epoch": 3.776, "grad_norm": 0.00579833984375, "learning_rate": 1.1735555555555556e-05, "loss": 2.8854, "mean_token_accuracy": 0.481607536226511, "num_tokens": 78108138.0, "step": 4720 }, { "entropy": 2.0819385163486004, "epoch": 3.784, "grad_norm": 0.010986328125, "learning_rate": 1.1713333333333334e-05, "loss": 2.8811, "mean_token_accuracy": 0.48273362312465906, "num_tokens": 78277029.0, "step": 4730 }, { "entropy": 2.0590057149529457, "epoch": 3.792, "grad_norm": 0.0101318359375, "learning_rate": 1.1691111111111113e-05, "loss": 2.8452, "mean_token_accuracy": 0.4871557403355837, "num_tokens": 78449177.0, "step": 4740 }, { "entropy": 2.097750276327133, "epoch": 3.8, "grad_norm": 0.0167236328125, "learning_rate": 1.1668888888888889e-05, "loss": 2.8818, "mean_token_accuracy": 0.48112841956317426, "num_tokens": 78612331.0, "step": 4750 }, { "entropy": 2.1645462520420553, "epoch": 3.808, "grad_norm": 0.01031494140625, "learning_rate": 1.1646666666666668e-05, "loss": 2.8845, "mean_token_accuracy": 0.48126564230769875, "num_tokens": 78778761.0, "step": 4760 }, { "entropy": 2.188579352200031, "epoch": 3.816, "grad_norm": 0.00830078125, "learning_rate": 1.1624444444444446e-05, "loss": 2.943, "mean_token_accuracy": 0.4771542547270656, "num_tokens": 78938941.0, "step": 4770 }, { "entropy": 2.0487091943621634, "epoch": 3.824, "grad_norm": 0.0108642578125, "learning_rate": 1.1602222222222223e-05, "loss": 2.8339, "mean_token_accuracy": 0.4885430796071887, "num_tokens": 79102912.0, "step": 4780 }, { "entropy": 2.070402644574642, "epoch": 3.832, "grad_norm": 0.01025390625, "learning_rate": 1.1580000000000001e-05, "loss": 2.8468, "mean_token_accuracy": 0.48718465957790613, "num_tokens": 79265096.0, "step": 4790 }, { "entropy": 2.1173644840717314, "epoch": 3.84, "grad_norm": 0.0164794921875, "learning_rate": 1.1557777777777779e-05, "loss": 2.8836, "mean_token_accuracy": 0.4810946863144636, "num_tokens": 79436236.0, "step": 4800 }, { "epoch": 3.84, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 79436236.0, "eval_coding_runtime": 96.3659, "eval_coding_samples_per_second": 5.189, "eval_coding_steps_per_second": 2.594, "step": 4800 }, { "epoch": 3.84, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 79436236.0, "eval_physics_runtime": 64.0966, "eval_physics_samples_per_second": 7.801, "eval_physics_steps_per_second": 3.9, "step": 4800 }, { "entropy": 2.054683540761471, "epoch": 3.848, "grad_norm": 0.006744384765625, "learning_rate": 1.1535555555555556e-05, "loss": 2.8568, "mean_token_accuracy": 0.48323863800615074, "num_tokens": 79607776.0, "step": 4810 }, { "entropy": 2.0291363187134266, "epoch": 3.856, "grad_norm": 0.00994873046875, "learning_rate": 1.1513333333333334e-05, "loss": 2.8152, "mean_token_accuracy": 0.4893066372722387, "num_tokens": 79772163.0, "step": 4820 }, { "entropy": 2.0947953447699548, "epoch": 3.864, "grad_norm": 0.0166015625, "learning_rate": 1.1491111111111113e-05, "loss": 2.8396, "mean_token_accuracy": 0.48406948503106834, "num_tokens": 79937921.0, "step": 4830 }, { "entropy": 2.1082398749887945, "epoch": 3.872, "grad_norm": 0.0194091796875, "learning_rate": 1.1468888888888889e-05, "loss": 2.894, "mean_token_accuracy": 0.48280362226068974, "num_tokens": 80099967.0, "step": 4840 }, { "entropy": 2.0668815061450005, "epoch": 3.88, "grad_norm": 0.0107421875, "learning_rate": 1.1446666666666668e-05, "loss": 2.873, "mean_token_accuracy": 0.48297429028898475, "num_tokens": 80264711.0, "step": 4850 }, { "entropy": 2.10538115054369, "epoch": 3.888, "grad_norm": 0.006988525390625, "learning_rate": 1.1424444444444444e-05, "loss": 2.8718, "mean_token_accuracy": 0.48427290078252555, "num_tokens": 80430055.0, "step": 4860 }, { "entropy": 2.1708266600966453, "epoch": 3.896, "grad_norm": 0.00848388671875, "learning_rate": 1.1402222222222224e-05, "loss": 2.9515, "mean_token_accuracy": 0.47322598174214364, "num_tokens": 80598176.0, "step": 4870 }, { "entropy": 2.1749521791934967, "epoch": 3.904, "grad_norm": 0.009033203125, "learning_rate": 1.138e-05, "loss": 2.9558, "mean_token_accuracy": 0.47180029209703206, "num_tokens": 80763923.0, "step": 4880 }, { "entropy": 2.058152811229229, "epoch": 3.912, "grad_norm": 0.0164794921875, "learning_rate": 1.1357777777777779e-05, "loss": 2.8218, "mean_token_accuracy": 0.4886314647272229, "num_tokens": 80929452.0, "step": 4890 }, { "entropy": 2.07114285081625, "epoch": 3.92, "grad_norm": 0.00909423828125, "learning_rate": 1.1335555555555558e-05, "loss": 2.8476, "mean_token_accuracy": 0.4865548949688673, "num_tokens": 81098568.0, "step": 4900 }, { "epoch": 3.92, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 81098568.0, "eval_coding_runtime": 95.8984, "eval_coding_samples_per_second": 5.214, "eval_coding_steps_per_second": 2.607, "step": 4900 }, { "epoch": 3.92, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 81098568.0, "eval_physics_runtime": 64.04, "eval_physics_samples_per_second": 7.808, "eval_physics_steps_per_second": 3.904, "step": 4900 }, { "entropy": 2.1175915651023387, "epoch": 3.928, "grad_norm": 0.0198974609375, "learning_rate": 1.1313333333333334e-05, "loss": 2.8916, "mean_token_accuracy": 0.4806467808783054, "num_tokens": 81258231.0, "step": 4910 }, { "entropy": 2.0774527341127396, "epoch": 3.936, "grad_norm": 0.01202392578125, "learning_rate": 1.1291111111111113e-05, "loss": 2.8713, "mean_token_accuracy": 0.4864236503839493, "num_tokens": 81423109.0, "step": 4920 }, { "entropy": 2.1824303403496743, "epoch": 3.944, "grad_norm": 0.02294921875, "learning_rate": 1.126888888888889e-05, "loss": 2.888, "mean_token_accuracy": 0.4796422986313701, "num_tokens": 81586268.0, "step": 4930 }, { "entropy": 2.093172822892666, "epoch": 3.952, "grad_norm": 0.0120849609375, "learning_rate": 1.1246666666666669e-05, "loss": 2.8452, "mean_token_accuracy": 0.4844025122001767, "num_tokens": 81753214.0, "step": 4940 }, { "entropy": 2.1183643460273744, "epoch": 3.96, "grad_norm": 0.0087890625, "learning_rate": 1.1224444444444444e-05, "loss": 2.893, "mean_token_accuracy": 0.4826692482456565, "num_tokens": 81918071.0, "step": 4950 }, { "entropy": 2.1083862885832785, "epoch": 3.968, "grad_norm": 0.0289306640625, "learning_rate": 1.1202222222222224e-05, "loss": 2.8873, "mean_token_accuracy": 0.48046484012156726, "num_tokens": 82076472.0, "step": 4960 }, { "entropy": 2.0946729026734827, "epoch": 3.976, "grad_norm": 0.00897216796875, "learning_rate": 1.1180000000000001e-05, "loss": 2.8512, "mean_token_accuracy": 0.4866802429780364, "num_tokens": 82247992.0, "step": 4970 }, { "entropy": 2.0790536925196648, "epoch": 3.984, "grad_norm": 0.021728515625, "learning_rate": 1.1157777777777779e-05, "loss": 2.8292, "mean_token_accuracy": 0.4870005436241627, "num_tokens": 82418472.0, "step": 4980 }, { "entropy": 2.086121869832277, "epoch": 3.992, "grad_norm": 0.0186767578125, "learning_rate": 1.1135555555555557e-05, "loss": 2.8427, "mean_token_accuracy": 0.48974594939500093, "num_tokens": 82582854.0, "step": 4990 }, { "entropy": 2.0794210851192476, "epoch": 4.0, "grad_norm": 0.0203857421875, "learning_rate": 1.1113333333333334e-05, "loss": 2.8479, "mean_token_accuracy": 0.4871451547369361, "num_tokens": 82747216.0, "step": 5000 }, { "epoch": 4.0, "eval_coding_entropy": 1.266241003036499, "eval_coding_loss": 1.2625732421875, "eval_coding_mean_token_accuracy": 0.6838934738636017, "eval_coding_num_tokens": 82747216.0, "eval_coding_runtime": 95.9103, "eval_coding_samples_per_second": 5.213, "eval_coding_steps_per_second": 2.607, "step": 5000 }, { "epoch": 4.0, "eval_physics_entropy": 2.116599133968353, "eval_physics_loss": 2.9798319339752197, "eval_physics_mean_token_accuracy": 0.4795549786090851, "eval_physics_num_tokens": 82747216.0, "eval_physics_runtime": 63.8373, "eval_physics_samples_per_second": 7.832, "eval_physics_steps_per_second": 3.916, "step": 5000 } ], "logging_steps": 10, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.713055577109017e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }