| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.61, | |
| "eval_steps": 500, | |
| "global_step": 61000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001, | |
| "grad_norm": 17.39519691467285, | |
| "learning_rate": 2.97e-05, | |
| "loss": 9.7941, | |
| "num_input_tokens_seen": 6553600, | |
| "step": 100, | |
| "train_runtime": 74.0623, | |
| "train_tokens_per_second": 88487.632 | |
| }, | |
| { | |
| "epoch": 0.002, | |
| "grad_norm": 10.212440490722656, | |
| "learning_rate": 5.97e-05, | |
| "loss": 1.0389, | |
| "num_input_tokens_seen": 13107200, | |
| "step": 200, | |
| "train_runtime": 135.0365, | |
| "train_tokens_per_second": 97064.126 | |
| }, | |
| { | |
| "epoch": 0.003, | |
| "grad_norm": 6.982235908508301, | |
| "learning_rate": 8.969999999999998e-05, | |
| "loss": 0.7951, | |
| "num_input_tokens_seen": 19660800, | |
| "step": 300, | |
| "train_runtime": 196.4342, | |
| "train_tokens_per_second": 100088.472 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 2.089735507965088, | |
| "learning_rate": 0.0001197, | |
| "loss": 0.6341, | |
| "num_input_tokens_seen": 26214400, | |
| "step": 400, | |
| "train_runtime": 257.5653, | |
| "train_tokens_per_second": 101777.682 | |
| }, | |
| { | |
| "epoch": 0.005, | |
| "grad_norm": 2.6269969940185547, | |
| "learning_rate": 0.00014969999999999998, | |
| "loss": 0.5353, | |
| "num_input_tokens_seen": 32768000, | |
| "step": 500, | |
| "train_runtime": 323.599, | |
| "train_tokens_per_second": 101261.143 | |
| }, | |
| { | |
| "epoch": 0.006, | |
| "grad_norm": 0.9126470685005188, | |
| "learning_rate": 0.00017969999999999998, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 39321600, | |
| "step": 600, | |
| "train_runtime": 385.3073, | |
| "train_tokens_per_second": 102052.566 | |
| }, | |
| { | |
| "epoch": 0.007, | |
| "grad_norm": 0.7452394366264343, | |
| "learning_rate": 0.00020969999999999997, | |
| "loss": 0.4534, | |
| "num_input_tokens_seen": 45875200, | |
| "step": 700, | |
| "train_runtime": 447.534, | |
| "train_tokens_per_second": 102506.63 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 0.6909123659133911, | |
| "learning_rate": 0.0002397, | |
| "loss": 0.4323, | |
| "num_input_tokens_seen": 52428800, | |
| "step": 800, | |
| "train_runtime": 510.1043, | |
| "train_tokens_per_second": 102780.558 | |
| }, | |
| { | |
| "epoch": 0.009, | |
| "grad_norm": 0.5689504146575928, | |
| "learning_rate": 0.0002697, | |
| "loss": 0.4262, | |
| "num_input_tokens_seen": 58982400, | |
| "step": 900, | |
| "train_runtime": 571.3595, | |
| "train_tokens_per_second": 103231.669 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.42208704352378845, | |
| "learning_rate": 0.00029969999999999997, | |
| "loss": 0.4158, | |
| "num_input_tokens_seen": 65536000, | |
| "step": 1000, | |
| "train_runtime": 638.5123, | |
| "train_tokens_per_second": 102638.586 | |
| }, | |
| { | |
| "epoch": 0.011, | |
| "grad_norm": 0.4542798399925232, | |
| "learning_rate": 0.00029999925978027874, | |
| "loss": 0.4127, | |
| "num_input_tokens_seen": 72089600, | |
| "step": 1100, | |
| "train_runtime": 698.6527, | |
| "train_tokens_per_second": 103183.742 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.4086480736732483, | |
| "learning_rate": 0.0002999970091452017, | |
| "loss": 0.4018, | |
| "num_input_tokens_seen": 78643200, | |
| "step": 1200, | |
| "train_runtime": 761.7182, | |
| "train_tokens_per_second": 103244.485 | |
| }, | |
| { | |
| "epoch": 0.013, | |
| "grad_norm": 0.37623685598373413, | |
| "learning_rate": 0.00029999324804190795, | |
| "loss": 0.3969, | |
| "num_input_tokens_seen": 85196800, | |
| "step": 1300, | |
| "train_runtime": 827.9033, | |
| "train_tokens_per_second": 102906.7 | |
| }, | |
| { | |
| "epoch": 0.014, | |
| "grad_norm": 0.3346163332462311, | |
| "learning_rate": 0.0002999879765082716, | |
| "loss": 0.3906, | |
| "num_input_tokens_seen": 91750400, | |
| "step": 1400, | |
| "train_runtime": 889.5401, | |
| "train_tokens_per_second": 103143.635 | |
| }, | |
| { | |
| "epoch": 0.015, | |
| "grad_norm": 0.4093320369720459, | |
| "learning_rate": 0.000299981194597377, | |
| "loss": 0.3852, | |
| "num_input_tokens_seen": 98304000, | |
| "step": 1500, | |
| "train_runtime": 950.9359, | |
| "train_tokens_per_second": 103376.055 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.3808560371398926, | |
| "learning_rate": 0.0002999729023775179, | |
| "loss": 0.3819, | |
| "num_input_tokens_seen": 104857600, | |
| "step": 1600, | |
| "train_runtime": 1017.4047, | |
| "train_tokens_per_second": 103063.807 | |
| }, | |
| { | |
| "epoch": 0.017, | |
| "grad_norm": 0.3014701306819916, | |
| "learning_rate": 0.0002999630999321969, | |
| "loss": 0.387, | |
| "num_input_tokens_seen": 111411200, | |
| "step": 1700, | |
| "train_runtime": 1075.027, | |
| "train_tokens_per_second": 103635.721 | |
| }, | |
| { | |
| "epoch": 0.018, | |
| "grad_norm": 0.25073230266571045, | |
| "learning_rate": 0.00029995178736012443, | |
| "loss": 0.382, | |
| "num_input_tokens_seen": 117964800, | |
| "step": 1800, | |
| "train_runtime": 1141.6684, | |
| "train_tokens_per_second": 103326.671 | |
| }, | |
| { | |
| "epoch": 0.019, | |
| "grad_norm": 0.2569698989391327, | |
| "learning_rate": 0.0002999389647752181, | |
| "loss": 0.3745, | |
| "num_input_tokens_seen": 124518400, | |
| "step": 1900, | |
| "train_runtime": 1202.9974, | |
| "train_tokens_per_second": 103506.793 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.2895148694515228, | |
| "learning_rate": 0.00029992463230660104, | |
| "loss": 0.3747, | |
| "num_input_tokens_seen": 131072000, | |
| "step": 2000, | |
| "train_runtime": 1271.272, | |
| "train_tokens_per_second": 103103.035 | |
| }, | |
| { | |
| "epoch": 0.021, | |
| "grad_norm": 0.28352853655815125, | |
| "learning_rate": 0.00029990879009860117, | |
| "loss": 0.3701, | |
| "num_input_tokens_seen": 137625600, | |
| "step": 2100, | |
| "train_runtime": 1335.8501, | |
| "train_tokens_per_second": 103024.736 | |
| }, | |
| { | |
| "epoch": 0.022, | |
| "grad_norm": 0.2598542273044586, | |
| "learning_rate": 0.0002998914383107493, | |
| "loss": 0.3715, | |
| "num_input_tokens_seen": 144179200, | |
| "step": 2200, | |
| "train_runtime": 1400.0516, | |
| "train_tokens_per_second": 102981.347 | |
| }, | |
| { | |
| "epoch": 0.023, | |
| "grad_norm": 0.300857275724411, | |
| "learning_rate": 0.0002998725771177778, | |
| "loss": 0.3723, | |
| "num_input_tokens_seen": 150732800, | |
| "step": 2300, | |
| "train_runtime": 1465.03, | |
| "train_tokens_per_second": 102887.178 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 0.19827991724014282, | |
| "learning_rate": 0.00029985220670961847, | |
| "loss": 0.3654, | |
| "num_input_tokens_seen": 157286400, | |
| "step": 2400, | |
| "train_runtime": 1534.4652, | |
| "train_tokens_per_second": 102502.423 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 0.36876365542411804, | |
| "learning_rate": 0.0002998303272914014, | |
| "loss": 0.368, | |
| "num_input_tokens_seen": 163840000, | |
| "step": 2500, | |
| "train_runtime": 1598.5928, | |
| "train_tokens_per_second": 102490.141 | |
| }, | |
| { | |
| "epoch": 0.026, | |
| "grad_norm": 0.23755036294460297, | |
| "learning_rate": 0.00029980693908345185, | |
| "loss": 0.3648, | |
| "num_input_tokens_seen": 170393600, | |
| "step": 2600, | |
| "train_runtime": 1661.9675, | |
| "train_tokens_per_second": 102525.227 | |
| }, | |
| { | |
| "epoch": 0.027, | |
| "grad_norm": 0.3921568691730499, | |
| "learning_rate": 0.00029978204232128895, | |
| "loss": 0.3633, | |
| "num_input_tokens_seen": 176947200, | |
| "step": 2700, | |
| "train_runtime": 1731.9606, | |
| "train_tokens_per_second": 102165.837 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 0.1964094191789627, | |
| "learning_rate": 0.0002997556372556227, | |
| "loss": 0.365, | |
| "num_input_tokens_seen": 183500800, | |
| "step": 2800, | |
| "train_runtime": 1796.4926, | |
| "train_tokens_per_second": 102143.922 | |
| }, | |
| { | |
| "epoch": 0.029, | |
| "grad_norm": 0.2469199150800705, | |
| "learning_rate": 0.0002997277241523519, | |
| "loss": 0.364, | |
| "num_input_tokens_seen": 190054400, | |
| "step": 2900, | |
| "train_runtime": 1860.3342, | |
| "train_tokens_per_second": 102161.428 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.19437766075134277, | |
| "learning_rate": 0.00029969830329256125, | |
| "loss": 0.3574, | |
| "num_input_tokens_seen": 196608000, | |
| "step": 3000, | |
| "train_runtime": 1924.7283, | |
| "train_tokens_per_second": 102148.444 | |
| }, | |
| { | |
| "epoch": 0.031, | |
| "grad_norm": 0.23198598623275757, | |
| "learning_rate": 0.00029966737497251836, | |
| "loss": 0.3599, | |
| "num_input_tokens_seen": 203161600, | |
| "step": 3100, | |
| "train_runtime": 1993.345, | |
| "train_tokens_per_second": 101919.94 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.22857527434825897, | |
| "learning_rate": 0.0002996349395036711, | |
| "loss": 0.3579, | |
| "num_input_tokens_seen": 209715200, | |
| "step": 3200, | |
| "train_runtime": 2057.8023, | |
| "train_tokens_per_second": 101912.218 | |
| }, | |
| { | |
| "epoch": 0.033, | |
| "grad_norm": 0.24812710285186768, | |
| "learning_rate": 0.00029960099721264435, | |
| "loss": 0.3612, | |
| "num_input_tokens_seen": 216268800, | |
| "step": 3300, | |
| "train_runtime": 2121.9536, | |
| "train_tokens_per_second": 101919.666 | |
| }, | |
| { | |
| "epoch": 0.034, | |
| "grad_norm": 0.21982239186763763, | |
| "learning_rate": 0.0002995655484412365, | |
| "loss": 0.3554, | |
| "num_input_tokens_seen": 222822400, | |
| "step": 3400, | |
| "train_runtime": 2186.6347, | |
| "train_tokens_per_second": 101901.979 | |
| }, | |
| { | |
| "epoch": 0.035, | |
| "grad_norm": 0.3460980951786041, | |
| "learning_rate": 0.00029952859354641636, | |
| "loss": 0.3568, | |
| "num_input_tokens_seen": 229376000, | |
| "step": 3500, | |
| "train_runtime": 2256.5384, | |
| "train_tokens_per_second": 101649.502 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 0.25577911734580994, | |
| "learning_rate": 0.00029949013290031924, | |
| "loss": 0.354, | |
| "num_input_tokens_seen": 235929600, | |
| "step": 3600, | |
| "train_runtime": 2320.5776, | |
| "train_tokens_per_second": 101668.483 | |
| }, | |
| { | |
| "epoch": 0.037, | |
| "grad_norm": 0.16108086705207825, | |
| "learning_rate": 0.00029945016689024353, | |
| "loss": 0.3509, | |
| "num_input_tokens_seen": 242483200, | |
| "step": 3700, | |
| "train_runtime": 2383.8992, | |
| "train_tokens_per_second": 101717.051 | |
| }, | |
| { | |
| "epoch": 0.038, | |
| "grad_norm": 0.2431662529706955, | |
| "learning_rate": 0.0002994086959186464, | |
| "loss": 0.3527, | |
| "num_input_tokens_seen": 249036800, | |
| "step": 3800, | |
| "train_runtime": 2448.8427, | |
| "train_tokens_per_second": 101695.71 | |
| }, | |
| { | |
| "epoch": 0.039, | |
| "grad_norm": 0.18574966490268707, | |
| "learning_rate": 0.00029936572040314014, | |
| "loss": 0.3546, | |
| "num_input_tokens_seen": 255590400, | |
| "step": 3900, | |
| "train_runtime": 2518.1288, | |
| "train_tokens_per_second": 101500.13 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.15902996063232422, | |
| "learning_rate": 0.0002993212407764877, | |
| "loss": 0.3519, | |
| "num_input_tokens_seen": 262144000, | |
| "step": 4000, | |
| "train_runtime": 2581.8809, | |
| "train_tokens_per_second": 101532.18 | |
| }, | |
| { | |
| "epoch": 0.041, | |
| "grad_norm": 0.21019065380096436, | |
| "learning_rate": 0.00029927525748659834, | |
| "loss": 0.3567, | |
| "num_input_tokens_seen": 268697600, | |
| "step": 4100, | |
| "train_runtime": 2646.5068, | |
| "train_tokens_per_second": 101529.154 | |
| }, | |
| { | |
| "epoch": 0.042, | |
| "grad_norm": 0.18648174405097961, | |
| "learning_rate": 0.0002992277709965234, | |
| "loss": 0.3512, | |
| "num_input_tokens_seen": 275251200, | |
| "step": 4200, | |
| "train_runtime": 2710.4754, | |
| "train_tokens_per_second": 101550.895 | |
| }, | |
| { | |
| "epoch": 0.043, | |
| "grad_norm": 0.21123889088630676, | |
| "learning_rate": 0.0002991787817844513, | |
| "loss": 0.3521, | |
| "num_input_tokens_seen": 281804800, | |
| "step": 4300, | |
| "train_runtime": 2780.6173, | |
| "train_tokens_per_second": 101346.13 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 0.22183509171009064, | |
| "learning_rate": 0.0002991282903437028, | |
| "loss": 0.3486, | |
| "num_input_tokens_seen": 288358400, | |
| "step": 4400, | |
| "train_runtime": 2843.584, | |
| "train_tokens_per_second": 101406.674 | |
| }, | |
| { | |
| "epoch": 0.045, | |
| "grad_norm": 0.19213925302028656, | |
| "learning_rate": 0.0002990762971827262, | |
| "loss": 0.3481, | |
| "num_input_tokens_seen": 294912000, | |
| "step": 4500, | |
| "train_runtime": 2906.5309, | |
| "train_tokens_per_second": 101465.29 | |
| }, | |
| { | |
| "epoch": 0.046, | |
| "grad_norm": 0.16215530037879944, | |
| "learning_rate": 0.00029902280282509197, | |
| "loss": 0.3506, | |
| "num_input_tokens_seen": 301465600, | |
| "step": 4600, | |
| "train_runtime": 2977.8135, | |
| "train_tokens_per_second": 101237.232 | |
| }, | |
| { | |
| "epoch": 0.047, | |
| "grad_norm": 0.17120705544948578, | |
| "learning_rate": 0.0002989678078094878, | |
| "loss": 0.3433, | |
| "num_input_tokens_seen": 308019200, | |
| "step": 4700, | |
| "train_runtime": 3040.7538, | |
| "train_tokens_per_second": 101296.988 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.26389873027801514, | |
| "learning_rate": 0.00029891131268971284, | |
| "loss": 0.345, | |
| "num_input_tokens_seen": 314572800, | |
| "step": 4800, | |
| "train_runtime": 3104.3446, | |
| "train_tokens_per_second": 101333.081 | |
| }, | |
| { | |
| "epoch": 0.049, | |
| "grad_norm": 0.1639779806137085, | |
| "learning_rate": 0.0002988533180346723, | |
| "loss": 0.3431, | |
| "num_input_tokens_seen": 321126400, | |
| "step": 4900, | |
| "train_runtime": 3172.6385, | |
| "train_tokens_per_second": 101217.457 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.21486082673072815, | |
| "learning_rate": 0.0002987938244283717, | |
| "loss": 0.3413, | |
| "num_input_tokens_seen": 327680000, | |
| "step": 5000, | |
| "train_runtime": 3237.5961, | |
| "train_tokens_per_second": 101210.896 | |
| }, | |
| { | |
| "epoch": 0.051, | |
| "grad_norm": 0.20326170325279236, | |
| "learning_rate": 0.00029873283246991105, | |
| "loss": 0.3457, | |
| "num_input_tokens_seen": 334233600, | |
| "step": 5100, | |
| "train_runtime": 3302.3096, | |
| "train_tokens_per_second": 101212.074 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 0.171161487698555, | |
| "learning_rate": 0.0002986703427734787, | |
| "loss": 0.345, | |
| "num_input_tokens_seen": 340787200, | |
| "step": 5200, | |
| "train_runtime": 3367.4928, | |
| "train_tokens_per_second": 101199.089 | |
| }, | |
| { | |
| "epoch": 0.053, | |
| "grad_norm": 0.19781792163848877, | |
| "learning_rate": 0.00029860635596834517, | |
| "loss": 0.3455, | |
| "num_input_tokens_seen": 347340800, | |
| "step": 5300, | |
| "train_runtime": 3430.9148, | |
| "train_tokens_per_second": 101238.538 | |
| }, | |
| { | |
| "epoch": 0.054, | |
| "grad_norm": 0.1795511543750763, | |
| "learning_rate": 0.0002985408726988569, | |
| "loss": 0.3439, | |
| "num_input_tokens_seen": 353894400, | |
| "step": 5400, | |
| "train_runtime": 3498.4556, | |
| "train_tokens_per_second": 101157.322 | |
| }, | |
| { | |
| "epoch": 0.055, | |
| "grad_norm": 0.1671728938817978, | |
| "learning_rate": 0.0002984738936244296, | |
| "loss": 0.3422, | |
| "num_input_tokens_seen": 360448000, | |
| "step": 5500, | |
| "train_runtime": 3561.4394, | |
| "train_tokens_per_second": 101208.516 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.17824003100395203, | |
| "learning_rate": 0.0002984054194195419, | |
| "loss": 0.3489, | |
| "num_input_tokens_seen": 367001600, | |
| "step": 5600, | |
| "train_runtime": 3625.8956, | |
| "train_tokens_per_second": 101216.814 | |
| }, | |
| { | |
| "epoch": 0.057, | |
| "grad_norm": 0.1654757708311081, | |
| "learning_rate": 0.0002983354507737283, | |
| "loss": 0.3463, | |
| "num_input_tokens_seen": 373555200, | |
| "step": 5700, | |
| "train_runtime": 3690.173, | |
| "train_tokens_per_second": 101229.725 | |
| }, | |
| { | |
| "epoch": 0.058, | |
| "grad_norm": 0.2033533751964569, | |
| "learning_rate": 0.00029826398839157215, | |
| "loss": 0.3462, | |
| "num_input_tokens_seen": 380108800, | |
| "step": 5800, | |
| "train_runtime": 3759.2019, | |
| "train_tokens_per_second": 101114.229 | |
| }, | |
| { | |
| "epoch": 0.059, | |
| "grad_norm": 0.19753150641918182, | |
| "learning_rate": 0.000298191032992699, | |
| "loss": 0.3436, | |
| "num_input_tokens_seen": 386662400, | |
| "step": 5900, | |
| "train_runtime": 3822.1964, | |
| "train_tokens_per_second": 101162.357 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.13978537917137146, | |
| "learning_rate": 0.0002981165853117688, | |
| "loss": 0.3393, | |
| "num_input_tokens_seen": 393216000, | |
| "step": 6000, | |
| "train_runtime": 3890.9859, | |
| "train_tokens_per_second": 101058.192 | |
| }, | |
| { | |
| "epoch": 0.061, | |
| "grad_norm": 0.28539636731147766, | |
| "learning_rate": 0.000298040646098469, | |
| "loss": 0.3419, | |
| "num_input_tokens_seen": 399769600, | |
| "step": 6100, | |
| "train_runtime": 3955.42, | |
| "train_tokens_per_second": 101068.813 | |
| }, | |
| { | |
| "epoch": 0.062, | |
| "grad_norm": 0.14195021986961365, | |
| "learning_rate": 0.0002979632161175064, | |
| "loss": 0.3408, | |
| "num_input_tokens_seen": 406323200, | |
| "step": 6200, | |
| "train_runtime": 4019.3462, | |
| "train_tokens_per_second": 101091.865 | |
| }, | |
| { | |
| "epoch": 0.063, | |
| "grad_norm": 0.26058393716812134, | |
| "learning_rate": 0.0002978842961486003, | |
| "loss": 0.3411, | |
| "num_input_tokens_seen": 412876800, | |
| "step": 6300, | |
| "train_runtime": 4082.619, | |
| "train_tokens_per_second": 101130.379 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.1645655333995819, | |
| "learning_rate": 0.0002978038869864738, | |
| "loss": 0.3392, | |
| "num_input_tokens_seen": 419430400, | |
| "step": 6400, | |
| "train_runtime": 4152.2955, | |
| "train_tokens_per_second": 101011.694 | |
| }, | |
| { | |
| "epoch": 0.065, | |
| "grad_norm": 0.1678280532360077, | |
| "learning_rate": 0.0002977219894408463, | |
| "loss": 0.338, | |
| "num_input_tokens_seen": 425984000, | |
| "step": 6500, | |
| "train_runtime": 4215.8141, | |
| "train_tokens_per_second": 101044.304 | |
| }, | |
| { | |
| "epoch": 0.066, | |
| "grad_norm": 0.19337573647499084, | |
| "learning_rate": 0.0002976386043364251, | |
| "loss": 0.3424, | |
| "num_input_tokens_seen": 432537600, | |
| "step": 6600, | |
| "train_runtime": 4278.8465, | |
| "train_tokens_per_second": 101087.432 | |
| }, | |
| { | |
| "epoch": 0.067, | |
| "grad_norm": 0.14295175671577454, | |
| "learning_rate": 0.00029755373251289733, | |
| "loss": 0.3443, | |
| "num_input_tokens_seen": 439091200, | |
| "step": 6700, | |
| "train_runtime": 4348.6665, | |
| "train_tokens_per_second": 100971.459 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 0.22164900600910187, | |
| "learning_rate": 0.0002974673748249213, | |
| "loss": 0.339, | |
| "num_input_tokens_seen": 445644800, | |
| "step": 6800, | |
| "train_runtime": 4413.12, | |
| "train_tokens_per_second": 100981.799 | |
| }, | |
| { | |
| "epoch": 0.069, | |
| "grad_norm": 0.1831408590078354, | |
| "learning_rate": 0.00029737953214211804, | |
| "loss": 0.3398, | |
| "num_input_tokens_seen": 452198400, | |
| "step": 6900, | |
| "train_runtime": 4477.6672, | |
| "train_tokens_per_second": 100989.73 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.21329298615455627, | |
| "learning_rate": 0.0002972902053490623, | |
| "loss": 0.3372, | |
| "num_input_tokens_seen": 458752000, | |
| "step": 7000, | |
| "train_runtime": 4541.4752, | |
| "train_tokens_per_second": 101013.873 | |
| }, | |
| { | |
| "epoch": 0.071, | |
| "grad_norm": 0.16601704061031342, | |
| "learning_rate": 0.00029719939534527393, | |
| "loss": 0.3436, | |
| "num_input_tokens_seen": 465305600, | |
| "step": 7100, | |
| "train_runtime": 4607.1943, | |
| "train_tokens_per_second": 100995.436 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.2303948849439621, | |
| "learning_rate": 0.00029710710304520866, | |
| "loss": 0.339, | |
| "num_input_tokens_seen": 471859200, | |
| "step": 7200, | |
| "train_runtime": 4672.0421, | |
| "train_tokens_per_second": 100996.349 | |
| }, | |
| { | |
| "epoch": 0.073, | |
| "grad_norm": 0.21449029445648193, | |
| "learning_rate": 0.00029701332937824885, | |
| "loss": 0.336, | |
| "num_input_tokens_seen": 478412800, | |
| "step": 7300, | |
| "train_runtime": 4742.0375, | |
| "train_tokens_per_second": 100887.605 | |
| }, | |
| { | |
| "epoch": 0.074, | |
| "grad_norm": 0.1367533802986145, | |
| "learning_rate": 0.0002969180752886944, | |
| "loss": 0.3397, | |
| "num_input_tokens_seen": 484966400, | |
| "step": 7400, | |
| "train_runtime": 4805.1341, | |
| "train_tokens_per_second": 100926.716 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 0.1852603256702423, | |
| "learning_rate": 0.0002968213417357529, | |
| "loss": 0.34, | |
| "num_input_tokens_seen": 491520000, | |
| "step": 7500, | |
| "train_runtime": 4867.6611, | |
| "train_tokens_per_second": 100976.628 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 0.18590585887432098, | |
| "learning_rate": 0.00029672312969353015, | |
| "loss": 0.3375, | |
| "num_input_tokens_seen": 498073600, | |
| "step": 7600, | |
| "train_runtime": 4938.9456, | |
| "train_tokens_per_second": 100846.14 | |
| }, | |
| { | |
| "epoch": 0.077, | |
| "grad_norm": 0.17078232765197754, | |
| "learning_rate": 0.00029662344015102027, | |
| "loss": 0.3374, | |
| "num_input_tokens_seen": 504627200, | |
| "step": 7700, | |
| "train_runtime": 5003.5948, | |
| "train_tokens_per_second": 100852.931 | |
| }, | |
| { | |
| "epoch": 0.078, | |
| "grad_norm": 0.14574670791625977, | |
| "learning_rate": 0.00029652227411209594, | |
| "loss": 0.3369, | |
| "num_input_tokens_seen": 511180800, | |
| "step": 7800, | |
| "train_runtime": 5067.2522, | |
| "train_tokens_per_second": 100879.289 | |
| }, | |
| { | |
| "epoch": 0.079, | |
| "grad_norm": 0.1603483408689499, | |
| "learning_rate": 0.0002964196325954979, | |
| "loss": 0.3352, | |
| "num_input_tokens_seen": 517734400, | |
| "step": 7900, | |
| "train_runtime": 5131.2908, | |
| "train_tokens_per_second": 100897.497 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.16576310992240906, | |
| "learning_rate": 0.0002963155166348253, | |
| "loss": 0.3376, | |
| "num_input_tokens_seen": 524288000, | |
| "step": 8000, | |
| "train_runtime": 5200.6662, | |
| "train_tokens_per_second": 100811.699 | |
| }, | |
| { | |
| "epoch": 0.081, | |
| "grad_norm": 0.31833919882774353, | |
| "learning_rate": 0.0002962099272785246, | |
| "loss": 0.3382, | |
| "num_input_tokens_seen": 530841600, | |
| "step": 8100, | |
| "train_runtime": 5266.7639, | |
| "train_tokens_per_second": 100790.849 | |
| }, | |
| { | |
| "epoch": 0.082, | |
| "grad_norm": 0.14755409955978394, | |
| "learning_rate": 0.0002961028655898794, | |
| "loss": 0.3348, | |
| "num_input_tokens_seen": 537395200, | |
| "step": 8200, | |
| "train_runtime": 5331.3948, | |
| "train_tokens_per_second": 100798.238 | |
| }, | |
| { | |
| "epoch": 0.083, | |
| "grad_norm": 0.2060171663761139, | |
| "learning_rate": 0.0002959943326469998, | |
| "loss": 0.3338, | |
| "num_input_tokens_seen": 543948800, | |
| "step": 8300, | |
| "train_runtime": 5395.0396, | |
| "train_tokens_per_second": 100823.876 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 0.16461625695228577, | |
| "learning_rate": 0.0002958843295428112, | |
| "loss": 0.3326, | |
| "num_input_tokens_seen": 550502400, | |
| "step": 8400, | |
| "train_runtime": 5458.2259, | |
| "train_tokens_per_second": 100857.387 | |
| }, | |
| { | |
| "epoch": 0.085, | |
| "grad_norm": 0.15455660223960876, | |
| "learning_rate": 0.0002957728573850438, | |
| "loss": 0.3339, | |
| "num_input_tokens_seen": 557056000, | |
| "step": 8500, | |
| "train_runtime": 5527.7417, | |
| "train_tokens_per_second": 100774.607 | |
| }, | |
| { | |
| "epoch": 0.086, | |
| "grad_norm": 0.17872081696987152, | |
| "learning_rate": 0.0002956599172962209, | |
| "loss": 0.3404, | |
| "num_input_tokens_seen": 563609600, | |
| "step": 8600, | |
| "train_runtime": 5593.3318, | |
| "train_tokens_per_second": 100764.557 | |
| }, | |
| { | |
| "epoch": 0.087, | |
| "grad_norm": 0.19022491574287415, | |
| "learning_rate": 0.0002955455104136479, | |
| "loss": 0.3329, | |
| "num_input_tokens_seen": 570163200, | |
| "step": 8700, | |
| "train_runtime": 5659.0887, | |
| "train_tokens_per_second": 100751.77 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.14710059762001038, | |
| "learning_rate": 0.00029542963788940096, | |
| "loss": 0.3323, | |
| "num_input_tokens_seen": 576716800, | |
| "step": 8800, | |
| "train_runtime": 5722.168, | |
| "train_tokens_per_second": 100786.415 | |
| }, | |
| { | |
| "epoch": 0.089, | |
| "grad_norm": 0.1998033970594406, | |
| "learning_rate": 0.00029531230089031505, | |
| "loss": 0.3378, | |
| "num_input_tokens_seen": 583270400, | |
| "step": 8900, | |
| "train_runtime": 5787.7324, | |
| "train_tokens_per_second": 100777.016 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.125193253159523, | |
| "learning_rate": 0.0002951935005979724, | |
| "loss": 0.3325, | |
| "num_input_tokens_seen": 589824000, | |
| "step": 9000, | |
| "train_runtime": 5855.8455, | |
| "train_tokens_per_second": 100723.968 | |
| }, | |
| { | |
| "epoch": 0.091, | |
| "grad_norm": 0.19552631676197052, | |
| "learning_rate": 0.0002950732382086907, | |
| "loss": 0.3316, | |
| "num_input_tokens_seen": 596377600, | |
| "step": 9100, | |
| "train_runtime": 5921.9714, | |
| "train_tokens_per_second": 100705.923 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 0.16468137502670288, | |
| "learning_rate": 0.0002949515149335108, | |
| "loss": 0.3349, | |
| "num_input_tokens_seen": 602931200, | |
| "step": 9200, | |
| "train_runtime": 5986.1243, | |
| "train_tokens_per_second": 100721.464 | |
| }, | |
| { | |
| "epoch": 0.093, | |
| "grad_norm": 0.1658785343170166, | |
| "learning_rate": 0.0002948283319981848, | |
| "loss": 0.3281, | |
| "num_input_tokens_seen": 609484800, | |
| "step": 9300, | |
| "train_runtime": 6050.7028, | |
| "train_tokens_per_second": 100729.588 | |
| }, | |
| { | |
| "epoch": 0.094, | |
| "grad_norm": 0.16668474674224854, | |
| "learning_rate": 0.00029470369064316354, | |
| "loss": 0.3301, | |
| "num_input_tokens_seen": 616038400, | |
| "step": 9400, | |
| "train_runtime": 6115.0892, | |
| "train_tokens_per_second": 100740.706 | |
| }, | |
| { | |
| "epoch": 0.095, | |
| "grad_norm": 0.16522246599197388, | |
| "learning_rate": 0.00029457759212358397, | |
| "loss": 0.3305, | |
| "num_input_tokens_seen": 622592000, | |
| "step": 9500, | |
| "train_runtime": 6183.2082, | |
| "train_tokens_per_second": 100690.77 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.2229623645544052, | |
| "learning_rate": 0.00029445003770925686, | |
| "loss": 0.3289, | |
| "num_input_tokens_seen": 629145600, | |
| "step": 9600, | |
| "train_runtime": 6247.5147, | |
| "train_tokens_per_second": 100703.341 | |
| }, | |
| { | |
| "epoch": 0.097, | |
| "grad_norm": 0.16620689630508423, | |
| "learning_rate": 0.00029432102868465367, | |
| "loss": 0.3299, | |
| "num_input_tokens_seen": 635699200, | |
| "step": 9700, | |
| "train_runtime": 6312.7504, | |
| "train_tokens_per_second": 100700.829 | |
| }, | |
| { | |
| "epoch": 0.098, | |
| "grad_norm": 0.15970012545585632, | |
| "learning_rate": 0.0002941905663488939, | |
| "loss": 0.3292, | |
| "num_input_tokens_seen": 642252800, | |
| "step": 9800, | |
| "train_runtime": 6382.1987, | |
| "train_tokens_per_second": 100631.903 | |
| }, | |
| { | |
| "epoch": 0.099, | |
| "grad_norm": 0.14614014327526093, | |
| "learning_rate": 0.0002940586520157318, | |
| "loss": 0.3329, | |
| "num_input_tokens_seen": 648806400, | |
| "step": 9900, | |
| "train_runtime": 6445.6924, | |
| "train_tokens_per_second": 100657.362 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.16558828949928284, | |
| "learning_rate": 0.00029392528701354325, | |
| "loss": 0.3286, | |
| "num_input_tokens_seen": 655360000, | |
| "step": 10000, | |
| "train_runtime": 6509.151, | |
| "train_tokens_per_second": 100682.87 | |
| }, | |
| { | |
| "epoch": 0.101, | |
| "grad_norm": 0.1442118138074875, | |
| "learning_rate": 0.00029379047268531243, | |
| "loss": 0.3314, | |
| "num_input_tokens_seen": 661913600, | |
| "step": 10100, | |
| "train_runtime": 6575.3071, | |
| "train_tokens_per_second": 100666.568 | |
| }, | |
| { | |
| "epoch": 0.102, | |
| "grad_norm": 0.16007182002067566, | |
| "learning_rate": 0.00029365421038861795, | |
| "loss": 0.3326, | |
| "num_input_tokens_seen": 668467200, | |
| "step": 10200, | |
| "train_runtime": 6639.6314, | |
| "train_tokens_per_second": 100678.359 | |
| }, | |
| { | |
| "epoch": 0.103, | |
| "grad_norm": 0.1417239010334015, | |
| "learning_rate": 0.0002935165014956198, | |
| "loss": 0.3292, | |
| "num_input_tokens_seen": 675020800, | |
| "step": 10300, | |
| "train_runtime": 6704.2875, | |
| "train_tokens_per_second": 100684.942 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.20092202723026276, | |
| "learning_rate": 0.0002933773473930448, | |
| "loss": 0.3251, | |
| "num_input_tokens_seen": 681574400, | |
| "step": 10400, | |
| "train_runtime": 6769.9733, | |
| "train_tokens_per_second": 100676.083 | |
| }, | |
| { | |
| "epoch": 0.105, | |
| "grad_norm": 0.12387008965015411, | |
| "learning_rate": 0.0002932367494821734, | |
| "loss": 0.3302, | |
| "num_input_tokens_seen": 688128000, | |
| "step": 10500, | |
| "train_runtime": 6840.7627, | |
| "train_tokens_per_second": 100592.292 | |
| }, | |
| { | |
| "epoch": 0.106, | |
| "grad_norm": 0.17865417897701263, | |
| "learning_rate": 0.00029309470917882497, | |
| "loss": 0.328, | |
| "num_input_tokens_seen": 694681600, | |
| "step": 10600, | |
| "train_runtime": 6905.9119, | |
| "train_tokens_per_second": 100592.305 | |
| }, | |
| { | |
| "epoch": 0.107, | |
| "grad_norm": 0.14125974476337433, | |
| "learning_rate": 0.0002929512279133437, | |
| "loss": 0.3296, | |
| "num_input_tokens_seen": 701235200, | |
| "step": 10700, | |
| "train_runtime": 6969.9941, | |
| "train_tokens_per_second": 100607.718 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 0.15725336968898773, | |
| "learning_rate": 0.0002928063071305844, | |
| "loss": 0.3279, | |
| "num_input_tokens_seen": 707788800, | |
| "step": 10800, | |
| "train_runtime": 7032.9479, | |
| "train_tokens_per_second": 100638.994 | |
| }, | |
| { | |
| "epoch": 0.109, | |
| "grad_norm": 0.15254800021648407, | |
| "learning_rate": 0.0002926599482898978, | |
| "loss": 0.3276, | |
| "num_input_tokens_seen": 714342400, | |
| "step": 10900, | |
| "train_runtime": 7097.644, | |
| "train_tokens_per_second": 100645.002 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.23630526661872864, | |
| "learning_rate": 0.00029251215286511573, | |
| "loss": 0.3278, | |
| "num_input_tokens_seen": 720896000, | |
| "step": 11000, | |
| "train_runtime": 7167.7206, | |
| "train_tokens_per_second": 100575.348 | |
| }, | |
| { | |
| "epoch": 0.111, | |
| "grad_norm": 0.14799726009368896, | |
| "learning_rate": 0.00029236292234453647, | |
| "loss": 0.3264, | |
| "num_input_tokens_seen": 727449600, | |
| "step": 11100, | |
| "train_runtime": 7232.1207, | |
| "train_tokens_per_second": 100585.932 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.17712198197841644, | |
| "learning_rate": 0.0002922122582309097, | |
| "loss": 0.3304, | |
| "num_input_tokens_seen": 734003200, | |
| "step": 11200, | |
| "train_runtime": 7296.7016, | |
| "train_tokens_per_second": 100593.835 | |
| }, | |
| { | |
| "epoch": 0.113, | |
| "grad_norm": 0.1620536595582962, | |
| "learning_rate": 0.0002920601620414215, | |
| "loss": 0.3266, | |
| "num_input_tokens_seen": 740556800, | |
| "step": 11300, | |
| "train_runtime": 7359.3874, | |
| "train_tokens_per_second": 100627.506 | |
| }, | |
| { | |
| "epoch": 0.114, | |
| "grad_norm": 0.1695978045463562, | |
| "learning_rate": 0.0002919066353076786, | |
| "loss": 0.3269, | |
| "num_input_tokens_seen": 747110400, | |
| "step": 11400, | |
| "train_runtime": 7425.5624, | |
| "train_tokens_per_second": 100613.308 | |
| }, | |
| { | |
| "epoch": 0.115, | |
| "grad_norm": 0.23728708922863007, | |
| "learning_rate": 0.00029175167957569366, | |
| "loss": 0.3269, | |
| "num_input_tokens_seen": 753664000, | |
| "step": 11500, | |
| "train_runtime": 7489.1752, | |
| "train_tokens_per_second": 100633.779 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 0.14579418301582336, | |
| "learning_rate": 0.0002915952964058691, | |
| "loss": 0.3254, | |
| "num_input_tokens_seen": 760217600, | |
| "step": 11600, | |
| "train_runtime": 7559.1466, | |
| "train_tokens_per_second": 100569.237 | |
| }, | |
| { | |
| "epoch": 0.117, | |
| "grad_norm": 0.15569131076335907, | |
| "learning_rate": 0.00029143748737298173, | |
| "loss": 0.3309, | |
| "num_input_tokens_seen": 766771200, | |
| "step": 11700, | |
| "train_runtime": 7625.7219, | |
| "train_tokens_per_second": 100550.638 | |
| }, | |
| { | |
| "epoch": 0.118, | |
| "grad_norm": 0.15939873456954956, | |
| "learning_rate": 0.00029127825406616677, | |
| "loss": 0.3251, | |
| "num_input_tokens_seen": 773324800, | |
| "step": 11800, | |
| "train_runtime": 7690.5664, | |
| "train_tokens_per_second": 100554.987 | |
| }, | |
| { | |
| "epoch": 0.119, | |
| "grad_norm": 0.1355784833431244, | |
| "learning_rate": 0.0002911175980889019, | |
| "loss": 0.3287, | |
| "num_input_tokens_seen": 779878400, | |
| "step": 11900, | |
| "train_runtime": 7753.5378, | |
| "train_tokens_per_second": 100583.556 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.19504176080226898, | |
| "learning_rate": 0.00029095552105899095, | |
| "loss": 0.325, | |
| "num_input_tokens_seen": 786432000, | |
| "step": 12000, | |
| "train_runtime": 7817.9364, | |
| "train_tokens_per_second": 100593.297 | |
| }, | |
| { | |
| "epoch": 0.121, | |
| "grad_norm": 0.1594318449497223, | |
| "learning_rate": 0.0002907920246085478, | |
| "loss": 0.3242, | |
| "num_input_tokens_seen": 792985600, | |
| "step": 12100, | |
| "train_runtime": 7887.1116, | |
| "train_tokens_per_second": 100541.953 | |
| }, | |
| { | |
| "epoch": 0.122, | |
| "grad_norm": 0.15172167122364044, | |
| "learning_rate": 0.00029062711038397996, | |
| "loss": 0.3325, | |
| "num_input_tokens_seen": 799539200, | |
| "step": 12200, | |
| "train_runtime": 7952.1371, | |
| "train_tokens_per_second": 100543.94 | |
| }, | |
| { | |
| "epoch": 0.123, | |
| "grad_norm": 0.13253241777420044, | |
| "learning_rate": 0.00029046078004597175, | |
| "loss": 0.3239, | |
| "num_input_tokens_seen": 806092800, | |
| "step": 12300, | |
| "train_runtime": 8016.3597, | |
| "train_tokens_per_second": 100555.966 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 0.2943899929523468, | |
| "learning_rate": 0.00029029303526946796, | |
| "loss": 0.3238, | |
| "num_input_tokens_seen": 812646400, | |
| "step": 12400, | |
| "train_runtime": 8079.6597, | |
| "train_tokens_per_second": 100579.286 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 0.1583172082901001, | |
| "learning_rate": 0.0002901238777436565, | |
| "loss": 0.3217, | |
| "num_input_tokens_seen": 819200000, | |
| "step": 12500, | |
| "train_runtime": 8148.9297, | |
| "train_tokens_per_second": 100528.539 | |
| }, | |
| { | |
| "epoch": 0.126, | |
| "grad_norm": 0.1598382592201233, | |
| "learning_rate": 0.00028995330917195184, | |
| "loss": 0.3245, | |
| "num_input_tokens_seen": 825753600, | |
| "step": 12600, | |
| "train_runtime": 8213.0201, | |
| "train_tokens_per_second": 100542.016 | |
| }, | |
| { | |
| "epoch": 0.127, | |
| "grad_norm": 0.13507018983364105, | |
| "learning_rate": 0.00028978133127197765, | |
| "loss": 0.3247, | |
| "num_input_tokens_seen": 832307200, | |
| "step": 12700, | |
| "train_runtime": 8277.3925, | |
| "train_tokens_per_second": 100551.859 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.1688830703496933, | |
| "learning_rate": 0.0002896079457755493, | |
| "loss": 0.3258, | |
| "num_input_tokens_seen": 838860800, | |
| "step": 12800, | |
| "train_runtime": 8342.3491, | |
| "train_tokens_per_second": 100554.507 | |
| }, | |
| { | |
| "epoch": 0.129, | |
| "grad_norm": 0.2753322422504425, | |
| "learning_rate": 0.000289433154428657, | |
| "loss": 0.3249, | |
| "num_input_tokens_seen": 845414400, | |
| "step": 12900, | |
| "train_runtime": 8406.9898, | |
| "train_tokens_per_second": 100560.892 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.20588786900043488, | |
| "learning_rate": 0.0002892569589914476, | |
| "loss": 0.3232, | |
| "num_input_tokens_seen": 851968000, | |
| "step": 13000, | |
| "train_runtime": 8475.9626, | |
| "train_tokens_per_second": 100515.781 | |
| }, | |
| { | |
| "epoch": 0.131, | |
| "grad_norm": 0.1462445855140686, | |
| "learning_rate": 0.0002890793612382072, | |
| "loss": 0.3239, | |
| "num_input_tokens_seen": 858521600, | |
| "step": 13100, | |
| "train_runtime": 8539.9861, | |
| "train_tokens_per_second": 100529.625 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 0.11379440873861313, | |
| "learning_rate": 0.0002889003629573432, | |
| "loss": 0.3249, | |
| "num_input_tokens_seen": 865075200, | |
| "step": 13200, | |
| "train_runtime": 8604.867, | |
| "train_tokens_per_second": 100533.244 | |
| }, | |
| { | |
| "epoch": 0.133, | |
| "grad_norm": 0.12769202888011932, | |
| "learning_rate": 0.00028871996595136626, | |
| "loss": 0.327, | |
| "num_input_tokens_seen": 871628800, | |
| "step": 13300, | |
| "train_runtime": 8669.3605, | |
| "train_tokens_per_second": 100541.303 | |
| }, | |
| { | |
| "epoch": 0.134, | |
| "grad_norm": 0.14837151765823364, | |
| "learning_rate": 0.0002885381720368723, | |
| "loss": 0.321, | |
| "num_input_tokens_seen": 878182400, | |
| "step": 13400, | |
| "train_runtime": 8738.2624, | |
| "train_tokens_per_second": 100498.515 | |
| }, | |
| { | |
| "epoch": 0.135, | |
| "grad_norm": 0.1538904309272766, | |
| "learning_rate": 0.000288354983044524, | |
| "loss": 0.3207, | |
| "num_input_tokens_seen": 884736000, | |
| "step": 13500, | |
| "train_runtime": 8802.2586, | |
| "train_tokens_per_second": 100512.385 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.12802962958812714, | |
| "learning_rate": 0.00028817040081903245, | |
| "loss": 0.3241, | |
| "num_input_tokens_seen": 891289600, | |
| "step": 13600, | |
| "train_runtime": 8866.1163, | |
| "train_tokens_per_second": 100527.624 | |
| }, | |
| { | |
| "epoch": 0.137, | |
| "grad_norm": 0.35466450452804565, | |
| "learning_rate": 0.00028798442721913867, | |
| "loss": 0.3214, | |
| "num_input_tokens_seen": 897843200, | |
| "step": 13700, | |
| "train_runtime": 8930.5828, | |
| "train_tokens_per_second": 100535.79 | |
| }, | |
| { | |
| "epoch": 0.138, | |
| "grad_norm": 0.13867586851119995, | |
| "learning_rate": 0.00028779706411759465, | |
| "loss": 0.3199, | |
| "num_input_tokens_seen": 904396800, | |
| "step": 13800, | |
| "train_runtime": 9001.3287, | |
| "train_tokens_per_second": 100473.7 | |
| }, | |
| { | |
| "epoch": 0.139, | |
| "grad_norm": 0.2114623785018921, | |
| "learning_rate": 0.00028760831340114484, | |
| "loss": 0.3234, | |
| "num_input_tokens_seen": 910950400, | |
| "step": 13900, | |
| "train_runtime": 9066.3163, | |
| "train_tokens_per_second": 100476.353 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.14202618598937988, | |
| "learning_rate": 0.00028741817697050683, | |
| "loss": 0.3232, | |
| "num_input_tokens_seen": 917504000, | |
| "step": 14000, | |
| "train_runtime": 9130.2003, | |
| "train_tokens_per_second": 100491.114 | |
| }, | |
| { | |
| "epoch": 0.141, | |
| "grad_norm": 0.1686236560344696, | |
| "learning_rate": 0.00028722665674035233, | |
| "loss": 0.3203, | |
| "num_input_tokens_seen": 924057600, | |
| "step": 14100, | |
| "train_runtime": 9195.1426, | |
| "train_tokens_per_second": 100494.102 | |
| }, | |
| { | |
| "epoch": 0.142, | |
| "grad_norm": 0.14483292400836945, | |
| "learning_rate": 0.0002870337546392879, | |
| "loss": 0.3321, | |
| "num_input_tokens_seen": 930611200, | |
| "step": 14200, | |
| "train_runtime": 9259.404, | |
| "train_tokens_per_second": 100504.438 | |
| }, | |
| { | |
| "epoch": 0.143, | |
| "grad_norm": 0.12517394125461578, | |
| "learning_rate": 0.00028683947260983576, | |
| "loss": 0.3233, | |
| "num_input_tokens_seen": 937164800, | |
| "step": 14300, | |
| "train_runtime": 9324.1454, | |
| "train_tokens_per_second": 100509.458 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.24776680767536163, | |
| "learning_rate": 0.00028664381260841356, | |
| "loss": 0.3192, | |
| "num_input_tokens_seen": 943718400, | |
| "step": 14400, | |
| "train_runtime": 9393.645, | |
| "train_tokens_per_second": 100463.494 | |
| }, | |
| { | |
| "epoch": 0.145, | |
| "grad_norm": 0.4200928807258606, | |
| "learning_rate": 0.0002864467766053154, | |
| "loss": 0.321, | |
| "num_input_tokens_seen": 950272000, | |
| "step": 14500, | |
| "train_runtime": 9456.5857, | |
| "train_tokens_per_second": 100487.853 | |
| }, | |
| { | |
| "epoch": 0.146, | |
| "grad_norm": 0.14573471248149872, | |
| "learning_rate": 0.00028624836658469165, | |
| "loss": 0.3198, | |
| "num_input_tokens_seen": 956825600, | |
| "step": 14600, | |
| "train_runtime": 9525.9633, | |
| "train_tokens_per_second": 100443.973 | |
| }, | |
| { | |
| "epoch": 0.147, | |
| "grad_norm": 0.1546989232301712, | |
| "learning_rate": 0.00028604858454452906, | |
| "loss": 0.3267, | |
| "num_input_tokens_seen": 963379200, | |
| "step": 14700, | |
| "train_runtime": 9585.7512, | |
| "train_tokens_per_second": 100501.169 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 0.172988623380661, | |
| "learning_rate": 0.00028584743249663057, | |
| "loss": 0.3222, | |
| "num_input_tokens_seen": 969932800, | |
| "step": 14800, | |
| "train_runtime": 9650.7111, | |
| "train_tokens_per_second": 100503.765 | |
| }, | |
| { | |
| "epoch": 0.149, | |
| "grad_norm": 0.19345735013484955, | |
| "learning_rate": 0.000285644912466595, | |
| "loss": 0.3194, | |
| "num_input_tokens_seen": 976486400, | |
| "step": 14900, | |
| "train_runtime": 9721.1196, | |
| "train_tokens_per_second": 100449.994 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.13317954540252686, | |
| "learning_rate": 0.00028544102649379684, | |
| "loss": 0.3236, | |
| "num_input_tokens_seen": 983040000, | |
| "step": 15000, | |
| "train_runtime": 9784.7921, | |
| "train_tokens_per_second": 100466.11 | |
| }, | |
| { | |
| "epoch": 0.151, | |
| "grad_norm": 0.17458604276180267, | |
| "learning_rate": 0.00028523577663136556, | |
| "loss": 0.3208, | |
| "num_input_tokens_seen": 989593600, | |
| "step": 15100, | |
| "train_runtime": 9853.1273, | |
| "train_tokens_per_second": 100434.468 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.1358109712600708, | |
| "learning_rate": 0.000285029164946165, | |
| "loss": 0.3237, | |
| "num_input_tokens_seen": 996147200, | |
| "step": 15200, | |
| "train_runtime": 9917.7044, | |
| "train_tokens_per_second": 100441.307 | |
| }, | |
| { | |
| "epoch": 0.153, | |
| "grad_norm": 0.16100633144378662, | |
| "learning_rate": 0.0002848211935187725, | |
| "loss": 0.3267, | |
| "num_input_tokens_seen": 1002700800, | |
| "step": 15300, | |
| "train_runtime": 9982.8922, | |
| "train_tokens_per_second": 100441.914 | |
| }, | |
| { | |
| "epoch": 0.154, | |
| "grad_norm": 0.20419622957706451, | |
| "learning_rate": 0.0002846118644434581, | |
| "loss": 0.3193, | |
| "num_input_tokens_seen": 1009254400, | |
| "step": 15400, | |
| "train_runtime": 10046.3454, | |
| "train_tokens_per_second": 100459.855 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 0.17805695533752441, | |
| "learning_rate": 0.00028440117982816326, | |
| "loss": 0.3159, | |
| "num_input_tokens_seen": 1015808000, | |
| "step": 15500, | |
| "train_runtime": 10110.0124, | |
| "train_tokens_per_second": 100475.446 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 0.17533563077449799, | |
| "learning_rate": 0.0002841891417944796, | |
| "loss": 0.3216, | |
| "num_input_tokens_seen": 1022361600, | |
| "step": 15600, | |
| "train_runtime": 10178.7469, | |
| "train_tokens_per_second": 100440.812 | |
| }, | |
| { | |
| "epoch": 0.157, | |
| "grad_norm": 0.13143610954284668, | |
| "learning_rate": 0.0002839757524776279, | |
| "loss": 0.3234, | |
| "num_input_tokens_seen": 1028915200, | |
| "step": 15700, | |
| "train_runtime": 10243.1395, | |
| "train_tokens_per_second": 100449.203 | |
| }, | |
| { | |
| "epoch": 0.158, | |
| "grad_norm": 0.13563373684883118, | |
| "learning_rate": 0.0002837610140264361, | |
| "loss": 0.3194, | |
| "num_input_tokens_seen": 1035468800, | |
| "step": 15800, | |
| "train_runtime": 10307.5423, | |
| "train_tokens_per_second": 100457.39 | |
| }, | |
| { | |
| "epoch": 0.159, | |
| "grad_norm": 0.14616088569164276, | |
| "learning_rate": 0.0002835449286033182, | |
| "loss": 0.3178, | |
| "num_input_tokens_seen": 1042022400, | |
| "step": 15900, | |
| "train_runtime": 10378.0909, | |
| "train_tokens_per_second": 100405.982 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.1539888232946396, | |
| "learning_rate": 0.0002833274983842518, | |
| "loss": 0.3156, | |
| "num_input_tokens_seen": 1048576000, | |
| "step": 16000, | |
| "train_runtime": 10441.484, | |
| "train_tokens_per_second": 100424.039 | |
| }, | |
| { | |
| "epoch": 0.161, | |
| "grad_norm": 0.15786372125148773, | |
| "learning_rate": 0.0002831087255587569, | |
| "loss": 0.318, | |
| "num_input_tokens_seen": 1055129600, | |
| "step": 16100, | |
| "train_runtime": 10505.72, | |
| "train_tokens_per_second": 100433.821 | |
| }, | |
| { | |
| "epoch": 0.162, | |
| "grad_norm": 0.14359760284423828, | |
| "learning_rate": 0.0002828886123298734, | |
| "loss": 0.3179, | |
| "num_input_tokens_seen": 1061683200, | |
| "step": 16200, | |
| "train_runtime": 10570.7713, | |
| "train_tokens_per_second": 100435.736 | |
| }, | |
| { | |
| "epoch": 0.163, | |
| "grad_norm": 0.1415397673845291, | |
| "learning_rate": 0.00028266716091413906, | |
| "loss": 0.32, | |
| "num_input_tokens_seen": 1068236800, | |
| "step": 16300, | |
| "train_runtime": 10635.2645, | |
| "train_tokens_per_second": 100442.899 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 0.1199110895395279, | |
| "learning_rate": 0.0002824443735415673, | |
| "loss": 0.3188, | |
| "num_input_tokens_seen": 1074790400, | |
| "step": 16400, | |
| "train_runtime": 10704.7074, | |
| "train_tokens_per_second": 100403.529 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 0.18369431793689728, | |
| "learning_rate": 0.0002822202524556243, | |
| "loss": 0.3208, | |
| "num_input_tokens_seen": 1081344000, | |
| "step": 16500, | |
| "train_runtime": 10770.1863, | |
| "train_tokens_per_second": 100401.606 | |
| }, | |
| { | |
| "epoch": 0.166, | |
| "grad_norm": 0.2615172266960144, | |
| "learning_rate": 0.00028199479991320695, | |
| "loss": 0.3224, | |
| "num_input_tokens_seen": 1087897600, | |
| "step": 16600, | |
| "train_runtime": 10834.6749, | |
| "train_tokens_per_second": 100408.883 | |
| }, | |
| { | |
| "epoch": 0.167, | |
| "grad_norm": 0.1250002384185791, | |
| "learning_rate": 0.00028176801818461994, | |
| "loss": 0.3171, | |
| "num_input_tokens_seen": 1094451200, | |
| "step": 16700, | |
| "train_runtime": 10899.3075, | |
| "train_tokens_per_second": 100414.747 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.14198775589466095, | |
| "learning_rate": 0.00028153990955355273, | |
| "loss": 0.3194, | |
| "num_input_tokens_seen": 1101004800, | |
| "step": 16800, | |
| "train_runtime": 10964.3423, | |
| "train_tokens_per_second": 100416.858 | |
| }, | |
| { | |
| "epoch": 0.169, | |
| "grad_norm": 0.14076939225196838, | |
| "learning_rate": 0.00028131047631705665, | |
| "loss": 0.3189, | |
| "num_input_tokens_seen": 1107558400, | |
| "step": 16900, | |
| "train_runtime": 11033.6033, | |
| "train_tokens_per_second": 100380.48 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.13334921002388, | |
| "learning_rate": 0.00028107972078552187, | |
| "loss": 0.3198, | |
| "num_input_tokens_seen": 1114112000, | |
| "step": 17000, | |
| "train_runtime": 11098.612, | |
| "train_tokens_per_second": 100383.003 | |
| }, | |
| { | |
| "epoch": 0.171, | |
| "grad_norm": 0.13615840673446655, | |
| "learning_rate": 0.0002808476452826541, | |
| "loss": 0.3168, | |
| "num_input_tokens_seen": 1120665600, | |
| "step": 17100, | |
| "train_runtime": 11161.3832, | |
| "train_tokens_per_second": 100405.62 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 0.14747090637683868, | |
| "learning_rate": 0.00028061425214545094, | |
| "loss": 0.3163, | |
| "num_input_tokens_seen": 1127219200, | |
| "step": 17200, | |
| "train_runtime": 11231.5954, | |
| "train_tokens_per_second": 100361.45 | |
| }, | |
| { | |
| "epoch": 0.173, | |
| "grad_norm": 0.15957149863243103, | |
| "learning_rate": 0.00028037954372417883, | |
| "loss": 0.317, | |
| "num_input_tokens_seen": 1133772800, | |
| "step": 17300, | |
| "train_runtime": 11295.5019, | |
| "train_tokens_per_second": 100373.831 | |
| }, | |
| { | |
| "epoch": 0.174, | |
| "grad_norm": 0.20420241355895996, | |
| "learning_rate": 0.0002801435223823488, | |
| "loss": 0.3207, | |
| "num_input_tokens_seen": 1140326400, | |
| "step": 17400, | |
| "train_runtime": 11360.8649, | |
| "train_tokens_per_second": 100373.203 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 0.20070046186447144, | |
| "learning_rate": 0.00027990619049669336, | |
| "loss": 0.3206, | |
| "num_input_tokens_seen": 1146880000, | |
| "step": 17500, | |
| "train_runtime": 11424.854, | |
| "train_tokens_per_second": 100384.652 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.13903649151325226, | |
| "learning_rate": 0.00027966755045714177, | |
| "loss": 0.3227, | |
| "num_input_tokens_seen": 1153433600, | |
| "step": 17600, | |
| "train_runtime": 11488.6874, | |
| "train_tokens_per_second": 100397.336 | |
| }, | |
| { | |
| "epoch": 0.177, | |
| "grad_norm": 0.15853877365589142, | |
| "learning_rate": 0.00027942760466679673, | |
| "loss": 0.3168, | |
| "num_input_tokens_seen": 1159987200, | |
| "step": 17700, | |
| "train_runtime": 11559.2862, | |
| "train_tokens_per_second": 100351.11 | |
| }, | |
| { | |
| "epoch": 0.178, | |
| "grad_norm": 0.14262589812278748, | |
| "learning_rate": 0.00027918635554190956, | |
| "loss": 0.3235, | |
| "num_input_tokens_seen": 1166540800, | |
| "step": 17800, | |
| "train_runtime": 11622.4751, | |
| "train_tokens_per_second": 100369.395 | |
| }, | |
| { | |
| "epoch": 0.179, | |
| "grad_norm": 0.14338357746601105, | |
| "learning_rate": 0.00027894380551185636, | |
| "loss": 0.3204, | |
| "num_input_tokens_seen": 1173094400, | |
| "step": 17900, | |
| "train_runtime": 11687.9668, | |
| "train_tokens_per_second": 100367.705 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.12374505400657654, | |
| "learning_rate": 0.00027869995701911314, | |
| "loss": 0.3156, | |
| "num_input_tokens_seen": 1179648000, | |
| "step": 18000, | |
| "train_runtime": 11751.6619, | |
| "train_tokens_per_second": 100381.377 | |
| }, | |
| { | |
| "epoch": 0.181, | |
| "grad_norm": 0.11708634346723557, | |
| "learning_rate": 0.0002784548125192316, | |
| "loss": 0.3145, | |
| "num_input_tokens_seen": 1186201600, | |
| "step": 18100, | |
| "train_runtime": 11816.0633, | |
| "train_tokens_per_second": 100388.9 | |
| }, | |
| { | |
| "epoch": 0.182, | |
| "grad_norm": 0.1318449079990387, | |
| "learning_rate": 0.0002782083744808141, | |
| "loss": 0.3159, | |
| "num_input_tokens_seen": 1192755200, | |
| "step": 18200, | |
| "train_runtime": 11887.7736, | |
| "train_tokens_per_second": 100334.616 | |
| }, | |
| { | |
| "epoch": 0.183, | |
| "grad_norm": 0.3383175730705261, | |
| "learning_rate": 0.000277960645385489, | |
| "loss": 0.3191, | |
| "num_input_tokens_seen": 1199308800, | |
| "step": 18300, | |
| "train_runtime": 11953.3207, | |
| "train_tokens_per_second": 100332.688 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.13779285550117493, | |
| "learning_rate": 0.00027771162772788544, | |
| "loss": 0.3168, | |
| "num_input_tokens_seen": 1205862400, | |
| "step": 18400, | |
| "train_runtime": 12016.7432, | |
| "train_tokens_per_second": 100348.521 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 0.15161630511283875, | |
| "learning_rate": 0.00027746132401560857, | |
| "loss": 0.3146, | |
| "num_input_tokens_seen": 1212416000, | |
| "step": 18500, | |
| "train_runtime": 12081.3443, | |
| "train_tokens_per_second": 100354.395 | |
| }, | |
| { | |
| "epoch": 0.186, | |
| "grad_norm": 0.1523953378200531, | |
| "learning_rate": 0.0002772097367692139, | |
| "loss": 0.3172, | |
| "num_input_tokens_seen": 1218969600, | |
| "step": 18600, | |
| "train_runtime": 12145.9663, | |
| "train_tokens_per_second": 100360.035 | |
| }, | |
| { | |
| "epoch": 0.187, | |
| "grad_norm": 0.12802754342556, | |
| "learning_rate": 0.00027695686852218226, | |
| "loss": 0.3198, | |
| "num_input_tokens_seen": 1225523200, | |
| "step": 18700, | |
| "train_runtime": 12215.5887, | |
| "train_tokens_per_second": 100324.53 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 0.13653679192066193, | |
| "learning_rate": 0.00027670272182089416, | |
| "loss": 0.319, | |
| "num_input_tokens_seen": 1232076800, | |
| "step": 18800, | |
| "train_runtime": 12280.146, | |
| "train_tokens_per_second": 100330.794 | |
| }, | |
| { | |
| "epoch": 0.189, | |
| "grad_norm": 0.15152159333229065, | |
| "learning_rate": 0.0002764472992246039, | |
| "loss": 0.3165, | |
| "num_input_tokens_seen": 1238630400, | |
| "step": 18900, | |
| "train_runtime": 12344.6292, | |
| "train_tokens_per_second": 100337.594 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.13211041688919067, | |
| "learning_rate": 0.0002761906033054143, | |
| "loss": 0.3161, | |
| "num_input_tokens_seen": 1245184000, | |
| "step": 19000, | |
| "train_runtime": 12407.4556, | |
| "train_tokens_per_second": 100357.724 | |
| }, | |
| { | |
| "epoch": 0.191, | |
| "grad_norm": 0.19933822751045227, | |
| "learning_rate": 0.00027593263664825045, | |
| "loss": 0.3173, | |
| "num_input_tokens_seen": 1251737600, | |
| "step": 19100, | |
| "train_runtime": 12472.5241, | |
| "train_tokens_per_second": 100359.606 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.1472938358783722, | |
| "learning_rate": 0.00027567340185083363, | |
| "loss": 0.3157, | |
| "num_input_tokens_seen": 1258291200, | |
| "step": 19200, | |
| "train_runtime": 12542.0532, | |
| "train_tokens_per_second": 100325.774 | |
| }, | |
| { | |
| "epoch": 0.193, | |
| "grad_norm": 0.1466071903705597, | |
| "learning_rate": 0.00027541290152365537, | |
| "loss": 0.3188, | |
| "num_input_tokens_seen": 1264844800, | |
| "step": 19300, | |
| "train_runtime": 12606.5735, | |
| "train_tokens_per_second": 100332.164 | |
| }, | |
| { | |
| "epoch": 0.194, | |
| "grad_norm": 0.1384386122226715, | |
| "learning_rate": 0.00027515113828995117, | |
| "loss": 0.318, | |
| "num_input_tokens_seen": 1271398400, | |
| "step": 19400, | |
| "train_runtime": 12672.5058, | |
| "train_tokens_per_second": 100327.309 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 0.16287657618522644, | |
| "learning_rate": 0.00027488811478567374, | |
| "loss": 0.3153, | |
| "num_input_tokens_seen": 1277952000, | |
| "step": 19500, | |
| "train_runtime": 12735.4985, | |
| "train_tokens_per_second": 100345.66 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 0.14955779910087585, | |
| "learning_rate": 0.0002746238336594671, | |
| "loss": 0.3144, | |
| "num_input_tokens_seen": 1284505600, | |
| "step": 19600, | |
| "train_runtime": 12804.8911, | |
| "train_tokens_per_second": 100313.669 | |
| }, | |
| { | |
| "epoch": 0.197, | |
| "grad_norm": 0.15176887810230255, | |
| "learning_rate": 0.00027435829757263894, | |
| "loss": 0.3172, | |
| "num_input_tokens_seen": 1291059200, | |
| "step": 19700, | |
| "train_runtime": 12869.0984, | |
| "train_tokens_per_second": 100322.428 | |
| }, | |
| { | |
| "epoch": 0.198, | |
| "grad_norm": 0.12215608358383179, | |
| "learning_rate": 0.0002740915091991349, | |
| "loss": 0.3182, | |
| "num_input_tokens_seen": 1297612800, | |
| "step": 19800, | |
| "train_runtime": 12932.8746, | |
| "train_tokens_per_second": 100334.446 | |
| }, | |
| { | |
| "epoch": 0.199, | |
| "grad_norm": 0.248954638838768, | |
| "learning_rate": 0.0002738234712255109, | |
| "loss": 0.3171, | |
| "num_input_tokens_seen": 1304166400, | |
| "step": 19900, | |
| "train_runtime": 13003.7739, | |
| "train_tokens_per_second": 100291.378 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.18855011463165283, | |
| "learning_rate": 0.00027355418635090635, | |
| "loss": 0.3181, | |
| "num_input_tokens_seen": 1310720000, | |
| "step": 20000, | |
| "train_runtime": 13068.3505, | |
| "train_tokens_per_second": 100297.279 | |
| }, | |
| { | |
| "epoch": 0.201, | |
| "grad_norm": 0.17624643445014954, | |
| "learning_rate": 0.000273283657287017, | |
| "loss": 0.3147, | |
| "num_input_tokens_seen": 1317273600, | |
| "step": 20100, | |
| "train_runtime": 13133.7291, | |
| "train_tokens_per_second": 100296.998 | |
| }, | |
| { | |
| "epoch": 0.202, | |
| "grad_norm": 0.12586164474487305, | |
| "learning_rate": 0.00027301188675806745, | |
| "loss": 0.3203, | |
| "num_input_tokens_seen": 1323827200, | |
| "step": 20200, | |
| "train_runtime": 13197.5369, | |
| "train_tokens_per_second": 100308.657 | |
| }, | |
| { | |
| "epoch": 0.203, | |
| "grad_norm": 0.13073797523975372, | |
| "learning_rate": 0.0002727388775007839, | |
| "loss": 0.3149, | |
| "num_input_tokens_seen": 1330380800, | |
| "step": 20300, | |
| "train_runtime": 13261.8266, | |
| "train_tokens_per_second": 100316.558 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 0.12983232736587524, | |
| "learning_rate": 0.0002724646322643666, | |
| "loss": 0.3157, | |
| "num_input_tokens_seen": 1336934400, | |
| "step": 20400, | |
| "train_runtime": 13325.295, | |
| "train_tokens_per_second": 100330.567 | |
| }, | |
| { | |
| "epoch": 0.205, | |
| "grad_norm": 0.2400187999010086, | |
| "learning_rate": 0.000272189153810462, | |
| "loss": 0.3178, | |
| "num_input_tokens_seen": 1343488000, | |
| "step": 20500, | |
| "train_runtime": 13395.2424, | |
| "train_tokens_per_second": 100295.908 | |
| }, | |
| { | |
| "epoch": 0.206, | |
| "grad_norm": 0.11757266521453857, | |
| "learning_rate": 0.0002719124449131351, | |
| "loss": 0.3164, | |
| "num_input_tokens_seen": 1350041600, | |
| "step": 20600, | |
| "train_runtime": 13459.4754, | |
| "train_tokens_per_second": 100304.177 | |
| }, | |
| { | |
| "epoch": 0.207, | |
| "grad_norm": 0.1606636494398117, | |
| "learning_rate": 0.00027163450835884144, | |
| "loss": 0.3146, | |
| "num_input_tokens_seen": 1356595200, | |
| "step": 20700, | |
| "train_runtime": 13524.1715, | |
| "train_tokens_per_second": 100308.932 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.1295078545808792, | |
| "learning_rate": 0.00027135534694639894, | |
| "loss": 0.3175, | |
| "num_input_tokens_seen": 1363148800, | |
| "step": 20800, | |
| "train_runtime": 13588.4538, | |
| "train_tokens_per_second": 100316.697 | |
| }, | |
| { | |
| "epoch": 0.209, | |
| "grad_norm": 0.18409083783626556, | |
| "learning_rate": 0.00027107496348696003, | |
| "loss": 0.3189, | |
| "num_input_tokens_seen": 1369702400, | |
| "step": 20900, | |
| "train_runtime": 13653.2417, | |
| "train_tokens_per_second": 100320.673 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.12083840370178223, | |
| "learning_rate": 0.00027079336080398296, | |
| "loss": 0.3139, | |
| "num_input_tokens_seen": 1376256000, | |
| "step": 21000, | |
| "train_runtime": 13723.0075, | |
| "train_tokens_per_second": 100288.22 | |
| }, | |
| { | |
| "epoch": 0.211, | |
| "grad_norm": 0.16270384192466736, | |
| "learning_rate": 0.00027051054173320366, | |
| "loss": 0.3147, | |
| "num_input_tokens_seen": 1382809600, | |
| "step": 21100, | |
| "train_runtime": 13787.7693, | |
| "train_tokens_per_second": 100292.482 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 0.12299864739179611, | |
| "learning_rate": 0.000270226509122607, | |
| "loss": 0.3137, | |
| "num_input_tokens_seen": 1389363200, | |
| "step": 21200, | |
| "train_runtime": 13851.6298, | |
| "train_tokens_per_second": 100303.229 | |
| }, | |
| { | |
| "epoch": 0.213, | |
| "grad_norm": 0.12248677760362625, | |
| "learning_rate": 0.0002699412658323983, | |
| "loss": 0.3177, | |
| "num_input_tokens_seen": 1395916800, | |
| "step": 21300, | |
| "train_runtime": 13915.8434, | |
| "train_tokens_per_second": 100311.333 | |
| }, | |
| { | |
| "epoch": 0.214, | |
| "grad_norm": 0.13090935349464417, | |
| "learning_rate": 0.00026965481473497423, | |
| "loss": 0.3146, | |
| "num_input_tokens_seen": 1402470400, | |
| "step": 21400, | |
| "train_runtime": 13985.645, | |
| "train_tokens_per_second": 100279.28 | |
| }, | |
| { | |
| "epoch": 0.215, | |
| "grad_norm": 0.1279245913028717, | |
| "learning_rate": 0.0002693671587148942, | |
| "loss": 0.3128, | |
| "num_input_tokens_seen": 1409024000, | |
| "step": 21500, | |
| "train_runtime": 14050.4506, | |
| "train_tokens_per_second": 100283.19 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.15504342317581177, | |
| "learning_rate": 0.0002690783006688511, | |
| "loss": 0.3145, | |
| "num_input_tokens_seen": 1415577600, | |
| "step": 21600, | |
| "train_runtime": 14115.855, | |
| "train_tokens_per_second": 100282.81 | |
| }, | |
| { | |
| "epoch": 0.217, | |
| "grad_norm": 0.1325046420097351, | |
| "learning_rate": 0.0002687882435056423, | |
| "loss": 0.3138, | |
| "num_input_tokens_seen": 1422131200, | |
| "step": 21700, | |
| "train_runtime": 14179.61, | |
| "train_tokens_per_second": 100294.098 | |
| }, | |
| { | |
| "epoch": 0.218, | |
| "grad_norm": 0.17374184727668762, | |
| "learning_rate": 0.0002684969901461402, | |
| "loss": 0.3179, | |
| "num_input_tokens_seen": 1428684800, | |
| "step": 21800, | |
| "train_runtime": 14245.0199, | |
| "train_tokens_per_second": 100293.633 | |
| }, | |
| { | |
| "epoch": 0.219, | |
| "grad_norm": 0.16908228397369385, | |
| "learning_rate": 0.000268204543523263, | |
| "loss": 0.3182, | |
| "num_input_tokens_seen": 1435238400, | |
| "step": 21900, | |
| "train_runtime": 14310.1147, | |
| "train_tokens_per_second": 100295.381 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.15052039921283722, | |
| "learning_rate": 0.0002679109065819447, | |
| "loss": 0.3148, | |
| "num_input_tokens_seen": 1441792000, | |
| "step": 22000, | |
| "train_runtime": 14374.221, | |
| "train_tokens_per_second": 100304.01 | |
| }, | |
| { | |
| "epoch": 0.221, | |
| "grad_norm": 0.1661474108695984, | |
| "learning_rate": 0.0002676160822791062, | |
| "loss": 0.3142, | |
| "num_input_tokens_seen": 1448345600, | |
| "step": 22100, | |
| "train_runtime": 14445.9108, | |
| "train_tokens_per_second": 100259.902 | |
| }, | |
| { | |
| "epoch": 0.222, | |
| "grad_norm": 0.16423378884792328, | |
| "learning_rate": 0.00026732007358362496, | |
| "loss": 0.323, | |
| "num_input_tokens_seen": 1454899200, | |
| "step": 22200, | |
| "train_runtime": 14510.5733, | |
| "train_tokens_per_second": 100264.763 | |
| }, | |
| { | |
| "epoch": 0.223, | |
| "grad_norm": 0.14868460595607758, | |
| "learning_rate": 0.0002670228834763052, | |
| "loss": 0.3155, | |
| "num_input_tokens_seen": 1461452800, | |
| "step": 22300, | |
| "train_runtime": 14575.7382, | |
| "train_tokens_per_second": 100266.126 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.1287386268377304, | |
| "learning_rate": 0.00026672451494984804, | |
| "loss": 0.3152, | |
| "num_input_tokens_seen": 1468006400, | |
| "step": 22400, | |
| "train_runtime": 14639.7379, | |
| "train_tokens_per_second": 100275.456 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 0.14276720583438873, | |
| "learning_rate": 0.0002664249710088213, | |
| "loss": 0.3131, | |
| "num_input_tokens_seen": 1474560000, | |
| "step": 22500, | |
| "train_runtime": 14703.588, | |
| "train_tokens_per_second": 100285.726 | |
| }, | |
| { | |
| "epoch": 0.226, | |
| "grad_norm": 0.1419740915298462, | |
| "learning_rate": 0.00026612425466962893, | |
| "loss": 0.3112, | |
| "num_input_tokens_seen": 1481113600, | |
| "step": 22600, | |
| "train_runtime": 14773.1939, | |
| "train_tokens_per_second": 100256.83 | |
| }, | |
| { | |
| "epoch": 0.227, | |
| "grad_norm": 0.12067803740501404, | |
| "learning_rate": 0.00026582236896048134, | |
| "loss": 0.3122, | |
| "num_input_tokens_seen": 1487667200, | |
| "step": 22700, | |
| "train_runtime": 14837.1829, | |
| "train_tokens_per_second": 100266.15 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 0.1338939219713211, | |
| "learning_rate": 0.00026551931692136413, | |
| "loss": 0.3128, | |
| "num_input_tokens_seen": 1494220800, | |
| "step": 22800, | |
| "train_runtime": 14900.9562, | |
| "train_tokens_per_second": 100276.84 | |
| }, | |
| { | |
| "epoch": 0.229, | |
| "grad_norm": 0.16754469275474548, | |
| "learning_rate": 0.00026521510160400804, | |
| "loss": 0.3133, | |
| "num_input_tokens_seen": 1500774400, | |
| "step": 22900, | |
| "train_runtime": 14965.1238, | |
| "train_tokens_per_second": 100284.797 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.12648451328277588, | |
| "learning_rate": 0.00026490972607185793, | |
| "loss": 0.311, | |
| "num_input_tokens_seen": 1507328000, | |
| "step": 23000, | |
| "train_runtime": 15034.861, | |
| "train_tokens_per_second": 100255.533 | |
| }, | |
| { | |
| "epoch": 0.231, | |
| "grad_norm": 0.12040221691131592, | |
| "learning_rate": 0.0002646031934000421, | |
| "loss": 0.3166, | |
| "num_input_tokens_seen": 1513881600, | |
| "step": 23100, | |
| "train_runtime": 15099.2676, | |
| "train_tokens_per_second": 100261.922 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.12486282736063004, | |
| "learning_rate": 0.00026429550667534095, | |
| "loss": 0.3151, | |
| "num_input_tokens_seen": 1520435200, | |
| "step": 23200, | |
| "train_runtime": 15164.1184, | |
| "train_tokens_per_second": 100265.321 | |
| }, | |
| { | |
| "epoch": 0.233, | |
| "grad_norm": 0.18211719393730164, | |
| "learning_rate": 0.0002639866689961565, | |
| "loss": 0.3117, | |
| "num_input_tokens_seen": 1526988800, | |
| "step": 23300, | |
| "train_runtime": 15229.7058, | |
| "train_tokens_per_second": 100263.841 | |
| }, | |
| { | |
| "epoch": 0.234, | |
| "grad_norm": 0.13128802180290222, | |
| "learning_rate": 0.00026367668347248083, | |
| "loss": 0.3125, | |
| "num_input_tokens_seen": 1533542400, | |
| "step": 23400, | |
| "train_runtime": 15293.6404, | |
| "train_tokens_per_second": 100273.209 | |
| }, | |
| { | |
| "epoch": 0.235, | |
| "grad_norm": 0.11493753641843796, | |
| "learning_rate": 0.0002633655532258646, | |
| "loss": 0.317, | |
| "num_input_tokens_seen": 1540096000, | |
| "step": 23500, | |
| "train_runtime": 15365.113, | |
| "train_tokens_per_second": 100233.301 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 0.15309779345989227, | |
| "learning_rate": 0.000263053281389386, | |
| "loss": 0.3136, | |
| "num_input_tokens_seen": 1546649600, | |
| "step": 23600, | |
| "train_runtime": 15428.6523, | |
| "train_tokens_per_second": 100245.282 | |
| }, | |
| { | |
| "epoch": 0.237, | |
| "grad_norm": 0.15829730033874512, | |
| "learning_rate": 0.0002627398711076189, | |
| "loss": 0.3098, | |
| "num_input_tokens_seen": 1553203200, | |
| "step": 23700, | |
| "train_runtime": 15493.1944, | |
| "train_tokens_per_second": 100250.675 | |
| }, | |
| { | |
| "epoch": 0.238, | |
| "grad_norm": 0.13252806663513184, | |
| "learning_rate": 0.0002624253255366014, | |
| "loss": 0.3096, | |
| "num_input_tokens_seen": 1559756800, | |
| "step": 23800, | |
| "train_runtime": 15556.5037, | |
| "train_tokens_per_second": 100263.969 | |
| }, | |
| { | |
| "epoch": 0.239, | |
| "grad_norm": 0.18889528512954712, | |
| "learning_rate": 0.0002621096478438039, | |
| "loss": 0.3146, | |
| "num_input_tokens_seen": 1566310400, | |
| "step": 23900, | |
| "train_runtime": 15621.7412, | |
| "train_tokens_per_second": 100264.777 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.16285447776317596, | |
| "learning_rate": 0.00026179284120809727, | |
| "loss": 0.3168, | |
| "num_input_tokens_seen": 1572864000, | |
| "step": 24000, | |
| "train_runtime": 15687.4424, | |
| "train_tokens_per_second": 100262.615 | |
| }, | |
| { | |
| "epoch": 0.241, | |
| "grad_norm": 0.14852070808410645, | |
| "learning_rate": 0.0002614749088197208, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 1579417600, | |
| "step": 24100, | |
| "train_runtime": 15752.1472, | |
| "train_tokens_per_second": 100266.813 | |
| }, | |
| { | |
| "epoch": 0.242, | |
| "grad_norm": 0.22735795378684998, | |
| "learning_rate": 0.00026115585388025015, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 1585971200, | |
| "step": 24200, | |
| "train_runtime": 15823.0117, | |
| "train_tokens_per_second": 100231.943 | |
| }, | |
| { | |
| "epoch": 0.243, | |
| "grad_norm": 0.16086964309215546, | |
| "learning_rate": 0.00026083567960256493, | |
| "loss": 0.3107, | |
| "num_input_tokens_seen": 1592524800, | |
| "step": 24300, | |
| "train_runtime": 15889.3517, | |
| "train_tokens_per_second": 100225.914 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 0.15085358917713165, | |
| "learning_rate": 0.00026051438921081667, | |
| "loss": 0.3112, | |
| "num_input_tokens_seen": 1599078400, | |
| "step": 24400, | |
| "train_runtime": 15954.2137, | |
| "train_tokens_per_second": 100229.22 | |
| }, | |
| { | |
| "epoch": 0.245, | |
| "grad_norm": 0.14889656007289886, | |
| "learning_rate": 0.00026019198594039595, | |
| "loss": 0.3147, | |
| "num_input_tokens_seen": 1605632000, | |
| "step": 24500, | |
| "train_runtime": 16020.1883, | |
| "train_tokens_per_second": 100225.539 | |
| }, | |
| { | |
| "epoch": 0.246, | |
| "grad_norm": 0.15055876970291138, | |
| "learning_rate": 0.00025986847303790026, | |
| "loss": 0.3125, | |
| "num_input_tokens_seen": 1612185600, | |
| "step": 24600, | |
| "train_runtime": 16084.1346, | |
| "train_tokens_per_second": 100234.525 | |
| }, | |
| { | |
| "epoch": 0.247, | |
| "grad_norm": 0.14507324993610382, | |
| "learning_rate": 0.00025954385376110076, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 1618739200, | |
| "step": 24700, | |
| "train_runtime": 16148.9618, | |
| "train_tokens_per_second": 100237.973 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.1229107677936554, | |
| "learning_rate": 0.00025921813137891005, | |
| "loss": 0.3147, | |
| "num_input_tokens_seen": 1625292800, | |
| "step": 24800, | |
| "train_runtime": 16214.7466, | |
| "train_tokens_per_second": 100235.473 | |
| }, | |
| { | |
| "epoch": 0.249, | |
| "grad_norm": 0.1423114389181137, | |
| "learning_rate": 0.000258891309171349, | |
| "loss": 0.3127, | |
| "num_input_tokens_seen": 1631846400, | |
| "step": 24900, | |
| "train_runtime": 16278.9968, | |
| "train_tokens_per_second": 100242.442 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.15807275474071503, | |
| "learning_rate": 0.00025856339042951344, | |
| "loss": 0.3088, | |
| "num_input_tokens_seen": 1638400000, | |
| "step": 25000, | |
| "train_runtime": 16343.5944, | |
| "train_tokens_per_second": 100247.226 | |
| }, | |
| { | |
| "epoch": 0.251, | |
| "grad_norm": 0.15635885298252106, | |
| "learning_rate": 0.0002582343784555415, | |
| "loss": 0.3105, | |
| "num_input_tokens_seen": 1644953600, | |
| "step": 25100, | |
| "train_runtime": 16414.1861, | |
| "train_tokens_per_second": 100215.362 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 0.13579483330249786, | |
| "learning_rate": 0.00025790427656258017, | |
| "loss": 0.3159, | |
| "num_input_tokens_seen": 1651507200, | |
| "step": 25200, | |
| "train_runtime": 16478.0373, | |
| "train_tokens_per_second": 100224.752 | |
| }, | |
| { | |
| "epoch": 0.253, | |
| "grad_norm": 0.14977572858333588, | |
| "learning_rate": 0.00025757308807475185, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 1658060800, | |
| "step": 25300, | |
| "train_runtime": 16542.7006, | |
| "train_tokens_per_second": 100229.149 | |
| }, | |
| { | |
| "epoch": 0.254, | |
| "grad_norm": 0.1324361115694046, | |
| "learning_rate": 0.00025724081632712086, | |
| "loss": 0.3108, | |
| "num_input_tokens_seen": 1664614400, | |
| "step": 25400, | |
| "train_runtime": 16607.2591, | |
| "train_tokens_per_second": 100234.144 | |
| }, | |
| { | |
| "epoch": 0.255, | |
| "grad_norm": 0.12053392827510834, | |
| "learning_rate": 0.0002569074646656601, | |
| "loss": 0.3081, | |
| "num_input_tokens_seen": 1671168000, | |
| "step": 25500, | |
| "train_runtime": 16676.4765, | |
| "train_tokens_per_second": 100211.096 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.16214688122272491, | |
| "learning_rate": 0.00025657303644721695, | |
| "loss": 0.3154, | |
| "num_input_tokens_seen": 1677721600, | |
| "step": 25600, | |
| "train_runtime": 16741.4269, | |
| "train_tokens_per_second": 100213.776 | |
| }, | |
| { | |
| "epoch": 0.257, | |
| "grad_norm": 0.13730435073375702, | |
| "learning_rate": 0.00025623753503948004, | |
| "loss": 0.3159, | |
| "num_input_tokens_seen": 1684275200, | |
| "step": 25700, | |
| "train_runtime": 16805.4849, | |
| "train_tokens_per_second": 100221.755 | |
| }, | |
| { | |
| "epoch": 0.258, | |
| "grad_norm": 0.16218283772468567, | |
| "learning_rate": 0.00025590096382094475, | |
| "loss": 0.3111, | |
| "num_input_tokens_seen": 1690828800, | |
| "step": 25800, | |
| "train_runtime": 16869.8548, | |
| "train_tokens_per_second": 100227.821 | |
| }, | |
| { | |
| "epoch": 0.259, | |
| "grad_norm": 0.15016646683216095, | |
| "learning_rate": 0.00025556332618087945, | |
| "loss": 0.3106, | |
| "num_input_tokens_seen": 1697382400, | |
| "step": 25900, | |
| "train_runtime": 16938.0105, | |
| "train_tokens_per_second": 100211.439 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.1398506760597229, | |
| "learning_rate": 0.00025522462551929155, | |
| "loss": 0.313, | |
| "num_input_tokens_seen": 1703936000, | |
| "step": 26000, | |
| "train_runtime": 17003.6995, | |
| "train_tokens_per_second": 100209.722 | |
| }, | |
| { | |
| "epoch": 0.261, | |
| "grad_norm": 0.12380320578813553, | |
| "learning_rate": 0.00025488486524689283, | |
| "loss": 0.3133, | |
| "num_input_tokens_seen": 1710489600, | |
| "step": 26100, | |
| "train_runtime": 17069.3522, | |
| "train_tokens_per_second": 100208.232 | |
| }, | |
| { | |
| "epoch": 0.262, | |
| "grad_norm": 0.14536257088184357, | |
| "learning_rate": 0.00025454404878506555, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 1717043200, | |
| "step": 26200, | |
| "train_runtime": 17132.7395, | |
| "train_tokens_per_second": 100220.003 | |
| }, | |
| { | |
| "epoch": 0.263, | |
| "grad_norm": 0.14442390203475952, | |
| "learning_rate": 0.0002542021795658276, | |
| "loss": 0.311, | |
| "num_input_tokens_seen": 1723596800, | |
| "step": 26300, | |
| "train_runtime": 17196.4745, | |
| "train_tokens_per_second": 100229.66 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.12595972418785095, | |
| "learning_rate": 0.0002538592610317984, | |
| "loss": 0.3118, | |
| "num_input_tokens_seen": 1730150400, | |
| "step": 26400, | |
| "train_runtime": 17266.9358, | |
| "train_tokens_per_second": 100200.199 | |
| }, | |
| { | |
| "epoch": 0.265, | |
| "grad_norm": 0.1587669402360916, | |
| "learning_rate": 0.00025351529663616355, | |
| "loss": 0.3132, | |
| "num_input_tokens_seen": 1736704000, | |
| "step": 26500, | |
| "train_runtime": 17331.5833, | |
| "train_tokens_per_second": 100204.578 | |
| }, | |
| { | |
| "epoch": 0.266, | |
| "grad_norm": 0.1406719982624054, | |
| "learning_rate": 0.00025317028984264087, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 1743257600, | |
| "step": 26600, | |
| "train_runtime": 17395.5945, | |
| "train_tokens_per_second": 100212.591 | |
| }, | |
| { | |
| "epoch": 0.267, | |
| "grad_norm": 0.1677832007408142, | |
| "learning_rate": 0.0002528242441254448, | |
| "loss": 0.309, | |
| "num_input_tokens_seen": 1749811200, | |
| "step": 26700, | |
| "train_runtime": 17459.1185, | |
| "train_tokens_per_second": 100223.342 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 0.13640043139457703, | |
| "learning_rate": 0.000252477162969252, | |
| "loss": 0.3112, | |
| "num_input_tokens_seen": 1756364800, | |
| "step": 26800, | |
| "train_runtime": 17523.2088, | |
| "train_tokens_per_second": 100230.775 | |
| }, | |
| { | |
| "epoch": 0.269, | |
| "grad_norm": 0.12981313467025757, | |
| "learning_rate": 0.00025212904986916584, | |
| "loss": 0.3124, | |
| "num_input_tokens_seen": 1762918400, | |
| "step": 26900, | |
| "train_runtime": 17587.6922, | |
| "train_tokens_per_second": 100235.914 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.14338868856430054, | |
| "learning_rate": 0.00025177990833068133, | |
| "loss": 0.3124, | |
| "num_input_tokens_seen": 1769472000, | |
| "step": 27000, | |
| "train_runtime": 17658.758, | |
| "train_tokens_per_second": 100203.649 | |
| }, | |
| { | |
| "epoch": 0.271, | |
| "grad_norm": 0.17518877983093262, | |
| "learning_rate": 0.0002514297418696499, | |
| "loss": 0.3076, | |
| "num_input_tokens_seen": 1776025600, | |
| "step": 27100, | |
| "train_runtime": 17723.3886, | |
| "train_tokens_per_second": 100208.016 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.1369880735874176, | |
| "learning_rate": 0.0002510785540122439, | |
| "loss": 0.3114, | |
| "num_input_tokens_seen": 1782579200, | |
| "step": 27200, | |
| "train_runtime": 17786.611, | |
| "train_tokens_per_second": 100220.283 | |
| }, | |
| { | |
| "epoch": 0.273, | |
| "grad_norm": 0.15111377835273743, | |
| "learning_rate": 0.0002507263482949212, | |
| "loss": 0.3144, | |
| "num_input_tokens_seen": 1789132800, | |
| "step": 27300, | |
| "train_runtime": 17852.1418, | |
| "train_tokens_per_second": 100219.504 | |
| }, | |
| { | |
| "epoch": 0.274, | |
| "grad_norm": 0.140447199344635, | |
| "learning_rate": 0.0002503731282643894, | |
| "loss": 0.3103, | |
| "num_input_tokens_seen": 1795686400, | |
| "step": 27400, | |
| "train_runtime": 17917.1236, | |
| "train_tokens_per_second": 100221.801 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 0.1373315006494522, | |
| "learning_rate": 0.0002500188974775704, | |
| "loss": 0.3095, | |
| "num_input_tokens_seen": 1802240000, | |
| "step": 27500, | |
| "train_runtime": 17981.4799, | |
| "train_tokens_per_second": 100227.568 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 0.1453147530555725, | |
| "learning_rate": 0.00024966365950156416, | |
| "loss": 0.3085, | |
| "num_input_tokens_seen": 1808793600, | |
| "step": 27600, | |
| "train_runtime": 18052.109, | |
| "train_tokens_per_second": 100198.464 | |
| }, | |
| { | |
| "epoch": 0.277, | |
| "grad_norm": 0.19097484648227692, | |
| "learning_rate": 0.00024930741791361326, | |
| "loss": 0.3128, | |
| "num_input_tokens_seen": 1815347200, | |
| "step": 27700, | |
| "train_runtime": 18117.9773, | |
| "train_tokens_per_second": 100195.909 | |
| }, | |
| { | |
| "epoch": 0.278, | |
| "grad_norm": 0.2222718745470047, | |
| "learning_rate": 0.0002489501763010664, | |
| "loss": 0.3107, | |
| "num_input_tokens_seen": 1821900800, | |
| "step": 27800, | |
| "train_runtime": 18178.1946, | |
| "train_tokens_per_second": 100224.519 | |
| }, | |
| { | |
| "epoch": 0.279, | |
| "grad_norm": 0.16960225999355316, | |
| "learning_rate": 0.00024859193826134285, | |
| "loss": 0.3093, | |
| "num_input_tokens_seen": 1828454400, | |
| "step": 27900, | |
| "train_runtime": 18248.1866, | |
| "train_tokens_per_second": 100199.238 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.15540289878845215, | |
| "learning_rate": 0.00024823270740189556, | |
| "loss": 0.3084, | |
| "num_input_tokens_seen": 1835008000, | |
| "step": 28000, | |
| "train_runtime": 18313.0722, | |
| "train_tokens_per_second": 100202.084 | |
| }, | |
| { | |
| "epoch": 0.281, | |
| "grad_norm": 0.1421203911304474, | |
| "learning_rate": 0.00024787248734017527, | |
| "loss": 0.3119, | |
| "num_input_tokens_seen": 1841561600, | |
| "step": 28100, | |
| "train_runtime": 18377.039, | |
| "train_tokens_per_second": 100209.919 | |
| }, | |
| { | |
| "epoch": 0.282, | |
| "grad_norm": 0.131204292178154, | |
| "learning_rate": 0.0002475112817035941, | |
| "loss": 0.3127, | |
| "num_input_tokens_seen": 1848115200, | |
| "step": 28200, | |
| "train_runtime": 18441.4656, | |
| "train_tokens_per_second": 100215.202 | |
| }, | |
| { | |
| "epoch": 0.283, | |
| "grad_norm": 0.1507508009672165, | |
| "learning_rate": 0.0002471490941294887, | |
| "loss": 0.3118, | |
| "num_input_tokens_seen": 1854668800, | |
| "step": 28300, | |
| "train_runtime": 18511.3095, | |
| "train_tokens_per_second": 100191.118 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 0.12522923946380615, | |
| "learning_rate": 0.000246785928265084, | |
| "loss": 0.3104, | |
| "num_input_tokens_seen": 1861222400, | |
| "step": 28400, | |
| "train_runtime": 18574.4697, | |
| "train_tokens_per_second": 100203.259 | |
| }, | |
| { | |
| "epoch": 0.285, | |
| "grad_norm": 0.2087126076221466, | |
| "learning_rate": 0.0002464217877674562, | |
| "loss": 0.3132, | |
| "num_input_tokens_seen": 1867776000, | |
| "step": 28500, | |
| "train_runtime": 18638.8332, | |
| "train_tokens_per_second": 100208.848 | |
| }, | |
| { | |
| "epoch": 0.286, | |
| "grad_norm": 0.1495303064584732, | |
| "learning_rate": 0.0002460566763034961, | |
| "loss": 0.3159, | |
| "num_input_tokens_seen": 1874329600, | |
| "step": 28600, | |
| "train_runtime": 18703.8924, | |
| "train_tokens_per_second": 100210.671 | |
| }, | |
| { | |
| "epoch": 0.287, | |
| "grad_norm": 0.14563380181789398, | |
| "learning_rate": 0.00024569059754987196, | |
| "loss": 0.3116, | |
| "num_input_tokens_seen": 1880883200, | |
| "step": 28700, | |
| "train_runtime": 18774.7813, | |
| "train_tokens_per_second": 100181.364 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.12803615629673004, | |
| "learning_rate": 0.00024532355519299296, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 1887436800, | |
| "step": 28800, | |
| "train_runtime": 18838.435, | |
| "train_tokens_per_second": 100190.743 | |
| }, | |
| { | |
| "epoch": 0.289, | |
| "grad_norm": 0.5618897676467896, | |
| "learning_rate": 0.0002449555529289714, | |
| "loss": 0.3129, | |
| "num_input_tokens_seen": 1893990400, | |
| "step": 28900, | |
| "train_runtime": 18901.8999, | |
| "train_tokens_per_second": 100201.06 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.15488959848880768, | |
| "learning_rate": 0.0002445865944635861, | |
| "loss": 0.3155, | |
| "num_input_tokens_seen": 1900544000, | |
| "step": 29000, | |
| "train_runtime": 18967.9894, | |
| "train_tokens_per_second": 100197.441 | |
| }, | |
| { | |
| "epoch": 0.291, | |
| "grad_norm": 0.13676992058753967, | |
| "learning_rate": 0.0002442166835122446, | |
| "loss": 0.3101, | |
| "num_input_tokens_seen": 1907097600, | |
| "step": 29100, | |
| "train_runtime": 19031.1664, | |
| "train_tokens_per_second": 100209.181 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 0.11402736604213715, | |
| "learning_rate": 0.00024384582379994614, | |
| "loss": 0.3094, | |
| "num_input_tokens_seen": 1913651200, | |
| "step": 29200, | |
| "train_runtime": 19096.1775, | |
| "train_tokens_per_second": 100211.218 | |
| }, | |
| { | |
| "epoch": 0.293, | |
| "grad_norm": 0.1358448714017868, | |
| "learning_rate": 0.00024347401906124388, | |
| "loss": 0.309, | |
| "num_input_tokens_seen": 1920204800, | |
| "step": 29300, | |
| "train_runtime": 19165.3098, | |
| "train_tokens_per_second": 100191.691 | |
| }, | |
| { | |
| "epoch": 0.294, | |
| "grad_norm": 0.14608891308307648, | |
| "learning_rate": 0.0002431012730402075, | |
| "loss": 0.3119, | |
| "num_input_tokens_seen": 1926758400, | |
| "step": 29400, | |
| "train_runtime": 19230.3069, | |
| "train_tokens_per_second": 100193.845 | |
| }, | |
| { | |
| "epoch": 0.295, | |
| "grad_norm": 0.1501711755990982, | |
| "learning_rate": 0.00024272758949038517, | |
| "loss": 0.3091, | |
| "num_input_tokens_seen": 1933312000, | |
| "step": 29500, | |
| "train_runtime": 19294.7627, | |
| "train_tokens_per_second": 100198.796 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.1614496409893036, | |
| "learning_rate": 0.00024235297217476616, | |
| "loss": 0.3104, | |
| "num_input_tokens_seen": 1939865600, | |
| "step": 29600, | |
| "train_runtime": 19364.7415, | |
| "train_tokens_per_second": 100175.135 | |
| }, | |
| { | |
| "epoch": 0.297, | |
| "grad_norm": 0.11902807652950287, | |
| "learning_rate": 0.00024197742486574268, | |
| "loss": 0.3126, | |
| "num_input_tokens_seen": 1946419200, | |
| "step": 29700, | |
| "train_runtime": 19429.1038, | |
| "train_tokens_per_second": 100180.596 | |
| }, | |
| { | |
| "epoch": 0.298, | |
| "grad_norm": 0.12998123466968536, | |
| "learning_rate": 0.0002416009513450719, | |
| "loss": 0.3102, | |
| "num_input_tokens_seen": 1952972800, | |
| "step": 29800, | |
| "train_runtime": 19494.2244, | |
| "train_tokens_per_second": 100182.124 | |
| }, | |
| { | |
| "epoch": 0.299, | |
| "grad_norm": 0.2079559862613678, | |
| "learning_rate": 0.00024122355540383806, | |
| "loss": 0.311, | |
| "num_input_tokens_seen": 1959526400, | |
| "step": 29900, | |
| "train_runtime": 19559.2072, | |
| "train_tokens_per_second": 100184.347 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.15128397941589355, | |
| "learning_rate": 0.00024084524084241405, | |
| "loss": 0.3076, | |
| "num_input_tokens_seen": 1966080000, | |
| "step": 30000, | |
| "train_runtime": 19623.3669, | |
| "train_tokens_per_second": 100190.758 | |
| }, | |
| { | |
| "epoch": 0.301, | |
| "grad_norm": 0.13512304425239563, | |
| "learning_rate": 0.00024046601147042332, | |
| "loss": 0.3119, | |
| "num_input_tokens_seen": 1972633600, | |
| "step": 30100, | |
| "train_runtime": 19688.91, | |
| "train_tokens_per_second": 100190.086 | |
| }, | |
| { | |
| "epoch": 0.302, | |
| "grad_norm": 0.12716713547706604, | |
| "learning_rate": 0.0002400858711067015, | |
| "loss": 0.3093, | |
| "num_input_tokens_seen": 1979187200, | |
| "step": 30200, | |
| "train_runtime": 19753.5863, | |
| "train_tokens_per_second": 100193.816 | |
| }, | |
| { | |
| "epoch": 0.303, | |
| "grad_norm": 0.1301889717578888, | |
| "learning_rate": 0.00023970482357925772, | |
| "loss": 0.31, | |
| "num_input_tokens_seen": 1985740800, | |
| "step": 30300, | |
| "train_runtime": 19823.6081, | |
| "train_tokens_per_second": 100170.503 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.13871292769908905, | |
| "learning_rate": 0.00023932287272523646, | |
| "loss": 0.3084, | |
| "num_input_tokens_seen": 1992294400, | |
| "step": 30400, | |
| "train_runtime": 19887.7656, | |
| "train_tokens_per_second": 100176.885 | |
| }, | |
| { | |
| "epoch": 0.305, | |
| "grad_norm": 0.12449346482753754, | |
| "learning_rate": 0.00023894002239087847, | |
| "loss": 0.3276, | |
| "num_input_tokens_seen": 1998848000, | |
| "step": 30500, | |
| "train_runtime": 19952.5714, | |
| "train_tokens_per_second": 100179.97 | |
| }, | |
| { | |
| "epoch": 0.306, | |
| "grad_norm": 0.1523977369070053, | |
| "learning_rate": 0.0002385562764314825, | |
| "loss": 0.3097, | |
| "num_input_tokens_seen": 2005401600, | |
| "step": 30600, | |
| "train_runtime": 20017.8352, | |
| "train_tokens_per_second": 100180.743 | |
| }, | |
| { | |
| "epoch": 0.307, | |
| "grad_norm": 0.1439458280801773, | |
| "learning_rate": 0.00023817163871136596, | |
| "loss": 0.3048, | |
| "num_input_tokens_seen": 2011955200, | |
| "step": 30700, | |
| "train_runtime": 20081.8889, | |
| "train_tokens_per_second": 100187.548 | |
| }, | |
| { | |
| "epoch": 0.308, | |
| "grad_norm": 0.12756380438804626, | |
| "learning_rate": 0.00023778611310382652, | |
| "loss": 0.3075, | |
| "num_input_tokens_seen": 2018508800, | |
| "step": 30800, | |
| "train_runtime": 20145.6107, | |
| "train_tokens_per_second": 100195.96 | |
| }, | |
| { | |
| "epoch": 0.309, | |
| "grad_norm": 0.14607320725917816, | |
| "learning_rate": 0.0002373997034911027, | |
| "loss": 0.3139, | |
| "num_input_tokens_seen": 2025062400, | |
| "step": 30900, | |
| "train_runtime": 20210.9796, | |
| "train_tokens_per_second": 100196.153 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.12456675618886948, | |
| "learning_rate": 0.00023701241376433506, | |
| "loss": 0.3089, | |
| "num_input_tokens_seen": 2031616000, | |
| "step": 31000, | |
| "train_runtime": 20281.0675, | |
| "train_tokens_per_second": 100173.031 | |
| }, | |
| { | |
| "epoch": 0.311, | |
| "grad_norm": 0.13834626972675323, | |
| "learning_rate": 0.0002366242478235268, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2038169600, | |
| "step": 31100, | |
| "train_runtime": 20346.0263, | |
| "train_tokens_per_second": 100175.315 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.1534184068441391, | |
| "learning_rate": 0.00023623520957750471, | |
| "loss": 0.3082, | |
| "num_input_tokens_seen": 2044723200, | |
| "step": 31200, | |
| "train_runtime": 20409.76, | |
| "train_tokens_per_second": 100183.598 | |
| }, | |
| { | |
| "epoch": 0.313, | |
| "grad_norm": 0.12966671586036682, | |
| "learning_rate": 0.00023584530294387953, | |
| "loss": 0.3126, | |
| "num_input_tokens_seen": 2051276800, | |
| "step": 31300, | |
| "train_runtime": 20475.6348, | |
| "train_tokens_per_second": 100181.353 | |
| }, | |
| { | |
| "epoch": 0.314, | |
| "grad_norm": 0.14474999904632568, | |
| "learning_rate": 0.00023545453184900682, | |
| "loss": 0.3091, | |
| "num_input_tokens_seen": 2057830400, | |
| "step": 31400, | |
| "train_runtime": 20539.196, | |
| "train_tokens_per_second": 100190.407 | |
| }, | |
| { | |
| "epoch": 0.315, | |
| "grad_norm": 0.13208946585655212, | |
| "learning_rate": 0.00023506290022794706, | |
| "loss": 0.3095, | |
| "num_input_tokens_seen": 2064384000, | |
| "step": 31500, | |
| "train_runtime": 20604.221, | |
| "train_tokens_per_second": 100192.286 | |
| }, | |
| { | |
| "epoch": 0.316, | |
| "grad_norm": 0.15090374648571014, | |
| "learning_rate": 0.00023467041202442643, | |
| "loss": 0.3073, | |
| "num_input_tokens_seen": 2070937600, | |
| "step": 31600, | |
| "train_runtime": 20674.5759, | |
| "train_tokens_per_second": 100168.323 | |
| }, | |
| { | |
| "epoch": 0.317, | |
| "grad_norm": 0.18638543784618378, | |
| "learning_rate": 0.00023427707119079669, | |
| "loss": 0.312, | |
| "num_input_tokens_seen": 2077491200, | |
| "step": 31700, | |
| "train_runtime": 20738.8671, | |
| "train_tokens_per_second": 100173.804 | |
| }, | |
| { | |
| "epoch": 0.318, | |
| "grad_norm": 0.1385478377342224, | |
| "learning_rate": 0.0002338828816879957, | |
| "loss": 0.3095, | |
| "num_input_tokens_seen": 2084044800, | |
| "step": 31800, | |
| "train_runtime": 20802.7906, | |
| "train_tokens_per_second": 100181.021 | |
| }, | |
| { | |
| "epoch": 0.319, | |
| "grad_norm": 0.15265443921089172, | |
| "learning_rate": 0.00023348784748550744, | |
| "loss": 0.3103, | |
| "num_input_tokens_seen": 2090598400, | |
| "step": 31900, | |
| "train_runtime": 20868.0311, | |
| "train_tokens_per_second": 100181.871 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.15918248891830444, | |
| "learning_rate": 0.00023309197256132184, | |
| "loss": 0.3102, | |
| "num_input_tokens_seen": 2097152000, | |
| "step": 32000, | |
| "train_runtime": 20937.8931, | |
| "train_tokens_per_second": 100160.603 | |
| }, | |
| { | |
| "epoch": 0.321, | |
| "grad_norm": 0.14801020920276642, | |
| "learning_rate": 0.00023269526090189505, | |
| "loss": 0.3147, | |
| "num_input_tokens_seen": 2103705600, | |
| "step": 32100, | |
| "train_runtime": 21002.9142, | |
| "train_tokens_per_second": 100162.557 | |
| }, | |
| { | |
| "epoch": 0.322, | |
| "grad_norm": 0.18616679310798645, | |
| "learning_rate": 0.00023229771650210907, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 2110259200, | |
| "step": 32200, | |
| "train_runtime": 21067.872, | |
| "train_tokens_per_second": 100164.801 | |
| }, | |
| { | |
| "epoch": 0.323, | |
| "grad_norm": 0.13931268453598022, | |
| "learning_rate": 0.00023189934336523163, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 2116812800, | |
| "step": 32300, | |
| "train_runtime": 21131.2256, | |
| "train_tokens_per_second": 100174.634 | |
| }, | |
| { | |
| "epoch": 0.324, | |
| "grad_norm": 0.1734631061553955, | |
| "learning_rate": 0.00023150014550287574, | |
| "loss": 0.3112, | |
| "num_input_tokens_seen": 2123366400, | |
| "step": 32400, | |
| "train_runtime": 21201.6285, | |
| "train_tokens_per_second": 100151.099 | |
| }, | |
| { | |
| "epoch": 0.325, | |
| "grad_norm": 0.13876596093177795, | |
| "learning_rate": 0.00023110012693495943, | |
| "loss": 0.31, | |
| "num_input_tokens_seen": 2129920000, | |
| "step": 32500, | |
| "train_runtime": 21265.8205, | |
| "train_tokens_per_second": 100156.963 | |
| }, | |
| { | |
| "epoch": 0.326, | |
| "grad_norm": 0.20441171526908875, | |
| "learning_rate": 0.00023069929168966527, | |
| "loss": 0.3095, | |
| "num_input_tokens_seen": 2136473600, | |
| "step": 32600, | |
| "train_runtime": 21329.6315, | |
| "train_tokens_per_second": 100164.581 | |
| }, | |
| { | |
| "epoch": 0.327, | |
| "grad_norm": 0.12022672593593597, | |
| "learning_rate": 0.0002302976438033997, | |
| "loss": 0.3089, | |
| "num_input_tokens_seen": 2143027200, | |
| "step": 32700, | |
| "train_runtime": 21394.0086, | |
| "train_tokens_per_second": 100169.502 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.23158074915409088, | |
| "learning_rate": 0.0002298951873207525, | |
| "loss": 0.3121, | |
| "num_input_tokens_seen": 2149580800, | |
| "step": 32800, | |
| "train_runtime": 21459.8938, | |
| "train_tokens_per_second": 100167.355 | |
| }, | |
| { | |
| "epoch": 0.329, | |
| "grad_norm": 0.11978685855865479, | |
| "learning_rate": 0.00022949192629445606, | |
| "loss": 0.308, | |
| "num_input_tokens_seen": 2156134400, | |
| "step": 32900, | |
| "train_runtime": 21524.2825, | |
| "train_tokens_per_second": 100172.185 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.16882842779159546, | |
| "learning_rate": 0.0002290878647853443, | |
| "loss": 0.3076, | |
| "num_input_tokens_seen": 2162688000, | |
| "step": 33000, | |
| "train_runtime": 21595.0222, | |
| "train_tokens_per_second": 100147.524 | |
| }, | |
| { | |
| "epoch": 0.331, | |
| "grad_norm": 0.1368299126625061, | |
| "learning_rate": 0.00022868300686231224, | |
| "loss": 0.3078, | |
| "num_input_tokens_seen": 2169241600, | |
| "step": 33100, | |
| "train_runtime": 21659.0361, | |
| "train_tokens_per_second": 100154.115 | |
| }, | |
| { | |
| "epoch": 0.332, | |
| "grad_norm": 0.13301041722297668, | |
| "learning_rate": 0.00022827735660227457, | |
| "loss": 0.3103, | |
| "num_input_tokens_seen": 2175795200, | |
| "step": 33200, | |
| "train_runtime": 21723.8934, | |
| "train_tokens_per_second": 100156.779 | |
| }, | |
| { | |
| "epoch": 0.333, | |
| "grad_norm": 0.13545189797878265, | |
| "learning_rate": 0.000227870918090125, | |
| "loss": 0.3068, | |
| "num_input_tokens_seen": 2182348800, | |
| "step": 33300, | |
| "train_runtime": 21788.4359, | |
| "train_tokens_per_second": 100160.875 | |
| }, | |
| { | |
| "epoch": 0.334, | |
| "grad_norm": 0.2138141542673111, | |
| "learning_rate": 0.00022746369541869476, | |
| "loss": 0.3059, | |
| "num_input_tokens_seen": 2188902400, | |
| "step": 33400, | |
| "train_runtime": 21853.4857, | |
| "train_tokens_per_second": 100162.621 | |
| }, | |
| { | |
| "epoch": 0.335, | |
| "grad_norm": 0.1255991905927658, | |
| "learning_rate": 0.00022705569268871163, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 2195456000, | |
| "step": 33500, | |
| "train_runtime": 21918.1728, | |
| "train_tokens_per_second": 100166.014 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.1330287754535675, | |
| "learning_rate": 0.00022664691400875865, | |
| "loss": 0.3093, | |
| "num_input_tokens_seen": 2202009600, | |
| "step": 33600, | |
| "train_runtime": 21987.6743, | |
| "train_tokens_per_second": 100147.454 | |
| }, | |
| { | |
| "epoch": 0.337, | |
| "grad_norm": 0.1321260631084442, | |
| "learning_rate": 0.00022623736349523254, | |
| "loss": 0.3109, | |
| "num_input_tokens_seen": 2208563200, | |
| "step": 33700, | |
| "train_runtime": 22052.5483, | |
| "train_tokens_per_second": 100150.022 | |
| }, | |
| { | |
| "epoch": 0.338, | |
| "grad_norm": 0.13865865767002106, | |
| "learning_rate": 0.00022582704527230238, | |
| "loss": 0.3068, | |
| "num_input_tokens_seen": 2215116800, | |
| "step": 33800, | |
| "train_runtime": 22117.0958, | |
| "train_tokens_per_second": 100154.054 | |
| }, | |
| { | |
| "epoch": 0.339, | |
| "grad_norm": 0.13597998023033142, | |
| "learning_rate": 0.0002254159634718682, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2221670400, | |
| "step": 33900, | |
| "train_runtime": 22180.0605, | |
| "train_tokens_per_second": 100165.209 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.14176584780216217, | |
| "learning_rate": 0.00022500412223351915, | |
| "loss": 0.3114, | |
| "num_input_tokens_seen": 2228224000, | |
| "step": 34000, | |
| "train_runtime": 22251.2759, | |
| "train_tokens_per_second": 100139.157 | |
| }, | |
| { | |
| "epoch": 0.341, | |
| "grad_norm": 0.13006241619586945, | |
| "learning_rate": 0.0002245915257044919, | |
| "loss": 0.3071, | |
| "num_input_tokens_seen": 2234777600, | |
| "step": 34100, | |
| "train_runtime": 22315.7056, | |
| "train_tokens_per_second": 100143.712 | |
| }, | |
| { | |
| "epoch": 0.342, | |
| "grad_norm": 0.186634823679924, | |
| "learning_rate": 0.00022417817803962892, | |
| "loss": 0.3032, | |
| "num_input_tokens_seen": 2241331200, | |
| "step": 34200, | |
| "train_runtime": 22380.1064, | |
| "train_tokens_per_second": 100148.371 | |
| }, | |
| { | |
| "epoch": 0.343, | |
| "grad_norm": 0.1767393946647644, | |
| "learning_rate": 0.0002237640834013366, | |
| "loss": 0.3085, | |
| "num_input_tokens_seen": 2247884800, | |
| "step": 34300, | |
| "train_runtime": 22444.6012, | |
| "train_tokens_per_second": 100152.584 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.15075454115867615, | |
| "learning_rate": 0.0002233492459595434, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 2254438400, | |
| "step": 34400, | |
| "train_runtime": 22509.6493, | |
| "train_tokens_per_second": 100154.31 | |
| }, | |
| { | |
| "epoch": 0.345, | |
| "grad_norm": 0.15754783153533936, | |
| "learning_rate": 0.00022293366989165772, | |
| "loss": 0.307, | |
| "num_input_tokens_seen": 2260992000, | |
| "step": 34500, | |
| "train_runtime": 22579.4848, | |
| "train_tokens_per_second": 100134.791 | |
| }, | |
| { | |
| "epoch": 0.346, | |
| "grad_norm": 0.13372038304805756, | |
| "learning_rate": 0.00022251735938252587, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2267545600, | |
| "step": 34600, | |
| "train_runtime": 22643.953, | |
| "train_tokens_per_second": 100139.123 | |
| }, | |
| { | |
| "epoch": 0.347, | |
| "grad_norm": 0.17753738164901733, | |
| "learning_rate": 0.0002221003186243902, | |
| "loss": 0.3087, | |
| "num_input_tokens_seen": 2274099200, | |
| "step": 34700, | |
| "train_runtime": 22708.6869, | |
| "train_tokens_per_second": 100142.259 | |
| }, | |
| { | |
| "epoch": 0.348, | |
| "grad_norm": 0.1375788450241089, | |
| "learning_rate": 0.00022168255181684643, | |
| "loss": 0.3064, | |
| "num_input_tokens_seen": 2280652800, | |
| "step": 34800, | |
| "train_runtime": 22774.2018, | |
| "train_tokens_per_second": 100141.942 | |
| }, | |
| { | |
| "epoch": 0.349, | |
| "grad_norm": 0.14929898083209991, | |
| "learning_rate": 0.00022126406316680172, | |
| "loss": 0.3108, | |
| "num_input_tokens_seen": 2287206400, | |
| "step": 34900, | |
| "train_runtime": 22839.776, | |
| "train_tokens_per_second": 100141.367 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.15789327025413513, | |
| "learning_rate": 0.00022084485688843208, | |
| "loss": 0.3082, | |
| "num_input_tokens_seen": 2293760000, | |
| "step": 35000, | |
| "train_runtime": 22904.3853, | |
| "train_tokens_per_second": 100145.015 | |
| }, | |
| { | |
| "epoch": 0.351, | |
| "grad_norm": 0.1339723765850067, | |
| "learning_rate": 0.00022042493720314003, | |
| "loss": 0.3127, | |
| "num_input_tokens_seen": 2300313600, | |
| "step": 35100, | |
| "train_runtime": 22968.8594, | |
| "train_tokens_per_second": 100149.231 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.14159700274467468, | |
| "learning_rate": 0.00022000430833951228, | |
| "loss": 0.3096, | |
| "num_input_tokens_seen": 2306867200, | |
| "step": 35200, | |
| "train_runtime": 23033.0283, | |
| "train_tokens_per_second": 100154.751 | |
| }, | |
| { | |
| "epoch": 0.353, | |
| "grad_norm": 0.17289403080940247, | |
| "learning_rate": 0.00021958297453327673, | |
| "loss": 0.3058, | |
| "num_input_tokens_seen": 2313420800, | |
| "step": 35300, | |
| "train_runtime": 23103.5037, | |
| "train_tokens_per_second": 100132.899 | |
| }, | |
| { | |
| "epoch": 0.354, | |
| "grad_norm": 0.1353076845407486, | |
| "learning_rate": 0.00021916094002726012, | |
| "loss": 0.3048, | |
| "num_input_tokens_seen": 2319974400, | |
| "step": 35400, | |
| "train_runtime": 23166.8292, | |
| "train_tokens_per_second": 100142.077 | |
| }, | |
| { | |
| "epoch": 0.355, | |
| "grad_norm": 0.12303294241428375, | |
| "learning_rate": 0.00021873820907134534, | |
| "loss": 0.3102, | |
| "num_input_tokens_seen": 2326528000, | |
| "step": 35500, | |
| "train_runtime": 23232.6655, | |
| "train_tokens_per_second": 100140.382 | |
| }, | |
| { | |
| "epoch": 0.356, | |
| "grad_norm": 0.14765286445617676, | |
| "learning_rate": 0.0002183147859224283, | |
| "loss": 0.3106, | |
| "num_input_tokens_seen": 2333081600, | |
| "step": 35600, | |
| "train_runtime": 23296.4196, | |
| "train_tokens_per_second": 100147.647 | |
| }, | |
| { | |
| "epoch": 0.357, | |
| "grad_norm": 0.13833215832710266, | |
| "learning_rate": 0.00021789067484437544, | |
| "loss": 0.3055, | |
| "num_input_tokens_seen": 2339635200, | |
| "step": 35700, | |
| "train_runtime": 23361.5704, | |
| "train_tokens_per_second": 100148.884 | |
| }, | |
| { | |
| "epoch": 0.358, | |
| "grad_norm": 0.13157132267951965, | |
| "learning_rate": 0.00021746588010798068, | |
| "loss": 0.3081, | |
| "num_input_tokens_seen": 2346188800, | |
| "step": 35800, | |
| "train_runtime": 23430.7927, | |
| "train_tokens_per_second": 100132.711 | |
| }, | |
| { | |
| "epoch": 0.359, | |
| "grad_norm": 0.12913836538791656, | |
| "learning_rate": 0.00021704040599092216, | |
| "loss": 0.3094, | |
| "num_input_tokens_seen": 2352742400, | |
| "step": 35900, | |
| "train_runtime": 23495.4052, | |
| "train_tokens_per_second": 100136.277 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.13528013229370117, | |
| "learning_rate": 0.00021661425677771965, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2359296000, | |
| "step": 36000, | |
| "train_runtime": 23559.8424, | |
| "train_tokens_per_second": 100140.568 | |
| }, | |
| { | |
| "epoch": 0.361, | |
| "grad_norm": 0.15519119799137115, | |
| "learning_rate": 0.00021618743675969095, | |
| "loss": 0.3065, | |
| "num_input_tokens_seen": 2365849600, | |
| "step": 36100, | |
| "train_runtime": 23624.7603, | |
| "train_tokens_per_second": 100142.798 | |
| }, | |
| { | |
| "epoch": 0.362, | |
| "grad_norm": 0.14744772017002106, | |
| "learning_rate": 0.0002157599502349089, | |
| "loss": 0.3068, | |
| "num_input_tokens_seen": 2372403200, | |
| "step": 36200, | |
| "train_runtime": 23688.8845, | |
| "train_tokens_per_second": 100148.371 | |
| }, | |
| { | |
| "epoch": 0.363, | |
| "grad_norm": 0.13838911056518555, | |
| "learning_rate": 0.00021533180150815802, | |
| "loss": 0.3097, | |
| "num_input_tokens_seen": 2378956800, | |
| "step": 36300, | |
| "train_runtime": 23759.9908, | |
| "train_tokens_per_second": 100124.483 | |
| }, | |
| { | |
| "epoch": 0.364, | |
| "grad_norm": 0.12536117434501648, | |
| "learning_rate": 0.00021490299489089132, | |
| "loss": 0.3067, | |
| "num_input_tokens_seen": 2385510400, | |
| "step": 36400, | |
| "train_runtime": 23823.7123, | |
| "train_tokens_per_second": 100131.767 | |
| }, | |
| { | |
| "epoch": 0.365, | |
| "grad_norm": 0.14205192029476166, | |
| "learning_rate": 0.00021447353470118656, | |
| "loss": 0.3049, | |
| "num_input_tokens_seen": 2392064000, | |
| "step": 36500, | |
| "train_runtime": 23887.5453, | |
| "train_tokens_per_second": 100138.544 | |
| }, | |
| { | |
| "epoch": 0.366, | |
| "grad_norm": 0.11950815469026566, | |
| "learning_rate": 0.00021404342526370326, | |
| "loss": 0.3072, | |
| "num_input_tokens_seen": 2398617600, | |
| "step": 36600, | |
| "train_runtime": 23951.3108, | |
| "train_tokens_per_second": 100145.567 | |
| }, | |
| { | |
| "epoch": 0.367, | |
| "grad_norm": 0.1286599189043045, | |
| "learning_rate": 0.00021361267090963846, | |
| "loss": 0.3096, | |
| "num_input_tokens_seen": 2405171200, | |
| "step": 36700, | |
| "train_runtime": 24016.5354, | |
| "train_tokens_per_second": 100146.468 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.12663663923740387, | |
| "learning_rate": 0.0002131812759766839, | |
| "loss": 0.3054, | |
| "num_input_tokens_seen": 2411724800, | |
| "step": 36800, | |
| "train_runtime": 24085.8974, | |
| "train_tokens_per_second": 100130.162 | |
| }, | |
| { | |
| "epoch": 0.369, | |
| "grad_norm": 0.16495896875858307, | |
| "learning_rate": 0.00021274924480898169, | |
| "loss": 0.3037, | |
| "num_input_tokens_seen": 2418278400, | |
| "step": 36900, | |
| "train_runtime": 24149.4634, | |
| "train_tokens_per_second": 100137.977 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.13351881504058838, | |
| "learning_rate": 0.00021231658175708087, | |
| "loss": 0.309, | |
| "num_input_tokens_seen": 2424832000, | |
| "step": 37000, | |
| "train_runtime": 24214.3635, | |
| "train_tokens_per_second": 100140.233 | |
| }, | |
| { | |
| "epoch": 0.371, | |
| "grad_norm": 0.13137440383434296, | |
| "learning_rate": 0.00021188329117789357, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2431385600, | |
| "step": 37100, | |
| "train_runtime": 24284.8537, | |
| "train_tokens_per_second": 100119.426 | |
| }, | |
| { | |
| "epoch": 0.372, | |
| "grad_norm": 0.17069390416145325, | |
| "learning_rate": 0.0002114493774346512, | |
| "loss": 0.3075, | |
| "num_input_tokens_seen": 2437939200, | |
| "step": 37200, | |
| "train_runtime": 24349.7441, | |
| "train_tokens_per_second": 100121.759 | |
| }, | |
| { | |
| "epoch": 0.373, | |
| "grad_norm": 0.13554754853248596, | |
| "learning_rate": 0.00021101484489686025, | |
| "loss": 0.3056, | |
| "num_input_tokens_seen": 2444492800, | |
| "step": 37300, | |
| "train_runtime": 24413.4106, | |
| "train_tokens_per_second": 100129.099 | |
| }, | |
| { | |
| "epoch": 0.374, | |
| "grad_norm": 0.24161159992218018, | |
| "learning_rate": 0.00021057969794025866, | |
| "loss": 0.3084, | |
| "num_input_tokens_seen": 2451046400, | |
| "step": 37400, | |
| "train_runtime": 24479.2787, | |
| "train_tokens_per_second": 100127.395 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 0.11480960994958878, | |
| "learning_rate": 0.00021014394094677128, | |
| "loss": 0.3065, | |
| "num_input_tokens_seen": 2457600000, | |
| "step": 37500, | |
| "train_runtime": 24543.1085, | |
| "train_tokens_per_second": 100134.015 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.1333978921175003, | |
| "learning_rate": 0.00020970757830446633, | |
| "loss": 0.3047, | |
| "num_input_tokens_seen": 2464153600, | |
| "step": 37600, | |
| "train_runtime": 24612.4036, | |
| "train_tokens_per_second": 100118.365 | |
| }, | |
| { | |
| "epoch": 0.377, | |
| "grad_norm": 0.1306515485048294, | |
| "learning_rate": 0.00020927061440751072, | |
| "loss": 0.3039, | |
| "num_input_tokens_seen": 2470707200, | |
| "step": 37700, | |
| "train_runtime": 24676.7406, | |
| "train_tokens_per_second": 100122.915 | |
| }, | |
| { | |
| "epoch": 0.378, | |
| "grad_norm": 0.19177651405334473, | |
| "learning_rate": 0.00020883305365612602, | |
| "loss": 0.3091, | |
| "num_input_tokens_seen": 2477260800, | |
| "step": 37800, | |
| "train_runtime": 24742.4612, | |
| "train_tokens_per_second": 100121.842 | |
| }, | |
| { | |
| "epoch": 0.379, | |
| "grad_norm": 0.14794479310512543, | |
| "learning_rate": 0.00020839490045654425, | |
| "loss": 0.3103, | |
| "num_input_tokens_seen": 2483814400, | |
| "step": 37900, | |
| "train_runtime": 24807.833, | |
| "train_tokens_per_second": 100122.183 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.1391579508781433, | |
| "learning_rate": 0.00020795615922096313, | |
| "loss": 0.305, | |
| "num_input_tokens_seen": 2490368000, | |
| "step": 38000, | |
| "train_runtime": 24871.0815, | |
| "train_tokens_per_second": 100131.07 | |
| }, | |
| { | |
| "epoch": 0.381, | |
| "grad_norm": 0.14466038346290588, | |
| "learning_rate": 0.00020751683436750207, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2496921600, | |
| "step": 38100, | |
| "train_runtime": 24941.5584, | |
| "train_tokens_per_second": 100110.89 | |
| }, | |
| { | |
| "epoch": 0.382, | |
| "grad_norm": 0.14706650376319885, | |
| "learning_rate": 0.00020707693032015752, | |
| "loss": 0.3131, | |
| "num_input_tokens_seen": 2503475200, | |
| "step": 38200, | |
| "train_runtime": 25006.658, | |
| "train_tokens_per_second": 100112.346 | |
| }, | |
| { | |
| "epoch": 0.383, | |
| "grad_norm": 0.1455349326133728, | |
| "learning_rate": 0.00020663645150875834, | |
| "loss": 0.3058, | |
| "num_input_tokens_seen": 2510028800, | |
| "step": 38300, | |
| "train_runtime": 25070.3473, | |
| "train_tokens_per_second": 100119.427 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.13858123123645782, | |
| "learning_rate": 0.00020619540236892125, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2516582400, | |
| "step": 38400, | |
| "train_runtime": 25135.6982, | |
| "train_tokens_per_second": 100119.853 | |
| }, | |
| { | |
| "epoch": 0.385, | |
| "grad_norm": 0.17408473789691925, | |
| "learning_rate": 0.00020575378734200616, | |
| "loss": 0.3068, | |
| "num_input_tokens_seen": 2523136000, | |
| "step": 38500, | |
| "train_runtime": 25206.1351, | |
| "train_tokens_per_second": 100100.075 | |
| }, | |
| { | |
| "epoch": 0.386, | |
| "grad_norm": 0.12729153037071228, | |
| "learning_rate": 0.0002053116108750715, | |
| "loss": 0.3062, | |
| "num_input_tokens_seen": 2529689600, | |
| "step": 38600, | |
| "train_runtime": 25270.823, | |
| "train_tokens_per_second": 100103.174 | |
| }, | |
| { | |
| "epoch": 0.387, | |
| "grad_norm": 0.15452224016189575, | |
| "learning_rate": 0.0002048688774208294, | |
| "loss": 0.3029, | |
| "num_input_tokens_seen": 2536243200, | |
| "step": 38700, | |
| "train_runtime": 25334.6018, | |
| "train_tokens_per_second": 100109.851 | |
| }, | |
| { | |
| "epoch": 0.388, | |
| "grad_norm": 0.11749983578920364, | |
| "learning_rate": 0.0002044255914376009, | |
| "loss": 0.3055, | |
| "num_input_tokens_seen": 2542796800, | |
| "step": 38800, | |
| "train_runtime": 25398.9456, | |
| "train_tokens_per_second": 100114.266 | |
| }, | |
| { | |
| "epoch": 0.389, | |
| "grad_norm": 0.12558670341968536, | |
| "learning_rate": 0.00020398175738927082, | |
| "loss": 0.307, | |
| "num_input_tokens_seen": 2549350400, | |
| "step": 38900, | |
| "train_runtime": 25469.3443, | |
| "train_tokens_per_second": 100094.858 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.11652723699808121, | |
| "learning_rate": 0.00020353737974524312, | |
| "loss": 0.3059, | |
| "num_input_tokens_seen": 2555904000, | |
| "step": 39000, | |
| "train_runtime": 25534.1962, | |
| "train_tokens_per_second": 100097.296 | |
| }, | |
| { | |
| "epoch": 0.391, | |
| "grad_norm": 0.14530417323112488, | |
| "learning_rate": 0.00020309246298039584, | |
| "loss": 0.3043, | |
| "num_input_tokens_seen": 2562457600, | |
| "step": 39100, | |
| "train_runtime": 25597.7668, | |
| "train_tokens_per_second": 100104.733 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.2145591825246811, | |
| "learning_rate": 0.0002026470115750357, | |
| "loss": 0.3097, | |
| "num_input_tokens_seen": 2569011200, | |
| "step": 39200, | |
| "train_runtime": 25662.2383, | |
| "train_tokens_per_second": 100108.618 | |
| }, | |
| { | |
| "epoch": 0.393, | |
| "grad_norm": 0.13407446444034576, | |
| "learning_rate": 0.0002022010300148535, | |
| "loss": 0.3072, | |
| "num_input_tokens_seen": 2575564800, | |
| "step": 39300, | |
| "train_runtime": 25726.7635, | |
| "train_tokens_per_second": 100112.274 | |
| }, | |
| { | |
| "epoch": 0.394, | |
| "grad_norm": 0.20070548355579376, | |
| "learning_rate": 0.0002017545227908786, | |
| "loss": 0.3042, | |
| "num_input_tokens_seen": 2582118400, | |
| "step": 39400, | |
| "train_runtime": 25798.3829, | |
| "train_tokens_per_second": 100088.382 | |
| }, | |
| { | |
| "epoch": 0.395, | |
| "grad_norm": 0.12969562411308289, | |
| "learning_rate": 0.00020130749439943376, | |
| "loss": 0.3025, | |
| "num_input_tokens_seen": 2588672000, | |
| "step": 39500, | |
| "train_runtime": 25861.9837, | |
| "train_tokens_per_second": 100095.647 | |
| }, | |
| { | |
| "epoch": 0.396, | |
| "grad_norm": 0.22430787980556488, | |
| "learning_rate": 0.00020085994934208998, | |
| "loss": 0.3075, | |
| "num_input_tokens_seen": 2595225600, | |
| "step": 39600, | |
| "train_runtime": 25927.1388, | |
| "train_tokens_per_second": 100096.876 | |
| }, | |
| { | |
| "epoch": 0.397, | |
| "grad_norm": 0.1543964445590973, | |
| "learning_rate": 0.00020041189212562094, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2601779200, | |
| "step": 39700, | |
| "train_runtime": 25990.8084, | |
| "train_tokens_per_second": 100103.82 | |
| }, | |
| { | |
| "epoch": 0.398, | |
| "grad_norm": 0.17474599182605743, | |
| "learning_rate": 0.0001999633272619579, | |
| "loss": 0.3026, | |
| "num_input_tokens_seen": 2608332800, | |
| "step": 39800, | |
| "train_runtime": 26055.1661, | |
| "train_tokens_per_second": 100108.086 | |
| }, | |
| { | |
| "epoch": 0.399, | |
| "grad_norm": 0.12200487405061722, | |
| "learning_rate": 0.00019951425926814404, | |
| "loss": 0.3051, | |
| "num_input_tokens_seen": 2614886400, | |
| "step": 39900, | |
| "train_runtime": 26125.5167, | |
| "train_tokens_per_second": 100089.366 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.12909364700317383, | |
| "learning_rate": 0.00019906469266628904, | |
| "loss": 0.3083, | |
| "num_input_tokens_seen": 2621440000, | |
| "step": 40000, | |
| "train_runtime": 26189.9855, | |
| "train_tokens_per_second": 100093.221 | |
| }, | |
| { | |
| "epoch": 0.401, | |
| "grad_norm": 0.14507311582565308, | |
| "learning_rate": 0.0001986146319835236, | |
| "loss": 0.3063, | |
| "num_input_tokens_seen": 2627993600, | |
| "step": 40100, | |
| "train_runtime": 26254.1189, | |
| "train_tokens_per_second": 100098.335 | |
| }, | |
| { | |
| "epoch": 0.402, | |
| "grad_norm": 0.15015749633312225, | |
| "learning_rate": 0.00019816408175195383, | |
| "loss": 0.3024, | |
| "num_input_tokens_seen": 2634547200, | |
| "step": 40200, | |
| "train_runtime": 26317.4656, | |
| "train_tokens_per_second": 100106.417 | |
| }, | |
| { | |
| "epoch": 0.403, | |
| "grad_norm": 0.1793050467967987, | |
| "learning_rate": 0.0001977130465086155, | |
| "loss": 0.3058, | |
| "num_input_tokens_seen": 2641100800, | |
| "step": 40300, | |
| "train_runtime": 26387.6285, | |
| "train_tokens_per_second": 100088.6 | |
| }, | |
| { | |
| "epoch": 0.404, | |
| "grad_norm": 0.13494957983493805, | |
| "learning_rate": 0.0001972615307954286, | |
| "loss": 0.3058, | |
| "num_input_tokens_seen": 2647654400, | |
| "step": 40400, | |
| "train_runtime": 26452.3646, | |
| "train_tokens_per_second": 100091.407 | |
| }, | |
| { | |
| "epoch": 0.405, | |
| "grad_norm": 0.15225248038768768, | |
| "learning_rate": 0.00019680953915915124, | |
| "loss": 0.3032, | |
| "num_input_tokens_seen": 2654208000, | |
| "step": 40500, | |
| "train_runtime": 26516.6796, | |
| "train_tokens_per_second": 100095.79 | |
| }, | |
| { | |
| "epoch": 0.406, | |
| "grad_norm": 0.15482735633850098, | |
| "learning_rate": 0.00019635707615133427, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2660761600, | |
| "step": 40600, | |
| "train_runtime": 26585.3848, | |
| "train_tokens_per_second": 100083.622 | |
| }, | |
| { | |
| "epoch": 0.407, | |
| "grad_norm": 0.15725013613700867, | |
| "learning_rate": 0.00019590414632827513, | |
| "loss": 0.3101, | |
| "num_input_tokens_seen": 2667315200, | |
| "step": 40700, | |
| "train_runtime": 26649.9092, | |
| "train_tokens_per_second": 100087.215 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.16835036873817444, | |
| "learning_rate": 0.00019545075425097204, | |
| "loss": 0.3049, | |
| "num_input_tokens_seen": 2673868800, | |
| "step": 40800, | |
| "train_runtime": 26714.9814, | |
| "train_tokens_per_second": 100088.739 | |
| }, | |
| { | |
| "epoch": 0.409, | |
| "grad_norm": 0.167361319065094, | |
| "learning_rate": 0.00019499690448507827, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 2680422400, | |
| "step": 40900, | |
| "train_runtime": 26779.2716, | |
| "train_tokens_per_second": 100093.178 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.1781291663646698, | |
| "learning_rate": 0.00019454260160085588, | |
| "loss": 0.3005, | |
| "num_input_tokens_seen": 2686976000, | |
| "step": 41000, | |
| "train_runtime": 26843.9197, | |
| "train_tokens_per_second": 100096.261 | |
| }, | |
| { | |
| "epoch": 0.411, | |
| "grad_norm": 0.1289975345134735, | |
| "learning_rate": 0.0001940878501731299, | |
| "loss": 0.3085, | |
| "num_input_tokens_seen": 2693529600, | |
| "step": 41100, | |
| "train_runtime": 26914.2047, | |
| "train_tokens_per_second": 100078.365 | |
| }, | |
| { | |
| "epoch": 0.412, | |
| "grad_norm": 0.12804220616817474, | |
| "learning_rate": 0.00019363265478124214, | |
| "loss": 0.3062, | |
| "num_input_tokens_seen": 2700083200, | |
| "step": 41200, | |
| "train_runtime": 26979.3069, | |
| "train_tokens_per_second": 100079.784 | |
| }, | |
| { | |
| "epoch": 0.413, | |
| "grad_norm": 0.14838483929634094, | |
| "learning_rate": 0.00019317702000900516, | |
| "loss": 0.3065, | |
| "num_input_tokens_seen": 2706636800, | |
| "step": 41300, | |
| "train_runtime": 27043.7101, | |
| "train_tokens_per_second": 100083.783 | |
| }, | |
| { | |
| "epoch": 0.414, | |
| "grad_norm": 0.3049434423446655, | |
| "learning_rate": 0.000192720950444656, | |
| "loss": 0.3075, | |
| "num_input_tokens_seen": 2713190400, | |
| "step": 41400, | |
| "train_runtime": 27108.2869, | |
| "train_tokens_per_second": 100087.121 | |
| }, | |
| { | |
| "epoch": 0.415, | |
| "grad_norm": 0.16474822163581848, | |
| "learning_rate": 0.00019226445068081018, | |
| "loss": 0.3087, | |
| "num_input_tokens_seen": 2719744000, | |
| "step": 41500, | |
| "train_runtime": 27173.4382, | |
| "train_tokens_per_second": 100088.328 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.18445253372192383, | |
| "learning_rate": 0.00019180752531441523, | |
| "loss": 0.3065, | |
| "num_input_tokens_seen": 2726297600, | |
| "step": 41600, | |
| "train_runtime": 27237.7945, | |
| "train_tokens_per_second": 100092.45 | |
| }, | |
| { | |
| "epoch": 0.417, | |
| "grad_norm": 0.1226682960987091, | |
| "learning_rate": 0.00019135017894670456, | |
| "loss": 0.3062, | |
| "num_input_tokens_seen": 2732851200, | |
| "step": 41700, | |
| "train_runtime": 27307.5255, | |
| "train_tokens_per_second": 100076.852 | |
| }, | |
| { | |
| "epoch": 0.418, | |
| "grad_norm": 0.12846247851848602, | |
| "learning_rate": 0.0001908924161831509, | |
| "loss": 0.3064, | |
| "num_input_tokens_seen": 2739404800, | |
| "step": 41800, | |
| "train_runtime": 27371.4125, | |
| "train_tokens_per_second": 100082.698 | |
| }, | |
| { | |
| "epoch": 0.419, | |
| "grad_norm": 0.14241133630275726, | |
| "learning_rate": 0.0001904342416334203, | |
| "loss": 0.3048, | |
| "num_input_tokens_seen": 2745958400, | |
| "step": 41900, | |
| "train_runtime": 27436.5912, | |
| "train_tokens_per_second": 100083.803 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.19496770203113556, | |
| "learning_rate": 0.00018997565991132532, | |
| "loss": 0.3046, | |
| "num_input_tokens_seen": 2752512000, | |
| "step": 42000, | |
| "train_runtime": 27500.5131, | |
| "train_tokens_per_second": 100089.478 | |
| }, | |
| { | |
| "epoch": 0.421, | |
| "grad_norm": 0.16859756410121918, | |
| "learning_rate": 0.0001895166756347789, | |
| "loss": 0.3082, | |
| "num_input_tokens_seen": 2759065600, | |
| "step": 42100, | |
| "train_runtime": 27570.8932, | |
| "train_tokens_per_second": 100071.68 | |
| }, | |
| { | |
| "epoch": 0.422, | |
| "grad_norm": 0.13300351798534393, | |
| "learning_rate": 0.0001890572934257475, | |
| "loss": 0.3065, | |
| "num_input_tokens_seen": 2765619200, | |
| "step": 42200, | |
| "train_runtime": 27634.6434, | |
| "train_tokens_per_second": 100077.977 | |
| }, | |
| { | |
| "epoch": 0.423, | |
| "grad_norm": 0.14460822939872742, | |
| "learning_rate": 0.00018859751791020497, | |
| "loss": 0.3055, | |
| "num_input_tokens_seen": 2772172800, | |
| "step": 42300, | |
| "train_runtime": 27700.3395, | |
| "train_tokens_per_second": 100077.214 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.1369091421365738, | |
| "learning_rate": 0.0001881373537180856, | |
| "loss": 0.3026, | |
| "num_input_tokens_seen": 2778726400, | |
| "step": 42400, | |
| "train_runtime": 27764.0211, | |
| "train_tokens_per_second": 100083.716 | |
| }, | |
| { | |
| "epoch": 0.425, | |
| "grad_norm": 0.15593157708644867, | |
| "learning_rate": 0.00018767680548323766, | |
| "loss": 0.3014, | |
| "num_input_tokens_seen": 2785280000, | |
| "step": 42500, | |
| "train_runtime": 27828.3317, | |
| "train_tokens_per_second": 100087.926 | |
| }, | |
| { | |
| "epoch": 0.426, | |
| "grad_norm": 0.18689674139022827, | |
| "learning_rate": 0.0001872158778433768, | |
| "loss": 0.3041, | |
| "num_input_tokens_seen": 2791833600, | |
| "step": 42600, | |
| "train_runtime": 27897.9539, | |
| "train_tokens_per_second": 100073.059 | |
| }, | |
| { | |
| "epoch": 0.427, | |
| "grad_norm": 0.1532142609357834, | |
| "learning_rate": 0.0001867545754400392, | |
| "loss": 0.3041, | |
| "num_input_tokens_seen": 2798387200, | |
| "step": 42700, | |
| "train_runtime": 27964.2157, | |
| "train_tokens_per_second": 100070.291 | |
| }, | |
| { | |
| "epoch": 0.428, | |
| "grad_norm": 0.12894967198371887, | |
| "learning_rate": 0.000186292902918535, | |
| "loss": 0.3047, | |
| "num_input_tokens_seen": 2804940800, | |
| "step": 42800, | |
| "train_runtime": 28028.1798, | |
| "train_tokens_per_second": 100075.739 | |
| }, | |
| { | |
| "epoch": 0.429, | |
| "grad_norm": 0.14526289701461792, | |
| "learning_rate": 0.00018583086492790136, | |
| "loss": 0.3097, | |
| "num_input_tokens_seen": 2811494400, | |
| "step": 42900, | |
| "train_runtime": 28093.2724, | |
| "train_tokens_per_second": 100077.142 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.15546266734600067, | |
| "learning_rate": 0.00018536846612085566, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2818048000, | |
| "step": 43000, | |
| "train_runtime": 28157.8145, | |
| "train_tokens_per_second": 100080.495 | |
| }, | |
| { | |
| "epoch": 0.431, | |
| "grad_norm": 0.16307438910007477, | |
| "learning_rate": 0.00018490571115374878, | |
| "loss": 0.3073, | |
| "num_input_tokens_seen": 2824601600, | |
| "step": 43100, | |
| "train_runtime": 28227.9591, | |
| "train_tokens_per_second": 100063.968 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.1360054761171341, | |
| "learning_rate": 0.00018444260468651816, | |
| "loss": 0.3013, | |
| "num_input_tokens_seen": 2831155200, | |
| "step": 43200, | |
| "train_runtime": 28291.3921, | |
| "train_tokens_per_second": 100071.258 | |
| }, | |
| { | |
| "epoch": 0.433, | |
| "grad_norm": 0.1404498666524887, | |
| "learning_rate": 0.00018397915138264068, | |
| "loss": 0.3066, | |
| "num_input_tokens_seen": 2837708800, | |
| "step": 43300, | |
| "train_runtime": 28355.3195, | |
| "train_tokens_per_second": 100076.771 | |
| }, | |
| { | |
| "epoch": 0.434, | |
| "grad_norm": 0.1926499307155609, | |
| "learning_rate": 0.00018351535590908606, | |
| "loss": 0.3012, | |
| "num_input_tokens_seen": 2844262400, | |
| "step": 43400, | |
| "train_runtime": 28420.6726, | |
| "train_tokens_per_second": 100077.237 | |
| }, | |
| { | |
| "epoch": 0.435, | |
| "grad_norm": 0.13713879883289337, | |
| "learning_rate": 0.00018305122293626948, | |
| "loss": 0.3029, | |
| "num_input_tokens_seen": 2850816000, | |
| "step": 43500, | |
| "train_runtime": 28490.1826, | |
| "train_tokens_per_second": 100063.1 | |
| }, | |
| { | |
| "epoch": 0.436, | |
| "grad_norm": 0.1541578322649002, | |
| "learning_rate": 0.00018258675713800492, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2857369600, | |
| "step": 43600, | |
| "train_runtime": 28555.7903, | |
| "train_tokens_per_second": 100062.704 | |
| }, | |
| { | |
| "epoch": 0.437, | |
| "grad_norm": 0.14117270708084106, | |
| "learning_rate": 0.00018212196319145773, | |
| "loss": 0.3053, | |
| "num_input_tokens_seen": 2863923200, | |
| "step": 43700, | |
| "train_runtime": 28622.1811, | |
| "train_tokens_per_second": 100059.572 | |
| }, | |
| { | |
| "epoch": 0.438, | |
| "grad_norm": 0.14943140745162964, | |
| "learning_rate": 0.00018165684577709778, | |
| "loss": 0.3043, | |
| "num_input_tokens_seen": 2870476800, | |
| "step": 43800, | |
| "train_runtime": 28686.5648, | |
| "train_tokens_per_second": 100063.455 | |
| }, | |
| { | |
| "epoch": 0.439, | |
| "grad_norm": 0.14043770730495453, | |
| "learning_rate": 0.0001811914095786524, | |
| "loss": 0.3048, | |
| "num_input_tokens_seen": 2877030400, | |
| "step": 43900, | |
| "train_runtime": 28751.3532, | |
| "train_tokens_per_second": 100065.913 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.17811591923236847, | |
| "learning_rate": 0.0001807256592830588, | |
| "loss": 0.3088, | |
| "num_input_tokens_seen": 2883584000, | |
| "step": 44000, | |
| "train_runtime": 28815.5193, | |
| "train_tokens_per_second": 100070.52 | |
| }, | |
| { | |
| "epoch": 0.441, | |
| "grad_norm": 0.14588113129138947, | |
| "learning_rate": 0.00018025959958041732, | |
| "loss": 0.3017, | |
| "num_input_tokens_seen": 2890137600, | |
| "step": 44100, | |
| "train_runtime": 28880.019, | |
| "train_tokens_per_second": 100073.951 | |
| }, | |
| { | |
| "epoch": 0.442, | |
| "grad_norm": 0.22986213862895966, | |
| "learning_rate": 0.00017979323516394407, | |
| "loss": 0.3049, | |
| "num_input_tokens_seen": 2896691200, | |
| "step": 44200, | |
| "train_runtime": 28945.7871, | |
| "train_tokens_per_second": 100072.981 | |
| }, | |
| { | |
| "epoch": 0.443, | |
| "grad_norm": 0.853501558303833, | |
| "learning_rate": 0.00017932657072992344, | |
| "loss": 0.3081, | |
| "num_input_tokens_seen": 2903244800, | |
| "step": 44300, | |
| "train_runtime": 29016.3509, | |
| "train_tokens_per_second": 100055.476 | |
| }, | |
| { | |
| "epoch": 0.444, | |
| "grad_norm": 0.15835335850715637, | |
| "learning_rate": 0.00017885961097766117, | |
| "loss": 0.3035, | |
| "num_input_tokens_seen": 2909798400, | |
| "step": 44400, | |
| "train_runtime": 29079.9877, | |
| "train_tokens_per_second": 100061.886 | |
| }, | |
| { | |
| "epoch": 0.445, | |
| "grad_norm": 0.25418880581855774, | |
| "learning_rate": 0.00017839236060943674, | |
| "loss": 0.3014, | |
| "num_input_tokens_seen": 2916352000, | |
| "step": 44500, | |
| "train_runtime": 29144.3776, | |
| "train_tokens_per_second": 100065.681 | |
| }, | |
| { | |
| "epoch": 0.446, | |
| "grad_norm": 0.14922253787517548, | |
| "learning_rate": 0.0001779248243304562, | |
| "loss": 0.3038, | |
| "num_input_tokens_seen": 2922905600, | |
| "step": 44600, | |
| "train_runtime": 29208.2393, | |
| "train_tokens_per_second": 100071.27 | |
| }, | |
| { | |
| "epoch": 0.447, | |
| "grad_norm": 0.14103923738002777, | |
| "learning_rate": 0.00017745700684880465, | |
| "loss": 0.3064, | |
| "num_input_tokens_seen": 2929459200, | |
| "step": 44700, | |
| "train_runtime": 29273.1105, | |
| "train_tokens_per_second": 100073.383 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.15813007950782776, | |
| "learning_rate": 0.000176988912875399, | |
| "loss": 0.3049, | |
| "num_input_tokens_seen": 2936012800, | |
| "step": 44800, | |
| "train_runtime": 29342.9224, | |
| "train_tokens_per_second": 100058.636 | |
| }, | |
| { | |
| "epoch": 0.449, | |
| "grad_norm": 0.1471075564622879, | |
| "learning_rate": 0.00017652054712394028, | |
| "loss": 0.3029, | |
| "num_input_tokens_seen": 2942566400, | |
| "step": 44900, | |
| "train_runtime": 29408.1792, | |
| "train_tokens_per_second": 100059.455 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.16910097002983093, | |
| "learning_rate": 0.0001760519143108665, | |
| "loss": 0.3026, | |
| "num_input_tokens_seen": 2949120000, | |
| "step": 45000, | |
| "train_runtime": 29472.6802, | |
| "train_tokens_per_second": 100062.837 | |
| }, | |
| { | |
| "epoch": 0.451, | |
| "grad_norm": 0.15087512135505676, | |
| "learning_rate": 0.00017558301915530483, | |
| "loss": 0.305, | |
| "num_input_tokens_seen": 2955673600, | |
| "step": 45100, | |
| "train_runtime": 29537.0324, | |
| "train_tokens_per_second": 100066.708 | |
| }, | |
| { | |
| "epoch": 0.452, | |
| "grad_norm": 0.16292531788349152, | |
| "learning_rate": 0.00017511386637902428, | |
| "loss": 0.305, | |
| "num_input_tokens_seen": 2962227200, | |
| "step": 45200, | |
| "train_runtime": 29600.4356, | |
| "train_tokens_per_second": 100073.77 | |
| }, | |
| { | |
| "epoch": 0.453, | |
| "grad_norm": 0.14504611492156982, | |
| "learning_rate": 0.00017464446070638814, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 2968780800, | |
| "step": 45300, | |
| "train_runtime": 29670.2849, | |
| "train_tokens_per_second": 100059.06 | |
| }, | |
| { | |
| "epoch": 0.454, | |
| "grad_norm": 0.14068329334259033, | |
| "learning_rate": 0.00017417480686430622, | |
| "loss": 0.3096, | |
| "num_input_tokens_seen": 2975334400, | |
| "step": 45400, | |
| "train_runtime": 29735.31, | |
| "train_tokens_per_second": 100060.648 | |
| }, | |
| { | |
| "epoch": 0.455, | |
| "grad_norm": 0.139748677611351, | |
| "learning_rate": 0.00017370490958218765, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 2981888000, | |
| "step": 45500, | |
| "train_runtime": 29800.4491, | |
| "train_tokens_per_second": 100061.848 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.1487821340560913, | |
| "learning_rate": 0.00017323477359189272, | |
| "loss": 0.3023, | |
| "num_input_tokens_seen": 2988441600, | |
| "step": 45600, | |
| "train_runtime": 29869.053, | |
| "train_tokens_per_second": 100051.434 | |
| }, | |
| { | |
| "epoch": 0.457, | |
| "grad_norm": 0.15015476942062378, | |
| "learning_rate": 0.00017276440362768564, | |
| "loss": 0.3028, | |
| "num_input_tokens_seen": 2994995200, | |
| "step": 45700, | |
| "train_runtime": 29933.644, | |
| "train_tokens_per_second": 100054.481 | |
| }, | |
| { | |
| "epoch": 0.458, | |
| "grad_norm": 0.1298416256904602, | |
| "learning_rate": 0.0001722938044261868, | |
| "loss": 0.3058, | |
| "num_input_tokens_seen": 3001548800, | |
| "step": 45800, | |
| "train_runtime": 29997.6813, | |
| "train_tokens_per_second": 100059.36 | |
| }, | |
| { | |
| "epoch": 0.459, | |
| "grad_norm": 0.1956530213356018, | |
| "learning_rate": 0.0001718229807263249, | |
| "loss": 0.3033, | |
| "num_input_tokens_seen": 3008102400, | |
| "step": 45900, | |
| "train_runtime": 30067.1877, | |
| "train_tokens_per_second": 100046.018 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.15267929434776306, | |
| "learning_rate": 0.0001713519372692894, | |
| "loss": 0.3028, | |
| "num_input_tokens_seen": 3014656000, | |
| "step": 46000, | |
| "train_runtime": 30131.0143, | |
| "train_tokens_per_second": 100051.594 | |
| }, | |
| { | |
| "epoch": 0.461, | |
| "grad_norm": 0.13846905529499054, | |
| "learning_rate": 0.0001708806787984826, | |
| "loss": 0.3036, | |
| "num_input_tokens_seen": 3021209600, | |
| "step": 46100, | |
| "train_runtime": 30195.5066, | |
| "train_tokens_per_second": 100054.94 | |
| }, | |
| { | |
| "epoch": 0.462, | |
| "grad_norm": 0.13704828917980194, | |
| "learning_rate": 0.00017040921005947212, | |
| "loss": 0.3094, | |
| "num_input_tokens_seen": 3027763200, | |
| "step": 46200, | |
| "train_runtime": 30260.3523, | |
| "train_tokens_per_second": 100057.104 | |
| }, | |
| { | |
| "epoch": 0.463, | |
| "grad_norm": 0.15288543701171875, | |
| "learning_rate": 0.0001699375357999429, | |
| "loss": 0.3014, | |
| "num_input_tokens_seen": 3034316800, | |
| "step": 46300, | |
| "train_runtime": 30325.5675, | |
| "train_tokens_per_second": 100058.039 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.19963988661766052, | |
| "learning_rate": 0.0001694656607696496, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 3040870400, | |
| "step": 46400, | |
| "train_runtime": 30399.8434, | |
| "train_tokens_per_second": 100029.147 | |
| }, | |
| { | |
| "epoch": 0.465, | |
| "grad_norm": 0.14533430337905884, | |
| "learning_rate": 0.0001689935897203684, | |
| "loss": 0.3056, | |
| "num_input_tokens_seen": 3047424000, | |
| "step": 46500, | |
| "train_runtime": 30464.3563, | |
| "train_tokens_per_second": 100032.443 | |
| }, | |
| { | |
| "epoch": 0.466, | |
| "grad_norm": 0.14005503058433533, | |
| "learning_rate": 0.0001685213274058496, | |
| "loss": 0.3016, | |
| "num_input_tokens_seen": 3053977600, | |
| "step": 46600, | |
| "train_runtime": 30528.7292, | |
| "train_tokens_per_second": 100036.185 | |
| }, | |
| { | |
| "epoch": 0.467, | |
| "grad_norm": 0.17612388730049133, | |
| "learning_rate": 0.00016804887858176944, | |
| "loss": 0.3006, | |
| "num_input_tokens_seen": 3060531200, | |
| "step": 46700, | |
| "train_runtime": 30592.7142, | |
| "train_tokens_per_second": 100041.179 | |
| }, | |
| { | |
| "epoch": 0.468, | |
| "grad_norm": 0.13526348769664764, | |
| "learning_rate": 0.00016757624800568238, | |
| "loss": 0.3001, | |
| "num_input_tokens_seen": 3067084800, | |
| "step": 46800, | |
| "train_runtime": 30656.5144, | |
| "train_tokens_per_second": 100046.755 | |
| }, | |
| { | |
| "epoch": 0.469, | |
| "grad_norm": 0.6205772161483765, | |
| "learning_rate": 0.00016710344043697301, | |
| "loss": 0.3016, | |
| "num_input_tokens_seen": 3073638400, | |
| "step": 46900, | |
| "train_runtime": 30727.0215, | |
| "train_tokens_per_second": 100030.47 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 0.15328101813793182, | |
| "learning_rate": 0.0001666304606368083, | |
| "loss": 0.3049, | |
| "num_input_tokens_seen": 3080192000, | |
| "step": 47000, | |
| "train_runtime": 30792.0203, | |
| "train_tokens_per_second": 100032.15 | |
| }, | |
| { | |
| "epoch": 0.471, | |
| "grad_norm": 0.1804981380701065, | |
| "learning_rate": 0.00016615731336808962, | |
| "loss": 0.3008, | |
| "num_input_tokens_seen": 3086745600, | |
| "step": 47100, | |
| "train_runtime": 30856.1119, | |
| "train_tokens_per_second": 100036.764 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.1460595428943634, | |
| "learning_rate": 0.0001656840033954047, | |
| "loss": 0.2996, | |
| "num_input_tokens_seen": 3093299200, | |
| "step": 47200, | |
| "train_runtime": 30922.3293, | |
| "train_tokens_per_second": 100034.482 | |
| }, | |
| { | |
| "epoch": 0.473, | |
| "grad_norm": 0.17493313550949097, | |
| "learning_rate": 0.00016521053548497973, | |
| "loss": 0.3005, | |
| "num_input_tokens_seen": 3099852800, | |
| "step": 47300, | |
| "train_runtime": 30985.6891, | |
| "train_tokens_per_second": 100041.435 | |
| }, | |
| { | |
| "epoch": 0.474, | |
| "grad_norm": 0.11990969628095627, | |
| "learning_rate": 0.0001647369144046313, | |
| "loss": 0.2995, | |
| "num_input_tokens_seen": 3106406400, | |
| "step": 47400, | |
| "train_runtime": 31056.5152, | |
| "train_tokens_per_second": 100024.307 | |
| }, | |
| { | |
| "epoch": 0.475, | |
| "grad_norm": 0.15634778141975403, | |
| "learning_rate": 0.00016426314492371842, | |
| "loss": 0.3054, | |
| "num_input_tokens_seen": 3112960000, | |
| "step": 47500, | |
| "train_runtime": 31121.0302, | |
| "train_tokens_per_second": 100027.537 | |
| }, | |
| { | |
| "epoch": 0.476, | |
| "grad_norm": 0.14218732714653015, | |
| "learning_rate": 0.0001637892318130945, | |
| "loss": 0.3036, | |
| "num_input_tokens_seen": 3119513600, | |
| "step": 47600, | |
| "train_runtime": 31185.6411, | |
| "train_tokens_per_second": 100030.446 | |
| }, | |
| { | |
| "epoch": 0.477, | |
| "grad_norm": 0.147688090801239, | |
| "learning_rate": 0.00016331517984505934, | |
| "loss": 0.3003, | |
| "num_input_tokens_seen": 3126067200, | |
| "step": 47700, | |
| "train_runtime": 31250.7507, | |
| "train_tokens_per_second": 100031.748 | |
| }, | |
| { | |
| "epoch": 0.478, | |
| "grad_norm": 0.1728331595659256, | |
| "learning_rate": 0.00016284099379331092, | |
| "loss": 0.2997, | |
| "num_input_tokens_seen": 3132620800, | |
| "step": 47800, | |
| "train_runtime": 31321.2751, | |
| "train_tokens_per_second": 100015.749 | |
| }, | |
| { | |
| "epoch": 0.479, | |
| "grad_norm": 0.12835726141929626, | |
| "learning_rate": 0.00016236667843289759, | |
| "loss": 0.2989, | |
| "num_input_tokens_seen": 3139174400, | |
| "step": 47900, | |
| "train_runtime": 31386.2974, | |
| "train_tokens_per_second": 100017.353 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.13368946313858032, | |
| "learning_rate": 0.00016189223854016973, | |
| "loss": 0.3078, | |
| "num_input_tokens_seen": 3145728000, | |
| "step": 48000, | |
| "train_runtime": 31451.659, | |
| "train_tokens_per_second": 100017.872 | |
| }, | |
| { | |
| "epoch": 0.481, | |
| "grad_norm": 0.12727653980255127, | |
| "learning_rate": 0.00016141767889273182, | |
| "loss": 0.3017, | |
| "num_input_tokens_seen": 3152281600, | |
| "step": 48100, | |
| "train_runtime": 31516.5086, | |
| "train_tokens_per_second": 100020.013 | |
| }, | |
| { | |
| "epoch": 0.482, | |
| "grad_norm": 0.16222263872623444, | |
| "learning_rate": 0.00016094300426939417, | |
| "loss": 0.3009, | |
| "num_input_tokens_seen": 3158835200, | |
| "step": 48200, | |
| "train_runtime": 31581.3453, | |
| "train_tokens_per_second": 100022.186 | |
| }, | |
| { | |
| "epoch": 0.483, | |
| "grad_norm": 0.15287387371063232, | |
| "learning_rate": 0.00016046821945012505, | |
| "loss": 0.2975, | |
| "num_input_tokens_seen": 3165388800, | |
| "step": 48300, | |
| "train_runtime": 31645.8484, | |
| "train_tokens_per_second": 100025.405 | |
| }, | |
| { | |
| "epoch": 0.484, | |
| "grad_norm": 0.13035738468170166, | |
| "learning_rate": 0.00015999332921600226, | |
| "loss": 0.3046, | |
| "num_input_tokens_seen": 3171942400, | |
| "step": 48400, | |
| "train_runtime": 31716.5254, | |
| "train_tokens_per_second": 100009.139 | |
| }, | |
| { | |
| "epoch": 0.485, | |
| "grad_norm": 0.16508948802947998, | |
| "learning_rate": 0.00015951833834916532, | |
| "loss": 0.3061, | |
| "num_input_tokens_seen": 3178496000, | |
| "step": 48500, | |
| "train_runtime": 31781.7614, | |
| "train_tokens_per_second": 100010.064 | |
| }, | |
| { | |
| "epoch": 0.486, | |
| "grad_norm": 0.1543286293745041, | |
| "learning_rate": 0.00015904325163276672, | |
| "loss": 0.2995, | |
| "num_input_tokens_seen": 3185049600, | |
| "step": 48600, | |
| "train_runtime": 31847.2029, | |
| "train_tokens_per_second": 100010.34 | |
| }, | |
| { | |
| "epoch": 0.487, | |
| "grad_norm": 0.13470540940761566, | |
| "learning_rate": 0.00015856807385092466, | |
| "loss": 0.3067, | |
| "num_input_tokens_seen": 3191603200, | |
| "step": 48700, | |
| "train_runtime": 31911.0411, | |
| "train_tokens_per_second": 100015.64 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.15521059930324554, | |
| "learning_rate": 0.00015809280978867405, | |
| "loss": 0.3009, | |
| "num_input_tokens_seen": 3198156800, | |
| "step": 48800, | |
| "train_runtime": 31975.3091, | |
| "train_tokens_per_second": 100019.574 | |
| }, | |
| { | |
| "epoch": 0.489, | |
| "grad_norm": 0.16505663096904755, | |
| "learning_rate": 0.0001576174642319187, | |
| "loss": 0.3019, | |
| "num_input_tokens_seen": 3204710400, | |
| "step": 48900, | |
| "train_runtime": 32039.3359, | |
| "train_tokens_per_second": 100024.246 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.15701062977313995, | |
| "learning_rate": 0.0001571420419673831, | |
| "loss": 0.3025, | |
| "num_input_tokens_seen": 3211264000, | |
| "step": 49000, | |
| "train_runtime": 32104.9123, | |
| "train_tokens_per_second": 100024.07 | |
| }, | |
| { | |
| "epoch": 0.491, | |
| "grad_norm": 0.22376379370689392, | |
| "learning_rate": 0.0001566665477825642, | |
| "loss": 0.3035, | |
| "num_input_tokens_seen": 3217817600, | |
| "step": 49100, | |
| "train_runtime": 32177.5739, | |
| "train_tokens_per_second": 100001.871 | |
| }, | |
| { | |
| "epoch": 0.492, | |
| "grad_norm": 0.1716614067554474, | |
| "learning_rate": 0.0001561909864656831, | |
| "loss": 0.3046, | |
| "num_input_tokens_seen": 3224371200, | |
| "step": 49200, | |
| "train_runtime": 32241.8903, | |
| "train_tokens_per_second": 100005.65 | |
| }, | |
| { | |
| "epoch": 0.493, | |
| "grad_norm": 0.17557290196418762, | |
| "learning_rate": 0.00015571536280563705, | |
| "loss": 0.2987, | |
| "num_input_tokens_seen": 3230924800, | |
| "step": 49300, | |
| "train_runtime": 32307.4373, | |
| "train_tokens_per_second": 100005.605 | |
| }, | |
| { | |
| "epoch": 0.494, | |
| "grad_norm": 0.16884572803974152, | |
| "learning_rate": 0.000155239681591951, | |
| "loss": 0.2986, | |
| "num_input_tokens_seen": 3237478400, | |
| "step": 49400, | |
| "train_runtime": 32371.4412, | |
| "train_tokens_per_second": 100010.326 | |
| }, | |
| { | |
| "epoch": 0.495, | |
| "grad_norm": 0.15279650688171387, | |
| "learning_rate": 0.00015476394761472953, | |
| "loss": 0.2982, | |
| "num_input_tokens_seen": 3244032000, | |
| "step": 49500, | |
| "train_runtime": 32436.5241, | |
| "train_tokens_per_second": 100011.702 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.1866491436958313, | |
| "learning_rate": 0.00015428816566460843, | |
| "loss": 0.3038, | |
| "num_input_tokens_seen": 3250585600, | |
| "step": 49600, | |
| "train_runtime": 32508.3167, | |
| "train_tokens_per_second": 99992.43 | |
| }, | |
| { | |
| "epoch": 0.497, | |
| "grad_norm": 0.14084835350513458, | |
| "learning_rate": 0.00015381234053270669, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 3257139200, | |
| "step": 49700, | |
| "train_runtime": 32572.1194, | |
| "train_tokens_per_second": 99997.767 | |
| }, | |
| { | |
| "epoch": 0.498, | |
| "grad_norm": 0.16111333668231964, | |
| "learning_rate": 0.0001533364770105781, | |
| "loss": 0.3015, | |
| "num_input_tokens_seen": 3263692800, | |
| "step": 49800, | |
| "train_runtime": 32637.2501, | |
| "train_tokens_per_second": 99999.013 | |
| }, | |
| { | |
| "epoch": 0.499, | |
| "grad_norm": 0.14655210077762604, | |
| "learning_rate": 0.0001528605798901631, | |
| "loss": 0.3012, | |
| "num_input_tokens_seen": 3270246400, | |
| "step": 49900, | |
| "train_runtime": 32707.4201, | |
| "train_tokens_per_second": 99984.847 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.1385914832353592, | |
| "learning_rate": 0.00015238465396374027, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 3276800000, | |
| "step": 50000, | |
| "train_runtime": 32772.7798, | |
| "train_tokens_per_second": 99985.415 | |
| }, | |
| { | |
| "epoch": 0.501, | |
| "grad_norm": 0.1433262825012207, | |
| "learning_rate": 0.00015190870402387858, | |
| "loss": 0.3006, | |
| "num_input_tokens_seen": 3283353600, | |
| "step": 50100, | |
| "train_runtime": 32837.3412, | |
| "train_tokens_per_second": 99988.412 | |
| }, | |
| { | |
| "epoch": 0.502, | |
| "grad_norm": 0.15529057383537292, | |
| "learning_rate": 0.00015143273486338857, | |
| "loss": 0.2995, | |
| "num_input_tokens_seen": 3289907200, | |
| "step": 50200, | |
| "train_runtime": 32902.1033, | |
| "train_tokens_per_second": 99990.787 | |
| }, | |
| { | |
| "epoch": 0.503, | |
| "grad_norm": 0.1301671862602234, | |
| "learning_rate": 0.00015095675127527438, | |
| "loss": 0.3055, | |
| "num_input_tokens_seen": 3296460800, | |
| "step": 50300, | |
| "train_runtime": 32967.0743, | |
| "train_tokens_per_second": 99992.519 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.1454419493675232, | |
| "learning_rate": 0.00015048075805268547, | |
| "loss": 0.3036, | |
| "num_input_tokens_seen": 3303014400, | |
| "step": 50400, | |
| "train_runtime": 33033.1243, | |
| "train_tokens_per_second": 99990.978 | |
| }, | |
| { | |
| "epoch": 0.505, | |
| "grad_norm": 0.1473357379436493, | |
| "learning_rate": 0.00015000475998886825, | |
| "loss": 0.3018, | |
| "num_input_tokens_seen": 3309568000, | |
| "step": 50500, | |
| "train_runtime": 33105.2406, | |
| "train_tokens_per_second": 99971.121 | |
| }, | |
| { | |
| "epoch": 0.506, | |
| "grad_norm": 0.13996386528015137, | |
| "learning_rate": 0.00014952876187711804, | |
| "loss": 0.2974, | |
| "num_input_tokens_seen": 3316121600, | |
| "step": 50600, | |
| "train_runtime": 33169.1198, | |
| "train_tokens_per_second": 99976.171 | |
| }, | |
| { | |
| "epoch": 0.507, | |
| "grad_norm": 0.14000660181045532, | |
| "learning_rate": 0.00014905276851073053, | |
| "loss": 0.2992, | |
| "num_input_tokens_seen": 3322675200, | |
| "step": 50700, | |
| "train_runtime": 33234.0005, | |
| "train_tokens_per_second": 99978.19 | |
| }, | |
| { | |
| "epoch": 0.508, | |
| "grad_norm": 0.14661286771297455, | |
| "learning_rate": 0.00014857678468295352, | |
| "loss": 0.3045, | |
| "num_input_tokens_seen": 3329228800, | |
| "step": 50800, | |
| "train_runtime": 33299.7758, | |
| "train_tokens_per_second": 99977.514 | |
| }, | |
| { | |
| "epoch": 0.509, | |
| "grad_norm": 0.15111635625362396, | |
| "learning_rate": 0.00014810081518693902, | |
| "loss": 0.3006, | |
| "num_input_tokens_seen": 3335782400, | |
| "step": 50900, | |
| "train_runtime": 33370.9097, | |
| "train_tokens_per_second": 99960.787 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.12965109944343567, | |
| "learning_rate": 0.0001476248648156945, | |
| "loss": 0.2986, | |
| "num_input_tokens_seen": 3342336000, | |
| "step": 51000, | |
| "train_runtime": 33435.7602, | |
| "train_tokens_per_second": 99962.913 | |
| }, | |
| { | |
| "epoch": 0.511, | |
| "grad_norm": 0.13791891932487488, | |
| "learning_rate": 0.00014714893836203485, | |
| "loss": 0.2994, | |
| "num_input_tokens_seen": 3348889600, | |
| "step": 51100, | |
| "train_runtime": 33500.2878, | |
| "train_tokens_per_second": 99965.995 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.1420348435640335, | |
| "learning_rate": 0.0001466730406185343, | |
| "loss": 0.2996, | |
| "num_input_tokens_seen": 3355443200, | |
| "step": 51200, | |
| "train_runtime": 33564.5521, | |
| "train_tokens_per_second": 99969.849 | |
| }, | |
| { | |
| "epoch": 0.513, | |
| "grad_norm": 0.1938745528459549, | |
| "learning_rate": 0.0001461971763774778, | |
| "loss": 0.3007, | |
| "num_input_tokens_seen": 3361996800, | |
| "step": 51300, | |
| "train_runtime": 33630.8004, | |
| "train_tokens_per_second": 99967.79 | |
| }, | |
| { | |
| "epoch": 0.514, | |
| "grad_norm": 0.1449531763792038, | |
| "learning_rate": 0.0001457213504308129, | |
| "loss": 0.3011, | |
| "num_input_tokens_seen": 3368550400, | |
| "step": 51400, | |
| "train_runtime": 33696.4447, | |
| "train_tokens_per_second": 99967.532 | |
| }, | |
| { | |
| "epoch": 0.515, | |
| "grad_norm": 0.16473324596881866, | |
| "learning_rate": 0.00014524556757010177, | |
| "loss": 0.3005, | |
| "num_input_tokens_seen": 3375104000, | |
| "step": 51500, | |
| "train_runtime": 33766.6492, | |
| "train_tokens_per_second": 99953.773 | |
| }, | |
| { | |
| "epoch": 0.516, | |
| "grad_norm": 0.1542610377073288, | |
| "learning_rate": 0.00014476983258647234, | |
| "loss": 0.3012, | |
| "num_input_tokens_seen": 3381657600, | |
| "step": 51600, | |
| "train_runtime": 33832.0917, | |
| "train_tokens_per_second": 99954.139 | |
| }, | |
| { | |
| "epoch": 0.517, | |
| "grad_norm": 0.1388223022222519, | |
| "learning_rate": 0.0001442941502705707, | |
| "loss": 0.3031, | |
| "num_input_tokens_seen": 3388211200, | |
| "step": 51700, | |
| "train_runtime": 33896.7212, | |
| "train_tokens_per_second": 99956.901 | |
| }, | |
| { | |
| "epoch": 0.518, | |
| "grad_norm": 0.19452647864818573, | |
| "learning_rate": 0.0001438185254125125, | |
| "loss": 0.3011, | |
| "num_input_tokens_seen": 3394764800, | |
| "step": 51800, | |
| "train_runtime": 33962.0557, | |
| "train_tokens_per_second": 99957.577 | |
| }, | |
| { | |
| "epoch": 0.519, | |
| "grad_norm": 0.16043786704540253, | |
| "learning_rate": 0.00014334296280183473, | |
| "loss": 0.2997, | |
| "num_input_tokens_seen": 3401318400, | |
| "step": 51900, | |
| "train_runtime": 34027.5551, | |
| "train_tokens_per_second": 99957.766 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.19769923388957977, | |
| "learning_rate": 0.00014286746722744768, | |
| "loss": 0.3007, | |
| "num_input_tokens_seen": 3407872000, | |
| "step": 52000, | |
| "train_runtime": 34098.2307, | |
| "train_tokens_per_second": 99942.781 | |
| }, | |
| { | |
| "epoch": 0.521, | |
| "grad_norm": 0.1524592489004135, | |
| "learning_rate": 0.00014239204347758647, | |
| "loss": 0.299, | |
| "num_input_tokens_seen": 3414425600, | |
| "step": 52100, | |
| "train_runtime": 34164.2522, | |
| "train_tokens_per_second": 99941.47 | |
| }, | |
| { | |
| "epoch": 0.522, | |
| "grad_norm": 0.14221727848052979, | |
| "learning_rate": 0.00014191669633976294, | |
| "loss": 0.3029, | |
| "num_input_tokens_seen": 3420979200, | |
| "step": 52200, | |
| "train_runtime": 34227.7165, | |
| "train_tokens_per_second": 99947.632 | |
| }, | |
| { | |
| "epoch": 0.523, | |
| "grad_norm": 0.15958262979984283, | |
| "learning_rate": 0.00014144143060071756, | |
| "loss": 0.3005, | |
| "num_input_tokens_seen": 3427532800, | |
| "step": 52300, | |
| "train_runtime": 34292.9446, | |
| "train_tokens_per_second": 99948.629 | |
| }, | |
| { | |
| "epoch": 0.524, | |
| "grad_norm": 0.1545192301273346, | |
| "learning_rate": 0.000140966251046371, | |
| "loss": 0.3024, | |
| "num_input_tokens_seen": 3434086400, | |
| "step": 52400, | |
| "train_runtime": 34357.5392, | |
| "train_tokens_per_second": 99951.466 | |
| }, | |
| { | |
| "epoch": 0.525, | |
| "grad_norm": 0.14636173844337463, | |
| "learning_rate": 0.0001404911624617761, | |
| "loss": 0.2967, | |
| "num_input_tokens_seen": 3440640000, | |
| "step": 52500, | |
| "train_runtime": 34423.9361, | |
| "train_tokens_per_second": 99949.058 | |
| }, | |
| { | |
| "epoch": 0.526, | |
| "grad_norm": 0.26764926314353943, | |
| "learning_rate": 0.00014001616963106966, | |
| "loss": 0.2982, | |
| "num_input_tokens_seen": 3447193600, | |
| "step": 52600, | |
| "train_runtime": 34489.4544, | |
| "train_tokens_per_second": 99949.206 | |
| }, | |
| { | |
| "epoch": 0.527, | |
| "grad_norm": 0.20636320114135742, | |
| "learning_rate": 0.00013954127733742416, | |
| "loss": 0.3011, | |
| "num_input_tokens_seen": 3453747200, | |
| "step": 52700, | |
| "train_runtime": 34559.9071, | |
| "train_tokens_per_second": 99935.083 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.1523534059524536, | |
| "learning_rate": 0.0001390664903629998, | |
| "loss": 0.3042, | |
| "num_input_tokens_seen": 3460300800, | |
| "step": 52800, | |
| "train_runtime": 34624.4507, | |
| "train_tokens_per_second": 99938.071 | |
| }, | |
| { | |
| "epoch": 0.529, | |
| "grad_norm": 0.15213948488235474, | |
| "learning_rate": 0.0001385918134888961, | |
| "loss": 0.3024, | |
| "num_input_tokens_seen": 3466854400, | |
| "step": 52900, | |
| "train_runtime": 34690.2273, | |
| "train_tokens_per_second": 99937.495 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.14115960896015167, | |
| "learning_rate": 0.00013811725149510387, | |
| "loss": 0.2999, | |
| "num_input_tokens_seen": 3473408000, | |
| "step": 53000, | |
| "train_runtime": 34756.5786, | |
| "train_tokens_per_second": 99935.268 | |
| }, | |
| { | |
| "epoch": 0.531, | |
| "grad_norm": 0.16747893393039703, | |
| "learning_rate": 0.0001376428091604572, | |
| "loss": 0.3011, | |
| "num_input_tokens_seen": 3479961600, | |
| "step": 53100, | |
| "train_runtime": 34823.0381, | |
| "train_tokens_per_second": 99932.74 | |
| }, | |
| { | |
| "epoch": 0.532, | |
| "grad_norm": 0.1266140639781952, | |
| "learning_rate": 0.00013716849126258512, | |
| "loss": 0.2985, | |
| "num_input_tokens_seen": 3486515200, | |
| "step": 53200, | |
| "train_runtime": 34892.7557, | |
| "train_tokens_per_second": 99920.89 | |
| }, | |
| { | |
| "epoch": 0.533, | |
| "grad_norm": 0.14753171801567078, | |
| "learning_rate": 0.00013669430257786354, | |
| "loss": 0.2996, | |
| "num_input_tokens_seen": 3493068800, | |
| "step": 53300, | |
| "train_runtime": 34957.0461, | |
| "train_tokens_per_second": 99924.599 | |
| }, | |
| { | |
| "epoch": 0.534, | |
| "grad_norm": 0.2617182731628418, | |
| "learning_rate": 0.00013622024788136728, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 3499622400, | |
| "step": 53400, | |
| "train_runtime": 35022.8837, | |
| "train_tokens_per_second": 99923.879 | |
| }, | |
| { | |
| "epoch": 0.535, | |
| "grad_norm": 0.17150761187076569, | |
| "learning_rate": 0.00013574633194682185, | |
| "loss": 0.3027, | |
| "num_input_tokens_seen": 3506176000, | |
| "step": 53500, | |
| "train_runtime": 35088.2396, | |
| "train_tokens_per_second": 99924.534 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.16566570103168488, | |
| "learning_rate": 0.0001352725595465555, | |
| "loss": 0.2999, | |
| "num_input_tokens_seen": 3512729600, | |
| "step": 53600, | |
| "train_runtime": 35153.6189, | |
| "train_tokens_per_second": 99925.12 | |
| }, | |
| { | |
| "epoch": 0.537, | |
| "grad_norm": 0.13577675819396973, | |
| "learning_rate": 0.000134798935451451, | |
| "loss": 0.2969, | |
| "num_input_tokens_seen": 3519283200, | |
| "step": 53700, | |
| "train_runtime": 35225.0068, | |
| "train_tokens_per_second": 99908.659 | |
| }, | |
| { | |
| "epoch": 0.538, | |
| "grad_norm": 0.20843537151813507, | |
| "learning_rate": 0.00013432546443089768, | |
| "loss": 0.2967, | |
| "num_input_tokens_seen": 3525836800, | |
| "step": 53800, | |
| "train_runtime": 35288.0858, | |
| "train_tokens_per_second": 99915.785 | |
| }, | |
| { | |
| "epoch": 0.539, | |
| "grad_norm": 0.15664201974868774, | |
| "learning_rate": 0.0001338521512527436, | |
| "loss": 0.3007, | |
| "num_input_tokens_seen": 3532390400, | |
| "step": 53900, | |
| "train_runtime": 35353.7477, | |
| "train_tokens_per_second": 99915.586 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.14205297827720642, | |
| "learning_rate": 0.00013337900068324712, | |
| "loss": 0.3001, | |
| "num_input_tokens_seen": 3538944000, | |
| "step": 54000, | |
| "train_runtime": 35423.5891, | |
| "train_tokens_per_second": 99903.598 | |
| }, | |
| { | |
| "epoch": 0.541, | |
| "grad_norm": 0.13229498267173767, | |
| "learning_rate": 0.00013290601748702918, | |
| "loss": 0.2931, | |
| "num_input_tokens_seen": 3545497600, | |
| "step": 54100, | |
| "train_runtime": 35489.6646, | |
| "train_tokens_per_second": 99902.257 | |
| }, | |
| { | |
| "epoch": 0.542, | |
| "grad_norm": 0.1380510926246643, | |
| "learning_rate": 0.00013243320642702543, | |
| "loss": 0.3116, | |
| "num_input_tokens_seen": 3552051200, | |
| "step": 54200, | |
| "train_runtime": 35554.9224, | |
| "train_tokens_per_second": 99903.219 | |
| }, | |
| { | |
| "epoch": 0.543, | |
| "grad_norm": 0.16735288500785828, | |
| "learning_rate": 0.0001319605722644379, | |
| "loss": 0.2998, | |
| "num_input_tokens_seen": 3558604800, | |
| "step": 54300, | |
| "train_runtime": 35619.4728, | |
| "train_tokens_per_second": 99906.161 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.17502574622631073, | |
| "learning_rate": 0.0001314881197586874, | |
| "loss": 0.3004, | |
| "num_input_tokens_seen": 3565158400, | |
| "step": 54400, | |
| "train_runtime": 35685.8161, | |
| "train_tokens_per_second": 99904.074 | |
| }, | |
| { | |
| "epoch": 0.545, | |
| "grad_norm": 0.14805424213409424, | |
| "learning_rate": 0.0001310158536673654, | |
| "loss": 0.2983, | |
| "num_input_tokens_seen": 3571712000, | |
| "step": 54500, | |
| "train_runtime": 35750.1467, | |
| "train_tokens_per_second": 99907.618 | |
| }, | |
| { | |
| "epoch": 0.546, | |
| "grad_norm": 0.1533045917749405, | |
| "learning_rate": 0.0001305437787461862, | |
| "loss": 0.2976, | |
| "num_input_tokens_seen": 3578265600, | |
| "step": 54600, | |
| "train_runtime": 35816.4973, | |
| "train_tokens_per_second": 99905.515 | |
| }, | |
| { | |
| "epoch": 0.547, | |
| "grad_norm": 0.18475773930549622, | |
| "learning_rate": 0.00013007189974893903, | |
| "loss": 0.2951, | |
| "num_input_tokens_seen": 3584819200, | |
| "step": 54700, | |
| "train_runtime": 35886.6478, | |
| "train_tokens_per_second": 99892.841 | |
| }, | |
| { | |
| "epoch": 0.548, | |
| "grad_norm": 0.13913068175315857, | |
| "learning_rate": 0.00012960022142744016, | |
| "loss": 0.297, | |
| "num_input_tokens_seen": 3591372800, | |
| "step": 54800, | |
| "train_runtime": 35950.7798, | |
| "train_tokens_per_second": 99896.937 | |
| }, | |
| { | |
| "epoch": 0.549, | |
| "grad_norm": 0.15448203682899475, | |
| "learning_rate": 0.00012912874853148506, | |
| "loss": 0.303, | |
| "num_input_tokens_seen": 3597926400, | |
| "step": 54900, | |
| "train_runtime": 36015.8762, | |
| "train_tokens_per_second": 99898.344 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 0.15416036546230316, | |
| "learning_rate": 0.00012865748580880053, | |
| "loss": 0.2979, | |
| "num_input_tokens_seen": 3604480000, | |
| "step": 55000, | |
| "train_runtime": 36080.201, | |
| "train_tokens_per_second": 99901.883 | |
| }, | |
| { | |
| "epoch": 0.551, | |
| "grad_norm": 0.14506150782108307, | |
| "learning_rate": 0.0001281864380049969, | |
| "loss": 0.2983, | |
| "num_input_tokens_seen": 3611033600, | |
| "step": 55100, | |
| "train_runtime": 36150.2521, | |
| "train_tokens_per_second": 99889.583 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.17357710003852844, | |
| "learning_rate": 0.00012771560986352042, | |
| "loss": 0.2986, | |
| "num_input_tokens_seen": 3617587200, | |
| "step": 55200, | |
| "train_runtime": 36215.2659, | |
| "train_tokens_per_second": 99891.223 | |
| }, | |
| { | |
| "epoch": 0.553, | |
| "grad_norm": 0.16711916029453278, | |
| "learning_rate": 0.0001272450061256052, | |
| "loss": 0.2979, | |
| "num_input_tokens_seen": 3624140800, | |
| "step": 55300, | |
| "train_runtime": 36279.3222, | |
| "train_tokens_per_second": 99895.494 | |
| }, | |
| { | |
| "epoch": 0.554, | |
| "grad_norm": 0.1502256691455841, | |
| "learning_rate": 0.00012677463153022565, | |
| "loss": 0.3007, | |
| "num_input_tokens_seen": 3630694400, | |
| "step": 55400, | |
| "train_runtime": 36345.9552, | |
| "train_tokens_per_second": 99892.667 | |
| }, | |
| { | |
| "epoch": 0.555, | |
| "grad_norm": 0.15480037033557892, | |
| "learning_rate": 0.0001263044908140488, | |
| "loss": 0.2975, | |
| "num_input_tokens_seen": 3637248000, | |
| "step": 55500, | |
| "train_runtime": 36415.9598, | |
| "train_tokens_per_second": 99880.602 | |
| }, | |
| { | |
| "epoch": 0.556, | |
| "grad_norm": 0.15693609416484833, | |
| "learning_rate": 0.00012583458871138632, | |
| "loss": 0.2978, | |
| "num_input_tokens_seen": 3643801600, | |
| "step": 55600, | |
| "train_runtime": 36480.5541, | |
| "train_tokens_per_second": 99883.395 | |
| }, | |
| { | |
| "epoch": 0.557, | |
| "grad_norm": 0.147445410490036, | |
| "learning_rate": 0.00012536492995414723, | |
| "loss": 0.2991, | |
| "num_input_tokens_seen": 3650355200, | |
| "step": 55700, | |
| "train_runtime": 36545.2319, | |
| "train_tokens_per_second": 99885.95 | |
| }, | |
| { | |
| "epoch": 0.558, | |
| "grad_norm": 0.13640980422496796, | |
| "learning_rate": 0.00012489551927179007, | |
| "loss": 0.2987, | |
| "num_input_tokens_seen": 3656908800, | |
| "step": 55800, | |
| "train_runtime": 36611.0993, | |
| "train_tokens_per_second": 99885.25 | |
| }, | |
| { | |
| "epoch": 0.559, | |
| "grad_norm": 0.14373840391635895, | |
| "learning_rate": 0.00012442636139127508, | |
| "loss": 0.3, | |
| "num_input_tokens_seen": 3663462400, | |
| "step": 55900, | |
| "train_runtime": 36676.4606, | |
| "train_tokens_per_second": 99885.931 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.14679211378097534, | |
| "learning_rate": 0.00012395746103701695, | |
| "loss": 0.2996, | |
| "num_input_tokens_seen": 3670016000, | |
| "step": 56000, | |
| "train_runtime": 36748.2938, | |
| "train_tokens_per_second": 99869.018 | |
| }, | |
| { | |
| "epoch": 0.561, | |
| "grad_norm": 0.15536077320575714, | |
| "learning_rate": 0.00012348882293083708, | |
| "loss": 0.2953, | |
| "num_input_tokens_seen": 3676569600, | |
| "step": 56100, | |
| "train_runtime": 36813.4246, | |
| "train_tokens_per_second": 99870.35 | |
| }, | |
| { | |
| "epoch": 0.562, | |
| "grad_norm": 0.16678054630756378, | |
| "learning_rate": 0.00012302045179191594, | |
| "loss": 0.2969, | |
| "num_input_tokens_seen": 3683123200, | |
| "step": 56200, | |
| "train_runtime": 36877.8431, | |
| "train_tokens_per_second": 99873.607 | |
| }, | |
| { | |
| "epoch": 0.563, | |
| "grad_norm": 0.15781697630882263, | |
| "learning_rate": 0.00012255235233674572, | |
| "loss": 0.2972, | |
| "num_input_tokens_seen": 3689676800, | |
| "step": 56300, | |
| "train_runtime": 36943.2178, | |
| "train_tokens_per_second": 99874.267 | |
| }, | |
| { | |
| "epoch": 0.564, | |
| "grad_norm": 0.13541863858699799, | |
| "learning_rate": 0.00012208452927908278, | |
| "loss": 0.302, | |
| "num_input_tokens_seen": 3696230400, | |
| "step": 56400, | |
| "train_runtime": 37008.8029, | |
| "train_tokens_per_second": 99874.357 | |
| }, | |
| { | |
| "epoch": 0.565, | |
| "grad_norm": 0.1400034874677658, | |
| "learning_rate": 0.00012161698732990003, | |
| "loss": 0.3, | |
| "num_input_tokens_seen": 3702784000, | |
| "step": 56500, | |
| "train_runtime": 37078.9889, | |
| "train_tokens_per_second": 99862.054 | |
| }, | |
| { | |
| "epoch": 0.566, | |
| "grad_norm": 0.1511828452348709, | |
| "learning_rate": 0.00012114973119733987, | |
| "loss": 0.3017, | |
| "num_input_tokens_seen": 3709337600, | |
| "step": 56600, | |
| "train_runtime": 37144.0507, | |
| "train_tokens_per_second": 99863.573 | |
| }, | |
| { | |
| "epoch": 0.567, | |
| "grad_norm": 0.15576902031898499, | |
| "learning_rate": 0.00012068276558666616, | |
| "loss": 0.2981, | |
| "num_input_tokens_seen": 3715891200, | |
| "step": 56700, | |
| "train_runtime": 37206.97, | |
| "train_tokens_per_second": 99870.836 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.24084219336509705, | |
| "learning_rate": 0.00012021609520021752, | |
| "loss": 0.3025, | |
| "num_input_tokens_seen": 3722444800, | |
| "step": 56800, | |
| "train_runtime": 37278.1305, | |
| "train_tokens_per_second": 99855.995 | |
| }, | |
| { | |
| "epoch": 0.569, | |
| "grad_norm": 0.16832643747329712, | |
| "learning_rate": 0.00011974972473735957, | |
| "loss": 0.301, | |
| "num_input_tokens_seen": 3728998400, | |
| "step": 56900, | |
| "train_runtime": 37343.2452, | |
| "train_tokens_per_second": 99857.374 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.18326181173324585, | |
| "learning_rate": 0.00011928365889443764, | |
| "loss": 0.2987, | |
| "num_input_tokens_seen": 3735552000, | |
| "step": 57000, | |
| "train_runtime": 37407.594, | |
| "train_tokens_per_second": 99860.793 | |
| }, | |
| { | |
| "epoch": 0.571, | |
| "grad_norm": 0.15526984632015228, | |
| "learning_rate": 0.00011881790236472966, | |
| "loss": 0.2991, | |
| "num_input_tokens_seen": 3742105600, | |
| "step": 57100, | |
| "train_runtime": 37474.3952, | |
| "train_tokens_per_second": 99857.665 | |
| }, | |
| { | |
| "epoch": 0.572, | |
| "grad_norm": 0.18177416920661926, | |
| "learning_rate": 0.00011835245983839869, | |
| "loss": 0.3002, | |
| "num_input_tokens_seen": 3748659200, | |
| "step": 57200, | |
| "train_runtime": 37538.8922, | |
| "train_tokens_per_second": 99860.677 | |
| }, | |
| { | |
| "epoch": 0.573, | |
| "grad_norm": 0.1915498822927475, | |
| "learning_rate": 0.00011788733600244575, | |
| "loss": 0.2986, | |
| "num_input_tokens_seen": 3755212800, | |
| "step": 57300, | |
| "train_runtime": 37605.3867, | |
| "train_tokens_per_second": 99858.375 | |
| }, | |
| { | |
| "epoch": 0.574, | |
| "grad_norm": 0.15175184607505798, | |
| "learning_rate": 0.00011742253554066278, | |
| "loss": 0.3015, | |
| "num_input_tokens_seen": 3761766400, | |
| "step": 57400, | |
| "train_runtime": 37678.0051, | |
| "train_tokens_per_second": 99839.851 | |
| }, | |
| { | |
| "epoch": 0.575, | |
| "grad_norm": 0.16369026899337769, | |
| "learning_rate": 0.00011695806313358523, | |
| "loss": 0.3003, | |
| "num_input_tokens_seen": 3768320000, | |
| "step": 57500, | |
| "train_runtime": 37742.0245, | |
| "train_tokens_per_second": 99844.141 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.16646848618984222, | |
| "learning_rate": 0.00011649392345844506, | |
| "loss": 0.2972, | |
| "num_input_tokens_seen": 3774873600, | |
| "step": 57600, | |
| "train_runtime": 37807.5481, | |
| "train_tokens_per_second": 99844.444 | |
| }, | |
| { | |
| "epoch": 0.577, | |
| "grad_norm": 0.14035099744796753, | |
| "learning_rate": 0.00011603012118912372, | |
| "loss": 0.2985, | |
| "num_input_tokens_seen": 3781427200, | |
| "step": 57700, | |
| "train_runtime": 37871.8826, | |
| "train_tokens_per_second": 99847.88 | |
| }, | |
| { | |
| "epoch": 0.578, | |
| "grad_norm": 0.14899714291095734, | |
| "learning_rate": 0.00011556666099610485, | |
| "loss": 0.3008, | |
| "num_input_tokens_seen": 3787980800, | |
| "step": 57800, | |
| "train_runtime": 37943.2827, | |
| "train_tokens_per_second": 99832.712 | |
| }, | |
| { | |
| "epoch": 0.579, | |
| "grad_norm": 0.15600667893886566, | |
| "learning_rate": 0.00011510354754642745, | |
| "loss": 0.303, | |
| "num_input_tokens_seen": 3794534400, | |
| "step": 57900, | |
| "train_runtime": 38008.9332, | |
| "train_tokens_per_second": 99832.699 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.1631072610616684, | |
| "learning_rate": 0.00011464078550363887, | |
| "loss": 0.2978, | |
| "num_input_tokens_seen": 3801088000, | |
| "step": 58000, | |
| "train_runtime": 38073.7575, | |
| "train_tokens_per_second": 99834.853 | |
| }, | |
| { | |
| "epoch": 0.581, | |
| "grad_norm": 0.1560899019241333, | |
| "learning_rate": 0.0001141783795277477, | |
| "loss": 0.299, | |
| "num_input_tokens_seen": 3807641600, | |
| "step": 58100, | |
| "train_runtime": 38139.694, | |
| "train_tokens_per_second": 99834.089 | |
| }, | |
| { | |
| "epoch": 0.582, | |
| "grad_norm": 0.1506076604127884, | |
| "learning_rate": 0.00011371633427517696, | |
| "loss": 0.2985, | |
| "num_input_tokens_seen": 3814195200, | |
| "step": 58200, | |
| "train_runtime": 38209.9556, | |
| "train_tokens_per_second": 99822.026 | |
| }, | |
| { | |
| "epoch": 0.583, | |
| "grad_norm": 0.16049940884113312, | |
| "learning_rate": 0.00011325465439871731, | |
| "loss": 0.2998, | |
| "num_input_tokens_seen": 3820748800, | |
| "step": 58300, | |
| "train_runtime": 38274.5015, | |
| "train_tokens_per_second": 99824.913 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.15604519844055176, | |
| "learning_rate": 0.00011279334454747989, | |
| "loss": 0.2969, | |
| "num_input_tokens_seen": 3827302400, | |
| "step": 58400, | |
| "train_runtime": 38341.4547, | |
| "train_tokens_per_second": 99821.523 | |
| }, | |
| { | |
| "epoch": 0.585, | |
| "grad_norm": 0.15963351726531982, | |
| "learning_rate": 0.00011233240936684981, | |
| "loss": 0.2988, | |
| "num_input_tokens_seen": 3833856000, | |
| "step": 58500, | |
| "train_runtime": 38406.0222, | |
| "train_tokens_per_second": 99824.345 | |
| }, | |
| { | |
| "epoch": 0.586, | |
| "grad_norm": 0.15443411469459534, | |
| "learning_rate": 0.00011187185349843916, | |
| "loss": 0.298, | |
| "num_input_tokens_seen": 3840409600, | |
| "step": 58600, | |
| "train_runtime": 38472.0656, | |
| "train_tokens_per_second": 99823.327 | |
| }, | |
| { | |
| "epoch": 0.587, | |
| "grad_norm": 0.15459220111370087, | |
| "learning_rate": 0.00011141168158004053, | |
| "loss": 0.3004, | |
| "num_input_tokens_seen": 3846963200, | |
| "step": 58700, | |
| "train_runtime": 38542.0532, | |
| "train_tokens_per_second": 99812.098 | |
| }, | |
| { | |
| "epoch": 0.588, | |
| "grad_norm": 0.16199928522109985, | |
| "learning_rate": 0.00011095189824557998, | |
| "loss": 0.2985, | |
| "num_input_tokens_seen": 3853516800, | |
| "step": 58800, | |
| "train_runtime": 38609.4411, | |
| "train_tokens_per_second": 99807.63 | |
| }, | |
| { | |
| "epoch": 0.589, | |
| "grad_norm": 0.2209610939025879, | |
| "learning_rate": 0.00011049250812507054, | |
| "loss": 0.3005, | |
| "num_input_tokens_seen": 3860070400, | |
| "step": 58900, | |
| "train_runtime": 38675.4402, | |
| "train_tokens_per_second": 99806.761 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.22285670042037964, | |
| "learning_rate": 0.00011003351584456571, | |
| "loss": 0.298, | |
| "num_input_tokens_seen": 3866624000, | |
| "step": 59000, | |
| "train_runtime": 38740.3065, | |
| "train_tokens_per_second": 99808.813 | |
| }, | |
| { | |
| "epoch": 0.591, | |
| "grad_norm": 0.2148812711238861, | |
| "learning_rate": 0.0001095749260261126, | |
| "loss": 0.2966, | |
| "num_input_tokens_seen": 3873177600, | |
| "step": 59100, | |
| "train_runtime": 38806.3344, | |
| "train_tokens_per_second": 99807.871 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.21284043788909912, | |
| "learning_rate": 0.00010911674328770559, | |
| "loss": 0.3009, | |
| "num_input_tokens_seen": 3879731200, | |
| "step": 59200, | |
| "train_runtime": 38871.8466, | |
| "train_tokens_per_second": 99808.256 | |
| }, | |
| { | |
| "epoch": 0.593, | |
| "grad_norm": 0.1655593365430832, | |
| "learning_rate": 0.00010865897224323979, | |
| "loss": 0.2981, | |
| "num_input_tokens_seen": 3886284800, | |
| "step": 59300, | |
| "train_runtime": 38937.7196, | |
| "train_tokens_per_second": 99807.714 | |
| }, | |
| { | |
| "epoch": 0.594, | |
| "grad_norm": 0.17153207957744598, | |
| "learning_rate": 0.00010820161750246453, | |
| "loss": 0.3042, | |
| "num_input_tokens_seen": 3892838400, | |
| "step": 59400, | |
| "train_runtime": 39004.8582, | |
| "train_tokens_per_second": 99803.937 | |
| }, | |
| { | |
| "epoch": 0.595, | |
| "grad_norm": 0.15362666547298431, | |
| "learning_rate": 0.00010774468367093696, | |
| "loss": 0.3001, | |
| "num_input_tokens_seen": 3899392000, | |
| "step": 59500, | |
| "train_runtime": 39068.7475, | |
| "train_tokens_per_second": 99808.472 | |
| }, | |
| { | |
| "epoch": 0.596, | |
| "grad_norm": 0.15481388568878174, | |
| "learning_rate": 0.00010728817534997573, | |
| "loss": 0.2973, | |
| "num_input_tokens_seen": 3905945600, | |
| "step": 59600, | |
| "train_runtime": 39137.2916, | |
| "train_tokens_per_second": 99801.122 | |
| }, | |
| { | |
| "epoch": 0.597, | |
| "grad_norm": 0.1292748749256134, | |
| "learning_rate": 0.00010683209713661453, | |
| "loss": 0.2993, | |
| "num_input_tokens_seen": 3912499200, | |
| "step": 59700, | |
| "train_runtime": 39198.2818, | |
| "train_tokens_per_second": 99813.028 | |
| }, | |
| { | |
| "epoch": 0.598, | |
| "grad_norm": 0.14853951334953308, | |
| "learning_rate": 0.00010637645362355589, | |
| "loss": 0.2967, | |
| "num_input_tokens_seen": 3919052800, | |
| "step": 59800, | |
| "train_runtime": 39262.6162, | |
| "train_tokens_per_second": 99816.395 | |
| }, | |
| { | |
| "epoch": 0.599, | |
| "grad_norm": 0.13745439052581787, | |
| "learning_rate": 0.00010592124939912497, | |
| "loss": 0.3023, | |
| "num_input_tokens_seen": 3925606400, | |
| "step": 59900, | |
| "train_runtime": 39328.4755, | |
| "train_tokens_per_second": 99815.88 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.14352121949195862, | |
| "learning_rate": 0.00010546648904722326, | |
| "loss": 0.2973, | |
| "num_input_tokens_seen": 3932160000, | |
| "step": 60000, | |
| "train_runtime": 39393.6967, | |
| "train_tokens_per_second": 99816.984 | |
| }, | |
| { | |
| "epoch": 0.601, | |
| "grad_norm": 0.16375063359737396, | |
| "learning_rate": 0.0001050121771472824, | |
| "loss": 0.2934, | |
| "num_input_tokens_seen": 3938713600, | |
| "step": 60100, | |
| "train_runtime": 39465.7876, | |
| "train_tokens_per_second": 99800.709 | |
| }, | |
| { | |
| "epoch": 0.602, | |
| "grad_norm": 0.144679456949234, | |
| "learning_rate": 0.0001045583182742182, | |
| "loss": 0.2983, | |
| "num_input_tokens_seen": 3945267200, | |
| "step": 60200, | |
| "train_runtime": 39531.166, | |
| "train_tokens_per_second": 99801.438 | |
| }, | |
| { | |
| "epoch": 0.603, | |
| "grad_norm": 0.33903974294662476, | |
| "learning_rate": 0.00010410491699838448, | |
| "loss": 0.2981, | |
| "num_input_tokens_seen": 3951820800, | |
| "step": 60300, | |
| "train_runtime": 39596.8662, | |
| "train_tokens_per_second": 99801.352 | |
| }, | |
| { | |
| "epoch": 0.604, | |
| "grad_norm": 0.1823410987854004, | |
| "learning_rate": 0.00010365197788552707, | |
| "loss": 0.2986, | |
| "num_input_tokens_seen": 3958374400, | |
| "step": 60400, | |
| "train_runtime": 39664.1206, | |
| "train_tokens_per_second": 99797.357 | |
| }, | |
| { | |
| "epoch": 0.605, | |
| "grad_norm": 0.18758277595043182, | |
| "learning_rate": 0.00010319950549673778, | |
| "loss": 0.2967, | |
| "num_input_tokens_seen": 3964928000, | |
| "step": 60500, | |
| "train_runtime": 39728.4695, | |
| "train_tokens_per_second": 99800.673 | |
| }, | |
| { | |
| "epoch": 0.606, | |
| "grad_norm": 0.173909991979599, | |
| "learning_rate": 0.00010274750438840855, | |
| "loss": 0.2981, | |
| "num_input_tokens_seen": 3971481600, | |
| "step": 60600, | |
| "train_runtime": 39794.5098, | |
| "train_tokens_per_second": 99799.737 | |
| }, | |
| { | |
| "epoch": 0.607, | |
| "grad_norm": 0.14504651725292206, | |
| "learning_rate": 0.00010229597911218554, | |
| "loss": 0.2967, | |
| "num_input_tokens_seen": 3978035200, | |
| "step": 60700, | |
| "train_runtime": 39864.8024, | |
| "train_tokens_per_second": 99788.158 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.1418026238679886, | |
| "learning_rate": 0.00010184493421492324, | |
| "loss": 0.2976, | |
| "num_input_tokens_seen": 3984588800, | |
| "step": 60800, | |
| "train_runtime": 39931.2064, | |
| "train_tokens_per_second": 99786.337 | |
| }, | |
| { | |
| "epoch": 0.609, | |
| "grad_norm": 0.18415790796279907, | |
| "learning_rate": 0.0001013943742386388, | |
| "loss": 0.2997, | |
| "num_input_tokens_seen": 3991142400, | |
| "step": 60900, | |
| "train_runtime": 39996.7127, | |
| "train_tokens_per_second": 99786.761 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.14107364416122437, | |
| "learning_rate": 0.00010094430372046616, | |
| "loss": 0.2979, | |
| "num_input_tokens_seen": 3997696000, | |
| "step": 61000, | |
| "train_runtime": 40068.6157, | |
| "train_tokens_per_second": 99771.253 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 3997696000, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.5963643723776e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |