| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3, | |
| "eval_steps": 500, | |
| "global_step": 30000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001, | |
| "grad_norm": 20.30853843688965, | |
| "learning_rate": 2.97e-05, | |
| "loss": 6.3768, | |
| "num_input_tokens_seen": 6553600, | |
| "step": 100, | |
| "train_runtime": 67.7249, | |
| "train_tokens_per_second": 96767.992 | |
| }, | |
| { | |
| "epoch": 0.002, | |
| "grad_norm": 12.960590362548828, | |
| "learning_rate": 5.97e-05, | |
| "loss": 1.5751, | |
| "num_input_tokens_seen": 13107200, | |
| "step": 200, | |
| "train_runtime": 121.8001, | |
| "train_tokens_per_second": 107612.389 | |
| }, | |
| { | |
| "epoch": 0.003, | |
| "grad_norm": 4.234757900238037, | |
| "learning_rate": 8.969999999999998e-05, | |
| "loss": 1.3419, | |
| "num_input_tokens_seen": 19660800, | |
| "step": 300, | |
| "train_runtime": 175.7236, | |
| "train_tokens_per_second": 111884.791 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 2.8189597129821777, | |
| "learning_rate": 0.0001197, | |
| "loss": 1.1467, | |
| "num_input_tokens_seen": 26214400, | |
| "step": 400, | |
| "train_runtime": 229.814, | |
| "train_tokens_per_second": 114067.895 | |
| }, | |
| { | |
| "epoch": 0.005, | |
| "grad_norm": 4.593051910400391, | |
| "learning_rate": 0.00014969999999999998, | |
| "loss": 1.02, | |
| "num_input_tokens_seen": 32768000, | |
| "step": 500, | |
| "train_runtime": 288.5437, | |
| "train_tokens_per_second": 113563.389 | |
| }, | |
| { | |
| "epoch": 0.006, | |
| "grad_norm": 1.745302677154541, | |
| "learning_rate": 0.00017969999999999998, | |
| "loss": 0.9381, | |
| "num_input_tokens_seen": 39321600, | |
| "step": 600, | |
| "train_runtime": 343.6994, | |
| "train_tokens_per_second": 114406.961 | |
| }, | |
| { | |
| "epoch": 0.007, | |
| "grad_norm": 2.180215358734131, | |
| "learning_rate": 0.00020969999999999997, | |
| "loss": 0.8912, | |
| "num_input_tokens_seen": 45875200, | |
| "step": 700, | |
| "train_runtime": 398.5625, | |
| "train_tokens_per_second": 115101.641 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 1.5782365798950195, | |
| "learning_rate": 0.0002397, | |
| "loss": 0.8525, | |
| "num_input_tokens_seen": 52428800, | |
| "step": 800, | |
| "train_runtime": 453.9393, | |
| "train_tokens_per_second": 115497.369 | |
| }, | |
| { | |
| "epoch": 0.009, | |
| "grad_norm": 1.306846022605896, | |
| "learning_rate": 0.0002697, | |
| "loss": 0.8411, | |
| "num_input_tokens_seen": 58982400, | |
| "step": 900, | |
| "train_runtime": 508.8336, | |
| "train_tokens_per_second": 115916.867 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.9485424160957336, | |
| "learning_rate": 0.00029969999999999997, | |
| "loss": 0.8207, | |
| "num_input_tokens_seen": 65536000, | |
| "step": 1000, | |
| "train_runtime": 569.5888, | |
| "train_tokens_per_second": 115058.451 | |
| }, | |
| { | |
| "epoch": 0.011, | |
| "grad_norm": 0.8611180186271667, | |
| "learning_rate": 0.00029999925978027874, | |
| "loss": 0.8158, | |
| "num_input_tokens_seen": 72089600, | |
| "step": 1100, | |
| "train_runtime": 623.5912, | |
| "train_tokens_per_second": 115603.946 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.8997637033462524, | |
| "learning_rate": 0.0002999970091452017, | |
| "loss": 0.7948, | |
| "num_input_tokens_seen": 78643200, | |
| "step": 1200, | |
| "train_runtime": 679.5203, | |
| "train_tokens_per_second": 115733.415 | |
| }, | |
| { | |
| "epoch": 0.013, | |
| "grad_norm": 0.6279132962226868, | |
| "learning_rate": 0.00029999324804190795, | |
| "loss": 0.7852, | |
| "num_input_tokens_seen": 85196800, | |
| "step": 1300, | |
| "train_runtime": 739.7161, | |
| "train_tokens_per_second": 115174.993 | |
| }, | |
| { | |
| "epoch": 0.014, | |
| "grad_norm": 0.5959413051605225, | |
| "learning_rate": 0.0002999879765082716, | |
| "loss": 0.7736, | |
| "num_input_tokens_seen": 91750400, | |
| "step": 1400, | |
| "train_runtime": 794.2065, | |
| "train_tokens_per_second": 115524.612 | |
| }, | |
| { | |
| "epoch": 0.015, | |
| "grad_norm": 0.6557334065437317, | |
| "learning_rate": 0.000299981194597377, | |
| "loss": 0.7631, | |
| "num_input_tokens_seen": 98304000, | |
| "step": 1500, | |
| "train_runtime": 847.9465, | |
| "train_tokens_per_second": 115931.844 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.7466850876808167, | |
| "learning_rate": 0.0002999729023775179, | |
| "loss": 0.7564, | |
| "num_input_tokens_seen": 104857600, | |
| "step": 1600, | |
| "train_runtime": 907.419, | |
| "train_tokens_per_second": 115555.874 | |
| }, | |
| { | |
| "epoch": 0.017, | |
| "grad_norm": 0.6613496541976929, | |
| "learning_rate": 0.0002999630999321969, | |
| "loss": 0.7664, | |
| "num_input_tokens_seen": 111411200, | |
| "step": 1700, | |
| "train_runtime": 957.6918, | |
| "train_tokens_per_second": 116333.047 | |
| }, | |
| { | |
| "epoch": 0.018, | |
| "grad_norm": 0.49886658787727356, | |
| "learning_rate": 0.00029995178736012443, | |
| "loss": 0.7577, | |
| "num_input_tokens_seen": 117964800, | |
| "step": 1800, | |
| "train_runtime": 1017.5123, | |
| "train_tokens_per_second": 115934.516 | |
| }, | |
| { | |
| "epoch": 0.019, | |
| "grad_norm": 0.5621941089630127, | |
| "learning_rate": 0.0002999389647752181, | |
| "loss": 0.743, | |
| "num_input_tokens_seen": 124518400, | |
| "step": 1900, | |
| "train_runtime": 1070.9552, | |
| "train_tokens_per_second": 116268.538 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.46181556582450867, | |
| "learning_rate": 0.00029992463230660104, | |
| "loss": 0.7429, | |
| "num_input_tokens_seen": 131072000, | |
| "step": 2000, | |
| "train_runtime": 1130.6338, | |
| "train_tokens_per_second": 115927.894 | |
| }, | |
| { | |
| "epoch": 0.021, | |
| "grad_norm": 0.4571639895439148, | |
| "learning_rate": 0.00029990879009860117, | |
| "loss": 0.7349, | |
| "num_input_tokens_seen": 137625600, | |
| "step": 2100, | |
| "train_runtime": 1185.0559, | |
| "train_tokens_per_second": 116134.269 | |
| }, | |
| { | |
| "epoch": 0.022, | |
| "grad_norm": 0.5650346875190735, | |
| "learning_rate": 0.0002998914383107493, | |
| "loss": 0.7373, | |
| "num_input_tokens_seen": 144179200, | |
| "step": 2200, | |
| "train_runtime": 1239.4558, | |
| "train_tokens_per_second": 116324.603 | |
| }, | |
| { | |
| "epoch": 0.023, | |
| "grad_norm": 0.5640336871147156, | |
| "learning_rate": 0.0002998725771177778, | |
| "loss": 0.7383, | |
| "num_input_tokens_seen": 150732800, | |
| "step": 2300, | |
| "train_runtime": 1294.8284, | |
| "train_tokens_per_second": 116411.407 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 0.4288141429424286, | |
| "learning_rate": 0.00029985220670961847, | |
| "loss": 0.7256, | |
| "num_input_tokens_seen": 157286400, | |
| "step": 2400, | |
| "train_runtime": 1353.9338, | |
| "train_tokens_per_second": 116169.935 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 0.7532113194465637, | |
| "learning_rate": 0.0002998303272914014, | |
| "loss": 0.7301, | |
| "num_input_tokens_seen": 163840000, | |
| "step": 2500, | |
| "train_runtime": 1408.3918, | |
| "train_tokens_per_second": 116331.266 | |
| }, | |
| { | |
| "epoch": 0.026, | |
| "grad_norm": 0.42602330446243286, | |
| "learning_rate": 0.00029980693908345185, | |
| "loss": 0.7238, | |
| "num_input_tokens_seen": 170393600, | |
| "step": 2600, | |
| "train_runtime": 1461.9698, | |
| "train_tokens_per_second": 116550.699 | |
| }, | |
| { | |
| "epoch": 0.027, | |
| "grad_norm": 0.655644953250885, | |
| "learning_rate": 0.00029978204232128895, | |
| "loss": 0.7213, | |
| "num_input_tokens_seen": 176947200, | |
| "step": 2700, | |
| "train_runtime": 1523.0525, | |
| "train_tokens_per_second": 116179.321 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 0.3962925970554352, | |
| "learning_rate": 0.0002997556372556227, | |
| "loss": 0.7238, | |
| "num_input_tokens_seen": 183500800, | |
| "step": 2800, | |
| "train_runtime": 1577.6914, | |
| "train_tokens_per_second": 116309.692 | |
| }, | |
| { | |
| "epoch": 0.029, | |
| "grad_norm": 0.44488221406936646, | |
| "learning_rate": 0.0002997277241523519, | |
| "loss": 0.7218, | |
| "num_input_tokens_seen": 190054400, | |
| "step": 2900, | |
| "train_runtime": 1631.4556, | |
| "train_tokens_per_second": 116493.763 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.3886267840862274, | |
| "learning_rate": 0.00029969830329256125, | |
| "loss": 0.7091, | |
| "num_input_tokens_seen": 196608000, | |
| "step": 3000, | |
| "train_runtime": 1686.3399, | |
| "train_tokens_per_second": 116588.595 | |
| }, | |
| { | |
| "epoch": 0.031, | |
| "grad_norm": 0.46353062987327576, | |
| "learning_rate": 0.00029966737497251836, | |
| "loss": 0.7132, | |
| "num_input_tokens_seen": 203161600, | |
| "step": 3100, | |
| "train_runtime": 1745.3481, | |
| "train_tokens_per_second": 116401.765 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.45476260781288147, | |
| "learning_rate": 0.0002996349395036711, | |
| "loss": 0.7099, | |
| "num_input_tokens_seen": 209715200, | |
| "step": 3200, | |
| "train_runtime": 1799.8354, | |
| "train_tokens_per_second": 116519.096 | |
| }, | |
| { | |
| "epoch": 0.033, | |
| "grad_norm": 0.5311458706855774, | |
| "learning_rate": 0.00029960099721264435, | |
| "loss": 0.7162, | |
| "num_input_tokens_seen": 216268800, | |
| "step": 3300, | |
| "train_runtime": 1854.9316, | |
| "train_tokens_per_second": 116591.252 | |
| }, | |
| { | |
| "epoch": 0.034, | |
| "grad_norm": 0.4546497166156769, | |
| "learning_rate": 0.0002995655484412365, | |
| "loss": 0.7046, | |
| "num_input_tokens_seen": 222822400, | |
| "step": 3400, | |
| "train_runtime": 1909.1868, | |
| "train_tokens_per_second": 116710.63 | |
| }, | |
| { | |
| "epoch": 0.035, | |
| "grad_norm": 0.6349391937255859, | |
| "learning_rate": 0.00029952859354641636, | |
| "loss": 0.7073, | |
| "num_input_tokens_seen": 229376000, | |
| "step": 3500, | |
| "train_runtime": 1969.0427, | |
| "train_tokens_per_second": 116491.125 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 0.493557870388031, | |
| "learning_rate": 0.00029949013290031924, | |
| "loss": 0.7021, | |
| "num_input_tokens_seen": 235929600, | |
| "step": 3600, | |
| "train_runtime": 2023.4924, | |
| "train_tokens_per_second": 116595.251 | |
| }, | |
| { | |
| "epoch": 0.037, | |
| "grad_norm": 0.3123897314071655, | |
| "learning_rate": 0.00029945016689024353, | |
| "loss": 0.696, | |
| "num_input_tokens_seen": 242483200, | |
| "step": 3700, | |
| "train_runtime": 2077.3277, | |
| "train_tokens_per_second": 116728.432 | |
| }, | |
| { | |
| "epoch": 0.038, | |
| "grad_norm": 0.43099701404571533, | |
| "learning_rate": 0.0002994086959186464, | |
| "loss": 0.6997, | |
| "num_input_tokens_seen": 249036800, | |
| "step": 3800, | |
| "train_runtime": 2132.1624, | |
| "train_tokens_per_second": 116800.109 | |
| }, | |
| { | |
| "epoch": 0.039, | |
| "grad_norm": 0.3274790346622467, | |
| "learning_rate": 0.00029936572040314014, | |
| "loss": 0.7034, | |
| "num_input_tokens_seen": 255590400, | |
| "step": 3900, | |
| "train_runtime": 2193.0795, | |
| "train_tokens_per_second": 116544.063 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.33723658323287964, | |
| "learning_rate": 0.0002993212407764877, | |
| "loss": 0.6977, | |
| "num_input_tokens_seen": 262144000, | |
| "step": 4000, | |
| "train_runtime": 2246.9274, | |
| "train_tokens_per_second": 116667.767 | |
| }, | |
| { | |
| "epoch": 0.041, | |
| "grad_norm": 0.42106112837791443, | |
| "learning_rate": 0.00029927525748659834, | |
| "loss": 0.7078, | |
| "num_input_tokens_seen": 268697600, | |
| "step": 4100, | |
| "train_runtime": 2302.3386, | |
| "train_tokens_per_second": 116706.38 | |
| }, | |
| { | |
| "epoch": 0.042, | |
| "grad_norm": 0.3567107915878296, | |
| "learning_rate": 0.0002992277709965234, | |
| "loss": 0.6967, | |
| "num_input_tokens_seen": 275251200, | |
| "step": 4200, | |
| "train_runtime": 2356.6313, | |
| "train_tokens_per_second": 116798.585 | |
| }, | |
| { | |
| "epoch": 0.043, | |
| "grad_norm": 0.4269777834415436, | |
| "learning_rate": 0.0002991787817844513, | |
| "loss": 0.6976, | |
| "num_input_tokens_seen": 281804800, | |
| "step": 4300, | |
| "train_runtime": 2417.993, | |
| "train_tokens_per_second": 116544.92 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 0.4035900831222534, | |
| "learning_rate": 0.0002991282903437028, | |
| "loss": 0.6915, | |
| "num_input_tokens_seen": 288358400, | |
| "step": 4400, | |
| "train_runtime": 2471.1514, | |
| "train_tokens_per_second": 116689.896 | |
| }, | |
| { | |
| "epoch": 0.045, | |
| "grad_norm": 0.3197948634624481, | |
| "learning_rate": 0.0002990762971827262, | |
| "loss": 0.6905, | |
| "num_input_tokens_seen": 294912000, | |
| "step": 4500, | |
| "train_runtime": 2525.6119, | |
| "train_tokens_per_second": 116768.533 | |
| }, | |
| { | |
| "epoch": 0.046, | |
| "grad_norm": 0.3118247389793396, | |
| "learning_rate": 0.00029902280282509197, | |
| "loss": 0.6959, | |
| "num_input_tokens_seen": 301465600, | |
| "step": 4600, | |
| "train_runtime": 2587.1477, | |
| "train_tokens_per_second": 116524.311 | |
| }, | |
| { | |
| "epoch": 0.047, | |
| "grad_norm": 0.38194310665130615, | |
| "learning_rate": 0.0002989678078094878, | |
| "loss": 0.6812, | |
| "num_input_tokens_seen": 308019200, | |
| "step": 4700, | |
| "train_runtime": 2645.2255, | |
| "train_tokens_per_second": 116443.456 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.5399206876754761, | |
| "learning_rate": 0.00029891131268971284, | |
| "loss": 0.6843, | |
| "num_input_tokens_seen": 314572800, | |
| "step": 4800, | |
| "train_runtime": 2702.2839, | |
| "train_tokens_per_second": 116409.974 | |
| }, | |
| { | |
| "epoch": 0.049, | |
| "grad_norm": 0.3325801491737366, | |
| "learning_rate": 0.0002988533180346723, | |
| "loss": 0.6803, | |
| "num_input_tokens_seen": 321126400, | |
| "step": 4900, | |
| "train_runtime": 2761.723, | |
| "train_tokens_per_second": 116277.558 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.41960838437080383, | |
| "learning_rate": 0.0002987938244283717, | |
| "loss": 0.6772, | |
| "num_input_tokens_seen": 327680000, | |
| "step": 5000, | |
| "train_runtime": 2818.6212, | |
| "train_tokens_per_second": 116255.424 | |
| }, | |
| { | |
| "epoch": 0.051, | |
| "grad_norm": 0.40982258319854736, | |
| "learning_rate": 0.00029873283246991105, | |
| "loss": 0.6861, | |
| "num_input_tokens_seen": 334233600, | |
| "step": 5100, | |
| "train_runtime": 2873.5528, | |
| "train_tokens_per_second": 116313.715 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 0.31062355637550354, | |
| "learning_rate": 0.0002986703427734787, | |
| "loss": 0.6852, | |
| "num_input_tokens_seen": 340787200, | |
| "step": 5200, | |
| "train_runtime": 2929.1895, | |
| "train_tokens_per_second": 116341.805 | |
| }, | |
| { | |
| "epoch": 0.053, | |
| "grad_norm": 0.3270035684108734, | |
| "learning_rate": 0.00029860635596834517, | |
| "loss": 0.6854, | |
| "num_input_tokens_seen": 347340800, | |
| "step": 5300, | |
| "train_runtime": 2983.4815, | |
| "train_tokens_per_second": 116421.301 | |
| }, | |
| { | |
| "epoch": 0.054, | |
| "grad_norm": 0.3153812289237976, | |
| "learning_rate": 0.0002985408726988569, | |
| "loss": 0.6826, | |
| "num_input_tokens_seen": 353894400, | |
| "step": 5400, | |
| "train_runtime": 3042.2104, | |
| "train_tokens_per_second": 116328.049 | |
| }, | |
| { | |
| "epoch": 0.055, | |
| "grad_norm": 0.328072190284729, | |
| "learning_rate": 0.0002984738936244296, | |
| "loss": 0.6796, | |
| "num_input_tokens_seen": 360448000, | |
| "step": 5500, | |
| "train_runtime": 3096.1295, | |
| "train_tokens_per_second": 116418.904 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.32828816771507263, | |
| "learning_rate": 0.0002984054194195419, | |
| "loss": 0.6927, | |
| "num_input_tokens_seen": 367001600, | |
| "step": 5600, | |
| "train_runtime": 3151.7383, | |
| "train_tokens_per_second": 116444.184 | |
| }, | |
| { | |
| "epoch": 0.057, | |
| "grad_norm": 0.3481093645095825, | |
| "learning_rate": 0.0002983354507737283, | |
| "loss": 0.6876, | |
| "num_input_tokens_seen": 373555200, | |
| "step": 5700, | |
| "train_runtime": 3207.4, | |
| "train_tokens_per_second": 116466.67 | |
| }, | |
| { | |
| "epoch": 0.058, | |
| "grad_norm": 0.4054113030433655, | |
| "learning_rate": 0.00029826398839157215, | |
| "loss": 0.6868, | |
| "num_input_tokens_seen": 380108800, | |
| "step": 5800, | |
| "train_runtime": 3267.0971, | |
| "train_tokens_per_second": 116344.508 | |
| }, | |
| { | |
| "epoch": 0.059, | |
| "grad_norm": 0.3671651780605316, | |
| "learning_rate": 0.000298191032992699, | |
| "loss": 0.6818, | |
| "num_input_tokens_seen": 386662400, | |
| "step": 5900, | |
| "train_runtime": 3320.6147, | |
| "train_tokens_per_second": 116443.019 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.2815225422382355, | |
| "learning_rate": 0.0002981165853117688, | |
| "loss": 0.6735, | |
| "num_input_tokens_seen": 393216000, | |
| "step": 6000, | |
| "train_runtime": 3380.4372, | |
| "train_tokens_per_second": 116321.049 | |
| }, | |
| { | |
| "epoch": 0.061, | |
| "grad_norm": 0.5300637483596802, | |
| "learning_rate": 0.000298040646098469, | |
| "loss": 0.6792, | |
| "num_input_tokens_seen": 399769600, | |
| "step": 6100, | |
| "train_runtime": 3434.9665, | |
| "train_tokens_per_second": 116382.388 | |
| }, | |
| { | |
| "epoch": 0.062, | |
| "grad_norm": 0.28088632225990295, | |
| "learning_rate": 0.0002979632161175064, | |
| "loss": 0.6763, | |
| "num_input_tokens_seen": 406323200, | |
| "step": 6200, | |
| "train_runtime": 3490.2084, | |
| "train_tokens_per_second": 116418.034 | |
| }, | |
| { | |
| "epoch": 0.063, | |
| "grad_norm": 0.5535532236099243, | |
| "learning_rate": 0.0002978842961486003, | |
| "loss": 0.6769, | |
| "num_input_tokens_seen": 412876800, | |
| "step": 6300, | |
| "train_runtime": 3587.6493, | |
| "train_tokens_per_second": 115082.821 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.3341565728187561, | |
| "learning_rate": 0.0002978038869864738, | |
| "loss": 0.6734, | |
| "num_input_tokens_seen": 419430400, | |
| "step": 6400, | |
| "train_runtime": 3650.2458, | |
| "train_tokens_per_second": 114904.702 | |
| }, | |
| { | |
| "epoch": 0.065, | |
| "grad_norm": 0.34299543499946594, | |
| "learning_rate": 0.0002977219894408463, | |
| "loss": 0.671, | |
| "num_input_tokens_seen": 425984000, | |
| "step": 6500, | |
| "train_runtime": 3704.075, | |
| "train_tokens_per_second": 115004.151 | |
| }, | |
| { | |
| "epoch": 0.066, | |
| "grad_norm": 0.3468911349773407, | |
| "learning_rate": 0.0002976386043364251, | |
| "loss": 0.6799, | |
| "num_input_tokens_seen": 432537600, | |
| "step": 6600, | |
| "train_runtime": 3758.2332, | |
| "train_tokens_per_second": 115090.677 | |
| }, | |
| { | |
| "epoch": 0.067, | |
| "grad_norm": 0.29494985938072205, | |
| "learning_rate": 0.00029755373251289733, | |
| "loss": 0.6836, | |
| "num_input_tokens_seen": 439091200, | |
| "step": 6700, | |
| "train_runtime": 3818.8992, | |
| "train_tokens_per_second": 114978.475 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 0.43209826946258545, | |
| "learning_rate": 0.0002974673748249213, | |
| "loss": 0.6721, | |
| "num_input_tokens_seen": 445644800, | |
| "step": 6800, | |
| "train_runtime": 3873.8852, | |
| "train_tokens_per_second": 115038.204 | |
| }, | |
| { | |
| "epoch": 0.069, | |
| "grad_norm": 0.36356812715530396, | |
| "learning_rate": 0.00029737953214211804, | |
| "loss": 0.6742, | |
| "num_input_tokens_seen": 452198400, | |
| "step": 6900, | |
| "train_runtime": 3928.8659, | |
| "train_tokens_per_second": 115096.42 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.4425353705883026, | |
| "learning_rate": 0.0002972902053490623, | |
| "loss": 0.6691, | |
| "num_input_tokens_seen": 458752000, | |
| "step": 7000, | |
| "train_runtime": 3982.4622, | |
| "train_tokens_per_second": 115193.058 | |
| }, | |
| { | |
| "epoch": 0.071, | |
| "grad_norm": 0.3378838300704956, | |
| "learning_rate": 0.00029719939534527393, | |
| "loss": 0.6817, | |
| "num_input_tokens_seen": 465305600, | |
| "step": 7100, | |
| "train_runtime": 4038.9423, | |
| "train_tokens_per_second": 115204.814 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.46528419852256775, | |
| "learning_rate": 0.00029710710304520866, | |
| "loss": 0.6731, | |
| "num_input_tokens_seen": 471859200, | |
| "step": 7200, | |
| "train_runtime": 4094.7269, | |
| "train_tokens_per_second": 115235.817 | |
| }, | |
| { | |
| "epoch": 0.073, | |
| "grad_norm": 0.45732709765434265, | |
| "learning_rate": 0.00029701332937824885, | |
| "loss": 0.6667, | |
| "num_input_tokens_seen": 478412800, | |
| "step": 7300, | |
| "train_runtime": 4155.1372, | |
| "train_tokens_per_second": 115137.666 | |
| }, | |
| { | |
| "epoch": 0.074, | |
| "grad_norm": 0.28316208720207214, | |
| "learning_rate": 0.0002969180752886944, | |
| "loss": 0.6735, | |
| "num_input_tokens_seen": 484966400, | |
| "step": 7400, | |
| "train_runtime": 4208.4074, | |
| "train_tokens_per_second": 115237.512 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 0.35296332836151123, | |
| "learning_rate": 0.0002968213417357529, | |
| "loss": 0.674, | |
| "num_input_tokens_seen": 491520000, | |
| "step": 7500, | |
| "train_runtime": 4262.7857, | |
| "train_tokens_per_second": 115304.882 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 0.37089261412620544, | |
| "learning_rate": 0.00029672312969353015, | |
| "loss": 0.6696, | |
| "num_input_tokens_seen": 498073600, | |
| "step": 7600, | |
| "train_runtime": 4324.696, | |
| "train_tokens_per_second": 115169.622 | |
| }, | |
| { | |
| "epoch": 0.077, | |
| "grad_norm": 0.3451579809188843, | |
| "learning_rate": 0.00029662344015102027, | |
| "loss": 0.669, | |
| "num_input_tokens_seen": 504627200, | |
| "step": 7700, | |
| "train_runtime": 4380.0469, | |
| "train_tokens_per_second": 115210.456 | |
| }, | |
| { | |
| "epoch": 0.078, | |
| "grad_norm": 0.2890200912952423, | |
| "learning_rate": 0.00029652227411209594, | |
| "loss": 0.6672, | |
| "num_input_tokens_seen": 511180800, | |
| "step": 7800, | |
| "train_runtime": 4433.6829, | |
| "train_tokens_per_second": 115294.849 | |
| }, | |
| { | |
| "epoch": 0.079, | |
| "grad_norm": 0.3042987287044525, | |
| "learning_rate": 0.0002964196325954979, | |
| "loss": 0.6642, | |
| "num_input_tokens_seen": 517734400, | |
| "step": 7900, | |
| "train_runtime": 4488.6662, | |
| "train_tokens_per_second": 115342.593 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.34017419815063477, | |
| "learning_rate": 0.0002963155166348253, | |
| "loss": 0.6689, | |
| "num_input_tokens_seen": 524288000, | |
| "step": 8000, | |
| "train_runtime": 4548.362, | |
| "train_tokens_per_second": 115269.63 | |
| }, | |
| { | |
| "epoch": 0.081, | |
| "grad_norm": 0.5584019422531128, | |
| "learning_rate": 0.0002962099272785246, | |
| "loss": 0.6703, | |
| "num_input_tokens_seen": 530841600, | |
| "step": 8100, | |
| "train_runtime": 4605.2398, | |
| "train_tokens_per_second": 115269.047 | |
| }, | |
| { | |
| "epoch": 0.082, | |
| "grad_norm": 0.2589603364467621, | |
| "learning_rate": 0.0002961028655898794, | |
| "loss": 0.6634, | |
| "num_input_tokens_seen": 537395200, | |
| "step": 8200, | |
| "train_runtime": 4660.2455, | |
| "train_tokens_per_second": 115314.783 | |
| }, | |
| { | |
| "epoch": 0.083, | |
| "grad_norm": 0.41600939631462097, | |
| "learning_rate": 0.0002959943326469998, | |
| "loss": 0.6611, | |
| "num_input_tokens_seen": 543948800, | |
| "step": 8300, | |
| "train_runtime": 4714.0847, | |
| "train_tokens_per_second": 115388.0 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 0.28658175468444824, | |
| "learning_rate": 0.0002958843295428112, | |
| "loss": 0.659, | |
| "num_input_tokens_seen": 550502400, | |
| "step": 8400, | |
| "train_runtime": 4768.8365, | |
| "train_tokens_per_second": 115437.466 | |
| }, | |
| { | |
| "epoch": 0.085, | |
| "grad_norm": 0.290019154548645, | |
| "learning_rate": 0.0002957728573850438, | |
| "loss": 0.661, | |
| "num_input_tokens_seen": 557056000, | |
| "step": 8500, | |
| "train_runtime": 4828.5019, | |
| "train_tokens_per_second": 115368.289 | |
| }, | |
| { | |
| "epoch": 0.086, | |
| "grad_norm": 0.3445529341697693, | |
| "learning_rate": 0.0002956599172962209, | |
| "loss": 0.6745, | |
| "num_input_tokens_seen": 563609600, | |
| "step": 8600, | |
| "train_runtime": 4883.6807, | |
| "train_tokens_per_second": 115406.727 | |
| }, | |
| { | |
| "epoch": 0.087, | |
| "grad_norm": 0.3387536406517029, | |
| "learning_rate": 0.0002955455104136479, | |
| "loss": 0.6596, | |
| "num_input_tokens_seen": 570163200, | |
| "step": 8700, | |
| "train_runtime": 4940.0949, | |
| "train_tokens_per_second": 115415.435 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.3031691312789917, | |
| "learning_rate": 0.00029542963788940096, | |
| "loss": 0.658, | |
| "num_input_tokens_seen": 576716800, | |
| "step": 8800, | |
| "train_runtime": 4993.542, | |
| "train_tokens_per_second": 115492.529 | |
| }, | |
| { | |
| "epoch": 0.089, | |
| "grad_norm": 0.3859446048736572, | |
| "learning_rate": 0.00029531230089031505, | |
| "loss": 0.6684, | |
| "num_input_tokens_seen": 583270400, | |
| "step": 8900, | |
| "train_runtime": 5048.437, | |
| "train_tokens_per_second": 115534.848 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.28773999214172363, | |
| "learning_rate": 0.0002951935005979724, | |
| "loss": 0.6585, | |
| "num_input_tokens_seen": 589824000, | |
| "step": 9000, | |
| "train_runtime": 5107.8168, | |
| "train_tokens_per_second": 115474.775 | |
| }, | |
| { | |
| "epoch": 0.091, | |
| "grad_norm": 0.3639370799064636, | |
| "learning_rate": 0.0002950732382086907, | |
| "loss": 0.6573, | |
| "num_input_tokens_seen": 596377600, | |
| "step": 9100, | |
| "train_runtime": 5164.5839, | |
| "train_tokens_per_second": 115474.473 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 0.3325030505657196, | |
| "learning_rate": 0.0002949515149335108, | |
| "loss": 0.6634, | |
| "num_input_tokens_seen": 602931200, | |
| "step": 9200, | |
| "train_runtime": 5219.0432, | |
| "train_tokens_per_second": 115525.237 | |
| }, | |
| { | |
| "epoch": 0.093, | |
| "grad_norm": 0.3296424448490143, | |
| "learning_rate": 0.0002948283319981848, | |
| "loss": 0.65, | |
| "num_input_tokens_seen": 609484800, | |
| "step": 9300, | |
| "train_runtime": 5273.9916, | |
| "train_tokens_per_second": 115564.234 | |
| }, | |
| { | |
| "epoch": 0.094, | |
| "grad_norm": 0.323024719953537, | |
| "learning_rate": 0.00029470369064316354, | |
| "loss": 0.6533, | |
| "num_input_tokens_seen": 616038400, | |
| "step": 9400, | |
| "train_runtime": 5327.8342, | |
| "train_tokens_per_second": 115626.421 | |
| }, | |
| { | |
| "epoch": 0.095, | |
| "grad_norm": 0.31108859181404114, | |
| "learning_rate": 0.00029457759212358397, | |
| "loss": 0.6549, | |
| "num_input_tokens_seen": 622592000, | |
| "step": 9500, | |
| "train_runtime": 5386.4463, | |
| "train_tokens_per_second": 115584.927 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.43114957213401794, | |
| "learning_rate": 0.00029445003770925686, | |
| "loss": 0.6518, | |
| "num_input_tokens_seen": 629145600, | |
| "step": 9600, | |
| "train_runtime": 5441.6464, | |
| "train_tokens_per_second": 115616.773 | |
| }, | |
| { | |
| "epoch": 0.097, | |
| "grad_norm": 0.3269326090812683, | |
| "learning_rate": 0.00029432102868465367, | |
| "loss": 0.6539, | |
| "num_input_tokens_seen": 635699200, | |
| "step": 9700, | |
| "train_runtime": 5497.0335, | |
| "train_tokens_per_second": 115644.046 | |
| }, | |
| { | |
| "epoch": 0.098, | |
| "grad_norm": 0.3119284510612488, | |
| "learning_rate": 0.0002941905663488939, | |
| "loss": 0.6525, | |
| "num_input_tokens_seen": 642252800, | |
| "step": 9800, | |
| "train_runtime": 5557.4204, | |
| "train_tokens_per_second": 115566.712 | |
| }, | |
| { | |
| "epoch": 0.099, | |
| "grad_norm": 0.30968910455703735, | |
| "learning_rate": 0.0002940586520157318, | |
| "loss": 0.6593, | |
| "num_input_tokens_seen": 648806400, | |
| "step": 9900, | |
| "train_runtime": 5612.1781, | |
| "train_tokens_per_second": 115606.881 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.3167921006679535, | |
| "learning_rate": 0.00029392528701354325, | |
| "loss": 0.6516, | |
| "num_input_tokens_seen": 655360000, | |
| "step": 10000, | |
| "train_runtime": 5665.7095, | |
| "train_tokens_per_second": 115671.303 | |
| }, | |
| { | |
| "epoch": 0.101, | |
| "grad_norm": 0.3000660538673401, | |
| "learning_rate": 0.00029379047268531243, | |
| "loss": 0.6579, | |
| "num_input_tokens_seen": 661913600, | |
| "step": 10100, | |
| "train_runtime": 5721.9414, | |
| "train_tokens_per_second": 115679.899 | |
| }, | |
| { | |
| "epoch": 0.102, | |
| "grad_norm": 0.30189189314842224, | |
| "learning_rate": 0.00029365421038861795, | |
| "loss": 0.6594, | |
| "num_input_tokens_seen": 668467200, | |
| "step": 10200, | |
| "train_runtime": 5777.1263, | |
| "train_tokens_per_second": 115709.294 | |
| }, | |
| { | |
| "epoch": 0.103, | |
| "grad_norm": 0.2689358592033386, | |
| "learning_rate": 0.0002935165014956198, | |
| "loss": 0.6527, | |
| "num_input_tokens_seen": 675020800, | |
| "step": 10300, | |
| "train_runtime": 5832.7222, | |
| "train_tokens_per_second": 115729.977 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.40012845396995544, | |
| "learning_rate": 0.0002933773473930448, | |
| "loss": 0.6442, | |
| "num_input_tokens_seen": 681574400, | |
| "step": 10400, | |
| "train_runtime": 5888.393, | |
| "train_tokens_per_second": 115748.796 | |
| }, | |
| { | |
| "epoch": 0.105, | |
| "grad_norm": 0.26425933837890625, | |
| "learning_rate": 0.0002932367494821734, | |
| "loss": 0.655, | |
| "num_input_tokens_seen": 688128000, | |
| "step": 10500, | |
| "train_runtime": 5949.719, | |
| "train_tokens_per_second": 115657.227 | |
| }, | |
| { | |
| "epoch": 0.106, | |
| "grad_norm": 0.3352152705192566, | |
| "learning_rate": 0.00029309470917882497, | |
| "loss": 0.6513, | |
| "num_input_tokens_seen": 694681600, | |
| "step": 10600, | |
| "train_runtime": 6005.1104, | |
| "train_tokens_per_second": 115681.738 | |
| }, | |
| { | |
| "epoch": 0.107, | |
| "grad_norm": 0.2809161841869354, | |
| "learning_rate": 0.0002929512279133437, | |
| "loss": 0.6544, | |
| "num_input_tokens_seen": 701235200, | |
| "step": 10700, | |
| "train_runtime": 6060.3088, | |
| "train_tokens_per_second": 115709.482 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 0.29120925068855286, | |
| "learning_rate": 0.0002928063071305844, | |
| "loss": 0.6502, | |
| "num_input_tokens_seen": 707788800, | |
| "step": 10800, | |
| "train_runtime": 6114.2187, | |
| "train_tokens_per_second": 115761.119 | |
| }, | |
| { | |
| "epoch": 0.109, | |
| "grad_norm": 0.312067449092865, | |
| "learning_rate": 0.0002926599482898978, | |
| "loss": 0.6495, | |
| "num_input_tokens_seen": 714342400, | |
| "step": 10900, | |
| "train_runtime": 6169.1634, | |
| "train_tokens_per_second": 115792.426 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.4553526043891907, | |
| "learning_rate": 0.00029251215286511573, | |
| "loss": 0.6508, | |
| "num_input_tokens_seen": 720896000, | |
| "step": 11000, | |
| "train_runtime": 6230.1722, | |
| "train_tokens_per_second": 115710.445 | |
| }, | |
| { | |
| "epoch": 0.111, | |
| "grad_norm": 0.2880023419857025, | |
| "learning_rate": 0.00029236292234453647, | |
| "loss": 0.6476, | |
| "num_input_tokens_seen": 727449600, | |
| "step": 11100, | |
| "train_runtime": 6284.648, | |
| "train_tokens_per_second": 115750.253 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.3704841136932373, | |
| "learning_rate": 0.0002922122582309097, | |
| "loss": 0.6559, | |
| "num_input_tokens_seen": 734003200, | |
| "step": 11200, | |
| "train_runtime": 6340.8669, | |
| "train_tokens_per_second": 115757.547 | |
| }, | |
| { | |
| "epoch": 0.113, | |
| "grad_norm": 0.34455180168151855, | |
| "learning_rate": 0.0002920601620414215, | |
| "loss": 0.6482, | |
| "num_input_tokens_seen": 740556800, | |
| "step": 11300, | |
| "train_runtime": 6393.5327, | |
| "train_tokens_per_second": 115829.046 | |
| }, | |
| { | |
| "epoch": 0.114, | |
| "grad_norm": 0.3323168456554413, | |
| "learning_rate": 0.0002919066353076786, | |
| "loss": 0.6489, | |
| "num_input_tokens_seen": 747110400, | |
| "step": 11400, | |
| "train_runtime": 6449.7175, | |
| "train_tokens_per_second": 115836.144 | |
| }, | |
| { | |
| "epoch": 0.115, | |
| "grad_norm": 0.4480803608894348, | |
| "learning_rate": 0.00029175167957569366, | |
| "loss": 0.6492, | |
| "num_input_tokens_seen": 753664000, | |
| "step": 11500, | |
| "train_runtime": 6503.6523, | |
| "train_tokens_per_second": 115883.194 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 0.2677996754646301, | |
| "learning_rate": 0.0002915952964058691, | |
| "loss": 0.6462, | |
| "num_input_tokens_seen": 760217600, | |
| "step": 11600, | |
| "train_runtime": 6565.0474, | |
| "train_tokens_per_second": 115797.731 | |
| }, | |
| { | |
| "epoch": 0.117, | |
| "grad_norm": 0.2838340699672699, | |
| "learning_rate": 0.00029143748737298173, | |
| "loss": 0.6574, | |
| "num_input_tokens_seen": 766771200, | |
| "step": 11700, | |
| "train_runtime": 6620.4342, | |
| "train_tokens_per_second": 115818.868 | |
| }, | |
| { | |
| "epoch": 0.118, | |
| "grad_norm": 0.3221312165260315, | |
| "learning_rate": 0.00029127825406616677, | |
| "loss": 0.6456, | |
| "num_input_tokens_seen": 773324800, | |
| "step": 11800, | |
| "train_runtime": 6676.1674, | |
| "train_tokens_per_second": 115833.645 | |
| }, | |
| { | |
| "epoch": 0.119, | |
| "grad_norm": 0.2825019657611847, | |
| "learning_rate": 0.0002911175980889019, | |
| "loss": 0.6526, | |
| "num_input_tokens_seen": 779878400, | |
| "step": 11900, | |
| "train_runtime": 6730.5574, | |
| "train_tokens_per_second": 115871.295 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.38775157928466797, | |
| "learning_rate": 0.00029095552105899095, | |
| "loss": 0.6454, | |
| "num_input_tokens_seen": 786432000, | |
| "step": 12000, | |
| "train_runtime": 6784.0533, | |
| "train_tokens_per_second": 115923.617 | |
| }, | |
| { | |
| "epoch": 0.121, | |
| "grad_norm": 0.32168275117874146, | |
| "learning_rate": 0.0002907920246085478, | |
| "loss": 0.644, | |
| "num_input_tokens_seen": 792985600, | |
| "step": 12100, | |
| "train_runtime": 6844.3129, | |
| "train_tokens_per_second": 115860.512 | |
| }, | |
| { | |
| "epoch": 0.122, | |
| "grad_norm": 0.3085991442203522, | |
| "learning_rate": 0.00029062711038397996, | |
| "loss": 0.6609, | |
| "num_input_tokens_seen": 799539200, | |
| "step": 12200, | |
| "train_runtime": 6899.148, | |
| "train_tokens_per_second": 115889.557 | |
| }, | |
| { | |
| "epoch": 0.123, | |
| "grad_norm": 0.26165974140167236, | |
| "learning_rate": 0.00029046078004597175, | |
| "loss": 0.6436, | |
| "num_input_tokens_seen": 806092800, | |
| "step": 12300, | |
| "train_runtime": 6953.2129, | |
| "train_tokens_per_second": 115930.983 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 0.5689886212348938, | |
| "learning_rate": 0.00029029303526946796, | |
| "loss": 0.6432, | |
| "num_input_tokens_seen": 812646400, | |
| "step": 12400, | |
| "train_runtime": 7008.0879, | |
| "train_tokens_per_second": 115958.363 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 0.3152099847793579, | |
| "learning_rate": 0.0002901238777436565, | |
| "loss": 0.6391, | |
| "num_input_tokens_seen": 819200000, | |
| "step": 12500, | |
| "train_runtime": 7067.2869, | |
| "train_tokens_per_second": 115914.355 | |
| }, | |
| { | |
| "epoch": 0.126, | |
| "grad_norm": 0.28095683455467224, | |
| "learning_rate": 0.00028995330917195184, | |
| "loss": 0.645, | |
| "num_input_tokens_seen": 825753600, | |
| "step": 12600, | |
| "train_runtime": 7122.3035, | |
| "train_tokens_per_second": 115939.121 | |
| }, | |
| { | |
| "epoch": 0.127, | |
| "grad_norm": 0.271088570356369, | |
| "learning_rate": 0.00028978133127197765, | |
| "loss": 0.6452, | |
| "num_input_tokens_seen": 832307200, | |
| "step": 12700, | |
| "train_runtime": 7177.0079, | |
| "train_tokens_per_second": 115968.55 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.3357028067111969, | |
| "learning_rate": 0.0002896079457755493, | |
| "loss": 0.6477, | |
| "num_input_tokens_seen": 838860800, | |
| "step": 12800, | |
| "train_runtime": 7232.8071, | |
| "train_tokens_per_second": 115979.977 | |
| }, | |
| { | |
| "epoch": 0.129, | |
| "grad_norm": 0.4945499897003174, | |
| "learning_rate": 0.000289433154428657, | |
| "loss": 0.6455, | |
| "num_input_tokens_seen": 845414400, | |
| "step": 12900, | |
| "train_runtime": 7287.7238, | |
| "train_tokens_per_second": 116005.274 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.43597710132598877, | |
| "learning_rate": 0.0002892569589914476, | |
| "loss": 0.6422, | |
| "num_input_tokens_seen": 851968000, | |
| "step": 13000, | |
| "train_runtime": 7347.1365, | |
| "train_tokens_per_second": 115959.19 | |
| }, | |
| { | |
| "epoch": 0.131, | |
| "grad_norm": 0.2706185281276703, | |
| "learning_rate": 0.0002890793612382072, | |
| "loss": 0.6434, | |
| "num_input_tokens_seen": 858521600, | |
| "step": 13100, | |
| "train_runtime": 7401.7618, | |
| "train_tokens_per_second": 115988.818 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 0.22793404757976532, | |
| "learning_rate": 0.0002889003629573432, | |
| "loss": 0.646, | |
| "num_input_tokens_seen": 865075200, | |
| "step": 13200, | |
| "train_runtime": 7457.327, | |
| "train_tokens_per_second": 116003.389 | |
| }, | |
| { | |
| "epoch": 0.133, | |
| "grad_norm": 0.2563342750072479, | |
| "learning_rate": 0.00028871996595136626, | |
| "loss": 0.6503, | |
| "num_input_tokens_seen": 871628800, | |
| "step": 13300, | |
| "train_runtime": 7511.6003, | |
| "train_tokens_per_second": 116037.697 | |
| }, | |
| { | |
| "epoch": 0.134, | |
| "grad_norm": 0.30800577998161316, | |
| "learning_rate": 0.0002885381720368723, | |
| "loss": 0.6378, | |
| "num_input_tokens_seen": 878182400, | |
| "step": 13400, | |
| "train_runtime": 7571.585, | |
| "train_tokens_per_second": 115983.959 | |
| }, | |
| { | |
| "epoch": 0.135, | |
| "grad_norm": 0.2970588505268097, | |
| "learning_rate": 0.000288354983044524, | |
| "loss": 0.6375, | |
| "num_input_tokens_seen": 884736000, | |
| "step": 13500, | |
| "train_runtime": 7625.682, | |
| "train_tokens_per_second": 116020.573 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.24373579025268555, | |
| "learning_rate": 0.00028817040081903245, | |
| "loss": 0.6436, | |
| "num_input_tokens_seen": 891289600, | |
| "step": 13600, | |
| "train_runtime": 7679.3213, | |
| "train_tokens_per_second": 116063.591 | |
| }, | |
| { | |
| "epoch": 0.137, | |
| "grad_norm": 0.6528549194335938, | |
| "learning_rate": 0.00028798442721913867, | |
| "loss": 0.639, | |
| "num_input_tokens_seen": 897843200, | |
| "step": 13700, | |
| "train_runtime": 7735.7681, | |
| "train_tokens_per_second": 116063.872 | |
| }, | |
| { | |
| "epoch": 0.138, | |
| "grad_norm": 0.2750374376773834, | |
| "learning_rate": 0.00028779706411759465, | |
| "loss": 0.6362, | |
| "num_input_tokens_seen": 904396800, | |
| "step": 13800, | |
| "train_runtime": 7796.3703, | |
| "train_tokens_per_second": 116002.288 | |
| }, | |
| { | |
| "epoch": 0.139, | |
| "grad_norm": 0.3658313751220703, | |
| "learning_rate": 0.00028760831340114484, | |
| "loss": 0.6424, | |
| "num_input_tokens_seen": 910950400, | |
| "step": 13900, | |
| "train_runtime": 7851.8089, | |
| "train_tokens_per_second": 116017.903 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.2838168740272522, | |
| "learning_rate": 0.00028741817697050683, | |
| "loss": 0.6424, | |
| "num_input_tokens_seen": 917504000, | |
| "step": 14000, | |
| "train_runtime": 7906.2377, | |
| "train_tokens_per_second": 116048.117 | |
| }, | |
| { | |
| "epoch": 0.141, | |
| "grad_norm": 0.30455613136291504, | |
| "learning_rate": 0.00028722665674035233, | |
| "loss": 0.637, | |
| "num_input_tokens_seen": 924057600, | |
| "step": 14100, | |
| "train_runtime": 7961.9367, | |
| "train_tokens_per_second": 116059.401 | |
| }, | |
| { | |
| "epoch": 0.142, | |
| "grad_norm": 0.3287622630596161, | |
| "learning_rate": 0.0002870337546392879, | |
| "loss": 0.6612, | |
| "num_input_tokens_seen": 930611200, | |
| "step": 14200, | |
| "train_runtime": 8016.7903, | |
| "train_tokens_per_second": 116082.767 | |
| }, | |
| { | |
| "epoch": 0.143, | |
| "grad_norm": 0.2602579593658447, | |
| "learning_rate": 0.00028683947260983576, | |
| "loss": 0.6433, | |
| "num_input_tokens_seen": 937164800, | |
| "step": 14300, | |
| "train_runtime": 8072.3352, | |
| "train_tokens_per_second": 116095.873 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.4514355957508087, | |
| "learning_rate": 0.00028664381260841356, | |
| "loss": 0.6346, | |
| "num_input_tokens_seen": 943718400, | |
| "step": 14400, | |
| "train_runtime": 8133.1299, | |
| "train_tokens_per_second": 116033.853 | |
| }, | |
| { | |
| "epoch": 0.145, | |
| "grad_norm": 0.7613756060600281, | |
| "learning_rate": 0.0002864467766053154, | |
| "loss": 0.6383, | |
| "num_input_tokens_seen": 950272000, | |
| "step": 14500, | |
| "train_runtime": 8186.629, | |
| "train_tokens_per_second": 116076.103 | |
| }, | |
| { | |
| "epoch": 0.146, | |
| "grad_norm": 0.28044015169143677, | |
| "learning_rate": 0.00028624836658469165, | |
| "loss": 0.6363, | |
| "num_input_tokens_seen": 956825600, | |
| "step": 14600, | |
| "train_runtime": 8246.5064, | |
| "train_tokens_per_second": 116027.995 | |
| }, | |
| { | |
| "epoch": 0.147, | |
| "grad_norm": 0.3139192759990692, | |
| "learning_rate": 0.00028604858454452906, | |
| "loss": 0.65, | |
| "num_input_tokens_seen": 963379200, | |
| "step": 14700, | |
| "train_runtime": 8296.4517, | |
| "train_tokens_per_second": 116119.424 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 0.33300819993019104, | |
| "learning_rate": 0.00028584743249663057, | |
| "loss": 0.6409, | |
| "num_input_tokens_seen": 969932800, | |
| "step": 14800, | |
| "train_runtime": 8351.7667, | |
| "train_tokens_per_second": 116135.045 | |
| }, | |
| { | |
| "epoch": 0.149, | |
| "grad_norm": 0.35637542605400085, | |
| "learning_rate": 0.000285644912466595, | |
| "loss": 0.6356, | |
| "num_input_tokens_seen": 976486400, | |
| "step": 14900, | |
| "train_runtime": 8412.3756, | |
| "train_tokens_per_second": 116077.366 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.2518846392631531, | |
| "learning_rate": 0.00028544102649379684, | |
| "loss": 0.6435, | |
| "num_input_tokens_seen": 983040000, | |
| "step": 15000, | |
| "train_runtime": 8466.8259, | |
| "train_tokens_per_second": 116104.904 | |
| }, | |
| { | |
| "epoch": 0.151, | |
| "grad_norm": 0.2983591854572296, | |
| "learning_rate": 0.00028523577663136556, | |
| "loss": 0.638, | |
| "num_input_tokens_seen": 989593600, | |
| "step": 15100, | |
| "train_runtime": 8526.2848, | |
| "train_tokens_per_second": 116063.869 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.26403748989105225, | |
| "learning_rate": 0.000285029164946165, | |
| "loss": 0.6439, | |
| "num_input_tokens_seen": 996147200, | |
| "step": 15200, | |
| "train_runtime": 8581.358, | |
| "train_tokens_per_second": 116082.7 | |
| }, | |
| { | |
| "epoch": 0.153, | |
| "grad_norm": 0.3652186393737793, | |
| "learning_rate": 0.0002848211935187725, | |
| "loss": 0.6499, | |
| "num_input_tokens_seen": 1002700800, | |
| "step": 15300, | |
| "train_runtime": 8637.0354, | |
| "train_tokens_per_second": 116093.167 | |
| }, | |
| { | |
| "epoch": 0.154, | |
| "grad_norm": 0.3816509246826172, | |
| "learning_rate": 0.0002846118644434581, | |
| "loss": 0.6355, | |
| "num_input_tokens_seen": 1009254400, | |
| "step": 15400, | |
| "train_runtime": 8691.2267, | |
| "train_tokens_per_second": 116123.355 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 0.35392388701438904, | |
| "learning_rate": 0.00028440117982816326, | |
| "loss": 0.6286, | |
| "num_input_tokens_seen": 1015808000, | |
| "step": 15500, | |
| "train_runtime": 8745.8114, | |
| "train_tokens_per_second": 116147.943 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 0.34099990129470825, | |
| "learning_rate": 0.0002841891417944796, | |
| "loss": 0.6396, | |
| "num_input_tokens_seen": 1022361600, | |
| "step": 15600, | |
| "train_runtime": 8805.4075, | |
| "train_tokens_per_second": 116106.108 | |
| }, | |
| { | |
| "epoch": 0.157, | |
| "grad_norm": 0.2872321605682373, | |
| "learning_rate": 0.0002839757524776279, | |
| "loss": 0.6436, | |
| "num_input_tokens_seen": 1028915200, | |
| "step": 15700, | |
| "train_runtime": 8860.6468, | |
| "train_tokens_per_second": 116121.907 | |
| }, | |
| { | |
| "epoch": 0.158, | |
| "grad_norm": 0.2647675573825836, | |
| "learning_rate": 0.0002837610140264361, | |
| "loss": 0.636, | |
| "num_input_tokens_seen": 1035468800, | |
| "step": 15800, | |
| "train_runtime": 8915.1622, | |
| "train_tokens_per_second": 116146.94 | |
| }, | |
| { | |
| "epoch": 0.159, | |
| "grad_norm": 0.28176337480545044, | |
| "learning_rate": 0.0002835449286033182, | |
| "loss": 0.6322, | |
| "num_input_tokens_seen": 1042022400, | |
| "step": 15900, | |
| "train_runtime": 8975.5504, | |
| "train_tokens_per_second": 116095.655 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.31414464116096497, | |
| "learning_rate": 0.0002833274983842518, | |
| "loss": 0.6282, | |
| "num_input_tokens_seen": 1048576000, | |
| "step": 16000, | |
| "train_runtime": 9029.8157, | |
| "train_tokens_per_second": 116123.744 | |
| }, | |
| { | |
| "epoch": 0.161, | |
| "grad_norm": 0.3065328598022461, | |
| "learning_rate": 0.0002831087255587569, | |
| "loss": 0.6333, | |
| "num_input_tokens_seen": 1055129600, | |
| "step": 16100, | |
| "train_runtime": 9084.3619, | |
| "train_tokens_per_second": 116147.905 | |
| }, | |
| { | |
| "epoch": 0.162, | |
| "grad_norm": 0.28930503129959106, | |
| "learning_rate": 0.0002828886123298734, | |
| "loss": 0.6333, | |
| "num_input_tokens_seen": 1061683200, | |
| "step": 16200, | |
| "train_runtime": 9139.4895, | |
| "train_tokens_per_second": 116164.388 | |
| }, | |
| { | |
| "epoch": 0.163, | |
| "grad_norm": 0.28349098563194275, | |
| "learning_rate": 0.00028266716091413906, | |
| "loss": 0.6368, | |
| "num_input_tokens_seen": 1068236800, | |
| "step": 16300, | |
| "train_runtime": 9194.7322, | |
| "train_tokens_per_second": 116179.219 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 0.2510078251361847, | |
| "learning_rate": 0.0002824443735415673, | |
| "loss": 0.6348, | |
| "num_input_tokens_seen": 1074790400, | |
| "step": 16400, | |
| "train_runtime": 9254.9047, | |
| "train_tokens_per_second": 116131.979 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 0.35605230927467346, | |
| "learning_rate": 0.0002822202524556243, | |
| "loss": 0.639, | |
| "num_input_tokens_seen": 1081344000, | |
| "step": 16500, | |
| "train_runtime": 9310.1385, | |
| "train_tokens_per_second": 116146.929 | |
| }, | |
| { | |
| "epoch": 0.166, | |
| "grad_norm": 0.518841564655304, | |
| "learning_rate": 0.00028199479991320695, | |
| "loss": 0.6424, | |
| "num_input_tokens_seen": 1087897600, | |
| "step": 16600, | |
| "train_runtime": 9365.0545, | |
| "train_tokens_per_second": 116165.645 | |
| }, | |
| { | |
| "epoch": 0.167, | |
| "grad_norm": 0.2293197512626648, | |
| "learning_rate": 0.00028176801818461994, | |
| "loss": 0.6316, | |
| "num_input_tokens_seen": 1094451200, | |
| "step": 16700, | |
| "train_runtime": 9419.4884, | |
| "train_tokens_per_second": 116190.09 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.2790850102901459, | |
| "learning_rate": 0.00028153990955355273, | |
| "loss": 0.636, | |
| "num_input_tokens_seen": 1101004800, | |
| "step": 16800, | |
| "train_runtime": 9476.046, | |
| "train_tokens_per_second": 116188.207 | |
| }, | |
| { | |
| "epoch": 0.169, | |
| "grad_norm": 0.2565983235836029, | |
| "learning_rate": 0.00028131047631705665, | |
| "loss": 0.6351, | |
| "num_input_tokens_seen": 1107558400, | |
| "step": 16900, | |
| "train_runtime": 9536.06, | |
| "train_tokens_per_second": 116144.236 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.2592833638191223, | |
| "learning_rate": 0.00028107972078552187, | |
| "loss": 0.637, | |
| "num_input_tokens_seen": 1114112000, | |
| "step": 17000, | |
| "train_runtime": 9591.4651, | |
| "train_tokens_per_second": 116156.603 | |
| }, | |
| { | |
| "epoch": 0.171, | |
| "grad_norm": 0.2744060754776001, | |
| "learning_rate": 0.0002808476452826541, | |
| "loss": 0.6313, | |
| "num_input_tokens_seen": 1120665600, | |
| "step": 17100, | |
| "train_runtime": 9644.9448, | |
| "train_tokens_per_second": 116192.018 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 0.2991725504398346, | |
| "learning_rate": 0.00028061425214545094, | |
| "loss": 0.6298, | |
| "num_input_tokens_seen": 1127219200, | |
| "step": 17200, | |
| "train_runtime": 9706.1229, | |
| "train_tokens_per_second": 116134.857 | |
| }, | |
| { | |
| "epoch": 0.173, | |
| "grad_norm": 0.3391658365726471, | |
| "learning_rate": 0.00028037954372417883, | |
| "loss": 0.6316, | |
| "num_input_tokens_seen": 1133772800, | |
| "step": 17300, | |
| "train_runtime": 9760.541, | |
| "train_tokens_per_second": 116158.807 | |
| }, | |
| { | |
| "epoch": 0.174, | |
| "grad_norm": 0.3975388705730438, | |
| "learning_rate": 0.0002801435223823488, | |
| "loss": 0.639, | |
| "num_input_tokens_seen": 1140326400, | |
| "step": 17400, | |
| "train_runtime": 9816.1595, | |
| "train_tokens_per_second": 116168.284 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 0.3754902482032776, | |
| "learning_rate": 0.00027990619049669336, | |
| "loss": 0.6391, | |
| "num_input_tokens_seen": 1146880000, | |
| "step": 17500, | |
| "train_runtime": 9870.38, | |
| "train_tokens_per_second": 116194.108 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.2742336094379425, | |
| "learning_rate": 0.00027966755045714177, | |
| "loss": 0.6425, | |
| "num_input_tokens_seen": 1153433600, | |
| "step": 17600, | |
| "train_runtime": 9924.9127, | |
| "train_tokens_per_second": 116215.994 | |
| }, | |
| { | |
| "epoch": 0.177, | |
| "grad_norm": 0.31107792258262634, | |
| "learning_rate": 0.00027942760466679673, | |
| "loss": 0.6312, | |
| "num_input_tokens_seen": 1159987200, | |
| "step": 17700, | |
| "train_runtime": 9985.7287, | |
| "train_tokens_per_second": 116164.502 | |
| }, | |
| { | |
| "epoch": 0.178, | |
| "grad_norm": 0.2781549096107483, | |
| "learning_rate": 0.00027918635554190956, | |
| "loss": 0.6447, | |
| "num_input_tokens_seen": 1166540800, | |
| "step": 17800, | |
| "train_runtime": 10039.8303, | |
| "train_tokens_per_second": 116191.287 | |
| }, | |
| { | |
| "epoch": 0.179, | |
| "grad_norm": 0.28636643290519714, | |
| "learning_rate": 0.00027894380551185636, | |
| "loss": 0.6383, | |
| "num_input_tokens_seen": 1173094400, | |
| "step": 17900, | |
| "train_runtime": 10096.4761, | |
| "train_tokens_per_second": 116188.498 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.25008583068847656, | |
| "learning_rate": 0.00027869995701911314, | |
| "loss": 0.6286, | |
| "num_input_tokens_seen": 1179648000, | |
| "step": 18000, | |
| "train_runtime": 10150.5104, | |
| "train_tokens_per_second": 116215.634 | |
| }, | |
| { | |
| "epoch": 0.181, | |
| "grad_norm": 0.2600831985473633, | |
| "learning_rate": 0.0002784548125192316, | |
| "loss": 0.6265, | |
| "num_input_tokens_seen": 1186201600, | |
| "step": 18100, | |
| "train_runtime": 10205.6407, | |
| "train_tokens_per_second": 116229.998 | |
| }, | |
| { | |
| "epoch": 0.182, | |
| "grad_norm": 0.2586776614189148, | |
| "learning_rate": 0.0002782083744808141, | |
| "loss": 0.6292, | |
| "num_input_tokens_seen": 1192755200, | |
| "step": 18200, | |
| "train_runtime": 10267.3133, | |
| "train_tokens_per_second": 116170.138 | |
| }, | |
| { | |
| "epoch": 0.183, | |
| "grad_norm": 0.6283107399940491, | |
| "learning_rate": 0.000277960645385489, | |
| "loss": 0.6364, | |
| "num_input_tokens_seen": 1199308800, | |
| "step": 18300, | |
| "train_runtime": 10323.5921, | |
| "train_tokens_per_second": 116171.657 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.2882465124130249, | |
| "learning_rate": 0.00027771162772788544, | |
| "loss": 0.6318, | |
| "num_input_tokens_seen": 1205862400, | |
| "step": 18400, | |
| "train_runtime": 10377.5746, | |
| "train_tokens_per_second": 116198.866 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 0.3091796934604645, | |
| "learning_rate": 0.00027746132401560857, | |
| "loss": 0.6269, | |
| "num_input_tokens_seen": 1212416000, | |
| "step": 18500, | |
| "train_runtime": 10432.3665, | |
| "train_tokens_per_second": 116216.776 | |
| }, | |
| { | |
| "epoch": 0.186, | |
| "grad_norm": 0.280862957239151, | |
| "learning_rate": 0.0002772097367692139, | |
| "loss": 0.6323, | |
| "num_input_tokens_seen": 1218969600, | |
| "step": 18600, | |
| "train_runtime": 10487.2106, | |
| "train_tokens_per_second": 116233.921 | |
| }, | |
| { | |
| "epoch": 0.187, | |
| "grad_norm": 0.24253763258457184, | |
| "learning_rate": 0.00027695686852218226, | |
| "loss": 0.6374, | |
| "num_input_tokens_seen": 1225523200, | |
| "step": 18700, | |
| "train_runtime": 10547.3913, | |
| "train_tokens_per_second": 116192.067 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 0.26167231798171997, | |
| "learning_rate": 0.00027670272182089416, | |
| "loss": 0.6357, | |
| "num_input_tokens_seen": 1232076800, | |
| "step": 18800, | |
| "train_runtime": 10602.8292, | |
| "train_tokens_per_second": 116202.645 | |
| }, | |
| { | |
| "epoch": 0.189, | |
| "grad_norm": 0.31137940287590027, | |
| "learning_rate": 0.0002764472992246039, | |
| "loss": 0.6305, | |
| "num_input_tokens_seen": 1238630400, | |
| "step": 18900, | |
| "train_runtime": 10657.9196, | |
| "train_tokens_per_second": 116216.903 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.2601984441280365, | |
| "learning_rate": 0.0002761906033054143, | |
| "loss": 0.6301, | |
| "num_input_tokens_seen": 1245184000, | |
| "step": 19000, | |
| "train_runtime": 10712.1812, | |
| "train_tokens_per_second": 116240.005 | |
| }, | |
| { | |
| "epoch": 0.191, | |
| "grad_norm": 0.42287951707839966, | |
| "learning_rate": 0.00027593263664825045, | |
| "loss": 0.6324, | |
| "num_input_tokens_seen": 1251737600, | |
| "step": 19100, | |
| "train_runtime": 10766.7423, | |
| "train_tokens_per_second": 116259.642 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.28982725739479065, | |
| "learning_rate": 0.00027567340185083363, | |
| "loss": 0.6291, | |
| "num_input_tokens_seen": 1258291200, | |
| "step": 19200, | |
| "train_runtime": 10826.785, | |
| "train_tokens_per_second": 116220.208 | |
| }, | |
| { | |
| "epoch": 0.193, | |
| "grad_norm": 0.2896488308906555, | |
| "learning_rate": 0.00027541290152365537, | |
| "loss": 0.6354, | |
| "num_input_tokens_seen": 1264844800, | |
| "step": 19300, | |
| "train_runtime": 10881.8628, | |
| "train_tokens_per_second": 116234.216 | |
| }, | |
| { | |
| "epoch": 0.194, | |
| "grad_norm": 0.27890026569366455, | |
| "learning_rate": 0.00027515113828995117, | |
| "loss": 0.6339, | |
| "num_input_tokens_seen": 1271398400, | |
| "step": 19400, | |
| "train_runtime": 10937.449, | |
| "train_tokens_per_second": 116242.682 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 0.2991676926612854, | |
| "learning_rate": 0.00027488811478567374, | |
| "loss": 0.6282, | |
| "num_input_tokens_seen": 1277952000, | |
| "step": 19500, | |
| "train_runtime": 10991.5778, | |
| "train_tokens_per_second": 116266.475 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 0.2899467349052429, | |
| "learning_rate": 0.0002746238336594671, | |
| "loss": 0.6264, | |
| "num_input_tokens_seen": 1284505600, | |
| "step": 19600, | |
| "train_runtime": 11050.8685, | |
| "train_tokens_per_second": 116235.714 | |
| }, | |
| { | |
| "epoch": 0.197, | |
| "grad_norm": 0.32908034324645996, | |
| "learning_rate": 0.00027435829757263894, | |
| "loss": 0.6321, | |
| "num_input_tokens_seen": 1291059200, | |
| "step": 19700, | |
| "train_runtime": 11105.6721, | |
| "train_tokens_per_second": 116252.234 | |
| }, | |
| { | |
| "epoch": 0.198, | |
| "grad_norm": 0.23919178545475006, | |
| "learning_rate": 0.0002740915091991349, | |
| "loss": 0.6342, | |
| "num_input_tokens_seen": 1297612800, | |
| "step": 19800, | |
| "train_runtime": 11160.1183, | |
| "train_tokens_per_second": 116272.316 | |
| }, | |
| { | |
| "epoch": 0.199, | |
| "grad_norm": 0.5094599723815918, | |
| "learning_rate": 0.0002738234712255109, | |
| "loss": 0.6317, | |
| "num_input_tokens_seen": 1304166400, | |
| "step": 19900, | |
| "train_runtime": 11221.0166, | |
| "train_tokens_per_second": 116225.333 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.37415555119514465, | |
| "learning_rate": 0.00027355418635090635, | |
| "loss": 0.634, | |
| "num_input_tokens_seen": 1310720000, | |
| "step": 20000, | |
| "train_runtime": 11276.9767, | |
| "train_tokens_per_second": 116229.734 | |
| }, | |
| { | |
| "epoch": 0.201, | |
| "grad_norm": 0.39444148540496826, | |
| "learning_rate": 0.000273283657287017, | |
| "loss": 0.6272, | |
| "num_input_tokens_seen": 1317273600, | |
| "step": 20100, | |
| "train_runtime": 11332.3114, | |
| "train_tokens_per_second": 116240.505 | |
| }, | |
| { | |
| "epoch": 0.202, | |
| "grad_norm": 0.2751108705997467, | |
| "learning_rate": 0.00027301188675806745, | |
| "loss": 0.6385, | |
| "num_input_tokens_seen": 1323827200, | |
| "step": 20200, | |
| "train_runtime": 11387.621, | |
| "train_tokens_per_second": 116251.427 | |
| }, | |
| { | |
| "epoch": 0.203, | |
| "grad_norm": 0.2579997479915619, | |
| "learning_rate": 0.0002727388775007839, | |
| "loss": 0.6278, | |
| "num_input_tokens_seen": 1330380800, | |
| "step": 20300, | |
| "train_runtime": 11441.506, | |
| "train_tokens_per_second": 116276.721 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 0.27806708216667175, | |
| "learning_rate": 0.0002724646322643666, | |
| "loss": 0.6292, | |
| "num_input_tokens_seen": 1336934400, | |
| "step": 20400, | |
| "train_runtime": 11496.0966, | |
| "train_tokens_per_second": 116294.638 | |
| }, | |
| { | |
| "epoch": 0.205, | |
| "grad_norm": 0.48101556301116943, | |
| "learning_rate": 0.000272189153810462, | |
| "loss": 0.6335, | |
| "num_input_tokens_seen": 1343488000, | |
| "step": 20500, | |
| "train_runtime": 11557.1454, | |
| "train_tokens_per_second": 116247.391 | |
| }, | |
| { | |
| "epoch": 0.206, | |
| "grad_norm": 0.24878458678722382, | |
| "learning_rate": 0.0002719124449131351, | |
| "loss": 0.6309, | |
| "num_input_tokens_seen": 1350041600, | |
| "step": 20600, | |
| "train_runtime": 11613.0753, | |
| "train_tokens_per_second": 116251.86 | |
| }, | |
| { | |
| "epoch": 0.207, | |
| "grad_norm": 0.2999299466609955, | |
| "learning_rate": 0.00027163450835884144, | |
| "loss": 0.627, | |
| "num_input_tokens_seen": 1356595200, | |
| "step": 20700, | |
| "train_runtime": 11667.4657, | |
| "train_tokens_per_second": 116271.625 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.2567562162876129, | |
| "learning_rate": 0.00027135534694639894, | |
| "loss": 0.6326, | |
| "num_input_tokens_seen": 1363148800, | |
| "step": 20800, | |
| "train_runtime": 11723.8934, | |
| "train_tokens_per_second": 116271.0 | |
| }, | |
| { | |
| "epoch": 0.209, | |
| "grad_norm": 0.3484431207180023, | |
| "learning_rate": 0.00027107496348696003, | |
| "loss": 0.6356, | |
| "num_input_tokens_seen": 1369702400, | |
| "step": 20900, | |
| "train_runtime": 11777.6307, | |
| "train_tokens_per_second": 116296.939 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.23648300766944885, | |
| "learning_rate": 0.00027079336080398296, | |
| "loss": 0.6256, | |
| "num_input_tokens_seen": 1376256000, | |
| "step": 21000, | |
| "train_runtime": 11838.2167, | |
| "train_tokens_per_second": 116255.348 | |
| }, | |
| { | |
| "epoch": 0.211, | |
| "grad_norm": 0.33549532294273376, | |
| "learning_rate": 0.00027051054173320366, | |
| "loss": 0.6271, | |
| "num_input_tokens_seen": 1382809600, | |
| "step": 21100, | |
| "train_runtime": 11893.4157, | |
| "train_tokens_per_second": 116266.819 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 0.2517142593860626, | |
| "learning_rate": 0.000270226509122607, | |
| "loss": 0.6254, | |
| "num_input_tokens_seen": 1389363200, | |
| "step": 21200, | |
| "train_runtime": 11947.4932, | |
| "train_tokens_per_second": 116289.098 | |
| }, | |
| { | |
| "epoch": 0.213, | |
| "grad_norm": 0.2526894807815552, | |
| "learning_rate": 0.0002699412658323983, | |
| "loss": 0.633, | |
| "num_input_tokens_seen": 1395916800, | |
| "step": 21300, | |
| "train_runtime": 12002.0117, | |
| "train_tokens_per_second": 116306.902 | |
| }, | |
| { | |
| "epoch": 0.214, | |
| "grad_norm": 0.283974289894104, | |
| "learning_rate": 0.00026965481473497423, | |
| "loss": 0.6273, | |
| "num_input_tokens_seen": 1402470400, | |
| "step": 21400, | |
| "train_runtime": 12063.5743, | |
| "train_tokens_per_second": 116256.622 | |
| }, | |
| { | |
| "epoch": 0.215, | |
| "grad_norm": 0.24663467705249786, | |
| "learning_rate": 0.0002693671587148942, | |
| "loss": 0.6236, | |
| "num_input_tokens_seen": 1409024000, | |
| "step": 21500, | |
| "train_runtime": 12118.0936, | |
| "train_tokens_per_second": 116274.395 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.30240553617477417, | |
| "learning_rate": 0.0002690783006688511, | |
| "loss": 0.6272, | |
| "num_input_tokens_seen": 1415577600, | |
| "step": 21600, | |
| "train_runtime": 12174.4024, | |
| "train_tokens_per_second": 116274.915 | |
| }, | |
| { | |
| "epoch": 0.217, | |
| "grad_norm": 0.2617557644844055, | |
| "learning_rate": 0.0002687882435056423, | |
| "loss": 0.6256, | |
| "num_input_tokens_seen": 1422131200, | |
| "step": 21700, | |
| "train_runtime": 12229.4977, | |
| "train_tokens_per_second": 116286.967 | |
| }, | |
| { | |
| "epoch": 0.218, | |
| "grad_norm": 0.3469904661178589, | |
| "learning_rate": 0.0002684969901461402, | |
| "loss": 0.634, | |
| "num_input_tokens_seen": 1428684800, | |
| "step": 21800, | |
| "train_runtime": 12284.4989, | |
| "train_tokens_per_second": 116299.803 | |
| }, | |
| { | |
| "epoch": 0.219, | |
| "grad_norm": 0.32695531845092773, | |
| "learning_rate": 0.000268204543523263, | |
| "loss": 0.6343, | |
| "num_input_tokens_seen": 1435238400, | |
| "step": 21900, | |
| "train_runtime": 12340.2707, | |
| "train_tokens_per_second": 116305.261 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.2909862995147705, | |
| "learning_rate": 0.0002679109065819447, | |
| "loss": 0.6275, | |
| "num_input_tokens_seen": 1441792000, | |
| "step": 22000, | |
| "train_runtime": 12395.2144, | |
| "train_tokens_per_second": 116318.44 | |
| }, | |
| { | |
| "epoch": 0.221, | |
| "grad_norm": 0.32462117075920105, | |
| "learning_rate": 0.0002676160822791062, | |
| "loss": 0.6265, | |
| "num_input_tokens_seen": 1448345600, | |
| "step": 22100, | |
| "train_runtime": 12456.5575, | |
| "train_tokens_per_second": 116271.738 | |
| }, | |
| { | |
| "epoch": 0.222, | |
| "grad_norm": 0.32560595870018005, | |
| "learning_rate": 0.00026732007358362496, | |
| "loss": 0.6434, | |
| "num_input_tokens_seen": 1454899200, | |
| "step": 22200, | |
| "train_runtime": 12512.3995, | |
| "train_tokens_per_second": 116276.594 | |
| }, | |
| { | |
| "epoch": 0.223, | |
| "grad_norm": 0.2810288369655609, | |
| "learning_rate": 0.0002670228834763052, | |
| "loss": 0.6289, | |
| "num_input_tokens_seen": 1461452800, | |
| "step": 22300, | |
| "train_runtime": 12567.5646, | |
| "train_tokens_per_second": 116287.67 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.2596298158168793, | |
| "learning_rate": 0.00026672451494984804, | |
| "loss": 0.6287, | |
| "num_input_tokens_seen": 1468006400, | |
| "step": 22400, | |
| "train_runtime": 12622.4346, | |
| "train_tokens_per_second": 116301.367 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 0.2850113809108734, | |
| "learning_rate": 0.0002664249710088213, | |
| "loss": 0.6242, | |
| "num_input_tokens_seen": 1474560000, | |
| "step": 22500, | |
| "train_runtime": 12676.5798, | |
| "train_tokens_per_second": 116321.596 | |
| }, | |
| { | |
| "epoch": 0.226, | |
| "grad_norm": 0.27683302760124207, | |
| "learning_rate": 0.00026612425466962893, | |
| "loss": 0.6206, | |
| "num_input_tokens_seen": 1481113600, | |
| "step": 22600, | |
| "train_runtime": 12737.6237, | |
| "train_tokens_per_second": 116278.643 | |
| }, | |
| { | |
| "epoch": 0.227, | |
| "grad_norm": 0.239657461643219, | |
| "learning_rate": 0.00026582236896048134, | |
| "loss": 0.6225, | |
| "num_input_tokens_seen": 1487667200, | |
| "step": 22700, | |
| "train_runtime": 12792.2969, | |
| "train_tokens_per_second": 116293.987 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 0.23831160366535187, | |
| "learning_rate": 0.00026551931692136413, | |
| "loss": 0.6239, | |
| "num_input_tokens_seen": 1494220800, | |
| "step": 22800, | |
| "train_runtime": 12846.1745, | |
| "train_tokens_per_second": 116316.402 | |
| }, | |
| { | |
| "epoch": 0.229, | |
| "grad_norm": 0.34589797258377075, | |
| "learning_rate": 0.00026521510160400804, | |
| "loss": 0.6247, | |
| "num_input_tokens_seen": 1500774400, | |
| "step": 22900, | |
| "train_runtime": 12900.799, | |
| "train_tokens_per_second": 116331.895 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.2497359812259674, | |
| "learning_rate": 0.00026490972607185793, | |
| "loss": 0.62, | |
| "num_input_tokens_seen": 1507328000, | |
| "step": 23000, | |
| "train_runtime": 12961.1601, | |
| "train_tokens_per_second": 116295.763 | |
| }, | |
| { | |
| "epoch": 0.231, | |
| "grad_norm": 0.24375373125076294, | |
| "learning_rate": 0.0002646031934000421, | |
| "loss": 0.6315, | |
| "num_input_tokens_seen": 1513881600, | |
| "step": 23100, | |
| "train_runtime": 13015.5663, | |
| "train_tokens_per_second": 116313.156 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.26220017671585083, | |
| "learning_rate": 0.00026429550667534095, | |
| "loss": 0.6285, | |
| "num_input_tokens_seen": 1520435200, | |
| "step": 23200, | |
| "train_runtime": 13070.9925, | |
| "train_tokens_per_second": 116321.327 | |
| }, | |
| { | |
| "epoch": 0.233, | |
| "grad_norm": 0.3479808568954468, | |
| "learning_rate": 0.0002639866689961565, | |
| "loss": 0.6212, | |
| "num_input_tokens_seen": 1526988800, | |
| "step": 23300, | |
| "train_runtime": 13127.7973, | |
| "train_tokens_per_second": 116317.213 | |
| }, | |
| { | |
| "epoch": 0.234, | |
| "grad_norm": 0.2807121276855469, | |
| "learning_rate": 0.00026367668347248083, | |
| "loss": 0.6233, | |
| "num_input_tokens_seen": 1533542400, | |
| "step": 23400, | |
| "train_runtime": 13181.874, | |
| "train_tokens_per_second": 116337.206 | |
| }, | |
| { | |
| "epoch": 0.235, | |
| "grad_norm": 0.22995448112487793, | |
| "learning_rate": 0.0002633655532258646, | |
| "loss": 0.6322, | |
| "num_input_tokens_seen": 1540096000, | |
| "step": 23500, | |
| "train_runtime": 13243.5947, | |
| "train_tokens_per_second": 116289.877 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 0.2977929413318634, | |
| "learning_rate": 0.000263053281389386, | |
| "loss": 0.6255, | |
| "num_input_tokens_seen": 1546649600, | |
| "step": 23600, | |
| "train_runtime": 13297.1642, | |
| "train_tokens_per_second": 116314.244 | |
| }, | |
| { | |
| "epoch": 0.237, | |
| "grad_norm": 0.2991272211074829, | |
| "learning_rate": 0.0002627398711076189, | |
| "loss": 0.6178, | |
| "num_input_tokens_seen": 1553203200, | |
| "step": 23700, | |
| "train_runtime": 13352.7857, | |
| "train_tokens_per_second": 116320.537 | |
| }, | |
| { | |
| "epoch": 0.238, | |
| "grad_norm": 0.26155802607536316, | |
| "learning_rate": 0.0002624253255366014, | |
| "loss": 0.6173, | |
| "num_input_tokens_seen": 1559756800, | |
| "step": 23800, | |
| "train_runtime": 13406.4815, | |
| "train_tokens_per_second": 116343.486 | |
| }, | |
| { | |
| "epoch": 0.239, | |
| "grad_norm": 0.3786696493625641, | |
| "learning_rate": 0.0002621096478438039, | |
| "loss": 0.6275, | |
| "num_input_tokens_seen": 1566310400, | |
| "step": 23900, | |
| "train_runtime": 13461.9758, | |
| "train_tokens_per_second": 116350.707 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.347703754901886, | |
| "learning_rate": 0.00026179284120809727, | |
| "loss": 0.6317, | |
| "num_input_tokens_seen": 1572864000, | |
| "step": 24000, | |
| "train_runtime": 13518.3676, | |
| "train_tokens_per_second": 116350.143 | |
| }, | |
| { | |
| "epoch": 0.241, | |
| "grad_norm": 0.2794933021068573, | |
| "learning_rate": 0.0002614749088197208, | |
| "loss": 0.6213, | |
| "num_input_tokens_seen": 1579417600, | |
| "step": 24100, | |
| "train_runtime": 13573.1619, | |
| "train_tokens_per_second": 116363.277 | |
| }, | |
| { | |
| "epoch": 0.242, | |
| "grad_norm": 0.4467460811138153, | |
| "learning_rate": 0.00026115585388025015, | |
| "loss": 0.618, | |
| "num_input_tokens_seen": 1585971200, | |
| "step": 24200, | |
| "train_runtime": 13633.9103, | |
| "train_tokens_per_second": 116325.483 | |
| }, | |
| { | |
| "epoch": 0.243, | |
| "grad_norm": 0.3274633288383484, | |
| "learning_rate": 0.00026083567960256493, | |
| "loss": 0.6198, | |
| "num_input_tokens_seen": 1592524800, | |
| "step": 24300, | |
| "train_runtime": 13690.8223, | |
| "train_tokens_per_second": 116320.61 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 0.26471319794654846, | |
| "learning_rate": 0.00026051438921081667, | |
| "loss": 0.621, | |
| "num_input_tokens_seen": 1599078400, | |
| "step": 24400, | |
| "train_runtime": 13746.056, | |
| "train_tokens_per_second": 116329.979 | |
| }, | |
| { | |
| "epoch": 0.245, | |
| "grad_norm": 0.29486411809921265, | |
| "learning_rate": 0.00026019198594039595, | |
| "loss": 0.6275, | |
| "num_input_tokens_seen": 1605632000, | |
| "step": 24500, | |
| "train_runtime": 13802.5927, | |
| "train_tokens_per_second": 116328.29 | |
| }, | |
| { | |
| "epoch": 0.246, | |
| "grad_norm": 0.31198298931121826, | |
| "learning_rate": 0.00025986847303790026, | |
| "loss": 0.6232, | |
| "num_input_tokens_seen": 1612185600, | |
| "step": 24600, | |
| "train_runtime": 13856.7259, | |
| "train_tokens_per_second": 116346.792 | |
| }, | |
| { | |
| "epoch": 0.247, | |
| "grad_norm": 0.28400614857673645, | |
| "learning_rate": 0.00025954385376110076, | |
| "loss": 0.6213, | |
| "num_input_tokens_seen": 1618739200, | |
| "step": 24700, | |
| "train_runtime": 13911.4019, | |
| "train_tokens_per_second": 116360.609 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.24376364052295685, | |
| "learning_rate": 0.00025921813137891005, | |
| "loss": 0.6279, | |
| "num_input_tokens_seen": 1625292800, | |
| "step": 24800, | |
| "train_runtime": 13968.2353, | |
| "train_tokens_per_second": 116356.345 | |
| }, | |
| { | |
| "epoch": 0.249, | |
| "grad_norm": 0.28743863105773926, | |
| "learning_rate": 0.000258891309171349, | |
| "loss": 0.6238, | |
| "num_input_tokens_seen": 1631846400, | |
| "step": 24900, | |
| "train_runtime": 14022.4846, | |
| "train_tokens_per_second": 116373.557 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.33175599575042725, | |
| "learning_rate": 0.00025856339042951344, | |
| "loss": 0.6159, | |
| "num_input_tokens_seen": 1638400000, | |
| "step": 25000, | |
| "train_runtime": 14076.9493, | |
| "train_tokens_per_second": 116388.855 | |
| }, | |
| { | |
| "epoch": 0.251, | |
| "grad_norm": 0.31394195556640625, | |
| "learning_rate": 0.0002582343784555415, | |
| "loss": 0.6194, | |
| "num_input_tokens_seen": 1644953600, | |
| "step": 25100, | |
| "train_runtime": 14137.6518, | |
| "train_tokens_per_second": 116352.675 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 0.2717108428478241, | |
| "learning_rate": 0.00025790427656258017, | |
| "loss": 0.63, | |
| "num_input_tokens_seen": 1651507200, | |
| "step": 25200, | |
| "train_runtime": 14191.6974, | |
| "train_tokens_per_second": 116371.365 | |
| }, | |
| { | |
| "epoch": 0.253, | |
| "grad_norm": 0.2907046377658844, | |
| "learning_rate": 0.00025757308807475185, | |
| "loss": 0.6214, | |
| "num_input_tokens_seen": 1658060800, | |
| "step": 25300, | |
| "train_runtime": 14247.6012, | |
| "train_tokens_per_second": 116374.734 | |
| }, | |
| { | |
| "epoch": 0.254, | |
| "grad_norm": 0.25791919231414795, | |
| "learning_rate": 0.00025724081632712086, | |
| "loss": 0.6197, | |
| "num_input_tokens_seen": 1664614400, | |
| "step": 25400, | |
| "train_runtime": 14302.1699, | |
| "train_tokens_per_second": 116388.94 | |
| }, | |
| { | |
| "epoch": 0.255, | |
| "grad_norm": 0.23399506509304047, | |
| "learning_rate": 0.0002569074646656601, | |
| "loss": 0.6145, | |
| "num_input_tokens_seen": 1671168000, | |
| "step": 25500, | |
| "train_runtime": 14362.8828, | |
| "train_tokens_per_second": 116353.243 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.3080284297466278, | |
| "learning_rate": 0.00025657303644721695, | |
| "loss": 0.6288, | |
| "num_input_tokens_seen": 1677721600, | |
| "step": 25600, | |
| "train_runtime": 14418.2746, | |
| "train_tokens_per_second": 116360.775 | |
| }, | |
| { | |
| "epoch": 0.257, | |
| "grad_norm": 0.27764490246772766, | |
| "learning_rate": 0.00025623753503948004, | |
| "loss": 0.6302, | |
| "num_input_tokens_seen": 1684275200, | |
| "step": 25700, | |
| "train_runtime": 14472.1462, | |
| "train_tokens_per_second": 116380.471 | |
| }, | |
| { | |
| "epoch": 0.258, | |
| "grad_norm": 0.3530017137527466, | |
| "learning_rate": 0.00025590096382094475, | |
| "loss": 0.6205, | |
| "num_input_tokens_seen": 1690828800, | |
| "step": 25800, | |
| "train_runtime": 14527.093, | |
| "train_tokens_per_second": 116391.407 | |
| }, | |
| { | |
| "epoch": 0.259, | |
| "grad_norm": 0.28179532289505005, | |
| "learning_rate": 0.00025556332618087945, | |
| "loss": 0.6196, | |
| "num_input_tokens_seen": 1697382400, | |
| "step": 25900, | |
| "train_runtime": 14586.2883, | |
| "train_tokens_per_second": 116368.357 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.27701336145401, | |
| "learning_rate": 0.00025522462551929155, | |
| "loss": 0.6244, | |
| "num_input_tokens_seen": 1703936000, | |
| "step": 26000, | |
| "train_runtime": 14641.6003, | |
| "train_tokens_per_second": 116376.35 | |
| }, | |
| { | |
| "epoch": 0.261, | |
| "grad_norm": 0.2609594464302063, | |
| "learning_rate": 0.00025488486524689283, | |
| "loss": 0.625, | |
| "num_input_tokens_seen": 1710489600, | |
| "step": 26100, | |
| "train_runtime": 14698.0992, | |
| "train_tokens_per_second": 116374.884 | |
| }, | |
| { | |
| "epoch": 0.262, | |
| "grad_norm": 0.2739205062389374, | |
| "learning_rate": 0.00025454404878506555, | |
| "loss": 0.6214, | |
| "num_input_tokens_seen": 1717043200, | |
| "step": 26200, | |
| "train_runtime": 14752.0923, | |
| "train_tokens_per_second": 116393.198 | |
| }, | |
| { | |
| "epoch": 0.263, | |
| "grad_norm": 0.27936309576034546, | |
| "learning_rate": 0.0002542021795658276, | |
| "loss": 0.6203, | |
| "num_input_tokens_seen": 1723596800, | |
| "step": 26300, | |
| "train_runtime": 14806.7928, | |
| "train_tokens_per_second": 116405.816 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.24206426739692688, | |
| "learning_rate": 0.0002538592610317984, | |
| "loss": 0.622, | |
| "num_input_tokens_seen": 1730150400, | |
| "step": 26400, | |
| "train_runtime": 14868.0925, | |
| "train_tokens_per_second": 116366.669 | |
| }, | |
| { | |
| "epoch": 0.265, | |
| "grad_norm": 0.29860714077949524, | |
| "learning_rate": 0.00025351529663616355, | |
| "loss": 0.6244, | |
| "num_input_tokens_seen": 1736704000, | |
| "step": 26500, | |
| "train_runtime": 14923.7941, | |
| "train_tokens_per_second": 116371.479 | |
| }, | |
| { | |
| "epoch": 0.266, | |
| "grad_norm": 0.2928908169269562, | |
| "learning_rate": 0.00025317028984264087, | |
| "loss": 0.6183, | |
| "num_input_tokens_seen": 1743257600, | |
| "step": 26600, | |
| "train_runtime": 14978.8307, | |
| "train_tokens_per_second": 116381.421 | |
| }, | |
| { | |
| "epoch": 0.267, | |
| "grad_norm": 0.32871657609939575, | |
| "learning_rate": 0.0002528242441254448, | |
| "loss": 0.6163, | |
| "num_input_tokens_seen": 1749811200, | |
| "step": 26700, | |
| "train_runtime": 15032.6295, | |
| "train_tokens_per_second": 116400.873 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 0.26268327236175537, | |
| "learning_rate": 0.000252477162969252, | |
| "loss": 0.6211, | |
| "num_input_tokens_seen": 1756364800, | |
| "step": 26800, | |
| "train_runtime": 15087.2644, | |
| "train_tokens_per_second": 116413.735 | |
| }, | |
| { | |
| "epoch": 0.269, | |
| "grad_norm": 0.248652845621109, | |
| "learning_rate": 0.00025212904986916584, | |
| "loss": 0.6231, | |
| "num_input_tokens_seen": 1762918400, | |
| "step": 26900, | |
| "train_runtime": 15142.5638, | |
| "train_tokens_per_second": 116421.395 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.28964775800704956, | |
| "learning_rate": 0.00025177990833068133, | |
| "loss": 0.6233, | |
| "num_input_tokens_seen": 1769472000, | |
| "step": 27000, | |
| "train_runtime": 15204.3907, | |
| "train_tokens_per_second": 116379.014 | |
| }, | |
| { | |
| "epoch": 0.271, | |
| "grad_norm": 0.3479403555393219, | |
| "learning_rate": 0.0002514297418696499, | |
| "loss": 0.6136, | |
| "num_input_tokens_seen": 1776025600, | |
| "step": 27100, | |
| "train_runtime": 15259.9986, | |
| "train_tokens_per_second": 116384.388 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.2727760076522827, | |
| "learning_rate": 0.0002510785540122439, | |
| "loss": 0.6215, | |
| "num_input_tokens_seen": 1782579200, | |
| "step": 27200, | |
| "train_runtime": 15313.7249, | |
| "train_tokens_per_second": 116404.024 | |
| }, | |
| { | |
| "epoch": 0.273, | |
| "grad_norm": 0.2885916531085968, | |
| "learning_rate": 0.0002507263482949212, | |
| "loss": 0.6269, | |
| "num_input_tokens_seen": 1789132800, | |
| "step": 27300, | |
| "train_runtime": 15370.4778, | |
| "train_tokens_per_second": 116400.598 | |
| }, | |
| { | |
| "epoch": 0.274, | |
| "grad_norm": 0.2588540017604828, | |
| "learning_rate": 0.0002503731282643894, | |
| "loss": 0.619, | |
| "num_input_tokens_seen": 1795686400, | |
| "step": 27400, | |
| "train_runtime": 15425.7714, | |
| "train_tokens_per_second": 116408.208 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 0.29681408405303955, | |
| "learning_rate": 0.0002500188974775704, | |
| "loss": 0.6175, | |
| "num_input_tokens_seen": 1802240000, | |
| "step": 27500, | |
| "train_runtime": 15480.68, | |
| "train_tokens_per_second": 116418.658 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 0.319444864988327, | |
| "learning_rate": 0.00024966365950156416, | |
| "loss": 0.6155, | |
| "num_input_tokens_seen": 1808793600, | |
| "step": 27600, | |
| "train_runtime": 15541.0108, | |
| "train_tokens_per_second": 116388.414 | |
| }, | |
| { | |
| "epoch": 0.277, | |
| "grad_norm": 0.3779368996620178, | |
| "learning_rate": 0.00024930741791361326, | |
| "loss": 0.6238, | |
| "num_input_tokens_seen": 1815347200, | |
| "step": 27700, | |
| "train_runtime": 15597.4681, | |
| "train_tokens_per_second": 116387.3 | |
| }, | |
| { | |
| "epoch": 0.278, | |
| "grad_norm": 0.4451746940612793, | |
| "learning_rate": 0.0002489501763010664, | |
| "loss": 0.6201, | |
| "num_input_tokens_seen": 1821900800, | |
| "step": 27800, | |
| "train_runtime": 15648.3014, | |
| "train_tokens_per_second": 116428.024 | |
| }, | |
| { | |
| "epoch": 0.279, | |
| "grad_norm": 0.35030296444892883, | |
| "learning_rate": 0.00024859193826134285, | |
| "loss": 0.6173, | |
| "num_input_tokens_seen": 1828454400, | |
| "step": 27900, | |
| "train_runtime": 15708.8593, | |
| "train_tokens_per_second": 116396.383 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.2962912321090698, | |
| "learning_rate": 0.00024823270740189556, | |
| "loss": 0.6154, | |
| "num_input_tokens_seen": 1835008000, | |
| "step": 28000, | |
| "train_runtime": 15764.0565, | |
| "train_tokens_per_second": 116404.556 | |
| }, | |
| { | |
| "epoch": 0.281, | |
| "grad_norm": 0.2939753234386444, | |
| "learning_rate": 0.00024787248734017527, | |
| "loss": 0.6222, | |
| "num_input_tokens_seen": 1841561600, | |
| "step": 28100, | |
| "train_runtime": 15818.596, | |
| "train_tokens_per_second": 116417.513 | |
| }, | |
| { | |
| "epoch": 0.282, | |
| "grad_norm": 0.24470455944538116, | |
| "learning_rate": 0.0002475112817035941, | |
| "loss": 0.6238, | |
| "num_input_tokens_seen": 1848115200, | |
| "step": 28200, | |
| "train_runtime": 15872.9134, | |
| "train_tokens_per_second": 116432.01 | |
| }, | |
| { | |
| "epoch": 0.283, | |
| "grad_norm": 0.31226930022239685, | |
| "learning_rate": 0.0002471490941294887, | |
| "loss": 0.6212, | |
| "num_input_tokens_seen": 1854668800, | |
| "step": 28300, | |
| "train_runtime": 15933.0328, | |
| "train_tokens_per_second": 116404.003 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 0.2585756182670593, | |
| "learning_rate": 0.000246785928265084, | |
| "loss": 0.6191, | |
| "num_input_tokens_seen": 1861222400, | |
| "step": 28400, | |
| "train_runtime": 15987.3279, | |
| "train_tokens_per_second": 116418.604 | |
| }, | |
| { | |
| "epoch": 0.285, | |
| "grad_norm": 0.4430062770843506, | |
| "learning_rate": 0.0002464217877674562, | |
| "loss": 0.6249, | |
| "num_input_tokens_seen": 1867776000, | |
| "step": 28500, | |
| "train_runtime": 16041.837, | |
| "train_tokens_per_second": 116431.553 | |
| }, | |
| { | |
| "epoch": 0.286, | |
| "grad_norm": 0.31980106234550476, | |
| "learning_rate": 0.0002460566763034961, | |
| "loss": 0.6303, | |
| "num_input_tokens_seen": 1874329600, | |
| "step": 28600, | |
| "train_runtime": 16098.6135, | |
| "train_tokens_per_second": 116428.014 | |
| }, | |
| { | |
| "epoch": 0.287, | |
| "grad_norm": 0.26904401183128357, | |
| "learning_rate": 0.00024569059754987196, | |
| "loss": 0.6214, | |
| "num_input_tokens_seen": 1880883200, | |
| "step": 28700, | |
| "train_runtime": 16159.9296, | |
| "train_tokens_per_second": 116391.794 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.2635185420513153, | |
| "learning_rate": 0.00024532355519299296, | |
| "loss": 0.6181, | |
| "num_input_tokens_seen": 1887436800, | |
| "step": 28800, | |
| "train_runtime": 16214.468, | |
| "train_tokens_per_second": 116404.485 | |
| }, | |
| { | |
| "epoch": 0.289, | |
| "grad_norm": 1.0330250263214111, | |
| "learning_rate": 0.0002449555529289714, | |
| "loss": 0.6241, | |
| "num_input_tokens_seen": 1893990400, | |
| "step": 28900, | |
| "train_runtime": 16268.5971, | |
| "train_tokens_per_second": 116420.02 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.3014289438724518, | |
| "learning_rate": 0.0002445865944635861, | |
| "loss": 0.6295, | |
| "num_input_tokens_seen": 1900544000, | |
| "step": 29000, | |
| "train_runtime": 16324.9935, | |
| "train_tokens_per_second": 116419.281 | |
| }, | |
| { | |
| "epoch": 0.291, | |
| "grad_norm": 0.2632603347301483, | |
| "learning_rate": 0.0002442166835122446, | |
| "loss": 0.6188, | |
| "num_input_tokens_seen": 1907097600, | |
| "step": 29100, | |
| "train_runtime": 16378.6859, | |
| "train_tokens_per_second": 116437.766 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 0.22746869921684265, | |
| "learning_rate": 0.00024384582379994614, | |
| "loss": 0.6172, | |
| "num_input_tokens_seen": 1913651200, | |
| "step": 29200, | |
| "train_runtime": 16434.3593, | |
| "train_tokens_per_second": 116442.093 | |
| }, | |
| { | |
| "epoch": 0.293, | |
| "grad_norm": 0.26981279253959656, | |
| "learning_rate": 0.00024347401906124388, | |
| "loss": 0.6161, | |
| "num_input_tokens_seen": 1920204800, | |
| "step": 29300, | |
| "train_runtime": 16494.2752, | |
| "train_tokens_per_second": 116416.44 | |
| }, | |
| { | |
| "epoch": 0.294, | |
| "grad_norm": 0.30709710717201233, | |
| "learning_rate": 0.0002431012730402075, | |
| "loss": 0.6225, | |
| "num_input_tokens_seen": 1926758400, | |
| "step": 29400, | |
| "train_runtime": 16549.5618, | |
| "train_tokens_per_second": 116423.53 | |
| }, | |
| { | |
| "epoch": 0.295, | |
| "grad_norm": 0.27512043714523315, | |
| "learning_rate": 0.00024272758949038517, | |
| "loss": 0.6167, | |
| "num_input_tokens_seen": 1933312000, | |
| "step": 29500, | |
| "train_runtime": 16604.0285, | |
| "train_tokens_per_second": 116436.321 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.33047741651535034, | |
| "learning_rate": 0.00024235297217476616, | |
| "loss": 0.6194, | |
| "num_input_tokens_seen": 1939865600, | |
| "step": 29600, | |
| "train_runtime": 16665.4033, | |
| "train_tokens_per_second": 116400.76 | |
| }, | |
| { | |
| "epoch": 0.297, | |
| "grad_norm": 0.23181872069835663, | |
| "learning_rate": 0.00024197742486574268, | |
| "loss": 0.6235, | |
| "num_input_tokens_seen": 1946419200, | |
| "step": 29700, | |
| "train_runtime": 16719.5543, | |
| "train_tokens_per_second": 116415.735 | |
| }, | |
| { | |
| "epoch": 0.298, | |
| "grad_norm": 0.25960394740104675, | |
| "learning_rate": 0.0002416009513450719, | |
| "loss": 0.6189, | |
| "num_input_tokens_seen": 1952972800, | |
| "step": 29800, | |
| "train_runtime": 16775.5144, | |
| "train_tokens_per_second": 116418.058 | |
| }, | |
| { | |
| "epoch": 0.299, | |
| "grad_norm": 0.3980496823787689, | |
| "learning_rate": 0.00024122355540383806, | |
| "loss": 0.6205, | |
| "num_input_tokens_seen": 1959526400, | |
| "step": 29900, | |
| "train_runtime": 16831.9075, | |
| "train_tokens_per_second": 116417.369 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.274140328168869, | |
| "learning_rate": 0.00024084524084241405, | |
| "loss": 0.6137, | |
| "num_input_tokens_seen": 1966080000, | |
| "step": 30000, | |
| "train_runtime": 16885.9089, | |
| "train_tokens_per_second": 116433.176 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 1966080000, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.755848237056e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |