| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 14860, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002692333580130578, |
| "grad_norm": 0.482421875, |
| "learning_rate": 0.0, |
| "loss": 4.6193, |
| "num_input_tokens_seen": 65536, |
| "step": 1, |
| "train_runtime": 35.1493, |
| "train_tokens_per_second": 1864.502 |
| }, |
| { |
| "epoch": 0.002692333580130578, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.0089686098654709e-06, |
| "loss": 4.9615, |
| "num_input_tokens_seen": 655360, |
| "step": 10, |
| "train_runtime": 82.7179, |
| "train_tokens_per_second": 7922.835 |
| }, |
| { |
| "epoch": 0.005384667160261156, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.1300448430493275e-06, |
| "loss": 4.8258, |
| "num_input_tokens_seen": 1310720, |
| "step": 20, |
| "train_runtime": 166.8773, |
| "train_tokens_per_second": 7854.392 |
| }, |
| { |
| "epoch": 0.008077000740391735, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.251121076233184e-06, |
| "loss": 4.9214, |
| "num_input_tokens_seen": 1966080, |
| "step": 30, |
| "train_runtime": 256.0013, |
| "train_tokens_per_second": 7679.96 |
| }, |
| { |
| "epoch": 0.010769334320522312, |
| "grad_norm": 0.4765625, |
| "learning_rate": 4.372197309417041e-06, |
| "loss": 4.8702, |
| "num_input_tokens_seen": 2621440, |
| "step": 40, |
| "train_runtime": 346.1026, |
| "train_tokens_per_second": 7574.171 |
| }, |
| { |
| "epoch": 0.013461667900652891, |
| "grad_norm": 0.5, |
| "learning_rate": 5.493273542600897e-06, |
| "loss": 4.8648, |
| "num_input_tokens_seen": 3276800, |
| "step": 50, |
| "train_runtime": 436.0808, |
| "train_tokens_per_second": 7514.205 |
| }, |
| { |
| "epoch": 0.01615400148078347, |
| "grad_norm": 0.474609375, |
| "learning_rate": 6.614349775784753e-06, |
| "loss": 4.9171, |
| "num_input_tokens_seen": 3932160, |
| "step": 60, |
| "train_runtime": 526.5782, |
| "train_tokens_per_second": 7467.381 |
| }, |
| { |
| "epoch": 0.018846335060914047, |
| "grad_norm": 0.4921875, |
| "learning_rate": 7.73542600896861e-06, |
| "loss": 4.8927, |
| "num_input_tokens_seen": 4587520, |
| "step": 70, |
| "train_runtime": 616.6177, |
| "train_tokens_per_second": 7439.813 |
| }, |
| { |
| "epoch": 0.021538668641044624, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.856502242152467e-06, |
| "loss": 4.9268, |
| "num_input_tokens_seen": 5242880, |
| "step": 80, |
| "train_runtime": 707.2158, |
| "train_tokens_per_second": 7413.409 |
| }, |
| { |
| "epoch": 0.024231002221175205, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.977578475336324e-06, |
| "loss": 4.9262, |
| "num_input_tokens_seen": 5898240, |
| "step": 90, |
| "train_runtime": 797.3568, |
| "train_tokens_per_second": 7397.241 |
| }, |
| { |
| "epoch": 0.026923335801305782, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.109865470852018e-05, |
| "loss": 4.8498, |
| "num_input_tokens_seen": 6553600, |
| "step": 100, |
| "train_runtime": 887.9768, |
| "train_tokens_per_second": 7380.373 |
| }, |
| { |
| "epoch": 0.02961566938143636, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.2219730941704037e-05, |
| "loss": 4.8954, |
| "num_input_tokens_seen": 7208960, |
| "step": 110, |
| "train_runtime": 978.7816, |
| "train_tokens_per_second": 7365.239 |
| }, |
| { |
| "epoch": 0.03230800296156694, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.3340807174887892e-05, |
| "loss": 4.8873, |
| "num_input_tokens_seen": 7864320, |
| "step": 120, |
| "train_runtime": 1070.0192, |
| "train_tokens_per_second": 7349.7 |
| }, |
| { |
| "epoch": 0.03500033654169751, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.4461883408071749e-05, |
| "loss": 4.9083, |
| "num_input_tokens_seen": 8519680, |
| "step": 130, |
| "train_runtime": 1160.919, |
| "train_tokens_per_second": 7338.738 |
| }, |
| { |
| "epoch": 0.037692670121828094, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.5582959641255608e-05, |
| "loss": 4.8152, |
| "num_input_tokens_seen": 9175040, |
| "step": 140, |
| "train_runtime": 1251.55, |
| "train_tokens_per_second": 7330.942 |
| }, |
| { |
| "epoch": 0.040385003701958674, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.6704035874439464e-05, |
| "loss": 4.8949, |
| "num_input_tokens_seen": 9830400, |
| "step": 150, |
| "train_runtime": 1342.147, |
| "train_tokens_per_second": 7324.384 |
| }, |
| { |
| "epoch": 0.04307733728208925, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.7825112107623318e-05, |
| "loss": 4.7979, |
| "num_input_tokens_seen": 10485760, |
| "step": 160, |
| "train_runtime": 1432.7472, |
| "train_tokens_per_second": 7318.639 |
| }, |
| { |
| "epoch": 0.04576967086221983, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.8946188340807175e-05, |
| "loss": 4.9055, |
| "num_input_tokens_seen": 11141120, |
| "step": 170, |
| "train_runtime": 1523.0122, |
| "train_tokens_per_second": 7315.187 |
| }, |
| { |
| "epoch": 0.04846200444235041, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.006726457399103e-05, |
| "loss": 4.79, |
| "num_input_tokens_seen": 11796480, |
| "step": 180, |
| "train_runtime": 1614.1075, |
| "train_tokens_per_second": 7308.361 |
| }, |
| { |
| "epoch": 0.05115433802248098, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.1188340807174888e-05, |
| "loss": 4.8685, |
| "num_input_tokens_seen": 12451840, |
| "step": 190, |
| "train_runtime": 1704.6942, |
| "train_tokens_per_second": 7304.442 |
| }, |
| { |
| "epoch": 0.053846671602611564, |
| "grad_norm": 0.55078125, |
| "learning_rate": 2.2309417040358745e-05, |
| "loss": 4.7882, |
| "num_input_tokens_seen": 13107200, |
| "step": 200, |
| "train_runtime": 1795.85, |
| "train_tokens_per_second": 7298.605 |
| }, |
| { |
| "epoch": 0.056539005182742144, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.3430493273542602e-05, |
| "loss": 4.918, |
| "num_input_tokens_seen": 13762560, |
| "step": 210, |
| "train_runtime": 1886.2187, |
| "train_tokens_per_second": 7296.376 |
| }, |
| { |
| "epoch": 0.05923133876287272, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.455156950672646e-05, |
| "loss": 4.8228, |
| "num_input_tokens_seen": 14417920, |
| "step": 220, |
| "train_runtime": 1977.1485, |
| "train_tokens_per_second": 7292.28 |
| }, |
| { |
| "epoch": 0.0619236723430033, |
| "grad_norm": 0.5, |
| "learning_rate": 2.567264573991032e-05, |
| "loss": 4.8993, |
| "num_input_tokens_seen": 15073280, |
| "step": 230, |
| "train_runtime": 2067.7789, |
| "train_tokens_per_second": 7289.6 |
| }, |
| { |
| "epoch": 0.06461600592313388, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.6793721973094172e-05, |
| "loss": 4.8871, |
| "num_input_tokens_seen": 15728640, |
| "step": 240, |
| "train_runtime": 2158.7764, |
| "train_tokens_per_second": 7285.905 |
| }, |
| { |
| "epoch": 0.06730833950326445, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.7914798206278025e-05, |
| "loss": 4.8695, |
| "num_input_tokens_seen": 16384000, |
| "step": 250, |
| "train_runtime": 2249.0647, |
| "train_tokens_per_second": 7284.806 |
| }, |
| { |
| "epoch": 0.07000067308339503, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.9035874439461886e-05, |
| "loss": 4.8812, |
| "num_input_tokens_seen": 17039360, |
| "step": 260, |
| "train_runtime": 2339.559, |
| "train_tokens_per_second": 7283.15 |
| }, |
| { |
| "epoch": 0.07269300666352561, |
| "grad_norm": 0.52734375, |
| "learning_rate": 3.015695067264574e-05, |
| "loss": 4.9003, |
| "num_input_tokens_seen": 17694720, |
| "step": 270, |
| "train_runtime": 2429.9591, |
| "train_tokens_per_second": 7281.9 |
| }, |
| { |
| "epoch": 0.07538534024365619, |
| "grad_norm": 0.5859375, |
| "learning_rate": 3.12780269058296e-05, |
| "loss": 4.8836, |
| "num_input_tokens_seen": 18350080, |
| "step": 280, |
| "train_runtime": 2520.4305, |
| "train_tokens_per_second": 7280.534 |
| }, |
| { |
| "epoch": 0.07807767382378676, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.2399103139013456e-05, |
| "loss": 4.87, |
| "num_input_tokens_seen": 19005440, |
| "step": 290, |
| "train_runtime": 2610.6174, |
| "train_tokens_per_second": 7280.056 |
| }, |
| { |
| "epoch": 0.08077000740391735, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.3520179372197316e-05, |
| "loss": 4.9218, |
| "num_input_tokens_seen": 19660800, |
| "step": 300, |
| "train_runtime": 2701.2793, |
| "train_tokens_per_second": 7278.329 |
| }, |
| { |
| "epoch": 0.08346234098404792, |
| "grad_norm": 0.52734375, |
| "learning_rate": 3.464125560538117e-05, |
| "loss": 4.8102, |
| "num_input_tokens_seen": 20316160, |
| "step": 310, |
| "train_runtime": 2791.906, |
| "train_tokens_per_second": 7276.807 |
| }, |
| { |
| "epoch": 0.0861546745641785, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.576233183856502e-05, |
| "loss": 4.9781, |
| "num_input_tokens_seen": 20971520, |
| "step": 320, |
| "train_runtime": 2882.398, |
| "train_tokens_per_second": 7275.72 |
| }, |
| { |
| "epoch": 0.08884700814430908, |
| "grad_norm": 0.490234375, |
| "learning_rate": 3.688340807174888e-05, |
| "loss": 4.8558, |
| "num_input_tokens_seen": 21626880, |
| "step": 330, |
| "train_runtime": 2973.0085, |
| "train_tokens_per_second": 7274.409 |
| }, |
| { |
| "epoch": 0.09153934172443966, |
| "grad_norm": 0.546875, |
| "learning_rate": 3.8004484304932737e-05, |
| "loss": 4.9521, |
| "num_input_tokens_seen": 22282240, |
| "step": 340, |
| "train_runtime": 3063.4637, |
| "train_tokens_per_second": 7273.545 |
| }, |
| { |
| "epoch": 0.09423167530457023, |
| "grad_norm": 0.55859375, |
| "learning_rate": 3.91255605381166e-05, |
| "loss": 4.8852, |
| "num_input_tokens_seen": 22937600, |
| "step": 350, |
| "train_runtime": 3153.35, |
| "train_tokens_per_second": 7274.042 |
| }, |
| { |
| "epoch": 0.09692400888470082, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.024663677130045e-05, |
| "loss": 4.7577, |
| "num_input_tokens_seen": 23592960, |
| "step": 360, |
| "train_runtime": 3244.206, |
| "train_tokens_per_second": 7272.337 |
| }, |
| { |
| "epoch": 0.09961634246483139, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.1367713004484303e-05, |
| "loss": 4.8759, |
| "num_input_tokens_seen": 24248320, |
| "step": 370, |
| "train_runtime": 3334.7646, |
| "train_tokens_per_second": 7271.374 |
| }, |
| { |
| "epoch": 0.10230867604496197, |
| "grad_norm": 0.5546875, |
| "learning_rate": 4.2488789237668164e-05, |
| "loss": 4.811, |
| "num_input_tokens_seen": 24903680, |
| "step": 380, |
| "train_runtime": 3425.76, |
| "train_tokens_per_second": 7269.534 |
| }, |
| { |
| "epoch": 0.10500100962509255, |
| "grad_norm": 0.5703125, |
| "learning_rate": 4.360986547085202e-05, |
| "loss": 4.7531, |
| "num_input_tokens_seen": 25559040, |
| "step": 390, |
| "train_runtime": 3516.2291, |
| "train_tokens_per_second": 7268.878 |
| }, |
| { |
| "epoch": 0.10769334320522313, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.473094170403588e-05, |
| "loss": 4.717, |
| "num_input_tokens_seen": 26214400, |
| "step": 400, |
| "train_runtime": 3606.4366, |
| "train_tokens_per_second": 7268.782 |
| }, |
| { |
| "epoch": 0.1103856767853537, |
| "grad_norm": 0.51953125, |
| "learning_rate": 4.585201793721973e-05, |
| "loss": 4.7769, |
| "num_input_tokens_seen": 26869760, |
| "step": 410, |
| "train_runtime": 3697.0154, |
| "train_tokens_per_second": 7267.96 |
| }, |
| { |
| "epoch": 0.11307801036548429, |
| "grad_norm": 0.5546875, |
| "learning_rate": 4.697309417040359e-05, |
| "loss": 4.7951, |
| "num_input_tokens_seen": 27525120, |
| "step": 420, |
| "train_runtime": 3787.3387, |
| "train_tokens_per_second": 7267.668 |
| }, |
| { |
| "epoch": 0.11577034394561486, |
| "grad_norm": 0.671875, |
| "learning_rate": 4.8094170403587444e-05, |
| "loss": 4.8145, |
| "num_input_tokens_seen": 28180480, |
| "step": 430, |
| "train_runtime": 3877.3269, |
| "train_tokens_per_second": 7268.018 |
| }, |
| { |
| "epoch": 0.11846267752574544, |
| "grad_norm": 0.6328125, |
| "learning_rate": 4.92152466367713e-05, |
| "loss": 4.7604, |
| "num_input_tokens_seen": 28835840, |
| "step": 440, |
| "train_runtime": 3967.668, |
| "train_tokens_per_second": 7267.705 |
| }, |
| { |
| "epoch": 0.12115501110587602, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.9999994655793676e-05, |
| "loss": 4.8413, |
| "num_input_tokens_seen": 29491200, |
| "step": 450, |
| "train_runtime": 4058.2143, |
| "train_tokens_per_second": 7267.039 |
| }, |
| { |
| "epoch": 0.1238473446860066, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.999989964774474e-05, |
| "loss": 4.7132, |
| "num_input_tokens_seen": 30146560, |
| "step": 460, |
| "train_runtime": 4148.5833, |
| "train_tokens_per_second": 7266.712 |
| }, |
| { |
| "epoch": 0.12653967826613718, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.99996858800747e-05, |
| "loss": 4.7559, |
| "num_input_tokens_seen": 30801920, |
| "step": 470, |
| "train_runtime": 4238.9481, |
| "train_tokens_per_second": 7266.407 |
| }, |
| { |
| "epoch": 0.12923201184626776, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.999935335379901e-05, |
| "loss": 4.7794, |
| "num_input_tokens_seen": 31457280, |
| "step": 480, |
| "train_runtime": 4329.1664, |
| "train_tokens_per_second": 7266.36 |
| }, |
| { |
| "epoch": 0.13192434542639833, |
| "grad_norm": 0.58984375, |
| "learning_rate": 4.9998902070497324e-05, |
| "loss": 4.8323, |
| "num_input_tokens_seen": 32112640, |
| "step": 490, |
| "train_runtime": 4419.7433, |
| "train_tokens_per_second": 7265.725 |
| }, |
| { |
| "epoch": 0.1346166790065289, |
| "grad_norm": 0.58984375, |
| "learning_rate": 4.999833203231341e-05, |
| "loss": 4.6969, |
| "num_input_tokens_seen": 32768000, |
| "step": 500, |
| "train_runtime": 4510.5207, |
| "train_tokens_per_second": 7264.793 |
| }, |
| { |
| "epoch": 0.13730901258665948, |
| "grad_norm": 0.56640625, |
| "learning_rate": 4.9997643241955186e-05, |
| "loss": 4.7646, |
| "num_input_tokens_seen": 33423360, |
| "step": 510, |
| "train_runtime": 4620.4996, |
| "train_tokens_per_second": 7233.711 |
| }, |
| { |
| "epoch": 0.14000134616679005, |
| "grad_norm": 0.640625, |
| "learning_rate": 4.9996835702694675e-05, |
| "loss": 4.8101, |
| "num_input_tokens_seen": 34078720, |
| "step": 520, |
| "train_runtime": 4710.7686, |
| "train_tokens_per_second": 7234.217 |
| }, |
| { |
| "epoch": 0.14269367974692065, |
| "grad_norm": 0.7265625, |
| "learning_rate": 4.999590941836802e-05, |
| "loss": 4.7956, |
| "num_input_tokens_seen": 34734080, |
| "step": 530, |
| "train_runtime": 4801.1604, |
| "train_tokens_per_second": 7234.518 |
| }, |
| { |
| "epoch": 0.14538601332705123, |
| "grad_norm": 0.58203125, |
| "learning_rate": 4.999486439337546e-05, |
| "loss": 4.6643, |
| "num_input_tokens_seen": 35389440, |
| "step": 540, |
| "train_runtime": 4891.786, |
| "train_tokens_per_second": 7234.462 |
| }, |
| { |
| "epoch": 0.1480783469071818, |
| "grad_norm": 0.58203125, |
| "learning_rate": 4.999370063268126e-05, |
| "loss": 4.7488, |
| "num_input_tokens_seen": 36044800, |
| "step": 550, |
| "train_runtime": 4982.5576, |
| "train_tokens_per_second": 7234.196 |
| }, |
| { |
| "epoch": 0.15077068048731238, |
| "grad_norm": 0.625, |
| "learning_rate": 4.999241814181378e-05, |
| "loss": 4.7162, |
| "num_input_tokens_seen": 36700160, |
| "step": 560, |
| "train_runtime": 5073.5063, |
| "train_tokens_per_second": 7233.688 |
| }, |
| { |
| "epoch": 0.15346301406744295, |
| "grad_norm": 0.6328125, |
| "learning_rate": 4.999101692686534e-05, |
| "loss": 4.7735, |
| "num_input_tokens_seen": 37355520, |
| "step": 570, |
| "train_runtime": 5164.0672, |
| "train_tokens_per_second": 7233.74 |
| }, |
| { |
| "epoch": 0.15615534764757352, |
| "grad_norm": 0.63671875, |
| "learning_rate": 4.9989496994492305e-05, |
| "loss": 4.7035, |
| "num_input_tokens_seen": 38010880, |
| "step": 580, |
| "train_runtime": 5254.4846, |
| "train_tokens_per_second": 7233.988 |
| }, |
| { |
| "epoch": 0.15884768122770412, |
| "grad_norm": 0.61328125, |
| "learning_rate": 4.998785835191495e-05, |
| "loss": 4.6791, |
| "num_input_tokens_seen": 38666240, |
| "step": 590, |
| "train_runtime": 5344.8949, |
| "train_tokens_per_second": 7234.238 |
| }, |
| { |
| "epoch": 0.1615400148078347, |
| "grad_norm": 0.6796875, |
| "learning_rate": 4.9986101006917496e-05, |
| "loss": 4.6101, |
| "num_input_tokens_seen": 39321600, |
| "step": 600, |
| "train_runtime": 5435.7009, |
| "train_tokens_per_second": 7233.952 |
| }, |
| { |
| "epoch": 0.16423234838796527, |
| "grad_norm": 0.625, |
| "learning_rate": 4.9984224967848035e-05, |
| "loss": 4.6882, |
| "num_input_tokens_seen": 39976960, |
| "step": 610, |
| "train_runtime": 5525.6374, |
| "train_tokens_per_second": 7234.814 |
| }, |
| { |
| "epoch": 0.16692468196809584, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.998223024361852e-05, |
| "loss": 4.7693, |
| "num_input_tokens_seen": 40632320, |
| "step": 620, |
| "train_runtime": 5616.3935, |
| "train_tokens_per_second": 7234.593 |
| }, |
| { |
| "epoch": 0.16961701554822642, |
| "grad_norm": 0.66015625, |
| "learning_rate": 4.9980116843704694e-05, |
| "loss": 4.6585, |
| "num_input_tokens_seen": 41287680, |
| "step": 630, |
| "train_runtime": 5706.8994, |
| "train_tokens_per_second": 7234.696 |
| }, |
| { |
| "epoch": 0.172309349128357, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.997788477814606e-05, |
| "loss": 4.6682, |
| "num_input_tokens_seen": 41943040, |
| "step": 640, |
| "train_runtime": 5797.3335, |
| "train_tokens_per_second": 7234.885 |
| }, |
| { |
| "epoch": 0.1750016827084876, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.9975534057545815e-05, |
| "loss": 4.6915, |
| "num_input_tokens_seen": 42598400, |
| "step": 650, |
| "train_runtime": 5888.074, |
| "train_tokens_per_second": 7234.692 |
| }, |
| { |
| "epoch": 0.17769401628861817, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.997306469307086e-05, |
| "loss": 4.7539, |
| "num_input_tokens_seen": 43253760, |
| "step": 660, |
| "train_runtime": 5978.6175, |
| "train_tokens_per_second": 7234.743 |
| }, |
| { |
| "epoch": 0.18038634986874874, |
| "grad_norm": 0.6640625, |
| "learning_rate": 4.997047669645165e-05, |
| "loss": 4.5955, |
| "num_input_tokens_seen": 43909120, |
| "step": 670, |
| "train_runtime": 6069.1762, |
| "train_tokens_per_second": 7234.774 |
| }, |
| { |
| "epoch": 0.18307868344887931, |
| "grad_norm": 0.69140625, |
| "learning_rate": 4.9967770079982235e-05, |
| "loss": 4.6347, |
| "num_input_tokens_seen": 44564480, |
| "step": 680, |
| "train_runtime": 6160.0413, |
| "train_tokens_per_second": 7234.445 |
| }, |
| { |
| "epoch": 0.1857710170290099, |
| "grad_norm": 0.78515625, |
| "learning_rate": 4.9964944856520116e-05, |
| "loss": 4.6122, |
| "num_input_tokens_seen": 45219840, |
| "step": 690, |
| "train_runtime": 6250.9481, |
| "train_tokens_per_second": 7234.077 |
| }, |
| { |
| "epoch": 0.18846335060914046, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.996200103948626e-05, |
| "loss": 4.6617, |
| "num_input_tokens_seen": 45875200, |
| "step": 700, |
| "train_runtime": 6341.6991, |
| "train_tokens_per_second": 7233.897 |
| }, |
| { |
| "epoch": 0.19115568418927106, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.995893864286498e-05, |
| "loss": 4.7138, |
| "num_input_tokens_seen": 46530560, |
| "step": 710, |
| "train_runtime": 6431.9076, |
| "train_tokens_per_second": 7234.333 |
| }, |
| { |
| "epoch": 0.19384801776940164, |
| "grad_norm": 0.703125, |
| "learning_rate": 4.9955757681203896e-05, |
| "loss": 4.5922, |
| "num_input_tokens_seen": 47185920, |
| "step": 720, |
| "train_runtime": 6522.506, |
| "train_tokens_per_second": 7234.324 |
| }, |
| { |
| "epoch": 0.1965403513495322, |
| "grad_norm": 0.7578125, |
| "learning_rate": 4.995245816961387e-05, |
| "loss": 4.7699, |
| "num_input_tokens_seen": 47841280, |
| "step": 730, |
| "train_runtime": 6613.3112, |
| "train_tokens_per_second": 7234.089 |
| }, |
| { |
| "epoch": 0.19923268492966278, |
| "grad_norm": 0.6484375, |
| "learning_rate": 4.9949040123768896e-05, |
| "loss": 4.7121, |
| "num_input_tokens_seen": 48496640, |
| "step": 740, |
| "train_runtime": 6703.96, |
| "train_tokens_per_second": 7234.029 |
| }, |
| { |
| "epoch": 0.20192501850979336, |
| "grad_norm": 0.63671875, |
| "learning_rate": 4.994550355990609e-05, |
| "loss": 4.7765, |
| "num_input_tokens_seen": 49152000, |
| "step": 750, |
| "train_runtime": 6794.2239, |
| "train_tokens_per_second": 7234.38 |
| }, |
| { |
| "epoch": 0.20461735208992393, |
| "grad_norm": 0.6171875, |
| "learning_rate": 4.994184849482556e-05, |
| "loss": 4.6858, |
| "num_input_tokens_seen": 49807360, |
| "step": 760, |
| "train_runtime": 6885.0159, |
| "train_tokens_per_second": 7234.168 |
| }, |
| { |
| "epoch": 0.20730968567005453, |
| "grad_norm": 0.6796875, |
| "learning_rate": 4.993807494589032e-05, |
| "loss": 4.6799, |
| "num_input_tokens_seen": 50462720, |
| "step": 770, |
| "train_runtime": 6975.1885, |
| "train_tokens_per_second": 7234.603 |
| }, |
| { |
| "epoch": 0.2100020192501851, |
| "grad_norm": 0.69140625, |
| "learning_rate": 4.9934182931026284e-05, |
| "loss": 4.6223, |
| "num_input_tokens_seen": 51118080, |
| "step": 780, |
| "train_runtime": 7066.0675, |
| "train_tokens_per_second": 7234.304 |
| }, |
| { |
| "epoch": 0.21269435283031568, |
| "grad_norm": 0.66015625, |
| "learning_rate": 4.993017246872207e-05, |
| "loss": 4.7387, |
| "num_input_tokens_seen": 51773440, |
| "step": 790, |
| "train_runtime": 7156.7484, |
| "train_tokens_per_second": 7234.213 |
| }, |
| { |
| "epoch": 0.21538668641044625, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.9926043578029e-05, |
| "loss": 4.6983, |
| "num_input_tokens_seen": 52428800, |
| "step": 800, |
| "train_runtime": 7247.421, |
| "train_tokens_per_second": 7234.132 |
| }, |
| { |
| "epoch": 0.21807901999057683, |
| "grad_norm": 0.65625, |
| "learning_rate": 4.992179627856097e-05, |
| "loss": 4.6995, |
| "num_input_tokens_seen": 53084160, |
| "step": 810, |
| "train_runtime": 7337.4649, |
| "train_tokens_per_second": 7234.673 |
| }, |
| { |
| "epoch": 0.2207713535707074, |
| "grad_norm": 0.625, |
| "learning_rate": 4.9917430590494375e-05, |
| "loss": 4.7396, |
| "num_input_tokens_seen": 53739520, |
| "step": 820, |
| "train_runtime": 7427.9552, |
| "train_tokens_per_second": 7234.766 |
| }, |
| { |
| "epoch": 0.22346368715083798, |
| "grad_norm": 0.63671875, |
| "learning_rate": 4.991294653456799e-05, |
| "loss": 4.7064, |
| "num_input_tokens_seen": 54394880, |
| "step": 830, |
| "train_runtime": 7518.6869, |
| "train_tokens_per_second": 7234.625 |
| }, |
| { |
| "epoch": 0.22615602073096858, |
| "grad_norm": 0.62109375, |
| "learning_rate": 4.9908344132082894e-05, |
| "loss": 4.7164, |
| "num_input_tokens_seen": 55050240, |
| "step": 840, |
| "train_runtime": 7609.0857, |
| "train_tokens_per_second": 7234.804 |
| }, |
| { |
| "epoch": 0.22884835431109915, |
| "grad_norm": 0.76953125, |
| "learning_rate": 4.9903623404902366e-05, |
| "loss": 4.6648, |
| "num_input_tokens_seen": 55705600, |
| "step": 850, |
| "train_runtime": 7699.3938, |
| "train_tokens_per_second": 7235.063 |
| }, |
| { |
| "epoch": 0.23154068789122972, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.989878437545175e-05, |
| "loss": 4.7801, |
| "num_input_tokens_seen": 56360960, |
| "step": 860, |
| "train_runtime": 7789.7806, |
| "train_tokens_per_second": 7235.244 |
| }, |
| { |
| "epoch": 0.2342330214713603, |
| "grad_norm": 0.7109375, |
| "learning_rate": 4.98938270667184e-05, |
| "loss": 4.697, |
| "num_input_tokens_seen": 57016320, |
| "step": 870, |
| "train_runtime": 7880.6012, |
| "train_tokens_per_second": 7235.022 |
| }, |
| { |
| "epoch": 0.23692535505149087, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.988875150225154e-05, |
| "loss": 4.6899, |
| "num_input_tokens_seen": 57671680, |
| "step": 880, |
| "train_runtime": 7970.6998, |
| "train_tokens_per_second": 7235.46 |
| }, |
| { |
| "epoch": 0.23961768863162144, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.9883557706162146e-05, |
| "loss": 4.7879, |
| "num_input_tokens_seen": 58327040, |
| "step": 890, |
| "train_runtime": 8061.5195, |
| "train_tokens_per_second": 7235.241 |
| }, |
| { |
| "epoch": 0.24231002221175205, |
| "grad_norm": 0.66015625, |
| "learning_rate": 4.987824570312285e-05, |
| "loss": 4.7247, |
| "num_input_tokens_seen": 58982400, |
| "step": 900, |
| "train_runtime": 8152.3787, |
| "train_tokens_per_second": 7234.993 |
| }, |
| { |
| "epoch": 0.24500235579188262, |
| "grad_norm": 0.71484375, |
| "learning_rate": 4.98728155183678e-05, |
| "loss": 4.7704, |
| "num_input_tokens_seen": 59637760, |
| "step": 910, |
| "train_runtime": 8242.3679, |
| "train_tokens_per_second": 7235.513 |
| }, |
| { |
| "epoch": 0.2476946893720132, |
| "grad_norm": 0.7578125, |
| "learning_rate": 4.986726717769259e-05, |
| "loss": 4.7249, |
| "num_input_tokens_seen": 60293120, |
| "step": 920, |
| "train_runtime": 8333.1851, |
| "train_tokens_per_second": 7235.303 |
| }, |
| { |
| "epoch": 0.25038702295214377, |
| "grad_norm": 0.7578125, |
| "learning_rate": 4.986160070745405e-05, |
| "loss": 4.6593, |
| "num_input_tokens_seen": 60948480, |
| "step": 930, |
| "train_runtime": 8423.8926, |
| "train_tokens_per_second": 7235.192 |
| }, |
| { |
| "epoch": 0.25307935653227437, |
| "grad_norm": 0.6796875, |
| "learning_rate": 4.9855816134570233e-05, |
| "loss": 4.7105, |
| "num_input_tokens_seen": 61603840, |
| "step": 940, |
| "train_runtime": 8514.3643, |
| "train_tokens_per_second": 7235.284 |
| }, |
| { |
| "epoch": 0.2557716901124049, |
| "grad_norm": 0.6640625, |
| "learning_rate": 4.9849913486520174e-05, |
| "loss": 4.7001, |
| "num_input_tokens_seen": 62259200, |
| "step": 950, |
| "train_runtime": 8605.0711, |
| "train_tokens_per_second": 7235.176 |
| }, |
| { |
| "epoch": 0.2584640236925355, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.9843892791343835e-05, |
| "loss": 4.6863, |
| "num_input_tokens_seen": 62914560, |
| "step": 960, |
| "train_runtime": 8695.5454, |
| "train_tokens_per_second": 7235.263 |
| }, |
| { |
| "epoch": 0.26115635727266606, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.983775407764197e-05, |
| "loss": 4.6958, |
| "num_input_tokens_seen": 63569920, |
| "step": 970, |
| "train_runtime": 8786.3319, |
| "train_tokens_per_second": 7235.092 |
| }, |
| { |
| "epoch": 0.26384869085279666, |
| "grad_norm": 0.7265625, |
| "learning_rate": 4.983149737457593e-05, |
| "loss": 4.6252, |
| "num_input_tokens_seen": 64225280, |
| "step": 980, |
| "train_runtime": 8876.7708, |
| "train_tokens_per_second": 7235.208 |
| }, |
| { |
| "epoch": 0.26654102443292726, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.982512271186759e-05, |
| "loss": 4.6473, |
| "num_input_tokens_seen": 64880640, |
| "step": 990, |
| "train_runtime": 8966.8048, |
| "train_tokens_per_second": 7235.648 |
| }, |
| { |
| "epoch": 0.2692333580130578, |
| "grad_norm": 0.65625, |
| "learning_rate": 4.981863011979917e-05, |
| "loss": 4.6721, |
| "num_input_tokens_seen": 65536000, |
| "step": 1000, |
| "train_runtime": 9058.0411, |
| "train_tokens_per_second": 7235.118 |
| }, |
| { |
| "epoch": 0.2719256915931884, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.98120196292131e-05, |
| "loss": 4.6244, |
| "num_input_tokens_seen": 66191360, |
| "step": 1010, |
| "train_runtime": 9165.1685, |
| "train_tokens_per_second": 7222.056 |
| }, |
| { |
| "epoch": 0.27461802517331896, |
| "grad_norm": 0.7734375, |
| "learning_rate": 4.98052912715119e-05, |
| "loss": 4.7093, |
| "num_input_tokens_seen": 66846720, |
| "step": 1020, |
| "train_runtime": 9256.0483, |
| "train_tokens_per_second": 7221.95 |
| }, |
| { |
| "epoch": 0.27731035875344956, |
| "grad_norm": 0.8046875, |
| "learning_rate": 4.9798445078657964e-05, |
| "loss": 4.574, |
| "num_input_tokens_seen": 67502080, |
| "step": 1030, |
| "train_runtime": 9346.5081, |
| "train_tokens_per_second": 7222.171 |
| }, |
| { |
| "epoch": 0.2800026923335801, |
| "grad_norm": 0.6640625, |
| "learning_rate": 4.979148108317348e-05, |
| "loss": 4.6194, |
| "num_input_tokens_seen": 68157440, |
| "step": 1040, |
| "train_runtime": 9437.3179, |
| "train_tokens_per_second": 7222.12 |
| }, |
| { |
| "epoch": 0.2826950259137107, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.978439931814024e-05, |
| "loss": 4.5994, |
| "num_input_tokens_seen": 68812800, |
| "step": 1050, |
| "train_runtime": 9528.5122, |
| "train_tokens_per_second": 7221.778 |
| }, |
| { |
| "epoch": 0.2853873594938413, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.977719981719949e-05, |
| "loss": 4.719, |
| "num_input_tokens_seen": 69468160, |
| "step": 1060, |
| "train_runtime": 9619.4388, |
| "train_tokens_per_second": 7221.644 |
| }, |
| { |
| "epoch": 0.28807969307397185, |
| "grad_norm": 0.70703125, |
| "learning_rate": 4.9769882614551775e-05, |
| "loss": 4.6091, |
| "num_input_tokens_seen": 70123520, |
| "step": 1070, |
| "train_runtime": 9710.4832, |
| "train_tokens_per_second": 7221.424 |
| }, |
| { |
| "epoch": 0.29077202665410246, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.9762447744956754e-05, |
| "loss": 4.6141, |
| "num_input_tokens_seen": 70778880, |
| "step": 1080, |
| "train_runtime": 9801.2058, |
| "train_tokens_per_second": 7221.446 |
| }, |
| { |
| "epoch": 0.293464360234233, |
| "grad_norm": 0.671875, |
| "learning_rate": 4.975489524373306e-05, |
| "loss": 4.7019, |
| "num_input_tokens_seen": 71434240, |
| "step": 1090, |
| "train_runtime": 9891.44, |
| "train_tokens_per_second": 7221.824 |
| }, |
| { |
| "epoch": 0.2961566938143636, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.9747225146758127e-05, |
| "loss": 4.7062, |
| "num_input_tokens_seen": 72089600, |
| "step": 1100, |
| "train_runtime": 9981.7977, |
| "train_tokens_per_second": 7222.106 |
| }, |
| { |
| "epoch": 0.2988490273944942, |
| "grad_norm": 0.7890625, |
| "learning_rate": 4.973943749046801e-05, |
| "loss": 4.6744, |
| "num_input_tokens_seen": 72744960, |
| "step": 1110, |
| "train_runtime": 10072.6114, |
| "train_tokens_per_second": 7222.056 |
| }, |
| { |
| "epoch": 0.30154136097462475, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.973153231185722e-05, |
| "loss": 4.6924, |
| "num_input_tokens_seen": 73400320, |
| "step": 1120, |
| "train_runtime": 10163.7303, |
| "train_tokens_per_second": 7221.789 |
| }, |
| { |
| "epoch": 0.30423369455475535, |
| "grad_norm": 0.71875, |
| "learning_rate": 4.972350964847856e-05, |
| "loss": 4.6235, |
| "num_input_tokens_seen": 74055680, |
| "step": 1130, |
| "train_runtime": 10254.3897, |
| "train_tokens_per_second": 7221.852 |
| }, |
| { |
| "epoch": 0.3069260281348859, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.971536953844292e-05, |
| "loss": 4.6009, |
| "num_input_tokens_seen": 74711040, |
| "step": 1140, |
| "train_runtime": 10344.7658, |
| "train_tokens_per_second": 7222.11 |
| }, |
| { |
| "epoch": 0.3096183617150165, |
| "grad_norm": 0.80859375, |
| "learning_rate": 4.97071120204191e-05, |
| "loss": 4.6823, |
| "num_input_tokens_seen": 75366400, |
| "step": 1150, |
| "train_runtime": 10435.3375, |
| "train_tokens_per_second": 7222.229 |
| }, |
| { |
| "epoch": 0.31231069529514704, |
| "grad_norm": 0.8046875, |
| "learning_rate": 4.969873713363365e-05, |
| "loss": 4.5743, |
| "num_input_tokens_seen": 76021760, |
| "step": 1160, |
| "train_runtime": 10525.9663, |
| "train_tokens_per_second": 7222.307 |
| }, |
| { |
| "epoch": 0.31500302887527765, |
| "grad_norm": 0.765625, |
| "learning_rate": 4.9690244917870666e-05, |
| "loss": 4.5756, |
| "num_input_tokens_seen": 76677120, |
| "step": 1170, |
| "train_runtime": 10616.9618, |
| "train_tokens_per_second": 7222.134 |
| }, |
| { |
| "epoch": 0.31769536245540825, |
| "grad_norm": 0.75390625, |
| "learning_rate": 4.96816354134716e-05, |
| "loss": 4.663, |
| "num_input_tokens_seen": 77332480, |
| "step": 1180, |
| "train_runtime": 10707.8238, |
| "train_tokens_per_second": 7222.054 |
| }, |
| { |
| "epoch": 0.3203876960355388, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.967290866133509e-05, |
| "loss": 4.6054, |
| "num_input_tokens_seen": 77987840, |
| "step": 1190, |
| "train_runtime": 10798.3303, |
| "train_tokens_per_second": 7222.213 |
| }, |
| { |
| "epoch": 0.3230800296156694, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.9664064702916714e-05, |
| "loss": 4.6963, |
| "num_input_tokens_seen": 78643200, |
| "step": 1200, |
| "train_runtime": 10888.809, |
| "train_tokens_per_second": 7222.388 |
| }, |
| { |
| "epoch": 0.32577236319579994, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.965510358022886e-05, |
| "loss": 4.5616, |
| "num_input_tokens_seen": 79298560, |
| "step": 1210, |
| "train_runtime": 10979.4745, |
| "train_tokens_per_second": 7222.437 |
| }, |
| { |
| "epoch": 0.32846469677593054, |
| "grad_norm": 0.7265625, |
| "learning_rate": 4.964602533584046e-05, |
| "loss": 4.58, |
| "num_input_tokens_seen": 79953920, |
| "step": 1220, |
| "train_runtime": 11069.847, |
| "train_tokens_per_second": 7222.676 |
| }, |
| { |
| "epoch": 0.3311570303560611, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.9636830012876874e-05, |
| "loss": 4.5397, |
| "num_input_tokens_seen": 80609280, |
| "step": 1230, |
| "train_runtime": 11160.1095, |
| "train_tokens_per_second": 7222.983 |
| }, |
| { |
| "epoch": 0.3338493639361917, |
| "grad_norm": 0.78125, |
| "learning_rate": 4.9627517655019576e-05, |
| "loss": 4.7348, |
| "num_input_tokens_seen": 81264640, |
| "step": 1240, |
| "train_runtime": 11250.738, |
| "train_tokens_per_second": 7223.05 |
| }, |
| { |
| "epoch": 0.3365416975163223, |
| "grad_norm": 0.765625, |
| "learning_rate": 4.9618088306506033e-05, |
| "loss": 4.6434, |
| "num_input_tokens_seen": 81920000, |
| "step": 1250, |
| "train_runtime": 11341.6932, |
| "train_tokens_per_second": 7222.907 |
| }, |
| { |
| "epoch": 0.33923403109645284, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.9608542012129464e-05, |
| "loss": 4.5681, |
| "num_input_tokens_seen": 82575360, |
| "step": 1260, |
| "train_runtime": 11432.4636, |
| "train_tokens_per_second": 7222.884 |
| }, |
| { |
| "epoch": 0.34192636467658344, |
| "grad_norm": 0.79296875, |
| "learning_rate": 4.9598878817238614e-05, |
| "loss": 4.5804, |
| "num_input_tokens_seen": 83230720, |
| "step": 1270, |
| "train_runtime": 11523.2458, |
| "train_tokens_per_second": 7222.854 |
| }, |
| { |
| "epoch": 0.344618698256714, |
| "grad_norm": 0.7890625, |
| "learning_rate": 4.9589098767737576e-05, |
| "loss": 4.603, |
| "num_input_tokens_seen": 83886080, |
| "step": 1280, |
| "train_runtime": 11613.741, |
| "train_tokens_per_second": 7223.002 |
| }, |
| { |
| "epoch": 0.3473110318368446, |
| "grad_norm": 0.69140625, |
| "learning_rate": 4.9579201910085515e-05, |
| "loss": 4.6111, |
| "num_input_tokens_seen": 84541440, |
| "step": 1290, |
| "train_runtime": 11704.8318, |
| "train_tokens_per_second": 7222.781 |
| }, |
| { |
| "epoch": 0.3500033654169752, |
| "grad_norm": 0.82421875, |
| "learning_rate": 4.956918829129652e-05, |
| "loss": 4.5381, |
| "num_input_tokens_seen": 85196800, |
| "step": 1300, |
| "train_runtime": 11795.0371, |
| "train_tokens_per_second": 7223.106 |
| }, |
| { |
| "epoch": 0.35269569899710573, |
| "grad_norm": 0.76171875, |
| "learning_rate": 4.955905795893933e-05, |
| "loss": 4.5358, |
| "num_input_tokens_seen": 85852160, |
| "step": 1310, |
| "train_runtime": 11885.3743, |
| "train_tokens_per_second": 7223.345 |
| }, |
| { |
| "epoch": 0.35538803257723633, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.9548810961137084e-05, |
| "loss": 4.534, |
| "num_input_tokens_seen": 86507520, |
| "step": 1320, |
| "train_runtime": 11976.5494, |
| "train_tokens_per_second": 7223.075 |
| }, |
| { |
| "epoch": 0.3580803661573669, |
| "grad_norm": 0.7265625, |
| "learning_rate": 4.953844734656719e-05, |
| "loss": 4.5764, |
| "num_input_tokens_seen": 87162880, |
| "step": 1330, |
| "train_runtime": 12067.2148, |
| "train_tokens_per_second": 7223.115 |
| }, |
| { |
| "epoch": 0.3607726997374975, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.9527967164460995e-05, |
| "loss": 4.5397, |
| "num_input_tokens_seen": 87818240, |
| "step": 1340, |
| "train_runtime": 12157.8843, |
| "train_tokens_per_second": 7223.152 |
| }, |
| { |
| "epoch": 0.36346503331762803, |
| "grad_norm": 0.765625, |
| "learning_rate": 4.9517370464603595e-05, |
| "loss": 4.6979, |
| "num_input_tokens_seen": 88473600, |
| "step": 1350, |
| "train_runtime": 12248.3715, |
| "train_tokens_per_second": 7223.295 |
| }, |
| { |
| "epoch": 0.36615736689775863, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.950665729733359e-05, |
| "loss": 4.6323, |
| "num_input_tokens_seen": 89128960, |
| "step": 1360, |
| "train_runtime": 12338.7122, |
| "train_tokens_per_second": 7223.522 |
| }, |
| { |
| "epoch": 0.36884970047788923, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.949582771354287e-05, |
| "loss": 4.5588, |
| "num_input_tokens_seen": 89784320, |
| "step": 1370, |
| "train_runtime": 12429.1409, |
| "train_tokens_per_second": 7223.695 |
| }, |
| { |
| "epoch": 0.3715420340580198, |
| "grad_norm": 0.75390625, |
| "learning_rate": 4.948488176467631e-05, |
| "loss": 4.5774, |
| "num_input_tokens_seen": 90439680, |
| "step": 1380, |
| "train_runtime": 12519.317, |
| "train_tokens_per_second": 7224.011 |
| }, |
| { |
| "epoch": 0.3742343676381504, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.94738195027316e-05, |
| "loss": 4.519, |
| "num_input_tokens_seen": 91095040, |
| "step": 1390, |
| "train_runtime": 12610.0394, |
| "train_tokens_per_second": 7224.009 |
| }, |
| { |
| "epoch": 0.3769267012182809, |
| "grad_norm": 0.8125, |
| "learning_rate": 4.946264098025895e-05, |
| "loss": 4.565, |
| "num_input_tokens_seen": 91750400, |
| "step": 1400, |
| "train_runtime": 12700.1302, |
| "train_tokens_per_second": 7224.367 |
| }, |
| { |
| "epoch": 0.3796190347984115, |
| "grad_norm": 0.83203125, |
| "learning_rate": 4.945134625036087e-05, |
| "loss": 4.5675, |
| "num_input_tokens_seen": 92405760, |
| "step": 1410, |
| "train_runtime": 12790.5268, |
| "train_tokens_per_second": 7224.547 |
| }, |
| { |
| "epoch": 0.3823113683785421, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.9439935366691855e-05, |
| "loss": 4.6965, |
| "num_input_tokens_seen": 93061120, |
| "step": 1420, |
| "train_runtime": 12880.7495, |
| "train_tokens_per_second": 7224.822 |
| }, |
| { |
| "epoch": 0.3850037019586727, |
| "grad_norm": 0.79296875, |
| "learning_rate": 4.9428408383458244e-05, |
| "loss": 4.5986, |
| "num_input_tokens_seen": 93716480, |
| "step": 1430, |
| "train_runtime": 12971.4303, |
| "train_tokens_per_second": 7224.838 |
| }, |
| { |
| "epoch": 0.3876960355388033, |
| "grad_norm": 0.71875, |
| "learning_rate": 4.941676535541785e-05, |
| "loss": 4.4871, |
| "num_input_tokens_seen": 94371840, |
| "step": 1440, |
| "train_runtime": 13061.9531, |
| "train_tokens_per_second": 7224.941 |
| }, |
| { |
| "epoch": 0.3903883691189338, |
| "grad_norm": 0.82421875, |
| "learning_rate": 4.940500633787976e-05, |
| "loss": 4.5071, |
| "num_input_tokens_seen": 95027200, |
| "step": 1450, |
| "train_runtime": 13152.757, |
| "train_tokens_per_second": 7224.888 |
| }, |
| { |
| "epoch": 0.3930807026990644, |
| "grad_norm": 0.75, |
| "learning_rate": 4.9393131386704054e-05, |
| "loss": 4.4905, |
| "num_input_tokens_seen": 95682560, |
| "step": 1460, |
| "train_runtime": 13243.3978, |
| "train_tokens_per_second": 7224.925 |
| }, |
| { |
| "epoch": 0.39577303627919497, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.938114055830155e-05, |
| "loss": 4.4752, |
| "num_input_tokens_seen": 96337920, |
| "step": 1470, |
| "train_runtime": 13333.6559, |
| "train_tokens_per_second": 7225.169 |
| }, |
| { |
| "epoch": 0.39846536985932557, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.936903390963353e-05, |
| "loss": 4.5764, |
| "num_input_tokens_seen": 96993280, |
| "step": 1480, |
| "train_runtime": 13424.6859, |
| "train_tokens_per_second": 7224.994 |
| }, |
| { |
| "epoch": 0.40115770343945617, |
| "grad_norm": 0.83984375, |
| "learning_rate": 4.935681149821147e-05, |
| "loss": 4.6151, |
| "num_input_tokens_seen": 97648640, |
| "step": 1490, |
| "train_runtime": 13515.4724, |
| "train_tokens_per_second": 7224.952 |
| }, |
| { |
| "epoch": 0.4038500370195867, |
| "grad_norm": 0.7734375, |
| "learning_rate": 4.9344473382096747e-05, |
| "loss": 4.4216, |
| "num_input_tokens_seen": 98304000, |
| "step": 1500, |
| "train_runtime": 13606.7544, |
| "train_tokens_per_second": 7224.647 |
| }, |
| { |
| "epoch": 0.4065423705997173, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.93320196199004e-05, |
| "loss": 4.6077, |
| "num_input_tokens_seen": 98959360, |
| "step": 1510, |
| "train_runtime": 13713.7734, |
| "train_tokens_per_second": 7216.056 |
| }, |
| { |
| "epoch": 0.40923470417984786, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.931945027078283e-05, |
| "loss": 4.4706, |
| "num_input_tokens_seen": 99614720, |
| "step": 1520, |
| "train_runtime": 13803.8041, |
| "train_tokens_per_second": 7216.469 |
| }, |
| { |
| "epoch": 0.41192703775997846, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.9306765394453524e-05, |
| "loss": 4.5019, |
| "num_input_tokens_seen": 100270080, |
| "step": 1530, |
| "train_runtime": 13894.7642, |
| "train_tokens_per_second": 7216.393 |
| }, |
| { |
| "epoch": 0.41461937134010907, |
| "grad_norm": 0.828125, |
| "learning_rate": 4.9293965051170775e-05, |
| "loss": 4.5346, |
| "num_input_tokens_seen": 100925440, |
| "step": 1540, |
| "train_runtime": 13985.7264, |
| "train_tokens_per_second": 7216.317 |
| }, |
| { |
| "epoch": 0.4173117049202396, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.928104930174137e-05, |
| "loss": 4.5612, |
| "num_input_tokens_seen": 101580800, |
| "step": 1550, |
| "train_runtime": 14076.8567, |
| "train_tokens_per_second": 7216.156 |
| }, |
| { |
| "epoch": 0.4200040385003702, |
| "grad_norm": 0.81640625, |
| "learning_rate": 4.926801820752035e-05, |
| "loss": 4.5452, |
| "num_input_tokens_seen": 102236160, |
| "step": 1560, |
| "train_runtime": 14167.6497, |
| "train_tokens_per_second": 7216.169 |
| }, |
| { |
| "epoch": 0.42269637208050076, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.925487183041065e-05, |
| "loss": 4.555, |
| "num_input_tokens_seen": 102891520, |
| "step": 1570, |
| "train_runtime": 14258.1592, |
| "train_tokens_per_second": 7216.326 |
| }, |
| { |
| "epoch": 0.42538870566063136, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.924161023286291e-05, |
| "loss": 4.4827, |
| "num_input_tokens_seen": 103546880, |
| "step": 1580, |
| "train_runtime": 14349.9231, |
| "train_tokens_per_second": 7215.849 |
| }, |
| { |
| "epoch": 0.4280810392407619, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.9228233477875044e-05, |
| "loss": 4.4487, |
| "num_input_tokens_seen": 104202240, |
| "step": 1590, |
| "train_runtime": 14440.8839, |
| "train_tokens_per_second": 7215.78 |
| }, |
| { |
| "epoch": 0.4307733728208925, |
| "grad_norm": 0.765625, |
| "learning_rate": 4.921474162899206e-05, |
| "loss": 4.3774, |
| "num_input_tokens_seen": 104857600, |
| "step": 1600, |
| "train_runtime": 14532.0545, |
| "train_tokens_per_second": 7215.607 |
| }, |
| { |
| "epoch": 0.4334657064010231, |
| "grad_norm": 0.7421875, |
| "learning_rate": 4.920113475030568e-05, |
| "loss": 4.5434, |
| "num_input_tokens_seen": 105512960, |
| "step": 1610, |
| "train_runtime": 14623.2846, |
| "train_tokens_per_second": 7215.408 |
| }, |
| { |
| "epoch": 0.43615803998115366, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.9187412906454066e-05, |
| "loss": 4.4334, |
| "num_input_tokens_seen": 106168320, |
| "step": 1620, |
| "train_runtime": 14714.7556, |
| "train_tokens_per_second": 7215.092 |
| }, |
| { |
| "epoch": 0.43885037356128426, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.917357616262153e-05, |
| "loss": 4.5523, |
| "num_input_tokens_seen": 106823680, |
| "step": 1630, |
| "train_runtime": 14805.4885, |
| "train_tokens_per_second": 7215.14 |
| }, |
| { |
| "epoch": 0.4415427071414148, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.91596245845382e-05, |
| "loss": 4.5495, |
| "num_input_tokens_seen": 107479040, |
| "step": 1640, |
| "train_runtime": 14896.2954, |
| "train_tokens_per_second": 7215.152 |
| }, |
| { |
| "epoch": 0.4442350407215454, |
| "grad_norm": 0.828125, |
| "learning_rate": 4.914555823847969e-05, |
| "loss": 4.496, |
| "num_input_tokens_seen": 108134400, |
| "step": 1650, |
| "train_runtime": 14987.0507, |
| "train_tokens_per_second": 7215.189 |
| }, |
| { |
| "epoch": 0.44692737430167595, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.913137719126684e-05, |
| "loss": 4.4586, |
| "num_input_tokens_seen": 108789760, |
| "step": 1660, |
| "train_runtime": 15078.4493, |
| "train_tokens_per_second": 7214.917 |
| }, |
| { |
| "epoch": 0.44961970788180655, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.911708151026535e-05, |
| "loss": 4.4917, |
| "num_input_tokens_seen": 109445120, |
| "step": 1670, |
| "train_runtime": 15169.1949, |
| "train_tokens_per_second": 7214.959 |
| }, |
| { |
| "epoch": 0.45231204146193715, |
| "grad_norm": 0.80859375, |
| "learning_rate": 4.910267126338547e-05, |
| "loss": 4.444, |
| "num_input_tokens_seen": 110100480, |
| "step": 1680, |
| "train_runtime": 15260.849, |
| "train_tokens_per_second": 7214.571 |
| }, |
| { |
| "epoch": 0.4550043750420677, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.90881465190817e-05, |
| "loss": 4.5261, |
| "num_input_tokens_seen": 110755840, |
| "step": 1690, |
| "train_runtime": 15351.7417, |
| "train_tokens_per_second": 7214.546 |
| }, |
| { |
| "epoch": 0.4576967086221983, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.9073507346352446e-05, |
| "loss": 4.4845, |
| "num_input_tokens_seen": 111411200, |
| "step": 1700, |
| "train_runtime": 15443.3127, |
| "train_tokens_per_second": 7214.203 |
| }, |
| { |
| "epoch": 0.46038904220232885, |
| "grad_norm": 0.83984375, |
| "learning_rate": 4.905875381473968e-05, |
| "loss": 4.4624, |
| "num_input_tokens_seen": 112066560, |
| "step": 1710, |
| "train_runtime": 15534.4543, |
| "train_tokens_per_second": 7214.065 |
| }, |
| { |
| "epoch": 0.46308137578245945, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.904388599432864e-05, |
| "loss": 4.4836, |
| "num_input_tokens_seen": 112721920, |
| "step": 1720, |
| "train_runtime": 15625.6018, |
| "train_tokens_per_second": 7213.925 |
| }, |
| { |
| "epoch": 0.46577370936259005, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.902890395574749e-05, |
| "loss": 4.4618, |
| "num_input_tokens_seen": 113377280, |
| "step": 1730, |
| "train_runtime": 15716.5331, |
| "train_tokens_per_second": 7213.886 |
| }, |
| { |
| "epoch": 0.4684660429427206, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.901380777016695e-05, |
| "loss": 4.3769, |
| "num_input_tokens_seen": 114032640, |
| "step": 1740, |
| "train_runtime": 15807.8819, |
| "train_tokens_per_second": 7213.657 |
| }, |
| { |
| "epoch": 0.4711583765228512, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.899859750930001e-05, |
| "loss": 4.553, |
| "num_input_tokens_seen": 114688000, |
| "step": 1750, |
| "train_runtime": 15899.4164, |
| "train_tokens_per_second": 7213.347 |
| }, |
| { |
| "epoch": 0.47385071010298174, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.898327324540154e-05, |
| "loss": 4.4616, |
| "num_input_tokens_seen": 115343360, |
| "step": 1760, |
| "train_runtime": 15990.6527, |
| "train_tokens_per_second": 7213.174 |
| }, |
| { |
| "epoch": 0.47654304368311234, |
| "grad_norm": 0.83984375, |
| "learning_rate": 4.8967835051267995e-05, |
| "loss": 4.455, |
| "num_input_tokens_seen": 115998720, |
| "step": 1770, |
| "train_runtime": 16082.1424, |
| "train_tokens_per_second": 7212.89 |
| }, |
| { |
| "epoch": 0.4792353772632429, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.895228300023703e-05, |
| "loss": 4.4788, |
| "num_input_tokens_seen": 116654080, |
| "step": 1780, |
| "train_runtime": 16173.2498, |
| "train_tokens_per_second": 7212.779 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.893661716618716e-05, |
| "loss": 4.347, |
| "num_input_tokens_seen": 117309440, |
| "step": 1790, |
| "train_runtime": 16264.5135, |
| "train_tokens_per_second": 7212.601 |
| }, |
| { |
| "epoch": 0.4846200444235041, |
| "grad_norm": 0.8125, |
| "learning_rate": 4.892083762353744e-05, |
| "loss": 4.4545, |
| "num_input_tokens_seen": 117964800, |
| "step": 1800, |
| "train_runtime": 16355.8454, |
| "train_tokens_per_second": 7212.394 |
| }, |
| { |
| "epoch": 0.48731237800363464, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.890494444724706e-05, |
| "loss": 4.4354, |
| "num_input_tokens_seen": 118620160, |
| "step": 1810, |
| "train_runtime": 16447.3794, |
| "train_tokens_per_second": 7212.101 |
| }, |
| { |
| "epoch": 0.49000471158376524, |
| "grad_norm": 0.85546875, |
| "learning_rate": 4.8888937712815034e-05, |
| "loss": 4.4927, |
| "num_input_tokens_seen": 119275520, |
| "step": 1820, |
| "train_runtime": 16538.3871, |
| "train_tokens_per_second": 7212.041 |
| }, |
| { |
| "epoch": 0.4926970451638958, |
| "grad_norm": 0.76171875, |
| "learning_rate": 4.887281749627981e-05, |
| "loss": 4.4122, |
| "num_input_tokens_seen": 119930880, |
| "step": 1830, |
| "train_runtime": 16629.7048, |
| "train_tokens_per_second": 7211.847 |
| }, |
| { |
| "epoch": 0.4953893787440264, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.8856583874218926e-05, |
| "loss": 4.3483, |
| "num_input_tokens_seen": 120586240, |
| "step": 1840, |
| "train_runtime": 16721.1413, |
| "train_tokens_per_second": 7211.603 |
| }, |
| { |
| "epoch": 0.498081712324157, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.884023692374865e-05, |
| "loss": 4.3592, |
| "num_input_tokens_seen": 121241600, |
| "step": 1850, |
| "train_runtime": 16812.5418, |
| "train_tokens_per_second": 7211.378 |
| }, |
| { |
| "epoch": 0.5007740459042875, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.8823776722523596e-05, |
| "loss": 4.4645, |
| "num_input_tokens_seen": 121896960, |
| "step": 1860, |
| "train_runtime": 16903.8606, |
| "train_tokens_per_second": 7211.191 |
| }, |
| { |
| "epoch": 0.5034663794844181, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.880720334873638e-05, |
| "loss": 4.4003, |
| "num_input_tokens_seen": 122552320, |
| "step": 1870, |
| "train_runtime": 16995.2262, |
| "train_tokens_per_second": 7210.985 |
| }, |
| { |
| "epoch": 0.5061587130645487, |
| "grad_norm": 0.9140625, |
| "learning_rate": 4.879051688111719e-05, |
| "loss": 4.474, |
| "num_input_tokens_seen": 123207680, |
| "step": 1880, |
| "train_runtime": 17086.3329, |
| "train_tokens_per_second": 7210.891 |
| }, |
| { |
| "epoch": 0.5088510466446793, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.877371739893352e-05, |
| "loss": 4.4175, |
| "num_input_tokens_seen": 123863040, |
| "step": 1890, |
| "train_runtime": 17177.0386, |
| "train_tokens_per_second": 7210.966 |
| }, |
| { |
| "epoch": 0.5115433802248098, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.875680498198968e-05, |
| "loss": 4.3441, |
| "num_input_tokens_seen": 124518400, |
| "step": 1900, |
| "train_runtime": 17267.9962, |
| "train_tokens_per_second": 7210.935 |
| }, |
| { |
| "epoch": 0.5142357138049405, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.873977971062649e-05, |
| "loss": 4.5013, |
| "num_input_tokens_seen": 125173760, |
| "step": 1910, |
| "train_runtime": 17359.2257, |
| "train_tokens_per_second": 7210.792 |
| }, |
| { |
| "epoch": 0.516928047385071, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.872264166572086e-05, |
| "loss": 4.3947, |
| "num_input_tokens_seen": 125829120, |
| "step": 1920, |
| "train_runtime": 17450.3296, |
| "train_tokens_per_second": 7210.702 |
| }, |
| { |
| "epoch": 0.5196203809652016, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.870539092868542e-05, |
| "loss": 4.5027, |
| "num_input_tokens_seen": 126484480, |
| "step": 1930, |
| "train_runtime": 17541.1892, |
| "train_tokens_per_second": 7210.713 |
| }, |
| { |
| "epoch": 0.5223127145453321, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.868802758146816e-05, |
| "loss": 4.4597, |
| "num_input_tokens_seen": 127139840, |
| "step": 1940, |
| "train_runtime": 17632.9992, |
| "train_tokens_per_second": 7210.335 |
| }, |
| { |
| "epoch": 0.5250050481254628, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.867055170655197e-05, |
| "loss": 4.4397, |
| "num_input_tokens_seen": 127795200, |
| "step": 1950, |
| "train_runtime": 17724.0126, |
| "train_tokens_per_second": 7210.286 |
| }, |
| { |
| "epoch": 0.5276973817055933, |
| "grad_norm": 0.85546875, |
| "learning_rate": 4.865296338695432e-05, |
| "loss": 4.2699, |
| "num_input_tokens_seen": 128450560, |
| "step": 1960, |
| "train_runtime": 17815.026, |
| "train_tokens_per_second": 7210.237 |
| }, |
| { |
| "epoch": 0.5303897152857239, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.863526270622683e-05, |
| "loss": 4.4383, |
| "num_input_tokens_seen": 129105920, |
| "step": 1970, |
| "train_runtime": 17905.7807, |
| "train_tokens_per_second": 7210.293 |
| }, |
| { |
| "epoch": 0.5330820488658545, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.86174497484549e-05, |
| "loss": 4.4799, |
| "num_input_tokens_seen": 129761280, |
| "step": 1980, |
| "train_runtime": 17997.6594, |
| "train_tokens_per_second": 7209.898 |
| }, |
| { |
| "epoch": 0.5357743824459851, |
| "grad_norm": 0.8046875, |
| "learning_rate": 4.859952459825726e-05, |
| "loss": 4.4029, |
| "num_input_tokens_seen": 130416640, |
| "step": 1990, |
| "train_runtime": 18088.8465, |
| "train_tokens_per_second": 7209.782 |
| }, |
| { |
| "epoch": 0.5384667160261156, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.8581487340785614e-05, |
| "loss": 4.3795, |
| "num_input_tokens_seen": 131072000, |
| "step": 2000, |
| "train_runtime": 18180.2553, |
| "train_tokens_per_second": 7209.58 |
| }, |
| { |
| "epoch": 0.5411590496062462, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.856333806172422e-05, |
| "loss": 4.3712, |
| "num_input_tokens_seen": 131727360, |
| "step": 2010, |
| "train_runtime": 18289.7026, |
| "train_tokens_per_second": 7202.269 |
| }, |
| { |
| "epoch": 0.5438513831863768, |
| "grad_norm": 0.7890625, |
| "learning_rate": 4.8545076847289495e-05, |
| "loss": 4.4262, |
| "num_input_tokens_seen": 132382720, |
| "step": 2020, |
| "train_runtime": 18380.7598, |
| "train_tokens_per_second": 7202.244 |
| }, |
| { |
| "epoch": 0.5465437167665074, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.8526703784229566e-05, |
| "loss": 4.4754, |
| "num_input_tokens_seen": 133038080, |
| "step": 2030, |
| "train_runtime": 18471.8385, |
| "train_tokens_per_second": 7202.211 |
| }, |
| { |
| "epoch": 0.5492360503466379, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.8508218959823916e-05, |
| "loss": 4.409, |
| "num_input_tokens_seen": 133693440, |
| "step": 2040, |
| "train_runtime": 18563.4788, |
| "train_tokens_per_second": 7201.96 |
| }, |
| { |
| "epoch": 0.5519283839267686, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.848962246188292e-05, |
| "loss": 4.3891, |
| "num_input_tokens_seen": 134348800, |
| "step": 2050, |
| "train_runtime": 18654.1561, |
| "train_tokens_per_second": 7202.084 |
| }, |
| { |
| "epoch": 0.5546207175068991, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.8470914378747464e-05, |
| "loss": 4.3823, |
| "num_input_tokens_seen": 135004160, |
| "step": 2060, |
| "train_runtime": 18745.2629, |
| "train_tokens_per_second": 7202.041 |
| }, |
| { |
| "epoch": 0.5573130510870297, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.845209479928849e-05, |
| "loss": 4.3553, |
| "num_input_tokens_seen": 135659520, |
| "step": 2070, |
| "train_runtime": 18836.6425, |
| "train_tokens_per_second": 7201.895 |
| }, |
| { |
| "epoch": 0.5600053846671602, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.843316381290661e-05, |
| "loss": 4.3418, |
| "num_input_tokens_seen": 136314880, |
| "step": 2080, |
| "train_runtime": 18927.6258, |
| "train_tokens_per_second": 7201.901 |
| }, |
| { |
| "epoch": 0.5626977182472909, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.8414121509531645e-05, |
| "loss": 4.4231, |
| "num_input_tokens_seen": 136970240, |
| "step": 2090, |
| "train_runtime": 19018.1391, |
| "train_tokens_per_second": 7202.084 |
| }, |
| { |
| "epoch": 0.5653900518274214, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.839496797962224e-05, |
| "loss": 4.4212, |
| "num_input_tokens_seen": 137625600, |
| "step": 2100, |
| "train_runtime": 19109.2304, |
| "train_tokens_per_second": 7202.048 |
| }, |
| { |
| "epoch": 0.568082385407552, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.837570331416539e-05, |
| "loss": 4.4107, |
| "num_input_tokens_seen": 138280960, |
| "step": 2110, |
| "train_runtime": 19200.5208, |
| "train_tokens_per_second": 7201.938 |
| }, |
| { |
| "epoch": 0.5707747189876826, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.835632760467604e-05, |
| "loss": 4.3413, |
| "num_input_tokens_seen": 138936320, |
| "step": 2120, |
| "train_runtime": 19292.2653, |
| "train_tokens_per_second": 7201.659 |
| }, |
| { |
| "epoch": 0.5734670525678132, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.8336840943196636e-05, |
| "loss": 4.3488, |
| "num_input_tokens_seen": 139591680, |
| "step": 2130, |
| "train_runtime": 19382.8953, |
| "train_tokens_per_second": 7201.797 |
| }, |
| { |
| "epoch": 0.5761593861479437, |
| "grad_norm": 0.88671875, |
| "learning_rate": 4.8317243422296695e-05, |
| "loss": 4.3576, |
| "num_input_tokens_seen": 140247040, |
| "step": 2140, |
| "train_runtime": 19474.0961, |
| "train_tokens_per_second": 7201.723 |
| }, |
| { |
| "epoch": 0.5788517197280743, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.8297535135072345e-05, |
| "loss": 4.3835, |
| "num_input_tokens_seen": 140902400, |
| "step": 2150, |
| "train_runtime": 19565.1786, |
| "train_tokens_per_second": 7201.693 |
| }, |
| { |
| "epoch": 0.5815440533082049, |
| "grad_norm": 0.9140625, |
| "learning_rate": 4.8277716175145926e-05, |
| "loss": 4.4169, |
| "num_input_tokens_seen": 141557760, |
| "step": 2160, |
| "train_runtime": 19656.3068, |
| "train_tokens_per_second": 7201.646 |
| }, |
| { |
| "epoch": 0.5842363868883355, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.825778663666549e-05, |
| "loss": 4.3731, |
| "num_input_tokens_seen": 142213120, |
| "step": 2170, |
| "train_runtime": 19747.5839, |
| "train_tokens_per_second": 7201.545 |
| }, |
| { |
| "epoch": 0.586928720468466, |
| "grad_norm": 0.796875, |
| "learning_rate": 4.8237746614304404e-05, |
| "loss": 4.3626, |
| "num_input_tokens_seen": 142868480, |
| "step": 2180, |
| "train_runtime": 19838.3892, |
| "train_tokens_per_second": 7201.617 |
| }, |
| { |
| "epoch": 0.5896210540485967, |
| "grad_norm": 0.83203125, |
| "learning_rate": 4.821759620326086e-05, |
| "loss": 4.4174, |
| "num_input_tokens_seen": 143523840, |
| "step": 2190, |
| "train_runtime": 19929.5944, |
| "train_tokens_per_second": 7201.543 |
| }, |
| { |
| "epoch": 0.5923133876287272, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.819733549925746e-05, |
| "loss": 4.2844, |
| "num_input_tokens_seen": 144179200, |
| "step": 2200, |
| "train_runtime": 20020.9319, |
| "train_tokens_per_second": 7201.423 |
| }, |
| { |
| "epoch": 0.5950057212088578, |
| "grad_norm": 0.875, |
| "learning_rate": 4.817696459854072e-05, |
| "loss": 4.3374, |
| "num_input_tokens_seen": 144834560, |
| "step": 2210, |
| "train_runtime": 20111.5767, |
| "train_tokens_per_second": 7201.552 |
| }, |
| { |
| "epoch": 0.5976980547889884, |
| "grad_norm": 0.875, |
| "learning_rate": 4.815648359788065e-05, |
| "loss": 4.3078, |
| "num_input_tokens_seen": 145489920, |
| "step": 2220, |
| "train_runtime": 20203.0906, |
| "train_tokens_per_second": 7201.369 |
| }, |
| { |
| "epoch": 0.600390388369119, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.8135892594570284e-05, |
| "loss": 4.422, |
| "num_input_tokens_seen": 146145280, |
| "step": 2230, |
| "train_runtime": 20294.0976, |
| "train_tokens_per_second": 7201.369 |
| }, |
| { |
| "epoch": 0.6030827219492495, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.81151916864252e-05, |
| "loss": 4.3721, |
| "num_input_tokens_seen": 146800640, |
| "step": 2240, |
| "train_runtime": 20384.6845, |
| "train_tokens_per_second": 7201.516 |
| }, |
| { |
| "epoch": 0.60577505552938, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.809438097178306e-05, |
| "loss": 4.3855, |
| "num_input_tokens_seen": 147456000, |
| "step": 2250, |
| "train_runtime": 20476.0301, |
| "train_tokens_per_second": 7201.396 |
| }, |
| { |
| "epoch": 0.6084673891095107, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.807346054950319e-05, |
| "loss": 4.2855, |
| "num_input_tokens_seen": 148111360, |
| "step": 2260, |
| "train_runtime": 20567.2353, |
| "train_tokens_per_second": 7201.326 |
| }, |
| { |
| "epoch": 0.6111597226896412, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.805243051896603e-05, |
| "loss": 4.3628, |
| "num_input_tokens_seen": 148766720, |
| "step": 2270, |
| "train_runtime": 20657.7792, |
| "train_tokens_per_second": 7201.487 |
| }, |
| { |
| "epoch": 0.6138520562697718, |
| "grad_norm": 0.8828125, |
| "learning_rate": 4.8031290980072714e-05, |
| "loss": 4.4122, |
| "num_input_tokens_seen": 149422080, |
| "step": 2280, |
| "train_runtime": 20749.1493, |
| "train_tokens_per_second": 7201.359 |
| }, |
| { |
| "epoch": 0.6165443898499025, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.80100420332446e-05, |
| "loss": 4.4566, |
| "num_input_tokens_seen": 150077440, |
| "step": 2290, |
| "train_runtime": 20840.466, |
| "train_tokens_per_second": 7201.252 |
| }, |
| { |
| "epoch": 0.619236723430033, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.798868377942276e-05, |
| "loss": 4.4265, |
| "num_input_tokens_seen": 150732800, |
| "step": 2300, |
| "train_runtime": 20931.623, |
| "train_tokens_per_second": 7201.2 |
| }, |
| { |
| "epoch": 0.6219290570101635, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.796721632006754e-05, |
| "loss": 4.3916, |
| "num_input_tokens_seen": 151388160, |
| "step": 2310, |
| "train_runtime": 21022.8072, |
| "train_tokens_per_second": 7201.139 |
| }, |
| { |
| "epoch": 0.6246213905902941, |
| "grad_norm": 0.85546875, |
| "learning_rate": 4.794563975715803e-05, |
| "loss": 4.2368, |
| "num_input_tokens_seen": 152043520, |
| "step": 2320, |
| "train_runtime": 21114.2106, |
| "train_tokens_per_second": 7201.004 |
| }, |
| { |
| "epoch": 0.6273137241704247, |
| "grad_norm": 0.8359375, |
| "learning_rate": 4.792395419319163e-05, |
| "loss": 4.3248, |
| "num_input_tokens_seen": 152698880, |
| "step": 2330, |
| "train_runtime": 21205.6312, |
| "train_tokens_per_second": 7200.865 |
| }, |
| { |
| "epoch": 0.6300060577505553, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.7902159731183524e-05, |
| "loss": 4.2703, |
| "num_input_tokens_seen": 153354240, |
| "step": 2340, |
| "train_runtime": 21297.559, |
| "train_tokens_per_second": 7200.555 |
| }, |
| { |
| "epoch": 0.6326983913306858, |
| "grad_norm": 0.8359375, |
| "learning_rate": 4.7880256474666194e-05, |
| "loss": 4.3804, |
| "num_input_tokens_seen": 154009600, |
| "step": 2350, |
| "train_runtime": 21389.4114, |
| "train_tokens_per_second": 7200.273 |
| }, |
| { |
| "epoch": 0.6353907249108165, |
| "grad_norm": 1.0, |
| "learning_rate": 4.785824452768898e-05, |
| "loss": 4.3842, |
| "num_input_tokens_seen": 154664960, |
| "step": 2360, |
| "train_runtime": 21480.3832, |
| "train_tokens_per_second": 7200.289 |
| }, |
| { |
| "epoch": 0.638083058490947, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.783612399481751e-05, |
| "loss": 4.2943, |
| "num_input_tokens_seen": 155320320, |
| "step": 2370, |
| "train_runtime": 21571.6877, |
| "train_tokens_per_second": 7200.193 |
| }, |
| { |
| "epoch": 0.6407753920710776, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.781389498113324e-05, |
| "loss": 4.326, |
| "num_input_tokens_seen": 155975680, |
| "step": 2380, |
| "train_runtime": 21663.2694, |
| "train_tokens_per_second": 7200.006 |
| }, |
| { |
| "epoch": 0.6434677256512081, |
| "grad_norm": 0.828125, |
| "learning_rate": 4.779155759223298e-05, |
| "loss": 4.3084, |
| "num_input_tokens_seen": 156631040, |
| "step": 2390, |
| "train_runtime": 21754.438, |
| "train_tokens_per_second": 7199.958 |
| }, |
| { |
| "epoch": 0.6461600592313388, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.776911193422835e-05, |
| "loss": 4.3083, |
| "num_input_tokens_seen": 157286400, |
| "step": 2400, |
| "train_runtime": 21845.5014, |
| "train_tokens_per_second": 7199.945 |
| }, |
| { |
| "epoch": 0.6488523928114693, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.7746558113745276e-05, |
| "loss": 4.3757, |
| "num_input_tokens_seen": 157941760, |
| "step": 2410, |
| "train_runtime": 21937.6885, |
| "train_tokens_per_second": 7199.563 |
| }, |
| { |
| "epoch": 0.6515447263915999, |
| "grad_norm": 1.0, |
| "learning_rate": 4.7723896237923526e-05, |
| "loss": 4.3263, |
| "num_input_tokens_seen": 158597120, |
| "step": 2420, |
| "train_runtime": 22028.8679, |
| "train_tokens_per_second": 7199.513 |
| }, |
| { |
| "epoch": 0.6542370599717305, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.770112641441616e-05, |
| "loss": 4.3565, |
| "num_input_tokens_seen": 159252480, |
| "step": 2430, |
| "train_runtime": 22120.3426, |
| "train_tokens_per_second": 7199.368 |
| }, |
| { |
| "epoch": 0.6569293935518611, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.767824875138904e-05, |
| "loss": 4.4035, |
| "num_input_tokens_seen": 159907840, |
| "step": 2440, |
| "train_runtime": 22211.4295, |
| "train_tokens_per_second": 7199.349 |
| }, |
| { |
| "epoch": 0.6596217271319916, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.7655263357520304e-05, |
| "loss": 4.3352, |
| "num_input_tokens_seen": 160563200, |
| "step": 2450, |
| "train_runtime": 22302.9882, |
| "train_tokens_per_second": 7199.179 |
| }, |
| { |
| "epoch": 0.6623140607121222, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.763217034199986e-05, |
| "loss": 4.3358, |
| "num_input_tokens_seen": 161218560, |
| "step": 2460, |
| "train_runtime": 22394.5547, |
| "train_tokens_per_second": 7199.007 |
| }, |
| { |
| "epoch": 0.6650063942922528, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.760896981452885e-05, |
| "loss": 4.3606, |
| "num_input_tokens_seen": 161873920, |
| "step": 2470, |
| "train_runtime": 22486.3138, |
| "train_tokens_per_second": 7198.775 |
| }, |
| { |
| "epoch": 0.6676987278723834, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.758566188531916e-05, |
| "loss": 4.2709, |
| "num_input_tokens_seen": 162529280, |
| "step": 2480, |
| "train_runtime": 22577.8549, |
| "train_tokens_per_second": 7198.615 |
| }, |
| { |
| "epoch": 0.6703910614525139, |
| "grad_norm": 0.8828125, |
| "learning_rate": 4.756224666509286e-05, |
| "loss": 4.2548, |
| "num_input_tokens_seen": 163184640, |
| "step": 2490, |
| "train_runtime": 22669.1281, |
| "train_tokens_per_second": 7198.541 |
| }, |
| { |
| "epoch": 0.6730833950326446, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.753872426508171e-05, |
| "loss": 4.2854, |
| "num_input_tokens_seen": 163840000, |
| "step": 2500, |
| "train_runtime": 22760.6762, |
| "train_tokens_per_second": 7198.38 |
| }, |
| { |
| "epoch": 0.6757757286127751, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.751509479702662e-05, |
| "loss": 4.3176, |
| "num_input_tokens_seen": 164495360, |
| "step": 2510, |
| "train_runtime": 22869.0727, |
| "train_tokens_per_second": 7192.918 |
| }, |
| { |
| "epoch": 0.6784680621929057, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.749135837317709e-05, |
| "loss": 4.3155, |
| "num_input_tokens_seen": 165150720, |
| "step": 2520, |
| "train_runtime": 22959.138, |
| "train_tokens_per_second": 7193.246 |
| }, |
| { |
| "epoch": 0.6811603957730363, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.746751510629073e-05, |
| "loss": 4.322, |
| "num_input_tokens_seen": 165806080, |
| "step": 2530, |
| "train_runtime": 23050.3772, |
| "train_tokens_per_second": 7193.205 |
| }, |
| { |
| "epoch": 0.6838527293531669, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.744356510963268e-05, |
| "loss": 4.2189, |
| "num_input_tokens_seen": 166461440, |
| "step": 2540, |
| "train_runtime": 23141.7466, |
| "train_tokens_per_second": 7193.123 |
| }, |
| { |
| "epoch": 0.6865450629332974, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.741950849697512e-05, |
| "loss": 4.31, |
| "num_input_tokens_seen": 167116800, |
| "step": 2550, |
| "train_runtime": 23232.5085, |
| "train_tokens_per_second": 7193.231 |
| }, |
| { |
| "epoch": 0.689237396513428, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.7395345382596644e-05, |
| "loss": 4.457, |
| "num_input_tokens_seen": 167772160, |
| "step": 2560, |
| "train_runtime": 23323.2423, |
| "train_tokens_per_second": 7193.346 |
| }, |
| { |
| "epoch": 0.6919297300935586, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.7371075881281826e-05, |
| "loss": 4.2846, |
| "num_input_tokens_seen": 168427520, |
| "step": 2570, |
| "train_runtime": 23414.198, |
| "train_tokens_per_second": 7193.393 |
| }, |
| { |
| "epoch": 0.6946220636736892, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.7346700108320605e-05, |
| "loss": 4.3602, |
| "num_input_tokens_seen": 169082880, |
| "step": 2580, |
| "train_runtime": 23505.6086, |
| "train_tokens_per_second": 7193.299 |
| }, |
| { |
| "epoch": 0.6973143972538197, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.732221817950773e-05, |
| "loss": 4.2612, |
| "num_input_tokens_seen": 169738240, |
| "step": 2590, |
| "train_runtime": 23596.4702, |
| "train_tokens_per_second": 7193.374 |
| }, |
| { |
| "epoch": 0.7000067308339504, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.729763021114227e-05, |
| "loss": 4.3303, |
| "num_input_tokens_seen": 170393600, |
| "step": 2600, |
| "train_runtime": 23687.1091, |
| "train_tokens_per_second": 7193.516 |
| }, |
| { |
| "epoch": 0.7026990644140809, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.727293632002699e-05, |
| "loss": 4.3715, |
| "num_input_tokens_seen": 171048960, |
| "step": 2610, |
| "train_runtime": 23778.7081, |
| "train_tokens_per_second": 7193.366 |
| }, |
| { |
| "epoch": 0.7053913979942115, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.7248136623467855e-05, |
| "loss": 4.3184, |
| "num_input_tokens_seen": 171704320, |
| "step": 2620, |
| "train_runtime": 23869.4479, |
| "train_tokens_per_second": 7193.477 |
| }, |
| { |
| "epoch": 0.708083731574342, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.722323123927344e-05, |
| "loss": 4.2171, |
| "num_input_tokens_seen": 172359680, |
| "step": 2630, |
| "train_runtime": 23960.14, |
| "train_tokens_per_second": 7193.601 |
| }, |
| { |
| "epoch": 0.7107760651544727, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.719822028575438e-05, |
| "loss": 4.3047, |
| "num_input_tokens_seen": 173015040, |
| "step": 2640, |
| "train_runtime": 24050.8669, |
| "train_tokens_per_second": 7193.713 |
| }, |
| { |
| "epoch": 0.7134683987346032, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.717310388172281e-05, |
| "loss": 4.3117, |
| "num_input_tokens_seen": 173670400, |
| "step": 2650, |
| "train_runtime": 24141.4796, |
| "train_tokens_per_second": 7193.859 |
| }, |
| { |
| "epoch": 0.7161607323147338, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.714788214649179e-05, |
| "loss": 4.2947, |
| "num_input_tokens_seen": 174325760, |
| "step": 2660, |
| "train_runtime": 24232.2176, |
| "train_tokens_per_second": 7193.966 |
| }, |
| { |
| "epoch": 0.7188530658948644, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.712255519987474e-05, |
| "loss": 4.2709, |
| "num_input_tokens_seen": 174981120, |
| "step": 2670, |
| "train_runtime": 24322.9895, |
| "train_tokens_per_second": 7194.063 |
| }, |
| { |
| "epoch": 0.721545399474995, |
| "grad_norm": 0.88671875, |
| "learning_rate": 4.70971231621849e-05, |
| "loss": 4.3155, |
| "num_input_tokens_seen": 175636480, |
| "step": 2680, |
| "train_runtime": 24413.3721, |
| "train_tokens_per_second": 7194.274 |
| }, |
| { |
| "epoch": 0.7242377330551255, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.707158615423471e-05, |
| "loss": 4.2957, |
| "num_input_tokens_seen": 176291840, |
| "step": 2690, |
| "train_runtime": 24504.1257, |
| "train_tokens_per_second": 7194.374 |
| }, |
| { |
| "epoch": 0.7269300666352561, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.70459442973353e-05, |
| "loss": 4.2754, |
| "num_input_tokens_seen": 176947200, |
| "step": 2700, |
| "train_runtime": 24594.1858, |
| "train_tokens_per_second": 7194.676 |
| }, |
| { |
| "epoch": 0.7296224002153867, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.702019771329581e-05, |
| "loss": 4.2962, |
| "num_input_tokens_seen": 177602560, |
| "step": 2710, |
| "train_runtime": 24685.4443, |
| "train_tokens_per_second": 7194.627 |
| }, |
| { |
| "epoch": 0.7323147337955173, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.699434652442293e-05, |
| "loss": 4.2323, |
| "num_input_tokens_seen": 178257920, |
| "step": 2720, |
| "train_runtime": 24775.7353, |
| "train_tokens_per_second": 7194.859 |
| }, |
| { |
| "epoch": 0.7350070673756478, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.696839085352026e-05, |
| "loss": 4.2707, |
| "num_input_tokens_seen": 178913280, |
| "step": 2730, |
| "train_runtime": 24866.4687, |
| "train_tokens_per_second": 7194.961 |
| }, |
| { |
| "epoch": 0.7376994009557785, |
| "grad_norm": 1.0, |
| "learning_rate": 4.6942330823887706e-05, |
| "loss": 4.2595, |
| "num_input_tokens_seen": 179568640, |
| "step": 2740, |
| "train_runtime": 24957.2903, |
| "train_tokens_per_second": 7195.038 |
| }, |
| { |
| "epoch": 0.740391734535909, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.691616655932094e-05, |
| "loss": 4.3064, |
| "num_input_tokens_seen": 180224000, |
| "step": 2750, |
| "train_runtime": 25047.8997, |
| "train_tokens_per_second": 7195.174 |
| }, |
| { |
| "epoch": 0.7430840681160396, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.6889898184110784e-05, |
| "loss": 4.2911, |
| "num_input_tokens_seen": 180879360, |
| "step": 2760, |
| "train_runtime": 25138.519, |
| "train_tokens_per_second": 7195.307 |
| }, |
| { |
| "epoch": 0.7457764016961702, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.686352582304263e-05, |
| "loss": 4.2602, |
| "num_input_tokens_seen": 181534720, |
| "step": 2770, |
| "train_runtime": 25229.243, |
| "train_tokens_per_second": 7195.409 |
| }, |
| { |
| "epoch": 0.7484687352763008, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.6837049601395845e-05, |
| "loss": 4.2676, |
| "num_input_tokens_seen": 182190080, |
| "step": 2780, |
| "train_runtime": 25319.4176, |
| "train_tokens_per_second": 7195.666 |
| }, |
| { |
| "epoch": 0.7511610688564313, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.6810469644943175e-05, |
| "loss": 4.2867, |
| "num_input_tokens_seen": 182845440, |
| "step": 2790, |
| "train_runtime": 25410.0216, |
| "train_tokens_per_second": 7195.8 |
| }, |
| { |
| "epoch": 0.7538534024365618, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.6783786079950165e-05, |
| "loss": 4.3149, |
| "num_input_tokens_seen": 183500800, |
| "step": 2800, |
| "train_runtime": 25500.6347, |
| "train_tokens_per_second": 7195.931 |
| }, |
| { |
| "epoch": 0.7565457360166925, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.67569990331745e-05, |
| "loss": 4.2937, |
| "num_input_tokens_seen": 184156160, |
| "step": 2810, |
| "train_runtime": 25591.425, |
| "train_tokens_per_second": 7196.01 |
| }, |
| { |
| "epoch": 0.759238069596823, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.67301086318655e-05, |
| "loss": 4.2444, |
| "num_input_tokens_seen": 184811520, |
| "step": 2820, |
| "train_runtime": 25681.4577, |
| "train_tokens_per_second": 7196.302 |
| }, |
| { |
| "epoch": 0.7619304031769536, |
| "grad_norm": 0.8828125, |
| "learning_rate": 4.6703115003763406e-05, |
| "loss": 4.263, |
| "num_input_tokens_seen": 185466880, |
| "step": 2830, |
| "train_runtime": 25772.2848, |
| "train_tokens_per_second": 7196.369 |
| }, |
| { |
| "epoch": 0.7646227367570843, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.6676018277098874e-05, |
| "loss": 4.2548, |
| "num_input_tokens_seen": 186122240, |
| "step": 2840, |
| "train_runtime": 25863.216, |
| "train_tokens_per_second": 7196.407 |
| }, |
| { |
| "epoch": 0.7673150703372148, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.664881858059229e-05, |
| "loss": 4.2596, |
| "num_input_tokens_seen": 186777600, |
| "step": 2850, |
| "train_runtime": 25953.599, |
| "train_tokens_per_second": 7196.597 |
| }, |
| { |
| "epoch": 0.7700074039173453, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.662151604345321e-05, |
| "loss": 4.2311, |
| "num_input_tokens_seen": 187432960, |
| "step": 2860, |
| "train_runtime": 26043.8592, |
| "train_tokens_per_second": 7196.82 |
| }, |
| { |
| "epoch": 0.7726997374974759, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.6594110795379695e-05, |
| "loss": 4.3322, |
| "num_input_tokens_seen": 188088320, |
| "step": 2870, |
| "train_runtime": 26134.4702, |
| "train_tokens_per_second": 7196.944 |
| }, |
| { |
| "epoch": 0.7753920710776065, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.656660296655775e-05, |
| "loss": 4.3234, |
| "num_input_tokens_seen": 188743680, |
| "step": 2880, |
| "train_runtime": 26225.4231, |
| "train_tokens_per_second": 7196.974 |
| }, |
| { |
| "epoch": 0.7780844046577371, |
| "grad_norm": 0.9140625, |
| "learning_rate": 4.653899268766069e-05, |
| "loss": 4.1814, |
| "num_input_tokens_seen": 189399040, |
| "step": 2890, |
| "train_runtime": 26316.4752, |
| "train_tokens_per_second": 7196.976 |
| }, |
| { |
| "epoch": 0.7807767382378676, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.6511280089848466e-05, |
| "loss": 4.3011, |
| "num_input_tokens_seen": 190054400, |
| "step": 2900, |
| "train_runtime": 26407.6103, |
| "train_tokens_per_second": 7196.956 |
| }, |
| { |
| "epoch": 0.7834690718179983, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.6483465304767124e-05, |
| "loss": 4.3018, |
| "num_input_tokens_seen": 190709760, |
| "step": 2910, |
| "train_runtime": 26498.1593, |
| "train_tokens_per_second": 7197.095 |
| }, |
| { |
| "epoch": 0.7861614053981288, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.6455548464548126e-05, |
| "loss": 4.2726, |
| "num_input_tokens_seen": 191365120, |
| "step": 2920, |
| "train_runtime": 26589.1325, |
| "train_tokens_per_second": 7197.118 |
| }, |
| { |
| "epoch": 0.7888537389782594, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.642752970180774e-05, |
| "loss": 4.3334, |
| "num_input_tokens_seen": 192020480, |
| "step": 2930, |
| "train_runtime": 26680.0364, |
| "train_tokens_per_second": 7197.16 |
| }, |
| { |
| "epoch": 0.7915460725583899, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.639940914964641e-05, |
| "loss": 4.2296, |
| "num_input_tokens_seen": 192675840, |
| "step": 2940, |
| "train_runtime": 26770.9407, |
| "train_tokens_per_second": 7197.201 |
| }, |
| { |
| "epoch": 0.7942384061385206, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.6371186941648116e-05, |
| "loss": 4.2387, |
| "num_input_tokens_seen": 193331200, |
| "step": 2950, |
| "train_runtime": 26861.686, |
| "train_tokens_per_second": 7197.285 |
| }, |
| { |
| "epoch": 0.7969307397186511, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.634286321187973e-05, |
| "loss": 4.1788, |
| "num_input_tokens_seen": 193986560, |
| "step": 2960, |
| "train_runtime": 26952.8251, |
| "train_tokens_per_second": 7197.263 |
| }, |
| { |
| "epoch": 0.7996230732987817, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.631443809489043e-05, |
| "loss": 4.2251, |
| "num_input_tokens_seen": 194641920, |
| "step": 2970, |
| "train_runtime": 27043.1608, |
| "train_tokens_per_second": 7197.455 |
| }, |
| { |
| "epoch": 0.8023154068789123, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.628591172571098e-05, |
| "loss": 4.1999, |
| "num_input_tokens_seen": 195297280, |
| "step": 2980, |
| "train_runtime": 27134.199, |
| "train_tokens_per_second": 7197.459 |
| }, |
| { |
| "epoch": 0.8050077404590429, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.6257284239853186e-05, |
| "loss": 4.3704, |
| "num_input_tokens_seen": 195952640, |
| "step": 2990, |
| "train_runtime": 27224.5966, |
| "train_tokens_per_second": 7197.632 |
| }, |
| { |
| "epoch": 0.8077000740391734, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.6228555773309155e-05, |
| "loss": 4.2884, |
| "num_input_tokens_seen": 196608000, |
| "step": 3000, |
| "train_runtime": 27315.5079, |
| "train_tokens_per_second": 7197.67 |
| }, |
| { |
| "epoch": 0.810392407619304, |
| "grad_norm": 0.8828125, |
| "learning_rate": 4.619972646255069e-05, |
| "loss": 4.2231, |
| "num_input_tokens_seen": 197263360, |
| "step": 3010, |
| "train_runtime": 27424.1093, |
| "train_tokens_per_second": 7193.064 |
| }, |
| { |
| "epoch": 0.8130847411994346, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.617079644452869e-05, |
| "loss": 4.2115, |
| "num_input_tokens_seen": 197918720, |
| "step": 3020, |
| "train_runtime": 27514.8792, |
| "train_tokens_per_second": 7193.152 |
| }, |
| { |
| "epoch": 0.8157770747795652, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.614176585667239e-05, |
| "loss": 4.2903, |
| "num_input_tokens_seen": 198574080, |
| "step": 3030, |
| "train_runtime": 27605.6503, |
| "train_tokens_per_second": 7193.24 |
| }, |
| { |
| "epoch": 0.8184694083596957, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.611263483688885e-05, |
| "loss": 4.2218, |
| "num_input_tokens_seen": 199229440, |
| "step": 3040, |
| "train_runtime": 27697.4045, |
| "train_tokens_per_second": 7193.073 |
| }, |
| { |
| "epoch": 0.8211617419398264, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.608340352356215e-05, |
| "loss": 4.2355, |
| "num_input_tokens_seen": 199884800, |
| "step": 3050, |
| "train_runtime": 27787.9614, |
| "train_tokens_per_second": 7193.216 |
| }, |
| { |
| "epoch": 0.8238540755199569, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.605407205555285e-05, |
| "loss": 4.1975, |
| "num_input_tokens_seen": 200540160, |
| "step": 3060, |
| "train_runtime": 27878.9629, |
| "train_tokens_per_second": 7193.243 |
| }, |
| { |
| "epoch": 0.8265464091000875, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.602464057219727e-05, |
| "loss": 4.2887, |
| "num_input_tokens_seen": 201195520, |
| "step": 3070, |
| "train_runtime": 27970.4176, |
| "train_tokens_per_second": 7193.154 |
| }, |
| { |
| "epoch": 0.8292387426802181, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.599510921330683e-05, |
| "loss": 4.205, |
| "num_input_tokens_seen": 201850880, |
| "step": 3080, |
| "train_runtime": 28061.1136, |
| "train_tokens_per_second": 7193.26 |
| }, |
| { |
| "epoch": 0.8319310762603487, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.5965478119167424e-05, |
| "loss": 4.246, |
| "num_input_tokens_seen": 202506240, |
| "step": 3090, |
| "train_runtime": 28151.5595, |
| "train_tokens_per_second": 7193.429 |
| }, |
| { |
| "epoch": 0.8346234098404792, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.5935747430538726e-05, |
| "loss": 4.2395, |
| "num_input_tokens_seen": 203161600, |
| "step": 3100, |
| "train_runtime": 28242.6937, |
| "train_tokens_per_second": 7193.422 |
| }, |
| { |
| "epoch": 0.8373157434206098, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.59059172886535e-05, |
| "loss": 4.1347, |
| "num_input_tokens_seen": 203816960, |
| "step": 3110, |
| "train_runtime": 28334.0092, |
| "train_tokens_per_second": 7193.368 |
| }, |
| { |
| "epoch": 0.8400080770007404, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.587598783521697e-05, |
| "loss": 4.2245, |
| "num_input_tokens_seen": 204472320, |
| "step": 3120, |
| "train_runtime": 28425.3655, |
| "train_tokens_per_second": 7193.305 |
| }, |
| { |
| "epoch": 0.842700410580871, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.584595921240614e-05, |
| "loss": 4.1877, |
| "num_input_tokens_seen": 205127680, |
| "step": 3130, |
| "train_runtime": 28516.1829, |
| "train_tokens_per_second": 7193.378 |
| }, |
| { |
| "epoch": 0.8453927441610015, |
| "grad_norm": 0.875, |
| "learning_rate": 4.581583156286908e-05, |
| "loss": 4.2275, |
| "num_input_tokens_seen": 205783040, |
| "step": 3140, |
| "train_runtime": 28607.2046, |
| "train_tokens_per_second": 7193.399 |
| }, |
| { |
| "epoch": 0.8480850777411322, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.5785605029724315e-05, |
| "loss": 4.1242, |
| "num_input_tokens_seen": 206438400, |
| "step": 3150, |
| "train_runtime": 28698.0895, |
| "train_tokens_per_second": 7193.454 |
| }, |
| { |
| "epoch": 0.8507774113212627, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.575527975656007e-05, |
| "loss": 4.2295, |
| "num_input_tokens_seen": 207093760, |
| "step": 3160, |
| "train_runtime": 28789.2485, |
| "train_tokens_per_second": 7193.441 |
| }, |
| { |
| "epoch": 0.8534697449013933, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.572485588743365e-05, |
| "loss": 4.1666, |
| "num_input_tokens_seen": 207749120, |
| "step": 3170, |
| "train_runtime": 28880.2276, |
| "train_tokens_per_second": 7193.472 |
| }, |
| { |
| "epoch": 0.8561620784815238, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.569433356687072e-05, |
| "loss": 4.224, |
| "num_input_tokens_seen": 208404480, |
| "step": 3180, |
| "train_runtime": 28970.6224, |
| "train_tokens_per_second": 7193.649 |
| }, |
| { |
| "epoch": 0.8588544120616545, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.566371293986463e-05, |
| "loss": 4.1672, |
| "num_input_tokens_seen": 209059840, |
| "step": 3190, |
| "train_runtime": 29061.8644, |
| "train_tokens_per_second": 7193.614 |
| }, |
| { |
| "epoch": 0.861546745641785, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.563299415187572e-05, |
| "loss": 4.1537, |
| "num_input_tokens_seen": 209715200, |
| "step": 3200, |
| "train_runtime": 29152.5619, |
| "train_tokens_per_second": 7193.714 |
| }, |
| { |
| "epoch": 0.8642390792219156, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.560217734883066e-05, |
| "loss": 4.2219, |
| "num_input_tokens_seen": 210370560, |
| "step": 3210, |
| "train_runtime": 29243.5634, |
| "train_tokens_per_second": 7193.739 |
| }, |
| { |
| "epoch": 0.8669314128020462, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.557126267712169e-05, |
| "loss": 4.1849, |
| "num_input_tokens_seen": 211025920, |
| "step": 3220, |
| "train_runtime": 29333.9838, |
| "train_tokens_per_second": 7193.906 |
| }, |
| { |
| "epoch": 0.8696237463821768, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.5540250283606e-05, |
| "loss": 4.2267, |
| "num_input_tokens_seen": 211681280, |
| "step": 3230, |
| "train_runtime": 29425.3427, |
| "train_tokens_per_second": 7193.842 |
| }, |
| { |
| "epoch": 0.8723160799623073, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.550914031560498e-05, |
| "loss": 4.2194, |
| "num_input_tokens_seen": 212336640, |
| "step": 3240, |
| "train_runtime": 29516.178, |
| "train_tokens_per_second": 7193.907 |
| }, |
| { |
| "epoch": 0.8750084135424379, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.5477932920903546e-05, |
| "loss": 4.2358, |
| "num_input_tokens_seen": 212992000, |
| "step": 3250, |
| "train_runtime": 29606.7227, |
| "train_tokens_per_second": 7194.042 |
| }, |
| { |
| "epoch": 0.8777007471225685, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.544662824774943e-05, |
| "loss": 4.2741, |
| "num_input_tokens_seen": 213647360, |
| "step": 3260, |
| "train_runtime": 29697.6281, |
| "train_tokens_per_second": 7194.088 |
| }, |
| { |
| "epoch": 0.8803930807026991, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.5415226444852464e-05, |
| "loss": 4.2825, |
| "num_input_tokens_seen": 214302720, |
| "step": 3270, |
| "train_runtime": 29788.4901, |
| "train_tokens_per_second": 7194.145 |
| }, |
| { |
| "epoch": 0.8830854142828296, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.538372766138391e-05, |
| "loss": 4.1546, |
| "num_input_tokens_seen": 214958080, |
| "step": 3280, |
| "train_runtime": 29879.3258, |
| "train_tokens_per_second": 7194.208 |
| }, |
| { |
| "epoch": 0.8857777478629603, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.535213204697571e-05, |
| "loss": 4.1899, |
| "num_input_tokens_seen": 215613440, |
| "step": 3290, |
| "train_runtime": 29970.0037, |
| "train_tokens_per_second": 7194.308 |
| }, |
| { |
| "epoch": 0.8884700814430908, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.5320439751719786e-05, |
| "loss": 4.2076, |
| "num_input_tokens_seen": 216268800, |
| "step": 3300, |
| "train_runtime": 30060.9314, |
| "train_tokens_per_second": 7194.348 |
| }, |
| { |
| "epoch": 0.8911624150232214, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.528865092616734e-05, |
| "loss": 4.1606, |
| "num_input_tokens_seen": 216924160, |
| "step": 3310, |
| "train_runtime": 30151.2458, |
| "train_tokens_per_second": 7194.534 |
| }, |
| { |
| "epoch": 0.8938547486033519, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.525676572132814e-05, |
| "loss": 4.1753, |
| "num_input_tokens_seen": 217579520, |
| "step": 3320, |
| "train_runtime": 30241.7705, |
| "train_tokens_per_second": 7194.669 |
| }, |
| { |
| "epoch": 0.8965470821834826, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.522478428866979e-05, |
| "loss": 4.2919, |
| "num_input_tokens_seen": 218234880, |
| "step": 3330, |
| "train_runtime": 30332.5616, |
| "train_tokens_per_second": 7194.74 |
| }, |
| { |
| "epoch": 0.8992394157636131, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.519270678011701e-05, |
| "loss": 4.1882, |
| "num_input_tokens_seen": 218890240, |
| "step": 3340, |
| "train_runtime": 30423.4601, |
| "train_tokens_per_second": 7194.785 |
| }, |
| { |
| "epoch": 0.9019317493437436, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.516053334805091e-05, |
| "loss": 4.2376, |
| "num_input_tokens_seen": 219545600, |
| "step": 3350, |
| "train_runtime": 30513.7311, |
| "train_tokens_per_second": 7194.977 |
| }, |
| { |
| "epoch": 0.9046240829238743, |
| "grad_norm": 0.88671875, |
| "learning_rate": 4.512826414530831e-05, |
| "loss": 4.1968, |
| "num_input_tokens_seen": 220200960, |
| "step": 3360, |
| "train_runtime": 30604.2642, |
| "train_tokens_per_second": 7195.107 |
| }, |
| { |
| "epoch": 0.9073164165040049, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.509589932518094e-05, |
| "loss": 4.1383, |
| "num_input_tokens_seen": 220856320, |
| "step": 3370, |
| "train_runtime": 30694.8063, |
| "train_tokens_per_second": 7195.234 |
| }, |
| { |
| "epoch": 0.9100087500841354, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.506343904141478e-05, |
| "loss": 4.2102, |
| "num_input_tokens_seen": 221511680, |
| "step": 3380, |
| "train_runtime": 30785.3617, |
| "train_tokens_per_second": 7195.357 |
| }, |
| { |
| "epoch": 0.912701083664266, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.5030883448209276e-05, |
| "loss": 4.2352, |
| "num_input_tokens_seen": 222167040, |
| "step": 3390, |
| "train_runtime": 30875.9189, |
| "train_tokens_per_second": 7195.479 |
| }, |
| { |
| "epoch": 0.9153934172443966, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.499823270021666e-05, |
| "loss": 4.1812, |
| "num_input_tokens_seen": 222822400, |
| "step": 3400, |
| "train_runtime": 30966.6114, |
| "train_tokens_per_second": 7195.569 |
| }, |
| { |
| "epoch": 0.9180857508245271, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.496548695254116e-05, |
| "loss": 4.1502, |
| "num_input_tokens_seen": 223477760, |
| "step": 3410, |
| "train_runtime": 31056.7817, |
| "train_tokens_per_second": 7195.78 |
| }, |
| { |
| "epoch": 0.9207780844046577, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.4932646360738305e-05, |
| "loss": 4.2178, |
| "num_input_tokens_seen": 224133120, |
| "step": 3420, |
| "train_runtime": 31147.1211, |
| "train_tokens_per_second": 7195.95 |
| }, |
| { |
| "epoch": 0.9234704179847883, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.489971108081418e-05, |
| "loss": 4.1711, |
| "num_input_tokens_seen": 224788480, |
| "step": 3430, |
| "train_runtime": 31237.7219, |
| "train_tokens_per_second": 7196.059 |
| }, |
| { |
| "epoch": 0.9261627515649189, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.486668126922466e-05, |
| "loss": 4.1814, |
| "num_input_tokens_seen": 225443840, |
| "step": 3440, |
| "train_runtime": 31328.2106, |
| "train_tokens_per_second": 7196.193 |
| }, |
| { |
| "epoch": 0.9288550851450494, |
| "grad_norm": 1.0, |
| "learning_rate": 4.48335570828747e-05, |
| "loss": 4.175, |
| "num_input_tokens_seen": 226099200, |
| "step": 3450, |
| "train_runtime": 31419.1368, |
| "train_tokens_per_second": 7196.226 |
| }, |
| { |
| "epoch": 0.9315474187251801, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.480033867911755e-05, |
| "loss": 4.2191, |
| "num_input_tokens_seen": 226754560, |
| "step": 3460, |
| "train_runtime": 31509.2505, |
| "train_tokens_per_second": 7196.444 |
| }, |
| { |
| "epoch": 0.9342397523053106, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.476702621575406e-05, |
| "loss": 4.2263, |
| "num_input_tokens_seen": 227409920, |
| "step": 3470, |
| "train_runtime": 31600.9475, |
| "train_tokens_per_second": 7196.301 |
| }, |
| { |
| "epoch": 0.9369320858854412, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.4733619851031885e-05, |
| "loss": 4.2899, |
| "num_input_tokens_seen": 228065280, |
| "step": 3480, |
| "train_runtime": 31691.2678, |
| "train_tokens_per_second": 7196.471 |
| }, |
| { |
| "epoch": 0.9396244194655717, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.470011974364474e-05, |
| "loss": 4.0834, |
| "num_input_tokens_seen": 228720640, |
| "step": 3490, |
| "train_runtime": 31782.787, |
| "train_tokens_per_second": 7196.368 |
| }, |
| { |
| "epoch": 0.9423167530457024, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.466652605273166e-05, |
| "loss": 4.2763, |
| "num_input_tokens_seen": 229376000, |
| "step": 3500, |
| "train_runtime": 31873.3236, |
| "train_tokens_per_second": 7196.488 |
| }, |
| { |
| "epoch": 0.9450090866258329, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.463283893787628e-05, |
| "loss": 4.1294, |
| "num_input_tokens_seen": 230031360, |
| "step": 3510, |
| "train_runtime": 31997.0965, |
| "train_tokens_per_second": 7189.132 |
| }, |
| { |
| "epoch": 0.9477014202059635, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.459905855910597e-05, |
| "loss": 4.1761, |
| "num_input_tokens_seen": 230686720, |
| "step": 3520, |
| "train_runtime": 32087.0281, |
| "train_tokens_per_second": 7189.407 |
| }, |
| { |
| "epoch": 0.9503937537860941, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.4565185076891175e-05, |
| "loss": 4.1342, |
| "num_input_tokens_seen": 231342080, |
| "step": 3530, |
| "train_runtime": 32177.1945, |
| "train_tokens_per_second": 7189.629 |
| }, |
| { |
| "epoch": 0.9530860873662247, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.453121865214463e-05, |
| "loss": 4.1464, |
| "num_input_tokens_seen": 231997440, |
| "step": 3540, |
| "train_runtime": 32267.9925, |
| "train_tokens_per_second": 7189.708 |
| }, |
| { |
| "epoch": 0.9557784209463552, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.449715944622057e-05, |
| "loss": 4.2182, |
| "num_input_tokens_seen": 232652800, |
| "step": 3550, |
| "train_runtime": 32358.5662, |
| "train_tokens_per_second": 7189.836 |
| }, |
| { |
| "epoch": 0.9584707545264858, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.4463007620913975e-05, |
| "loss": 4.1777, |
| "num_input_tokens_seen": 233308160, |
| "step": 3560, |
| "train_runtime": 32449.4776, |
| "train_tokens_per_second": 7189.89 |
| }, |
| { |
| "epoch": 0.9611630881066164, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.442876333845982e-05, |
| "loss": 4.094, |
| "num_input_tokens_seen": 233963520, |
| "step": 3570, |
| "train_runtime": 32540.2049, |
| "train_tokens_per_second": 7189.983 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.439442676153227e-05, |
| "loss": 4.1403, |
| "num_input_tokens_seen": 234618880, |
| "step": 3580, |
| "train_runtime": 32631.0037, |
| "train_tokens_per_second": 7190.06 |
| }, |
| { |
| "epoch": 0.9665477552668775, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.4359998053243925e-05, |
| "loss": 4.1258, |
| "num_input_tokens_seen": 235274240, |
| "step": 3590, |
| "train_runtime": 32721.4244, |
| "train_tokens_per_second": 7190.22 |
| }, |
| { |
| "epoch": 0.9692400888470082, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.432547737714508e-05, |
| "loss": 4.1197, |
| "num_input_tokens_seen": 235929600, |
| "step": 3600, |
| "train_runtime": 32812.1821, |
| "train_tokens_per_second": 7190.305 |
| }, |
| { |
| "epoch": 0.9719324224271387, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.429086489722287e-05, |
| "loss": 4.1698, |
| "num_input_tokens_seen": 236584960, |
| "step": 3610, |
| "train_runtime": 32903.0276, |
| "train_tokens_per_second": 7190.371 |
| }, |
| { |
| "epoch": 0.9746247560072693, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.425616077790056e-05, |
| "loss": 4.2282, |
| "num_input_tokens_seen": 237240320, |
| "step": 3620, |
| "train_runtime": 32993.439, |
| "train_tokens_per_second": 7190.53 |
| }, |
| { |
| "epoch": 0.9773170895873999, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.422136518403673e-05, |
| "loss": 4.3195, |
| "num_input_tokens_seen": 237895680, |
| "step": 3630, |
| "train_runtime": 33084.7741, |
| "train_tokens_per_second": 7190.488 |
| }, |
| { |
| "epoch": 0.9800094231675305, |
| "grad_norm": 1.0, |
| "learning_rate": 4.4186478280924516e-05, |
| "loss": 4.1702, |
| "num_input_tokens_seen": 238551040, |
| "step": 3640, |
| "train_runtime": 33175.3782, |
| "train_tokens_per_second": 7190.605 |
| }, |
| { |
| "epoch": 0.982701756747661, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.4151500234290796e-05, |
| "loss": 4.1488, |
| "num_input_tokens_seen": 239206400, |
| "step": 3650, |
| "train_runtime": 33266.5303, |
| "train_tokens_per_second": 7190.603 |
| }, |
| { |
| "epoch": 0.9853940903277916, |
| "grad_norm": 1.0, |
| "learning_rate": 4.411643121029541e-05, |
| "loss": 4.2429, |
| "num_input_tokens_seen": 239861760, |
| "step": 3660, |
| "train_runtime": 33357.1993, |
| "train_tokens_per_second": 7190.704 |
| }, |
| { |
| "epoch": 0.9880864239079222, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.40812713755304e-05, |
| "loss": 4.2334, |
| "num_input_tokens_seen": 240517120, |
| "step": 3670, |
| "train_runtime": 33447.6459, |
| "train_tokens_per_second": 7190.853 |
| }, |
| { |
| "epoch": 0.9907787574880528, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.4046020897019166e-05, |
| "loss": 4.1692, |
| "num_input_tokens_seen": 241172480, |
| "step": 3680, |
| "train_runtime": 33538.6244, |
| "train_tokens_per_second": 7190.888 |
| }, |
| { |
| "epoch": 0.9934710910681833, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.4010679942215745e-05, |
| "loss": 4.1434, |
| "num_input_tokens_seen": 241827840, |
| "step": 3690, |
| "train_runtime": 33629.0822, |
| "train_tokens_per_second": 7191.033 |
| }, |
| { |
| "epoch": 0.996163424648314, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.397524867900392e-05, |
| "loss": 4.2236, |
| "num_input_tokens_seen": 242483200, |
| "step": 3700, |
| "train_runtime": 33719.8306, |
| "train_tokens_per_second": 7191.116 |
| }, |
| { |
| "epoch": 0.9988557582284445, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.393972727569652e-05, |
| "loss": 4.1554, |
| "num_input_tokens_seen": 243138560, |
| "step": 3710, |
| "train_runtime": 33810.897, |
| "train_tokens_per_second": 7191.13 |
| }, |
| { |
| "epoch": 1.0013461667900654, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.390411590103455e-05, |
| "loss": 4.1643, |
| "num_input_tokens_seen": 243744768, |
| "step": 3720, |
| "train_runtime": 33895.1256, |
| "train_tokens_per_second": 7191.145 |
| }, |
| { |
| "epoch": 1.004038500370196, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.3868414724186424e-05, |
| "loss": 4.1963, |
| "num_input_tokens_seen": 244400128, |
| "step": 3730, |
| "train_runtime": 33986.4957, |
| "train_tokens_per_second": 7191.095 |
| }, |
| { |
| "epoch": 1.0067308339503265, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.3832623914747154e-05, |
| "loss": 4.1447, |
| "num_input_tokens_seen": 245055488, |
| "step": 3740, |
| "train_runtime": 34077.1663, |
| "train_tokens_per_second": 7191.193 |
| }, |
| { |
| "epoch": 1.009423167530457, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.379674364273755e-05, |
| "loss": 4.1409, |
| "num_input_tokens_seen": 245710848, |
| "step": 3750, |
| "train_runtime": 34167.718, |
| "train_tokens_per_second": 7191.316 |
| }, |
| { |
| "epoch": 1.0121155011105876, |
| "grad_norm": 0.9140625, |
| "learning_rate": 4.3760774078603375e-05, |
| "loss": 4.1164, |
| "num_input_tokens_seen": 246366208, |
| "step": 3760, |
| "train_runtime": 34258.1953, |
| "train_tokens_per_second": 7191.453 |
| }, |
| { |
| "epoch": 1.0148078346907181, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.372471539321461e-05, |
| "loss": 4.0986, |
| "num_input_tokens_seen": 247021568, |
| "step": 3770, |
| "train_runtime": 34349.3094, |
| "train_tokens_per_second": 7191.457 |
| }, |
| { |
| "epoch": 1.0175001682708487, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.368856775786456e-05, |
| "loss": 4.1294, |
| "num_input_tokens_seen": 247676928, |
| "step": 3780, |
| "train_runtime": 34439.4855, |
| "train_tokens_per_second": 7191.656 |
| }, |
| { |
| "epoch": 1.0201925018509794, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.36523313442691e-05, |
| "loss": 4.0792, |
| "num_input_tokens_seen": 248332288, |
| "step": 3790, |
| "train_runtime": 34530.928, |
| "train_tokens_per_second": 7191.59 |
| }, |
| { |
| "epoch": 1.02288483543111, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.361600632456583e-05, |
| "loss": 4.1988, |
| "num_input_tokens_seen": 248987648, |
| "step": 3800, |
| "train_runtime": 34621.3595, |
| "train_tokens_per_second": 7191.735 |
| }, |
| { |
| "epoch": 1.0255771690112405, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.3579592871313265e-05, |
| "loss": 4.1271, |
| "num_input_tokens_seen": 249643008, |
| "step": 3810, |
| "train_runtime": 34712.6294, |
| "train_tokens_per_second": 7191.706 |
| }, |
| { |
| "epoch": 1.028269502591371, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.354309115749001e-05, |
| "loss": 4.1366, |
| "num_input_tokens_seen": 250298368, |
| "step": 3820, |
| "train_runtime": 34803.1708, |
| "train_tokens_per_second": 7191.827 |
| }, |
| { |
| "epoch": 1.0309618361715016, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.3506501356493965e-05, |
| "loss": 4.083, |
| "num_input_tokens_seen": 250953728, |
| "step": 3830, |
| "train_runtime": 34894.1642, |
| "train_tokens_per_second": 7191.854 |
| }, |
| { |
| "epoch": 1.0336541697516322, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.346982364214145e-05, |
| "loss": 4.1208, |
| "num_input_tokens_seen": 251609088, |
| "step": 3840, |
| "train_runtime": 34985.3424, |
| "train_tokens_per_second": 7191.843 |
| }, |
| { |
| "epoch": 1.0363465033317627, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.343305818866643e-05, |
| "loss": 4.1373, |
| "num_input_tokens_seen": 252264448, |
| "step": 3850, |
| "train_runtime": 35076.5599, |
| "train_tokens_per_second": 7191.824 |
| }, |
| { |
| "epoch": 1.0390388369118935, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.339620517071965e-05, |
| "loss": 4.0883, |
| "num_input_tokens_seen": 252919808, |
| "step": 3860, |
| "train_runtime": 35167.2199, |
| "train_tokens_per_second": 7191.919 |
| }, |
| { |
| "epoch": 1.041731170492024, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.335926476336785e-05, |
| "loss": 4.1411, |
| "num_input_tokens_seen": 253575168, |
| "step": 3870, |
| "train_runtime": 35257.9069, |
| "train_tokens_per_second": 7192.009 |
| }, |
| { |
| "epoch": 1.0444235040721546, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.332223714209286e-05, |
| "loss": 4.0782, |
| "num_input_tokens_seen": 254230528, |
| "step": 3880, |
| "train_runtime": 35348.4394, |
| "train_tokens_per_second": 7192.129 |
| }, |
| { |
| "epoch": 1.047115837652285, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.328512248279085e-05, |
| "loss": 4.1105, |
| "num_input_tokens_seen": 254885888, |
| "step": 3890, |
| "train_runtime": 35439.1569, |
| "train_tokens_per_second": 7192.211 |
| }, |
| { |
| "epoch": 1.0498081712324157, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.3247920961771445e-05, |
| "loss": 4.1275, |
| "num_input_tokens_seen": 255541248, |
| "step": 3900, |
| "train_runtime": 35529.4127, |
| "train_tokens_per_second": 7192.386 |
| }, |
| { |
| "epoch": 1.0525005048125462, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.3210632755756884e-05, |
| "loss": 4.0792, |
| "num_input_tokens_seen": 256196608, |
| "step": 3910, |
| "train_runtime": 35620.2122, |
| "train_tokens_per_second": 7192.45 |
| }, |
| { |
| "epoch": 1.0551928383926767, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.3173258041881226e-05, |
| "loss": 4.1166, |
| "num_input_tokens_seen": 256851968, |
| "step": 3920, |
| "train_runtime": 35710.6281, |
| "train_tokens_per_second": 7192.592 |
| }, |
| { |
| "epoch": 1.0578851719728075, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.313579699768945e-05, |
| "loss": 4.0884, |
| "num_input_tokens_seen": 257507328, |
| "step": 3930, |
| "train_runtime": 35801.49, |
| "train_tokens_per_second": 7192.643 |
| }, |
| { |
| "epoch": 1.060577505552938, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.309824980113664e-05, |
| "loss": 4.0969, |
| "num_input_tokens_seen": 258162688, |
| "step": 3940, |
| "train_runtime": 35891.9587, |
| "train_tokens_per_second": 7192.772 |
| }, |
| { |
| "epoch": 1.0632698391330686, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.306061663058715e-05, |
| "loss": 4.1934, |
| "num_input_tokens_seen": 258818048, |
| "step": 3950, |
| "train_runtime": 35982.4641, |
| "train_tokens_per_second": 7192.894 |
| }, |
| { |
| "epoch": 1.0659621727131992, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.302289766481374e-05, |
| "loss": 4.0758, |
| "num_input_tokens_seen": 259473408, |
| "step": 3960, |
| "train_runtime": 36073.417, |
| "train_tokens_per_second": 7192.926 |
| }, |
| { |
| "epoch": 1.0686545062933297, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.2985093082996744e-05, |
| "loss": 4.1234, |
| "num_input_tokens_seen": 260128768, |
| "step": 3970, |
| "train_runtime": 36164.2317, |
| "train_tokens_per_second": 7192.985 |
| }, |
| { |
| "epoch": 1.0713468398734602, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.294720306472317e-05, |
| "loss": 4.1192, |
| "num_input_tokens_seen": 260784128, |
| "step": 3980, |
| "train_runtime": 36255.0669, |
| "train_tokens_per_second": 7193.039 |
| }, |
| { |
| "epoch": 1.074039173453591, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.2909227789985935e-05, |
| "loss": 4.0864, |
| "num_input_tokens_seen": 261439488, |
| "step": 3990, |
| "train_runtime": 36345.6406, |
| "train_tokens_per_second": 7193.146 |
| }, |
| { |
| "epoch": 1.0767315070337216, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.287116743918292e-05, |
| "loss": 4.1621, |
| "num_input_tokens_seen": 262094848, |
| "step": 4000, |
| "train_runtime": 36436.248, |
| "train_tokens_per_second": 7193.245 |
| }, |
| { |
| "epoch": 1.079423840613852, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.283302219311616e-05, |
| "loss": 4.1096, |
| "num_input_tokens_seen": 262750208, |
| "step": 4010, |
| "train_runtime": 36545.2724, |
| "train_tokens_per_second": 7189.718 |
| }, |
| { |
| "epoch": 1.0821161741939826, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.279479223299099e-05, |
| "loss": 4.1474, |
| "num_input_tokens_seen": 263405568, |
| "step": 4020, |
| "train_runtime": 36636.1801, |
| "train_tokens_per_second": 7189.766 |
| }, |
| { |
| "epoch": 1.0848085077741132, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.275647774041517e-05, |
| "loss": 4.1458, |
| "num_input_tokens_seen": 264060928, |
| "step": 4030, |
| "train_runtime": 36726.9109, |
| "train_tokens_per_second": 7189.849 |
| }, |
| { |
| "epoch": 1.0875008413542437, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.2718078897397994e-05, |
| "loss": 4.0886, |
| "num_input_tokens_seen": 264716288, |
| "step": 4040, |
| "train_runtime": 36816.916, |
| "train_tokens_per_second": 7190.072 |
| }, |
| { |
| "epoch": 1.0901931749343743, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.267959588634949e-05, |
| "loss": 4.1204, |
| "num_input_tokens_seen": 265371648, |
| "step": 4050, |
| "train_runtime": 36907.5704, |
| "train_tokens_per_second": 7190.168 |
| }, |
| { |
| "epoch": 1.092885508514505, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.26410288900795e-05, |
| "loss": 4.0423, |
| "num_input_tokens_seen": 266027008, |
| "step": 4060, |
| "train_runtime": 36997.9691, |
| "train_tokens_per_second": 7190.314 |
| }, |
| { |
| "epoch": 1.0955778420946356, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.2602378091796834e-05, |
| "loss": 4.0993, |
| "num_input_tokens_seen": 266682368, |
| "step": 4070, |
| "train_runtime": 37088.8087, |
| "train_tokens_per_second": 7190.373 |
| }, |
| { |
| "epoch": 1.0982701756747661, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.25636436751084e-05, |
| "loss": 4.0873, |
| "num_input_tokens_seen": 267337728, |
| "step": 4080, |
| "train_runtime": 37179.7577, |
| "train_tokens_per_second": 7190.411 |
| }, |
| { |
| "epoch": 1.1009625092548967, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.252482582401832e-05, |
| "loss": 4.0804, |
| "num_input_tokens_seen": 267993088, |
| "step": 4090, |
| "train_runtime": 37270.755, |
| "train_tokens_per_second": 7190.439 |
| }, |
| { |
| "epoch": 1.1036548428350272, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.248592472292707e-05, |
| "loss": 4.1809, |
| "num_input_tokens_seen": 268648448, |
| "step": 4100, |
| "train_runtime": 37361.4309, |
| "train_tokens_per_second": 7190.529 |
| }, |
| { |
| "epoch": 1.1063471764151578, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.244694055663058e-05, |
| "loss": 4.1186, |
| "num_input_tokens_seen": 269303808, |
| "step": 4110, |
| "train_runtime": 37452.1799, |
| "train_tokens_per_second": 7190.604 |
| }, |
| { |
| "epoch": 1.1090395099952883, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.24078735103194e-05, |
| "loss": 4.087, |
| "num_input_tokens_seen": 269959168, |
| "step": 4120, |
| "train_runtime": 37542.0572, |
| "train_tokens_per_second": 7190.846 |
| }, |
| { |
| "epoch": 1.111731843575419, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.236872376957777e-05, |
| "loss": 4.1264, |
| "num_input_tokens_seen": 270614528, |
| "step": 4130, |
| "train_runtime": 37633.1209, |
| "train_tokens_per_second": 7190.861 |
| }, |
| { |
| "epoch": 1.1144241771555496, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.232949152038277e-05, |
| "loss": 4.1552, |
| "num_input_tokens_seen": 271269888, |
| "step": 4140, |
| "train_runtime": 37723.8949, |
| "train_tokens_per_second": 7190.93 |
| }, |
| { |
| "epoch": 1.1171165107356802, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.2290176949103444e-05, |
| "loss": 4.1153, |
| "num_input_tokens_seen": 271925248, |
| "step": 4150, |
| "train_runtime": 37814.3548, |
| "train_tokens_per_second": 7191.059 |
| }, |
| { |
| "epoch": 1.1198088443158107, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.225078024249988e-05, |
| "loss": 3.9972, |
| "num_input_tokens_seen": 272580608, |
| "step": 4160, |
| "train_runtime": 37905.2274, |
| "train_tokens_per_second": 7191.109 |
| }, |
| { |
| "epoch": 1.1225011778959413, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.221130158772234e-05, |
| "loss": 4.1237, |
| "num_input_tokens_seen": 273235968, |
| "step": 4170, |
| "train_runtime": 37995.663, |
| "train_tokens_per_second": 7191.241 |
| }, |
| { |
| "epoch": 1.1251935114760718, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.217174117231038e-05, |
| "loss": 4.0567, |
| "num_input_tokens_seen": 273891328, |
| "step": 4180, |
| "train_runtime": 38086.6329, |
| "train_tokens_per_second": 7191.272 |
| }, |
| { |
| "epoch": 1.1278858450562024, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.2132099184191956e-05, |
| "loss": 4.1517, |
| "num_input_tokens_seen": 274546688, |
| "step": 4190, |
| "train_runtime": 38176.9747, |
| "train_tokens_per_second": 7191.421 |
| }, |
| { |
| "epoch": 1.1305781786363331, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.209237581168253e-05, |
| "loss": 4.1753, |
| "num_input_tokens_seen": 275202048, |
| "step": 4200, |
| "train_runtime": 38267.894, |
| "train_tokens_per_second": 7191.46 |
| }, |
| { |
| "epoch": 1.1332705122164637, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.205257124348416e-05, |
| "loss": 4.1628, |
| "num_input_tokens_seen": 275857408, |
| "step": 4210, |
| "train_runtime": 38358.472, |
| "train_tokens_per_second": 7191.564 |
| }, |
| { |
| "epoch": 1.1359628457965942, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.201268566868462e-05, |
| "loss": 4.0838, |
| "num_input_tokens_seen": 276512768, |
| "step": 4220, |
| "train_runtime": 38449.6346, |
| "train_tokens_per_second": 7191.558 |
| }, |
| { |
| "epoch": 1.1386551793767248, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.197271927675651e-05, |
| "loss": 4.0511, |
| "num_input_tokens_seen": 277168128, |
| "step": 4230, |
| "train_runtime": 38540.098, |
| "train_tokens_per_second": 7191.682 |
| }, |
| { |
| "epoch": 1.1413475129568553, |
| "grad_norm": 1.0, |
| "learning_rate": 4.1932672257556315e-05, |
| "loss": 4.0671, |
| "num_input_tokens_seen": 277823488, |
| "step": 4240, |
| "train_runtime": 38630.478, |
| "train_tokens_per_second": 7191.821 |
| }, |
| { |
| "epoch": 1.1440398465369859, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.189254480132357e-05, |
| "loss": 4.0356, |
| "num_input_tokens_seen": 278478848, |
| "step": 4250, |
| "train_runtime": 38722.0947, |
| "train_tokens_per_second": 7191.73 |
| }, |
| { |
| "epoch": 1.1467321801171164, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.1852337098679894e-05, |
| "loss": 4.1653, |
| "num_input_tokens_seen": 279134208, |
| "step": 4260, |
| "train_runtime": 38812.5776, |
| "train_tokens_per_second": 7191.849 |
| }, |
| { |
| "epoch": 1.1494245136972472, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.1812049340628126e-05, |
| "loss": 4.1336, |
| "num_input_tokens_seen": 279789568, |
| "step": 4270, |
| "train_runtime": 38903.0508, |
| "train_tokens_per_second": 7191.97 |
| }, |
| { |
| "epoch": 1.1521168472773777, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.177168171855137e-05, |
| "loss": 4.0297, |
| "num_input_tokens_seen": 280444928, |
| "step": 4280, |
| "train_runtime": 38993.719, |
| "train_tokens_per_second": 7192.054 |
| }, |
| { |
| "epoch": 1.1548091808575083, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.173123442421214e-05, |
| "loss": 4.1431, |
| "num_input_tokens_seen": 281100288, |
| "step": 4290, |
| "train_runtime": 39084.2184, |
| "train_tokens_per_second": 7192.169 |
| }, |
| { |
| "epoch": 1.1575015144376388, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.1690707649751435e-05, |
| "loss": 4.1921, |
| "num_input_tokens_seen": 281755648, |
| "step": 4300, |
| "train_runtime": 39175.0783, |
| "train_tokens_per_second": 7192.217 |
| }, |
| { |
| "epoch": 1.1601938480177694, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.1650101587687795e-05, |
| "loss": 4.1002, |
| "num_input_tokens_seen": 282411008, |
| "step": 4310, |
| "train_runtime": 39265.6359, |
| "train_tokens_per_second": 7192.32 |
| }, |
| { |
| "epoch": 1.1628861815979, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.1609416430916417e-05, |
| "loss": 4.1173, |
| "num_input_tokens_seen": 283066368, |
| "step": 4320, |
| "train_runtime": 39356.8918, |
| "train_tokens_per_second": 7192.295 |
| }, |
| { |
| "epoch": 1.1655785151780305, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.156865237270822e-05, |
| "loss": 4.1631, |
| "num_input_tokens_seen": 283721728, |
| "step": 4330, |
| "train_runtime": 39447.6761, |
| "train_tokens_per_second": 7192.356 |
| }, |
| { |
| "epoch": 1.1682708487581612, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.1527809606708955e-05, |
| "loss": 4.1155, |
| "num_input_tokens_seen": 284377088, |
| "step": 4340, |
| "train_runtime": 39538.1818, |
| "train_tokens_per_second": 7192.468 |
| }, |
| { |
| "epoch": 1.1709631823382918, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.148688832693827e-05, |
| "loss": 4.0624, |
| "num_input_tokens_seen": 285032448, |
| "step": 4350, |
| "train_runtime": 39629.3436, |
| "train_tokens_per_second": 7192.459 |
| }, |
| { |
| "epoch": 1.1736555159184223, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.144588872778874e-05, |
| "loss": 4.0805, |
| "num_input_tokens_seen": 285687808, |
| "step": 4360, |
| "train_runtime": 39720.1522, |
| "train_tokens_per_second": 7192.515 |
| }, |
| { |
| "epoch": 1.1763478494985529, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.1404811004025043e-05, |
| "loss": 4.1736, |
| "num_input_tokens_seen": 286343168, |
| "step": 4370, |
| "train_runtime": 39811.0389, |
| "train_tokens_per_second": 7192.557 |
| }, |
| { |
| "epoch": 1.1790401830786834, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.136365535078296e-05, |
| "loss": 4.0432, |
| "num_input_tokens_seen": 286998528, |
| "step": 4380, |
| "train_runtime": 39901.6261, |
| "train_tokens_per_second": 7192.652 |
| }, |
| { |
| "epoch": 1.181732516658814, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.132242196356846e-05, |
| "loss": 4.1073, |
| "num_input_tokens_seen": 287653888, |
| "step": 4390, |
| "train_runtime": 39992.7877, |
| "train_tokens_per_second": 7192.644 |
| }, |
| { |
| "epoch": 1.1844248502389445, |
| "grad_norm": 1.0, |
| "learning_rate": 4.128111103825679e-05, |
| "loss": 4.1021, |
| "num_input_tokens_seen": 288309248, |
| "step": 4400, |
| "train_runtime": 40083.5048, |
| "train_tokens_per_second": 7192.716 |
| }, |
| { |
| "epoch": 1.1871171838190753, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.123972277109153e-05, |
| "loss": 4.0843, |
| "num_input_tokens_seen": 288964608, |
| "step": 4410, |
| "train_runtime": 40174.7665, |
| "train_tokens_per_second": 7192.689 |
| }, |
| { |
| "epoch": 1.1898095173992058, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.119825735868367e-05, |
| "loss": 4.0853, |
| "num_input_tokens_seen": 289619968, |
| "step": 4420, |
| "train_runtime": 40265.4969, |
| "train_tokens_per_second": 7192.758 |
| }, |
| { |
| "epoch": 1.1925018509793364, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.115671499801066e-05, |
| "loss": 4.0748, |
| "num_input_tokens_seen": 290275328, |
| "step": 4430, |
| "train_runtime": 40356.3231, |
| "train_tokens_per_second": 7192.809 |
| }, |
| { |
| "epoch": 1.195194184559467, |
| "grad_norm": 1.0, |
| "learning_rate": 4.11150958864155e-05, |
| "loss": 4.1187, |
| "num_input_tokens_seen": 290930688, |
| "step": 4440, |
| "train_runtime": 40447.5539, |
| "train_tokens_per_second": 7192.788 |
| }, |
| { |
| "epoch": 1.1978865181395975, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.107340022160577e-05, |
| "loss": 4.1389, |
| "num_input_tokens_seen": 291586048, |
| "step": 4450, |
| "train_runtime": 40538.5359, |
| "train_tokens_per_second": 7192.812 |
| }, |
| { |
| "epoch": 1.200578851719728, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.1031628201652726e-05, |
| "loss": 4.0362, |
| "num_input_tokens_seen": 292241408, |
| "step": 4460, |
| "train_runtime": 40628.8905, |
| "train_tokens_per_second": 7192.946 |
| }, |
| { |
| "epoch": 1.2032711852998585, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.098978002499035e-05, |
| "loss": 4.1541, |
| "num_input_tokens_seen": 292896768, |
| "step": 4470, |
| "train_runtime": 40719.8886, |
| "train_tokens_per_second": 7192.966 |
| }, |
| { |
| "epoch": 1.2059635188799893, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.094785589041436e-05, |
| "loss": 4.0388, |
| "num_input_tokens_seen": 293552128, |
| "step": 4480, |
| "train_runtime": 40810.8039, |
| "train_tokens_per_second": 7193.0 |
| }, |
| { |
| "epoch": 1.2086558524601199, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.0905855997081345e-05, |
| "loss": 4.1092, |
| "num_input_tokens_seen": 294207488, |
| "step": 4490, |
| "train_runtime": 40901.4178, |
| "train_tokens_per_second": 7193.088 |
| }, |
| { |
| "epoch": 1.2113481860402504, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.0863780544507756e-05, |
| "loss": 3.9746, |
| "num_input_tokens_seen": 294862848, |
| "step": 4500, |
| "train_runtime": 40992.0143, |
| "train_tokens_per_second": 7193.178 |
| }, |
| { |
| "epoch": 1.214040519620381, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.082162973256898e-05, |
| "loss": 4.1136, |
| "num_input_tokens_seen": 295518208, |
| "step": 4510, |
| "train_runtime": 41103.9394, |
| "train_tokens_per_second": 7189.535 |
| }, |
| { |
| "epoch": 1.2167328532005115, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.0779403761498414e-05, |
| "loss": 4.0529, |
| "num_input_tokens_seen": 296173568, |
| "step": 4520, |
| "train_runtime": 41197.3252, |
| "train_tokens_per_second": 7189.146 |
| }, |
| { |
| "epoch": 1.219425186780642, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.0737102831886465e-05, |
| "loss": 4.203, |
| "num_input_tokens_seen": 296828928, |
| "step": 4530, |
| "train_runtime": 41286.8522, |
| "train_tokens_per_second": 7189.43 |
| }, |
| { |
| "epoch": 1.2221175203607726, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.069472714467965e-05, |
| "loss": 4.1483, |
| "num_input_tokens_seen": 297484288, |
| "step": 4540, |
| "train_runtime": 41377.9341, |
| "train_tokens_per_second": 7189.443 |
| }, |
| { |
| "epoch": 1.2248098539409034, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.0652276901179574e-05, |
| "loss": 4.0595, |
| "num_input_tokens_seen": 298139648, |
| "step": 4550, |
| "train_runtime": 41468.2417, |
| "train_tokens_per_second": 7189.59 |
| }, |
| { |
| "epoch": 1.227502187521034, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.0609752303042063e-05, |
| "loss": 4.0823, |
| "num_input_tokens_seen": 298795008, |
| "step": 4560, |
| "train_runtime": 41559.5192, |
| "train_tokens_per_second": 7189.568 |
| }, |
| { |
| "epoch": 1.2301945211011645, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.0567153552276125e-05, |
| "loss": 4.1371, |
| "num_input_tokens_seen": 299450368, |
| "step": 4570, |
| "train_runtime": 41650.0624, |
| "train_tokens_per_second": 7189.674 |
| }, |
| { |
| "epoch": 1.232886854681295, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.0524480851243026e-05, |
| "loss": 4.0857, |
| "num_input_tokens_seen": 300105728, |
| "step": 4580, |
| "train_runtime": 41741.0104, |
| "train_tokens_per_second": 7189.709 |
| }, |
| { |
| "epoch": 1.2355791882614255, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.048173440265535e-05, |
| "loss": 4.0981, |
| "num_input_tokens_seen": 300761088, |
| "step": 4590, |
| "train_runtime": 41831.73, |
| "train_tokens_per_second": 7189.784 |
| }, |
| { |
| "epoch": 1.238271521841556, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.043891440957598e-05, |
| "loss": 4.0393, |
| "num_input_tokens_seen": 301416448, |
| "step": 4600, |
| "train_runtime": 41922.3234, |
| "train_tokens_per_second": 7189.879 |
| }, |
| { |
| "epoch": 1.2409638554216866, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.039602107541717e-05, |
| "loss": 3.9847, |
| "num_input_tokens_seen": 302071808, |
| "step": 4610, |
| "train_runtime": 42012.9274, |
| "train_tokens_per_second": 7189.973 |
| }, |
| { |
| "epoch": 1.2436561890018174, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.035305460393961e-05, |
| "loss": 3.9565, |
| "num_input_tokens_seen": 302727168, |
| "step": 4620, |
| "train_runtime": 42104.2059, |
| "train_tokens_per_second": 7189.951 |
| }, |
| { |
| "epoch": 1.246348522581948, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.0310015199251375e-05, |
| "loss": 3.9737, |
| "num_input_tokens_seen": 303382528, |
| "step": 4630, |
| "train_runtime": 42194.6319, |
| "train_tokens_per_second": 7190.074 |
| }, |
| { |
| "epoch": 1.2490408561620785, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.0266903065807013e-05, |
| "loss": 4.1593, |
| "num_input_tokens_seen": 304037888, |
| "step": 4640, |
| "train_runtime": 42285.6276, |
| "train_tokens_per_second": 7190.1 |
| }, |
| { |
| "epoch": 1.251733189742209, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.0223718408406593e-05, |
| "loss": 4.0606, |
| "num_input_tokens_seen": 304693248, |
| "step": 4650, |
| "train_runtime": 42376.5765, |
| "train_tokens_per_second": 7190.134 |
| }, |
| { |
| "epoch": 1.2544255233223396, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.018046143219466e-05, |
| "loss": 4.0414, |
| "num_input_tokens_seen": 305348608, |
| "step": 4660, |
| "train_runtime": 42467.4287, |
| "train_tokens_per_second": 7190.184 |
| }, |
| { |
| "epoch": 1.2571178569024704, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.0137132342659345e-05, |
| "loss": 4.0417, |
| "num_input_tokens_seen": 306003968, |
| "step": 4670, |
| "train_runtime": 42558.2623, |
| "train_tokens_per_second": 7190.236 |
| }, |
| { |
| "epoch": 1.2598101904826007, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.00937313456313e-05, |
| "loss": 4.0062, |
| "num_input_tokens_seen": 306659328, |
| "step": 4680, |
| "train_runtime": 42649.2409, |
| "train_tokens_per_second": 7190.265 |
| }, |
| { |
| "epoch": 1.2625025240627314, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.0050258647282815e-05, |
| "loss": 4.0743, |
| "num_input_tokens_seen": 307314688, |
| "step": 4690, |
| "train_runtime": 42739.8579, |
| "train_tokens_per_second": 7190.354 |
| }, |
| { |
| "epoch": 1.265194857642862, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.0006714454126756e-05, |
| "loss": 4.063, |
| "num_input_tokens_seen": 307970048, |
| "step": 4700, |
| "train_runtime": 42830.7285, |
| "train_tokens_per_second": 7190.399 |
| }, |
| { |
| "epoch": 1.2678871912229925, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.9963098973015625e-05, |
| "loss": 4.1403, |
| "num_input_tokens_seen": 308625408, |
| "step": 4710, |
| "train_runtime": 42921.0004, |
| "train_tokens_per_second": 7190.546 |
| }, |
| { |
| "epoch": 1.270579524803123, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.991941241114057e-05, |
| "loss": 4.1257, |
| "num_input_tokens_seen": 309280768, |
| "step": 4720, |
| "train_runtime": 43011.9497, |
| "train_tokens_per_second": 7190.578 |
| }, |
| { |
| "epoch": 1.2732718583832536, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.9875654976030416e-05, |
| "loss": 4.0648, |
| "num_input_tokens_seen": 309936128, |
| "step": 4730, |
| "train_runtime": 43102.7908, |
| "train_tokens_per_second": 7190.628 |
| }, |
| { |
| "epoch": 1.2759641919633844, |
| "grad_norm": 1.0, |
| "learning_rate": 3.983182687555066e-05, |
| "loss": 3.9744, |
| "num_input_tokens_seen": 310591488, |
| "step": 4740, |
| "train_runtime": 43193.9112, |
| "train_tokens_per_second": 7190.631 |
| }, |
| { |
| "epoch": 1.2786565255435147, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.9787928317902465e-05, |
| "loss": 4.0177, |
| "num_input_tokens_seen": 311246848, |
| "step": 4750, |
| "train_runtime": 43284.833, |
| "train_tokens_per_second": 7190.668 |
| }, |
| { |
| "epoch": 1.2813488591236455, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.974395951162172e-05, |
| "loss": 4.1278, |
| "num_input_tokens_seen": 311902208, |
| "step": 4760, |
| "train_runtime": 43375.2348, |
| "train_tokens_per_second": 7190.79 |
| }, |
| { |
| "epoch": 1.284041192703776, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.9699920665578016e-05, |
| "loss": 3.9982, |
| "num_input_tokens_seen": 312557568, |
| "step": 4770, |
| "train_runtime": 43466.3882, |
| "train_tokens_per_second": 7190.788 |
| }, |
| { |
| "epoch": 1.2867335262839066, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.965581198897366e-05, |
| "loss": 3.9781, |
| "num_input_tokens_seen": 313212928, |
| "step": 4780, |
| "train_runtime": 43556.6919, |
| "train_tokens_per_second": 7190.926 |
| }, |
| { |
| "epoch": 1.2894258598640371, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.961163369134266e-05, |
| "loss": 3.9825, |
| "num_input_tokens_seen": 313868288, |
| "step": 4790, |
| "train_runtime": 43647.9657, |
| "train_tokens_per_second": 7190.903 |
| }, |
| { |
| "epoch": 1.2921181934441677, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.956738598254981e-05, |
| "loss": 4.1024, |
| "num_input_tokens_seen": 314523648, |
| "step": 4800, |
| "train_runtime": 43738.6087, |
| "train_tokens_per_second": 7190.984 |
| }, |
| { |
| "epoch": 1.2948105270242984, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.9523069072789576e-05, |
| "loss": 4.0959, |
| "num_input_tokens_seen": 315179008, |
| "step": 4810, |
| "train_runtime": 43829.2011, |
| "train_tokens_per_second": 7191.074 |
| }, |
| { |
| "epoch": 1.2975028606044288, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.947868317258519e-05, |
| "loss": 4.0322, |
| "num_input_tokens_seen": 315834368, |
| "step": 4820, |
| "train_runtime": 43920.1552, |
| "train_tokens_per_second": 7191.103 |
| }, |
| { |
| "epoch": 1.3001951941845595, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.943422849278759e-05, |
| "loss": 4.0566, |
| "num_input_tokens_seen": 316489728, |
| "step": 4830, |
| "train_runtime": 44010.9066, |
| "train_tokens_per_second": 7191.166 |
| }, |
| { |
| "epoch": 1.30288752776469, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.938970524457449e-05, |
| "loss": 4.0945, |
| "num_input_tokens_seen": 317145088, |
| "step": 4840, |
| "train_runtime": 44101.059, |
| "train_tokens_per_second": 7191.326 |
| }, |
| { |
| "epoch": 1.3055798613448206, |
| "grad_norm": 1.125, |
| "learning_rate": 3.9345113639449274e-05, |
| "loss": 4.0747, |
| "num_input_tokens_seen": 317800448, |
| "step": 4850, |
| "train_runtime": 44191.7055, |
| "train_tokens_per_second": 7191.405 |
| }, |
| { |
| "epoch": 1.3082721949249512, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.930045388924008e-05, |
| "loss": 4.0492, |
| "num_input_tokens_seen": 318455808, |
| "step": 4860, |
| "train_runtime": 44282.9561, |
| "train_tokens_per_second": 7191.385 |
| }, |
| { |
| "epoch": 1.3109645285050817, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.925572620609878e-05, |
| "loss": 4.0212, |
| "num_input_tokens_seen": 319111168, |
| "step": 4870, |
| "train_runtime": 44373.8261, |
| "train_tokens_per_second": 7191.428 |
| }, |
| { |
| "epoch": 1.3136568620852125, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.921093080249992e-05, |
| "loss": 4.0731, |
| "num_input_tokens_seen": 319766528, |
| "step": 4880, |
| "train_runtime": 44464.7093, |
| "train_tokens_per_second": 7191.468 |
| }, |
| { |
| "epoch": 1.3163491956653428, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.916606789123977e-05, |
| "loss": 4.094, |
| "num_input_tokens_seen": 320421888, |
| "step": 4890, |
| "train_runtime": 44554.734, |
| "train_tokens_per_second": 7191.646 |
| }, |
| { |
| "epoch": 1.3190415292454736, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.912113768543526e-05, |
| "loss": 3.9722, |
| "num_input_tokens_seen": 321077248, |
| "step": 4900, |
| "train_runtime": 44645.687, |
| "train_tokens_per_second": 7191.674 |
| }, |
| { |
| "epoch": 1.3217338628256041, |
| "grad_norm": 0.9140625, |
| "learning_rate": 3.907614039852304e-05, |
| "loss": 4.0809, |
| "num_input_tokens_seen": 321732608, |
| "step": 4910, |
| "train_runtime": 44736.4925, |
| "train_tokens_per_second": 7191.726 |
| }, |
| { |
| "epoch": 1.3244261964057347, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.903107624425838e-05, |
| "loss": 4.0212, |
| "num_input_tokens_seen": 322387968, |
| "step": 4920, |
| "train_runtime": 44827.482, |
| "train_tokens_per_second": 7191.748 |
| }, |
| { |
| "epoch": 1.3271185299858652, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.898594543671422e-05, |
| "loss": 4.0774, |
| "num_input_tokens_seen": 323043328, |
| "step": 4930, |
| "train_runtime": 44918.0801, |
| "train_tokens_per_second": 7191.833 |
| }, |
| { |
| "epoch": 1.3298108635659958, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.894074819028013e-05, |
| "loss": 4.0552, |
| "num_input_tokens_seen": 323698688, |
| "step": 4940, |
| "train_runtime": 45008.9135, |
| "train_tokens_per_second": 7191.88 |
| }, |
| { |
| "epoch": 1.3325031971461265, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.889548471966128e-05, |
| "loss": 4.0884, |
| "num_input_tokens_seen": 324354048, |
| "step": 4950, |
| "train_runtime": 45099.1757, |
| "train_tokens_per_second": 7192.017 |
| }, |
| { |
| "epoch": 1.3351955307262569, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.885015523987744e-05, |
| "loss": 3.992, |
| "num_input_tokens_seen": 325009408, |
| "step": 4960, |
| "train_runtime": 45190.2096, |
| "train_tokens_per_second": 7192.031 |
| }, |
| { |
| "epoch": 1.3378878643063876, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.880475996626195e-05, |
| "loss": 4.0486, |
| "num_input_tokens_seen": 325664768, |
| "step": 4970, |
| "train_runtime": 45280.9408, |
| "train_tokens_per_second": 7192.094 |
| }, |
| { |
| "epoch": 1.3405801978865182, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.875929911446069e-05, |
| "loss": 3.9412, |
| "num_input_tokens_seen": 326320128, |
| "step": 4980, |
| "train_runtime": 45371.3567, |
| "train_tokens_per_second": 7192.206 |
| }, |
| { |
| "epoch": 1.3432725314666487, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.8713772900431075e-05, |
| "loss": 4.0498, |
| "num_input_tokens_seen": 326975488, |
| "step": 4990, |
| "train_runtime": 45462.0751, |
| "train_tokens_per_second": 7192.269 |
| }, |
| { |
| "epoch": 1.3459648650467793, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.866818154044101e-05, |
| "loss": 4.0712, |
| "num_input_tokens_seen": 327630848, |
| "step": 5000, |
| "train_runtime": 45552.9489, |
| "train_tokens_per_second": 7192.308 |
| }, |
| { |
| "epoch": 1.3486571986269098, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.8622525251067864e-05, |
| "loss": 4.0049, |
| "num_input_tokens_seen": 328286208, |
| "step": 5010, |
| "train_runtime": 45661.5242, |
| "train_tokens_per_second": 7189.559 |
| }, |
| { |
| "epoch": 1.3513495322070406, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.8576804249197456e-05, |
| "loss": 4.0657, |
| "num_input_tokens_seen": 328941568, |
| "step": 5020, |
| "train_runtime": 45752.0747, |
| "train_tokens_per_second": 7189.654 |
| }, |
| { |
| "epoch": 1.354041865787171, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.853101875202301e-05, |
| "loss": 4.0619, |
| "num_input_tokens_seen": 329596928, |
| "step": 5030, |
| "train_runtime": 45843.0305, |
| "train_tokens_per_second": 7189.685 |
| }, |
| { |
| "epoch": 1.3567341993673017, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.848516897704414e-05, |
| "loss": 4.1361, |
| "num_input_tokens_seen": 330252288, |
| "step": 5040, |
| "train_runtime": 45933.7716, |
| "train_tokens_per_second": 7189.749 |
| }, |
| { |
| "epoch": 1.3594265329474322, |
| "grad_norm": 0.9140625, |
| "learning_rate": 3.843925514206578e-05, |
| "loss": 4.083, |
| "num_input_tokens_seen": 330907648, |
| "step": 5050, |
| "train_runtime": 46024.504, |
| "train_tokens_per_second": 7189.815 |
| }, |
| { |
| "epoch": 1.3621188665275628, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.839327746519721e-05, |
| "loss": 4.0152, |
| "num_input_tokens_seen": 331563008, |
| "step": 5060, |
| "train_runtime": 46115.7035, |
| "train_tokens_per_second": 7189.807 |
| }, |
| { |
| "epoch": 1.3648112001076933, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.834723616485095e-05, |
| "loss": 4.0064, |
| "num_input_tokens_seen": 332218368, |
| "step": 5070, |
| "train_runtime": 46206.8061, |
| "train_tokens_per_second": 7189.815 |
| }, |
| { |
| "epoch": 1.3675035336878238, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.8301131459741786e-05, |
| "loss": 3.984, |
| "num_input_tokens_seen": 332873728, |
| "step": 5080, |
| "train_runtime": 46298.4821, |
| "train_tokens_per_second": 7189.733 |
| }, |
| { |
| "epoch": 1.3701958672679546, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.825496356888568e-05, |
| "loss": 4.0889, |
| "num_input_tokens_seen": 333529088, |
| "step": 5090, |
| "train_runtime": 46389.6726, |
| "train_tokens_per_second": 7189.727 |
| }, |
| { |
| "epoch": 1.3728882008480852, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.820873271159877e-05, |
| "loss": 3.9429, |
| "num_input_tokens_seen": 334184448, |
| "step": 5100, |
| "train_runtime": 46480.8345, |
| "train_tokens_per_second": 7189.726 |
| }, |
| { |
| "epoch": 1.3755805344282157, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.816243910749629e-05, |
| "loss": 4.0672, |
| "num_input_tokens_seen": 334839808, |
| "step": 5110, |
| "train_runtime": 46572.051, |
| "train_tokens_per_second": 7189.716 |
| }, |
| { |
| "epoch": 1.3782728680083463, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.8116082976491576e-05, |
| "loss": 3.9873, |
| "num_input_tokens_seen": 335495168, |
| "step": 5120, |
| "train_runtime": 46663.0437, |
| "train_tokens_per_second": 7189.74 |
| }, |
| { |
| "epoch": 1.3809652015884768, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.8069664538794955e-05, |
| "loss": 4.0912, |
| "num_input_tokens_seen": 336150528, |
| "step": 5130, |
| "train_runtime": 46753.8595, |
| "train_tokens_per_second": 7189.792 |
| }, |
| { |
| "epoch": 1.3836575351686073, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.802318401491277e-05, |
| "loss": 4.057, |
| "num_input_tokens_seen": 336805888, |
| "step": 5140, |
| "train_runtime": 46844.4928, |
| "train_tokens_per_second": 7189.872 |
| }, |
| { |
| "epoch": 1.386349868748738, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.797664162564626e-05, |
| "loss": 4.0411, |
| "num_input_tokens_seen": 337461248, |
| "step": 5150, |
| "train_runtime": 46935.7326, |
| "train_tokens_per_second": 7189.858 |
| }, |
| { |
| "epoch": 1.3890422023288687, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.7930037592090605e-05, |
| "loss": 4.0293, |
| "num_input_tokens_seen": 338116608, |
| "step": 5160, |
| "train_runtime": 47026.6833, |
| "train_tokens_per_second": 7189.888 |
| }, |
| { |
| "epoch": 1.3917345359089992, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.788337213563377e-05, |
| "loss": 3.9478, |
| "num_input_tokens_seen": 338771968, |
| "step": 5170, |
| "train_runtime": 47118.4452, |
| "train_tokens_per_second": 7189.795 |
| }, |
| { |
| "epoch": 1.3944268694891297, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.7836645477955524e-05, |
| "loss": 4.0632, |
| "num_input_tokens_seen": 339427328, |
| "step": 5180, |
| "train_runtime": 47209.0334, |
| "train_tokens_per_second": 7189.881 |
| }, |
| { |
| "epoch": 1.3971192030692603, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.7789857841026363e-05, |
| "loss": 3.9842, |
| "num_input_tokens_seen": 340082688, |
| "step": 5190, |
| "train_runtime": 47299.8697, |
| "train_tokens_per_second": 7189.929 |
| }, |
| { |
| "epoch": 1.3998115366493908, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.774300944710647e-05, |
| "loss": 4.0714, |
| "num_input_tokens_seen": 340738048, |
| "step": 5200, |
| "train_runtime": 47390.6283, |
| "train_tokens_per_second": 7189.988 |
| }, |
| { |
| "epoch": 1.4025038702295214, |
| "grad_norm": 0.9140625, |
| "learning_rate": 3.769610051874463e-05, |
| "loss": 4.0991, |
| "num_input_tokens_seen": 341393408, |
| "step": 5210, |
| "train_runtime": 47481.7596, |
| "train_tokens_per_second": 7189.991 |
| }, |
| { |
| "epoch": 1.405196203809652, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.7649131278777194e-05, |
| "loss": 4.0515, |
| "num_input_tokens_seen": 342048768, |
| "step": 5220, |
| "train_runtime": 47572.5591, |
| "train_tokens_per_second": 7190.043 |
| }, |
| { |
| "epoch": 1.4078885373897827, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.7602101950327036e-05, |
| "loss": 3.9602, |
| "num_input_tokens_seen": 342704128, |
| "step": 5230, |
| "train_runtime": 47663.3771, |
| "train_tokens_per_second": 7190.093 |
| }, |
| { |
| "epoch": 1.4105808709699132, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.755501275680245e-05, |
| "loss": 3.9706, |
| "num_input_tokens_seen": 343359488, |
| "step": 5240, |
| "train_runtime": 47754.3796, |
| "train_tokens_per_second": 7190.115 |
| }, |
| { |
| "epoch": 1.4132732045500438, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.7507863921896144e-05, |
| "loss": 4.0457, |
| "num_input_tokens_seen": 344014848, |
| "step": 5250, |
| "train_runtime": 47845.8588, |
| "train_tokens_per_second": 7190.065 |
| }, |
| { |
| "epoch": 1.4159655381301743, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.7460655669584114e-05, |
| "loss": 4.0667, |
| "num_input_tokens_seen": 344670208, |
| "step": 5260, |
| "train_runtime": 47936.3933, |
| "train_tokens_per_second": 7190.157 |
| }, |
| { |
| "epoch": 1.4186578717103049, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.741338822412463e-05, |
| "loss": 3.9787, |
| "num_input_tokens_seen": 345325568, |
| "step": 5270, |
| "train_runtime": 48027.5225, |
| "train_tokens_per_second": 7190.16 |
| }, |
| { |
| "epoch": 1.4213502052904354, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.736606181005715e-05, |
| "loss": 4.092, |
| "num_input_tokens_seen": 345980928, |
| "step": 5280, |
| "train_runtime": 48118.394, |
| "train_tokens_per_second": 7190.201 |
| }, |
| { |
| "epoch": 1.424042538870566, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.731867665220124e-05, |
| "loss": 4.0368, |
| "num_input_tokens_seen": 346636288, |
| "step": 5290, |
| "train_runtime": 48209.5568, |
| "train_tokens_per_second": 7190.199 |
| }, |
| { |
| "epoch": 1.4267348724506967, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.727123297565557e-05, |
| "loss": 4.1142, |
| "num_input_tokens_seen": 347291648, |
| "step": 5300, |
| "train_runtime": 48300.6605, |
| "train_tokens_per_second": 7190.205 |
| }, |
| { |
| "epoch": 1.4294272060308273, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.722373100579674e-05, |
| "loss": 4.0787, |
| "num_input_tokens_seen": 347947008, |
| "step": 5310, |
| "train_runtime": 48391.9849, |
| "train_tokens_per_second": 7190.178 |
| }, |
| { |
| "epoch": 1.4321195396109578, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.717617096827831e-05, |
| "loss": 3.9859, |
| "num_input_tokens_seen": 348602368, |
| "step": 5320, |
| "train_runtime": 48483.2543, |
| "train_tokens_per_second": 7190.16 |
| }, |
| { |
| "epoch": 1.4348118731910884, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.712855308902967e-05, |
| "loss": 4.0245, |
| "num_input_tokens_seen": 349257728, |
| "step": 5330, |
| "train_runtime": 48574.3918, |
| "train_tokens_per_second": 7190.162 |
| }, |
| { |
| "epoch": 1.437504206771219, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.708087759425497e-05, |
| "loss": 3.9664, |
| "num_input_tokens_seen": 349913088, |
| "step": 5340, |
| "train_runtime": 48666.116, |
| "train_tokens_per_second": 7190.076 |
| }, |
| { |
| "epoch": 1.4401965403513495, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.703314471043206e-05, |
| "loss": 3.9756, |
| "num_input_tokens_seen": 350568448, |
| "step": 5350, |
| "train_runtime": 48757.3819, |
| "train_tokens_per_second": 7190.059 |
| }, |
| { |
| "epoch": 1.44288887393148, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.6985354664311434e-05, |
| "loss": 4.0478, |
| "num_input_tokens_seen": 351223808, |
| "step": 5360, |
| "train_runtime": 48847.8506, |
| "train_tokens_per_second": 7190.159 |
| }, |
| { |
| "epoch": 1.4455812075116108, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.6937507682915105e-05, |
| "loss": 4.1172, |
| "num_input_tokens_seen": 351879168, |
| "step": 5370, |
| "train_runtime": 48940.0294, |
| "train_tokens_per_second": 7190.007 |
| }, |
| { |
| "epoch": 1.4482735410917413, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.688960399353557e-05, |
| "loss": 3.9969, |
| "num_input_tokens_seen": 352534528, |
| "step": 5380, |
| "train_runtime": 49030.843, |
| "train_tokens_per_second": 7190.056 |
| }, |
| { |
| "epoch": 1.4509658746718719, |
| "grad_norm": 1.0, |
| "learning_rate": 3.684164382373469e-05, |
| "loss": 4.0159, |
| "num_input_tokens_seen": 353189888, |
| "step": 5390, |
| "train_runtime": 49121.8378, |
| "train_tokens_per_second": 7190.079 |
| }, |
| { |
| "epoch": 1.4536582082520024, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.6793627401342655e-05, |
| "loss": 3.9705, |
| "num_input_tokens_seen": 353845248, |
| "step": 5400, |
| "train_runtime": 49213.3395, |
| "train_tokens_per_second": 7190.027 |
| }, |
| { |
| "epoch": 1.456350541832133, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.6745554954456854e-05, |
| "loss": 4.0728, |
| "num_input_tokens_seen": 354500608, |
| "step": 5410, |
| "train_runtime": 49304.2357, |
| "train_tokens_per_second": 7190.064 |
| }, |
| { |
| "epoch": 1.4590428754122635, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.669742671144084e-05, |
| "loss": 4.0177, |
| "num_input_tokens_seen": 355155968, |
| "step": 5420, |
| "train_runtime": 49398.2246, |
| "train_tokens_per_second": 7189.65 |
| }, |
| { |
| "epoch": 1.461735208992394, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.66492429009232e-05, |
| "loss": 4.0315, |
| "num_input_tokens_seen": 355811328, |
| "step": 5430, |
| "train_runtime": 49488.7546, |
| "train_tokens_per_second": 7189.741 |
| }, |
| { |
| "epoch": 1.4644275425725248, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.660100375179649e-05, |
| "loss": 3.9762, |
| "num_input_tokens_seen": 356466688, |
| "step": 5440, |
| "train_runtime": 49580.1514, |
| "train_tokens_per_second": 7189.706 |
| }, |
| { |
| "epoch": 1.4671198761526554, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.655270949321616e-05, |
| "loss": 3.9728, |
| "num_input_tokens_seen": 357122048, |
| "step": 5450, |
| "train_runtime": 49671.019, |
| "train_tokens_per_second": 7189.747 |
| }, |
| { |
| "epoch": 1.469812209732786, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.650436035459945e-05, |
| "loss": 4.0728, |
| "num_input_tokens_seen": 357777408, |
| "step": 5460, |
| "train_runtime": 49761.4624, |
| "train_tokens_per_second": 7189.849 |
| }, |
| { |
| "epoch": 1.4725045433129165, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.6455956565624286e-05, |
| "loss": 4.0256, |
| "num_input_tokens_seen": 358432768, |
| "step": 5470, |
| "train_runtime": 49852.6514, |
| "train_tokens_per_second": 7189.844 |
| }, |
| { |
| "epoch": 1.475196876893047, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.6407498356228225e-05, |
| "loss": 3.9752, |
| "num_input_tokens_seen": 359088128, |
| "step": 5480, |
| "train_runtime": 49943.9357, |
| "train_tokens_per_second": 7189.824 |
| }, |
| { |
| "epoch": 1.4778892104731776, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.635898595660733e-05, |
| "loss": 4.0104, |
| "num_input_tokens_seen": 359743488, |
| "step": 5490, |
| "train_runtime": 50034.7254, |
| "train_tokens_per_second": 7189.876 |
| }, |
| { |
| "epoch": 1.480581544053308, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.63104195972151e-05, |
| "loss": 3.9878, |
| "num_input_tokens_seen": 360398848, |
| "step": 5500, |
| "train_runtime": 50125.3273, |
| "train_tokens_per_second": 7189.955 |
| }, |
| { |
| "epoch": 1.4832738776334389, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.626179950876134e-05, |
| "loss": 4.1044, |
| "num_input_tokens_seen": 361054208, |
| "step": 5510, |
| "train_runtime": 50233.9465, |
| "train_tokens_per_second": 7187.455 |
| }, |
| { |
| "epoch": 1.4859662112135694, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.6213125922211135e-05, |
| "loss": 3.9922, |
| "num_input_tokens_seen": 361709568, |
| "step": 5520, |
| "train_runtime": 50325.7876, |
| "train_tokens_per_second": 7187.36 |
| }, |
| { |
| "epoch": 1.4886585447937, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.616439906878367e-05, |
| "loss": 4.009, |
| "num_input_tokens_seen": 362364928, |
| "step": 5530, |
| "train_runtime": 50417.0272, |
| "train_tokens_per_second": 7187.352 |
| }, |
| { |
| "epoch": 1.4913508783738305, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.611561917995117e-05, |
| "loss": 4.0106, |
| "num_input_tokens_seen": 363020288, |
| "step": 5540, |
| "train_runtime": 50508.3602, |
| "train_tokens_per_second": 7187.331 |
| }, |
| { |
| "epoch": 1.494043211953961, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.606678648743783e-05, |
| "loss": 3.9998, |
| "num_input_tokens_seen": 363675648, |
| "step": 5550, |
| "train_runtime": 50599.5219, |
| "train_tokens_per_second": 7187.334 |
| }, |
| { |
| "epoch": 1.4967355455340916, |
| "grad_norm": 1.0, |
| "learning_rate": 3.601790122321864e-05, |
| "loss": 4.0427, |
| "num_input_tokens_seen": 364331008, |
| "step": 5560, |
| "train_runtime": 50690.8134, |
| "train_tokens_per_second": 7187.318 |
| }, |
| { |
| "epoch": 1.4994278791142222, |
| "grad_norm": 0.921875, |
| "learning_rate": 3.5968963619518366e-05, |
| "loss": 3.9806, |
| "num_input_tokens_seen": 364986368, |
| "step": 5570, |
| "train_runtime": 50782.1551, |
| "train_tokens_per_second": 7187.296 |
| }, |
| { |
| "epoch": 1.502120212694353, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.591997390881039e-05, |
| "loss": 4.0842, |
| "num_input_tokens_seen": 365641728, |
| "step": 5580, |
| "train_runtime": 50873.1287, |
| "train_tokens_per_second": 7187.325 |
| }, |
| { |
| "epoch": 1.5048125462744832, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.5870932323815624e-05, |
| "loss": 4.062, |
| "num_input_tokens_seen": 366297088, |
| "step": 5590, |
| "train_runtime": 50964.1659, |
| "train_tokens_per_second": 7187.346 |
| }, |
| { |
| "epoch": 1.507504879854614, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.582183909750141e-05, |
| "loss": 3.9796, |
| "num_input_tokens_seen": 366952448, |
| "step": 5600, |
| "train_runtime": 51055.039, |
| "train_tokens_per_second": 7187.389 |
| }, |
| { |
| "epoch": 1.5101972134347446, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.5772694463080394e-05, |
| "loss": 3.916, |
| "num_input_tokens_seen": 367607808, |
| "step": 5610, |
| "train_runtime": 51145.8693, |
| "train_tokens_per_second": 7187.439 |
| }, |
| { |
| "epoch": 1.512889547014875, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.572349865400944e-05, |
| "loss": 4.062, |
| "num_input_tokens_seen": 368263168, |
| "step": 5620, |
| "train_runtime": 51237.2955, |
| "train_tokens_per_second": 7187.404 |
| }, |
| { |
| "epoch": 1.5155818805950059, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.567425190398852e-05, |
| "loss": 4.0634, |
| "num_input_tokens_seen": 368918528, |
| "step": 5630, |
| "train_runtime": 51328.4263, |
| "train_tokens_per_second": 7187.412 |
| }, |
| { |
| "epoch": 1.5182742141751362, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.562495444695958e-05, |
| "loss": 3.9609, |
| "num_input_tokens_seen": 369573888, |
| "step": 5640, |
| "train_runtime": 51419.2465, |
| "train_tokens_per_second": 7187.462 |
| }, |
| { |
| "epoch": 1.520966547755267, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.557560651710546e-05, |
| "loss": 4.0745, |
| "num_input_tokens_seen": 370229248, |
| "step": 5650, |
| "train_runtime": 51510.6964, |
| "train_tokens_per_second": 7187.425 |
| }, |
| { |
| "epoch": 1.5236588813353975, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.552620834884876e-05, |
| "loss": 4.0192, |
| "num_input_tokens_seen": 370884608, |
| "step": 5660, |
| "train_runtime": 51602.3065, |
| "train_tokens_per_second": 7187.365 |
| }, |
| { |
| "epoch": 1.526351214915528, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.547676017685072e-05, |
| "loss": 4.0617, |
| "num_input_tokens_seen": 371539968, |
| "step": 5670, |
| "train_runtime": 51693.1661, |
| "train_tokens_per_second": 7187.41 |
| }, |
| { |
| "epoch": 1.5290435484956586, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.542726223601013e-05, |
| "loss": 4.0181, |
| "num_input_tokens_seen": 372195328, |
| "step": 5680, |
| "train_runtime": 51784.1565, |
| "train_tokens_per_second": 7187.436 |
| }, |
| { |
| "epoch": 1.5317358820757891, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.537771476146222e-05, |
| "loss": 3.9385, |
| "num_input_tokens_seen": 372850688, |
| "step": 5690, |
| "train_runtime": 51874.6753, |
| "train_tokens_per_second": 7187.528 |
| }, |
| { |
| "epoch": 1.53442821565592, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.532811798857749e-05, |
| "loss": 3.9561, |
| "num_input_tokens_seen": 373506048, |
| "step": 5700, |
| "train_runtime": 51965.8624, |
| "train_tokens_per_second": 7187.527 |
| }, |
| { |
| "epoch": 1.5371205492360502, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.527847215296065e-05, |
| "loss": 4.0395, |
| "num_input_tokens_seen": 374161408, |
| "step": 5710, |
| "train_runtime": 52056.8871, |
| "train_tokens_per_second": 7187.549 |
| }, |
| { |
| "epoch": 1.539812882816181, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.522877749044948e-05, |
| "loss": 4.0609, |
| "num_input_tokens_seen": 374816768, |
| "step": 5720, |
| "train_runtime": 52148.1425, |
| "train_tokens_per_second": 7187.538 |
| }, |
| { |
| "epoch": 1.5425052163963116, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.517903423711368e-05, |
| "loss": 4.098, |
| "num_input_tokens_seen": 375472128, |
| "step": 5730, |
| "train_runtime": 52239.0426, |
| "train_tokens_per_second": 7187.577 |
| }, |
| { |
| "epoch": 1.545197549976442, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.5129242629253815e-05, |
| "loss": 4.0239, |
| "num_input_tokens_seen": 376127488, |
| "step": 5740, |
| "train_runtime": 52330.2141, |
| "train_tokens_per_second": 7187.578 |
| }, |
| { |
| "epoch": 1.5478898835565726, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.5079402903400124e-05, |
| "loss": 4.0425, |
| "num_input_tokens_seen": 376782848, |
| "step": 5750, |
| "train_runtime": 52421.5113, |
| "train_tokens_per_second": 7187.562 |
| }, |
| { |
| "epoch": 1.5505822171367032, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.502951529631143e-05, |
| "loss": 3.992, |
| "num_input_tokens_seen": 377438208, |
| "step": 5760, |
| "train_runtime": 52512.2071, |
| "train_tokens_per_second": 7187.628 |
| }, |
| { |
| "epoch": 1.553274550716834, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.4979580044974026e-05, |
| "loss": 4.0186, |
| "num_input_tokens_seen": 378093568, |
| "step": 5770, |
| "train_runtime": 52603.1367, |
| "train_tokens_per_second": 7187.662 |
| }, |
| { |
| "epoch": 1.5559668842969643, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.492959738660052e-05, |
| "loss": 3.9302, |
| "num_input_tokens_seen": 378748928, |
| "step": 5780, |
| "train_runtime": 52694.318, |
| "train_tokens_per_second": 7187.662 |
| }, |
| { |
| "epoch": 1.558659217877095, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.487956755862874e-05, |
| "loss": 3.9378, |
| "num_input_tokens_seen": 379404288, |
| "step": 5790, |
| "train_runtime": 52784.9202, |
| "train_tokens_per_second": 7187.74 |
| }, |
| { |
| "epoch": 1.5613515514572256, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.482949079872056e-05, |
| "loss": 4.0948, |
| "num_input_tokens_seen": 380059648, |
| "step": 5800, |
| "train_runtime": 52875.9022, |
| "train_tokens_per_second": 7187.767 |
| }, |
| { |
| "epoch": 1.5640438850373561, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.4779367344760825e-05, |
| "loss": 4.0237, |
| "num_input_tokens_seen": 380715008, |
| "step": 5810, |
| "train_runtime": 52966.9492, |
| "train_tokens_per_second": 7187.784 |
| }, |
| { |
| "epoch": 1.5667362186174867, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.472919743485619e-05, |
| "loss": 4.0448, |
| "num_input_tokens_seen": 381370368, |
| "step": 5820, |
| "train_runtime": 53058.0795, |
| "train_tokens_per_second": 7187.791 |
| }, |
| { |
| "epoch": 1.5694285521976172, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.4678981307333985e-05, |
| "loss": 4.075, |
| "num_input_tokens_seen": 382025728, |
| "step": 5830, |
| "train_runtime": 53149.127, |
| "train_tokens_per_second": 7187.808 |
| }, |
| { |
| "epoch": 1.572120885777748, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.46287192007411e-05, |
| "loss": 4.0984, |
| "num_input_tokens_seen": 382681088, |
| "step": 5840, |
| "train_runtime": 53239.7537, |
| "train_tokens_per_second": 7187.882 |
| }, |
| { |
| "epoch": 1.5748132193578783, |
| "grad_norm": 1.125, |
| "learning_rate": 3.457841135384284e-05, |
| "loss": 4.0243, |
| "num_input_tokens_seen": 383336448, |
| "step": 5850, |
| "train_runtime": 53330.9167, |
| "train_tokens_per_second": 7187.884 |
| }, |
| { |
| "epoch": 1.577505552938009, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.452805800562181e-05, |
| "loss": 3.9802, |
| "num_input_tokens_seen": 383991808, |
| "step": 5860, |
| "train_runtime": 53421.5398, |
| "train_tokens_per_second": 7187.958 |
| }, |
| { |
| "epoch": 1.5801978865181396, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.447765939527673e-05, |
| "loss": 4.0104, |
| "num_input_tokens_seen": 384647168, |
| "step": 5870, |
| "train_runtime": 53512.1225, |
| "train_tokens_per_second": 7188.038 |
| }, |
| { |
| "epoch": 1.5828902200982702, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.442721576222139e-05, |
| "loss": 4.0854, |
| "num_input_tokens_seen": 385302528, |
| "step": 5880, |
| "train_runtime": 53603.182, |
| "train_tokens_per_second": 7188.053 |
| }, |
| { |
| "epoch": 1.5855825536784007, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.4376727346083384e-05, |
| "loss": 4.0028, |
| "num_input_tokens_seen": 385957888, |
| "step": 5890, |
| "train_runtime": 53694.6503, |
| "train_tokens_per_second": 7188.014 |
| }, |
| { |
| "epoch": 1.5882748872585313, |
| "grad_norm": 1.0, |
| "learning_rate": 3.43261943867031e-05, |
| "loss": 4.0289, |
| "num_input_tokens_seen": 386613248, |
| "step": 5900, |
| "train_runtime": 53785.2225, |
| "train_tokens_per_second": 7188.094 |
| }, |
| { |
| "epoch": 1.590967220838662, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.427561712413251e-05, |
| "loss": 3.9998, |
| "num_input_tokens_seen": 387268608, |
| "step": 5910, |
| "train_runtime": 53876.8747, |
| "train_tokens_per_second": 7188.03 |
| }, |
| { |
| "epoch": 1.5936595544187924, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.422499579863404e-05, |
| "loss": 4.0511, |
| "num_input_tokens_seen": 387923968, |
| "step": 5920, |
| "train_runtime": 53967.5433, |
| "train_tokens_per_second": 7188.098 |
| }, |
| { |
| "epoch": 1.5963518879989231, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.417433065067942e-05, |
| "loss": 4.1121, |
| "num_input_tokens_seen": 388579328, |
| "step": 5930, |
| "train_runtime": 54058.0362, |
| "train_tokens_per_second": 7188.188 |
| }, |
| { |
| "epoch": 1.5990442215790537, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.4123621920948577e-05, |
| "loss": 4.0063, |
| "num_input_tokens_seen": 389234688, |
| "step": 5940, |
| "train_runtime": 54149.1209, |
| "train_tokens_per_second": 7188.2 |
| }, |
| { |
| "epoch": 1.6017365551591842, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.407286985032846e-05, |
| "loss": 3.9714, |
| "num_input_tokens_seen": 389890048, |
| "step": 5950, |
| "train_runtime": 54240.6637, |
| "train_tokens_per_second": 7188.15 |
| }, |
| { |
| "epoch": 1.6044288887393148, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.40220746799119e-05, |
| "loss": 3.9757, |
| "num_input_tokens_seen": 390545408, |
| "step": 5960, |
| "train_runtime": 54331.414, |
| "train_tokens_per_second": 7188.206 |
| }, |
| { |
| "epoch": 1.6071212223194453, |
| "grad_norm": 1.0, |
| "learning_rate": 3.397123665099647e-05, |
| "loss": 3.9941, |
| "num_input_tokens_seen": 391200768, |
| "step": 5970, |
| "train_runtime": 54422.2535, |
| "train_tokens_per_second": 7188.25 |
| }, |
| { |
| "epoch": 1.609813555899576, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.3920356005083344e-05, |
| "loss": 3.991, |
| "num_input_tokens_seen": 391856128, |
| "step": 5980, |
| "train_runtime": 54512.9514, |
| "train_tokens_per_second": 7188.312 |
| }, |
| { |
| "epoch": 1.6125058894797064, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.386943298387615e-05, |
| "loss": 4.0555, |
| "num_input_tokens_seen": 392511488, |
| "step": 5990, |
| "train_runtime": 54604.0111, |
| "train_tokens_per_second": 7188.327 |
| }, |
| { |
| "epoch": 1.6151982230598372, |
| "grad_norm": 1.0, |
| "learning_rate": 3.38184678292798e-05, |
| "loss": 4.0397, |
| "num_input_tokens_seen": 393166848, |
| "step": 6000, |
| "train_runtime": 54695.0368, |
| "train_tokens_per_second": 7188.346 |
| }, |
| { |
| "epoch": 1.6178905566399677, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.3767460783399355e-05, |
| "loss": 4.0446, |
| "num_input_tokens_seen": 393822208, |
| "step": 6010, |
| "train_runtime": 54808.6027, |
| "train_tokens_per_second": 7185.409 |
| }, |
| { |
| "epoch": 1.6205828902200983, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.3716412088538905e-05, |
| "loss": 3.9582, |
| "num_input_tokens_seen": 394477568, |
| "step": 6020, |
| "train_runtime": 54898.5567, |
| "train_tokens_per_second": 7185.573 |
| }, |
| { |
| "epoch": 1.6232752238002288, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.366532198720036e-05, |
| "loss": 3.9191, |
| "num_input_tokens_seen": 395132928, |
| "step": 6030, |
| "train_runtime": 54989.6002, |
| "train_tokens_per_second": 7185.594 |
| }, |
| { |
| "epoch": 1.6259675573803594, |
| "grad_norm": 1.0, |
| "learning_rate": 3.3614190722082327e-05, |
| "loss": 3.9044, |
| "num_input_tokens_seen": 395788288, |
| "step": 6040, |
| "train_runtime": 55080.1763, |
| "train_tokens_per_second": 7185.676 |
| }, |
| { |
| "epoch": 1.6286598909604901, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.356301853607898e-05, |
| "loss": 4.0952, |
| "num_input_tokens_seen": 396443648, |
| "step": 6050, |
| "train_runtime": 55171.3889, |
| "train_tokens_per_second": 7185.675 |
| }, |
| { |
| "epoch": 1.6313522245406205, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.3511805672278876e-05, |
| "loss": 4.0242, |
| "num_input_tokens_seen": 397099008, |
| "step": 6060, |
| "train_runtime": 55262.5814, |
| "train_tokens_per_second": 7185.676 |
| }, |
| { |
| "epoch": 1.6340445581207512, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.346055237396379e-05, |
| "loss": 3.9988, |
| "num_input_tokens_seen": 397754368, |
| "step": 6070, |
| "train_runtime": 55353.1031, |
| "train_tokens_per_second": 7185.765 |
| }, |
| { |
| "epoch": 1.6367368917008818, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.340925888460761e-05, |
| "loss": 3.9703, |
| "num_input_tokens_seen": 398409728, |
| "step": 6080, |
| "train_runtime": 55444.0405, |
| "train_tokens_per_second": 7185.799 |
| }, |
| { |
| "epoch": 1.6394292252810123, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.3357925447875105e-05, |
| "loss": 4.0456, |
| "num_input_tokens_seen": 399065088, |
| "step": 6090, |
| "train_runtime": 55535.1639, |
| "train_tokens_per_second": 7185.809 |
| }, |
| { |
| "epoch": 1.6421215588611429, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.330655230762085e-05, |
| "loss": 4.0108, |
| "num_input_tokens_seen": 399720448, |
| "step": 6100, |
| "train_runtime": 55625.2117, |
| "train_tokens_per_second": 7185.958 |
| }, |
| { |
| "epoch": 1.6448138924412734, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.3255139707888005e-05, |
| "loss": 3.9459, |
| "num_input_tokens_seen": 400375808, |
| "step": 6110, |
| "train_runtime": 55716.3074, |
| "train_tokens_per_second": 7185.972 |
| }, |
| { |
| "epoch": 1.6475062260214042, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.320368789290718e-05, |
| "loss": 3.9569, |
| "num_input_tokens_seen": 401031168, |
| "step": 6120, |
| "train_runtime": 55807.5546, |
| "train_tokens_per_second": 7185.966 |
| }, |
| { |
| "epoch": 1.6501985596015345, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.31521971070953e-05, |
| "loss": 3.8892, |
| "num_input_tokens_seen": 401686528, |
| "step": 6130, |
| "train_runtime": 55898.3697, |
| "train_tokens_per_second": 7186.015 |
| }, |
| { |
| "epoch": 1.6528908931816653, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.310066759505437e-05, |
| "loss": 4.0045, |
| "num_input_tokens_seen": 402341888, |
| "step": 6140, |
| "train_runtime": 55989.3295, |
| "train_tokens_per_second": 7186.046 |
| }, |
| { |
| "epoch": 1.6555832267617958, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.3049099601570394e-05, |
| "loss": 4.0136, |
| "num_input_tokens_seen": 402997248, |
| "step": 6150, |
| "train_runtime": 56080.6133, |
| "train_tokens_per_second": 7186.035 |
| }, |
| { |
| "epoch": 1.6582755603419264, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.2997493371612176e-05, |
| "loss": 3.9676, |
| "num_input_tokens_seen": 403652608, |
| "step": 6160, |
| "train_runtime": 56171.1951, |
| "train_tokens_per_second": 7186.114 |
| }, |
| { |
| "epoch": 1.660967893922057, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.294584915033015e-05, |
| "loss": 4.0019, |
| "num_input_tokens_seen": 404307968, |
| "step": 6170, |
| "train_runtime": 56262.7175, |
| "train_tokens_per_second": 7186.073 |
| }, |
| { |
| "epoch": 1.6636602275021874, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.289416718305522e-05, |
| "loss": 3.9407, |
| "num_input_tokens_seen": 404963328, |
| "step": 6180, |
| "train_runtime": 56353.3711, |
| "train_tokens_per_second": 7186.142 |
| }, |
| { |
| "epoch": 1.6663525610823182, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.284244771529762e-05, |
| "loss": 4.0951, |
| "num_input_tokens_seen": 405618688, |
| "step": 6190, |
| "train_runtime": 56444.4303, |
| "train_tokens_per_second": 7186.16 |
| }, |
| { |
| "epoch": 1.6690448946624485, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.279069099274569e-05, |
| "loss": 4.0132, |
| "num_input_tokens_seen": 406274048, |
| "step": 6200, |
| "train_runtime": 56535.5036, |
| "train_tokens_per_second": 7186.175 |
| }, |
| { |
| "epoch": 1.6717372282425793, |
| "grad_norm": 1.0, |
| "learning_rate": 3.2738897261264796e-05, |
| "loss": 4.0009, |
| "num_input_tokens_seen": 406929408, |
| "step": 6210, |
| "train_runtime": 56625.8426, |
| "train_tokens_per_second": 7186.284 |
| }, |
| { |
| "epoch": 1.6744295618227099, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.2687066766896056e-05, |
| "loss": 4.0089, |
| "num_input_tokens_seen": 407584768, |
| "step": 6220, |
| "train_runtime": 56717.0759, |
| "train_tokens_per_second": 7186.28 |
| }, |
| { |
| "epoch": 1.6771218954028404, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.263519975585527e-05, |
| "loss": 4.0005, |
| "num_input_tokens_seen": 408240128, |
| "step": 6230, |
| "train_runtime": 56807.9677, |
| "train_tokens_per_second": 7186.318 |
| }, |
| { |
| "epoch": 1.679814228982971, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.258329647453169e-05, |
| "loss": 4.0387, |
| "num_input_tokens_seen": 408895488, |
| "step": 6240, |
| "train_runtime": 56899.2401, |
| "train_tokens_per_second": 7186.308 |
| }, |
| { |
| "epoch": 1.6825065625631015, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.253135716948685e-05, |
| "loss": 3.9846, |
| "num_input_tokens_seen": 409550848, |
| "step": 6250, |
| "train_runtime": 56989.5839, |
| "train_tokens_per_second": 7186.416 |
| }, |
| { |
| "epoch": 1.6851988961432323, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.2479382087453445e-05, |
| "loss": 3.9494, |
| "num_input_tokens_seen": 410206208, |
| "step": 6260, |
| "train_runtime": 57081.0846, |
| "train_tokens_per_second": 7186.377 |
| }, |
| { |
| "epoch": 1.6878912297233626, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.24273714753341e-05, |
| "loss": 4.0214, |
| "num_input_tokens_seen": 410861568, |
| "step": 6270, |
| "train_runtime": 57172.0125, |
| "train_tokens_per_second": 7186.411 |
| }, |
| { |
| "epoch": 1.6905835633034934, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.237532558020023e-05, |
| "loss": 3.9752, |
| "num_input_tokens_seen": 411516928, |
| "step": 6280, |
| "train_runtime": 57263.3216, |
| "train_tokens_per_second": 7186.396 |
| }, |
| { |
| "epoch": 1.693275896883624, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.232324464929087e-05, |
| "loss": 3.9727, |
| "num_input_tokens_seen": 412172288, |
| "step": 6290, |
| "train_runtime": 57353.7021, |
| "train_tokens_per_second": 7186.498 |
| }, |
| { |
| "epoch": 1.6959682304637544, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.2271128930011466e-05, |
| "loss": 4.0558, |
| "num_input_tokens_seen": 412827648, |
| "step": 6300, |
| "train_runtime": 57444.8662, |
| "train_tokens_per_second": 7186.502 |
| }, |
| { |
| "epoch": 1.698660564043885, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.221897866993274e-05, |
| "loss": 4.0699, |
| "num_input_tokens_seen": 413483008, |
| "step": 6310, |
| "train_runtime": 57535.7838, |
| "train_tokens_per_second": 7186.536 |
| }, |
| { |
| "epoch": 1.7013528976240155, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.216679411678949e-05, |
| "loss": 3.97, |
| "num_input_tokens_seen": 414138368, |
| "step": 6320, |
| "train_runtime": 57626.993, |
| "train_tokens_per_second": 7186.534 |
| }, |
| { |
| "epoch": 1.7040452312041463, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.2114575518479415e-05, |
| "loss": 4.1078, |
| "num_input_tokens_seen": 414793728, |
| "step": 6330, |
| "train_runtime": 57717.747, |
| "train_tokens_per_second": 7186.589 |
| }, |
| { |
| "epoch": 1.7067375647842766, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.2062323123061964e-05, |
| "loss": 3.9, |
| "num_input_tokens_seen": 415449088, |
| "step": 6340, |
| "train_runtime": 57808.6745, |
| "train_tokens_per_second": 7186.622 |
| }, |
| { |
| "epoch": 1.7094298983644074, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.2010037178757116e-05, |
| "loss": 4.0176, |
| "num_input_tokens_seen": 416104448, |
| "step": 6350, |
| "train_runtime": 57899.7806, |
| "train_tokens_per_second": 7186.633 |
| }, |
| { |
| "epoch": 1.712122231944538, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.195771793394421e-05, |
| "loss": 4.0038, |
| "num_input_tokens_seen": 416759808, |
| "step": 6360, |
| "train_runtime": 57990.7902, |
| "train_tokens_per_second": 7186.655 |
| }, |
| { |
| "epoch": 1.7148145655246685, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.1905365637160814e-05, |
| "loss": 3.9994, |
| "num_input_tokens_seen": 417415168, |
| "step": 6370, |
| "train_runtime": 58081.7605, |
| "train_tokens_per_second": 7186.682 |
| }, |
| { |
| "epoch": 1.717506899104799, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.1852980537101464e-05, |
| "loss": 4.0242, |
| "num_input_tokens_seen": 418070528, |
| "step": 6380, |
| "train_runtime": 58173.008, |
| "train_tokens_per_second": 7186.675 |
| }, |
| { |
| "epoch": 1.7201992326849296, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.180056288261655e-05, |
| "loss": 4.0096, |
| "num_input_tokens_seen": 418725888, |
| "step": 6390, |
| "train_runtime": 58263.8091, |
| "train_tokens_per_second": 7186.724 |
| }, |
| { |
| "epoch": 1.7228915662650603, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.174811292271111e-05, |
| "loss": 4.0482, |
| "num_input_tokens_seen": 419381248, |
| "step": 6400, |
| "train_runtime": 58354.9614, |
| "train_tokens_per_second": 7186.728 |
| }, |
| { |
| "epoch": 1.7255838998451907, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.1695630906543636e-05, |
| "loss": 4.0342, |
| "num_input_tokens_seen": 420036608, |
| "step": 6410, |
| "train_runtime": 58445.4659, |
| "train_tokens_per_second": 7186.813 |
| }, |
| { |
| "epoch": 1.7282762334253214, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.1643117083424893e-05, |
| "loss": 3.9961, |
| "num_input_tokens_seen": 420691968, |
| "step": 6420, |
| "train_runtime": 58536.7942, |
| "train_tokens_per_second": 7186.795 |
| }, |
| { |
| "epoch": 1.730968567005452, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.1590571702816775e-05, |
| "loss": 3.9685, |
| "num_input_tokens_seen": 421347328, |
| "step": 6430, |
| "train_runtime": 58627.8959, |
| "train_tokens_per_second": 7186.806 |
| }, |
| { |
| "epoch": 1.7336609005855825, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.153799501433103e-05, |
| "loss": 3.9833, |
| "num_input_tokens_seen": 422002688, |
| "step": 6440, |
| "train_runtime": 58718.8139, |
| "train_tokens_per_second": 7186.839 |
| }, |
| { |
| "epoch": 1.736353234165713, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.1485387267728195e-05, |
| "loss": 4.1101, |
| "num_input_tokens_seen": 422658048, |
| "step": 6450, |
| "train_runtime": 58809.7237, |
| "train_tokens_per_second": 7186.874 |
| }, |
| { |
| "epoch": 1.7390455677458436, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.1432748712916304e-05, |
| "loss": 4.123, |
| "num_input_tokens_seen": 423313408, |
| "step": 6460, |
| "train_runtime": 58900.9514, |
| "train_tokens_per_second": 7186.869 |
| }, |
| { |
| "epoch": 1.7417379013259744, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.138007959994976e-05, |
| "loss": 3.9765, |
| "num_input_tokens_seen": 423968768, |
| "step": 6470, |
| "train_runtime": 58992.128, |
| "train_tokens_per_second": 7186.87 |
| }, |
| { |
| "epoch": 1.7444302349061047, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.132738017902811e-05, |
| "loss": 4.003, |
| "num_input_tokens_seen": 424624128, |
| "step": 6480, |
| "train_runtime": 59083.3309, |
| "train_tokens_per_second": 7186.868 |
| }, |
| { |
| "epoch": 1.7471225684862355, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.1274650700494896e-05, |
| "loss": 3.9553, |
| "num_input_tokens_seen": 425279488, |
| "step": 6490, |
| "train_runtime": 59174.4889, |
| "train_tokens_per_second": 7186.872 |
| }, |
| { |
| "epoch": 1.749814902066366, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.122189141483644e-05, |
| "loss": 4.123, |
| "num_input_tokens_seen": 425934848, |
| "step": 6500, |
| "train_runtime": 59264.7894, |
| "train_tokens_per_second": 7186.98 |
| }, |
| { |
| "epoch": 1.7525072356464966, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.116910257268066e-05, |
| "loss": 4.1079, |
| "num_input_tokens_seen": 426590208, |
| "step": 6510, |
| "train_runtime": 59373.0354, |
| "train_tokens_per_second": 7184.915 |
| }, |
| { |
| "epoch": 1.7551995692266273, |
| "grad_norm": 1.0, |
| "learning_rate": 3.1116284424795875e-05, |
| "loss": 4.033, |
| "num_input_tokens_seen": 427245568, |
| "step": 6520, |
| "train_runtime": 59463.6665, |
| "train_tokens_per_second": 7184.985 |
| }, |
| { |
| "epoch": 1.7578919028067577, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.106343722208962e-05, |
| "loss": 4.0505, |
| "num_input_tokens_seen": 427900928, |
| "step": 6530, |
| "train_runtime": 59554.4574, |
| "train_tokens_per_second": 7185.036 |
| }, |
| { |
| "epoch": 1.7605842363868884, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.1010561215607464e-05, |
| "loss": 4.009, |
| "num_input_tokens_seen": 428556288, |
| "step": 6540, |
| "train_runtime": 59645.9384, |
| "train_tokens_per_second": 7185.004 |
| }, |
| { |
| "epoch": 1.7632765699670188, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.0957656656531785e-05, |
| "loss": 3.8864, |
| "num_input_tokens_seen": 429211648, |
| "step": 6550, |
| "train_runtime": 59736.628, |
| "train_tokens_per_second": 7185.067 |
| }, |
| { |
| "epoch": 1.7659689035471495, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.090472379618061e-05, |
| "loss": 4.0765, |
| "num_input_tokens_seen": 429867008, |
| "step": 6560, |
| "train_runtime": 59827.3922, |
| "train_tokens_per_second": 7185.12 |
| }, |
| { |
| "epoch": 1.76866123712728, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.0851762886006415e-05, |
| "loss": 3.9765, |
| "num_input_tokens_seen": 430522368, |
| "step": 6570, |
| "train_runtime": 59918.2991, |
| "train_tokens_per_second": 7185.157 |
| }, |
| { |
| "epoch": 1.7713535707074106, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.0798774177594894e-05, |
| "loss": 4.0562, |
| "num_input_tokens_seen": 431177728, |
| "step": 6580, |
| "train_runtime": 60009.4082, |
| "train_tokens_per_second": 7185.169 |
| }, |
| { |
| "epoch": 1.7740459042875414, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.074575792266383e-05, |
| "loss": 4.0406, |
| "num_input_tokens_seen": 431833088, |
| "step": 6590, |
| "train_runtime": 60101.1242, |
| "train_tokens_per_second": 7185.108 |
| }, |
| { |
| "epoch": 1.7767382378676717, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.069271437306185e-05, |
| "loss": 3.9112, |
| "num_input_tokens_seen": 432488448, |
| "step": 6600, |
| "train_runtime": 60192.3902, |
| "train_tokens_per_second": 7185.102 |
| }, |
| { |
| "epoch": 1.7794305714478025, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.063964378076723e-05, |
| "loss": 3.9743, |
| "num_input_tokens_seen": 433143808, |
| "step": 6610, |
| "train_runtime": 60283.5332, |
| "train_tokens_per_second": 7185.11 |
| }, |
| { |
| "epoch": 1.7821229050279328, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.058654639788673e-05, |
| "loss": 4.0205, |
| "num_input_tokens_seen": 433799168, |
| "step": 6620, |
| "train_runtime": 60374.3752, |
| "train_tokens_per_second": 7185.154 |
| }, |
| { |
| "epoch": 1.7848152386080636, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.0533422476654355e-05, |
| "loss": 3.9233, |
| "num_input_tokens_seen": 434454528, |
| "step": 6630, |
| "train_runtime": 60464.7616, |
| "train_tokens_per_second": 7185.252 |
| }, |
| { |
| "epoch": 1.7875075721881941, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.0480272269430193e-05, |
| "loss": 3.9677, |
| "num_input_tokens_seen": 435109888, |
| "step": 6640, |
| "train_runtime": 60555.7399, |
| "train_tokens_per_second": 7185.279 |
| }, |
| { |
| "epoch": 1.7901999057683247, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.0427096028699192e-05, |
| "loss": 4.0849, |
| "num_input_tokens_seen": 435765248, |
| "step": 6650, |
| "train_runtime": 60646.5633, |
| "train_tokens_per_second": 7185.325 |
| }, |
| { |
| "epoch": 1.7928922393484554, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.0373894007069985e-05, |
| "loss": 3.9531, |
| "num_input_tokens_seen": 436420608, |
| "step": 6660, |
| "train_runtime": 60738.1385, |
| "train_tokens_per_second": 7185.281 |
| }, |
| { |
| "epoch": 1.7955845729285858, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.0320666457273657e-05, |
| "loss": 3.9727, |
| "num_input_tokens_seen": 437075968, |
| "step": 6670, |
| "train_runtime": 60829.4049, |
| "train_tokens_per_second": 7185.274 |
| }, |
| { |
| "epoch": 1.7982769065087165, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.0267413632162566e-05, |
| "loss": 4.005, |
| "num_input_tokens_seen": 437731328, |
| "step": 6680, |
| "train_runtime": 60920.3104, |
| "train_tokens_per_second": 7185.31 |
| }, |
| { |
| "epoch": 1.8009692400888468, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.0214135784709146e-05, |
| "loss": 3.9385, |
| "num_input_tokens_seen": 438386688, |
| "step": 6690, |
| "train_runtime": 61011.0006, |
| "train_tokens_per_second": 7185.371 |
| }, |
| { |
| "epoch": 1.8036615736689776, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.0160833168004694e-05, |
| "loss": 3.9885, |
| "num_input_tokens_seen": 439042048, |
| "step": 6700, |
| "train_runtime": 61101.2415, |
| "train_tokens_per_second": 7185.485 |
| }, |
| { |
| "epoch": 1.8063539072491082, |
| "grad_norm": 0.921875, |
| "learning_rate": 3.010750603525816e-05, |
| "loss": 4.0583, |
| "num_input_tokens_seen": 439697408, |
| "step": 6710, |
| "train_runtime": 61192.4341, |
| "train_tokens_per_second": 7185.486 |
| }, |
| { |
| "epoch": 1.8090462408292387, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.005415463979496e-05, |
| "loss": 3.9452, |
| "num_input_tokens_seen": 440352768, |
| "step": 6720, |
| "train_runtime": 61283.4288, |
| "train_tokens_per_second": 7185.511 |
| }, |
| { |
| "epoch": 1.8117385744093695, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.000077923505579e-05, |
| "loss": 4.0071, |
| "num_input_tokens_seen": 441008128, |
| "step": 6730, |
| "train_runtime": 61374.2641, |
| "train_tokens_per_second": 7185.555 |
| }, |
| { |
| "epoch": 1.8144309079894998, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.9947380074595372e-05, |
| "loss": 3.9955, |
| "num_input_tokens_seen": 441663488, |
| "step": 6740, |
| "train_runtime": 61465.5537, |
| "train_tokens_per_second": 7185.545 |
| }, |
| { |
| "epoch": 1.8171232415696306, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.9893957412081286e-05, |
| "loss": 3.977, |
| "num_input_tokens_seen": 442318848, |
| "step": 6750, |
| "train_runtime": 61556.3225, |
| "train_tokens_per_second": 7185.596 |
| }, |
| { |
| "epoch": 1.8198155751497609, |
| "grad_norm": 1.0, |
| "learning_rate": 2.984051150129276e-05, |
| "loss": 3.976, |
| "num_input_tokens_seen": 442974208, |
| "step": 6760, |
| "train_runtime": 61647.2052, |
| "train_tokens_per_second": 7185.633 |
| }, |
| { |
| "epoch": 1.8225079087298917, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.9787042596119453e-05, |
| "loss": 4.016, |
| "num_input_tokens_seen": 443629568, |
| "step": 6770, |
| "train_runtime": 61738.2432, |
| "train_tokens_per_second": 7185.653 |
| }, |
| { |
| "epoch": 1.8252002423100222, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.9733550950560268e-05, |
| "loss": 4.0733, |
| "num_input_tokens_seen": 444284928, |
| "step": 6780, |
| "train_runtime": 61829.2087, |
| "train_tokens_per_second": 7185.68 |
| }, |
| { |
| "epoch": 1.8278925758901527, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.9680036818722113e-05, |
| "loss": 3.9411, |
| "num_input_tokens_seen": 444940288, |
| "step": 6790, |
| "train_runtime": 61919.8373, |
| "train_tokens_per_second": 7185.747 |
| }, |
| { |
| "epoch": 1.8305849094702835, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.962650045481875e-05, |
| "loss": 3.9126, |
| "num_input_tokens_seen": 445595648, |
| "step": 6800, |
| "train_runtime": 62011.0093, |
| "train_tokens_per_second": 7185.751 |
| }, |
| { |
| "epoch": 1.8332772430504138, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.9572942113169515e-05, |
| "loss": 3.9839, |
| "num_input_tokens_seen": 446251008, |
| "step": 6810, |
| "train_runtime": 62101.7996, |
| "train_tokens_per_second": 7185.798 |
| }, |
| { |
| "epoch": 1.8359695766305446, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.951936204819818e-05, |
| "loss": 3.9176, |
| "num_input_tokens_seen": 446906368, |
| "step": 6820, |
| "train_runtime": 62192.8181, |
| "train_tokens_per_second": 7185.82 |
| }, |
| { |
| "epoch": 1.8386619102106752, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.946576051443168e-05, |
| "loss": 3.9816, |
| "num_input_tokens_seen": 447561728, |
| "step": 6830, |
| "train_runtime": 62283.5597, |
| "train_tokens_per_second": 7185.873 |
| }, |
| { |
| "epoch": 1.8413542437908057, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.9412137766498952e-05, |
| "loss": 4.0402, |
| "num_input_tokens_seen": 448217088, |
| "step": 6840, |
| "train_runtime": 62374.6178, |
| "train_tokens_per_second": 7185.889 |
| }, |
| { |
| "epoch": 1.8440465773709362, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.9358494059129714e-05, |
| "loss": 4.0046, |
| "num_input_tokens_seen": 448872448, |
| "step": 6850, |
| "train_runtime": 62465.6805, |
| "train_tokens_per_second": 7185.905 |
| }, |
| { |
| "epoch": 1.8467389109510668, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.9304829647153243e-05, |
| "loss": 4.0306, |
| "num_input_tokens_seen": 449527808, |
| "step": 6860, |
| "train_runtime": 62556.3264, |
| "train_tokens_per_second": 7185.969 |
| }, |
| { |
| "epoch": 1.8494312445311976, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.925114478549717e-05, |
| "loss": 3.8527, |
| "num_input_tokens_seen": 450183168, |
| "step": 6870, |
| "train_runtime": 62647.2501, |
| "train_tokens_per_second": 7186.0 |
| }, |
| { |
| "epoch": 1.8521235781113279, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.9197439729186272e-05, |
| "loss": 3.9802, |
| "num_input_tokens_seen": 450838528, |
| "step": 6880, |
| "train_runtime": 62738.6239, |
| "train_tokens_per_second": 7185.981 |
| }, |
| { |
| "epoch": 1.8548159116914587, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.914371473334126e-05, |
| "loss": 3.9518, |
| "num_input_tokens_seen": 451493888, |
| "step": 6890, |
| "train_runtime": 62829.5332, |
| "train_tokens_per_second": 7186.014 |
| }, |
| { |
| "epoch": 1.8575082452715892, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.908997005317756e-05, |
| "loss": 4.0215, |
| "num_input_tokens_seen": 452149248, |
| "step": 6900, |
| "train_runtime": 62920.7037, |
| "train_tokens_per_second": 7186.017 |
| }, |
| { |
| "epoch": 1.8602005788517197, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.9036205944004114e-05, |
| "loss": 3.8857, |
| "num_input_tokens_seen": 452804608, |
| "step": 6910, |
| "train_runtime": 63012.0036, |
| "train_tokens_per_second": 7186.006 |
| }, |
| { |
| "epoch": 1.8628929124318503, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.8982422661222154e-05, |
| "loss": 4.0057, |
| "num_input_tokens_seen": 453459968, |
| "step": 6920, |
| "train_runtime": 63102.6813, |
| "train_tokens_per_second": 7186.065 |
| }, |
| { |
| "epoch": 1.8655852460119808, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.8928620460324007e-05, |
| "loss": 3.97, |
| "num_input_tokens_seen": 454115328, |
| "step": 6930, |
| "train_runtime": 63193.6179, |
| "train_tokens_per_second": 7186.095 |
| }, |
| { |
| "epoch": 1.8682775795921116, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.887479959689185e-05, |
| "loss": 4.0438, |
| "num_input_tokens_seen": 454770688, |
| "step": 6940, |
| "train_runtime": 63284.6204, |
| "train_tokens_per_second": 7186.117 |
| }, |
| { |
| "epoch": 1.870969913172242, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.882096032659652e-05, |
| "loss": 3.9974, |
| "num_input_tokens_seen": 455426048, |
| "step": 6950, |
| "train_runtime": 63375.5135, |
| "train_tokens_per_second": 7186.152 |
| }, |
| { |
| "epoch": 1.8736622467523727, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.8767102905196308e-05, |
| "loss": 4.0249, |
| "num_input_tokens_seen": 456081408, |
| "step": 6960, |
| "train_runtime": 63466.6018, |
| "train_tokens_per_second": 7186.164 |
| }, |
| { |
| "epoch": 1.8763545803325032, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.8713227588535705e-05, |
| "loss": 3.9506, |
| "num_input_tokens_seen": 456736768, |
| "step": 6970, |
| "train_runtime": 63557.4534, |
| "train_tokens_per_second": 7186.203 |
| }, |
| { |
| "epoch": 1.8790469139126338, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.8659334632544244e-05, |
| "loss": 4.0156, |
| "num_input_tokens_seen": 457392128, |
| "step": 6980, |
| "train_runtime": 63648.8191, |
| "train_tokens_per_second": 7186.184 |
| }, |
| { |
| "epoch": 1.8817392474927643, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.860542429323521e-05, |
| "loss": 4.0436, |
| "num_input_tokens_seen": 458047488, |
| "step": 6990, |
| "train_runtime": 63739.4385, |
| "train_tokens_per_second": 7186.249 |
| }, |
| { |
| "epoch": 1.8844315810728949, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.8551496826704517e-05, |
| "loss": 3.9041, |
| "num_input_tokens_seen": 458702848, |
| "step": 7000, |
| "train_runtime": 63830.3107, |
| "train_tokens_per_second": 7186.286 |
| }, |
| { |
| "epoch": 1.8871239146530256, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.84975524891294e-05, |
| "loss": 3.9567, |
| "num_input_tokens_seen": 459358208, |
| "step": 7010, |
| "train_runtime": 63939.4485, |
| "train_tokens_per_second": 7184.269 |
| }, |
| { |
| "epoch": 1.889816248233156, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.8443591536767244e-05, |
| "loss": 3.9643, |
| "num_input_tokens_seen": 460013568, |
| "step": 7020, |
| "train_runtime": 64030.6445, |
| "train_tokens_per_second": 7184.272 |
| }, |
| { |
| "epoch": 1.8925085818132867, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.8389614225954382e-05, |
| "loss": 3.9844, |
| "num_input_tokens_seen": 460668928, |
| "step": 7030, |
| "train_runtime": 64121.8034, |
| "train_tokens_per_second": 7184.279 |
| }, |
| { |
| "epoch": 1.8952009153934173, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.8335620813104834e-05, |
| "loss": 4.0291, |
| "num_input_tokens_seen": 461324288, |
| "step": 7040, |
| "train_runtime": 64212.97, |
| "train_tokens_per_second": 7184.285 |
| }, |
| { |
| "epoch": 1.8978932489735478, |
| "grad_norm": 1.0, |
| "learning_rate": 2.828161155470912e-05, |
| "loss": 3.9941, |
| "num_input_tokens_seen": 461979648, |
| "step": 7050, |
| "train_runtime": 64304.1397, |
| "train_tokens_per_second": 7184.291 |
| }, |
| { |
| "epoch": 1.9005855825536784, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.8227586707333035e-05, |
| "loss": 3.9564, |
| "num_input_tokens_seen": 462635008, |
| "step": 7060, |
| "train_runtime": 64395.1051, |
| "train_tokens_per_second": 7184.319 |
| }, |
| { |
| "epoch": 1.903277916133809, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.817354652761643e-05, |
| "loss": 3.964, |
| "num_input_tokens_seen": 463290368, |
| "step": 7070, |
| "train_runtime": 64485.9399, |
| "train_tokens_per_second": 7184.362 |
| }, |
| { |
| "epoch": 1.9059702497139397, |
| "grad_norm": 1.0, |
| "learning_rate": 2.811949127227198e-05, |
| "loss": 3.9875, |
| "num_input_tokens_seen": 463945728, |
| "step": 7080, |
| "train_runtime": 64577.2564, |
| "train_tokens_per_second": 7184.352 |
| }, |
| { |
| "epoch": 1.90866258329407, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.806542119808398e-05, |
| "loss": 4.0569, |
| "num_input_tokens_seen": 464601088, |
| "step": 7090, |
| "train_runtime": 64668.0339, |
| "train_tokens_per_second": 7184.401 |
| }, |
| { |
| "epoch": 1.9113549168742008, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.8011336561907125e-05, |
| "loss": 3.9746, |
| "num_input_tokens_seen": 465256448, |
| "step": 7100, |
| "train_runtime": 64759.001, |
| "train_tokens_per_second": 7184.429 |
| }, |
| { |
| "epoch": 1.9140472504543313, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.7957237620665285e-05, |
| "loss": 4.0546, |
| "num_input_tokens_seen": 465911808, |
| "step": 7110, |
| "train_runtime": 64849.8303, |
| "train_tokens_per_second": 7184.472 |
| }, |
| { |
| "epoch": 1.9167395840344619, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.790312463135027e-05, |
| "loss": 4.0233, |
| "num_input_tokens_seen": 466567168, |
| "step": 7120, |
| "train_runtime": 64941.3773, |
| "train_tokens_per_second": 7184.436 |
| }, |
| { |
| "epoch": 1.9194319176145924, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.7848997851020652e-05, |
| "loss": 3.9364, |
| "num_input_tokens_seen": 467222528, |
| "step": 7130, |
| "train_runtime": 65032.2044, |
| "train_tokens_per_second": 7184.479 |
| }, |
| { |
| "epoch": 1.922124251194723, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.7794857536800496e-05, |
| "loss": 3.9537, |
| "num_input_tokens_seen": 467877888, |
| "step": 7140, |
| "train_runtime": 65123.1618, |
| "train_tokens_per_second": 7184.508 |
| }, |
| { |
| "epoch": 1.9248165847748537, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.774070394587816e-05, |
| "loss": 3.9303, |
| "num_input_tokens_seen": 468533248, |
| "step": 7150, |
| "train_runtime": 65214.4402, |
| "train_tokens_per_second": 7184.502 |
| }, |
| { |
| "epoch": 1.927508918354984, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.768653733550507e-05, |
| "loss": 3.9496, |
| "num_input_tokens_seen": 469188608, |
| "step": 7160, |
| "train_runtime": 65305.3984, |
| "train_tokens_per_second": 7184.53 |
| }, |
| { |
| "epoch": 1.9302012519351148, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.7632357962994508e-05, |
| "loss": 3.9278, |
| "num_input_tokens_seen": 469843968, |
| "step": 7170, |
| "train_runtime": 65396.6545, |
| "train_tokens_per_second": 7184.526 |
| }, |
| { |
| "epoch": 1.9328935855152454, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.757816608572038e-05, |
| "loss": 3.9639, |
| "num_input_tokens_seen": 470499328, |
| "step": 7180, |
| "train_runtime": 65487.742, |
| "train_tokens_per_second": 7184.54 |
| }, |
| { |
| "epoch": 1.935585919095376, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.7523961961115986e-05, |
| "loss": 3.9429, |
| "num_input_tokens_seen": 471154688, |
| "step": 7190, |
| "train_runtime": 65578.8448, |
| "train_tokens_per_second": 7184.553 |
| }, |
| { |
| "epoch": 1.9382782526755065, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.7469745846672818e-05, |
| "loss": 3.9361, |
| "num_input_tokens_seen": 471810048, |
| "step": 7200, |
| "train_runtime": 65670.0383, |
| "train_tokens_per_second": 7184.556 |
| }, |
| { |
| "epoch": 1.940970586255637, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.7415517999939316e-05, |
| "loss": 3.9282, |
| "num_input_tokens_seen": 472465408, |
| "step": 7210, |
| "train_runtime": 65761.1225, |
| "train_tokens_per_second": 7184.57 |
| }, |
| { |
| "epoch": 1.9436629198357678, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.7361278678519654e-05, |
| "loss": 3.9673, |
| "num_input_tokens_seen": 473120768, |
| "step": 7220, |
| "train_runtime": 65852.8059, |
| "train_tokens_per_second": 7184.52 |
| }, |
| { |
| "epoch": 1.946355253415898, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.7307028140072515e-05, |
| "loss": 3.965, |
| "num_input_tokens_seen": 473776128, |
| "step": 7230, |
| "train_runtime": 65944.3346, |
| "train_tokens_per_second": 7184.486 |
| }, |
| { |
| "epoch": 1.9490475869960289, |
| "grad_norm": 1.0, |
| "learning_rate": 2.7252766642309873e-05, |
| "loss": 4.0568, |
| "num_input_tokens_seen": 474431488, |
| "step": 7240, |
| "train_runtime": 66035.3472, |
| "train_tokens_per_second": 7184.508 |
| }, |
| { |
| "epoch": 1.9517399205761594, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.7198494442995752e-05, |
| "loss": 4.0275, |
| "num_input_tokens_seen": 475086848, |
| "step": 7250, |
| "train_runtime": 66126.6077, |
| "train_tokens_per_second": 7184.504 |
| }, |
| { |
| "epoch": 1.95443225415629, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.714421179994503e-05, |
| "loss": 3.9956, |
| "num_input_tokens_seen": 475742208, |
| "step": 7260, |
| "train_runtime": 66217.8895, |
| "train_tokens_per_second": 7184.497 |
| }, |
| { |
| "epoch": 1.9571245877364205, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.708991897102218e-05, |
| "loss": 3.9954, |
| "num_input_tokens_seen": 476397568, |
| "step": 7270, |
| "train_runtime": 66309.0076, |
| "train_tokens_per_second": 7184.508 |
| }, |
| { |
| "epoch": 1.959816921316551, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.703561621414008e-05, |
| "loss": 4.0028, |
| "num_input_tokens_seen": 477052928, |
| "step": 7280, |
| "train_runtime": 66400.4447, |
| "train_tokens_per_second": 7184.484 |
| }, |
| { |
| "epoch": 1.9625092548966818, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.6981303787258744e-05, |
| "loss": 4.0518, |
| "num_input_tokens_seen": 477708288, |
| "step": 7290, |
| "train_runtime": 66491.4046, |
| "train_tokens_per_second": 7184.512 |
| }, |
| { |
| "epoch": 1.9652015884768121, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.6926981948384146e-05, |
| "loss": 3.926, |
| "num_input_tokens_seen": 478363648, |
| "step": 7300, |
| "train_runtime": 66582.7307, |
| "train_tokens_per_second": 7184.5 |
| }, |
| { |
| "epoch": 1.967893922056943, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.687265095556696e-05, |
| "loss": 4.0207, |
| "num_input_tokens_seen": 479019008, |
| "step": 7310, |
| "train_runtime": 66673.8653, |
| "train_tokens_per_second": 7184.509 |
| }, |
| { |
| "epoch": 1.9705862556370735, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.6818311066901336e-05, |
| "loss": 3.9799, |
| "num_input_tokens_seen": 479674368, |
| "step": 7320, |
| "train_runtime": 66765.1344, |
| "train_tokens_per_second": 7184.504 |
| }, |
| { |
| "epoch": 1.973278589217204, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.6763962540523714e-05, |
| "loss": 3.9499, |
| "num_input_tokens_seen": 480329728, |
| "step": 7330, |
| "train_runtime": 66856.3141, |
| "train_tokens_per_second": 7184.508 |
| }, |
| { |
| "epoch": 1.9759709227973346, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.6709605634611534e-05, |
| "loss": 3.9893, |
| "num_input_tokens_seen": 480985088, |
| "step": 7340, |
| "train_runtime": 66947.2542, |
| "train_tokens_per_second": 7184.538 |
| }, |
| { |
| "epoch": 1.978663256377465, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.665524060738206e-05, |
| "loss": 4.0101, |
| "num_input_tokens_seen": 481640448, |
| "step": 7350, |
| "train_runtime": 67038.5524, |
| "train_tokens_per_second": 7184.529 |
| }, |
| { |
| "epoch": 1.9813555899575959, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.660086771709112e-05, |
| "loss": 4.0528, |
| "num_input_tokens_seen": 482295808, |
| "step": 7360, |
| "train_runtime": 67129.9715, |
| "train_tokens_per_second": 7184.508 |
| }, |
| { |
| "epoch": 1.9840479235377262, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.6546487222031918e-05, |
| "loss": 3.9714, |
| "num_input_tokens_seen": 482951168, |
| "step": 7370, |
| "train_runtime": 67220.7779, |
| "train_tokens_per_second": 7184.552 |
| }, |
| { |
| "epoch": 1.986740257117857, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.6492099380533764e-05, |
| "loss": 4.0543, |
| "num_input_tokens_seen": 483606528, |
| "step": 7380, |
| "train_runtime": 67311.9445, |
| "train_tokens_per_second": 7184.557 |
| }, |
| { |
| "epoch": 1.9894325906979875, |
| "grad_norm": 1.0, |
| "learning_rate": 2.643770445096087e-05, |
| "loss": 4.017, |
| "num_input_tokens_seen": 484261888, |
| "step": 7390, |
| "train_runtime": 67403.3963, |
| "train_tokens_per_second": 7184.532 |
| }, |
| { |
| "epoch": 1.992124924278118, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.638330269171113e-05, |
| "loss": 4.0822, |
| "num_input_tokens_seen": 484917248, |
| "step": 7400, |
| "train_runtime": 67494.964, |
| "train_tokens_per_second": 7184.495 |
| }, |
| { |
| "epoch": 1.9948172578582486, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.6328894361214867e-05, |
| "loss": 4.0175, |
| "num_input_tokens_seen": 485572608, |
| "step": 7410, |
| "train_runtime": 67586.4077, |
| "train_tokens_per_second": 7184.471 |
| }, |
| { |
| "epoch": 1.9975095914383791, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.6274479717933637e-05, |
| "loss": 4.0293, |
| "num_input_tokens_seen": 486227968, |
| "step": 7420, |
| "train_runtime": 67677.1172, |
| "train_tokens_per_second": 7184.525 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.0625, |
| "learning_rate": 2.622005902035896e-05, |
| "loss": 3.9542, |
| "num_input_tokens_seen": 486834176, |
| "step": 7430, |
| "train_runtime": 67761.968, |
| "train_tokens_per_second": 7184.475 |
| }, |
| { |
| "epoch": 2.0026923335801308, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.616563252701114e-05, |
| "loss": 4.0162, |
| "num_input_tokens_seen": 487489536, |
| "step": 7440, |
| "train_runtime": 67852.7594, |
| "train_tokens_per_second": 7184.52 |
| }, |
| { |
| "epoch": 2.005384667160261, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.6111200496438e-05, |
| "loss": 4.0107, |
| "num_input_tokens_seen": 488144896, |
| "step": 7450, |
| "train_runtime": 67944.3204, |
| "train_tokens_per_second": 7184.484 |
| }, |
| { |
| "epoch": 2.008077000740392, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.6056763187213678e-05, |
| "loss": 3.87, |
| "num_input_tokens_seen": 488800256, |
| "step": 7460, |
| "train_runtime": 68035.4837, |
| "train_tokens_per_second": 7184.49 |
| }, |
| { |
| "epoch": 2.010769334320522, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.6002320857937373e-05, |
| "loss": 3.9323, |
| "num_input_tokens_seen": 489455616, |
| "step": 7470, |
| "train_runtime": 68126.8899, |
| "train_tokens_per_second": 7184.47 |
| }, |
| { |
| "epoch": 2.013461667900653, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.5947873767232146e-05, |
| "loss": 4.031, |
| "num_input_tokens_seen": 490110976, |
| "step": 7480, |
| "train_runtime": 68217.9602, |
| "train_tokens_per_second": 7184.486 |
| }, |
| { |
| "epoch": 2.0161540014807833, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.5893422173743664e-05, |
| "loss": 3.8792, |
| "num_input_tokens_seen": 490766336, |
| "step": 7490, |
| "train_runtime": 68309.4455, |
| "train_tokens_per_second": 7184.458 |
| }, |
| { |
| "epoch": 2.018846335060914, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.5838966336138992e-05, |
| "loss": 3.9895, |
| "num_input_tokens_seen": 491421696, |
| "step": 7500, |
| "train_runtime": 68400.9196, |
| "train_tokens_per_second": 7184.431 |
| }, |
| { |
| "epoch": 2.021538668641045, |
| "grad_norm": 1.0, |
| "learning_rate": 2.578450651310535e-05, |
| "loss": 3.875, |
| "num_input_tokens_seen": 492077056, |
| "step": 7510, |
| "train_runtime": 68510.0869, |
| "train_tokens_per_second": 7182.549 |
| }, |
| { |
| "epoch": 2.024231002221175, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.5730042963348898e-05, |
| "loss": 4.0199, |
| "num_input_tokens_seen": 492732416, |
| "step": 7520, |
| "train_runtime": 68600.2883, |
| "train_tokens_per_second": 7182.658 |
| }, |
| { |
| "epoch": 2.026923335801306, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.56755759455935e-05, |
| "loss": 3.9386, |
| "num_input_tokens_seen": 493387776, |
| "step": 7530, |
| "train_runtime": 68691.7421, |
| "train_tokens_per_second": 7182.636 |
| }, |
| { |
| "epoch": 2.0296156693814362, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.5621105718579484e-05, |
| "loss": 3.9828, |
| "num_input_tokens_seen": 494043136, |
| "step": 7540, |
| "train_runtime": 68782.8621, |
| "train_tokens_per_second": 7182.649 |
| }, |
| { |
| "epoch": 2.032308002961567, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.5566632541062435e-05, |
| "loss": 3.8517, |
| "num_input_tokens_seen": 494698496, |
| "step": 7550, |
| "train_runtime": 68874.1653, |
| "train_tokens_per_second": 7182.642 |
| }, |
| { |
| "epoch": 2.0350003365416973, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.5512156671811943e-05, |
| "loss": 4.0755, |
| "num_input_tokens_seen": 495353856, |
| "step": 7560, |
| "train_runtime": 68965.1858, |
| "train_tokens_per_second": 7182.665 |
| }, |
| { |
| "epoch": 2.037692670121828, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.5457678369610394e-05, |
| "loss": 3.9296, |
| "num_input_tokens_seen": 496009216, |
| "step": 7570, |
| "train_runtime": 69056.7428, |
| "train_tokens_per_second": 7182.633 |
| }, |
| { |
| "epoch": 2.040385003701959, |
| "grad_norm": 0.91015625, |
| "learning_rate": 2.5403197893251723e-05, |
| "loss": 3.9863, |
| "num_input_tokens_seen": 496664576, |
| "step": 7580, |
| "train_runtime": 69147.3015, |
| "train_tokens_per_second": 7182.704 |
| }, |
| { |
| "epoch": 2.043077337282089, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.5348715501540203e-05, |
| "loss": 3.9717, |
| "num_input_tokens_seen": 497319936, |
| "step": 7590, |
| "train_runtime": 69238.82, |
| "train_tokens_per_second": 7182.675 |
| }, |
| { |
| "epoch": 2.04576967086222, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.52942314532892e-05, |
| "loss": 3.8741, |
| "num_input_tokens_seen": 497975296, |
| "step": 7600, |
| "train_runtime": 69330.4323, |
| "train_tokens_per_second": 7182.637 |
| }, |
| { |
| "epoch": 2.0484620044423503, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.5239746007319954e-05, |
| "loss": 3.8784, |
| "num_input_tokens_seen": 498630656, |
| "step": 7610, |
| "train_runtime": 69421.1704, |
| "train_tokens_per_second": 7182.689 |
| }, |
| { |
| "epoch": 2.051154338022481, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.5185259422460334e-05, |
| "loss": 3.9551, |
| "num_input_tokens_seen": 499286016, |
| "step": 7620, |
| "train_runtime": 69512.3931, |
| "train_tokens_per_second": 7182.691 |
| }, |
| { |
| "epoch": 2.0538466716026114, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.5130771957543632e-05, |
| "loss": 3.9249, |
| "num_input_tokens_seen": 499941376, |
| "step": 7630, |
| "train_runtime": 69603.3932, |
| "train_tokens_per_second": 7182.716 |
| }, |
| { |
| "epoch": 2.056539005182742, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.507628387140731e-05, |
| "loss": 3.948, |
| "num_input_tokens_seen": 500596736, |
| "step": 7640, |
| "train_runtime": 69694.7297, |
| "train_tokens_per_second": 7182.706 |
| }, |
| { |
| "epoch": 2.059231338762873, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.502179542289178e-05, |
| "loss": 3.9351, |
| "num_input_tokens_seen": 501252096, |
| "step": 7650, |
| "train_runtime": 69786.3479, |
| "train_tokens_per_second": 7182.667 |
| }, |
| { |
| "epoch": 2.0619236723430032, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.4967306870839198e-05, |
| "loss": 4.0082, |
| "num_input_tokens_seen": 501907456, |
| "step": 7660, |
| "train_runtime": 69877.0105, |
| "train_tokens_per_second": 7182.727 |
| }, |
| { |
| "epoch": 2.064616005923134, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.4912818474092173e-05, |
| "loss": 3.9455, |
| "num_input_tokens_seen": 502562816, |
| "step": 7670, |
| "train_runtime": 69968.4304, |
| "train_tokens_per_second": 7182.708 |
| }, |
| { |
| "epoch": 2.0673083395032643, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.4858330491492624e-05, |
| "loss": 3.9224, |
| "num_input_tokens_seen": 503218176, |
| "step": 7680, |
| "train_runtime": 70060.2435, |
| "train_tokens_per_second": 7182.65 |
| }, |
| { |
| "epoch": 2.070000673083395, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.480384318188045e-05, |
| "loss": 3.9099, |
| "num_input_tokens_seen": 503873536, |
| "step": 7690, |
| "train_runtime": 70151.399, |
| "train_tokens_per_second": 7182.658 |
| }, |
| { |
| "epoch": 2.0726930066635254, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.4749356804092392e-05, |
| "loss": 3.8047, |
| "num_input_tokens_seen": 504528896, |
| "step": 7700, |
| "train_runtime": 70242.717, |
| "train_tokens_per_second": 7182.651 |
| }, |
| { |
| "epoch": 2.075385340243656, |
| "grad_norm": 1.0, |
| "learning_rate": 2.4694871616960764e-05, |
| "loss": 3.9816, |
| "num_input_tokens_seen": 505184256, |
| "step": 7710, |
| "train_runtime": 70334.448, |
| "train_tokens_per_second": 7182.601 |
| }, |
| { |
| "epoch": 2.078077673823787, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.464038787931219e-05, |
| "loss": 3.8623, |
| "num_input_tokens_seen": 505839616, |
| "step": 7720, |
| "train_runtime": 70425.3555, |
| "train_tokens_per_second": 7182.635 |
| }, |
| { |
| "epoch": 2.0807700074039173, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.4585905849966454e-05, |
| "loss": 3.9311, |
| "num_input_tokens_seen": 506494976, |
| "step": 7730, |
| "train_runtime": 70516.6306, |
| "train_tokens_per_second": 7182.632 |
| }, |
| { |
| "epoch": 2.083462340984048, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.4531425787735175e-05, |
| "loss": 4.0023, |
| "num_input_tokens_seen": 507150336, |
| "step": 7740, |
| "train_runtime": 70608.3253, |
| "train_tokens_per_second": 7182.586 |
| }, |
| { |
| "epoch": 2.0861546745641784, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.447694795142067e-05, |
| "loss": 3.9401, |
| "num_input_tokens_seen": 507805696, |
| "step": 7750, |
| "train_runtime": 70699.7316, |
| "train_tokens_per_second": 7182.569 |
| }, |
| { |
| "epoch": 2.088847008144309, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.442247259981467e-05, |
| "loss": 3.9075, |
| "num_input_tokens_seen": 508461056, |
| "step": 7760, |
| "train_runtime": 70791.0769, |
| "train_tokens_per_second": 7182.559 |
| }, |
| { |
| "epoch": 2.0915393417244394, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.4367999991697086e-05, |
| "loss": 4.0235, |
| "num_input_tokens_seen": 509116416, |
| "step": 7770, |
| "train_runtime": 70882.845, |
| "train_tokens_per_second": 7182.505 |
| }, |
| { |
| "epoch": 2.09423167530457, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.4313530385834823e-05, |
| "loss": 4.0668, |
| "num_input_tokens_seen": 509771776, |
| "step": 7780, |
| "train_runtime": 70973.9031, |
| "train_tokens_per_second": 7182.524 |
| }, |
| { |
| "epoch": 2.096924008884701, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.4259064040980492e-05, |
| "loss": 3.9675, |
| "num_input_tokens_seen": 510427136, |
| "step": 7790, |
| "train_runtime": 71065.1618, |
| "train_tokens_per_second": 7182.523 |
| }, |
| { |
| "epoch": 2.0996163424648313, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.420460121587125e-05, |
| "loss": 3.9474, |
| "num_input_tokens_seen": 511082496, |
| "step": 7800, |
| "train_runtime": 71156.9178, |
| "train_tokens_per_second": 7182.471 |
| }, |
| { |
| "epoch": 2.102308676044962, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.4150142169227492e-05, |
| "loss": 3.9166, |
| "num_input_tokens_seen": 511737856, |
| "step": 7810, |
| "train_runtime": 71248.0112, |
| "train_tokens_per_second": 7182.486 |
| }, |
| { |
| "epoch": 2.1050010096250924, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.4095687159751703e-05, |
| "loss": 3.9603, |
| "num_input_tokens_seen": 512393216, |
| "step": 7820, |
| "train_runtime": 71339.3142, |
| "train_tokens_per_second": 7182.48 |
| }, |
| { |
| "epoch": 2.107693343205223, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.404123644612718e-05, |
| "loss": 3.9247, |
| "num_input_tokens_seen": 513048576, |
| "step": 7830, |
| "train_runtime": 71430.987, |
| "train_tokens_per_second": 7182.437 |
| }, |
| { |
| "epoch": 2.1103856767853535, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.3986790287016784e-05, |
| "loss": 3.9972, |
| "num_input_tokens_seen": 513703936, |
| "step": 7840, |
| "train_runtime": 71521.7458, |
| "train_tokens_per_second": 7182.486 |
| }, |
| { |
| "epoch": 2.1130780103654843, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.3932348941061783e-05, |
| "loss": 3.9288, |
| "num_input_tokens_seen": 514359296, |
| "step": 7850, |
| "train_runtime": 71613.2492, |
| "train_tokens_per_second": 7182.46 |
| }, |
| { |
| "epoch": 2.115770343945615, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.387791266688054e-05, |
| "loss": 4.0332, |
| "num_input_tokens_seen": 515014656, |
| "step": 7860, |
| "train_runtime": 71704.5297, |
| "train_tokens_per_second": 7182.456 |
| }, |
| { |
| "epoch": 2.1184626775257454, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.3823481723067366e-05, |
| "loss": 3.9433, |
| "num_input_tokens_seen": 515670016, |
| "step": 7870, |
| "train_runtime": 71795.876, |
| "train_tokens_per_second": 7182.446 |
| }, |
| { |
| "epoch": 2.121155011105876, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.3769056368191208e-05, |
| "loss": 3.8531, |
| "num_input_tokens_seen": 516325376, |
| "step": 7880, |
| "train_runtime": 71887.3636, |
| "train_tokens_per_second": 7182.422 |
| }, |
| { |
| "epoch": 2.1238473446860064, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.3714636860794495e-05, |
| "loss": 3.9497, |
| "num_input_tokens_seen": 516980736, |
| "step": 7890, |
| "train_runtime": 71978.4865, |
| "train_tokens_per_second": 7182.434 |
| }, |
| { |
| "epoch": 2.126539678266137, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.366022345939188e-05, |
| "loss": 3.9821, |
| "num_input_tokens_seen": 517636096, |
| "step": 7900, |
| "train_runtime": 72070.3063, |
| "train_tokens_per_second": 7182.377 |
| }, |
| { |
| "epoch": 2.129232011846268, |
| "grad_norm": 1.0, |
| "learning_rate": 2.3605816422468976e-05, |
| "loss": 3.9986, |
| "num_input_tokens_seen": 518291456, |
| "step": 7910, |
| "train_runtime": 72161.9237, |
| "train_tokens_per_second": 7182.34 |
| }, |
| { |
| "epoch": 2.1319243454263983, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.3551416008481205e-05, |
| "loss": 3.9442, |
| "num_input_tokens_seen": 518946816, |
| "step": 7920, |
| "train_runtime": 72253.5881, |
| "train_tokens_per_second": 7182.298 |
| }, |
| { |
| "epoch": 2.134616679006529, |
| "grad_norm": 1.0, |
| "learning_rate": 2.3497022475852487e-05, |
| "loss": 3.9685, |
| "num_input_tokens_seen": 519602176, |
| "step": 7930, |
| "train_runtime": 72344.6661, |
| "train_tokens_per_second": 7182.315 |
| }, |
| { |
| "epoch": 2.1373090125866594, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.344263608297409e-05, |
| "loss": 4.0041, |
| "num_input_tokens_seen": 520257536, |
| "step": 7940, |
| "train_runtime": 72436.1344, |
| "train_tokens_per_second": 7182.293 |
| }, |
| { |
| "epoch": 2.14000134616679, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.3388257088203326e-05, |
| "loss": 3.9341, |
| "num_input_tokens_seen": 520912896, |
| "step": 7950, |
| "train_runtime": 72527.844, |
| "train_tokens_per_second": 7182.247 |
| }, |
| { |
| "epoch": 2.1426936797469205, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.33338857498624e-05, |
| "loss": 4.0325, |
| "num_input_tokens_seen": 521568256, |
| "step": 7960, |
| "train_runtime": 72619.3594, |
| "train_tokens_per_second": 7182.221 |
| }, |
| { |
| "epoch": 2.1453860133270513, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.327952232623714e-05, |
| "loss": 3.937, |
| "num_input_tokens_seen": 522223616, |
| "step": 7970, |
| "train_runtime": 72710.6428, |
| "train_tokens_per_second": 7182.217 |
| }, |
| { |
| "epoch": 2.148078346907182, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.3225167075575744e-05, |
| "loss": 3.9797, |
| "num_input_tokens_seen": 522878976, |
| "step": 7980, |
| "train_runtime": 72802.8888, |
| "train_tokens_per_second": 7182.119 |
| }, |
| { |
| "epoch": 2.1507706804873123, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.3170820256087628e-05, |
| "loss": 3.933, |
| "num_input_tokens_seen": 523534336, |
| "step": 7990, |
| "train_runtime": 72895.0433, |
| "train_tokens_per_second": 7182.029 |
| }, |
| { |
| "epoch": 2.153463014067443, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.3116482125942112e-05, |
| "loss": 4.0139, |
| "num_input_tokens_seen": 524189696, |
| "step": 8000, |
| "train_runtime": 72987.0823, |
| "train_tokens_per_second": 7181.952 |
| }, |
| { |
| "epoch": 2.1561553476475734, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.306215294326729e-05, |
| "loss": 3.8985, |
| "num_input_tokens_seen": 524845056, |
| "step": 8010, |
| "train_runtime": 73107.6287, |
| "train_tokens_per_second": 7179.074 |
| }, |
| { |
| "epoch": 2.158847681227704, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.3007832966148695e-05, |
| "loss": 3.9383, |
| "num_input_tokens_seen": 525500416, |
| "step": 8020, |
| "train_runtime": 73199.5993, |
| "train_tokens_per_second": 7179.007 |
| }, |
| { |
| "epoch": 2.1615400148078345, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.2953522452628176e-05, |
| "loss": 3.8953, |
| "num_input_tokens_seen": 526155776, |
| "step": 8030, |
| "train_runtime": 73291.6216, |
| "train_tokens_per_second": 7178.935 |
| }, |
| { |
| "epoch": 2.1642323483879653, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.289922166070262e-05, |
| "loss": 3.9527, |
| "num_input_tokens_seen": 526811136, |
| "step": 8040, |
| "train_runtime": 73384.1019, |
| "train_tokens_per_second": 7178.818 |
| }, |
| { |
| "epoch": 2.166924681968096, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.2844930848322695e-05, |
| "loss": 4.0024, |
| "num_input_tokens_seen": 527466496, |
| "step": 8050, |
| "train_runtime": 73475.8145, |
| "train_tokens_per_second": 7178.777 |
| }, |
| { |
| "epoch": 2.1696170155482264, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.279065027339171e-05, |
| "loss": 3.8969, |
| "num_input_tokens_seen": 528121856, |
| "step": 8060, |
| "train_runtime": 73567.1917, |
| "train_tokens_per_second": 7178.769 |
| }, |
| { |
| "epoch": 2.172309349128357, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.27363801937643e-05, |
| "loss": 3.9977, |
| "num_input_tokens_seen": 528777216, |
| "step": 8070, |
| "train_runtime": 73658.6764, |
| "train_tokens_per_second": 7178.75 |
| }, |
| { |
| "epoch": 2.1750016827084875, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.268212086724528e-05, |
| "loss": 3.9677, |
| "num_input_tokens_seen": 529432576, |
| "step": 8080, |
| "train_runtime": 73750.8176, |
| "train_tokens_per_second": 7178.667 |
| }, |
| { |
| "epoch": 2.1776940162886183, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.262787255158837e-05, |
| "loss": 3.9226, |
| "num_input_tokens_seen": 530087936, |
| "step": 8090, |
| "train_runtime": 73842.6244, |
| "train_tokens_per_second": 7178.617 |
| }, |
| { |
| "epoch": 2.1803863498687486, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.257363550449497e-05, |
| "loss": 4.0062, |
| "num_input_tokens_seen": 530743296, |
| "step": 8100, |
| "train_runtime": 73934.5186, |
| "train_tokens_per_second": 7178.559 |
| }, |
| { |
| "epoch": 2.1830786834488793, |
| "grad_norm": 1.0, |
| "learning_rate": 2.251940998361297e-05, |
| "loss": 3.9742, |
| "num_input_tokens_seen": 531398656, |
| "step": 8110, |
| "train_runtime": 74026.3714, |
| "train_tokens_per_second": 7178.505 |
| }, |
| { |
| "epoch": 2.18577101702901, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.246519624653548e-05, |
| "loss": 4.0204, |
| "num_input_tokens_seen": 532054016, |
| "step": 8120, |
| "train_runtime": 74118.3082, |
| "train_tokens_per_second": 7178.443 |
| }, |
| { |
| "epoch": 2.1884633506091404, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.2410994550799674e-05, |
| "loss": 3.924, |
| "num_input_tokens_seen": 532709376, |
| "step": 8130, |
| "train_runtime": 74209.9458, |
| "train_tokens_per_second": 7178.409 |
| }, |
| { |
| "epoch": 2.191155684189271, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.2356805153885473e-05, |
| "loss": 3.8933, |
| "num_input_tokens_seen": 533364736, |
| "step": 8140, |
| "train_runtime": 74301.8978, |
| "train_tokens_per_second": 7178.346 |
| }, |
| { |
| "epoch": 2.1938480177694015, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.230262831321441e-05, |
| "loss": 3.99, |
| "num_input_tokens_seen": 534020096, |
| "step": 8150, |
| "train_runtime": 74393.435, |
| "train_tokens_per_second": 7178.323 |
| }, |
| { |
| "epoch": 2.1965403513495323, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.224846428614838e-05, |
| "loss": 3.8841, |
| "num_input_tokens_seen": 534675456, |
| "step": 8160, |
| "train_runtime": 74485.4277, |
| "train_tokens_per_second": 7178.256 |
| }, |
| { |
| "epoch": 2.1992326849296626, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.219431332998836e-05, |
| "loss": 3.8712, |
| "num_input_tokens_seen": 535330816, |
| "step": 8170, |
| "train_runtime": 74577.0024, |
| "train_tokens_per_second": 7178.229 |
| }, |
| { |
| "epoch": 2.2019250185097934, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.2140175701973283e-05, |
| "loss": 3.9662, |
| "num_input_tokens_seen": 535986176, |
| "step": 8180, |
| "train_runtime": 74668.8087, |
| "train_tokens_per_second": 7178.18 |
| }, |
| { |
| "epoch": 2.204617352089924, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.2086051659278738e-05, |
| "loss": 3.9555, |
| "num_input_tokens_seen": 536641536, |
| "step": 8190, |
| "train_runtime": 74760.7332, |
| "train_tokens_per_second": 7178.12 |
| }, |
| { |
| "epoch": 2.2073096856700545, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.20319414590158e-05, |
| "loss": 3.9335, |
| "num_input_tokens_seen": 537296896, |
| "step": 8200, |
| "train_runtime": 74851.828, |
| "train_tokens_per_second": 7178.14 |
| }, |
| { |
| "epoch": 2.2100020192501852, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.197784535822976e-05, |
| "loss": 3.9308, |
| "num_input_tokens_seen": 537952256, |
| "step": 8210, |
| "train_runtime": 74943.4274, |
| "train_tokens_per_second": 7178.111 |
| }, |
| { |
| "epoch": 2.2126943528303156, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.192376361389896e-05, |
| "loss": 3.9608, |
| "num_input_tokens_seen": 538607616, |
| "step": 8220, |
| "train_runtime": 75035.4487, |
| "train_tokens_per_second": 7178.042 |
| }, |
| { |
| "epoch": 2.2153866864104463, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.1869696482933535e-05, |
| "loss": 3.9234, |
| "num_input_tokens_seen": 539262976, |
| "step": 8230, |
| "train_runtime": 75127.5116, |
| "train_tokens_per_second": 7177.969 |
| }, |
| { |
| "epoch": 2.2180790199905767, |
| "grad_norm": 1.0, |
| "learning_rate": 2.181564422217418e-05, |
| "loss": 3.8744, |
| "num_input_tokens_seen": 539918336, |
| "step": 8240, |
| "train_runtime": 75219.0028, |
| "train_tokens_per_second": 7177.951 |
| }, |
| { |
| "epoch": 2.2207713535707074, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.1761607088390982e-05, |
| "loss": 3.9672, |
| "num_input_tokens_seen": 540573696, |
| "step": 8250, |
| "train_runtime": 75311.1549, |
| "train_tokens_per_second": 7177.87 |
| }, |
| { |
| "epoch": 2.223463687150838, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.1707585338282134e-05, |
| "loss": 3.9026, |
| "num_input_tokens_seen": 541229056, |
| "step": 8260, |
| "train_runtime": 75402.1748, |
| "train_tokens_per_second": 7177.897 |
| }, |
| { |
| "epoch": 2.2261560207309685, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.1653579228472787e-05, |
| "loss": 3.946, |
| "num_input_tokens_seen": 541884416, |
| "step": 8270, |
| "train_runtime": 75494.0303, |
| "train_tokens_per_second": 7177.845 |
| }, |
| { |
| "epoch": 2.2288483543110993, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.159958901551376e-05, |
| "loss": 3.9629, |
| "num_input_tokens_seen": 542539776, |
| "step": 8280, |
| "train_runtime": 75585.735, |
| "train_tokens_per_second": 7177.806 |
| }, |
| { |
| "epoch": 2.2315406878912296, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.154561495588038e-05, |
| "loss": 3.8611, |
| "num_input_tokens_seen": 543195136, |
| "step": 8290, |
| "train_runtime": 75677.483, |
| "train_tokens_per_second": 7177.764 |
| }, |
| { |
| "epoch": 2.2342330214713604, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.1491657305971244e-05, |
| "loss": 3.8859, |
| "num_input_tokens_seen": 543850496, |
| "step": 8300, |
| "train_runtime": 75768.9086, |
| "train_tokens_per_second": 7177.753 |
| }, |
| { |
| "epoch": 2.2369253550514907, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.143771632210696e-05, |
| "loss": 3.9045, |
| "num_input_tokens_seen": 544505856, |
| "step": 8310, |
| "train_runtime": 75860.6864, |
| "train_tokens_per_second": 7177.708 |
| }, |
| { |
| "epoch": 2.2396176886316215, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.138379226052901e-05, |
| "loss": 3.9382, |
| "num_input_tokens_seen": 545161216, |
| "step": 8320, |
| "train_runtime": 75952.4776, |
| "train_tokens_per_second": 7177.662 |
| }, |
| { |
| "epoch": 2.2423100222117522, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.1329885377398446e-05, |
| "loss": 3.9432, |
| "num_input_tokens_seen": 545816576, |
| "step": 8330, |
| "train_runtime": 76044.383, |
| "train_tokens_per_second": 7177.605 |
| }, |
| { |
| "epoch": 2.2450023557918826, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.1275995928794758e-05, |
| "loss": 3.9323, |
| "num_input_tokens_seen": 546471936, |
| "step": 8340, |
| "train_runtime": 76136.3929, |
| "train_tokens_per_second": 7177.539 |
| }, |
| { |
| "epoch": 2.2476946893720133, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.1222124170714575e-05, |
| "loss": 3.9404, |
| "num_input_tokens_seen": 547127296, |
| "step": 8350, |
| "train_runtime": 76227.9893, |
| "train_tokens_per_second": 7177.512 |
| }, |
| { |
| "epoch": 2.2503870229521437, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.1168270359070514e-05, |
| "loss": 3.9462, |
| "num_input_tokens_seen": 547782656, |
| "step": 8360, |
| "train_runtime": 76319.1941, |
| "train_tokens_per_second": 7177.521 |
| }, |
| { |
| "epoch": 2.2530793565322744, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.1114434749689944e-05, |
| "loss": 3.9809, |
| "num_input_tokens_seen": 548438016, |
| "step": 8370, |
| "train_runtime": 76411.0251, |
| "train_tokens_per_second": 7177.472 |
| }, |
| { |
| "epoch": 2.2557716901124047, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.1060617598313733e-05, |
| "loss": 3.8728, |
| "num_input_tokens_seen": 549093376, |
| "step": 8380, |
| "train_runtime": 76502.5914, |
| "train_tokens_per_second": 7177.448 |
| }, |
| { |
| "epoch": 2.2584640236925355, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.1006819160595108e-05, |
| "loss": 3.9735, |
| "num_input_tokens_seen": 549748736, |
| "step": 8390, |
| "train_runtime": 76595.1593, |
| "train_tokens_per_second": 7177.33 |
| }, |
| { |
| "epoch": 2.2611563572726663, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.0953039692098364e-05, |
| "loss": 3.9304, |
| "num_input_tokens_seen": 550404096, |
| "step": 8400, |
| "train_runtime": 76686.6123, |
| "train_tokens_per_second": 7177.317 |
| }, |
| { |
| "epoch": 2.2638486908527966, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.089927944829771e-05, |
| "loss": 3.9103, |
| "num_input_tokens_seen": 551059456, |
| "step": 8410, |
| "train_runtime": 76778.7614, |
| "train_tokens_per_second": 7177.238 |
| }, |
| { |
| "epoch": 2.2665410244329274, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.0845538684576005e-05, |
| "loss": 3.958, |
| "num_input_tokens_seen": 551714816, |
| "step": 8420, |
| "train_runtime": 76870.7391, |
| "train_tokens_per_second": 7177.176 |
| }, |
| { |
| "epoch": 2.2692333580130577, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.079181765622359e-05, |
| "loss": 3.9227, |
| "num_input_tokens_seen": 552370176, |
| "step": 8430, |
| "train_runtime": 76961.857, |
| "train_tokens_per_second": 7177.194 |
| }, |
| { |
| "epoch": 2.2719256915931885, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.0738116618437055e-05, |
| "loss": 3.9221, |
| "num_input_tokens_seen": 553025536, |
| "step": 8440, |
| "train_runtime": 77053.3704, |
| "train_tokens_per_second": 7177.175 |
| }, |
| { |
| "epoch": 2.274618025173319, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.0684435826318008e-05, |
| "loss": 3.954, |
| "num_input_tokens_seen": 553680896, |
| "step": 8450, |
| "train_runtime": 77145.0088, |
| "train_tokens_per_second": 7177.145 |
| }, |
| { |
| "epoch": 2.2773103587534496, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.063077553487191e-05, |
| "loss": 3.9332, |
| "num_input_tokens_seen": 554336256, |
| "step": 8460, |
| "train_runtime": 77237.3794, |
| "train_tokens_per_second": 7177.046 |
| }, |
| { |
| "epoch": 2.2800026923335803, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.0577135999006798e-05, |
| "loss": 3.8525, |
| "num_input_tokens_seen": 554991616, |
| "step": 8470, |
| "train_runtime": 77328.7228, |
| "train_tokens_per_second": 7177.044 |
| }, |
| { |
| "epoch": 2.2826950259137107, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.0523517473532144e-05, |
| "loss": 3.9421, |
| "num_input_tokens_seen": 555646976, |
| "step": 8480, |
| "train_runtime": 77420.9627, |
| "train_tokens_per_second": 7176.958 |
| }, |
| { |
| "epoch": 2.2853873594938414, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.0469920213157613e-05, |
| "loss": 3.9269, |
| "num_input_tokens_seen": 556302336, |
| "step": 8490, |
| "train_runtime": 77512.6117, |
| "train_tokens_per_second": 7176.927 |
| }, |
| { |
| "epoch": 2.2880796930739717, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.0416344472491817e-05, |
| "loss": 4.017, |
| "num_input_tokens_seen": 556957696, |
| "step": 8500, |
| "train_runtime": 77604.4223, |
| "train_tokens_per_second": 7176.881 |
| }, |
| { |
| "epoch": 2.2907720266541025, |
| "grad_norm": 1.0, |
| "learning_rate": 2.0362790506041186e-05, |
| "loss": 3.8685, |
| "num_input_tokens_seen": 557613056, |
| "step": 8510, |
| "train_runtime": 77719.4257, |
| "train_tokens_per_second": 7174.693 |
| }, |
| { |
| "epoch": 2.293464360234233, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.0309258568208675e-05, |
| "loss": 4.0096, |
| "num_input_tokens_seen": 558268416, |
| "step": 8520, |
| "train_runtime": 77811.0797, |
| "train_tokens_per_second": 7174.665 |
| }, |
| { |
| "epoch": 2.2961566938143636, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.0255748913292626e-05, |
| "loss": 4.0394, |
| "num_input_tokens_seen": 558923776, |
| "step": 8530, |
| "train_runtime": 77902.6246, |
| "train_tokens_per_second": 7174.646 |
| }, |
| { |
| "epoch": 2.2988490273944944, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.0202261795485495e-05, |
| "loss": 4.0142, |
| "num_input_tokens_seen": 559579136, |
| "step": 8540, |
| "train_runtime": 77994.7083, |
| "train_tokens_per_second": 7174.578 |
| }, |
| { |
| "epoch": 2.3015413609746247, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.0148797468872704e-05, |
| "loss": 3.9483, |
| "num_input_tokens_seen": 560234496, |
| "step": 8550, |
| "train_runtime": 78086.5915, |
| "train_tokens_per_second": 7174.529 |
| }, |
| { |
| "epoch": 2.3042336945547555, |
| "grad_norm": 1.0, |
| "learning_rate": 2.0095356187431417e-05, |
| "loss": 4.0448, |
| "num_input_tokens_seen": 560889856, |
| "step": 8560, |
| "train_runtime": 78178.1001, |
| "train_tokens_per_second": 7174.514 |
| }, |
| { |
| "epoch": 2.306926028134886, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.0041938205029274e-05, |
| "loss": 3.9285, |
| "num_input_tokens_seen": 561545216, |
| "step": 8570, |
| "train_runtime": 78269.9302, |
| "train_tokens_per_second": 7174.469 |
| }, |
| { |
| "epoch": 2.3096183617150166, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.99885437754233e-05, |
| "loss": 4.0154, |
| "num_input_tokens_seen": 562200576, |
| "step": 8580, |
| "train_runtime": 78361.4008, |
| "train_tokens_per_second": 7174.458 |
| }, |
| { |
| "epoch": 2.312310695295147, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9935173152258575e-05, |
| "loss": 3.9581, |
| "num_input_tokens_seen": 562855936, |
| "step": 8590, |
| "train_runtime": 78453.1924, |
| "train_tokens_per_second": 7174.417 |
| }, |
| { |
| "epoch": 2.3150030288752776, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.9881826589067136e-05, |
| "loss": 3.8998, |
| "num_input_tokens_seen": 563511296, |
| "step": 8600, |
| "train_runtime": 78544.1591, |
| "train_tokens_per_second": 7174.452 |
| }, |
| { |
| "epoch": 2.3176953624554084, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9828504339266686e-05, |
| "loss": 3.8947, |
| "num_input_tokens_seen": 564166656, |
| "step": 8610, |
| "train_runtime": 78636.1489, |
| "train_tokens_per_second": 7174.393 |
| }, |
| { |
| "epoch": 2.3203876960355387, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.9775206656159466e-05, |
| "loss": 3.9563, |
| "num_input_tokens_seen": 564822016, |
| "step": 8620, |
| "train_runtime": 78727.3979, |
| "train_tokens_per_second": 7174.402 |
| }, |
| { |
| "epoch": 2.3230800296156695, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.9721933792931e-05, |
| "loss": 3.9166, |
| "num_input_tokens_seen": 565477376, |
| "step": 8630, |
| "train_runtime": 78819.0122, |
| "train_tokens_per_second": 7174.378 |
| }, |
| { |
| "epoch": 2.3257723631958, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.9668686002648887e-05, |
| "loss": 3.8972, |
| "num_input_tokens_seen": 566132736, |
| "step": 8640, |
| "train_runtime": 78910.9, |
| "train_tokens_per_second": 7174.329 |
| }, |
| { |
| "epoch": 2.3284646967759306, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.9615463538261663e-05, |
| "loss": 3.7837, |
| "num_input_tokens_seen": 566788096, |
| "step": 8650, |
| "train_runtime": 79002.2736, |
| "train_tokens_per_second": 7174.326 |
| }, |
| { |
| "epoch": 2.331157030356061, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.9562266652597504e-05, |
| "loss": 3.9452, |
| "num_input_tokens_seen": 567443456, |
| "step": 8660, |
| "train_runtime": 79093.8075, |
| "train_tokens_per_second": 7174.31 |
| }, |
| { |
| "epoch": 2.3338493639361917, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.9509095598363134e-05, |
| "loss": 3.8569, |
| "num_input_tokens_seen": 568098816, |
| "step": 8670, |
| "train_runtime": 79185.6114, |
| "train_tokens_per_second": 7174.268 |
| }, |
| { |
| "epoch": 2.3365416975163225, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9455950628142508e-05, |
| "loss": 3.9199, |
| "num_input_tokens_seen": 568754176, |
| "step": 8680, |
| "train_runtime": 79277.143, |
| "train_tokens_per_second": 7174.252 |
| }, |
| { |
| "epoch": 2.339234031096453, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.9402831994395712e-05, |
| "loss": 3.9144, |
| "num_input_tokens_seen": 569409536, |
| "step": 8690, |
| "train_runtime": 79368.6331, |
| "train_tokens_per_second": 7174.239 |
| }, |
| { |
| "epoch": 2.3419263646765835, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9349739949457725e-05, |
| "loss": 3.9841, |
| "num_input_tokens_seen": 570064896, |
| "step": 8700, |
| "train_runtime": 79459.4951, |
| "train_tokens_per_second": 7174.283 |
| }, |
| { |
| "epoch": 2.344618698256714, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9296674745537187e-05, |
| "loss": 3.9162, |
| "num_input_tokens_seen": 570720256, |
| "step": 8710, |
| "train_runtime": 79551.4372, |
| "train_tokens_per_second": 7174.229 |
| }, |
| { |
| "epoch": 2.3473110318368446, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.924363663471526e-05, |
| "loss": 3.8481, |
| "num_input_tokens_seen": 571375616, |
| "step": 8720, |
| "train_runtime": 79642.3623, |
| "train_tokens_per_second": 7174.268 |
| }, |
| { |
| "epoch": 2.350003365416975, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.9190625868944386e-05, |
| "loss": 3.9566, |
| "num_input_tokens_seen": 572030976, |
| "step": 8730, |
| "train_runtime": 79733.9595, |
| "train_tokens_per_second": 7174.245 |
| }, |
| { |
| "epoch": 2.3526956989971057, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.9137642700047126e-05, |
| "loss": 3.9721, |
| "num_input_tokens_seen": 572686336, |
| "step": 8740, |
| "train_runtime": 79825.6602, |
| "train_tokens_per_second": 7174.214 |
| }, |
| { |
| "epoch": 2.3553880325772365, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.9084687379714914e-05, |
| "loss": 3.8708, |
| "num_input_tokens_seen": 573341696, |
| "step": 8750, |
| "train_runtime": 79917.2004, |
| "train_tokens_per_second": 7174.196 |
| }, |
| { |
| "epoch": 2.358080366157367, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.9031760159506923e-05, |
| "loss": 3.9549, |
| "num_input_tokens_seen": 573997056, |
| "step": 8760, |
| "train_runtime": 80008.431, |
| "train_tokens_per_second": 7174.207 |
| }, |
| { |
| "epoch": 2.3607726997374976, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.8978861290848836e-05, |
| "loss": 3.9081, |
| "num_input_tokens_seen": 574652416, |
| "step": 8770, |
| "train_runtime": 80099.4108, |
| "train_tokens_per_second": 7174.24 |
| }, |
| { |
| "epoch": 2.363465033317628, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.8925991025031623e-05, |
| "loss": 3.9408, |
| "num_input_tokens_seen": 575307776, |
| "step": 8780, |
| "train_runtime": 80191.0045, |
| "train_tokens_per_second": 7174.218 |
| }, |
| { |
| "epoch": 2.3661573668977587, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.887314961321043e-05, |
| "loss": 3.8723, |
| "num_input_tokens_seen": 575963136, |
| "step": 8790, |
| "train_runtime": 80282.2053, |
| "train_tokens_per_second": 7174.232 |
| }, |
| { |
| "epoch": 2.368849700477889, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8820337306403274e-05, |
| "loss": 3.9218, |
| "num_input_tokens_seen": 576618496, |
| "step": 8800, |
| "train_runtime": 80373.7228, |
| "train_tokens_per_second": 7174.217 |
| }, |
| { |
| "epoch": 2.3715420340580198, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.8767554355489974e-05, |
| "loss": 3.921, |
| "num_input_tokens_seen": 577273856, |
| "step": 8810, |
| "train_runtime": 80464.6105, |
| "train_tokens_per_second": 7174.258 |
| }, |
| { |
| "epoch": 2.3742343676381505, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.8714801011210842e-05, |
| "loss": 3.854, |
| "num_input_tokens_seen": 577929216, |
| "step": 8820, |
| "train_runtime": 80556.5972, |
| "train_tokens_per_second": 7174.201 |
| }, |
| { |
| "epoch": 2.376926701218281, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.8662077524165583e-05, |
| "loss": 3.9925, |
| "num_input_tokens_seen": 578584576, |
| "step": 8830, |
| "train_runtime": 80647.9657, |
| "train_tokens_per_second": 7174.199 |
| }, |
| { |
| "epoch": 2.3796190347984116, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8609384144812068e-05, |
| "loss": 3.9418, |
| "num_input_tokens_seen": 579239936, |
| "step": 8840, |
| "train_runtime": 80739.7775, |
| "train_tokens_per_second": 7174.158 |
| }, |
| { |
| "epoch": 2.382311368378542, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.8556721123465107e-05, |
| "loss": 3.955, |
| "num_input_tokens_seen": 579895296, |
| "step": 8850, |
| "train_runtime": 80831.2192, |
| "train_tokens_per_second": 7174.15 |
| }, |
| { |
| "epoch": 2.3850037019586727, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.8504088710295346e-05, |
| "loss": 3.9767, |
| "num_input_tokens_seen": 580550656, |
| "step": 8860, |
| "train_runtime": 80922.8283, |
| "train_tokens_per_second": 7174.127 |
| }, |
| { |
| "epoch": 2.387696035538803, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8451487155327986e-05, |
| "loss": 3.9133, |
| "num_input_tokens_seen": 581206016, |
| "step": 8870, |
| "train_runtime": 81013.9561, |
| "train_tokens_per_second": 7174.147 |
| }, |
| { |
| "epoch": 2.390388369118934, |
| "grad_norm": 1.0, |
| "learning_rate": 1.8398916708441672e-05, |
| "loss": 4.023, |
| "num_input_tokens_seen": 581861376, |
| "step": 8880, |
| "train_runtime": 81105.7181, |
| "train_tokens_per_second": 7174.11 |
| }, |
| { |
| "epoch": 2.3930807026990646, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8346377619367267e-05, |
| "loss": 3.8944, |
| "num_input_tokens_seen": 582516736, |
| "step": 8890, |
| "train_runtime": 81197.261, |
| "train_tokens_per_second": 7174.093 |
| }, |
| { |
| "epoch": 2.395773036279195, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.8293870137686648e-05, |
| "loss": 3.8692, |
| "num_input_tokens_seen": 583172096, |
| "step": 8900, |
| "train_runtime": 81289.0326, |
| "train_tokens_per_second": 7174.056 |
| }, |
| { |
| "epoch": 2.3984653698593257, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.8241394512831576e-05, |
| "loss": 3.9668, |
| "num_input_tokens_seen": 583827456, |
| "step": 8910, |
| "train_runtime": 81380.7073, |
| "train_tokens_per_second": 7174.028 |
| }, |
| { |
| "epoch": 2.401157703439456, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.8188950994082456e-05, |
| "loss": 3.8961, |
| "num_input_tokens_seen": 584482816, |
| "step": 8920, |
| "train_runtime": 81472.3337, |
| "train_tokens_per_second": 7174.004 |
| }, |
| { |
| "epoch": 2.4038500370195868, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.813653983056719e-05, |
| "loss": 3.9315, |
| "num_input_tokens_seen": 585138176, |
| "step": 8930, |
| "train_runtime": 81563.6768, |
| "train_tokens_per_second": 7174.004 |
| }, |
| { |
| "epoch": 2.406542370599717, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.8084161271259966e-05, |
| "loss": 3.9244, |
| "num_input_tokens_seen": 585793536, |
| "step": 8940, |
| "train_runtime": 81655.1173, |
| "train_tokens_per_second": 7173.997 |
| }, |
| { |
| "epoch": 2.409234704179848, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.8031815564980104e-05, |
| "loss": 3.9473, |
| "num_input_tokens_seen": 586448896, |
| "step": 8950, |
| "train_runtime": 81747.0607, |
| "train_tokens_per_second": 7173.945 |
| }, |
| { |
| "epoch": 2.4119270377599786, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.797950296039086e-05, |
| "loss": 4.0002, |
| "num_input_tokens_seen": 587104256, |
| "step": 8960, |
| "train_runtime": 81838.6641, |
| "train_tokens_per_second": 7173.923 |
| }, |
| { |
| "epoch": 2.414619371340109, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7927223705998214e-05, |
| "loss": 3.8831, |
| "num_input_tokens_seen": 587759616, |
| "step": 8970, |
| "train_runtime": 81930.2317, |
| "train_tokens_per_second": 7173.904 |
| }, |
| { |
| "epoch": 2.4173117049202397, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7874978050149765e-05, |
| "loss": 3.9407, |
| "num_input_tokens_seen": 588414976, |
| "step": 8980, |
| "train_runtime": 82021.6546, |
| "train_tokens_per_second": 7173.898 |
| }, |
| { |
| "epoch": 2.42000403850037, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.7822766241033456e-05, |
| "loss": 4.0271, |
| "num_input_tokens_seen": 589070336, |
| "step": 8990, |
| "train_runtime": 82113.2156, |
| "train_tokens_per_second": 7173.88 |
| }, |
| { |
| "epoch": 2.422696372080501, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.7770588526676497e-05, |
| "loss": 3.9653, |
| "num_input_tokens_seen": 589725696, |
| "step": 9000, |
| "train_runtime": 82204.6575, |
| "train_tokens_per_second": 7173.872 |
| }, |
| { |
| "epoch": 2.425388705660631, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.771844515494408e-05, |
| "loss": 4.016, |
| "num_input_tokens_seen": 590381056, |
| "step": 9010, |
| "train_runtime": 82312.9264, |
| "train_tokens_per_second": 7172.398 |
| }, |
| { |
| "epoch": 2.428081039240762, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.7666336373538293e-05, |
| "loss": 3.8489, |
| "num_input_tokens_seen": 591036416, |
| "step": 9020, |
| "train_runtime": 82404.3747, |
| "train_tokens_per_second": 7172.391 |
| }, |
| { |
| "epoch": 2.4307733728208927, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7614262429996907e-05, |
| "loss": 3.954, |
| "num_input_tokens_seen": 591691776, |
| "step": 9030, |
| "train_runtime": 82495.3911, |
| "train_tokens_per_second": 7172.422 |
| }, |
| { |
| "epoch": 2.433465706401023, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.7562223571692164e-05, |
| "loss": 3.9313, |
| "num_input_tokens_seen": 592347136, |
| "step": 9040, |
| "train_runtime": 82586.7823, |
| "train_tokens_per_second": 7172.421 |
| }, |
| { |
| "epoch": 2.4361580399811538, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.751022004582969e-05, |
| "loss": 3.8823, |
| "num_input_tokens_seen": 593002496, |
| "step": 9050, |
| "train_runtime": 82677.9376, |
| "train_tokens_per_second": 7172.439 |
| }, |
| { |
| "epoch": 2.438850373561284, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.7458252099447205e-05, |
| "loss": 3.9563, |
| "num_input_tokens_seen": 593657856, |
| "step": 9060, |
| "train_runtime": 82769.4217, |
| "train_tokens_per_second": 7172.43 |
| }, |
| { |
| "epoch": 2.441542707141415, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.7406319979413472e-05, |
| "loss": 3.9923, |
| "num_input_tokens_seen": 594313216, |
| "step": 9070, |
| "train_runtime": 82860.4577, |
| "train_tokens_per_second": 7172.459 |
| }, |
| { |
| "epoch": 2.444235040721545, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.735442393242701e-05, |
| "loss": 3.9398, |
| "num_input_tokens_seen": 594968576, |
| "step": 9080, |
| "train_runtime": 82951.846, |
| "train_tokens_per_second": 7172.457 |
| }, |
| { |
| "epoch": 2.446927374301676, |
| "grad_norm": 1.0, |
| "learning_rate": 1.7302564205015022e-05, |
| "loss": 3.9483, |
| "num_input_tokens_seen": 595623936, |
| "step": 9090, |
| "train_runtime": 83043.0825, |
| "train_tokens_per_second": 7172.469 |
| }, |
| { |
| "epoch": 2.4496197078818067, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.725074104353217e-05, |
| "loss": 3.9473, |
| "num_input_tokens_seen": 596279296, |
| "step": 9100, |
| "train_runtime": 83134.9393, |
| "train_tokens_per_second": 7172.427 |
| }, |
| { |
| "epoch": 2.452312041461937, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.7198954694159374e-05, |
| "loss": 4.0133, |
| "num_input_tokens_seen": 596934656, |
| "step": 9110, |
| "train_runtime": 83226.6166, |
| "train_tokens_per_second": 7172.401 |
| }, |
| { |
| "epoch": 2.455004375042068, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.7147205402902746e-05, |
| "loss": 3.971, |
| "num_input_tokens_seen": 597590016, |
| "step": 9120, |
| "train_runtime": 83317.6613, |
| "train_tokens_per_second": 7172.429 |
| }, |
| { |
| "epoch": 2.457696708622198, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.7095493415592284e-05, |
| "loss": 3.9413, |
| "num_input_tokens_seen": 598245376, |
| "step": 9130, |
| "train_runtime": 83408.8708, |
| "train_tokens_per_second": 7172.443 |
| }, |
| { |
| "epoch": 2.460389042202329, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.704381897788085e-05, |
| "loss": 3.9971, |
| "num_input_tokens_seen": 598900736, |
| "step": 9140, |
| "train_runtime": 83500.1884, |
| "train_tokens_per_second": 7172.448 |
| }, |
| { |
| "epoch": 2.4630813757824592, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.699218233524286e-05, |
| "loss": 3.9231, |
| "num_input_tokens_seen": 599556096, |
| "step": 9150, |
| "train_runtime": 83591.4932, |
| "train_tokens_per_second": 7172.453 |
| }, |
| { |
| "epoch": 2.46577370936259, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.6940583732973248e-05, |
| "loss": 4.0531, |
| "num_input_tokens_seen": 600211456, |
| "step": 9160, |
| "train_runtime": 83683.1001, |
| "train_tokens_per_second": 7172.433 |
| }, |
| { |
| "epoch": 2.4684660429427208, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.6889023416186223e-05, |
| "loss": 3.9988, |
| "num_input_tokens_seen": 600866816, |
| "step": 9170, |
| "train_runtime": 83773.9429, |
| "train_tokens_per_second": 7172.479 |
| }, |
| { |
| "epoch": 2.471158376522851, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.6837501629814088e-05, |
| "loss": 3.9686, |
| "num_input_tokens_seen": 601522176, |
| "step": 9180, |
| "train_runtime": 83866.3011, |
| "train_tokens_per_second": 7172.394 |
| }, |
| { |
| "epoch": 2.473850710102982, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6786018618606163e-05, |
| "loss": 3.9876, |
| "num_input_tokens_seen": 602177536, |
| "step": 9190, |
| "train_runtime": 83957.9068, |
| "train_tokens_per_second": 7172.374 |
| }, |
| { |
| "epoch": 2.476543043683112, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.6734574627127524e-05, |
| "loss": 3.9547, |
| "num_input_tokens_seen": 602832896, |
| "step": 9200, |
| "train_runtime": 84049.3473, |
| "train_tokens_per_second": 7172.369 |
| }, |
| { |
| "epoch": 2.479235377263243, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.6683169899757938e-05, |
| "loss": 4.0162, |
| "num_input_tokens_seen": 603488256, |
| "step": 9210, |
| "train_runtime": 84140.7588, |
| "train_tokens_per_second": 7172.365 |
| }, |
| { |
| "epoch": 2.4819277108433733, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.6631804680690594e-05, |
| "loss": 3.9212, |
| "num_input_tokens_seen": 604143616, |
| "step": 9220, |
| "train_runtime": 84232.6052, |
| "train_tokens_per_second": 7172.325 |
| }, |
| { |
| "epoch": 2.484620044423504, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.658047921393104e-05, |
| "loss": 4.011, |
| "num_input_tokens_seen": 604798976, |
| "step": 9230, |
| "train_runtime": 84324.1924, |
| "train_tokens_per_second": 7172.307 |
| }, |
| { |
| "epoch": 2.487312378003635, |
| "grad_norm": 1.0, |
| "learning_rate": 1.652919374329599e-05, |
| "loss": 3.9753, |
| "num_input_tokens_seen": 605454336, |
| "step": 9240, |
| "train_runtime": 84415.5753, |
| "train_tokens_per_second": 7172.306 |
| }, |
| { |
| "epoch": 2.490004711583765, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6477948512412115e-05, |
| "loss": 3.8978, |
| "num_input_tokens_seen": 606109696, |
| "step": 9250, |
| "train_runtime": 84507.2116, |
| "train_tokens_per_second": 7172.284 |
| }, |
| { |
| "epoch": 2.492697045163896, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.6426743764714993e-05, |
| "loss": 4.1365, |
| "num_input_tokens_seen": 606765056, |
| "step": 9260, |
| "train_runtime": 84599.1369, |
| "train_tokens_per_second": 7172.237 |
| }, |
| { |
| "epoch": 2.495389378744026, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.6375579743447827e-05, |
| "loss": 3.9571, |
| "num_input_tokens_seen": 607420416, |
| "step": 9270, |
| "train_runtime": 84690.608, |
| "train_tokens_per_second": 7172.229 |
| }, |
| { |
| "epoch": 2.498081712324157, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.6324456691660402e-05, |
| "loss": 3.9577, |
| "num_input_tokens_seen": 608075776, |
| "step": 9280, |
| "train_runtime": 84782.2711, |
| "train_tokens_per_second": 7172.204 |
| }, |
| { |
| "epoch": 2.5007740459042873, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.6273374852207862e-05, |
| "loss": 3.9495, |
| "num_input_tokens_seen": 608731136, |
| "step": 9290, |
| "train_runtime": 84874.7787, |
| "train_tokens_per_second": 7172.109 |
| }, |
| { |
| "epoch": 2.503466379484418, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.622233446774957e-05, |
| "loss": 3.9344, |
| "num_input_tokens_seen": 609386496, |
| "step": 9300, |
| "train_runtime": 84966.6974, |
| "train_tokens_per_second": 7172.063 |
| }, |
| { |
| "epoch": 2.506158713064549, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.6171335780747982e-05, |
| "loss": 3.9531, |
| "num_input_tokens_seen": 610041856, |
| "step": 9310, |
| "train_runtime": 85058.3056, |
| "train_tokens_per_second": 7172.043 |
| }, |
| { |
| "epoch": 2.508851046644679, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.6120379033467435e-05, |
| "loss": 3.9955, |
| "num_input_tokens_seen": 610697216, |
| "step": 9320, |
| "train_runtime": 85150.3157, |
| "train_tokens_per_second": 7171.99 |
| }, |
| { |
| "epoch": 2.51154338022481, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.6069464467973093e-05, |
| "loss": 3.8716, |
| "num_input_tokens_seen": 611352576, |
| "step": 9330, |
| "train_runtime": 85242.2507, |
| "train_tokens_per_second": 7171.943 |
| }, |
| { |
| "epoch": 2.5142357138049407, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.6018592326129678e-05, |
| "loss": 3.924, |
| "num_input_tokens_seen": 612007936, |
| "step": 9340, |
| "train_runtime": 85333.8473, |
| "train_tokens_per_second": 7171.925 |
| }, |
| { |
| "epoch": 2.516928047385071, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.5967762849600422e-05, |
| "loss": 3.936, |
| "num_input_tokens_seen": 612663296, |
| "step": 9350, |
| "train_runtime": 85425.593, |
| "train_tokens_per_second": 7171.894 |
| }, |
| { |
| "epoch": 2.5196203809652014, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.5916976279845884e-05, |
| "loss": 3.9575, |
| "num_input_tokens_seen": 613318656, |
| "step": 9360, |
| "train_runtime": 85517.71, |
| "train_tokens_per_second": 7171.832 |
| }, |
| { |
| "epoch": 2.522312714545332, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.5866232858122758e-05, |
| "loss": 3.9254, |
| "num_input_tokens_seen": 613974016, |
| "step": 9370, |
| "train_runtime": 85609.6872, |
| "train_tokens_per_second": 7171.782 |
| }, |
| { |
| "epoch": 2.525005048125463, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.5815532825482822e-05, |
| "loss": 3.8467, |
| "num_input_tokens_seen": 614629376, |
| "step": 9380, |
| "train_runtime": 85701.5941, |
| "train_tokens_per_second": 7171.738 |
| }, |
| { |
| "epoch": 2.527697381705593, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.576487642277168e-05, |
| "loss": 3.9184, |
| "num_input_tokens_seen": 615284736, |
| "step": 9390, |
| "train_runtime": 85794.0017, |
| "train_tokens_per_second": 7171.652 |
| }, |
| { |
| "epoch": 2.530389715285724, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.571426389062773e-05, |
| "loss": 3.9802, |
| "num_input_tokens_seen": 615940096, |
| "step": 9400, |
| "train_runtime": 85886.318, |
| "train_tokens_per_second": 7171.574 |
| }, |
| { |
| "epoch": 2.5330820488658548, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.566369546948092e-05, |
| "loss": 3.884, |
| "num_input_tokens_seen": 616595456, |
| "step": 9410, |
| "train_runtime": 85978.2226, |
| "train_tokens_per_second": 7171.531 |
| }, |
| { |
| "epoch": 2.535774382445985, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.561317139955169e-05, |
| "loss": 3.9143, |
| "num_input_tokens_seen": 617250816, |
| "step": 9420, |
| "train_runtime": 86070.5047, |
| "train_tokens_per_second": 7171.456 |
| }, |
| { |
| "epoch": 2.5384667160261154, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.5562691920849786e-05, |
| "loss": 4.0689, |
| "num_input_tokens_seen": 617906176, |
| "step": 9430, |
| "train_runtime": 86161.6282, |
| "train_tokens_per_second": 7171.477 |
| }, |
| { |
| "epoch": 2.541159049606246, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.5512257273173102e-05, |
| "loss": 3.8768, |
| "num_input_tokens_seen": 618561536, |
| "step": 9440, |
| "train_runtime": 86253.1252, |
| "train_tokens_per_second": 7171.468 |
| }, |
| { |
| "epoch": 2.543851383186377, |
| "grad_norm": 1.0, |
| "learning_rate": 1.546186769610661e-05, |
| "loss": 4.0194, |
| "num_input_tokens_seen": 619216896, |
| "step": 9450, |
| "train_runtime": 86345.733, |
| "train_tokens_per_second": 7171.366 |
| }, |
| { |
| "epoch": 2.5465437167665073, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.5411523429021123e-05, |
| "loss": 3.976, |
| "num_input_tokens_seen": 619872256, |
| "step": 9460, |
| "train_runtime": 86437.2789, |
| "train_tokens_per_second": 7171.353 |
| }, |
| { |
| "epoch": 2.549236050346638, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.5361224711072277e-05, |
| "loss": 3.9104, |
| "num_input_tokens_seen": 620527616, |
| "step": 9470, |
| "train_runtime": 86529.5616, |
| "train_tokens_per_second": 7171.279 |
| }, |
| { |
| "epoch": 2.551928383926769, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.5310971781199273e-05, |
| "loss": 3.8955, |
| "num_input_tokens_seen": 621182976, |
| "step": 9480, |
| "train_runtime": 86621.4372, |
| "train_tokens_per_second": 7171.238 |
| }, |
| { |
| "epoch": 2.554620717506899, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.5260764878123833e-05, |
| "loss": 3.9429, |
| "num_input_tokens_seen": 621838336, |
| "step": 9490, |
| "train_runtime": 86712.9665, |
| "train_tokens_per_second": 7171.227 |
| }, |
| { |
| "epoch": 2.5573130510870294, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.5210604240349042e-05, |
| "loss": 3.921, |
| "num_input_tokens_seen": 622493696, |
| "step": 9500, |
| "train_runtime": 86805.012, |
| "train_tokens_per_second": 7171.172 |
| }, |
| { |
| "epoch": 2.56000538466716, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.5160490106158164e-05, |
| "loss": 3.9021, |
| "num_input_tokens_seen": 623149056, |
| "step": 9510, |
| "train_runtime": 86914.2944, |
| "train_tokens_per_second": 7169.696 |
| }, |
| { |
| "epoch": 2.562697718247291, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.5110422713613603e-05, |
| "loss": 3.9917, |
| "num_input_tokens_seen": 623804416, |
| "step": 9520, |
| "train_runtime": 87006.6394, |
| "train_tokens_per_second": 7169.619 |
| }, |
| { |
| "epoch": 2.5653900518274213, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.5060402300555677e-05, |
| "loss": 3.9477, |
| "num_input_tokens_seen": 624459776, |
| "step": 9530, |
| "train_runtime": 87098.3272, |
| "train_tokens_per_second": 7169.596 |
| }, |
| { |
| "epoch": 2.568082385407552, |
| "grad_norm": 1.0, |
| "learning_rate": 1.5010429104601565e-05, |
| "loss": 3.9293, |
| "num_input_tokens_seen": 625115136, |
| "step": 9540, |
| "train_runtime": 87190.5284, |
| "train_tokens_per_second": 7169.53 |
| }, |
| { |
| "epoch": 2.570774718987683, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.4960503363144116e-05, |
| "loss": 3.9775, |
| "num_input_tokens_seen": 625770496, |
| "step": 9550, |
| "train_runtime": 87282.3472, |
| "train_tokens_per_second": 7169.497 |
| }, |
| { |
| "epoch": 2.573467052567813, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.4910625313350778e-05, |
| "loss": 3.9451, |
| "num_input_tokens_seen": 626425856, |
| "step": 9560, |
| "train_runtime": 87374.4718, |
| "train_tokens_per_second": 7169.438 |
| }, |
| { |
| "epoch": 2.5761593861479435, |
| "grad_norm": 0.91796875, |
| "learning_rate": 1.486079519216245e-05, |
| "loss": 4.0295, |
| "num_input_tokens_seen": 627081216, |
| "step": 9570, |
| "train_runtime": 87467.3126, |
| "train_tokens_per_second": 7169.321 |
| }, |
| { |
| "epoch": 2.5788517197280743, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.4811013236292304e-05, |
| "loss": 3.9548, |
| "num_input_tokens_seen": 627736576, |
| "step": 9580, |
| "train_runtime": 87558.703, |
| "train_tokens_per_second": 7169.322 |
| }, |
| { |
| "epoch": 2.581544053308205, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.4761279682224765e-05, |
| "loss": 3.96, |
| "num_input_tokens_seen": 628391936, |
| "step": 9590, |
| "train_runtime": 87650.9007, |
| "train_tokens_per_second": 7169.258 |
| }, |
| { |
| "epoch": 2.5842363868883353, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.4711594766214281e-05, |
| "loss": 4.0199, |
| "num_input_tokens_seen": 629047296, |
| "step": 9600, |
| "train_runtime": 87743.0317, |
| "train_tokens_per_second": 7169.199 |
| }, |
| { |
| "epoch": 2.586928720468466, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.4661958724284292e-05, |
| "loss": 3.8755, |
| "num_input_tokens_seen": 629702656, |
| "step": 9610, |
| "train_runtime": 87835.7675, |
| "train_tokens_per_second": 7169.092 |
| }, |
| { |
| "epoch": 2.589621054048597, |
| "grad_norm": 1.0, |
| "learning_rate": 1.4612371792226026e-05, |
| "loss": 3.9478, |
| "num_input_tokens_seen": 630358016, |
| "step": 9620, |
| "train_runtime": 87927.4765, |
| "train_tokens_per_second": 7169.068 |
| }, |
| { |
| "epoch": 2.592313387628727, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.456283420559745e-05, |
| "loss": 3.9678, |
| "num_input_tokens_seen": 631013376, |
| "step": 9630, |
| "train_runtime": 88019.1614, |
| "train_tokens_per_second": 7169.046 |
| }, |
| { |
| "epoch": 2.5950057212088575, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.4513346199722112e-05, |
| "loss": 3.9617, |
| "num_input_tokens_seen": 631668736, |
| "step": 9640, |
| "train_runtime": 88111.4148, |
| "train_tokens_per_second": 7168.977 |
| }, |
| { |
| "epoch": 2.5976980547889883, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.4463908009688021e-05, |
| "loss": 3.936, |
| "num_input_tokens_seen": 632324096, |
| "step": 9650, |
| "train_runtime": 88203.1555, |
| "train_tokens_per_second": 7168.951 |
| }, |
| { |
| "epoch": 2.600390388369119, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.4414519870346554e-05, |
| "loss": 3.9889, |
| "num_input_tokens_seen": 632979456, |
| "step": 9660, |
| "train_runtime": 88295.399, |
| "train_tokens_per_second": 7168.884 |
| }, |
| { |
| "epoch": 2.6030827219492494, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.4365182016311319e-05, |
| "loss": 3.9093, |
| "num_input_tokens_seen": 633634816, |
| "step": 9670, |
| "train_runtime": 88387.9014, |
| "train_tokens_per_second": 7168.796 |
| }, |
| { |
| "epoch": 2.60577505552938, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.4315894681957037e-05, |
| "loss": 3.9056, |
| "num_input_tokens_seen": 634290176, |
| "step": 9680, |
| "train_runtime": 88479.8584, |
| "train_tokens_per_second": 7168.752 |
| }, |
| { |
| "epoch": 2.608467389109511, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.4266658101418496e-05, |
| "loss": 4.0172, |
| "num_input_tokens_seen": 634945536, |
| "step": 9690, |
| "train_runtime": 88571.9004, |
| "train_tokens_per_second": 7168.702 |
| }, |
| { |
| "epoch": 2.6111597226896412, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.4217472508589286e-05, |
| "loss": 3.9201, |
| "num_input_tokens_seen": 635600896, |
| "step": 9700, |
| "train_runtime": 88663.5355, |
| "train_tokens_per_second": 7168.684 |
| }, |
| { |
| "epoch": 2.6138520562697716, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.4168338137120878e-05, |
| "loss": 3.895, |
| "num_input_tokens_seen": 636256256, |
| "step": 9710, |
| "train_runtime": 88755.9511, |
| "train_tokens_per_second": 7168.604 |
| }, |
| { |
| "epoch": 2.6165443898499023, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.4119255220421374e-05, |
| "loss": 3.9592, |
| "num_input_tokens_seen": 636911616, |
| "step": 9720, |
| "train_runtime": 88847.8034, |
| "train_tokens_per_second": 7168.569 |
| }, |
| { |
| "epoch": 2.619236723430033, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.4070223991654452e-05, |
| "loss": 3.9153, |
| "num_input_tokens_seen": 637566976, |
| "step": 9730, |
| "train_runtime": 88939.424, |
| "train_tokens_per_second": 7168.553 |
| }, |
| { |
| "epoch": 2.6219290570101634, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.4021244683738249e-05, |
| "loss": 3.9623, |
| "num_input_tokens_seen": 638222336, |
| "step": 9740, |
| "train_runtime": 89031.2726, |
| "train_tokens_per_second": 7168.519 |
| }, |
| { |
| "epoch": 2.624621390590294, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3972317529344265e-05, |
| "loss": 3.9298, |
| "num_input_tokens_seen": 638877696, |
| "step": 9750, |
| "train_runtime": 89123.2096, |
| "train_tokens_per_second": 7168.477 |
| }, |
| { |
| "epoch": 2.627313724170425, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.3923442760896244e-05, |
| "loss": 3.9353, |
| "num_input_tokens_seen": 639533056, |
| "step": 9760, |
| "train_runtime": 89215.2725, |
| "train_tokens_per_second": 7168.426 |
| }, |
| { |
| "epoch": 2.6300060577505553, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.3874620610569078e-05, |
| "loss": 3.9251, |
| "num_input_tokens_seen": 640188416, |
| "step": 9770, |
| "train_runtime": 89307.6683, |
| "train_tokens_per_second": 7168.348 |
| }, |
| { |
| "epoch": 2.6326983913306856, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.38258513102877e-05, |
| "loss": 3.8596, |
| "num_input_tokens_seen": 640843776, |
| "step": 9780, |
| "train_runtime": 89399.5391, |
| "train_tokens_per_second": 7168.312 |
| }, |
| { |
| "epoch": 2.6353907249108164, |
| "grad_norm": 1.0, |
| "learning_rate": 1.3777135091725985e-05, |
| "loss": 4.015, |
| "num_input_tokens_seen": 641499136, |
| "step": 9790, |
| "train_runtime": 89491.8769, |
| "train_tokens_per_second": 7168.239 |
| }, |
| { |
| "epoch": 2.638083058490947, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.372847218630565e-05, |
| "loss": 3.9325, |
| "num_input_tokens_seen": 642154496, |
| "step": 9800, |
| "train_runtime": 89583.7249, |
| "train_tokens_per_second": 7168.205 |
| }, |
| { |
| "epoch": 2.6407753920710775, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.367986282519515e-05, |
| "loss": 3.9479, |
| "num_input_tokens_seen": 642809856, |
| "step": 9810, |
| "train_runtime": 89676.1861, |
| "train_tokens_per_second": 7168.122 |
| }, |
| { |
| "epoch": 2.6434677256512082, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.3631307239308575e-05, |
| "loss": 3.9335, |
| "num_input_tokens_seen": 643465216, |
| "step": 9820, |
| "train_runtime": 89767.6734, |
| "train_tokens_per_second": 7168.117 |
| }, |
| { |
| "epoch": 2.646160059231339, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3582805659304598e-05, |
| "loss": 4.004, |
| "num_input_tokens_seen": 644120576, |
| "step": 9830, |
| "train_runtime": 89859.7834, |
| "train_tokens_per_second": 7168.063 |
| }, |
| { |
| "epoch": 2.6488523928114693, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3534358315585278e-05, |
| "loss": 3.9324, |
| "num_input_tokens_seen": 644775936, |
| "step": 9840, |
| "train_runtime": 89951.7227, |
| "train_tokens_per_second": 7168.022 |
| }, |
| { |
| "epoch": 2.6515447263915997, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.3485965438295095e-05, |
| "loss": 3.879, |
| "num_input_tokens_seen": 645431296, |
| "step": 9850, |
| "train_runtime": 90044.1085, |
| "train_tokens_per_second": 7167.946 |
| }, |
| { |
| "epoch": 2.6542370599717304, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.3437627257319752e-05, |
| "loss": 3.9483, |
| "num_input_tokens_seen": 646086656, |
| "step": 9860, |
| "train_runtime": 90136.1279, |
| "train_tokens_per_second": 7167.899 |
| }, |
| { |
| "epoch": 2.656929393551861, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.3389344002285132e-05, |
| "loss": 3.9728, |
| "num_input_tokens_seen": 646742016, |
| "step": 9870, |
| "train_runtime": 90228.3459, |
| "train_tokens_per_second": 7167.836 |
| }, |
| { |
| "epoch": 2.6596217271319915, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.33411159025562e-05, |
| "loss": 3.9763, |
| "num_input_tokens_seen": 647397376, |
| "step": 9880, |
| "train_runtime": 90320.3635, |
| "train_tokens_per_second": 7167.79 |
| }, |
| { |
| "epoch": 2.6623140607121223, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.329294318723591e-05, |
| "loss": 3.9931, |
| "num_input_tokens_seen": 648052736, |
| "step": 9890, |
| "train_runtime": 90412.7519, |
| "train_tokens_per_second": 7167.714 |
| }, |
| { |
| "epoch": 2.665006394292253, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.3244826085164116e-05, |
| "loss": 3.9671, |
| "num_input_tokens_seen": 648708096, |
| "step": 9900, |
| "train_runtime": 90504.3501, |
| "train_tokens_per_second": 7167.701 |
| }, |
| { |
| "epoch": 2.6676987278723834, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.319676482491649e-05, |
| "loss": 3.8095, |
| "num_input_tokens_seen": 649363456, |
| "step": 9910, |
| "train_runtime": 90596.3941, |
| "train_tokens_per_second": 7167.652 |
| }, |
| { |
| "epoch": 2.6703910614525137, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.3148759634803428e-05, |
| "loss": 3.8765, |
| "num_input_tokens_seen": 650018816, |
| "step": 9920, |
| "train_runtime": 90687.7916, |
| "train_tokens_per_second": 7167.655 |
| }, |
| { |
| "epoch": 2.6730833950326445, |
| "grad_norm": 1.0, |
| "learning_rate": 1.3100810742868975e-05, |
| "loss": 3.9887, |
| "num_input_tokens_seen": 650674176, |
| "step": 9930, |
| "train_runtime": 90779.4533, |
| "train_tokens_per_second": 7167.637 |
| }, |
| { |
| "epoch": 2.6757757286127752, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.3052918376889737e-05, |
| "loss": 3.8924, |
| "num_input_tokens_seen": 651329536, |
| "step": 9940, |
| "train_runtime": 90871.6076, |
| "train_tokens_per_second": 7167.58 |
| }, |
| { |
| "epoch": 2.6784680621929056, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.3005082764373791e-05, |
| "loss": 3.9264, |
| "num_input_tokens_seen": 651984896, |
| "step": 9950, |
| "train_runtime": 90963.4747, |
| "train_tokens_per_second": 7167.546 |
| }, |
| { |
| "epoch": 2.6811603957730363, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.295730413255961e-05, |
| "loss": 3.8486, |
| "num_input_tokens_seen": 652640256, |
| "step": 9960, |
| "train_runtime": 91055.2698, |
| "train_tokens_per_second": 7167.518 |
| }, |
| { |
| "epoch": 2.683852729353167, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.290958270841503e-05, |
| "loss": 4.0193, |
| "num_input_tokens_seen": 653295616, |
| "step": 9970, |
| "train_runtime": 91146.9224, |
| "train_tokens_per_second": 7167.501 |
| }, |
| { |
| "epoch": 2.6865450629332974, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.286191871863604e-05, |
| "loss": 3.9871, |
| "num_input_tokens_seen": 653950976, |
| "step": 9980, |
| "train_runtime": 91239.4581, |
| "train_tokens_per_second": 7167.414 |
| }, |
| { |
| "epoch": 2.6892373965134277, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.2814312389645882e-05, |
| "loss": 4.0445, |
| "num_input_tokens_seen": 654606336, |
| "step": 9990, |
| "train_runtime": 91330.7094, |
| "train_tokens_per_second": 7167.429 |
| }, |
| { |
| "epoch": 2.6919297300935585, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.2766763947593835e-05, |
| "loss": 3.9427, |
| "num_input_tokens_seen": 655261696, |
| "step": 10000, |
| "train_runtime": 91422.3951, |
| "train_tokens_per_second": 7167.409 |
| }, |
| { |
| "epoch": 2.6946220636736893, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.2719273618354206e-05, |
| "loss": 3.9393, |
| "num_input_tokens_seen": 655917056, |
| "step": 10010, |
| "train_runtime": 91532.3928, |
| "train_tokens_per_second": 7165.956 |
| }, |
| { |
| "epoch": 2.6973143972538196, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.2671841627525235e-05, |
| "loss": 4.0016, |
| "num_input_tokens_seen": 656572416, |
| "step": 10020, |
| "train_runtime": 91623.8029, |
| "train_tokens_per_second": 7165.959 |
| }, |
| { |
| "epoch": 2.7000067308339504, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.2624468200428041e-05, |
| "loss": 4.0109, |
| "num_input_tokens_seen": 657227776, |
| "step": 10030, |
| "train_runtime": 91715.694, |
| "train_tokens_per_second": 7165.925 |
| }, |
| { |
| "epoch": 2.702699064414081, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.257715356210554e-05, |
| "loss": 3.8675, |
| "num_input_tokens_seen": 657883136, |
| "step": 10040, |
| "train_runtime": 91807.8194, |
| "train_tokens_per_second": 7165.873 |
| }, |
| { |
| "epoch": 2.7053913979942115, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.2529897937321369e-05, |
| "loss": 3.834, |
| "num_input_tokens_seen": 658538496, |
| "step": 10050, |
| "train_runtime": 91900.0271, |
| "train_tokens_per_second": 7165.814 |
| }, |
| { |
| "epoch": 2.708083731574342, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.2482701550558837e-05, |
| "loss": 3.8913, |
| "num_input_tokens_seen": 659193856, |
| "step": 10060, |
| "train_runtime": 91991.8394, |
| "train_tokens_per_second": 7165.786 |
| }, |
| { |
| "epoch": 2.7107760651544726, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.2435564626019844e-05, |
| "loss": 4.0047, |
| "num_input_tokens_seen": 659849216, |
| "step": 10070, |
| "train_runtime": 92083.4627, |
| "train_tokens_per_second": 7165.773 |
| }, |
| { |
| "epoch": 2.7134683987346033, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.2388487387623813e-05, |
| "loss": 4.1109, |
| "num_input_tokens_seen": 660504576, |
| "step": 10080, |
| "train_runtime": 92175.2186, |
| "train_tokens_per_second": 7165.75 |
| }, |
| { |
| "epoch": 2.7161607323147337, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.2341470059006663e-05, |
| "loss": 3.9107, |
| "num_input_tokens_seen": 661159936, |
| "step": 10090, |
| "train_runtime": 92267.2511, |
| "train_tokens_per_second": 7165.705 |
| }, |
| { |
| "epoch": 2.7188530658948644, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.2294512863519666e-05, |
| "loss": 3.924, |
| "num_input_tokens_seen": 661815296, |
| "step": 10100, |
| "train_runtime": 92359.1828, |
| "train_tokens_per_second": 7165.669 |
| }, |
| { |
| "epoch": 2.721545399474995, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.2247616024228491e-05, |
| "loss": 3.9895, |
| "num_input_tokens_seen": 662470656, |
| "step": 10110, |
| "train_runtime": 92450.6455, |
| "train_tokens_per_second": 7165.668 |
| }, |
| { |
| "epoch": 2.7242377330551255, |
| "grad_norm": 1.0, |
| "learning_rate": 1.220077976391206e-05, |
| "loss": 4.095, |
| "num_input_tokens_seen": 663126016, |
| "step": 10120, |
| "train_runtime": 92542.7132, |
| "train_tokens_per_second": 7165.621 |
| }, |
| { |
| "epoch": 2.726930066635256, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.2154004305061525e-05, |
| "loss": 3.9587, |
| "num_input_tokens_seen": 663781376, |
| "step": 10130, |
| "train_runtime": 92634.7173, |
| "train_tokens_per_second": 7165.579 |
| }, |
| { |
| "epoch": 2.7296224002153866, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.210728986987921e-05, |
| "loss": 3.8686, |
| "num_input_tokens_seen": 664436736, |
| "step": 10140, |
| "train_runtime": 92726.4649, |
| "train_tokens_per_second": 7165.557 |
| }, |
| { |
| "epoch": 2.7323147337955174, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.2060636680277548e-05, |
| "loss": 3.7919, |
| "num_input_tokens_seen": 665092096, |
| "step": 10150, |
| "train_runtime": 92818.6506, |
| "train_tokens_per_second": 7165.501 |
| }, |
| { |
| "epoch": 2.7350070673756477, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.201404495787804e-05, |
| "loss": 3.8285, |
| "num_input_tokens_seen": 665747456, |
| "step": 10160, |
| "train_runtime": 92910.6656, |
| "train_tokens_per_second": 7165.458 |
| }, |
| { |
| "epoch": 2.7376994009557785, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.1967514924010182e-05, |
| "loss": 3.798, |
| "num_input_tokens_seen": 666402816, |
| "step": 10170, |
| "train_runtime": 93002.6363, |
| "train_tokens_per_second": 7165.419 |
| }, |
| { |
| "epoch": 2.7403917345359092, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1921046799710425e-05, |
| "loss": 3.8745, |
| "num_input_tokens_seen": 667058176, |
| "step": 10180, |
| "train_runtime": 93094.608, |
| "train_tokens_per_second": 7165.379 |
| }, |
| { |
| "epoch": 2.7430840681160396, |
| "grad_norm": 1.0, |
| "learning_rate": 1.1874640805721137e-05, |
| "loss": 3.9656, |
| "num_input_tokens_seen": 667713536, |
| "step": 10190, |
| "train_runtime": 93187.4885, |
| "train_tokens_per_second": 7165.27 |
| }, |
| { |
| "epoch": 2.7457764016961703, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1828297162489529e-05, |
| "loss": 3.9262, |
| "num_input_tokens_seen": 668368896, |
| "step": 10200, |
| "train_runtime": 93279.158, |
| "train_tokens_per_second": 7165.254 |
| }, |
| { |
| "epoch": 2.7484687352763006, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.1782016090166622e-05, |
| "loss": 3.9285, |
| "num_input_tokens_seen": 669024256, |
| "step": 10210, |
| "train_runtime": 93371.5394, |
| "train_tokens_per_second": 7165.184 |
| }, |
| { |
| "epoch": 2.7511610688564314, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.17357978086062e-05, |
| "loss": 3.9583, |
| "num_input_tokens_seen": 669679616, |
| "step": 10220, |
| "train_runtime": 93463.6235, |
| "train_tokens_per_second": 7165.136 |
| }, |
| { |
| "epoch": 2.7538534024365617, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.1689642537363796e-05, |
| "loss": 3.9728, |
| "num_input_tokens_seen": 670334976, |
| "step": 10230, |
| "train_runtime": 93556.1642, |
| "train_tokens_per_second": 7165.054 |
| }, |
| { |
| "epoch": 2.7565457360166925, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1643550495695549e-05, |
| "loss": 3.9459, |
| "num_input_tokens_seen": 670990336, |
| "step": 10240, |
| "train_runtime": 93648.1394, |
| "train_tokens_per_second": 7165.015 |
| }, |
| { |
| "epoch": 2.7592380695968233, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1597521902557303e-05, |
| "loss": 4.0197, |
| "num_input_tokens_seen": 671645696, |
| "step": 10250, |
| "train_runtime": 93739.8958, |
| "train_tokens_per_second": 7164.993 |
| }, |
| { |
| "epoch": 2.7619304031769536, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.1551556976603461e-05, |
| "loss": 3.9275, |
| "num_input_tokens_seen": 672301056, |
| "step": 10260, |
| "train_runtime": 93831.7229, |
| "train_tokens_per_second": 7164.965 |
| }, |
| { |
| "epoch": 2.7646227367570844, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.1505655936185985e-05, |
| "loss": 3.8451, |
| "num_input_tokens_seen": 672956416, |
| "step": 10270, |
| "train_runtime": 93923.4861, |
| "train_tokens_per_second": 7164.943 |
| }, |
| { |
| "epoch": 2.7673150703372147, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1459818999353358e-05, |
| "loss": 3.9533, |
| "num_input_tokens_seen": 673611776, |
| "step": 10280, |
| "train_runtime": 94015.5906, |
| "train_tokens_per_second": 7164.894 |
| }, |
| { |
| "epoch": 2.7700074039173455, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.1414046383849545e-05, |
| "loss": 3.8752, |
| "num_input_tokens_seen": 674267136, |
| "step": 10290, |
| "train_runtime": 94107.2512, |
| "train_tokens_per_second": 7164.88 |
| }, |
| { |
| "epoch": 2.772699737497476, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.1368338307112955e-05, |
| "loss": 3.9406, |
| "num_input_tokens_seen": 674922496, |
| "step": 10300, |
| "train_runtime": 94198.9926, |
| "train_tokens_per_second": 7164.859 |
| }, |
| { |
| "epoch": 2.7753920710776065, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.1322694986275414e-05, |
| "loss": 3.9558, |
| "num_input_tokens_seen": 675577856, |
| "step": 10310, |
| "train_runtime": 94291.3535, |
| "train_tokens_per_second": 7164.791 |
| }, |
| { |
| "epoch": 2.7780844046577373, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.1277116638161136e-05, |
| "loss": 3.9761, |
| "num_input_tokens_seen": 676233216, |
| "step": 10320, |
| "train_runtime": 94383.5654, |
| "train_tokens_per_second": 7164.735 |
| }, |
| { |
| "epoch": 2.7807767382378676, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.1231603479285683e-05, |
| "loss": 4.0315, |
| "num_input_tokens_seen": 676888576, |
| "step": 10330, |
| "train_runtime": 94475.3802, |
| "train_tokens_per_second": 7164.709 |
| }, |
| { |
| "epoch": 2.7834690718179984, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.1186155725854942e-05, |
| "loss": 3.8553, |
| "num_input_tokens_seen": 677543936, |
| "step": 10340, |
| "train_runtime": 94568.0217, |
| "train_tokens_per_second": 7164.62 |
| }, |
| { |
| "epoch": 2.7861614053981287, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.1140773593764099e-05, |
| "loss": 3.8506, |
| "num_input_tokens_seen": 678199296, |
| "step": 10350, |
| "train_runtime": 94659.8542, |
| "train_tokens_per_second": 7164.593 |
| }, |
| { |
| "epoch": 2.7888537389782595, |
| "grad_norm": 0.91796875, |
| "learning_rate": 1.1095457298596598e-05, |
| "loss": 3.8205, |
| "num_input_tokens_seen": 678854656, |
| "step": 10360, |
| "train_runtime": 94752.2918, |
| "train_tokens_per_second": 7164.52 |
| }, |
| { |
| "epoch": 2.79154607255839, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.1050207055623182e-05, |
| "loss": 3.9934, |
| "num_input_tokens_seen": 679510016, |
| "step": 10370, |
| "train_runtime": 94843.5667, |
| "train_tokens_per_second": 7164.535 |
| }, |
| { |
| "epoch": 2.7942384061385206, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.100502307980074e-05, |
| "loss": 3.9097, |
| "num_input_tokens_seen": 680165376, |
| "step": 10380, |
| "train_runtime": 94935.0022, |
| "train_tokens_per_second": 7164.537 |
| }, |
| { |
| "epoch": 2.7969307397186514, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.0959905585771435e-05, |
| "loss": 3.9756, |
| "num_input_tokens_seen": 680820736, |
| "step": 10390, |
| "train_runtime": 95026.9066, |
| "train_tokens_per_second": 7164.505 |
| }, |
| { |
| "epoch": 2.7996230732987817, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.0914854787861579e-05, |
| "loss": 3.9535, |
| "num_input_tokens_seen": 681476096, |
| "step": 10400, |
| "train_runtime": 95118.9361, |
| "train_tokens_per_second": 7164.463 |
| }, |
| { |
| "epoch": 2.8023154068789125, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.086987090008066e-05, |
| "loss": 3.8648, |
| "num_input_tokens_seen": 682131456, |
| "step": 10410, |
| "train_runtime": 95210.7981, |
| "train_tokens_per_second": 7164.434 |
| }, |
| { |
| "epoch": 2.8050077404590428, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.082495413612031e-05, |
| "loss": 3.8493, |
| "num_input_tokens_seen": 682786816, |
| "step": 10420, |
| "train_runtime": 95302.6719, |
| "train_tokens_per_second": 7164.404 |
| }, |
| { |
| "epoch": 2.8077000740391735, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.0780104709353306e-05, |
| "loss": 4.052, |
| "num_input_tokens_seen": 683442176, |
| "step": 10430, |
| "train_runtime": 95394.912, |
| "train_tokens_per_second": 7164.346 |
| }, |
| { |
| "epoch": 2.810392407619304, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.0735322832832534e-05, |
| "loss": 3.9188, |
| "num_input_tokens_seen": 684097536, |
| "step": 10440, |
| "train_runtime": 95486.7033, |
| "train_tokens_per_second": 7164.322 |
| }, |
| { |
| "epoch": 2.8130847411994346, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.0690608719290002e-05, |
| "loss": 3.9258, |
| "num_input_tokens_seen": 684752896, |
| "step": 10450, |
| "train_runtime": 95578.2682, |
| "train_tokens_per_second": 7164.316 |
| }, |
| { |
| "epoch": 2.8157770747795654, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.0645962581135807e-05, |
| "loss": 3.9355, |
| "num_input_tokens_seen": 685408256, |
| "step": 10460, |
| "train_runtime": 95670.2538, |
| "train_tokens_per_second": 7164.278 |
| }, |
| { |
| "epoch": 2.8184694083596957, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.0601384630457139e-05, |
| "loss": 3.9595, |
| "num_input_tokens_seen": 686063616, |
| "step": 10470, |
| "train_runtime": 95761.844, |
| "train_tokens_per_second": 7164.269 |
| }, |
| { |
| "epoch": 2.8211617419398265, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.055687507901726e-05, |
| "loss": 3.8596, |
| "num_input_tokens_seen": 686718976, |
| "step": 10480, |
| "train_runtime": 95854.3168, |
| "train_tokens_per_second": 7164.195 |
| }, |
| { |
| "epoch": 2.823854075519957, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.0512434138254543e-05, |
| "loss": 3.9505, |
| "num_input_tokens_seen": 687374336, |
| "step": 10490, |
| "train_runtime": 95946.6439, |
| "train_tokens_per_second": 7164.131 |
| }, |
| { |
| "epoch": 2.8265464091000876, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.0468062019281375e-05, |
| "loss": 3.9638, |
| "num_input_tokens_seen": 688029696, |
| "step": 10500, |
| "train_runtime": 96038.3957, |
| "train_tokens_per_second": 7164.111 |
| }, |
| { |
| "epoch": 2.829238742680218, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.0423758932883274e-05, |
| "loss": 3.9239, |
| "num_input_tokens_seen": 688685056, |
| "step": 10510, |
| "train_runtime": 96147.6978, |
| "train_tokens_per_second": 7162.783 |
| }, |
| { |
| "epoch": 2.8319310762603487, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.0379525089517762e-05, |
| "loss": 4.0036, |
| "num_input_tokens_seen": 689340416, |
| "step": 10520, |
| "train_runtime": 96239.3496, |
| "train_tokens_per_second": 7162.771 |
| }, |
| { |
| "epoch": 2.8346234098404794, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.0335360699313488e-05, |
| "loss": 3.996, |
| "num_input_tokens_seen": 689995776, |
| "step": 10530, |
| "train_runtime": 96331.657, |
| "train_tokens_per_second": 7162.711 |
| }, |
| { |
| "epoch": 2.8373157434206098, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0291265972069136e-05, |
| "loss": 3.9246, |
| "num_input_tokens_seen": 690651136, |
| "step": 10540, |
| "train_runtime": 96423.5509, |
| "train_tokens_per_second": 7162.681 |
| }, |
| { |
| "epoch": 2.8400080770007405, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.024724111725247e-05, |
| "loss": 3.8981, |
| "num_input_tokens_seen": 691306496, |
| "step": 10550, |
| "train_runtime": 96515.4576, |
| "train_tokens_per_second": 7162.651 |
| }, |
| { |
| "epoch": 2.842700410580871, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.0203286343999336e-05, |
| "loss": 3.8913, |
| "num_input_tokens_seen": 691961856, |
| "step": 10560, |
| "train_runtime": 96608.1186, |
| "train_tokens_per_second": 7162.564 |
| }, |
| { |
| "epoch": 2.8453927441610016, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.0159401861112652e-05, |
| "loss": 3.9764, |
| "num_input_tokens_seen": 692617216, |
| "step": 10570, |
| "train_runtime": 96700.1482, |
| "train_tokens_per_second": 7162.525 |
| }, |
| { |
| "epoch": 2.848085077741132, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.0115587877061447e-05, |
| "loss": 3.848, |
| "num_input_tokens_seen": 693272576, |
| "step": 10580, |
| "train_runtime": 96792.4871, |
| "train_tokens_per_second": 7162.463 |
| }, |
| { |
| "epoch": 2.8507774113212627, |
| "grad_norm": 0.9140625, |
| "learning_rate": 1.0071844599979838e-05, |
| "loss": 3.9022, |
| "num_input_tokens_seen": 693927936, |
| "step": 10590, |
| "train_runtime": 96884.7813, |
| "train_tokens_per_second": 7162.404 |
| }, |
| { |
| "epoch": 2.8534697449013935, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.0028172237666061e-05, |
| "loss": 3.9427, |
| "num_input_tokens_seen": 694583296, |
| "step": 10600, |
| "train_runtime": 96977.1597, |
| "train_tokens_per_second": 7162.339 |
| }, |
| { |
| "epoch": 2.856162078481524, |
| "grad_norm": 0.99609375, |
| "learning_rate": 9.984570997581475e-06, |
| "loss": 3.9405, |
| "num_input_tokens_seen": 695238656, |
| "step": 10610, |
| "train_runtime": 97069.4636, |
| "train_tokens_per_second": 7162.28 |
| }, |
| { |
| "epoch": 2.8588544120616546, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.94104108684957e-06, |
| "loss": 3.8885, |
| "num_input_tokens_seen": 695894016, |
| "step": 10620, |
| "train_runtime": 97161.1311, |
| "train_tokens_per_second": 7162.268 |
| }, |
| { |
| "epoch": 2.861546745641785, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.897582712255037e-06, |
| "loss": 4.0121, |
| "num_input_tokens_seen": 696549376, |
| "step": 10630, |
| "train_runtime": 97253.0394, |
| "train_tokens_per_second": 7162.238 |
| }, |
| { |
| "epoch": 2.8642390792219157, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.854196080242672e-06, |
| "loss": 4.0188, |
| "num_input_tokens_seen": 697204736, |
| "step": 10640, |
| "train_runtime": 97345.146, |
| "train_tokens_per_second": 7162.193 |
| }, |
| { |
| "epoch": 2.866931412802046, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.810881396916535e-06, |
| "loss": 4.0005, |
| "num_input_tokens_seen": 697860096, |
| "step": 10650, |
| "train_runtime": 97436.9304, |
| "train_tokens_per_second": 7162.172 |
| }, |
| { |
| "epoch": 2.8696237463821768, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.76763886803883e-06, |
| "loss": 3.8468, |
| "num_input_tokens_seen": 698515456, |
| "step": 10660, |
| "train_runtime": 97528.7654, |
| "train_tokens_per_second": 7162.148 |
| }, |
| { |
| "epoch": 2.8723160799623075, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.724468699029068e-06, |
| "loss": 3.9389, |
| "num_input_tokens_seen": 699170816, |
| "step": 10670, |
| "train_runtime": 97621.254, |
| "train_tokens_per_second": 7162.076 |
| }, |
| { |
| "epoch": 2.875008413542438, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.681371094962974e-06, |
| "loss": 3.9357, |
| "num_input_tokens_seen": 699826176, |
| "step": 10680, |
| "train_runtime": 97713.0596, |
| "train_tokens_per_second": 7162.054 |
| }, |
| { |
| "epoch": 2.8777007471225686, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.638346260571576e-06, |
| "loss": 3.9306, |
| "num_input_tokens_seen": 700481536, |
| "step": 10690, |
| "train_runtime": 97805.1444, |
| "train_tokens_per_second": 7162.011 |
| }, |
| { |
| "epoch": 2.880393080702699, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.595394400240218e-06, |
| "loss": 3.9851, |
| "num_input_tokens_seen": 701136896, |
| "step": 10700, |
| "train_runtime": 97897.8106, |
| "train_tokens_per_second": 7161.926 |
| }, |
| { |
| "epoch": 2.8830854142828297, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.552515718007584e-06, |
| "loss": 3.9531, |
| "num_input_tokens_seen": 701792256, |
| "step": 10710, |
| "train_runtime": 97990.2175, |
| "train_tokens_per_second": 7161.86 |
| }, |
| { |
| "epoch": 2.88577774786296, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.509710417564738e-06, |
| "loss": 3.9513, |
| "num_input_tokens_seen": 702447616, |
| "step": 10720, |
| "train_runtime": 98081.4876, |
| "train_tokens_per_second": 7161.878 |
| }, |
| { |
| "epoch": 2.888470081443091, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.466978702254136e-06, |
| "loss": 3.9174, |
| "num_input_tokens_seen": 703102976, |
| "step": 10730, |
| "train_runtime": 98174.2376, |
| "train_tokens_per_second": 7161.787 |
| }, |
| { |
| "epoch": 2.8911624150232216, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.424320775068698e-06, |
| "loss": 3.9466, |
| "num_input_tokens_seen": 703758336, |
| "step": 10740, |
| "train_runtime": 98266.7376, |
| "train_tokens_per_second": 7161.715 |
| }, |
| { |
| "epoch": 2.893854748603352, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.381736838650801e-06, |
| "loss": 3.9308, |
| "num_input_tokens_seen": 704413696, |
| "step": 10750, |
| "train_runtime": 98358.7673, |
| "train_tokens_per_second": 7161.677 |
| }, |
| { |
| "epoch": 2.8965470821834827, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.339227095291336e-06, |
| "loss": 3.9239, |
| "num_input_tokens_seen": 705069056, |
| "step": 10760, |
| "train_runtime": 98450.4054, |
| "train_tokens_per_second": 7161.667 |
| }, |
| { |
| "epoch": 2.899239415763613, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.296791746928782e-06, |
| "loss": 3.9094, |
| "num_input_tokens_seen": 705724416, |
| "step": 10770, |
| "train_runtime": 98542.8347, |
| "train_tokens_per_second": 7161.601 |
| }, |
| { |
| "epoch": 2.9019317493437438, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.254430995148147e-06, |
| "loss": 3.819, |
| "num_input_tokens_seen": 706379776, |
| "step": 10780, |
| "train_runtime": 98634.8775, |
| "train_tokens_per_second": 7161.562 |
| }, |
| { |
| "epoch": 2.904624082923874, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.212145041180146e-06, |
| "loss": 3.9131, |
| "num_input_tokens_seen": 707035136, |
| "step": 10790, |
| "train_runtime": 98727.3024, |
| "train_tokens_per_second": 7161.496 |
| }, |
| { |
| "epoch": 2.907316416504005, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.169934085900108e-06, |
| "loss": 3.931, |
| "num_input_tokens_seen": 707690496, |
| "step": 10800, |
| "train_runtime": 98819.7569, |
| "train_tokens_per_second": 7161.427 |
| }, |
| { |
| "epoch": 2.9100087500841356, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.127798329827144e-06, |
| "loss": 3.8224, |
| "num_input_tokens_seen": 708345856, |
| "step": 10810, |
| "train_runtime": 98911.3629, |
| "train_tokens_per_second": 7161.42 |
| }, |
| { |
| "epoch": 2.912701083664266, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.08573797312311e-06, |
| "loss": 3.9507, |
| "num_input_tokens_seen": 709001216, |
| "step": 10820, |
| "train_runtime": 99003.7864, |
| "train_tokens_per_second": 7161.355 |
| }, |
| { |
| "epoch": 2.9153934172443967, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.043753215591685e-06, |
| "loss": 3.8651, |
| "num_input_tokens_seen": 709656576, |
| "step": 10830, |
| "train_runtime": 99095.6505, |
| "train_tokens_per_second": 7161.329 |
| }, |
| { |
| "epoch": 2.918085750824527, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.001844256677427e-06, |
| "loss": 3.9426, |
| "num_input_tokens_seen": 710311936, |
| "step": 10840, |
| "train_runtime": 99188.0519, |
| "train_tokens_per_second": 7161.265 |
| }, |
| { |
| "epoch": 2.920778084404658, |
| "grad_norm": 0.90234375, |
| "learning_rate": 8.960011295464815e-06, |
| "loss": 3.9152, |
| "num_input_tokens_seen": 710967296, |
| "step": 10850, |
| "train_runtime": 99280.5448, |
| "train_tokens_per_second": 7161.195 |
| }, |
| { |
| "epoch": 2.923470417984788, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.918254530677317e-06, |
| "loss": 3.9429, |
| "num_input_tokens_seen": 711622656, |
| "step": 10860, |
| "train_runtime": 99372.7163, |
| "train_tokens_per_second": 7161.147 |
| }, |
| { |
| "epoch": 2.926162751564919, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.876574160676432e-06, |
| "loss": 3.9166, |
| "num_input_tokens_seen": 712278016, |
| "step": 10870, |
| "train_runtime": 99464.7853, |
| "train_tokens_per_second": 7161.107 |
| }, |
| { |
| "epoch": 2.9288550851450497, |
| "grad_norm": 0.95703125, |
| "learning_rate": 8.834970383460738e-06, |
| "loss": 4.0155, |
| "num_input_tokens_seen": 712933376, |
| "step": 10880, |
| "train_runtime": 99556.9298, |
| "train_tokens_per_second": 7161.062 |
| }, |
| { |
| "epoch": 2.93154741872518, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.79344339666501e-06, |
| "loss": 3.8929, |
| "num_input_tokens_seen": 713588736, |
| "step": 10890, |
| "train_runtime": 99649.0027, |
| "train_tokens_per_second": 7161.022 |
| }, |
| { |
| "epoch": 2.9342397523053108, |
| "grad_norm": 0.9921875, |
| "learning_rate": 8.751993397559177e-06, |
| "loss": 3.8957, |
| "num_input_tokens_seen": 714244096, |
| "step": 10900, |
| "train_runtime": 99741.2487, |
| "train_tokens_per_second": 7160.97 |
| }, |
| { |
| "epoch": 2.936932085885441, |
| "grad_norm": 0.97265625, |
| "learning_rate": 8.71062058304751e-06, |
| "loss": 3.8997, |
| "num_input_tokens_seen": 714899456, |
| "step": 10910, |
| "train_runtime": 99833.4505, |
| "train_tokens_per_second": 7160.921 |
| }, |
| { |
| "epoch": 2.939624419465572, |
| "grad_norm": 0.98828125, |
| "learning_rate": 8.66932514966755e-06, |
| "loss": 3.9571, |
| "num_input_tokens_seen": 715554816, |
| "step": 10920, |
| "train_runtime": 99925.6375, |
| "train_tokens_per_second": 7160.873 |
| }, |
| { |
| "epoch": 2.942316753045702, |
| "grad_norm": 0.96875, |
| "learning_rate": 8.628107293589326e-06, |
| "loss": 3.9485, |
| "num_input_tokens_seen": 716210176, |
| "step": 10930, |
| "train_runtime": 100018.0382, |
| "train_tokens_per_second": 7160.81 |
| }, |
| { |
| "epoch": 2.945009086625833, |
| "grad_norm": 0.99609375, |
| "learning_rate": 8.586967210614267e-06, |
| "loss": 3.8976, |
| "num_input_tokens_seen": 716865536, |
| "step": 10940, |
| "train_runtime": 100109.7805, |
| "train_tokens_per_second": 7160.794 |
| }, |
| { |
| "epoch": 2.9477014202059637, |
| "grad_norm": 0.9609375, |
| "learning_rate": 8.545905096174409e-06, |
| "loss": 3.9325, |
| "num_input_tokens_seen": 717520896, |
| "step": 10950, |
| "train_runtime": 100202.1004, |
| "train_tokens_per_second": 7160.737 |
| }, |
| { |
| "epoch": 2.950393753786094, |
| "grad_norm": 0.98046875, |
| "learning_rate": 8.504921145331372e-06, |
| "loss": 3.9919, |
| "num_input_tokens_seen": 718176256, |
| "step": 10960, |
| "train_runtime": 100293.9173, |
| "train_tokens_per_second": 7160.716 |
| }, |
| { |
| "epoch": 2.953086087366225, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.464015552775473e-06, |
| "loss": 3.9009, |
| "num_input_tokens_seen": 718831616, |
| "step": 10970, |
| "train_runtime": 100385.6555, |
| "train_tokens_per_second": 7160.701 |
| }, |
| { |
| "epoch": 2.955778420946355, |
| "grad_norm": 0.95703125, |
| "learning_rate": 8.423188512824807e-06, |
| "loss": 3.7904, |
| "num_input_tokens_seen": 719486976, |
| "step": 10980, |
| "train_runtime": 100477.6356, |
| "train_tokens_per_second": 7160.668 |
| }, |
| { |
| "epoch": 2.958470754526486, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.382440219424297e-06, |
| "loss": 3.8862, |
| "num_input_tokens_seen": 720142336, |
| "step": 10990, |
| "train_runtime": 100569.8027, |
| "train_tokens_per_second": 7160.622 |
| }, |
| { |
| "epoch": 2.961163088106616, |
| "grad_norm": 0.94921875, |
| "learning_rate": 8.341770866144799e-06, |
| "loss": 4.0179, |
| "num_input_tokens_seen": 720797696, |
| "step": 11000, |
| "train_runtime": 100661.6627, |
| "train_tokens_per_second": 7160.598 |
| }, |
| { |
| "epoch": 2.963855421686747, |
| "grad_norm": 0.97265625, |
| "learning_rate": 8.301180646182169e-06, |
| "loss": 3.8564, |
| "num_input_tokens_seen": 721453056, |
| "step": 11010, |
| "train_runtime": 100773.8193, |
| "train_tokens_per_second": 7159.132 |
| }, |
| { |
| "epoch": 2.9665477552668778, |
| "grad_norm": 0.94921875, |
| "learning_rate": 8.260669752356337e-06, |
| "loss": 3.8763, |
| "num_input_tokens_seen": 722108416, |
| "step": 11020, |
| "train_runtime": 100865.0354, |
| "train_tokens_per_second": 7159.155 |
| }, |
| { |
| "epoch": 2.969240088847008, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.220238377110434e-06, |
| "loss": 3.8952, |
| "num_input_tokens_seen": 722763776, |
| "step": 11030, |
| "train_runtime": 100957.4463, |
| "train_tokens_per_second": 7159.093 |
| }, |
| { |
| "epoch": 2.971932422427139, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.179886712509796e-06, |
| "loss": 3.9574, |
| "num_input_tokens_seen": 723419136, |
| "step": 11040, |
| "train_runtime": 101049.1864, |
| "train_tokens_per_second": 7159.079 |
| }, |
| { |
| "epoch": 2.974624756007269, |
| "grad_norm": 0.94921875, |
| "learning_rate": 8.139614950241156e-06, |
| "loss": 3.9683, |
| "num_input_tokens_seen": 724074496, |
| "step": 11050, |
| "train_runtime": 101141.4786, |
| "train_tokens_per_second": 7159.026 |
| }, |
| { |
| "epoch": 2.9773170895874, |
| "grad_norm": 0.97265625, |
| "learning_rate": 8.099423281611621e-06, |
| "loss": 4.0283, |
| "num_input_tokens_seen": 724729856, |
| "step": 11060, |
| "train_runtime": 101233.3511, |
| "train_tokens_per_second": 7159.003 |
| }, |
| { |
| "epoch": 2.9800094231675303, |
| "grad_norm": 0.9609375, |
| "learning_rate": 8.05931189754788e-06, |
| "loss": 3.8768, |
| "num_input_tokens_seen": 725385216, |
| "step": 11070, |
| "train_runtime": 101325.3837, |
| "train_tokens_per_second": 7158.968 |
| }, |
| { |
| "epoch": 2.982701756747661, |
| "grad_norm": 0.95703125, |
| "learning_rate": 8.019280988595182e-06, |
| "loss": 3.9215, |
| "num_input_tokens_seen": 726040576, |
| "step": 11080, |
| "train_runtime": 101417.6096, |
| "train_tokens_per_second": 7158.92 |
| }, |
| { |
| "epoch": 2.985394090327792, |
| "grad_norm": 0.92578125, |
| "learning_rate": 7.979330744916536e-06, |
| "loss": 3.9027, |
| "num_input_tokens_seen": 726695936, |
| "step": 11090, |
| "train_runtime": 101509.4109, |
| "train_tokens_per_second": 7158.902 |
| }, |
| { |
| "epoch": 2.988086423907922, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.939461356291722e-06, |
| "loss": 3.7375, |
| "num_input_tokens_seen": 727351296, |
| "step": 11100, |
| "train_runtime": 101601.1555, |
| "train_tokens_per_second": 7158.888 |
| }, |
| { |
| "epoch": 2.990778757488053, |
| "grad_norm": 0.92578125, |
| "learning_rate": 7.899673012116448e-06, |
| "loss": 3.9314, |
| "num_input_tokens_seen": 728006656, |
| "step": 11110, |
| "train_runtime": 101693.7993, |
| "train_tokens_per_second": 7158.811 |
| }, |
| { |
| "epoch": 2.993471091068183, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.859965901401417e-06, |
| "loss": 3.9704, |
| "num_input_tokens_seen": 728662016, |
| "step": 11120, |
| "train_runtime": 101785.7077, |
| "train_tokens_per_second": 7158.785 |
| }, |
| { |
| "epoch": 2.996163424648314, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.82034021277144e-06, |
| "loss": 3.9414, |
| "num_input_tokens_seen": 729317376, |
| "step": 11130, |
| "train_runtime": 101877.9311, |
| "train_tokens_per_second": 7158.738 |
| }, |
| { |
| "epoch": 2.9988557582284443, |
| "grad_norm": 0.98828125, |
| "learning_rate": 7.780796134464547e-06, |
| "loss": 3.8961, |
| "num_input_tokens_seen": 729972736, |
| "step": 11140, |
| "train_runtime": 101970.4027, |
| "train_tokens_per_second": 7158.673 |
| }, |
| { |
| "epoch": 3.001346166790065, |
| "grad_norm": 0.953125, |
| "learning_rate": 7.741333854331082e-06, |
| "loss": 3.9306, |
| "num_input_tokens_seen": 730578944, |
| "step": 11150, |
| "train_runtime": 102055.0045, |
| "train_tokens_per_second": 7158.678 |
| }, |
| { |
| "epoch": 3.004038500370196, |
| "grad_norm": 0.9296875, |
| "learning_rate": 7.701953559832803e-06, |
| "loss": 3.957, |
| "num_input_tokens_seen": 731234304, |
| "step": 11160, |
| "train_runtime": 102147.0284, |
| "train_tokens_per_second": 7158.645 |
| }, |
| { |
| "epoch": 3.0067308339503263, |
| "grad_norm": 0.94140625, |
| "learning_rate": 7.662655438042046e-06, |
| "loss": 3.9666, |
| "num_input_tokens_seen": 731889664, |
| "step": 11170, |
| "train_runtime": 102239.7093, |
| "train_tokens_per_second": 7158.566 |
| }, |
| { |
| "epoch": 3.009423167530457, |
| "grad_norm": 0.9375, |
| "learning_rate": 7.623439675640726e-06, |
| "loss": 3.872, |
| "num_input_tokens_seen": 732545024, |
| "step": 11180, |
| "train_runtime": 102331.4232, |
| "train_tokens_per_second": 7158.554 |
| }, |
| { |
| "epoch": 3.012115501110588, |
| "grad_norm": 0.98046875, |
| "learning_rate": 7.5843064589195854e-06, |
| "loss": 3.8952, |
| "num_input_tokens_seen": 733200384, |
| "step": 11190, |
| "train_runtime": 102423.6666, |
| "train_tokens_per_second": 7158.506 |
| }, |
| { |
| "epoch": 3.014807834690718, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.54525597377718e-06, |
| "loss": 3.8343, |
| "num_input_tokens_seen": 733855744, |
| "step": 11200, |
| "train_runtime": 102515.5914, |
| "train_tokens_per_second": 7158.479 |
| }, |
| { |
| "epoch": 3.017500168270849, |
| "grad_norm": 0.92578125, |
| "learning_rate": 7.506288405719111e-06, |
| "loss": 3.971, |
| "num_input_tokens_seen": 734511104, |
| "step": 11210, |
| "train_runtime": 102607.6931, |
| "train_tokens_per_second": 7158.441 |
| }, |
| { |
| "epoch": 3.020192501850979, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.467403939857057e-06, |
| "loss": 3.9035, |
| "num_input_tokens_seen": 735166464, |
| "step": 11220, |
| "train_runtime": 102700.0139, |
| "train_tokens_per_second": 7158.387 |
| }, |
| { |
| "epoch": 3.02288483543111, |
| "grad_norm": 0.99609375, |
| "learning_rate": 7.428602760907941e-06, |
| "loss": 3.9609, |
| "num_input_tokens_seen": 735821824, |
| "step": 11230, |
| "train_runtime": 102791.8887, |
| "train_tokens_per_second": 7158.365 |
| }, |
| { |
| "epoch": 3.0255771690112403, |
| "grad_norm": 0.94140625, |
| "learning_rate": 7.3898850531930296e-06, |
| "loss": 3.824, |
| "num_input_tokens_seen": 736477184, |
| "step": 11240, |
| "train_runtime": 102883.9877, |
| "train_tokens_per_second": 7158.327 |
| }, |
| { |
| "epoch": 3.028269502591371, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.351251000637074e-06, |
| "loss": 3.918, |
| "num_input_tokens_seen": 737132544, |
| "step": 11250, |
| "train_runtime": 102976.1963, |
| "train_tokens_per_second": 7158.281 |
| }, |
| { |
| "epoch": 3.030961836171502, |
| "grad_norm": 0.9296875, |
| "learning_rate": 7.312700786767434e-06, |
| "loss": 3.9329, |
| "num_input_tokens_seen": 737787904, |
| "step": 11260, |
| "train_runtime": 103068.404, |
| "train_tokens_per_second": 7158.235 |
| }, |
| { |
| "epoch": 3.033654169751632, |
| "grad_norm": 1.0078125, |
| "learning_rate": 7.274234594713192e-06, |
| "loss": 3.9396, |
| "num_input_tokens_seen": 738443264, |
| "step": 11270, |
| "train_runtime": 103160.4604, |
| "train_tokens_per_second": 7158.201 |
| }, |
| { |
| "epoch": 3.036346503331763, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.2358526072042884e-06, |
| "loss": 3.9039, |
| "num_input_tokens_seen": 739098624, |
| "step": 11280, |
| "train_runtime": 103252.2124, |
| "train_tokens_per_second": 7158.187 |
| }, |
| { |
| "epoch": 3.0390388369118932, |
| "grad_norm": 0.953125, |
| "learning_rate": 7.197555006570692e-06, |
| "loss": 3.8997, |
| "num_input_tokens_seen": 739753984, |
| "step": 11290, |
| "train_runtime": 103344.3731, |
| "train_tokens_per_second": 7158.145 |
| }, |
| { |
| "epoch": 3.041731170492024, |
| "grad_norm": 0.9453125, |
| "learning_rate": 7.159341974741443e-06, |
| "loss": 4.0359, |
| "num_input_tokens_seen": 740409344, |
| "step": 11300, |
| "train_runtime": 103436.4539, |
| "train_tokens_per_second": 7158.108 |
| }, |
| { |
| "epoch": 3.0444235040721543, |
| "grad_norm": 0.90625, |
| "learning_rate": 7.121213693243911e-06, |
| "loss": 3.8726, |
| "num_input_tokens_seen": 741064704, |
| "step": 11310, |
| "train_runtime": 103528.1729, |
| "train_tokens_per_second": 7158.097 |
| }, |
| { |
| "epoch": 3.047115837652285, |
| "grad_norm": 0.9375, |
| "learning_rate": 7.0831703432028e-06, |
| "loss": 3.8816, |
| "num_input_tokens_seen": 741720064, |
| "step": 11320, |
| "train_runtime": 103620.17, |
| "train_tokens_per_second": 7158.066 |
| }, |
| { |
| "epoch": 3.049808171232416, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.0452121053394214e-06, |
| "loss": 3.9057, |
| "num_input_tokens_seen": 742375424, |
| "step": 11330, |
| "train_runtime": 103712.2381, |
| "train_tokens_per_second": 7158.031 |
| }, |
| { |
| "epoch": 3.052500504812546, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.007339159970702e-06, |
| "loss": 3.8628, |
| "num_input_tokens_seen": 743030784, |
| "step": 11340, |
| "train_runtime": 103804.578, |
| "train_tokens_per_second": 7157.977 |
| }, |
| { |
| "epoch": 3.055192838392677, |
| "grad_norm": 0.95703125, |
| "learning_rate": 6.9695516870084575e-06, |
| "loss": 3.8919, |
| "num_input_tokens_seen": 743686144, |
| "step": 11350, |
| "train_runtime": 103896.1137, |
| "train_tokens_per_second": 7157.978 |
| }, |
| { |
| "epoch": 3.0578851719728073, |
| "grad_norm": 0.921875, |
| "learning_rate": 6.93184986595844e-06, |
| "loss": 3.9367, |
| "num_input_tokens_seen": 744341504, |
| "step": 11360, |
| "train_runtime": 103988.6753, |
| "train_tokens_per_second": 7157.909 |
| }, |
| { |
| "epoch": 3.060577505552938, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.894233875919523e-06, |
| "loss": 3.9588, |
| "num_input_tokens_seen": 744996864, |
| "step": 11370, |
| "train_runtime": 104080.7445, |
| "train_tokens_per_second": 7157.874 |
| }, |
| { |
| "epoch": 3.0632698391330684, |
| "grad_norm": 0.92578125, |
| "learning_rate": 6.856703895582858e-06, |
| "loss": 3.9219, |
| "num_input_tokens_seen": 745652224, |
| "step": 11380, |
| "train_runtime": 104172.4223, |
| "train_tokens_per_second": 7157.866 |
| }, |
| { |
| "epoch": 3.065962172713199, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.819260103231007e-06, |
| "loss": 3.9588, |
| "num_input_tokens_seen": 746307584, |
| "step": 11390, |
| "train_runtime": 104264.209, |
| "train_tokens_per_second": 7157.85 |
| }, |
| { |
| "epoch": 3.06865450629333, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.781902676737106e-06, |
| "loss": 3.9269, |
| "num_input_tokens_seen": 746962944, |
| "step": 11400, |
| "train_runtime": 104356.0904, |
| "train_tokens_per_second": 7157.828 |
| }, |
| { |
| "epoch": 3.0713468398734602, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.744631793564027e-06, |
| "loss": 3.9009, |
| "num_input_tokens_seen": 747618304, |
| "step": 11410, |
| "train_runtime": 104448.1552, |
| "train_tokens_per_second": 7157.793 |
| }, |
| { |
| "epoch": 3.074039173453591, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.707447630763505e-06, |
| "loss": 3.9709, |
| "num_input_tokens_seen": 748273664, |
| "step": 11420, |
| "train_runtime": 104539.7768, |
| "train_tokens_per_second": 7157.789 |
| }, |
| { |
| "epoch": 3.0767315070337213, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.670350364975358e-06, |
| "loss": 3.9597, |
| "num_input_tokens_seen": 748929024, |
| "step": 11430, |
| "train_runtime": 104631.99, |
| "train_tokens_per_second": 7157.744 |
| }, |
| { |
| "epoch": 3.079423840613852, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.633340172426552e-06, |
| "loss": 3.9172, |
| "num_input_tokens_seen": 749584384, |
| "step": 11440, |
| "train_runtime": 104724.0749, |
| "train_tokens_per_second": 7157.708 |
| }, |
| { |
| "epoch": 3.0821161741939824, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.596417228930482e-06, |
| "loss": 3.9498, |
| "num_input_tokens_seen": 750239744, |
| "step": 11450, |
| "train_runtime": 104816.6624, |
| "train_tokens_per_second": 7157.638 |
| }, |
| { |
| "epoch": 3.084808507774113, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.5595817098860095e-06, |
| "loss": 3.879, |
| "num_input_tokens_seen": 750895104, |
| "step": 11460, |
| "train_runtime": 104908.4207, |
| "train_tokens_per_second": 7157.625 |
| }, |
| { |
| "epoch": 3.087500841354244, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.522833790276761e-06, |
| "loss": 3.9198, |
| "num_input_tokens_seen": 751550464, |
| "step": 11470, |
| "train_runtime": 105000.5214, |
| "train_tokens_per_second": 7157.588 |
| }, |
| { |
| "epoch": 3.0901931749343743, |
| "grad_norm": 0.94140625, |
| "learning_rate": 6.486173644670169e-06, |
| "loss": 4.0222, |
| "num_input_tokens_seen": 752205824, |
| "step": 11480, |
| "train_runtime": 105092.6597, |
| "train_tokens_per_second": 7157.549 |
| }, |
| { |
| "epoch": 3.092885508514505, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.449601447216752e-06, |
| "loss": 3.8615, |
| "num_input_tokens_seen": 752861184, |
| "step": 11490, |
| "train_runtime": 105184.1985, |
| "train_tokens_per_second": 7157.55 |
| }, |
| { |
| "epoch": 3.0955778420946354, |
| "grad_norm": 0.94921875, |
| "learning_rate": 6.413117371649216e-06, |
| "loss": 3.9926, |
| "num_input_tokens_seen": 753516544, |
| "step": 11500, |
| "train_runtime": 105276.9824, |
| "train_tokens_per_second": 7157.467 |
| }, |
| { |
| "epoch": 3.098270175674766, |
| "grad_norm": 0.95703125, |
| "learning_rate": 6.376721591281651e-06, |
| "loss": 4.0003, |
| "num_input_tokens_seen": 754171904, |
| "step": 11510, |
| "train_runtime": 105386.85, |
| "train_tokens_per_second": 7156.224 |
| }, |
| { |
| "epoch": 3.1009625092548965, |
| "grad_norm": 1.0625, |
| "learning_rate": 6.340414279008719e-06, |
| "loss": 3.9484, |
| "num_input_tokens_seen": 754827264, |
| "step": 11520, |
| "train_runtime": 105478.7999, |
| "train_tokens_per_second": 7156.199 |
| }, |
| { |
| "epoch": 3.1036548428350272, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.304195607304819e-06, |
| "loss": 3.9393, |
| "num_input_tokens_seen": 755482624, |
| "step": 11530, |
| "train_runtime": 105571.1372, |
| "train_tokens_per_second": 7156.147 |
| }, |
| { |
| "epoch": 3.106347176415158, |
| "grad_norm": 0.9453125, |
| "learning_rate": 6.268065748223268e-06, |
| "loss": 3.9351, |
| "num_input_tokens_seen": 756137984, |
| "step": 11540, |
| "train_runtime": 105662.9611, |
| "train_tokens_per_second": 7156.131 |
| }, |
| { |
| "epoch": 3.1090395099952883, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.2320248733954896e-06, |
| "loss": 3.8764, |
| "num_input_tokens_seen": 756793344, |
| "step": 11550, |
| "train_runtime": 105755.0931, |
| "train_tokens_per_second": 7156.094 |
| }, |
| { |
| "epoch": 3.111731843575419, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.1960731540301905e-06, |
| "loss": 3.9, |
| "num_input_tokens_seen": 757448704, |
| "step": 11560, |
| "train_runtime": 105848.1538, |
| "train_tokens_per_second": 7155.994 |
| }, |
| { |
| "epoch": 3.1144241771555494, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.16021076091258e-06, |
| "loss": 4.0642, |
| "num_input_tokens_seen": 758104064, |
| "step": 11570, |
| "train_runtime": 105940.0057, |
| "train_tokens_per_second": 7155.975 |
| }, |
| { |
| "epoch": 3.11711651073568, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.1244378644034845e-06, |
| "loss": 3.8746, |
| "num_input_tokens_seen": 758759424, |
| "step": 11580, |
| "train_runtime": 106032.0039, |
| "train_tokens_per_second": 7155.947 |
| }, |
| { |
| "epoch": 3.1198088443158105, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.088754634438637e-06, |
| "loss": 3.9193, |
| "num_input_tokens_seen": 759414784, |
| "step": 11590, |
| "train_runtime": 106123.8501, |
| "train_tokens_per_second": 7155.929 |
| }, |
| { |
| "epoch": 3.1225011778959413, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.053161240527766e-06, |
| "loss": 3.9818, |
| "num_input_tokens_seen": 760070144, |
| "step": 11600, |
| "train_runtime": 106215.7767, |
| "train_tokens_per_second": 7155.906 |
| }, |
| { |
| "epoch": 3.125193511476072, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.017657851753891e-06, |
| "loss": 3.9894, |
| "num_input_tokens_seen": 760725504, |
| "step": 11610, |
| "train_runtime": 106307.9492, |
| "train_tokens_per_second": 7155.867 |
| }, |
| { |
| "epoch": 3.1278858450562024, |
| "grad_norm": 0.9375, |
| "learning_rate": 5.982244636772441e-06, |
| "loss": 3.8536, |
| "num_input_tokens_seen": 761380864, |
| "step": 11620, |
| "train_runtime": 106399.9187, |
| "train_tokens_per_second": 7155.841 |
| }, |
| { |
| "epoch": 3.130578178636333, |
| "grad_norm": 0.94140625, |
| "learning_rate": 5.9469217638104894e-06, |
| "loss": 3.9471, |
| "num_input_tokens_seen": 762036224, |
| "step": 11630, |
| "train_runtime": 106492.0655, |
| "train_tokens_per_second": 7155.803 |
| }, |
| { |
| "epoch": 3.1332705122164635, |
| "grad_norm": 0.9296875, |
| "learning_rate": 5.911689400665954e-06, |
| "loss": 3.9905, |
| "num_input_tokens_seen": 762691584, |
| "step": 11640, |
| "train_runtime": 106584.3106, |
| "train_tokens_per_second": 7155.758 |
| }, |
| { |
| "epoch": 3.1359628457965942, |
| "grad_norm": 0.99609375, |
| "learning_rate": 5.876547714706787e-06, |
| "loss": 3.9328, |
| "num_input_tokens_seen": 763346944, |
| "step": 11650, |
| "train_runtime": 106676.6118, |
| "train_tokens_per_second": 7155.71 |
| }, |
| { |
| "epoch": 3.138655179376725, |
| "grad_norm": 0.9609375, |
| "learning_rate": 5.841496872870192e-06, |
| "loss": 3.8978, |
| "num_input_tokens_seen": 764002304, |
| "step": 11660, |
| "train_runtime": 106769.0125, |
| "train_tokens_per_second": 7155.656 |
| }, |
| { |
| "epoch": 3.1413475129568553, |
| "grad_norm": 0.94140625, |
| "learning_rate": 5.806537041661828e-06, |
| "loss": 3.953, |
| "num_input_tokens_seen": 764657664, |
| "step": 11670, |
| "train_runtime": 106861.4721, |
| "train_tokens_per_second": 7155.597 |
| }, |
| { |
| "epoch": 3.144039846536986, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.771668387155002e-06, |
| "loss": 3.9492, |
| "num_input_tokens_seen": 765313024, |
| "step": 11680, |
| "train_runtime": 106953.7644, |
| "train_tokens_per_second": 7155.55 |
| }, |
| { |
| "epoch": 3.1467321801171164, |
| "grad_norm": 1.0, |
| "learning_rate": 5.7368910749899305e-06, |
| "loss": 3.9586, |
| "num_input_tokens_seen": 765968384, |
| "step": 11690, |
| "train_runtime": 107045.9565, |
| "train_tokens_per_second": 7155.51 |
| }, |
| { |
| "epoch": 3.149424513697247, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.702205270372868e-06, |
| "loss": 3.9319, |
| "num_input_tokens_seen": 766623744, |
| "step": 11700, |
| "train_runtime": 107137.7942, |
| "train_tokens_per_second": 7155.493 |
| }, |
| { |
| "epoch": 3.1521168472773775, |
| "grad_norm": 0.97265625, |
| "learning_rate": 5.667611138075418e-06, |
| "loss": 3.9963, |
| "num_input_tokens_seen": 767279104, |
| "step": 11710, |
| "train_runtime": 107230.4527, |
| "train_tokens_per_second": 7155.422 |
| }, |
| { |
| "epoch": 3.1548091808575083, |
| "grad_norm": 0.92578125, |
| "learning_rate": 5.63310884243366e-06, |
| "loss": 3.9246, |
| "num_input_tokens_seen": 767934464, |
| "step": 11720, |
| "train_runtime": 107322.4378, |
| "train_tokens_per_second": 7155.395 |
| }, |
| { |
| "epoch": 3.157501514437639, |
| "grad_norm": 0.93359375, |
| "learning_rate": 5.598698547347458e-06, |
| "loss": 4.0042, |
| "num_input_tokens_seen": 768589824, |
| "step": 11730, |
| "train_runtime": 107414.9071, |
| "train_tokens_per_second": 7155.337 |
| }, |
| { |
| "epoch": 3.1601938480177694, |
| "grad_norm": 0.96875, |
| "learning_rate": 5.564380416279588e-06, |
| "loss": 3.8923, |
| "num_input_tokens_seen": 769245184, |
| "step": 11740, |
| "train_runtime": 107507.3025, |
| "train_tokens_per_second": 7155.283 |
| }, |
| { |
| "epoch": 3.1628861815979, |
| "grad_norm": 0.9296875, |
| "learning_rate": 5.530154612255054e-06, |
| "loss": 3.9094, |
| "num_input_tokens_seen": 769900544, |
| "step": 11750, |
| "train_runtime": 107599.2603, |
| "train_tokens_per_second": 7155.259 |
| }, |
| { |
| "epoch": 3.1655785151780305, |
| "grad_norm": 0.98828125, |
| "learning_rate": 5.496021297860237e-06, |
| "loss": 3.9055, |
| "num_input_tokens_seen": 770555904, |
| "step": 11760, |
| "train_runtime": 107691.7989, |
| "train_tokens_per_second": 7155.196 |
| }, |
| { |
| "epoch": 3.1682708487581612, |
| "grad_norm": 0.921875, |
| "learning_rate": 5.461980635242178e-06, |
| "loss": 3.9176, |
| "num_input_tokens_seen": 771211264, |
| "step": 11770, |
| "train_runtime": 107783.6916, |
| "train_tokens_per_second": 7155.176 |
| }, |
| { |
| "epoch": 3.1709631823382916, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.428032786107764e-06, |
| "loss": 3.9859, |
| "num_input_tokens_seen": 771866624, |
| "step": 11780, |
| "train_runtime": 107875.9842, |
| "train_tokens_per_second": 7155.129 |
| }, |
| { |
| "epoch": 3.1736555159184223, |
| "grad_norm": 0.93359375, |
| "learning_rate": 5.394177911722994e-06, |
| "loss": 3.8847, |
| "num_input_tokens_seen": 772521984, |
| "step": 11790, |
| "train_runtime": 107968.1245, |
| "train_tokens_per_second": 7155.093 |
| }, |
| { |
| "epoch": 3.176347849498553, |
| "grad_norm": 0.9375, |
| "learning_rate": 5.3604161729122e-06, |
| "loss": 4.0042, |
| "num_input_tokens_seen": 773177344, |
| "step": 11800, |
| "train_runtime": 108060.6872, |
| "train_tokens_per_second": 7155.029 |
| }, |
| { |
| "epoch": 3.1790401830786834, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.326747730057272e-06, |
| "loss": 3.96, |
| "num_input_tokens_seen": 773832704, |
| "step": 11810, |
| "train_runtime": 108152.2901, |
| "train_tokens_per_second": 7155.028 |
| }, |
| { |
| "epoch": 3.181732516658814, |
| "grad_norm": 0.94140625, |
| "learning_rate": 5.293172743096908e-06, |
| "loss": 3.9886, |
| "num_input_tokens_seen": 774488064, |
| "step": 11820, |
| "train_runtime": 108245.0968, |
| "train_tokens_per_second": 7154.948 |
| }, |
| { |
| "epoch": 3.1844248502389445, |
| "grad_norm": 0.9296875, |
| "learning_rate": 5.259691371525877e-06, |
| "loss": 3.9539, |
| "num_input_tokens_seen": 775143424, |
| "step": 11830, |
| "train_runtime": 108337.5367, |
| "train_tokens_per_second": 7154.892 |
| }, |
| { |
| "epoch": 3.1871171838190753, |
| "grad_norm": 0.92578125, |
| "learning_rate": 5.226303774394192e-06, |
| "loss": 3.8804, |
| "num_input_tokens_seen": 775798784, |
| "step": 11840, |
| "train_runtime": 108429.8092, |
| "train_tokens_per_second": 7154.848 |
| }, |
| { |
| "epoch": 3.1898095173992056, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.193010110306454e-06, |
| "loss": 3.9077, |
| "num_input_tokens_seen": 776454144, |
| "step": 11850, |
| "train_runtime": 108521.9051, |
| "train_tokens_per_second": 7154.815 |
| }, |
| { |
| "epoch": 3.1925018509793364, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.159810537420981e-06, |
| "loss": 3.9966, |
| "num_input_tokens_seen": 777109504, |
| "step": 11860, |
| "train_runtime": 108613.9934, |
| "train_tokens_per_second": 7154.783 |
| }, |
| { |
| "epoch": 3.195194184559467, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.12670521344919e-06, |
| "loss": 3.9349, |
| "num_input_tokens_seen": 777764864, |
| "step": 11870, |
| "train_runtime": 108706.2346, |
| "train_tokens_per_second": 7154.74 |
| }, |
| { |
| "epoch": 3.1978865181395975, |
| "grad_norm": 0.92578125, |
| "learning_rate": 5.0936942956547075e-06, |
| "loss": 3.9206, |
| "num_input_tokens_seen": 778420224, |
| "step": 11880, |
| "train_runtime": 108797.8689, |
| "train_tokens_per_second": 7154.738 |
| }, |
| { |
| "epoch": 3.2005788517197282, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.060777940852751e-06, |
| "loss": 3.9034, |
| "num_input_tokens_seen": 779075584, |
| "step": 11890, |
| "train_runtime": 108890.5878, |
| "train_tokens_per_second": 7154.664 |
| }, |
| { |
| "epoch": 3.2032711852998585, |
| "grad_norm": 0.94140625, |
| "learning_rate": 5.0279563054092924e-06, |
| "loss": 3.8623, |
| "num_input_tokens_seen": 779730944, |
| "step": 11900, |
| "train_runtime": 108982.9782, |
| "train_tokens_per_second": 7154.612 |
| }, |
| { |
| "epoch": 3.2059635188799893, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.995229545240357e-06, |
| "loss": 3.9039, |
| "num_input_tokens_seen": 780386304, |
| "step": 11910, |
| "train_runtime": 109075.6339, |
| "train_tokens_per_second": 7154.543 |
| }, |
| { |
| "epoch": 3.2086558524601196, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.962597815811274e-06, |
| "loss": 3.9737, |
| "num_input_tokens_seen": 781041664, |
| "step": 11920, |
| "train_runtime": 109167.8315, |
| "train_tokens_per_second": 7154.504 |
| }, |
| { |
| "epoch": 3.2113481860402504, |
| "grad_norm": 1.0, |
| "learning_rate": 4.930061272135941e-06, |
| "loss": 4.0498, |
| "num_input_tokens_seen": 781697024, |
| "step": 11930, |
| "train_runtime": 109260.1843, |
| "train_tokens_per_second": 7154.455 |
| }, |
| { |
| "epoch": 3.214040519620381, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.897620068776077e-06, |
| "loss": 3.861, |
| "num_input_tokens_seen": 782352384, |
| "step": 11940, |
| "train_runtime": 109352.1011, |
| "train_tokens_per_second": 7154.434 |
| }, |
| { |
| "epoch": 3.2167328532005115, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.865274359840513e-06, |
| "loss": 3.9858, |
| "num_input_tokens_seen": 783007744, |
| "step": 11950, |
| "train_runtime": 109444.7089, |
| "train_tokens_per_second": 7154.368 |
| }, |
| { |
| "epoch": 3.2194251867806423, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.833024298984415e-06, |
| "loss": 3.9379, |
| "num_input_tokens_seen": 783663104, |
| "step": 11960, |
| "train_runtime": 109536.6778, |
| "train_tokens_per_second": 7154.344 |
| }, |
| { |
| "epoch": 3.2221175203607726, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.80087003940862e-06, |
| "loss": 3.9185, |
| "num_input_tokens_seen": 784318464, |
| "step": 11970, |
| "train_runtime": 109629.3566, |
| "train_tokens_per_second": 7154.274 |
| }, |
| { |
| "epoch": 3.2248098539409034, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.768811733858819e-06, |
| "loss": 3.9845, |
| "num_input_tokens_seen": 784973824, |
| "step": 11980, |
| "train_runtime": 109720.7631, |
| "train_tokens_per_second": 7154.287 |
| }, |
| { |
| "epoch": 3.2275021875210337, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.736849534624946e-06, |
| "loss": 3.9469, |
| "num_input_tokens_seen": 785629184, |
| "step": 11990, |
| "train_runtime": 109814.1506, |
| "train_tokens_per_second": 7154.171 |
| }, |
| { |
| "epoch": 3.2301945211011645, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.704983593540324e-06, |
| "loss": 3.8453, |
| "num_input_tokens_seen": 786284544, |
| "step": 12000, |
| "train_runtime": 109905.8052, |
| "train_tokens_per_second": 7154.168 |
| }, |
| { |
| "epoch": 3.232886854681295, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.673214061981068e-06, |
| "loss": 3.8652, |
| "num_input_tokens_seen": 786939904, |
| "step": 12010, |
| "train_runtime": 110015.6893, |
| "train_tokens_per_second": 7152.979 |
| }, |
| { |
| "epoch": 3.2355791882614255, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.641541090865276e-06, |
| "loss": 3.927, |
| "num_input_tokens_seen": 787595264, |
| "step": 12020, |
| "train_runtime": 110107.01, |
| "train_tokens_per_second": 7152.998 |
| }, |
| { |
| "epoch": 3.2382715218415563, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.6099648306523556e-06, |
| "loss": 3.8873, |
| "num_input_tokens_seen": 788250624, |
| "step": 12030, |
| "train_runtime": 110198.8521, |
| "train_tokens_per_second": 7152.984 |
| }, |
| { |
| "epoch": 3.2409638554216866, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.578485431342297e-06, |
| "loss": 3.9165, |
| "num_input_tokens_seen": 788905984, |
| "step": 12040, |
| "train_runtime": 110291.0577, |
| "train_tokens_per_second": 7152.946 |
| }, |
| { |
| "epoch": 3.2436561890018174, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.547103042474963e-06, |
| "loss": 3.8838, |
| "num_input_tokens_seen": 789561344, |
| "step": 12050, |
| "train_runtime": 110382.4853, |
| "train_tokens_per_second": 7152.959 |
| }, |
| { |
| "epoch": 3.2463485225819477, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.515817813129372e-06, |
| "loss": 3.9319, |
| "num_input_tokens_seen": 790216704, |
| "step": 12060, |
| "train_runtime": 110475.1123, |
| "train_tokens_per_second": 7152.893 |
| }, |
| { |
| "epoch": 3.2490408561620785, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.484629891923004e-06, |
| "loss": 3.8857, |
| "num_input_tokens_seen": 790872064, |
| "step": 12070, |
| "train_runtime": 110566.7673, |
| "train_tokens_per_second": 7152.891 |
| }, |
| { |
| "epoch": 3.2517331897422093, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.453539427011072e-06, |
| "loss": 3.8892, |
| "num_input_tokens_seen": 791527424, |
| "step": 12080, |
| "train_runtime": 110659.4322, |
| "train_tokens_per_second": 7152.824 |
| }, |
| { |
| "epoch": 3.2544255233223396, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.4225465660858664e-06, |
| "loss": 3.9316, |
| "num_input_tokens_seen": 792182784, |
| "step": 12090, |
| "train_runtime": 110752.3031, |
| "train_tokens_per_second": 7152.743 |
| }, |
| { |
| "epoch": 3.2571178569024704, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.391651456375967e-06, |
| "loss": 3.9423, |
| "num_input_tokens_seen": 792838144, |
| "step": 12100, |
| "train_runtime": 110845.2252, |
| "train_tokens_per_second": 7152.659 |
| }, |
| { |
| "epoch": 3.2598101904826007, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.360854244645649e-06, |
| "loss": 3.8601, |
| "num_input_tokens_seen": 793493504, |
| "step": 12110, |
| "train_runtime": 110937.2339, |
| "train_tokens_per_second": 7152.635 |
| }, |
| { |
| "epoch": 3.2625025240627314, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.330155077194078e-06, |
| "loss": 3.9145, |
| "num_input_tokens_seen": 794148864, |
| "step": 12120, |
| "train_runtime": 111030.4523, |
| "train_tokens_per_second": 7152.532 |
| }, |
| { |
| "epoch": 3.2651948576428618, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.299554099854733e-06, |
| "loss": 3.8768, |
| "num_input_tokens_seen": 794804224, |
| "step": 12130, |
| "train_runtime": 111122.8894, |
| "train_tokens_per_second": 7152.48 |
| }, |
| { |
| "epoch": 3.2678871912229925, |
| "grad_norm": 0.95703125, |
| "learning_rate": 4.269051457994586e-06, |
| "loss": 4.1122, |
| "num_input_tokens_seen": 795459584, |
| "step": 12140, |
| "train_runtime": 111215.379, |
| "train_tokens_per_second": 7152.424 |
| }, |
| { |
| "epoch": 3.2705795248031233, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.238647296513526e-06, |
| "loss": 3.8964, |
| "num_input_tokens_seen": 796114944, |
| "step": 12150, |
| "train_runtime": 111307.6864, |
| "train_tokens_per_second": 7152.381 |
| }, |
| { |
| "epoch": 3.2732718583832536, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.208341759843595e-06, |
| "loss": 3.9527, |
| "num_input_tokens_seen": 796770304, |
| "step": 12160, |
| "train_runtime": 111400.1018, |
| "train_tokens_per_second": 7152.33 |
| }, |
| { |
| "epoch": 3.2759641919633844, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.178134991948332e-06, |
| "loss": 3.8812, |
| "num_input_tokens_seen": 797425664, |
| "step": 12170, |
| "train_runtime": 111493.1246, |
| "train_tokens_per_second": 7152.241 |
| }, |
| { |
| "epoch": 3.2786565255435147, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.148027136322089e-06, |
| "loss": 3.9501, |
| "num_input_tokens_seen": 798081024, |
| "step": 12180, |
| "train_runtime": 111585.511, |
| "train_tokens_per_second": 7152.192 |
| }, |
| { |
| "epoch": 3.2813488591236455, |
| "grad_norm": 1.0, |
| "learning_rate": 4.118018335989335e-06, |
| "loss": 4.0134, |
| "num_input_tokens_seen": 798736384, |
| "step": 12190, |
| "train_runtime": 111678.0109, |
| "train_tokens_per_second": 7152.137 |
| }, |
| { |
| "epoch": 3.284041192703776, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.088108733503995e-06, |
| "loss": 3.8888, |
| "num_input_tokens_seen": 799391744, |
| "step": 12200, |
| "train_runtime": 111770.2228, |
| "train_tokens_per_second": 7152.099 |
| }, |
| { |
| "epoch": 3.2867335262839066, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.058298470948763e-06, |
| "loss": 3.9358, |
| "num_input_tokens_seen": 800047104, |
| "step": 12210, |
| "train_runtime": 111862.7143, |
| "train_tokens_per_second": 7152.044 |
| }, |
| { |
| "epoch": 3.2894258598640373, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.028587689934421e-06, |
| "loss": 3.9023, |
| "num_input_tokens_seen": 800702464, |
| "step": 12220, |
| "train_runtime": 111955.0352, |
| "train_tokens_per_second": 7152.0 |
| }, |
| { |
| "epoch": 3.2921181934441677, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.998976531599197e-06, |
| "loss": 3.8785, |
| "num_input_tokens_seen": 801357824, |
| "step": 12230, |
| "train_runtime": 112047.4267, |
| "train_tokens_per_second": 7151.952 |
| }, |
| { |
| "epoch": 3.2948105270242984, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.969465136608028e-06, |
| "loss": 4.003, |
| "num_input_tokens_seen": 802013184, |
| "step": 12240, |
| "train_runtime": 112139.6241, |
| "train_tokens_per_second": 7151.916 |
| }, |
| { |
| "epoch": 3.2975028606044288, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.940053645151984e-06, |
| "loss": 3.9166, |
| "num_input_tokens_seen": 802668544, |
| "step": 12250, |
| "train_runtime": 112231.7633, |
| "train_tokens_per_second": 7151.884 |
| }, |
| { |
| "epoch": 3.3001951941845595, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.910742196947509e-06, |
| "loss": 3.9437, |
| "num_input_tokens_seen": 803323904, |
| "step": 12260, |
| "train_runtime": 112324.1686, |
| "train_tokens_per_second": 7151.835 |
| }, |
| { |
| "epoch": 3.30288752776469, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.881530931235841e-06, |
| "loss": 3.9045, |
| "num_input_tokens_seen": 803979264, |
| "step": 12270, |
| "train_runtime": 112416.5708, |
| "train_tokens_per_second": 7151.786 |
| }, |
| { |
| "epoch": 3.3055798613448206, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.852419986782271e-06, |
| "loss": 3.8566, |
| "num_input_tokens_seen": 804634624, |
| "step": 12280, |
| "train_runtime": 112509.2023, |
| "train_tokens_per_second": 7151.723 |
| }, |
| { |
| "epoch": 3.3082721949249514, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.823409501875558e-06, |
| "loss": 3.8607, |
| "num_input_tokens_seen": 805289984, |
| "step": 12290, |
| "train_runtime": 112601.4274, |
| "train_tokens_per_second": 7151.685 |
| }, |
| { |
| "epoch": 3.3109645285050817, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.7944996143272155e-06, |
| "loss": 3.9406, |
| "num_input_tokens_seen": 805945344, |
| "step": 12300, |
| "train_runtime": 112694.7222, |
| "train_tokens_per_second": 7151.58 |
| }, |
| { |
| "epoch": 3.3136568620852125, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.7656904614708917e-06, |
| "loss": 3.9762, |
| "num_input_tokens_seen": 806600704, |
| "step": 12310, |
| "train_runtime": 112786.9265, |
| "train_tokens_per_second": 7151.544 |
| }, |
| { |
| "epoch": 3.316349195665343, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.7369821801616966e-06, |
| "loss": 3.966, |
| "num_input_tokens_seen": 807256064, |
| "step": 12320, |
| "train_runtime": 112879.4784, |
| "train_tokens_per_second": 7151.486 |
| }, |
| { |
| "epoch": 3.3190415292454736, |
| "grad_norm": 0.9140625, |
| "learning_rate": 3.708374906775561e-06, |
| "loss": 3.9349, |
| "num_input_tokens_seen": 807911424, |
| "step": 12330, |
| "train_runtime": 112972.1945, |
| "train_tokens_per_second": 7151.418 |
| }, |
| { |
| "epoch": 3.321733862825604, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.679868777208584e-06, |
| "loss": 3.7792, |
| "num_input_tokens_seen": 808566784, |
| "step": 12340, |
| "train_runtime": 113064.7919, |
| "train_tokens_per_second": 7151.358 |
| }, |
| { |
| "epoch": 3.3244261964057347, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.6514639268764113e-06, |
| "loss": 3.8643, |
| "num_input_tokens_seen": 809222144, |
| "step": 12350, |
| "train_runtime": 113157.2744, |
| "train_tokens_per_second": 7151.305 |
| }, |
| { |
| "epoch": 3.3271185299858654, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.623160490713534e-06, |
| "loss": 3.8796, |
| "num_input_tokens_seen": 809877504, |
| "step": 12360, |
| "train_runtime": 113250.0104, |
| "train_tokens_per_second": 7151.236 |
| }, |
| { |
| "epoch": 3.3298108635659958, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.5949586031727267e-06, |
| "loss": 3.9112, |
| "num_input_tokens_seen": 810532864, |
| "step": 12370, |
| "train_runtime": 113342.2264, |
| "train_tokens_per_second": 7151.199 |
| }, |
| { |
| "epoch": 3.3325031971461265, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.5668583982243237e-06, |
| "loss": 3.8047, |
| "num_input_tokens_seen": 811188224, |
| "step": 12380, |
| "train_runtime": 113434.978, |
| "train_tokens_per_second": 7151.13 |
| }, |
| { |
| "epoch": 3.335195530726257, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.538860009355674e-06, |
| "loss": 3.8597, |
| "num_input_tokens_seen": 811843584, |
| "step": 12390, |
| "train_runtime": 113527.2728, |
| "train_tokens_per_second": 7151.089 |
| }, |
| { |
| "epoch": 3.3378878643063876, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.5109635695704053e-06, |
| "loss": 3.9051, |
| "num_input_tokens_seen": 812498944, |
| "step": 12400, |
| "train_runtime": 113619.9857, |
| "train_tokens_per_second": 7151.021 |
| }, |
| { |
| "epoch": 3.340580197886518, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.483169211387899e-06, |
| "loss": 3.976, |
| "num_input_tokens_seen": 813154304, |
| "step": 12410, |
| "train_runtime": 113712.2758, |
| "train_tokens_per_second": 7150.981 |
| }, |
| { |
| "epoch": 3.3432725314666487, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.455477066842569e-06, |
| "loss": 3.9116, |
| "num_input_tokens_seen": 813809664, |
| "step": 12420, |
| "train_runtime": 113804.6321, |
| "train_tokens_per_second": 7150.936 |
| }, |
| { |
| "epoch": 3.3459648650467795, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.4278872674832957e-06, |
| "loss": 3.9571, |
| "num_input_tokens_seen": 814465024, |
| "step": 12430, |
| "train_runtime": 113896.5008, |
| "train_tokens_per_second": 7150.922 |
| }, |
| { |
| "epoch": 3.34865719862691, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.4003999443727617e-06, |
| "loss": 3.8961, |
| "num_input_tokens_seen": 815120384, |
| "step": 12440, |
| "train_runtime": 113989.5994, |
| "train_tokens_per_second": 7150.831 |
| }, |
| { |
| "epoch": 3.3513495322070406, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.3730152280868623e-06, |
| "loss": 3.8976, |
| "num_input_tokens_seen": 815775744, |
| "step": 12450, |
| "train_runtime": 114081.8169, |
| "train_tokens_per_second": 7150.796 |
| }, |
| { |
| "epoch": 3.354041865787171, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.345733248714053e-06, |
| "loss": 3.9377, |
| "num_input_tokens_seen": 816431104, |
| "step": 12460, |
| "train_runtime": 114174.4767, |
| "train_tokens_per_second": 7150.732 |
| }, |
| { |
| "epoch": 3.3567341993673017, |
| "grad_norm": 0.94921875, |
| "learning_rate": 3.3185541358547596e-06, |
| "loss": 3.9536, |
| "num_input_tokens_seen": 817086464, |
| "step": 12470, |
| "train_runtime": 114266.5848, |
| "train_tokens_per_second": 7150.703 |
| }, |
| { |
| "epoch": 3.359426532947432, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.2914780186207416e-06, |
| "loss": 3.8851, |
| "num_input_tokens_seen": 817741824, |
| "step": 12480, |
| "train_runtime": 114359.449, |
| "train_tokens_per_second": 7150.628 |
| }, |
| { |
| "epoch": 3.3621188665275628, |
| "grad_norm": 0.91796875, |
| "learning_rate": 3.2645050256345066e-06, |
| "loss": 3.8909, |
| "num_input_tokens_seen": 818397184, |
| "step": 12490, |
| "train_runtime": 114451.976, |
| "train_tokens_per_second": 7150.573 |
| }, |
| { |
| "epoch": 3.3648112001076935, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.237635285028645e-06, |
| "loss": 3.9691, |
| "num_input_tokens_seen": 819052544, |
| "step": 12500, |
| "train_runtime": 114543.7532, |
| "train_tokens_per_second": 7150.565 |
| }, |
| { |
| "epoch": 3.367503533687824, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.2108689244453013e-06, |
| "loss": 3.8528, |
| "num_input_tokens_seen": 819707904, |
| "step": 12510, |
| "train_runtime": 114655.1115, |
| "train_tokens_per_second": 7149.336 |
| }, |
| { |
| "epoch": 3.3701958672679546, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.1842060710354755e-06, |
| "loss": 3.8692, |
| "num_input_tokens_seen": 820363264, |
| "step": 12520, |
| "train_runtime": 114746.4237, |
| "train_tokens_per_second": 7149.358 |
| }, |
| { |
| "epoch": 3.372888200848085, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.1576468514585123e-06, |
| "loss": 3.925, |
| "num_input_tokens_seen": 821018624, |
| "step": 12530, |
| "train_runtime": 114838.9916, |
| "train_tokens_per_second": 7149.302 |
| }, |
| { |
| "epoch": 3.3755805344282157, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.1311913918814106e-06, |
| "loss": 3.9613, |
| "num_input_tokens_seen": 821673984, |
| "step": 12540, |
| "train_runtime": 114931.5525, |
| "train_tokens_per_second": 7149.246 |
| }, |
| { |
| "epoch": 3.378272868008346, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.1048398179783055e-06, |
| "loss": 3.9994, |
| "num_input_tokens_seen": 822329344, |
| "step": 12550, |
| "train_runtime": 115023.9222, |
| "train_tokens_per_second": 7149.203 |
| }, |
| { |
| "epoch": 3.380965201588477, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.0785922549298127e-06, |
| "loss": 3.8232, |
| "num_input_tokens_seen": 822984704, |
| "step": 12560, |
| "train_runtime": 115116.901, |
| "train_tokens_per_second": 7149.121 |
| }, |
| { |
| "epoch": 3.3836575351686076, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.0524488274224577e-06, |
| "loss": 3.9493, |
| "num_input_tokens_seen": 823640064, |
| "step": 12570, |
| "train_runtime": 115209.1857, |
| "train_tokens_per_second": 7149.083 |
| }, |
| { |
| "epoch": 3.386349868748738, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.02640965964808e-06, |
| "loss": 3.8712, |
| "num_input_tokens_seen": 824295424, |
| "step": 12580, |
| "train_runtime": 115301.7326, |
| "train_tokens_per_second": 7149.029 |
| }, |
| { |
| "epoch": 3.3890422023288687, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.000474875303247e-06, |
| "loss": 3.8895, |
| "num_input_tokens_seen": 824950784, |
| "step": 12590, |
| "train_runtime": 115394.0046, |
| "train_tokens_per_second": 7148.992 |
| }, |
| { |
| "epoch": 3.391734535908999, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.974644597588655e-06, |
| "loss": 3.8803, |
| "num_input_tokens_seen": 825606144, |
| "step": 12600, |
| "train_runtime": 115486.5639, |
| "train_tokens_per_second": 7148.937 |
| }, |
| { |
| "epoch": 3.3944268694891297, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.9489189492085622e-06, |
| "loss": 3.9924, |
| "num_input_tokens_seen": 826261504, |
| "step": 12610, |
| "train_runtime": 115579.424, |
| "train_tokens_per_second": 7148.863 |
| }, |
| { |
| "epoch": 3.39711920306926, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.923298052370177e-06, |
| "loss": 4.0049, |
| "num_input_tokens_seen": 826916864, |
| "step": 12620, |
| "train_runtime": 115672.0103, |
| "train_tokens_per_second": 7148.807 |
| }, |
| { |
| "epoch": 3.399811536649391, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.8977820287831303e-06, |
| "loss": 3.9375, |
| "num_input_tokens_seen": 827572224, |
| "step": 12630, |
| "train_runtime": 115763.97, |
| "train_tokens_per_second": 7148.789 |
| }, |
| { |
| "epoch": 3.4025038702295216, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.872370999658816e-06, |
| "loss": 3.8849, |
| "num_input_tokens_seen": 828227584, |
| "step": 12640, |
| "train_runtime": 115856.9261, |
| "train_tokens_per_second": 7148.71 |
| }, |
| { |
| "epoch": 3.405196203809652, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.8470650857099073e-06, |
| "loss": 3.8561, |
| "num_input_tokens_seen": 828882944, |
| "step": 12650, |
| "train_runtime": 115948.9156, |
| "train_tokens_per_second": 7148.691 |
| }, |
| { |
| "epoch": 3.4078885373897827, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.8218644071496993e-06, |
| "loss": 3.9041, |
| "num_input_tokens_seen": 829538304, |
| "step": 12660, |
| "train_runtime": 116041.5761, |
| "train_tokens_per_second": 7148.63 |
| }, |
| { |
| "epoch": 3.410580870969913, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.796769083691608e-06, |
| "loss": 3.9112, |
| "num_input_tokens_seen": 830193664, |
| "step": 12670, |
| "train_runtime": 116133.9458, |
| "train_tokens_per_second": 7148.587 |
| }, |
| { |
| "epoch": 3.413273204550044, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.7717792345485412e-06, |
| "loss": 3.908, |
| "num_input_tokens_seen": 830849024, |
| "step": 12680, |
| "train_runtime": 116226.1902, |
| "train_tokens_per_second": 7148.553 |
| }, |
| { |
| "epoch": 3.415965538130174, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.7468949784323905e-06, |
| "loss": 3.9629, |
| "num_input_tokens_seen": 831504384, |
| "step": 12690, |
| "train_runtime": 116318.5418, |
| "train_tokens_per_second": 7148.511 |
| }, |
| { |
| "epoch": 3.418657871710305, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.722116433553418e-06, |
| "loss": 3.9296, |
| "num_input_tokens_seen": 832159744, |
| "step": 12700, |
| "train_runtime": 116411.3217, |
| "train_tokens_per_second": 7148.443 |
| }, |
| { |
| "epoch": 3.4213502052904357, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.6974437176197214e-06, |
| "loss": 3.9205, |
| "num_input_tokens_seen": 832815104, |
| "step": 12710, |
| "train_runtime": 116504.246, |
| "train_tokens_per_second": 7148.367 |
| }, |
| { |
| "epoch": 3.424042538870566, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.6728769478366638e-06, |
| "loss": 3.904, |
| "num_input_tokens_seen": 833470464, |
| "step": 12720, |
| "train_runtime": 116596.4746, |
| "train_tokens_per_second": 7148.333 |
| }, |
| { |
| "epoch": 3.4267348724506967, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.648416240906326e-06, |
| "loss": 4.0302, |
| "num_input_tokens_seen": 834125824, |
| "step": 12730, |
| "train_runtime": 116688.7306, |
| "train_tokens_per_second": 7148.298 |
| }, |
| { |
| "epoch": 3.429427206030827, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.6240617130269428e-06, |
| "loss": 3.8861, |
| "num_input_tokens_seen": 834781184, |
| "step": 12740, |
| "train_runtime": 116781.4661, |
| "train_tokens_per_second": 7148.233 |
| }, |
| { |
| "epoch": 3.432119539610958, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.599813479892371e-06, |
| "loss": 3.9706, |
| "num_input_tokens_seen": 835436544, |
| "step": 12750, |
| "train_runtime": 116873.9549, |
| "train_tokens_per_second": 7148.184 |
| }, |
| { |
| "epoch": 3.434811873191088, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.5756716566914947e-06, |
| "loss": 3.963, |
| "num_input_tokens_seen": 836091904, |
| "step": 12760, |
| "train_runtime": 116966.4762, |
| "train_tokens_per_second": 7148.133 |
| }, |
| { |
| "epoch": 3.437504206771219, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.551636358107745e-06, |
| "loss": 3.9309, |
| "num_input_tokens_seen": 836747264, |
| "step": 12770, |
| "train_runtime": 117059.1439, |
| "train_tokens_per_second": 7148.073 |
| }, |
| { |
| "epoch": 3.4401965403513497, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.5277076983184765e-06, |
| "loss": 3.9271, |
| "num_input_tokens_seen": 837402624, |
| "step": 12780, |
| "train_runtime": 117151.9522, |
| "train_tokens_per_second": 7148.004 |
| }, |
| { |
| "epoch": 3.44288887393148, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.503885790994509e-06, |
| "loss": 3.935, |
| "num_input_tokens_seen": 838057984, |
| "step": 12790, |
| "train_runtime": 117244.1649, |
| "train_tokens_per_second": 7147.972 |
| }, |
| { |
| "epoch": 3.445581207511611, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.480170749299504e-06, |
| "loss": 3.9176, |
| "num_input_tokens_seen": 838713344, |
| "step": 12800, |
| "train_runtime": 117337.2422, |
| "train_tokens_per_second": 7147.887 |
| }, |
| { |
| "epoch": 3.448273541091741, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.456562685889505e-06, |
| "loss": 3.9086, |
| "num_input_tokens_seen": 839368704, |
| "step": 12810, |
| "train_runtime": 117429.7527, |
| "train_tokens_per_second": 7147.837 |
| }, |
| { |
| "epoch": 3.450965874671872, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.4330617129123405e-06, |
| "loss": 3.882, |
| "num_input_tokens_seen": 840024064, |
| "step": 12820, |
| "train_runtime": 117522.192, |
| "train_tokens_per_second": 7147.791 |
| }, |
| { |
| "epoch": 3.453658208252002, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.4096679420071294e-06, |
| "loss": 4.0154, |
| "num_input_tokens_seen": 840679424, |
| "step": 12830, |
| "train_runtime": 117614.5624, |
| "train_tokens_per_second": 7147.749 |
| }, |
| { |
| "epoch": 3.456350541832133, |
| "grad_norm": 0.91796875, |
| "learning_rate": 2.386381484303729e-06, |
| "loss": 3.8888, |
| "num_input_tokens_seen": 841334784, |
| "step": 12840, |
| "train_runtime": 117707.487, |
| "train_tokens_per_second": 7147.674 |
| }, |
| { |
| "epoch": 3.4590428754122637, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.3632024504222195e-06, |
| "loss": 3.9835, |
| "num_input_tokens_seen": 841990144, |
| "step": 12850, |
| "train_runtime": 117800.2316, |
| "train_tokens_per_second": 7147.61 |
| }, |
| { |
| "epoch": 3.461735208992394, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.3401309504723747e-06, |
| "loss": 3.934, |
| "num_input_tokens_seen": 842645504, |
| "step": 12860, |
| "train_runtime": 117892.9661, |
| "train_tokens_per_second": 7147.547 |
| }, |
| { |
| "epoch": 3.464427542572525, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.317167094053138e-06, |
| "loss": 3.9311, |
| "num_input_tokens_seen": 843300864, |
| "step": 12870, |
| "train_runtime": 117985.8884, |
| "train_tokens_per_second": 7147.472 |
| }, |
| { |
| "epoch": 3.467119876152655, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.294310990252099e-06, |
| "loss": 3.9461, |
| "num_input_tokens_seen": 843956224, |
| "step": 12880, |
| "train_runtime": 118078.3568, |
| "train_tokens_per_second": 7147.425 |
| }, |
| { |
| "epoch": 3.469812209732786, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.2715627476449953e-06, |
| "loss": 3.9396, |
| "num_input_tokens_seen": 844611584, |
| "step": 12890, |
| "train_runtime": 118170.8013, |
| "train_tokens_per_second": 7147.38 |
| }, |
| { |
| "epoch": 3.4725045433129162, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.248922474295148e-06, |
| "loss": 3.8993, |
| "num_input_tokens_seen": 845266944, |
| "step": 12900, |
| "train_runtime": 118263.5744, |
| "train_tokens_per_second": 7147.314 |
| }, |
| { |
| "epoch": 3.475196876893047, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.226390277753024e-06, |
| "loss": 3.9836, |
| "num_input_tokens_seen": 845922304, |
| "step": 12910, |
| "train_runtime": 118356.2, |
| "train_tokens_per_second": 7147.258 |
| }, |
| { |
| "epoch": 3.477889210473178, |
| "grad_norm": 0.9140625, |
| "learning_rate": 2.2039662650556347e-06, |
| "loss": 3.949, |
| "num_input_tokens_seen": 846577664, |
| "step": 12920, |
| "train_runtime": 118449.0696, |
| "train_tokens_per_second": 7147.187 |
| }, |
| { |
| "epoch": 3.480581544053308, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.1816505427261158e-06, |
| "loss": 3.9043, |
| "num_input_tokens_seen": 847233024, |
| "step": 12930, |
| "train_runtime": 118541.3444, |
| "train_tokens_per_second": 7147.152 |
| }, |
| { |
| "epoch": 3.483273877633439, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.1594432167731464e-06, |
| "loss": 3.9428, |
| "num_input_tokens_seen": 847888384, |
| "step": 12940, |
| "train_runtime": 118634.0314, |
| "train_tokens_per_second": 7147.092 |
| }, |
| { |
| "epoch": 3.485966211213569, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.1373443926905076e-06, |
| "loss": 3.9676, |
| "num_input_tokens_seen": 848543744, |
| "step": 12950, |
| "train_runtime": 118726.1138, |
| "train_tokens_per_second": 7147.069 |
| }, |
| { |
| "epoch": 3.4886585447937, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.1153541754565326e-06, |
| "loss": 3.9707, |
| "num_input_tokens_seen": 849199104, |
| "step": 12960, |
| "train_runtime": 118818.8701, |
| "train_tokens_per_second": 7147.005 |
| }, |
| { |
| "epoch": 3.4913508783738303, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.0934726695336386e-06, |
| "loss": 3.8384, |
| "num_input_tokens_seen": 849854464, |
| "step": 12970, |
| "train_runtime": 118911.6083, |
| "train_tokens_per_second": 7146.943 |
| }, |
| { |
| "epoch": 3.494043211953961, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.0716999788678167e-06, |
| "loss": 3.9075, |
| "num_input_tokens_seen": 850509824, |
| "step": 12980, |
| "train_runtime": 119004.2495, |
| "train_tokens_per_second": 7146.886 |
| }, |
| { |
| "epoch": 3.496735545534092, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.0500362068881396e-06, |
| "loss": 4.0169, |
| "num_input_tokens_seen": 851165184, |
| "step": 12990, |
| "train_runtime": 119096.5756, |
| "train_tokens_per_second": 7146.849 |
| }, |
| { |
| "epoch": 3.499427879114222, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.028481456506276e-06, |
| "loss": 3.9316, |
| "num_input_tokens_seen": 851820544, |
| "step": 13000, |
| "train_runtime": 119189.4526, |
| "train_tokens_per_second": 7146.778 |
| }, |
| { |
| "epoch": 3.502120212694353, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.0070358301159996e-06, |
| "loss": 3.8656, |
| "num_input_tokens_seen": 852475904, |
| "step": 13010, |
| "train_runtime": 119304.0506, |
| "train_tokens_per_second": 7145.406 |
| }, |
| { |
| "epoch": 3.5048125462744832, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.9856994295926857e-06, |
| "loss": 3.8804, |
| "num_input_tokens_seen": 853131264, |
| "step": 13020, |
| "train_runtime": 119396.5114, |
| "train_tokens_per_second": 7145.362 |
| }, |
| { |
| "epoch": 3.507504879854614, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.964472356292876e-06, |
| "loss": 3.9249, |
| "num_input_tokens_seen": 853786624, |
| "step": 13030, |
| "train_runtime": 119489.3871, |
| "train_tokens_per_second": 7145.293 |
| }, |
| { |
| "epoch": 3.5101972134347443, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.9433547110537214e-06, |
| "loss": 3.9338, |
| "num_input_tokens_seen": 854441984, |
| "step": 13040, |
| "train_runtime": 119581.9681, |
| "train_tokens_per_second": 7145.241 |
| }, |
| { |
| "epoch": 3.512889547014875, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.922346594192581e-06, |
| "loss": 3.9438, |
| "num_input_tokens_seen": 855097344, |
| "step": 13050, |
| "train_runtime": 119675.863, |
| "train_tokens_per_second": 7145.111 |
| }, |
| { |
| "epoch": 3.515581880595006, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.9014481055064842e-06, |
| "loss": 3.9569, |
| "num_input_tokens_seen": 855752704, |
| "step": 13060, |
| "train_runtime": 119768.6888, |
| "train_tokens_per_second": 7145.045 |
| }, |
| { |
| "epoch": 3.518274214175136, |
| "grad_norm": 0.9140625, |
| "learning_rate": 1.880659344271707e-06, |
| "loss": 3.8864, |
| "num_input_tokens_seen": 856408064, |
| "step": 13070, |
| "train_runtime": 119861.1973, |
| "train_tokens_per_second": 7144.998 |
| }, |
| { |
| "epoch": 3.520966547755267, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.8599804092432477e-06, |
| "loss": 3.9652, |
| "num_input_tokens_seen": 857063424, |
| "step": 13080, |
| "train_runtime": 119954.2411, |
| "train_tokens_per_second": 7144.92 |
| }, |
| { |
| "epoch": 3.5236588813353977, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.8394113986544087e-06, |
| "loss": 3.9942, |
| "num_input_tokens_seen": 857718784, |
| "step": 13090, |
| "train_runtime": 120046.6443, |
| "train_tokens_per_second": 7144.879 |
| }, |
| { |
| "epoch": 3.526351214915528, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.8189524102162958e-06, |
| "loss": 3.9735, |
| "num_input_tokens_seen": 858374144, |
| "step": 13100, |
| "train_runtime": 120139.9898, |
| "train_tokens_per_second": 7144.783 |
| }, |
| { |
| "epoch": 3.5290435484956584, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.798603541117369e-06, |
| "loss": 3.9093, |
| "num_input_tokens_seen": 859029504, |
| "step": 13110, |
| "train_runtime": 120232.3957, |
| "train_tokens_per_second": 7144.742 |
| }, |
| { |
| "epoch": 3.531735882075789, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.7783648880229765e-06, |
| "loss": 3.9128, |
| "num_input_tokens_seen": 859684864, |
| "step": 13120, |
| "train_runtime": 120325.4141, |
| "train_tokens_per_second": 7144.666 |
| }, |
| { |
| "epoch": 3.53442821565592, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.758236547074893e-06, |
| "loss": 3.9422, |
| "num_input_tokens_seen": 860340224, |
| "step": 13130, |
| "train_runtime": 120417.7906, |
| "train_tokens_per_second": 7144.627 |
| }, |
| { |
| "epoch": 3.5371205492360502, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.7382186138908629e-06, |
| "loss": 3.8821, |
| "num_input_tokens_seen": 860995584, |
| "step": 13140, |
| "train_runtime": 120510.245, |
| "train_tokens_per_second": 7144.584 |
| }, |
| { |
| "epoch": 3.539812882816181, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.7183111835641696e-06, |
| "loss": 4.0563, |
| "num_input_tokens_seen": 861650944, |
| "step": 13150, |
| "train_runtime": 120603.6381, |
| "train_tokens_per_second": 7144.486 |
| }, |
| { |
| "epoch": 3.5425052163963118, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.6985143506631301e-06, |
| "loss": 3.8831, |
| "num_input_tokens_seen": 862306304, |
| "step": 13160, |
| "train_runtime": 120696.122, |
| "train_tokens_per_second": 7144.441 |
| }, |
| { |
| "epoch": 3.545197549976442, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.6788282092307151e-06, |
| "loss": 3.8789, |
| "num_input_tokens_seen": 862961664, |
| "step": 13170, |
| "train_runtime": 120789.5116, |
| "train_tokens_per_second": 7144.343 |
| }, |
| { |
| "epoch": 3.5478898835565724, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.6592528527840296e-06, |
| "loss": 3.9771, |
| "num_input_tokens_seen": 863617024, |
| "step": 13180, |
| "train_runtime": 120882.5701, |
| "train_tokens_per_second": 7144.264 |
| }, |
| { |
| "epoch": 3.550582217136703, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.6397883743139387e-06, |
| "loss": 3.9567, |
| "num_input_tokens_seen": 864272384, |
| "step": 13190, |
| "train_runtime": 120975.3607, |
| "train_tokens_per_second": 7144.202 |
| }, |
| { |
| "epoch": 3.553274550716834, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.6204348662845648e-06, |
| "loss": 3.8517, |
| "num_input_tokens_seen": 864927744, |
| "step": 13200, |
| "train_runtime": 121068.3929, |
| "train_tokens_per_second": 7144.125 |
| }, |
| { |
| "epoch": 3.5559668842969643, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.6011924206328965e-06, |
| "loss": 3.9251, |
| "num_input_tokens_seen": 865583104, |
| "step": 13210, |
| "train_runtime": 121161.5615, |
| "train_tokens_per_second": 7144.041 |
| }, |
| { |
| "epoch": 3.558659217877095, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.5820611287683195e-06, |
| "loss": 3.8975, |
| "num_input_tokens_seen": 866238464, |
| "step": 13220, |
| "train_runtime": 121254.6622, |
| "train_tokens_per_second": 7143.96 |
| }, |
| { |
| "epoch": 3.561351551457226, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.5630410815722001e-06, |
| "loss": 3.9955, |
| "num_input_tokens_seen": 866893824, |
| "step": 13230, |
| "train_runtime": 121347.1378, |
| "train_tokens_per_second": 7143.917 |
| }, |
| { |
| "epoch": 3.564043885037356, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.5441323693974441e-06, |
| "loss": 3.9784, |
| "num_input_tokens_seen": 867549184, |
| "step": 13240, |
| "train_runtime": 121440.003, |
| "train_tokens_per_second": 7143.85 |
| }, |
| { |
| "epoch": 3.5667362186174865, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.5253350820680689e-06, |
| "loss": 3.9543, |
| "num_input_tokens_seen": 868204544, |
| "step": 13250, |
| "train_runtime": 121533.1136, |
| "train_tokens_per_second": 7143.769 |
| }, |
| { |
| "epoch": 3.5694285521976172, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.5066493088787853e-06, |
| "loss": 3.9159, |
| "num_input_tokens_seen": 868859904, |
| "step": 13260, |
| "train_runtime": 121625.8864, |
| "train_tokens_per_second": 7143.709 |
| }, |
| { |
| "epoch": 3.572120885777748, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.4880751385945608e-06, |
| "loss": 3.8722, |
| "num_input_tokens_seen": 869515264, |
| "step": 13270, |
| "train_runtime": 121719.2975, |
| "train_tokens_per_second": 7143.611 |
| }, |
| { |
| "epoch": 3.5748132193578783, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.4696126594502035e-06, |
| "loss": 3.9232, |
| "num_input_tokens_seen": 870170624, |
| "step": 13280, |
| "train_runtime": 121812.0493, |
| "train_tokens_per_second": 7143.551 |
| }, |
| { |
| "epoch": 3.577505552938009, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.4512619591499576e-06, |
| "loss": 3.8966, |
| "num_input_tokens_seen": 870825984, |
| "step": 13290, |
| "train_runtime": 121904.7883, |
| "train_tokens_per_second": 7143.493 |
| }, |
| { |
| "epoch": 3.58019788651814, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.4330231248670501e-06, |
| "loss": 3.9572, |
| "num_input_tokens_seen": 871481344, |
| "step": 13300, |
| "train_runtime": 121997.553, |
| "train_tokens_per_second": 7143.433 |
| }, |
| { |
| "epoch": 3.58289022009827, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.414896243243319e-06, |
| "loss": 3.9553, |
| "num_input_tokens_seen": 872136704, |
| "step": 13310, |
| "train_runtime": 122091.1716, |
| "train_tokens_per_second": 7143.323 |
| }, |
| { |
| "epoch": 3.5855825536784005, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.3968814003887665e-06, |
| "loss": 3.8738, |
| "num_input_tokens_seen": 872792064, |
| "step": 13320, |
| "train_runtime": 122183.9693, |
| "train_tokens_per_second": 7143.262 |
| }, |
| { |
| "epoch": 3.5882748872585313, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.3789786818811823e-06, |
| "loss": 3.9605, |
| "num_input_tokens_seen": 873447424, |
| "step": 13330, |
| "train_runtime": 122276.6746, |
| "train_tokens_per_second": 7143.206 |
| }, |
| { |
| "epoch": 3.590967220838662, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.3611881727656956e-06, |
| "loss": 3.9756, |
| "num_input_tokens_seen": 874102784, |
| "step": 13340, |
| "train_runtime": 122369.8263, |
| "train_tokens_per_second": 7143.124 |
| }, |
| { |
| "epoch": 3.5936595544187924, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.3435099575544258e-06, |
| "loss": 3.9266, |
| "num_input_tokens_seen": 874758144, |
| "step": 13350, |
| "train_runtime": 122462.8819, |
| "train_tokens_per_second": 7143.047 |
| }, |
| { |
| "epoch": 3.596351887998923, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.3259441202260276e-06, |
| "loss": 3.976, |
| "num_input_tokens_seen": 875413504, |
| "step": 13360, |
| "train_runtime": 122555.7889, |
| "train_tokens_per_second": 7142.98 |
| }, |
| { |
| "epoch": 3.599044221579054, |
| "grad_norm": 0.90625, |
| "learning_rate": 1.308490744225324e-06, |
| "loss": 4.0271, |
| "num_input_tokens_seen": 876068864, |
| "step": 13370, |
| "train_runtime": 122649.1213, |
| "train_tokens_per_second": 7142.887 |
| }, |
| { |
| "epoch": 3.6017365551591842, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.2911499124629023e-06, |
| "loss": 3.9439, |
| "num_input_tokens_seen": 876724224, |
| "step": 13380, |
| "train_runtime": 122742.1853, |
| "train_tokens_per_second": 7142.811 |
| }, |
| { |
| "epoch": 3.6044288887393146, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.2739217073147154e-06, |
| "loss": 3.9033, |
| "num_input_tokens_seen": 877379584, |
| "step": 13390, |
| "train_runtime": 122835.033, |
| "train_tokens_per_second": 7142.747 |
| }, |
| { |
| "epoch": 3.6071212223194453, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.2568062106216998e-06, |
| "loss": 3.9033, |
| "num_input_tokens_seen": 878034944, |
| "step": 13400, |
| "train_runtime": 122928.5466, |
| "train_tokens_per_second": 7142.645 |
| }, |
| { |
| "epoch": 3.609813555899576, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.239803503689377e-06, |
| "loss": 3.9349, |
| "num_input_tokens_seen": 878690304, |
| "step": 13410, |
| "train_runtime": 123021.7118, |
| "train_tokens_per_second": 7142.563 |
| }, |
| { |
| "epoch": 3.6125058894797064, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.2229136672874674e-06, |
| "loss": 3.9461, |
| "num_input_tokens_seen": 879345664, |
| "step": 13420, |
| "train_runtime": 123114.5592, |
| "train_tokens_per_second": 7142.499 |
| }, |
| { |
| "epoch": 3.615198223059837, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.206136781649525e-06, |
| "loss": 3.895, |
| "num_input_tokens_seen": 880001024, |
| "step": 13430, |
| "train_runtime": 123207.418, |
| "train_tokens_per_second": 7142.435 |
| }, |
| { |
| "epoch": 3.617890556639968, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.1894729264725235e-06, |
| "loss": 3.9249, |
| "num_input_tokens_seen": 880656384, |
| "step": 13440, |
| "train_runtime": 123300.3293, |
| "train_tokens_per_second": 7142.368 |
| }, |
| { |
| "epoch": 3.6205828902200983, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.1729221809165163e-06, |
| "loss": 3.9358, |
| "num_input_tokens_seen": 881311744, |
| "step": 13450, |
| "train_runtime": 123393.1214, |
| "train_tokens_per_second": 7142.309 |
| }, |
| { |
| "epoch": 3.6232752238002286, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1564846236042177e-06, |
| "loss": 3.8576, |
| "num_input_tokens_seen": 881967104, |
| "step": 13460, |
| "train_runtime": 123485.088, |
| "train_tokens_per_second": 7142.296 |
| }, |
| { |
| "epoch": 3.6259675573803594, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1401603326206767e-06, |
| "loss": 3.851, |
| "num_input_tokens_seen": 882622464, |
| "step": 13470, |
| "train_runtime": 123578.7292, |
| "train_tokens_per_second": 7142.188 |
| }, |
| { |
| "epoch": 3.62865989096049, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.123949385512857e-06, |
| "loss": 3.993, |
| "num_input_tokens_seen": 883277824, |
| "step": 13480, |
| "train_runtime": 123671.2898, |
| "train_tokens_per_second": 7142.141 |
| }, |
| { |
| "epoch": 3.6313522245406205, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.1078518592893134e-06, |
| "loss": 3.9331, |
| "num_input_tokens_seen": 883933184, |
| "step": 13490, |
| "train_runtime": 123763.8687, |
| "train_tokens_per_second": 7142.094 |
| }, |
| { |
| "epoch": 3.6340445581207512, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.091867830419796e-06, |
| "loss": 3.8876, |
| "num_input_tokens_seen": 884588544, |
| "step": 13500, |
| "train_runtime": 123856.936, |
| "train_tokens_per_second": 7142.019 |
| }, |
| { |
| "epoch": 3.636736891700882, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.0759973748348945e-06, |
| "loss": 3.8899, |
| "num_input_tokens_seen": 885243904, |
| "step": 13510, |
| "train_runtime": 123967.4324, |
| "train_tokens_per_second": 7140.939 |
| }, |
| { |
| "epoch": 3.6394292252810123, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.0602405679256883e-06, |
| "loss": 3.8452, |
| "num_input_tokens_seen": 885899264, |
| "step": 13520, |
| "train_runtime": 124060.5751, |
| "train_tokens_per_second": 7140.861 |
| }, |
| { |
| "epoch": 3.6421215588611426, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.044597484543372e-06, |
| "loss": 3.9445, |
| "num_input_tokens_seen": 886554624, |
| "step": 13530, |
| "train_runtime": 124153.6451, |
| "train_tokens_per_second": 7140.786 |
| }, |
| { |
| "epoch": 3.6448138924412734, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.0290681989989088e-06, |
| "loss": 3.9449, |
| "num_input_tokens_seen": 887209984, |
| "step": 13540, |
| "train_runtime": 124246.5598, |
| "train_tokens_per_second": 7140.721 |
| }, |
| { |
| "epoch": 3.647506226021404, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.0136527850626826e-06, |
| "loss": 3.8624, |
| "num_input_tokens_seen": 887865344, |
| "step": 13550, |
| "train_runtime": 124339.6232, |
| "train_tokens_per_second": 7140.647 |
| }, |
| { |
| "epoch": 3.6501985596015345, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.983513159641271e-07, |
| "loss": 3.8709, |
| "num_input_tokens_seen": 888520704, |
| "step": 13560, |
| "train_runtime": 124432.4044, |
| "train_tokens_per_second": 7140.589 |
| }, |
| { |
| "epoch": 3.6528908931816653, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.831638643914147e-07, |
| "loss": 3.9275, |
| "num_input_tokens_seen": 889176064, |
| "step": 13570, |
| "train_runtime": 124525.4167, |
| "train_tokens_per_second": 7140.519 |
| }, |
| { |
| "epoch": 3.655583226761796, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.68090502491059e-07, |
| "loss": 3.9328, |
| "num_input_tokens_seen": 889831424, |
| "step": 13580, |
| "train_runtime": 124618.2504, |
| "train_tokens_per_second": 7140.458 |
| }, |
| { |
| "epoch": 3.6582755603419264, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.531313018676297e-07, |
| "loss": 3.8715, |
| "num_input_tokens_seen": 890486784, |
| "step": 13590, |
| "train_runtime": 124710.9332, |
| "train_tokens_per_second": 7140.407 |
| }, |
| { |
| "epoch": 3.6609678939220567, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.382863335833631e-07, |
| "loss": 3.8953, |
| "num_input_tokens_seen": 891142144, |
| "step": 13600, |
| "train_runtime": 124803.5328, |
| "train_tokens_per_second": 7140.36 |
| }, |
| { |
| "epoch": 3.6636602275021874, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.235556681578605e-07, |
| "loss": 3.9701, |
| "num_input_tokens_seen": 891797504, |
| "step": 13610, |
| "train_runtime": 124896.9726, |
| "train_tokens_per_second": 7140.265 |
| }, |
| { |
| "epoch": 3.666352561082318, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.089393755677328e-07, |
| "loss": 3.9312, |
| "num_input_tokens_seen": 892452864, |
| "step": 13620, |
| "train_runtime": 124989.3517, |
| "train_tokens_per_second": 7140.231 |
| }, |
| { |
| "epoch": 3.6690448946624485, |
| "grad_norm": 0.9296875, |
| "learning_rate": 8.944375252462695e-07, |
| "loss": 3.8729, |
| "num_input_tokens_seen": 893108224, |
| "step": 13630, |
| "train_runtime": 125082.8191, |
| "train_tokens_per_second": 7140.135 |
| }, |
| { |
| "epoch": 3.6717372282425793, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.800501860831201e-07, |
| "loss": 3.952, |
| "num_input_tokens_seen": 893763584, |
| "step": 13640, |
| "train_runtime": 125175.6069, |
| "train_tokens_per_second": 7140.078 |
| }, |
| { |
| "epoch": 3.67442956182271, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.657774264239554e-07, |
| "loss": 3.9501, |
| "num_input_tokens_seen": 894418944, |
| "step": 13650, |
| "train_runtime": 125267.9245, |
| "train_tokens_per_second": 7140.048 |
| }, |
| { |
| "epoch": 3.6771218954028404, |
| "grad_norm": 0.93359375, |
| "learning_rate": 8.516193140701484e-07, |
| "loss": 3.7779, |
| "num_input_tokens_seen": 895074304, |
| "step": 13660, |
| "train_runtime": 125360.9193, |
| "train_tokens_per_second": 7139.979 |
| }, |
| { |
| "epoch": 3.6798142289829707, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.375759162784519e-07, |
| "loss": 3.9228, |
| "num_input_tokens_seen": 895729664, |
| "step": 13670, |
| "train_runtime": 125453.9842, |
| "train_tokens_per_second": 7139.906 |
| }, |
| { |
| "epoch": 3.6825065625631015, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.23647299760677e-07, |
| "loss": 3.8713, |
| "num_input_tokens_seen": 896385024, |
| "step": 13680, |
| "train_runtime": 125546.3023, |
| "train_tokens_per_second": 7139.876 |
| }, |
| { |
| "epoch": 3.6851988961432323, |
| "grad_norm": 0.9296875, |
| "learning_rate": 8.098335306833848e-07, |
| "loss": 3.9329, |
| "num_input_tokens_seen": 897040384, |
| "step": 13690, |
| "train_runtime": 125639.1859, |
| "train_tokens_per_second": 7139.814 |
| }, |
| { |
| "epoch": 3.6878912297233626, |
| "grad_norm": 0.9765625, |
| "learning_rate": 7.961346746675452e-07, |
| "loss": 3.8788, |
| "num_input_tokens_seen": 897695744, |
| "step": 13700, |
| "train_runtime": 125732.4529, |
| "train_tokens_per_second": 7139.73 |
| }, |
| { |
| "epoch": 3.6905835633034934, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.825507967882728e-07, |
| "loss": 3.8508, |
| "num_input_tokens_seen": 898351104, |
| "step": 13710, |
| "train_runtime": 125825.2377, |
| "train_tokens_per_second": 7139.673 |
| }, |
| { |
| "epoch": 3.693275896883624, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.690819615744582e-07, |
| "loss": 3.9309, |
| "num_input_tokens_seen": 899006464, |
| "step": 13720, |
| "train_runtime": 125918.1496, |
| "train_tokens_per_second": 7139.61 |
| }, |
| { |
| "epoch": 3.6959682304637544, |
| "grad_norm": 0.9375, |
| "learning_rate": 7.557282330085235e-07, |
| "loss": 3.8989, |
| "num_input_tokens_seen": 899661824, |
| "step": 13730, |
| "train_runtime": 126010.9722, |
| "train_tokens_per_second": 7139.551 |
| }, |
| { |
| "epoch": 3.6986605640438848, |
| "grad_norm": 0.9296875, |
| "learning_rate": 7.42489674526059e-07, |
| "loss": 3.9604, |
| "num_input_tokens_seen": 900317184, |
| "step": 13740, |
| "train_runtime": 126103.5634, |
| "train_tokens_per_second": 7139.506 |
| }, |
| { |
| "epoch": 3.7013528976240155, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.293663490155783e-07, |
| "loss": 3.8815, |
| "num_input_tokens_seen": 900972544, |
| "step": 13750, |
| "train_runtime": 126196.3334, |
| "train_tokens_per_second": 7139.451 |
| }, |
| { |
| "epoch": 3.7040452312041463, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.163583188181694e-07, |
| "loss": 3.9181, |
| "num_input_tokens_seen": 901627904, |
| "step": 13760, |
| "train_runtime": 126289.4151, |
| "train_tokens_per_second": 7139.378 |
| }, |
| { |
| "epoch": 3.7067375647842766, |
| "grad_norm": 0.921875, |
| "learning_rate": 7.034656457272332e-07, |
| "loss": 3.861, |
| "num_input_tokens_seen": 902283264, |
| "step": 13770, |
| "train_runtime": 126382.4093, |
| "train_tokens_per_second": 7139.311 |
| }, |
| { |
| "epoch": 3.7094298983644074, |
| "grad_norm": 0.93359375, |
| "learning_rate": 6.906883909881728e-07, |
| "loss": 3.8926, |
| "num_input_tokens_seen": 902938624, |
| "step": 13780, |
| "train_runtime": 126475.0511, |
| "train_tokens_per_second": 7139.263 |
| }, |
| { |
| "epoch": 3.712122231944538, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.780266152981107e-07, |
| "loss": 3.9374, |
| "num_input_tokens_seen": 903593984, |
| "step": 13790, |
| "train_runtime": 126568.1129, |
| "train_tokens_per_second": 7139.191 |
| }, |
| { |
| "epoch": 3.7148145655246685, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.654803788055968e-07, |
| "loss": 4.0054, |
| "num_input_tokens_seen": 904249344, |
| "step": 13800, |
| "train_runtime": 126661.0167, |
| "train_tokens_per_second": 7139.129 |
| }, |
| { |
| "epoch": 3.717506899104799, |
| "grad_norm": 0.9453125, |
| "learning_rate": 6.530497411103176e-07, |
| "loss": 3.8824, |
| "num_input_tokens_seen": 904904704, |
| "step": 13810, |
| "train_runtime": 126753.906, |
| "train_tokens_per_second": 7139.068 |
| }, |
| { |
| "epoch": 3.7201992326849296, |
| "grad_norm": 0.92578125, |
| "learning_rate": 6.407347612628234e-07, |
| "loss": 3.9466, |
| "num_input_tokens_seen": 905560064, |
| "step": 13820, |
| "train_runtime": 126847.0746, |
| "train_tokens_per_second": 7138.991 |
| }, |
| { |
| "epoch": 3.7228915662650603, |
| "grad_norm": 1.0, |
| "learning_rate": 6.285354977642516e-07, |
| "loss": 3.9317, |
| "num_input_tokens_seen": 906215424, |
| "step": 13830, |
| "train_runtime": 126939.4623, |
| "train_tokens_per_second": 7138.957 |
| }, |
| { |
| "epoch": 3.7255838998451907, |
| "grad_norm": 0.9140625, |
| "learning_rate": 6.164520085660208e-07, |
| "loss": 3.9766, |
| "num_input_tokens_seen": 906870784, |
| "step": 13840, |
| "train_runtime": 127032.893, |
| "train_tokens_per_second": 7138.866 |
| }, |
| { |
| "epoch": 3.7282762334253214, |
| "grad_norm": 0.91796875, |
| "learning_rate": 6.044843510695924e-07, |
| "loss": 3.8449, |
| "num_input_tokens_seen": 907526144, |
| "step": 13850, |
| "train_runtime": 127125.9777, |
| "train_tokens_per_second": 7138.794 |
| }, |
| { |
| "epoch": 3.730968567005452, |
| "grad_norm": 1.046875, |
| "learning_rate": 5.926325821261652e-07, |
| "loss": 4.0254, |
| "num_input_tokens_seen": 908181504, |
| "step": 13860, |
| "train_runtime": 127219.2626, |
| "train_tokens_per_second": 7138.711 |
| }, |
| { |
| "epoch": 3.7336609005855825, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.808967580364366e-07, |
| "loss": 3.9602, |
| "num_input_tokens_seen": 908836864, |
| "step": 13870, |
| "train_runtime": 127311.9513, |
| "train_tokens_per_second": 7138.661 |
| }, |
| { |
| "epoch": 3.736353234165713, |
| "grad_norm": 0.95703125, |
| "learning_rate": 5.692769345503057e-07, |
| "loss": 3.8487, |
| "num_input_tokens_seen": 909492224, |
| "step": 13880, |
| "train_runtime": 127405.1651, |
| "train_tokens_per_second": 7138.582 |
| }, |
| { |
| "epoch": 3.7390455677458436, |
| "grad_norm": 0.96875, |
| "learning_rate": 5.577731668666347e-07, |
| "loss": 3.9313, |
| "num_input_tokens_seen": 910147584, |
| "step": 13890, |
| "train_runtime": 127498.1611, |
| "train_tokens_per_second": 7138.515 |
| }, |
| { |
| "epoch": 3.7417379013259744, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.463855096329601e-07, |
| "loss": 3.9862, |
| "num_input_tokens_seen": 910802944, |
| "step": 13900, |
| "train_runtime": 127591.5238, |
| "train_tokens_per_second": 7138.428 |
| }, |
| { |
| "epoch": 3.7444302349061047, |
| "grad_norm": 0.9609375, |
| "learning_rate": 5.35114016945254e-07, |
| "loss": 3.8747, |
| "num_input_tokens_seen": 911458304, |
| "step": 13910, |
| "train_runtime": 127684.5276, |
| "train_tokens_per_second": 7138.361 |
| }, |
| { |
| "epoch": 3.7471225684862355, |
| "grad_norm": 0.90234375, |
| "learning_rate": 5.239587423476633e-07, |
| "loss": 3.8832, |
| "num_input_tokens_seen": 912113664, |
| "step": 13920, |
| "train_runtime": 127777.5312, |
| "train_tokens_per_second": 7138.295 |
| }, |
| { |
| "epoch": 3.7498149020663663, |
| "grad_norm": 1.0, |
| "learning_rate": 5.12919738832246e-07, |
| "loss": 3.9736, |
| "num_input_tokens_seen": 912769024, |
| "step": 13930, |
| "train_runtime": 127870.422, |
| "train_tokens_per_second": 7138.234 |
| }, |
| { |
| "epoch": 3.7525072356464966, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.019970588387213e-07, |
| "loss": 3.9561, |
| "num_input_tokens_seen": 913424384, |
| "step": 13940, |
| "train_runtime": 127963.6033, |
| "train_tokens_per_second": 7138.158 |
| }, |
| { |
| "epoch": 3.7551995692266273, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.911907542542449e-07, |
| "loss": 3.9202, |
| "num_input_tokens_seen": 914079744, |
| "step": 13950, |
| "train_runtime": 128056.3548, |
| "train_tokens_per_second": 7138.105 |
| }, |
| { |
| "epoch": 3.7578919028067577, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.805008764131147e-07, |
| "loss": 3.8571, |
| "num_input_tokens_seen": 914735104, |
| "step": 13960, |
| "train_runtime": 128149.0609, |
| "train_tokens_per_second": 7138.055 |
| }, |
| { |
| "epoch": 3.7605842363868884, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.699274760965794e-07, |
| "loss": 3.99, |
| "num_input_tokens_seen": 915390464, |
| "step": 13970, |
| "train_runtime": 128242.0726, |
| "train_tokens_per_second": 7137.989 |
| }, |
| { |
| "epoch": 3.7632765699670188, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.5947060353254967e-07, |
| "loss": 3.8639, |
| "num_input_tokens_seen": 916045824, |
| "step": 13980, |
| "train_runtime": 128335.1981, |
| "train_tokens_per_second": 7137.916 |
| }, |
| { |
| "epoch": 3.7659689035471495, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.4913030839540404e-07, |
| "loss": 3.9185, |
| "num_input_tokens_seen": 916701184, |
| "step": 13990, |
| "train_runtime": 128428.5155, |
| "train_tokens_per_second": 7137.832 |
| }, |
| { |
| "epoch": 3.7686612371272803, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.38906639805714e-07, |
| "loss": 3.9674, |
| "num_input_tokens_seen": 917356544, |
| "step": 14000, |
| "train_runtime": 128521.9811, |
| "train_tokens_per_second": 7137.74 |
| }, |
| { |
| "epoch": 3.7713535707074106, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.2879964633003867e-07, |
| "loss": 3.9561, |
| "num_input_tokens_seen": 918011904, |
| "step": 14010, |
| "train_runtime": 128632.4387, |
| "train_tokens_per_second": 7136.706 |
| }, |
| { |
| "epoch": 3.7740459042875414, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.188093759806805e-07, |
| "loss": 3.941, |
| "num_input_tokens_seen": 918667264, |
| "step": 14020, |
| "train_runtime": 128724.8013, |
| "train_tokens_per_second": 7136.676 |
| }, |
| { |
| "epoch": 3.7767382378676717, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.0893587621545493e-07, |
| "loss": 3.9297, |
| "num_input_tokens_seen": 919322624, |
| "step": 14030, |
| "train_runtime": 128818.3904, |
| "train_tokens_per_second": 7136.579 |
| }, |
| { |
| "epoch": 3.7794305714478025, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.9917919393747673e-07, |
| "loss": 3.9339, |
| "num_input_tokens_seen": 919977984, |
| "step": 14040, |
| "train_runtime": 128911.4414, |
| "train_tokens_per_second": 7136.512 |
| }, |
| { |
| "epoch": 3.782122905027933, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.895393754949267e-07, |
| "loss": 3.923, |
| "num_input_tokens_seen": 920633344, |
| "step": 14050, |
| "train_runtime": 129004.4875, |
| "train_tokens_per_second": 7136.444 |
| }, |
| { |
| "epoch": 3.7848152386080636, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.8001646668083537e-07, |
| "loss": 3.9791, |
| "num_input_tokens_seen": 921288704, |
| "step": 14060, |
| "train_runtime": 129097.3418, |
| "train_tokens_per_second": 7136.388 |
| }, |
| { |
| "epoch": 3.7875075721881943, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.706105127328663e-07, |
| "loss": 3.9704, |
| "num_input_tokens_seen": 921944064, |
| "step": 14070, |
| "train_runtime": 129190.3786, |
| "train_tokens_per_second": 7136.321 |
| }, |
| { |
| "epoch": 3.7901999057683247, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.613215583330998e-07, |
| "loss": 3.8051, |
| "num_input_tokens_seen": 922599424, |
| "step": 14080, |
| "train_runtime": 129283.3427, |
| "train_tokens_per_second": 7136.259 |
| }, |
| { |
| "epoch": 3.7928922393484554, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.521496476078245e-07, |
| "loss": 3.8645, |
| "num_input_tokens_seen": 923254784, |
| "step": 14090, |
| "train_runtime": 129376.1908, |
| "train_tokens_per_second": 7136.203 |
| }, |
| { |
| "epoch": 3.7955845729285858, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.4309482412731574e-07, |
| "loss": 3.9249, |
| "num_input_tokens_seen": 923910144, |
| "step": 14100, |
| "train_runtime": 129469.0328, |
| "train_tokens_per_second": 7136.148 |
| }, |
| { |
| "epoch": 3.7982769065087165, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.341571309056463e-07, |
| "loss": 3.8903, |
| "num_input_tokens_seen": 924565504, |
| "step": 14110, |
| "train_runtime": 129561.6755, |
| "train_tokens_per_second": 7136.103 |
| }, |
| { |
| "epoch": 3.800969240088847, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.253366104004646e-07, |
| "loss": 3.8958, |
| "num_input_tokens_seen": 925220864, |
| "step": 14120, |
| "train_runtime": 129654.5103, |
| "train_tokens_per_second": 7136.048 |
| }, |
| { |
| "epoch": 3.8036615736689776, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.1663330451281446e-07, |
| "loss": 3.8679, |
| "num_input_tokens_seen": 925876224, |
| "step": 14130, |
| "train_runtime": 129747.5313, |
| "train_tokens_per_second": 7135.983 |
| }, |
| { |
| "epoch": 3.8063539072491084, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.0804725458690177e-07, |
| "loss": 3.8652, |
| "num_input_tokens_seen": 926531584, |
| "step": 14140, |
| "train_runtime": 129840.1296, |
| "train_tokens_per_second": 7135.942 |
| }, |
| { |
| "epoch": 3.8090462408292387, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.9957850140994447e-07, |
| "loss": 3.9356, |
| "num_input_tokens_seen": 927186944, |
| "step": 14150, |
| "train_runtime": 129933.2246, |
| "train_tokens_per_second": 7135.873 |
| }, |
| { |
| "epoch": 3.8117385744093695, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.912270852119314e-07, |
| "loss": 3.9382, |
| "num_input_tokens_seen": 927842304, |
| "step": 14160, |
| "train_runtime": 130026.321, |
| "train_tokens_per_second": 7135.804 |
| }, |
| { |
| "epoch": 3.8144309079895, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.8299304566546667e-07, |
| "loss": 3.9925, |
| "num_input_tokens_seen": 928497664, |
| "step": 14170, |
| "train_runtime": 130118.8414, |
| "train_tokens_per_second": 7135.766 |
| }, |
| { |
| "epoch": 3.8171232415696306, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.748764218855643e-07, |
| "loss": 3.9259, |
| "num_input_tokens_seen": 929153024, |
| "step": 14180, |
| "train_runtime": 130211.9958, |
| "train_tokens_per_second": 7135.695 |
| }, |
| { |
| "epoch": 3.819815575149761, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.668772524294649e-07, |
| "loss": 3.9098, |
| "num_input_tokens_seen": 929808384, |
| "step": 14190, |
| "train_runtime": 130304.4538, |
| "train_tokens_per_second": 7135.661 |
| }, |
| { |
| "epoch": 3.8225079087298917, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.589955752964529e-07, |
| "loss": 3.9708, |
| "num_input_tokens_seen": 930463744, |
| "step": 14200, |
| "train_runtime": 130397.4744, |
| "train_tokens_per_second": 7135.596 |
| }, |
| { |
| "epoch": 3.8252002423100224, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.5123142792768117e-07, |
| "loss": 3.9448, |
| "num_input_tokens_seen": 931119104, |
| "step": 14210, |
| "train_runtime": 130489.9438, |
| "train_tokens_per_second": 7135.562 |
| }, |
| { |
| "epoch": 3.8278925758901527, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.435848472059826e-07, |
| "loss": 3.8709, |
| "num_input_tokens_seen": 931774464, |
| "step": 14220, |
| "train_runtime": 130582.7644, |
| "train_tokens_per_second": 7135.509 |
| }, |
| { |
| "epoch": 3.8305849094702835, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.3605586945570635e-07, |
| "loss": 4.0202, |
| "num_input_tokens_seen": 932429824, |
| "step": 14230, |
| "train_runtime": 130675.8246, |
| "train_tokens_per_second": 7135.442 |
| }, |
| { |
| "epoch": 3.833277243050414, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.286445304425372e-07, |
| "loss": 4.0128, |
| "num_input_tokens_seen": 933085184, |
| "step": 14240, |
| "train_runtime": 130768.5266, |
| "train_tokens_per_second": 7135.396 |
| }, |
| { |
| "epoch": 3.8359695766305446, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.2135086537332926e-07, |
| "loss": 3.8913, |
| "num_input_tokens_seen": 933740544, |
| "step": 14250, |
| "train_runtime": 130861.4831, |
| "train_tokens_per_second": 7135.335 |
| }, |
| { |
| "epoch": 3.838661910210675, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.1417490889593661e-07, |
| "loss": 3.9366, |
| "num_input_tokens_seen": 934395904, |
| "step": 14260, |
| "train_runtime": 130954.3688, |
| "train_tokens_per_second": 7135.279 |
| }, |
| { |
| "epoch": 3.8413542437908057, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.0711669509905218e-07, |
| "loss": 3.8326, |
| "num_input_tokens_seen": 935051264, |
| "step": 14270, |
| "train_runtime": 131047.4893, |
| "train_tokens_per_second": 7135.209 |
| }, |
| { |
| "epoch": 3.8440465773709365, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.0017625751204138e-07, |
| "loss": 3.9629, |
| "num_input_tokens_seen": 935706624, |
| "step": 14280, |
| "train_runtime": 131140.2652, |
| "train_tokens_per_second": 7135.159 |
| }, |
| { |
| "epoch": 3.846738910951067, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.933536291047866e-07, |
| "loss": 3.9492, |
| "num_input_tokens_seen": 936361984, |
| "step": 14290, |
| "train_runtime": 131233.5766, |
| "train_tokens_per_second": 7135.079 |
| }, |
| { |
| "epoch": 3.8494312445311976, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.86648842287529e-07, |
| "loss": 3.8845, |
| "num_input_tokens_seen": 937017344, |
| "step": 14300, |
| "train_runtime": 131326.1746, |
| "train_tokens_per_second": 7135.039 |
| }, |
| { |
| "epoch": 3.852123578111328, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.8006192891071581e-07, |
| "loss": 3.8817, |
| "num_input_tokens_seen": 937672704, |
| "step": 14310, |
| "train_runtime": 131419.3724, |
| "train_tokens_per_second": 7134.966 |
| }, |
| { |
| "epoch": 3.8548159116914587, |
| "grad_norm": 0.9140625, |
| "learning_rate": 1.735929202648423e-07, |
| "loss": 3.8734, |
| "num_input_tokens_seen": 938328064, |
| "step": 14320, |
| "train_runtime": 131512.5259, |
| "train_tokens_per_second": 7134.895 |
| }, |
| { |
| "epoch": 3.857508245271589, |
| "grad_norm": 1.0, |
| "learning_rate": 1.6724184708031276e-07, |
| "loss": 3.8662, |
| "num_input_tokens_seen": 938983424, |
| "step": 14330, |
| "train_runtime": 131605.8597, |
| "train_tokens_per_second": 7134.815 |
| }, |
| { |
| "epoch": 3.8602005788517197, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.6100873952729078e-07, |
| "loss": 4.0064, |
| "num_input_tokens_seen": 939638784, |
| "step": 14340, |
| "train_runtime": 131698.4257, |
| "train_tokens_per_second": 7134.776 |
| }, |
| { |
| "epoch": 3.8628929124318505, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.5489362721556044e-07, |
| "loss": 3.8501, |
| "num_input_tokens_seen": 940294144, |
| "step": 14350, |
| "train_runtime": 131790.8987, |
| "train_tokens_per_second": 7134.743 |
| }, |
| { |
| "epoch": 3.865585246011981, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.488965391943653e-07, |
| "loss": 4.0097, |
| "num_input_tokens_seen": 940949504, |
| "step": 14360, |
| "train_runtime": 131884.3248, |
| "train_tokens_per_second": 7134.658 |
| }, |
| { |
| "epoch": 3.8682775795921116, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.4301750395230296e-07, |
| "loss": 3.8275, |
| "num_input_tokens_seen": 941604864, |
| "step": 14370, |
| "train_runtime": 131977.2636, |
| "train_tokens_per_second": 7134.599 |
| }, |
| { |
| "epoch": 3.870969913172242, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.3725654941716127e-07, |
| "loss": 3.9239, |
| "num_input_tokens_seen": 942260224, |
| "step": 14380, |
| "train_runtime": 132070.4041, |
| "train_tokens_per_second": 7134.53 |
| }, |
| { |
| "epoch": 3.8736622467523727, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.3161370295580734e-07, |
| "loss": 3.8412, |
| "num_input_tokens_seen": 942915584, |
| "step": 14390, |
| "train_runtime": 132163.8532, |
| "train_tokens_per_second": 7134.444 |
| }, |
| { |
| "epoch": 3.876354580332503, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.2608899137403207e-07, |
| "loss": 3.9429, |
| "num_input_tokens_seen": 943570944, |
| "step": 14400, |
| "train_runtime": 132256.8675, |
| "train_tokens_per_second": 7134.381 |
| }, |
| { |
| "epoch": 3.879046913912634, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.2068244091645588e-07, |
| "loss": 4.0124, |
| "num_input_tokens_seen": 944226304, |
| "step": 14410, |
| "train_runtime": 132349.5777, |
| "train_tokens_per_second": 7134.336 |
| }, |
| { |
| "epoch": 3.8817392474927646, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.153940772663703e-07, |
| "loss": 3.8483, |
| "num_input_tokens_seen": 944881664, |
| "step": 14420, |
| "train_runtime": 132442.9328, |
| "train_tokens_per_second": 7134.255 |
| }, |
| { |
| "epoch": 3.884431581072895, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.1022392554564387e-07, |
| "loss": 3.8698, |
| "num_input_tokens_seen": 945537024, |
| "step": 14430, |
| "train_runtime": 132535.4318, |
| "train_tokens_per_second": 7134.221 |
| }, |
| { |
| "epoch": 3.8871239146530256, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.0517201031458312e-07, |
| "loss": 3.8573, |
| "num_input_tokens_seen": 946192384, |
| "step": 14440, |
| "train_runtime": 132628.1579, |
| "train_tokens_per_second": 7134.174 |
| }, |
| { |
| "epoch": 3.889816248233156, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.0023835557182448e-07, |
| "loss": 3.9935, |
| "num_input_tokens_seen": 946847744, |
| "step": 14450, |
| "train_runtime": 132721.5337, |
| "train_tokens_per_second": 7134.093 |
| }, |
| { |
| "epoch": 3.8925085818132867, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.542298475422318e-08, |
| "loss": 3.9391, |
| "num_input_tokens_seen": 947503104, |
| "step": 14460, |
| "train_runtime": 132814.2323, |
| "train_tokens_per_second": 7134.048 |
| }, |
| { |
| "epoch": 3.895200915393417, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.072592073673392e-08, |
| "loss": 3.9545, |
| "num_input_tokens_seen": 948158464, |
| "step": 14470, |
| "train_runtime": 132907.318, |
| "train_tokens_per_second": 7133.982 |
| }, |
| { |
| "epoch": 3.897893248973548, |
| "grad_norm": 0.9609375, |
| "learning_rate": 8.614718583230819e-08, |
| "loss": 3.9326, |
| "num_input_tokens_seen": 948813824, |
| "step": 14480, |
| "train_runtime": 133000.2245, |
| "train_tokens_per_second": 7133.926 |
| }, |
| { |
| "epoch": 3.9005855825536786, |
| "grad_norm": 0.9375, |
| "learning_rate": 8.168680179178879e-08, |
| "loss": 3.9629, |
| "num_input_tokens_seen": 949469184, |
| "step": 14490, |
| "train_runtime": 133092.8079, |
| "train_tokens_per_second": 7133.888 |
| }, |
| { |
| "epoch": 3.903277916133809, |
| "grad_norm": 0.9765625, |
| "learning_rate": 7.734478980379878e-08, |
| "loss": 3.8664, |
| "num_input_tokens_seen": 950124544, |
| "step": 14500, |
| "train_runtime": 133185.5074, |
| "train_tokens_per_second": 7133.843 |
| }, |
| { |
| "epoch": 3.9059702497139397, |
| "grad_norm": 0.953125, |
| "learning_rate": 7.312117049464995e-08, |
| "loss": 3.9334, |
| "num_input_tokens_seen": 950779904, |
| "step": 14510, |
| "train_runtime": 133295.6189, |
| "train_tokens_per_second": 7132.867 |
| }, |
| { |
| "epoch": 3.90866258329407, |
| "grad_norm": 0.91796875, |
| "learning_rate": 6.901596392824006e-08, |
| "loss": 3.9127, |
| "num_input_tokens_seen": 951435264, |
| "step": 14520, |
| "train_runtime": 133388.1264, |
| "train_tokens_per_second": 7132.833 |
| }, |
| { |
| "epoch": 3.911354916874201, |
| "grad_norm": 0.9609375, |
| "learning_rate": 6.502918960595849e-08, |
| "loss": 3.9548, |
| "num_input_tokens_seen": 952090624, |
| "step": 14530, |
| "train_runtime": 133480.8519, |
| "train_tokens_per_second": 7132.788 |
| }, |
| { |
| "epoch": 3.914047250454331, |
| "grad_norm": 0.9453125, |
| "learning_rate": 6.116086646659192e-08, |
| "loss": 3.9364, |
| "num_input_tokens_seen": 952745984, |
| "step": 14540, |
| "train_runtime": 133574.0035, |
| "train_tokens_per_second": 7132.72 |
| }, |
| { |
| "epoch": 3.916739584034462, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.741101288623818e-08, |
| "loss": 3.9602, |
| "num_input_tokens_seen": 953401344, |
| "step": 14550, |
| "train_runtime": 133666.7795, |
| "train_tokens_per_second": 7132.672 |
| }, |
| { |
| "epoch": 3.9194319176145926, |
| "grad_norm": 0.91015625, |
| "learning_rate": 5.377964667822033e-08, |
| "loss": 3.9444, |
| "num_input_tokens_seen": 954056704, |
| "step": 14560, |
| "train_runtime": 133758.9615, |
| "train_tokens_per_second": 7132.656 |
| }, |
| { |
| "epoch": 3.922124251194723, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.026678509298943e-08, |
| "loss": 3.8375, |
| "num_input_tokens_seen": 954712064, |
| "step": 14570, |
| "train_runtime": 133852.3351, |
| "train_tokens_per_second": 7132.577 |
| }, |
| { |
| "epoch": 3.9248165847748537, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.687244481806075e-08, |
| "loss": 4.0509, |
| "num_input_tokens_seen": 955367424, |
| "step": 14580, |
| "train_runtime": 133945.6488, |
| "train_tokens_per_second": 7132.501 |
| }, |
| { |
| "epoch": 3.927508918354984, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.3596641977916575e-08, |
| "loss": 3.8624, |
| "num_input_tokens_seen": 956022784, |
| "step": 14590, |
| "train_runtime": 134038.3984, |
| "train_tokens_per_second": 7132.455 |
| }, |
| { |
| "epoch": 3.930201251935115, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.043939213395076e-08, |
| "loss": 3.9485, |
| "num_input_tokens_seen": 956678144, |
| "step": 14600, |
| "train_runtime": 134131.6976, |
| "train_tokens_per_second": 7132.379 |
| }, |
| { |
| "epoch": 3.932893585515245, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.740071028436876e-08, |
| "loss": 3.922, |
| "num_input_tokens_seen": 957333504, |
| "step": 14610, |
| "train_runtime": 134224.6729, |
| "train_tokens_per_second": 7132.321 |
| }, |
| { |
| "epoch": 3.935585919095376, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.448061086414045e-08, |
| "loss": 3.9343, |
| "num_input_tokens_seen": 957988864, |
| "step": 14620, |
| "train_runtime": 134317.4462, |
| "train_tokens_per_second": 7132.274 |
| }, |
| { |
| "epoch": 3.9382782526755067, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.167910774491412e-08, |
| "loss": 3.9556, |
| "num_input_tokens_seen": 958644224, |
| "step": 14630, |
| "train_runtime": 134410.7699, |
| "train_tokens_per_second": 7132.198 |
| }, |
| { |
| "epoch": 3.940970586255637, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.8996214234966456e-08, |
| "loss": 3.9511, |
| "num_input_tokens_seen": 959299584, |
| "step": 14640, |
| "train_runtime": 134503.2588, |
| "train_tokens_per_second": 7132.166 |
| }, |
| { |
| "epoch": 3.9436629198357678, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.6431943079122112e-08, |
| "loss": 3.9414, |
| "num_input_tokens_seen": 959954944, |
| "step": 14650, |
| "train_runtime": 134596.4574, |
| "train_tokens_per_second": 7132.097 |
| }, |
| { |
| "epoch": 3.946355253415898, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.39863064587037e-08, |
| "loss": 4.0138, |
| "num_input_tokens_seen": 960610304, |
| "step": 14660, |
| "train_runtime": 134689.6475, |
| "train_tokens_per_second": 7132.028 |
| }, |
| { |
| "epoch": 3.949047586996029, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.165931599147353e-08, |
| "loss": 3.9296, |
| "num_input_tokens_seen": 961265664, |
| "step": 14670, |
| "train_runtime": 134782.3868, |
| "train_tokens_per_second": 7131.983 |
| }, |
| { |
| "epoch": 3.951739920576159, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.9450982731578082e-08, |
| "loss": 3.9363, |
| "num_input_tokens_seen": 961921024, |
| "step": 14680, |
| "train_runtime": 134875.6297, |
| "train_tokens_per_second": 7131.911 |
| }, |
| { |
| "epoch": 3.95443225415629, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.7361317169492518e-08, |
| "loss": 3.955, |
| "num_input_tokens_seen": 962576384, |
| "step": 14690, |
| "train_runtime": 134968.5816, |
| "train_tokens_per_second": 7131.855 |
| }, |
| { |
| "epoch": 3.9571245877364207, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.5390329231970703e-08, |
| "loss": 3.8801, |
| "num_input_tokens_seen": 963231744, |
| "step": 14700, |
| "train_runtime": 135061.2815, |
| "train_tokens_per_second": 7131.813 |
| }, |
| { |
| "epoch": 3.959816921316551, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.3538028282000792e-08, |
| "loss": 3.9704, |
| "num_input_tokens_seen": 963887104, |
| "step": 14710, |
| "train_runtime": 135154.3462, |
| "train_tokens_per_second": 7131.751 |
| }, |
| { |
| "epoch": 3.962509254896682, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.1804423118760844e-08, |
| "loss": 3.8934, |
| "num_input_tokens_seen": 964542464, |
| "step": 14720, |
| "train_runtime": 135247.3859, |
| "train_tokens_per_second": 7131.69 |
| }, |
| { |
| "epoch": 3.965201588476812, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.0189521977577166e-08, |
| "loss": 4.0073, |
| "num_input_tokens_seen": 965197824, |
| "step": 14730, |
| "train_runtime": 135340.1518, |
| "train_tokens_per_second": 7131.644 |
| }, |
| { |
| "epoch": 3.967893922056943, |
| "grad_norm": 0.921875, |
| "learning_rate": 8.693332529879917e-09, |
| "loss": 3.9742, |
| "num_input_tokens_seen": 965853184, |
| "step": 14740, |
| "train_runtime": 135433.0047, |
| "train_tokens_per_second": 7131.594 |
| }, |
| { |
| "epoch": 3.9705862556370732, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.3158618831781215e-09, |
| "loss": 3.8715, |
| "num_input_tokens_seen": 966508544, |
| "step": 14750, |
| "train_runtime": 135526.1675, |
| "train_tokens_per_second": 7131.527 |
| }, |
| { |
| "epoch": 3.973278589217204, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.057116581006939e-09, |
| "loss": 3.9439, |
| "num_input_tokens_seen": 967163904, |
| "step": 14760, |
| "train_runtime": 135619.1962, |
| "train_tokens_per_second": 7131.468 |
| }, |
| { |
| "epoch": 3.9759709227973348, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.917102602922108e-09, |
| "loss": 3.858, |
| "num_input_tokens_seen": 967819264, |
| "step": 14770, |
| "train_runtime": 135711.9871, |
| "train_tokens_per_second": 7131.421 |
| }, |
| { |
| "epoch": 3.978663256377465, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.895825364444438e-09, |
| "loss": 3.9277, |
| "num_input_tokens_seen": 968474624, |
| "step": 14780, |
| "train_runtime": 135804.7135, |
| "train_tokens_per_second": 7131.377 |
| }, |
| { |
| "epoch": 3.981355589957596, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.9932897170542594e-09, |
| "loss": 3.8246, |
| "num_input_tokens_seen": 969129984, |
| "step": 14790, |
| "train_runtime": 135897.7934, |
| "train_tokens_per_second": 7131.315 |
| }, |
| { |
| "epoch": 3.984047923537726, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.2094999481664382e-09, |
| "loss": 4.0439, |
| "num_input_tokens_seen": 969785344, |
| "step": 14800, |
| "train_runtime": 135990.5442, |
| "train_tokens_per_second": 7131.27 |
| }, |
| { |
| "epoch": 3.986740257117857, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.5444597810915228e-09, |
| "loss": 3.9585, |
| "num_input_tokens_seen": 970440704, |
| "step": 14810, |
| "train_runtime": 136083.8449, |
| "train_tokens_per_second": 7131.197 |
| }, |
| { |
| "epoch": 3.9894325906979873, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.981723750412951e-10, |
| "loss": 3.8607, |
| "num_input_tokens_seen": 971096064, |
| "step": 14820, |
| "train_runtime": 136176.9057, |
| "train_tokens_per_second": 7131.136 |
| }, |
| { |
| "epoch": 3.992124924278118, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.706403251037884e-10, |
| "loss": 3.9295, |
| "num_input_tokens_seen": 971751424, |
| "step": 14830, |
| "train_runtime": 136269.7783, |
| "train_tokens_per_second": 7131.085 |
| }, |
| { |
| "epoch": 3.994817257858249, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.6186566222663465e-10, |
| "loss": 3.9395, |
| "num_input_tokens_seen": 972406784, |
| "step": 14840, |
| "train_runtime": 136362.3434, |
| "train_tokens_per_second": 7131.051 |
| }, |
| { |
| "epoch": 3.997509591438379, |
| "grad_norm": 0.89453125, |
| "learning_rate": 7.184985321706527e-11, |
| "loss": 3.9461, |
| "num_input_tokens_seen": 973062144, |
| "step": 14850, |
| "train_runtime": 136455.5477, |
| "train_tokens_per_second": 7130.983 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.96875, |
| "learning_rate": 5.938007224814967e-13, |
| "loss": 3.9392, |
| "num_input_tokens_seen": 973668352, |
| "step": 14860, |
| "train_runtime": 136541.371, |
| "train_tokens_per_second": 7130.94 |
| }, |
| { |
| "epoch": 4.0, |
| "num_input_tokens_seen": 973668352, |
| "step": 14860, |
| "total_flos": 2.1227013472184697e+19, |
| "train_loss": 4.094350151065862, |
| "train_runtime": 136571.1716, |
| "train_samples_per_second": 3.481, |
| "train_steps_per_second": 0.109, |
| "train_tokens_per_second": 7129.384 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 14860, |
| "num_input_tokens_seen": 973668352, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1227013472184697e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|