| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 1642, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0060901339829476245, | |
| "grad_norm": 17.5, | |
| "learning_rate": 4.9999267881610354e-05, | |
| "loss": 6.1919, | |
| "num_input_tokens_seen": 655360, | |
| "step": 5, | |
| "train_runtime": 22.3092, | |
| "train_tokens_per_second": 29376.191 | |
| }, | |
| { | |
| "epoch": 0.012180267965895249, | |
| "grad_norm": 3.9375, | |
| "learning_rate": 4.9996293724142536e-05, | |
| "loss": 4.6261, | |
| "num_input_tokens_seen": 1310720, | |
| "step": 10, | |
| "train_runtime": 44.0485, | |
| "train_tokens_per_second": 29756.318 | |
| }, | |
| { | |
| "epoch": 0.018270401948842874, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 4.9991032042166476e-05, | |
| "loss": 4.0276, | |
| "num_input_tokens_seen": 1966080, | |
| "step": 15, | |
| "train_runtime": 65.7637, | |
| "train_tokens_per_second": 29896.111 | |
| }, | |
| { | |
| "epoch": 0.024360535931790498, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 4.998348331720263e-05, | |
| "loss": 3.5105, | |
| "num_input_tokens_seen": 2621440, | |
| "step": 20, | |
| "train_runtime": 87.5389, | |
| "train_tokens_per_second": 29945.994 | |
| }, | |
| { | |
| "epoch": 0.030450669914738125, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 4.997364824006915e-05, | |
| "loss": 2.9731, | |
| "num_input_tokens_seen": 3276800, | |
| "step": 25, | |
| "train_runtime": 109.5185, | |
| "train_tokens_per_second": 29920.05 | |
| }, | |
| { | |
| "epoch": 0.03654080389768575, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 4.996152771081866e-05, | |
| "loss": 2.5386, | |
| "num_input_tokens_seen": 3932160, | |
| "step": 30, | |
| "train_runtime": 131.3684, | |
| "train_tokens_per_second": 29932.308 | |
| }, | |
| { | |
| "epoch": 0.04263093788063337, | |
| "grad_norm": 4.0625, | |
| "learning_rate": 4.9947122838655915e-05, | |
| "loss": 2.1857, | |
| "num_input_tokens_seen": 4587520, | |
| "step": 35, | |
| "train_runtime": 153.1526, | |
| "train_tokens_per_second": 29953.909 | |
| }, | |
| { | |
| "epoch": 0.048721071863580996, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 4.993043494183627e-05, | |
| "loss": 1.9064, | |
| "num_input_tokens_seen": 5242880, | |
| "step": 40, | |
| "train_runtime": 174.9243, | |
| "train_tokens_per_second": 29972.285 | |
| }, | |
| { | |
| "epoch": 0.05481120584652863, | |
| "grad_norm": 4.0, | |
| "learning_rate": 4.9911465547545044e-05, | |
| "loss": 1.7284, | |
| "num_input_tokens_seen": 5898240, | |
| "step": 45, | |
| "train_runtime": 196.7433, | |
| "train_tokens_per_second": 29979.364 | |
| }, | |
| { | |
| "epoch": 0.06090133982947625, | |
| "grad_norm": 3.453125, | |
| "learning_rate": 4.989021639175778e-05, | |
| "loss": 1.5783, | |
| "num_input_tokens_seen": 6553600, | |
| "step": 50, | |
| "train_runtime": 218.5219, | |
| "train_tokens_per_second": 29990.583 | |
| }, | |
| { | |
| "epoch": 0.06699147381242387, | |
| "grad_norm": 2.84375, | |
| "learning_rate": 4.986668941908136e-05, | |
| "loss": 1.4457, | |
| "num_input_tokens_seen": 7208960, | |
| "step": 55, | |
| "train_runtime": 240.2767, | |
| "train_tokens_per_second": 30002.749 | |
| }, | |
| { | |
| "epoch": 0.0730816077953715, | |
| "grad_norm": 2.6875, | |
| "learning_rate": 4.9840886782576024e-05, | |
| "loss": 1.3575, | |
| "num_input_tokens_seen": 7864320, | |
| "step": 60, | |
| "train_runtime": 262.1559, | |
| "train_tokens_per_second": 29998.643 | |
| }, | |
| { | |
| "epoch": 0.07917174177831912, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 4.981281084355839e-05, | |
| "loss": 1.2639, | |
| "num_input_tokens_seen": 8519680, | |
| "step": 65, | |
| "train_runtime": 283.9622, | |
| "train_tokens_per_second": 30002.866 | |
| }, | |
| { | |
| "epoch": 0.08526187576126674, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 4.97824641713853e-05, | |
| "loss": 1.2196, | |
| "num_input_tokens_seen": 9175040, | |
| "step": 70, | |
| "train_runtime": 305.7342, | |
| "train_tokens_per_second": 30009.862 | |
| }, | |
| { | |
| "epoch": 0.09135200974421437, | |
| "grad_norm": 2.25, | |
| "learning_rate": 4.974984954321873e-05, | |
| "loss": 1.1541, | |
| "num_input_tokens_seen": 9830400, | |
| "step": 75, | |
| "train_runtime": 327.4637, | |
| "train_tokens_per_second": 30019.82 | |
| }, | |
| { | |
| "epoch": 0.09744214372716199, | |
| "grad_norm": 2.3125, | |
| "learning_rate": 4.971496994377163e-05, | |
| "loss": 1.1022, | |
| "num_input_tokens_seen": 10485760, | |
| "step": 80, | |
| "train_runtime": 349.2345, | |
| "train_tokens_per_second": 30024.981 | |
| }, | |
| { | |
| "epoch": 0.10353227771010962, | |
| "grad_norm": 1.9140625, | |
| "learning_rate": 4.967782856503473e-05, | |
| "loss": 1.0584, | |
| "num_input_tokens_seen": 11141120, | |
| "step": 85, | |
| "train_runtime": 370.9965, | |
| "train_tokens_per_second": 30030.259 | |
| }, | |
| { | |
| "epoch": 0.10962241169305725, | |
| "grad_norm": 2.125, | |
| "learning_rate": 4.963842880598453e-05, | |
| "loss": 1.0431, | |
| "num_input_tokens_seen": 11796480, | |
| "step": 90, | |
| "train_runtime": 392.6965, | |
| "train_tokens_per_second": 30039.688 | |
| }, | |
| { | |
| "epoch": 0.11571254567600488, | |
| "grad_norm": 1.9375, | |
| "learning_rate": 4.9596774272272115e-05, | |
| "loss": 0.9951, | |
| "num_input_tokens_seen": 12451840, | |
| "step": 95, | |
| "train_runtime": 414.5171, | |
| "train_tokens_per_second": 30039.386 | |
| }, | |
| { | |
| "epoch": 0.1218026796589525, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 4.955286877589331e-05, | |
| "loss": 0.9762, | |
| "num_input_tokens_seen": 13107200, | |
| "step": 100, | |
| "train_runtime": 436.3192, | |
| "train_tokens_per_second": 30040.394 | |
| }, | |
| { | |
| "epoch": 0.1278928136419001, | |
| "grad_norm": 1.75, | |
| "learning_rate": 4.9506716334839756e-05, | |
| "loss": 0.9444, | |
| "num_input_tokens_seen": 13762560, | |
| "step": 105, | |
| "train_runtime": 458.1573, | |
| "train_tokens_per_second": 30038.943 | |
| }, | |
| { | |
| "epoch": 0.13398294762484775, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 4.945832117273118e-05, | |
| "loss": 0.9425, | |
| "num_input_tokens_seen": 14417920, | |
| "step": 110, | |
| "train_runtime": 479.9278, | |
| "train_tokens_per_second": 30041.851 | |
| }, | |
| { | |
| "epoch": 0.14007308160779536, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 4.940768771842896e-05, | |
| "loss": 0.907, | |
| "num_input_tokens_seen": 15073280, | |
| "step": 115, | |
| "train_runtime": 501.7348, | |
| "train_tokens_per_second": 30042.328 | |
| }, | |
| { | |
| "epoch": 0.146163215590743, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 4.9354820605630745e-05, | |
| "loss": 0.8877, | |
| "num_input_tokens_seen": 15728640, | |
| "step": 120, | |
| "train_runtime": 523.4795, | |
| "train_tokens_per_second": 30046.335 | |
| }, | |
| { | |
| "epoch": 0.15225334957369063, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 4.929972467244645e-05, | |
| "loss": 0.9025, | |
| "num_input_tokens_seen": 16384000, | |
| "step": 125, | |
| "train_runtime": 545.2706, | |
| "train_tokens_per_second": 30047.468 | |
| }, | |
| { | |
| "epoch": 0.15834348355663824, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 4.9242404960955456e-05, | |
| "loss": 0.8531, | |
| "num_input_tokens_seen": 17039360, | |
| "step": 130, | |
| "train_runtime": 567.0521, | |
| "train_tokens_per_second": 30049.021 | |
| }, | |
| { | |
| "epoch": 0.16443361753958588, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.918286671674523e-05, | |
| "loss": 0.8443, | |
| "num_input_tokens_seen": 17694720, | |
| "step": 135, | |
| "train_runtime": 588.838, | |
| "train_tokens_per_second": 30050.236 | |
| }, | |
| { | |
| "epoch": 0.1705237515225335, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.912111538843124e-05, | |
| "loss": 0.8392, | |
| "num_input_tokens_seen": 18350080, | |
| "step": 140, | |
| "train_runtime": 610.6656, | |
| "train_tokens_per_second": 30049.309 | |
| }, | |
| { | |
| "epoch": 0.17661388550548113, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.905715662715835e-05, | |
| "loss": 0.8256, | |
| "num_input_tokens_seen": 19005440, | |
| "step": 145, | |
| "train_runtime": 632.4494, | |
| "train_tokens_per_second": 30050.53 | |
| }, | |
| { | |
| "epoch": 0.18270401948842874, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.899099628608365e-05, | |
| "loss": 0.819, | |
| "num_input_tokens_seen": 19660800, | |
| "step": 150, | |
| "train_runtime": 654.24, | |
| "train_tokens_per_second": 30051.355 | |
| }, | |
| { | |
| "epoch": 0.18879415347137637, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.8922640419840826e-05, | |
| "loss": 0.8083, | |
| "num_input_tokens_seen": 20316160, | |
| "step": 155, | |
| "train_runtime": 675.9786, | |
| "train_tokens_per_second": 30054.443 | |
| }, | |
| { | |
| "epoch": 0.19488428745432398, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 4.885209528398603e-05, | |
| "loss": 0.7974, | |
| "num_input_tokens_seen": 20971520, | |
| "step": 160, | |
| "train_runtime": 697.7598, | |
| "train_tokens_per_second": 30055.5 | |
| }, | |
| { | |
| "epoch": 0.20097442143727162, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 4.8779367334425466e-05, | |
| "loss": 0.7856, | |
| "num_input_tokens_seen": 21626880, | |
| "step": 165, | |
| "train_runtime": 719.5124, | |
| "train_tokens_per_second": 30057.687 | |
| }, | |
| { | |
| "epoch": 0.20706455542021923, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.87044632268245e-05, | |
| "loss": 0.7741, | |
| "num_input_tokens_seen": 22282240, | |
| "step": 170, | |
| "train_runtime": 741.2635, | |
| "train_tokens_per_second": 30059.812 | |
| }, | |
| { | |
| "epoch": 0.21315468940316687, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.8627389815998654e-05, | |
| "loss": 0.762, | |
| "num_input_tokens_seen": 22937600, | |
| "step": 175, | |
| "train_runtime": 763.0585, | |
| "train_tokens_per_second": 30060.082 | |
| }, | |
| { | |
| "epoch": 0.2192448233861145, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.854815415528624e-05, | |
| "loss": 0.7554, | |
| "num_input_tokens_seen": 23592960, | |
| "step": 180, | |
| "train_runtime": 784.8921, | |
| "train_tokens_per_second": 30058.857 | |
| }, | |
| { | |
| "epoch": 0.22533495736906212, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 4.8466763495902886e-05, | |
| "loss": 0.7566, | |
| "num_input_tokens_seen": 24248320, | |
| "step": 185, | |
| "train_runtime": 806.6931, | |
| "train_tokens_per_second": 30058.917 | |
| }, | |
| { | |
| "epoch": 0.23142509135200975, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 4.838322528627796e-05, | |
| "loss": 0.7454, | |
| "num_input_tokens_seen": 24903680, | |
| "step": 190, | |
| "train_runtime": 828.4828, | |
| "train_tokens_per_second": 30059.381 | |
| }, | |
| { | |
| "epoch": 0.23751522533495736, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.829754717137291e-05, | |
| "loss": 0.7475, | |
| "num_input_tokens_seen": 25559040, | |
| "step": 195, | |
| "train_runtime": 850.3012, | |
| "train_tokens_per_second": 30058.808 | |
| }, | |
| { | |
| "epoch": 0.243605359317905, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.820973699198164e-05, | |
| "loss": 0.7259, | |
| "num_input_tokens_seen": 26214400, | |
| "step": 200, | |
| "train_runtime": 872.1297, | |
| "train_tokens_per_second": 30057.916 | |
| }, | |
| { | |
| "epoch": 0.2496954933008526, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.811980278401299e-05, | |
| "loss": 0.7284, | |
| "num_input_tokens_seen": 26869760, | |
| "step": 205, | |
| "train_runtime": 893.9692, | |
| "train_tokens_per_second": 30056.694 | |
| }, | |
| { | |
| "epoch": 0.2557856272838002, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 4.802775277775529e-05, | |
| "loss": 0.7169, | |
| "num_input_tokens_seen": 27525120, | |
| "step": 210, | |
| "train_runtime": 915.7965, | |
| "train_tokens_per_second": 30055.935 | |
| }, | |
| { | |
| "epoch": 0.2618757612667479, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.793359539712322e-05, | |
| "loss": 0.7164, | |
| "num_input_tokens_seen": 28180480, | |
| "step": 215, | |
| "train_runtime": 937.6276, | |
| "train_tokens_per_second": 30055.088 | |
| }, | |
| { | |
| "epoch": 0.2679658952496955, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.783733925888685e-05, | |
| "loss": 0.7133, | |
| "num_input_tokens_seen": 28835840, | |
| "step": 220, | |
| "train_runtime": 959.4192, | |
| "train_tokens_per_second": 30055.516 | |
| }, | |
| { | |
| "epoch": 0.2740560292326431, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.773899317188311e-05, | |
| "loss": 0.7116, | |
| "num_input_tokens_seen": 29491200, | |
| "step": 225, | |
| "train_runtime": 981.209, | |
| "train_tokens_per_second": 30055.981 | |
| }, | |
| { | |
| "epoch": 0.2801461632155907, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.763856613620965e-05, | |
| "loss": 0.7029, | |
| "num_input_tokens_seen": 30146560, | |
| "step": 230, | |
| "train_runtime": 1002.9822, | |
| "train_tokens_per_second": 30056.926 | |
| }, | |
| { | |
| "epoch": 0.2862362971985384, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 4.7536067342401194e-05, | |
| "loss": 0.6875, | |
| "num_input_tokens_seen": 30801920, | |
| "step": 235, | |
| "train_runtime": 1024.7415, | |
| "train_tokens_per_second": 30058.234 | |
| }, | |
| { | |
| "epoch": 0.292326431181486, | |
| "grad_norm": 1.625, | |
| "learning_rate": 4.7431506170588456e-05, | |
| "loss": 0.6949, | |
| "num_input_tokens_seen": 31457280, | |
| "step": 240, | |
| "train_runtime": 1046.5332, | |
| "train_tokens_per_second": 30058.558 | |
| }, | |
| { | |
| "epoch": 0.2984165651644336, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.732489218963978e-05, | |
| "loss": 0.6828, | |
| "num_input_tokens_seen": 32112640, | |
| "step": 245, | |
| "train_runtime": 1068.248, | |
| "train_tokens_per_second": 30061.035 | |
| }, | |
| { | |
| "epoch": 0.30450669914738127, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.721623515628537e-05, | |
| "loss": 0.6958, | |
| "num_input_tokens_seen": 32768000, | |
| "step": 250, | |
| "train_runtime": 1090.0205, | |
| "train_tokens_per_second": 30061.819 | |
| }, | |
| { | |
| "epoch": 0.3105968331303289, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.710554501422447e-05, | |
| "loss": 0.6947, | |
| "num_input_tokens_seen": 33423360, | |
| "step": 255, | |
| "train_runtime": 1111.7983, | |
| "train_tokens_per_second": 30062.432 | |
| }, | |
| { | |
| "epoch": 0.3166869671132765, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.6992831893215325e-05, | |
| "loss": 0.6836, | |
| "num_input_tokens_seen": 34078720, | |
| "step": 260, | |
| "train_runtime": 1133.6327, | |
| "train_tokens_per_second": 30061.518 | |
| }, | |
| { | |
| "epoch": 0.3227771010962241, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 4.6878106108148215e-05, | |
| "loss": 0.6701, | |
| "num_input_tokens_seen": 34734080, | |
| "step": 265, | |
| "train_runtime": 1155.4501, | |
| "train_tokens_per_second": 30061.081 | |
| }, | |
| { | |
| "epoch": 0.32886723507917176, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.676137815810142e-05, | |
| "loss": 0.6729, | |
| "num_input_tokens_seen": 35389440, | |
| "step": 270, | |
| "train_runtime": 1177.285, | |
| "train_tokens_per_second": 30060.214 | |
| }, | |
| { | |
| "epoch": 0.33495736906211937, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.664265872538048e-05, | |
| "loss": 0.6687, | |
| "num_input_tokens_seen": 36044800, | |
| "step": 275, | |
| "train_runtime": 1199.1576, | |
| "train_tokens_per_second": 30058.434 | |
| }, | |
| { | |
| "epoch": 0.341047503045067, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.6521958674540554e-05, | |
| "loss": 0.669, | |
| "num_input_tokens_seen": 36700160, | |
| "step": 280, | |
| "train_runtime": 1220.9936, | |
| "train_tokens_per_second": 30057.62 | |
| }, | |
| { | |
| "epoch": 0.3471376370280146, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.639928905139216e-05, | |
| "loss": 0.6637, | |
| "num_input_tokens_seen": 37355520, | |
| "step": 285, | |
| "train_runtime": 1242.9648, | |
| "train_tokens_per_second": 30053.563 | |
| }, | |
| { | |
| "epoch": 0.35322777101096225, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.627466108199037e-05, | |
| "loss": 0.659, | |
| "num_input_tokens_seen": 38010880, | |
| "step": 290, | |
| "train_runtime": 1264.7781, | |
| "train_tokens_per_second": 30053.398 | |
| }, | |
| { | |
| "epoch": 0.35931790499390986, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.614808617160737e-05, | |
| "loss": 0.6573, | |
| "num_input_tokens_seen": 38666240, | |
| "step": 295, | |
| "train_runtime": 1286.5591, | |
| "train_tokens_per_second": 30053.995 | |
| }, | |
| { | |
| "epoch": 0.3654080389768575, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.601957590368884e-05, | |
| "loss": 0.6545, | |
| "num_input_tokens_seen": 39321600, | |
| "step": 300, | |
| "train_runtime": 1308.3044, | |
| "train_tokens_per_second": 30055.39 | |
| }, | |
| { | |
| "epoch": 0.37149817295980514, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.5889142038793766e-05, | |
| "loss": 0.6364, | |
| "num_input_tokens_seen": 39976960, | |
| "step": 305, | |
| "train_runtime": 1330.1444, | |
| "train_tokens_per_second": 30054.601 | |
| }, | |
| { | |
| "epoch": 0.37758830694275275, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.5756796513518276e-05, | |
| "loss": 0.6487, | |
| "num_input_tokens_seen": 40632320, | |
| "step": 310, | |
| "train_runtime": 1351.8973, | |
| "train_tokens_per_second": 30055.773 | |
| }, | |
| { | |
| "epoch": 0.38367844092570036, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.5622551439403226e-05, | |
| "loss": 0.6375, | |
| "num_input_tokens_seen": 41287680, | |
| "step": 315, | |
| "train_runtime": 1373.641, | |
| "train_tokens_per_second": 30057.111 | |
| }, | |
| { | |
| "epoch": 0.38976857490864797, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.548641910182582e-05, | |
| "loss": 0.6449, | |
| "num_input_tokens_seen": 41943040, | |
| "step": 320, | |
| "train_runtime": 1395.3852, | |
| "train_tokens_per_second": 30058.395 | |
| }, | |
| { | |
| "epoch": 0.39585870889159563, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.534841195887531e-05, | |
| "loss": 0.6377, | |
| "num_input_tokens_seen": 42598400, | |
| "step": 325, | |
| "train_runtime": 1417.1617, | |
| "train_tokens_per_second": 30058.955 | |
| }, | |
| { | |
| "epoch": 0.40194884287454324, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.520854264021296e-05, | |
| "loss": 0.6312, | |
| "num_input_tokens_seen": 43253760, | |
| "step": 330, | |
| "train_runtime": 1439.0046, | |
| "train_tokens_per_second": 30058.111 | |
| }, | |
| { | |
| "epoch": 0.40803897685749085, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.506682394591614e-05, | |
| "loss": 0.625, | |
| "num_input_tokens_seen": 43909120, | |
| "step": 335, | |
| "train_runtime": 1460.7409, | |
| "train_tokens_per_second": 30059.485 | |
| }, | |
| { | |
| "epoch": 0.41412911084043846, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.492326884530705e-05, | |
| "loss": 0.6168, | |
| "num_input_tokens_seen": 44564480, | |
| "step": 340, | |
| "train_runtime": 1482.5072, | |
| "train_tokens_per_second": 30060.212 | |
| }, | |
| { | |
| "epoch": 0.42021924482338613, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.477789047576574e-05, | |
| "loss": 0.6228, | |
| "num_input_tokens_seen": 45219840, | |
| "step": 345, | |
| "train_runtime": 1504.2816, | |
| "train_tokens_per_second": 30060.754 | |
| }, | |
| { | |
| "epoch": 0.42630937880633374, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 4.463070214152791e-05, | |
| "loss": 0.62, | |
| "num_input_tokens_seen": 45875200, | |
| "step": 350, | |
| "train_runtime": 1526.0478, | |
| "train_tokens_per_second": 30061.443 | |
| }, | |
| { | |
| "epoch": 0.43239951278928135, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.448171731246736e-05, | |
| "loss": 0.625, | |
| "num_input_tokens_seen": 46530560, | |
| "step": 355, | |
| "train_runtime": 1547.7838, | |
| "train_tokens_per_second": 30062.7 | |
| }, | |
| { | |
| "epoch": 0.438489646772229, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.4330949622863306e-05, | |
| "loss": 0.6146, | |
| "num_input_tokens_seen": 47185920, | |
| "step": 360, | |
| "train_runtime": 1569.5543, | |
| "train_tokens_per_second": 30063.261 | |
| }, | |
| { | |
| "epoch": 0.4445797807551766, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.417841287015263e-05, | |
| "loss": 0.6044, | |
| "num_input_tokens_seen": 47841280, | |
| "step": 365, | |
| "train_runtime": 1591.3459, | |
| "train_tokens_per_second": 30063.407 | |
| }, | |
| { | |
| "epoch": 0.45066991473812423, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.402412101366722e-05, | |
| "loss": 0.6129, | |
| "num_input_tokens_seen": 48496640, | |
| "step": 370, | |
| "train_runtime": 1613.1368, | |
| "train_tokens_per_second": 30063.563 | |
| }, | |
| { | |
| "epoch": 0.45676004872107184, | |
| "grad_norm": 1.75, | |
| "learning_rate": 4.38680881733565e-05, | |
| "loss": 0.6078, | |
| "num_input_tokens_seen": 49152000, | |
| "step": 375, | |
| "train_runtime": 1634.9595, | |
| "train_tokens_per_second": 30063.131 | |
| }, | |
| { | |
| "epoch": 0.4628501827040195, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.371032862849525e-05, | |
| "loss": 0.606, | |
| "num_input_tokens_seen": 49807360, | |
| "step": 380, | |
| "train_runtime": 1656.7683, | |
| "train_tokens_per_second": 30062.96 | |
| }, | |
| { | |
| "epoch": 0.4689403166869671, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.3550856816376815e-05, | |
| "loss": 0.6063, | |
| "num_input_tokens_seen": 50462720, | |
| "step": 385, | |
| "train_runtime": 1678.5702, | |
| "train_tokens_per_second": 30062.919 | |
| }, | |
| { | |
| "epoch": 0.47503045066991473, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.3389687330991914e-05, | |
| "loss": 0.6039, | |
| "num_input_tokens_seen": 51118080, | |
| "step": 390, | |
| "train_runtime": 1700.3779, | |
| "train_tokens_per_second": 30062.776 | |
| }, | |
| { | |
| "epoch": 0.48112058465286234, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.3226834921693064e-05, | |
| "loss": 0.5973, | |
| "num_input_tokens_seen": 51773440, | |
| "step": 395, | |
| "train_runtime": 1722.1624, | |
| "train_tokens_per_second": 30063.042 | |
| }, | |
| { | |
| "epoch": 0.48721071863581, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.306231449184481e-05, | |
| "loss": 0.5986, | |
| "num_input_tokens_seen": 52428800, | |
| "step": 400, | |
| "train_runtime": 1743.9777, | |
| "train_tokens_per_second": 30062.77 | |
| }, | |
| { | |
| "epoch": 0.4933008526187576, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 4.289614109745984e-05, | |
| "loss": 0.5919, | |
| "num_input_tokens_seen": 53084160, | |
| "step": 405, | |
| "train_runtime": 1765.7704, | |
| "train_tokens_per_second": 30062.889 | |
| }, | |
| { | |
| "epoch": 0.4993909866017052, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.272832994582112e-05, | |
| "loss": 0.6017, | |
| "num_input_tokens_seen": 53739520, | |
| "step": 410, | |
| "train_runtime": 1787.5322, | |
| "train_tokens_per_second": 30063.526 | |
| }, | |
| { | |
| "epoch": 0.5054811205846529, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.255889639409028e-05, | |
| "loss": 0.5838, | |
| "num_input_tokens_seen": 54394880, | |
| "step": 415, | |
| "train_runtime": 1809.3305, | |
| "train_tokens_per_second": 30063.54 | |
| }, | |
| { | |
| "epoch": 0.5115712545676004, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.23878559479021e-05, | |
| "loss": 0.5906, | |
| "num_input_tokens_seen": 55050240, | |
| "step": 420, | |
| "train_runtime": 1831.1085, | |
| "train_tokens_per_second": 30063.888 | |
| }, | |
| { | |
| "epoch": 0.5176613885505481, | |
| "grad_norm": 1.875, | |
| "learning_rate": 4.221522425994563e-05, | |
| "loss": 0.5879, | |
| "num_input_tokens_seen": 55705600, | |
| "step": 425, | |
| "train_runtime": 1852.9092, | |
| "train_tokens_per_second": 30063.859 | |
| }, | |
| { | |
| "epoch": 0.5237515225334958, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.2041017128531665e-05, | |
| "loss": 0.5895, | |
| "num_input_tokens_seen": 56360960, | |
| "step": 430, | |
| "train_runtime": 1874.732, | |
| "train_tokens_per_second": 30063.476 | |
| }, | |
| { | |
| "epoch": 0.5298416565164433, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.186525049614699e-05, | |
| "loss": 0.5794, | |
| "num_input_tokens_seen": 57016320, | |
| "step": 435, | |
| "train_runtime": 1896.6145, | |
| "train_tokens_per_second": 30062.155 | |
| }, | |
| { | |
| "epoch": 0.535931790499391, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.168794044799544e-05, | |
| "loss": 0.5833, | |
| "num_input_tokens_seen": 57671680, | |
| "step": 440, | |
| "train_runtime": 1918.4108, | |
| "train_tokens_per_second": 30062.216 | |
| }, | |
| { | |
| "epoch": 0.5420219244823387, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.150910321052584e-05, | |
| "loss": 0.5748, | |
| "num_input_tokens_seen": 58327040, | |
| "step": 445, | |
| "train_runtime": 1940.1777, | |
| "train_tokens_per_second": 30062.731 | |
| }, | |
| { | |
| "epoch": 0.5481120584652862, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.132875514994701e-05, | |
| "loss": 0.5874, | |
| "num_input_tokens_seen": 58982400, | |
| "step": 450, | |
| "train_runtime": 1961.9296, | |
| "train_tokens_per_second": 30063.464 | |
| }, | |
| { | |
| "epoch": 0.5542021924482339, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.114691277073013e-05, | |
| "loss": 0.5807, | |
| "num_input_tokens_seen": 59637760, | |
| "step": 455, | |
| "train_runtime": 1983.7172, | |
| "train_tokens_per_second": 30063.639 | |
| }, | |
| { | |
| "epoch": 0.5602923264311814, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 4.096359271409822e-05, | |
| "loss": 0.5897, | |
| "num_input_tokens_seen": 60293120, | |
| "step": 460, | |
| "train_runtime": 2005.5211, | |
| "train_tokens_per_second": 30063.569 | |
| }, | |
| { | |
| "epoch": 0.5663824604141291, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.077881175650332e-05, | |
| "loss": 0.5829, | |
| "num_input_tokens_seen": 60948480, | |
| "step": 465, | |
| "train_runtime": 2027.4851, | |
| "train_tokens_per_second": 30061.124 | |
| }, | |
| { | |
| "epoch": 0.5724725943970768, | |
| "grad_norm": 1.84375, | |
| "learning_rate": 4.059258680809114e-05, | |
| "loss": 0.568, | |
| "num_input_tokens_seen": 61603840, | |
| "step": 470, | |
| "train_runtime": 2049.3409, | |
| "train_tokens_per_second": 30060.319 | |
| }, | |
| { | |
| "epoch": 0.5785627283800243, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 4.040493491115355e-05, | |
| "loss": 0.5716, | |
| "num_input_tokens_seen": 62259200, | |
| "step": 475, | |
| "train_runtime": 2071.288, | |
| "train_tokens_per_second": 30058.205 | |
| }, | |
| { | |
| "epoch": 0.584652862362972, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.0215873238568986e-05, | |
| "loss": 0.577, | |
| "num_input_tokens_seen": 62914560, | |
| "step": 480, | |
| "train_runtime": 2093.1653, | |
| "train_tokens_per_second": 30057.139 | |
| }, | |
| { | |
| "epoch": 0.5907429963459196, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.002541909223084e-05, | |
| "loss": 0.5727, | |
| "num_input_tokens_seen": 63569920, | |
| "step": 485, | |
| "train_runtime": 2115.077, | |
| "train_tokens_per_second": 30055.605 | |
| }, | |
| { | |
| "epoch": 0.5968331303288672, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 3.983358990146415e-05, | |
| "loss": 0.5732, | |
| "num_input_tokens_seen": 64225280, | |
| "step": 490, | |
| "train_runtime": 2137.0427, | |
| "train_tokens_per_second": 30053.344 | |
| }, | |
| { | |
| "epoch": 0.6029232643118149, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 3.964040322143049e-05, | |
| "loss": 0.5649, | |
| "num_input_tokens_seen": 64880640, | |
| "step": 495, | |
| "train_runtime": 2158.8505, | |
| "train_tokens_per_second": 30053.327 | |
| }, | |
| { | |
| "epoch": 0.6090133982947625, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 3.9445876731521433e-05, | |
| "loss": 0.5743, | |
| "num_input_tokens_seen": 65536000, | |
| "step": 500, | |
| "train_runtime": 2180.692, | |
| "train_tokens_per_second": 30052.846 | |
| }, | |
| { | |
| "epoch": 0.6151035322777101, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 3.925002823374071e-05, | |
| "loss": 0.5682, | |
| "num_input_tokens_seen": 66191360, | |
| "step": 505, | |
| "train_runtime": 2210.8877, | |
| "train_tokens_per_second": 29938.816 | |
| }, | |
| { | |
| "epoch": 0.6211936662606578, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 3.9052875651074936e-05, | |
| "loss": 0.5651, | |
| "num_input_tokens_seen": 66846720, | |
| "step": 510, | |
| "train_runtime": 2232.8769, | |
| "train_tokens_per_second": 29937.486 | |
| }, | |
| { | |
| "epoch": 0.6272838002436053, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 3.8854437025853505e-05, | |
| "loss": 0.5674, | |
| "num_input_tokens_seen": 67502080, | |
| "step": 515, | |
| "train_runtime": 2254.7216, | |
| "train_tokens_per_second": 29938.1 | |
| }, | |
| { | |
| "epoch": 0.633373934226553, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 3.86547305180974e-05, | |
| "loss": 0.5636, | |
| "num_input_tokens_seen": 68157440, | |
| "step": 520, | |
| "train_runtime": 2276.679, | |
| "train_tokens_per_second": 29937.22 | |
| }, | |
| { | |
| "epoch": 0.6394640682095006, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 3.845377440385731e-05, | |
| "loss": 0.5706, | |
| "num_input_tokens_seen": 68812800, | |
| "step": 525, | |
| "train_runtime": 2298.5683, | |
| "train_tokens_per_second": 29937.244 | |
| }, | |
| { | |
| "epoch": 0.6455542021924482, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.825158707354108e-05, | |
| "loss": 0.5576, | |
| "num_input_tokens_seen": 69468160, | |
| "step": 530, | |
| "train_runtime": 2320.5639, | |
| "train_tokens_per_second": 29935.897 | |
| }, | |
| { | |
| "epoch": 0.6516443361753959, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 3.8048187030230745e-05, | |
| "loss": 0.5558, | |
| "num_input_tokens_seen": 70123520, | |
| "step": 535, | |
| "train_runtime": 2342.5476, | |
| "train_tokens_per_second": 29934.726 | |
| }, | |
| { | |
| "epoch": 0.6577344701583435, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 3.784359288798921e-05, | |
| "loss": 0.5547, | |
| "num_input_tokens_seen": 70778880, | |
| "step": 540, | |
| "train_runtime": 2364.3333, | |
| "train_tokens_per_second": 29936.084 | |
| }, | |
| { | |
| "epoch": 0.6638246041412911, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 3.763782337015683e-05, | |
| "loss": 0.5675, | |
| "num_input_tokens_seen": 71434240, | |
| "step": 545, | |
| "train_runtime": 2386.1478, | |
| "train_tokens_per_second": 29937.056 | |
| }, | |
| { | |
| "epoch": 0.6699147381242387, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 3.743089730763792e-05, | |
| "loss": 0.5597, | |
| "num_input_tokens_seen": 72089600, | |
| "step": 550, | |
| "train_runtime": 2407.9479, | |
| "train_tokens_per_second": 29938.189 | |
| }, | |
| { | |
| "epoch": 0.6760048721071864, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 3.722283363717743e-05, | |
| "loss": 0.5529, | |
| "num_input_tokens_seen": 72744960, | |
| "step": 555, | |
| "train_runtime": 2429.7703, | |
| "train_tokens_per_second": 29939.027 | |
| }, | |
| { | |
| "epoch": 0.682095006090134, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 3.7013651399628004e-05, | |
| "loss": 0.5622, | |
| "num_input_tokens_seen": 73400320, | |
| "step": 560, | |
| "train_runtime": 2451.4626, | |
| "train_tokens_per_second": 29941.44 | |
| }, | |
| { | |
| "epoch": 0.6881851400730816, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 3.6803369738207444e-05, | |
| "loss": 0.5582, | |
| "num_input_tokens_seen": 74055680, | |
| "step": 565, | |
| "train_runtime": 2473.2169, | |
| "train_tokens_per_second": 29943.06 | |
| }, | |
| { | |
| "epoch": 0.6942752740560292, | |
| "grad_norm": 1.25, | |
| "learning_rate": 3.6592007896746846e-05, | |
| "loss": 0.551, | |
| "num_input_tokens_seen": 74711040, | |
| "step": 570, | |
| "train_runtime": 2494.9949, | |
| "train_tokens_per_second": 29944.366 | |
| }, | |
| { | |
| "epoch": 0.7003654080389768, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.6379585217929474e-05, | |
| "loss": 0.5601, | |
| "num_input_tokens_seen": 75366400, | |
| "step": 575, | |
| "train_runtime": 2516.7415, | |
| "train_tokens_per_second": 29946.024 | |
| }, | |
| { | |
| "epoch": 0.7064555420219245, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 3.6166121141520655e-05, | |
| "loss": 0.5487, | |
| "num_input_tokens_seen": 76021760, | |
| "step": 580, | |
| "train_runtime": 2538.5185, | |
| "train_tokens_per_second": 29947.294 | |
| }, | |
| { | |
| "epoch": 0.7125456760048721, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 3.595163520258873e-05, | |
| "loss": 0.5604, | |
| "num_input_tokens_seen": 76677120, | |
| "step": 585, | |
| "train_runtime": 2560.24, | |
| "train_tokens_per_second": 29949.192 | |
| }, | |
| { | |
| "epoch": 0.7186358099878197, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 3.573614702971735e-05, | |
| "loss": 0.5521, | |
| "num_input_tokens_seen": 77332480, | |
| "step": 590, | |
| "train_runtime": 2582.0217, | |
| "train_tokens_per_second": 29950.36 | |
| }, | |
| { | |
| "epoch": 0.7247259439707674, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.551967634320911e-05, | |
| "loss": 0.5472, | |
| "num_input_tokens_seen": 77987840, | |
| "step": 595, | |
| "train_runtime": 2603.7692, | |
| "train_tokens_per_second": 29951.902 | |
| }, | |
| { | |
| "epoch": 0.730816077953715, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 3.530224295328096e-05, | |
| "loss": 0.5447, | |
| "num_input_tokens_seen": 78643200, | |
| "step": 600, | |
| "train_runtime": 2625.5133, | |
| "train_tokens_per_second": 29953.457 | |
| }, | |
| { | |
| "epoch": 0.7369062119366626, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 3.508386675825116e-05, | |
| "loss": 0.5441, | |
| "num_input_tokens_seen": 79298560, | |
| "step": 605, | |
| "train_runtime": 2647.2508, | |
| "train_tokens_per_second": 29955.061 | |
| }, | |
| { | |
| "epoch": 0.7429963459196103, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 3.486456774271837e-05, | |
| "loss": 0.5417, | |
| "num_input_tokens_seen": 79953920, | |
| "step": 610, | |
| "train_runtime": 2668.9717, | |
| "train_tokens_per_second": 29956.825 | |
| }, | |
| { | |
| "epoch": 0.7490864799025578, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 3.464436597573276e-05, | |
| "loss": 0.5495, | |
| "num_input_tokens_seen": 80609280, | |
| "step": 615, | |
| "train_runtime": 2690.7063, | |
| "train_tokens_per_second": 29958.409 | |
| }, | |
| { | |
| "epoch": 0.7551766138855055, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 3.4423281608959376e-05, | |
| "loss": 0.5388, | |
| "num_input_tokens_seen": 81264640, | |
| "step": 620, | |
| "train_runtime": 2712.4728, | |
| "train_tokens_per_second": 29959.615 | |
| }, | |
| { | |
| "epoch": 0.761266747868453, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 3.420133487483402e-05, | |
| "loss": 0.5358, | |
| "num_input_tokens_seen": 81920000, | |
| "step": 625, | |
| "train_runtime": 2734.2134, | |
| "train_tokens_per_second": 29961.085 | |
| }, | |
| { | |
| "epoch": 0.7673568818514007, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 3.3978546084711595e-05, | |
| "loss": 0.5433, | |
| "num_input_tokens_seen": 82575360, | |
| "step": 630, | |
| "train_runtime": 2755.9692, | |
| "train_tokens_per_second": 29962.367 | |
| }, | |
| { | |
| "epoch": 0.7734470158343484, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 3.375493562700742e-05, | |
| "loss": 0.5464, | |
| "num_input_tokens_seen": 83230720, | |
| "step": 635, | |
| "train_runtime": 2777.7109, | |
| "train_tokens_per_second": 29963.78 | |
| }, | |
| { | |
| "epoch": 0.7795371498172959, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 3.353052396533133e-05, | |
| "loss": 0.5404, | |
| "num_input_tokens_seen": 83886080, | |
| "step": 640, | |
| "train_runtime": 2799.4496, | |
| "train_tokens_per_second": 29965.204 | |
| }, | |
| { | |
| "epoch": 0.7856272838002436, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.330533163661501e-05, | |
| "loss": 0.5427, | |
| "num_input_tokens_seen": 84541440, | |
| "step": 645, | |
| "train_runtime": 2821.1758, | |
| "train_tokens_per_second": 29966.739 | |
| }, | |
| { | |
| "epoch": 0.7917174177831913, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.3079379249232475e-05, | |
| "loss": 0.5393, | |
| "num_input_tokens_seen": 85196800, | |
| "step": 650, | |
| "train_runtime": 2842.9286, | |
| "train_tokens_per_second": 29967.971 | |
| }, | |
| { | |
| "epoch": 0.7978075517661388, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 3.2852687481114235e-05, | |
| "loss": 0.5404, | |
| "num_input_tokens_seen": 85852160, | |
| "step": 655, | |
| "train_runtime": 2864.6768, | |
| "train_tokens_per_second": 29969.23 | |
| }, | |
| { | |
| "epoch": 0.8038976857490865, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 3.2625277077854855e-05, | |
| "loss": 0.5407, | |
| "num_input_tokens_seen": 86507520, | |
| "step": 660, | |
| "train_runtime": 2886.4158, | |
| "train_tokens_per_second": 29970.567 | |
| }, | |
| { | |
| "epoch": 0.8099878197320342, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.239716885081446e-05, | |
| "loss": 0.5304, | |
| "num_input_tokens_seen": 87162880, | |
| "step": 665, | |
| "train_runtime": 2908.1158, | |
| "train_tokens_per_second": 29972.287 | |
| }, | |
| { | |
| "epoch": 0.8160779537149817, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 3.216838367521424e-05, | |
| "loss": 0.5397, | |
| "num_input_tokens_seen": 87818240, | |
| "step": 670, | |
| "train_runtime": 2929.8531, | |
| "train_tokens_per_second": 29973.598 | |
| }, | |
| { | |
| "epoch": 0.8221680876979294, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 3.193894248822599e-05, | |
| "loss": 0.5362, | |
| "num_input_tokens_seen": 88473600, | |
| "step": 675, | |
| "train_runtime": 2951.6047, | |
| "train_tokens_per_second": 29974.745 | |
| }, | |
| { | |
| "epoch": 0.8282582216808769, | |
| "grad_norm": 1.25, | |
| "learning_rate": 3.17088662870561e-05, | |
| "loss": 0.5333, | |
| "num_input_tokens_seen": 89128960, | |
| "step": 680, | |
| "train_runtime": 2973.4611, | |
| "train_tokens_per_second": 29974.82 | |
| }, | |
| { | |
| "epoch": 0.8343483556638246, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 3.147817612702403e-05, | |
| "loss": 0.5333, | |
| "num_input_tokens_seen": 89784320, | |
| "step": 685, | |
| "train_runtime": 2995.2224, | |
| "train_tokens_per_second": 29975.844 | |
| }, | |
| { | |
| "epoch": 0.8404384896467723, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 3.124689311963535e-05, | |
| "loss": 0.5239, | |
| "num_input_tokens_seen": 90439680, | |
| "step": 690, | |
| "train_runtime": 3017.0068, | |
| "train_tokens_per_second": 29976.625 | |
| }, | |
| { | |
| "epoch": 0.8465286236297198, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 3.101503843064981e-05, | |
| "loss": 0.5356, | |
| "num_input_tokens_seen": 91095040, | |
| "step": 695, | |
| "train_runtime": 3038.7428, | |
| "train_tokens_per_second": 29977.872 | |
| }, | |
| { | |
| "epoch": 0.8526187576126675, | |
| "grad_norm": 1.125, | |
| "learning_rate": 3.078263327814438e-05, | |
| "loss": 0.5301, | |
| "num_input_tokens_seen": 91750400, | |
| "step": 700, | |
| "train_runtime": 3060.5883, | |
| "train_tokens_per_second": 29978.028 | |
| }, | |
| { | |
| "epoch": 0.8587088915956151, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 3.0549698930571386e-05, | |
| "loss": 0.5336, | |
| "num_input_tokens_seen": 92405760, | |
| "step": 705, | |
| "train_runtime": 3082.32, | |
| "train_tokens_per_second": 29979.288 | |
| }, | |
| { | |
| "epoch": 0.8647990255785627, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.0316256704812252e-05, | |
| "loss": 0.5262, | |
| "num_input_tokens_seen": 93061120, | |
| "step": 710, | |
| "train_runtime": 3104.0532, | |
| "train_tokens_per_second": 29980.517 | |
| }, | |
| { | |
| "epoch": 0.8708891595615104, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.0082327964226615e-05, | |
| "loss": 0.5249, | |
| "num_input_tokens_seen": 93716480, | |
| "step": 715, | |
| "train_runtime": 3125.8473, | |
| "train_tokens_per_second": 29981.145 | |
| }, | |
| { | |
| "epoch": 0.876979293544458, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 2.9847934116697307e-05, | |
| "loss": 0.5313, | |
| "num_input_tokens_seen": 94371840, | |
| "step": 720, | |
| "train_runtime": 3147.5696, | |
| "train_tokens_per_second": 29982.448 | |
| }, | |
| { | |
| "epoch": 0.8830694275274056, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 2.9613096612671225e-05, | |
| "loss": 0.5308, | |
| "num_input_tokens_seen": 95027200, | |
| "step": 725, | |
| "train_runtime": 3169.2945, | |
| "train_tokens_per_second": 29983.708 | |
| }, | |
| { | |
| "epoch": 0.8891595615103532, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 2.9377836943196256e-05, | |
| "loss": 0.5318, | |
| "num_input_tokens_seen": 95682560, | |
| "step": 730, | |
| "train_runtime": 3191.0555, | |
| "train_tokens_per_second": 29984.611 | |
| }, | |
| { | |
| "epoch": 0.8952496954933008, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 2.91421766379546e-05, | |
| "loss": 0.5383, | |
| "num_input_tokens_seen": 96337920, | |
| "step": 735, | |
| "train_runtime": 3212.7812, | |
| "train_tokens_per_second": 29985.833 | |
| }, | |
| { | |
| "epoch": 0.9013398294762485, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 2.8906137263292442e-05, | |
| "loss": 0.532, | |
| "num_input_tokens_seen": 96993280, | |
| "step": 740, | |
| "train_runtime": 3234.5148, | |
| "train_tokens_per_second": 29986.965 | |
| }, | |
| { | |
| "epoch": 0.9074299634591961, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 2.8669740420246334e-05, | |
| "loss": 0.5222, | |
| "num_input_tokens_seen": 97648640, | |
| "step": 745, | |
| "train_runtime": 3256.233, | |
| "train_tokens_per_second": 29988.222 | |
| }, | |
| { | |
| "epoch": 0.9135200974421437, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.843300774256638e-05, | |
| "loss": 0.52, | |
| "num_input_tokens_seen": 98304000, | |
| "step": 750, | |
| "train_runtime": 3277.9714, | |
| "train_tokens_per_second": 29989.279 | |
| }, | |
| { | |
| "epoch": 0.9196102314250914, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 2.819596089473646e-05, | |
| "loss": 0.5194, | |
| "num_input_tokens_seen": 98959360, | |
| "step": 755, | |
| "train_runtime": 3299.7126, | |
| "train_tokens_per_second": 29990.297 | |
| }, | |
| { | |
| "epoch": 0.925700365408039, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.795862156999157e-05, | |
| "loss": 0.5278, | |
| "num_input_tokens_seen": 99614720, | |
| "step": 760, | |
| "train_runtime": 3321.428, | |
| "train_tokens_per_second": 29991.534 | |
| }, | |
| { | |
| "epoch": 0.9317904993909866, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.7721011488332615e-05, | |
| "loss": 0.5221, | |
| "num_input_tokens_seen": 100270080, | |
| "step": 765, | |
| "train_runtime": 3343.2094, | |
| "train_tokens_per_second": 29992.163 | |
| }, | |
| { | |
| "epoch": 0.9378806333739342, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.748315239453868e-05, | |
| "loss": 0.5159, | |
| "num_input_tokens_seen": 100925440, | |
| "step": 770, | |
| "train_runtime": 3364.9146, | |
| "train_tokens_per_second": 29993.463 | |
| }, | |
| { | |
| "epoch": 0.9439707673568819, | |
| "grad_norm": 1.25, | |
| "learning_rate": 2.7245066056177093e-05, | |
| "loss": 0.5135, | |
| "num_input_tokens_seen": 101580800, | |
| "step": 775, | |
| "train_runtime": 3386.6156, | |
| "train_tokens_per_second": 29994.783 | |
| }, | |
| { | |
| "epoch": 0.9500609013398295, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.7006774261611373e-05, | |
| "loss": 0.5237, | |
| "num_input_tokens_seen": 102236160, | |
| "step": 780, | |
| "train_runtime": 3408.3514, | |
| "train_tokens_per_second": 29995.78 | |
| }, | |
| { | |
| "epoch": 0.9561510353227771, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 2.6768298818007253e-05, | |
| "loss": 0.5154, | |
| "num_input_tokens_seen": 102891520, | |
| "step": 785, | |
| "train_runtime": 3430.0863, | |
| "train_tokens_per_second": 29996.773 | |
| }, | |
| { | |
| "epoch": 0.9622411693057247, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 2.6529661549337032e-05, | |
| "loss": 0.5177, | |
| "num_input_tokens_seen": 103546880, | |
| "step": 790, | |
| "train_runtime": 3451.8095, | |
| "train_tokens_per_second": 29997.854 | |
| }, | |
| { | |
| "epoch": 0.9683313032886723, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.6290884294382366e-05, | |
| "loss": 0.5142, | |
| "num_input_tokens_seen": 104202240, | |
| "step": 795, | |
| "train_runtime": 3473.5565, | |
| "train_tokens_per_second": 29998.717 | |
| }, | |
| { | |
| "epoch": 0.97442143727162, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 2.6051988904735686e-05, | |
| "loss": 0.5138, | |
| "num_input_tokens_seen": 104857600, | |
| "step": 800, | |
| "train_runtime": 3495.316, | |
| "train_tokens_per_second": 29999.462 | |
| }, | |
| { | |
| "epoch": 0.9805115712545676, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.5812997242800456e-05, | |
| "loss": 0.5225, | |
| "num_input_tokens_seen": 105512960, | |
| "step": 805, | |
| "train_runtime": 3517.0562, | |
| "train_tokens_per_second": 30000.362 | |
| }, | |
| { | |
| "epoch": 0.9866017052375152, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 2.5573931179790472e-05, | |
| "loss": 0.5116, | |
| "num_input_tokens_seen": 106168320, | |
| "step": 810, | |
| "train_runtime": 3538.7869, | |
| "train_tokens_per_second": 30001.331 | |
| }, | |
| { | |
| "epoch": 0.9926918392204629, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 2.5334812593728296e-05, | |
| "loss": 0.526, | |
| "num_input_tokens_seen": 106823680, | |
| "step": 815, | |
| "train_runtime": 3560.5431, | |
| "train_tokens_per_second": 30002.074 | |
| }, | |
| { | |
| "epoch": 0.9987819732034104, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 2.5095663367443123e-05, | |
| "loss": 0.5278, | |
| "num_input_tokens_seen": 107479040, | |
| "step": 820, | |
| "train_runtime": 3582.2344, | |
| "train_tokens_per_second": 30003.352 | |
| }, | |
| { | |
| "epoch": 1.004872107186358, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.485650538656817e-05, | |
| "loss": 0.454, | |
| "num_input_tokens_seen": 108103680, | |
| "step": 825, | |
| "train_runtime": 3603.2471, | |
| "train_tokens_per_second": 30001.74 | |
| }, | |
| { | |
| "epoch": 1.0109622411693058, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 2.461736053753783e-05, | |
| "loss": 0.44, | |
| "num_input_tokens_seen": 108759040, | |
| "step": 830, | |
| "train_runtime": 3624.9733, | |
| "train_tokens_per_second": 30002.714 | |
| }, | |
| { | |
| "epoch": 1.0170523751522533, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.4378250705584737e-05, | |
| "loss": 0.4402, | |
| "num_input_tokens_seen": 109414400, | |
| "step": 835, | |
| "train_runtime": 3646.7181, | |
| "train_tokens_per_second": 30003.526 | |
| }, | |
| { | |
| "epoch": 1.0231425091352009, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.4139197772736942e-05, | |
| "loss": 0.4341, | |
| "num_input_tokens_seen": 110069760, | |
| "step": 840, | |
| "train_runtime": 3668.5013, | |
| "train_tokens_per_second": 30004.013 | |
| }, | |
| { | |
| "epoch": 1.0292326431181487, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.3900223615815438e-05, | |
| "loss": 0.4492, | |
| "num_input_tokens_seen": 110725120, | |
| "step": 845, | |
| "train_runtime": 3690.286, | |
| "train_tokens_per_second": 30004.482 | |
| }, | |
| { | |
| "epoch": 1.0353227771010962, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.3661350104432037e-05, | |
| "loss": 0.4401, | |
| "num_input_tokens_seen": 111380480, | |
| "step": 850, | |
| "train_runtime": 3712.0285, | |
| "train_tokens_per_second": 30005.287 | |
| }, | |
| { | |
| "epoch": 1.0414129110840438, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.3422599098988023e-05, | |
| "loss": 0.4402, | |
| "num_input_tokens_seen": 112035840, | |
| "step": 855, | |
| "train_runtime": 3733.7851, | |
| "train_tokens_per_second": 30005.969 | |
| }, | |
| { | |
| "epoch": 1.0475030450669915, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.3183992448673615e-05, | |
| "loss": 0.4383, | |
| "num_input_tokens_seen": 112691200, | |
| "step": 860, | |
| "train_runtime": 3755.54, | |
| "train_tokens_per_second": 30006.657 | |
| }, | |
| { | |
| "epoch": 1.053593179049939, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 2.294555198946845e-05, | |
| "loss": 0.4408, | |
| "num_input_tokens_seen": 113346560, | |
| "step": 865, | |
| "train_runtime": 3777.2755, | |
| "train_tokens_per_second": 30007.491 | |
| }, | |
| { | |
| "epoch": 1.0596833130328867, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 2.270729954214324e-05, | |
| "loss": 0.4344, | |
| "num_input_tokens_seen": 114001920, | |
| "step": 870, | |
| "train_runtime": 3799.0222, | |
| "train_tokens_per_second": 30008.227 | |
| }, | |
| { | |
| "epoch": 1.0657734470158344, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 2.2469256910262877e-05, | |
| "loss": 0.4417, | |
| "num_input_tokens_seen": 114657280, | |
| "step": 875, | |
| "train_runtime": 3820.7855, | |
| "train_tokens_per_second": 30008.824 | |
| }, | |
| { | |
| "epoch": 1.071863580998782, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 2.2231445878191107e-05, | |
| "loss": 0.4379, | |
| "num_input_tokens_seen": 115312640, | |
| "step": 880, | |
| "train_runtime": 3842.4905, | |
| "train_tokens_per_second": 30009.87 | |
| }, | |
| { | |
| "epoch": 1.0779537149817295, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.1993888209096897e-05, | |
| "loss": 0.4367, | |
| "num_input_tokens_seen": 115968000, | |
| "step": 885, | |
| "train_runtime": 3864.1859, | |
| "train_tokens_per_second": 30010.979 | |
| }, | |
| { | |
| "epoch": 1.0840438489646773, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.1756605642962827e-05, | |
| "loss": 0.439, | |
| "num_input_tokens_seen": 116623360, | |
| "step": 890, | |
| "train_runtime": 3885.9318, | |
| "train_tokens_per_second": 30011.685 | |
| }, | |
| { | |
| "epoch": 1.0901339829476249, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.1519619894595567e-05, | |
| "loss": 0.4357, | |
| "num_input_tokens_seen": 117278720, | |
| "step": 895, | |
| "train_runtime": 3907.6564, | |
| "train_tokens_per_second": 30012.547 | |
| }, | |
| { | |
| "epoch": 1.0962241169305724, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.1282952651638626e-05, | |
| "loss": 0.4365, | |
| "num_input_tokens_seen": 117934080, | |
| "step": 900, | |
| "train_runtime": 3929.4342, | |
| "train_tokens_per_second": 30012.993 | |
| }, | |
| { | |
| "epoch": 1.1023142509135202, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.1046625572587633e-05, | |
| "loss": 0.4301, | |
| "num_input_tokens_seen": 118589440, | |
| "step": 905, | |
| "train_runtime": 3951.2394, | |
| "train_tokens_per_second": 30013.225 | |
| }, | |
| { | |
| "epoch": 1.1084043848964678, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.0810660284808297e-05, | |
| "loss": 0.4309, | |
| "num_input_tokens_seen": 119244800, | |
| "step": 910, | |
| "train_runtime": 3972.9809, | |
| "train_tokens_per_second": 30013.937 | |
| }, | |
| { | |
| "epoch": 1.1144945188794153, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.0575078382557137e-05, | |
| "loss": 0.4336, | |
| "num_input_tokens_seen": 119900160, | |
| "step": 915, | |
| "train_runtime": 3994.6574, | |
| "train_tokens_per_second": 30015.129 | |
| }, | |
| { | |
| "epoch": 1.1205846528623629, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 2.0339901425005315e-05, | |
| "loss": 0.4329, | |
| "num_input_tokens_seen": 120555520, | |
| "step": 920, | |
| "train_runtime": 4016.3768, | |
| "train_tokens_per_second": 30015.989 | |
| }, | |
| { | |
| "epoch": 1.1266747868453106, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 2.0105150934265687e-05, | |
| "loss": 0.4377, | |
| "num_input_tokens_seen": 121210880, | |
| "step": 925, | |
| "train_runtime": 4038.1192, | |
| "train_tokens_per_second": 30016.667 | |
| }, | |
| { | |
| "epoch": 1.1327649208282582, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 1.9870848393423176e-05, | |
| "loss": 0.4414, | |
| "num_input_tokens_seen": 121866240, | |
| "step": 930, | |
| "train_runtime": 4059.8399, | |
| "train_tokens_per_second": 30017.499 | |
| }, | |
| { | |
| "epoch": 1.1388550548112057, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.963701524456877e-05, | |
| "loss": 0.4327, | |
| "num_input_tokens_seen": 122521600, | |
| "step": 935, | |
| "train_runtime": 4081.578, | |
| "train_tokens_per_second": 30018.194 | |
| }, | |
| { | |
| "epoch": 1.1449451887941535, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.9403672886837264e-05, | |
| "loss": 0.4283, | |
| "num_input_tokens_seen": 123176960, | |
| "step": 940, | |
| "train_runtime": 4103.2994, | |
| "train_tokens_per_second": 30019.004 | |
| }, | |
| { | |
| "epoch": 1.151035322777101, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.9170842674448942e-05, | |
| "loss": 0.4207, | |
| "num_input_tokens_seen": 123832320, | |
| "step": 945, | |
| "train_runtime": 4125.0267, | |
| "train_tokens_per_second": 30019.762 | |
| }, | |
| { | |
| "epoch": 1.1571254567600486, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.89385459147553e-05, | |
| "loss": 0.437, | |
| "num_input_tokens_seen": 124487680, | |
| "step": 950, | |
| "train_runtime": 4146.7599, | |
| "train_tokens_per_second": 30020.47 | |
| }, | |
| { | |
| "epoch": 1.1632155907429964, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1.8706803866289208e-05, | |
| "loss": 0.4381, | |
| "num_input_tokens_seen": 125143040, | |
| "step": 955, | |
| "train_runtime": 4168.4938, | |
| "train_tokens_per_second": 30021.165 | |
| }, | |
| { | |
| "epoch": 1.169305724725944, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.8475637736819335e-05, | |
| "loss": 0.4272, | |
| "num_input_tokens_seen": 125798400, | |
| "step": 960, | |
| "train_runtime": 4190.2193, | |
| "train_tokens_per_second": 30021.913 | |
| }, | |
| { | |
| "epoch": 1.1753958587088915, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.824506868140942e-05, | |
| "loss": 0.4248, | |
| "num_input_tokens_seen": 126453760, | |
| "step": 965, | |
| "train_runtime": 4211.9556, | |
| "train_tokens_per_second": 30022.577 | |
| }, | |
| { | |
| "epoch": 1.1814859926918393, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.801511780048221e-05, | |
| "loss": 0.429, | |
| "num_input_tokens_seen": 127109120, | |
| "step": 970, | |
| "train_runtime": 4233.6883, | |
| "train_tokens_per_second": 30023.259 | |
| }, | |
| { | |
| "epoch": 1.1875761266747868, | |
| "grad_norm": 1.125, | |
| "learning_rate": 1.778580613788853e-05, | |
| "loss": 0.4305, | |
| "num_input_tokens_seen": 127764480, | |
| "step": 975, | |
| "train_runtime": 4255.3913, | |
| "train_tokens_per_second": 30024.144 | |
| }, | |
| { | |
| "epoch": 1.1936662606577344, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.755715467898139e-05, | |
| "loss": 0.4307, | |
| "num_input_tokens_seen": 128419840, | |
| "step": 980, | |
| "train_runtime": 4277.133, | |
| "train_tokens_per_second": 30024.748 | |
| }, | |
| { | |
| "epoch": 1.1997563946406822, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.7329184348695586e-05, | |
| "loss": 0.4238, | |
| "num_input_tokens_seen": 129075200, | |
| "step": 985, | |
| "train_runtime": 4298.8319, | |
| "train_tokens_per_second": 30025.645 | |
| }, | |
| { | |
| "epoch": 1.2058465286236297, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.7101916009632733e-05, | |
| "loss": 0.4402, | |
| "num_input_tokens_seen": 129730560, | |
| "step": 990, | |
| "train_runtime": 4320.5575, | |
| "train_tokens_per_second": 30026.347 | |
| }, | |
| { | |
| "epoch": 1.2119366626065773, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.6875370460152023e-05, | |
| "loss": 0.4324, | |
| "num_input_tokens_seen": 130385920, | |
| "step": 995, | |
| "train_runtime": 4342.3036, | |
| "train_tokens_per_second": 30026.901 | |
| }, | |
| { | |
| "epoch": 1.218026796589525, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1.6649568432466884e-05, | |
| "loss": 0.4349, | |
| "num_input_tokens_seen": 131041280, | |
| "step": 1000, | |
| "train_runtime": 4364.0234, | |
| "train_tokens_per_second": 30027.63 | |
| }, | |
| { | |
| "epoch": 1.2241169305724726, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.6424530590747724e-05, | |
| "loss": 0.4318, | |
| "num_input_tokens_seen": 131696640, | |
| "step": 1005, | |
| "train_runtime": 4393.7613, | |
| "train_tokens_per_second": 29973.553 | |
| }, | |
| { | |
| "epoch": 1.2302070645554202, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 1.6200277529230768e-05, | |
| "loss": 0.4475, | |
| "num_input_tokens_seen": 132352000, | |
| "step": 1010, | |
| "train_runtime": 4415.4584, | |
| "train_tokens_per_second": 29974.69 | |
| }, | |
| { | |
| "epoch": 1.236297198538368, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.5976829770333452e-05, | |
| "loss": 0.4415, | |
| "num_input_tokens_seen": 133007360, | |
| "step": 1015, | |
| "train_runtime": 4437.1808, | |
| "train_tokens_per_second": 29975.646 | |
| }, | |
| { | |
| "epoch": 1.2423873325213155, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.5754207762776325e-05, | |
| "loss": 0.4288, | |
| "num_input_tokens_seen": 133662720, | |
| "step": 1020, | |
| "train_runtime": 4458.8792, | |
| "train_tokens_per_second": 29976.753 | |
| }, | |
| { | |
| "epoch": 1.248477466504263, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.5532431879711657e-05, | |
| "loss": 0.4289, | |
| "num_input_tokens_seen": 134318080, | |
| "step": 1025, | |
| "train_runtime": 4480.5616, | |
| "train_tokens_per_second": 29977.956 | |
| }, | |
| { | |
| "epoch": 1.2545676004872108, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.5311522416859016e-05, | |
| "loss": 0.4246, | |
| "num_input_tokens_seen": 134973440, | |
| "step": 1030, | |
| "train_runtime": 4502.2815, | |
| "train_tokens_per_second": 29978.898 | |
| }, | |
| { | |
| "epoch": 1.2606577344701584, | |
| "grad_norm": 1.125, | |
| "learning_rate": 1.5091499590647936e-05, | |
| "loss": 0.432, | |
| "num_input_tokens_seen": 135628800, | |
| "step": 1035, | |
| "train_runtime": 4524.0863, | |
| "train_tokens_per_second": 29979.269 | |
| }, | |
| { | |
| "epoch": 1.266747868453106, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.4872383536367785e-05, | |
| "loss": 0.4333, | |
| "num_input_tokens_seen": 136284160, | |
| "step": 1040, | |
| "train_runtime": 4545.9201, | |
| "train_tokens_per_second": 29979.444 | |
| }, | |
| { | |
| "epoch": 1.2728380024360537, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.4654194306325093e-05, | |
| "loss": 0.4282, | |
| "num_input_tokens_seen": 136939520, | |
| "step": 1045, | |
| "train_runtime": 4567.69, | |
| "train_tokens_per_second": 29980.038 | |
| }, | |
| { | |
| "epoch": 1.2789281364190013, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.4436951868008536e-05, | |
| "loss": 0.4307, | |
| "num_input_tokens_seen": 137594880, | |
| "step": 1050, | |
| "train_runtime": 4589.5037, | |
| "train_tokens_per_second": 29980.34 | |
| }, | |
| { | |
| "epoch": 1.2850182704019488, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.4220676102261532e-05, | |
| "loss": 0.4323, | |
| "num_input_tokens_seen": 138250240, | |
| "step": 1055, | |
| "train_runtime": 4611.2636, | |
| "train_tokens_per_second": 29980.988 | |
| }, | |
| { | |
| "epoch": 1.2911084043848966, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.4005386801462896e-05, | |
| "loss": 0.428, | |
| "num_input_tokens_seen": 138905600, | |
| "step": 1060, | |
| "train_runtime": 4633.0271, | |
| "train_tokens_per_second": 29981.607 | |
| }, | |
| { | |
| "epoch": 1.2971985383678442, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.3791103667715577e-05, | |
| "loss": 0.4226, | |
| "num_input_tokens_seen": 139560960, | |
| "step": 1065, | |
| "train_runtime": 4654.7487, | |
| "train_tokens_per_second": 29982.491 | |
| }, | |
| { | |
| "epoch": 1.3032886723507917, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.3577846311043593e-05, | |
| "loss": 0.4332, | |
| "num_input_tokens_seen": 140216320, | |
| "step": 1070, | |
| "train_runtime": 4676.4958, | |
| "train_tokens_per_second": 29983.203 | |
| }, | |
| { | |
| "epoch": 1.3093788063337393, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.3365634247597415e-05, | |
| "loss": 0.426, | |
| "num_input_tokens_seen": 140871680, | |
| "step": 1075, | |
| "train_runtime": 4698.2878, | |
| "train_tokens_per_second": 29983.621 | |
| }, | |
| { | |
| "epoch": 1.315468940316687, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.3154486897867996e-05, | |
| "loss": 0.4302, | |
| "num_input_tokens_seen": 141527040, | |
| "step": 1080, | |
| "train_runtime": 4720.0824, | |
| "train_tokens_per_second": 29984.019 | |
| }, | |
| { | |
| "epoch": 1.3215590742996346, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.2944423584909502e-05, | |
| "loss": 0.4306, | |
| "num_input_tokens_seen": 142182400, | |
| "step": 1085, | |
| "train_runtime": 4741.7882, | |
| "train_tokens_per_second": 29984.975 | |
| }, | |
| { | |
| "epoch": 1.3276492082825821, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.273546353257096e-05, | |
| "loss": 0.4204, | |
| "num_input_tokens_seen": 142837760, | |
| "step": 1090, | |
| "train_runtime": 4763.5593, | |
| "train_tokens_per_second": 29985.511 | |
| }, | |
| { | |
| "epoch": 1.3337393422655297, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.2527625863736981e-05, | |
| "loss": 0.4253, | |
| "num_input_tokens_seen": 143493120, | |
| "step": 1095, | |
| "train_runtime": 4785.3061, | |
| "train_tokens_per_second": 29986.195 | |
| }, | |
| { | |
| "epoch": 1.3398294762484775, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.2320929598577777e-05, | |
| "loss": 0.4353, | |
| "num_input_tokens_seen": 144148480, | |
| "step": 1100, | |
| "train_runtime": 4807.0374, | |
| "train_tokens_per_second": 29986.969 | |
| }, | |
| { | |
| "epoch": 1.345919610231425, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.2115393652808526e-05, | |
| "loss": 0.4358, | |
| "num_input_tokens_seen": 144803840, | |
| "step": 1105, | |
| "train_runtime": 4828.7956, | |
| "train_tokens_per_second": 29987.569 | |
| }, | |
| { | |
| "epoch": 1.3520097442143726, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.1911036835958274e-05, | |
| "loss": 0.4386, | |
| "num_input_tokens_seen": 145459200, | |
| "step": 1110, | |
| "train_runtime": 4850.5882, | |
| "train_tokens_per_second": 29987.951 | |
| }, | |
| { | |
| "epoch": 1.3580998781973204, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.1707877849648643e-05, | |
| "loss": 0.4304, | |
| "num_input_tokens_seen": 146114560, | |
| "step": 1115, | |
| "train_runtime": 4872.3825, | |
| "train_tokens_per_second": 29988.319 | |
| }, | |
| { | |
| "epoch": 1.364190012180268, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.1505935285882336e-05, | |
| "loss": 0.4327, | |
| "num_input_tokens_seen": 146769920, | |
| "step": 1120, | |
| "train_runtime": 4894.2345, | |
| "train_tokens_per_second": 29988.33 | |
| }, | |
| { | |
| "epoch": 1.3702801461632155, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.1305227625341657e-05, | |
| "loss": 0.4316, | |
| "num_input_tokens_seen": 147425280, | |
| "step": 1125, | |
| "train_runtime": 4915.9875, | |
| "train_tokens_per_second": 29988.945 | |
| }, | |
| { | |
| "epoch": 1.3763702801461632, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.1105773235697376e-05, | |
| "loss": 0.4247, | |
| "num_input_tokens_seen": 148080640, | |
| "step": 1130, | |
| "train_runtime": 4937.7365, | |
| "train_tokens_per_second": 29989.579 | |
| }, | |
| { | |
| "epoch": 1.3824604141291108, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.0907590369927674e-05, | |
| "loss": 0.4298, | |
| "num_input_tokens_seen": 148736000, | |
| "step": 1135, | |
| "train_runtime": 4959.4689, | |
| "train_tokens_per_second": 29990.308 | |
| }, | |
| { | |
| "epoch": 1.3885505481120584, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.0710697164647807e-05, | |
| "loss": 0.431, | |
| "num_input_tokens_seen": 149391360, | |
| "step": 1140, | |
| "train_runtime": 4981.2189, | |
| "train_tokens_per_second": 29990.925 | |
| }, | |
| { | |
| "epoch": 1.3946406820950061, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.0515111638450395e-05, | |
| "loss": 0.4236, | |
| "num_input_tokens_seen": 150046720, | |
| "step": 1145, | |
| "train_runtime": 5002.928, | |
| "train_tokens_per_second": 29991.781 | |
| }, | |
| { | |
| "epoch": 1.4007308160779537, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.0320851690256324e-05, | |
| "loss": 0.4318, | |
| "num_input_tokens_seen": 150702080, | |
| "step": 1150, | |
| "train_runtime": 5024.6238, | |
| "train_tokens_per_second": 29992.709 | |
| }, | |
| { | |
| "epoch": 1.4068209500609012, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.0127935097676855e-05, | |
| "loss": 0.4371, | |
| "num_input_tokens_seen": 151357440, | |
| "step": 1155, | |
| "train_runtime": 5046.36, | |
| "train_tokens_per_second": 29993.389 | |
| }, | |
| { | |
| "epoch": 1.412911084043849, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 9.936379515386663e-06, | |
| "loss": 0.4213, | |
| "num_input_tokens_seen": 152012800, | |
| "step": 1160, | |
| "train_runtime": 5068.1066, | |
| "train_tokens_per_second": 29994.002 | |
| }, | |
| { | |
| "epoch": 1.4190012180267966, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.74620247350815e-06, | |
| "loss": 0.4245, | |
| "num_input_tokens_seen": 152668160, | |
| "step": 1165, | |
| "train_runtime": 5089.847, | |
| "train_tokens_per_second": 29994.646 | |
| }, | |
| { | |
| "epoch": 1.4250913520097441, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 9.557421376007258e-06, | |
| "loss": 0.4272, | |
| "num_input_tokens_seen": 153323520, | |
| "step": 1170, | |
| "train_runtime": 5111.5715, | |
| "train_tokens_per_second": 29995.378 | |
| }, | |
| { | |
| "epoch": 1.431181485992692, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 9.370053499100698e-06, | |
| "loss": 0.418, | |
| "num_input_tokens_seen": 153978880, | |
| "step": 1175, | |
| "train_runtime": 5133.3011, | |
| "train_tokens_per_second": 29996.074 | |
| }, | |
| { | |
| "epoch": 1.4372716199756395, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 9.184115989674913e-06, | |
| "loss": 0.4314, | |
| "num_input_tokens_seen": 154634240, | |
| "step": 1180, | |
| "train_runtime": 5155.025, | |
| "train_tokens_per_second": 29996.797 | |
| }, | |
| { | |
| "epoch": 1.443361753958587, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 8.999625863716951e-06, | |
| "loss": 0.4283, | |
| "num_input_tokens_seen": 155289600, | |
| "step": 1185, | |
| "train_runtime": 5176.7613, | |
| "train_tokens_per_second": 29997.443 | |
| }, | |
| { | |
| "epoch": 1.4494518879415348, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8.816600004757175e-06, | |
| "loss": 0.4367, | |
| "num_input_tokens_seen": 155944960, | |
| "step": 1190, | |
| "train_runtime": 5198.4684, | |
| "train_tokens_per_second": 29998.251 | |
| }, | |
| { | |
| "epoch": 1.4555420219244823, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 8.635055162324276e-06, | |
| "loss": 0.416, | |
| "num_input_tokens_seen": 156600320, | |
| "step": 1195, | |
| "train_runtime": 5220.1609, | |
| "train_tokens_per_second": 29999.137 | |
| }, | |
| { | |
| "epoch": 1.46163215590743, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8.455007950412324e-06, | |
| "loss": 0.4317, | |
| "num_input_tokens_seen": 157255680, | |
| "step": 1200, | |
| "train_runtime": 5241.8684, | |
| "train_tokens_per_second": 29999.929 | |
| }, | |
| { | |
| "epoch": 1.4677222898903777, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 8.276474845960448e-06, | |
| "loss": 0.4237, | |
| "num_input_tokens_seen": 157911040, | |
| "step": 1205, | |
| "train_runtime": 5263.5912, | |
| "train_tokens_per_second": 30000.628 | |
| }, | |
| { | |
| "epoch": 1.4738124238733252, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8.099472187344914e-06, | |
| "loss": 0.4356, | |
| "num_input_tokens_seen": 158566400, | |
| "step": 1210, | |
| "train_runtime": 5285.317, | |
| "train_tokens_per_second": 30001.304 | |
| }, | |
| { | |
| "epoch": 1.4799025578562728, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 7.924016172883908e-06, | |
| "loss": 0.4297, | |
| "num_input_tokens_seen": 159221760, | |
| "step": 1215, | |
| "train_runtime": 5307.0638, | |
| "train_tokens_per_second": 30001.855 | |
| }, | |
| { | |
| "epoch": 1.4859926918392206, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 7.750122859355199e-06, | |
| "loss": 0.4317, | |
| "num_input_tokens_seen": 159877120, | |
| "step": 1220, | |
| "train_runtime": 5328.8039, | |
| "train_tokens_per_second": 30002.44 | |
| }, | |
| { | |
| "epoch": 1.4920828258221681, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 7.577808160526692e-06, | |
| "loss": 0.4311, | |
| "num_input_tokens_seen": 160532480, | |
| "step": 1225, | |
| "train_runtime": 5350.5736, | |
| "train_tokens_per_second": 30002.854 | |
| }, | |
| { | |
| "epoch": 1.4981729598051157, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 7.40708784570005e-06, | |
| "loss": 0.4269, | |
| "num_input_tokens_seen": 161187840, | |
| "step": 1230, | |
| "train_runtime": 5372.3219, | |
| "train_tokens_per_second": 30003.385 | |
| }, | |
| { | |
| "epoch": 1.5042630937880634, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 7.2379775382676375e-06, | |
| "loss": 0.4268, | |
| "num_input_tokens_seen": 161843200, | |
| "step": 1235, | |
| "train_runtime": 5394.073, | |
| "train_tokens_per_second": 30003.895 | |
| }, | |
| { | |
| "epoch": 1.510353227771011, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 7.070492714282706e-06, | |
| "loss": 0.4243, | |
| "num_input_tokens_seen": 162498560, | |
| "step": 1240, | |
| "train_runtime": 5415.8136, | |
| "train_tokens_per_second": 30004.459 | |
| }, | |
| { | |
| "epoch": 1.5164433617539586, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 6.904648701043137e-06, | |
| "loss": 0.4237, | |
| "num_input_tokens_seen": 163153920, | |
| "step": 1245, | |
| "train_runtime": 5437.5717, | |
| "train_tokens_per_second": 30004.923 | |
| }, | |
| { | |
| "epoch": 1.5225334957369063, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 6.740460675688734e-06, | |
| "loss": 0.4214, | |
| "num_input_tokens_seen": 163809280, | |
| "step": 1250, | |
| "train_runtime": 5459.3212, | |
| "train_tokens_per_second": 30005.43 | |
| }, | |
| { | |
| "epoch": 1.5286236297198539, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 6.577943663812344e-06, | |
| "loss": 0.4331, | |
| "num_input_tokens_seen": 164464640, | |
| "step": 1255, | |
| "train_runtime": 5481.0582, | |
| "train_tokens_per_second": 30006.001 | |
| }, | |
| { | |
| "epoch": 1.5347137637028014, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 6.417112538084771e-06, | |
| "loss": 0.4269, | |
| "num_input_tokens_seen": 165120000, | |
| "step": 1260, | |
| "train_runtime": 5502.8244, | |
| "train_tokens_per_second": 30006.409 | |
| }, | |
| { | |
| "epoch": 1.5408038976857492, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 6.257982016893685e-06, | |
| "loss": 0.4197, | |
| "num_input_tokens_seen": 165775360, | |
| "step": 1265, | |
| "train_runtime": 5524.4904, | |
| "train_tokens_per_second": 30007.358 | |
| }, | |
| { | |
| "epoch": 1.5468940316686965, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 6.100566662996732e-06, | |
| "loss": 0.4407, | |
| "num_input_tokens_seen": 166430720, | |
| "step": 1270, | |
| "train_runtime": 5546.194, | |
| "train_tokens_per_second": 30008.096 | |
| }, | |
| { | |
| "epoch": 1.5529841656516443, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 5.944880882188786e-06, | |
| "loss": 0.4268, | |
| "num_input_tokens_seen": 167086080, | |
| "step": 1275, | |
| "train_runtime": 5567.9166, | |
| "train_tokens_per_second": 30008.725 | |
| }, | |
| { | |
| "epoch": 1.559074299634592, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 5.790938921983608e-06, | |
| "loss": 0.4275, | |
| "num_input_tokens_seen": 167741440, | |
| "step": 1280, | |
| "train_runtime": 5589.613, | |
| "train_tokens_per_second": 30009.491 | |
| }, | |
| { | |
| "epoch": 1.5651644336175394, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 5.638754870310042e-06, | |
| "loss": 0.4291, | |
| "num_input_tokens_seen": 168396800, | |
| "step": 1285, | |
| "train_runtime": 5611.3139, | |
| "train_tokens_per_second": 30010.227 | |
| }, | |
| { | |
| "epoch": 1.5712545676004872, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 5.488342654222695e-06, | |
| "loss": 0.4283, | |
| "num_input_tokens_seen": 169052160, | |
| "step": 1290, | |
| "train_runtime": 5633.0305, | |
| "train_tokens_per_second": 30010.872 | |
| }, | |
| { | |
| "epoch": 1.577344701583435, | |
| "grad_norm": 1.25, | |
| "learning_rate": 5.33971603862746e-06, | |
| "loss": 0.4282, | |
| "num_input_tokens_seen": 169707520, | |
| "step": 1295, | |
| "train_runtime": 5654.7734, | |
| "train_tokens_per_second": 30011.374 | |
| }, | |
| { | |
| "epoch": 1.5834348355663823, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 5.192888625021794e-06, | |
| "loss": 0.438, | |
| "num_input_tokens_seen": 170362880, | |
| "step": 1300, | |
| "train_runtime": 5676.6131, | |
| "train_tokens_per_second": 30011.36 | |
| }, | |
| { | |
| "epoch": 1.58952496954933, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 5.047873850250012e-06, | |
| "loss": 0.4227, | |
| "num_input_tokens_seen": 171018240, | |
| "step": 1305, | |
| "train_runtime": 5698.3158, | |
| "train_tokens_per_second": 30012.068 | |
| }, | |
| { | |
| "epoch": 1.5956151035322779, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.9046849852736085e-06, | |
| "loss": 0.4339, | |
| "num_input_tokens_seen": 171673600, | |
| "step": 1310, | |
| "train_runtime": 5720.0011, | |
| "train_tokens_per_second": 30012.862 | |
| }, | |
| { | |
| "epoch": 1.6017052375152252, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.763335133956751e-06, | |
| "loss": 0.4233, | |
| "num_input_tokens_seen": 172328960, | |
| "step": 1315, | |
| "train_runtime": 5741.7125, | |
| "train_tokens_per_second": 30013.512 | |
| }, | |
| { | |
| "epoch": 1.607795371498173, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.6238372318671175e-06, | |
| "loss": 0.4293, | |
| "num_input_tokens_seen": 172984320, | |
| "step": 1320, | |
| "train_runtime": 5763.4052, | |
| "train_tokens_per_second": 30014.256 | |
| }, | |
| { | |
| "epoch": 1.6138855054811205, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.486204045092102e-06, | |
| "loss": 0.422, | |
| "num_input_tokens_seen": 173639680, | |
| "step": 1325, | |
| "train_runtime": 5785.115, | |
| "train_tokens_per_second": 30014.906 | |
| }, | |
| { | |
| "epoch": 1.619975639464068, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.350448169070481e-06, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 174295040, | |
| "step": 1330, | |
| "train_runtime": 5806.8095, | |
| "train_tokens_per_second": 30015.629 | |
| }, | |
| { | |
| "epoch": 1.6260657734470159, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.2165820274398444e-06, | |
| "loss": 0.4258, | |
| "num_input_tokens_seen": 174950400, | |
| "step": 1335, | |
| "train_runtime": 5828.5149, | |
| "train_tokens_per_second": 30016.291 | |
| }, | |
| { | |
| "epoch": 1.6321559074299634, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.084617870899546e-06, | |
| "loss": 0.4212, | |
| "num_input_tokens_seen": 175605760, | |
| "step": 1340, | |
| "train_runtime": 5850.239, | |
| "train_tokens_per_second": 30016.852 | |
| }, | |
| { | |
| "epoch": 1.638246041412911, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.954567776089643e-06, | |
| "loss": 0.4218, | |
| "num_input_tokens_seen": 176261120, | |
| "step": 1345, | |
| "train_runtime": 5872.0406, | |
| "train_tokens_per_second": 30017.013 | |
| }, | |
| { | |
| "epoch": 1.6443361753958587, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.826443644485731e-06, | |
| "loss": 0.4322, | |
| "num_input_tokens_seen": 176916480, | |
| "step": 1350, | |
| "train_runtime": 5893.7566, | |
| "train_tokens_per_second": 30017.609 | |
| }, | |
| { | |
| "epoch": 1.6504263093788063, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.7002572013097147e-06, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 177571840, | |
| "step": 1355, | |
| "train_runtime": 5915.4571, | |
| "train_tokens_per_second": 30018.279 | |
| }, | |
| { | |
| "epoch": 1.6565164433617539, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.5760199944568418e-06, | |
| "loss": 0.4241, | |
| "num_input_tokens_seen": 178227200, | |
| "step": 1360, | |
| "train_runtime": 5937.1539, | |
| "train_tokens_per_second": 30018.963 | |
| }, | |
| { | |
| "epoch": 1.6626065773447016, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 3.4537433934388798e-06, | |
| "loss": 0.4313, | |
| "num_input_tokens_seen": 178882560, | |
| "step": 1365, | |
| "train_runtime": 5958.8509, | |
| "train_tokens_per_second": 30019.64 | |
| }, | |
| { | |
| "epoch": 1.6686967113276492, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.333438588343624e-06, | |
| "loss": 0.4224, | |
| "num_input_tokens_seen": 179537920, | |
| "step": 1370, | |
| "train_runtime": 5980.5729, | |
| "train_tokens_per_second": 30020.187 | |
| }, | |
| { | |
| "epoch": 1.6747868453105967, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 3.2151165888108765e-06, | |
| "loss": 0.4228, | |
| "num_input_tokens_seen": 180193280, | |
| "step": 1375, | |
| "train_runtime": 6002.3024, | |
| "train_tokens_per_second": 30020.693 | |
| }, | |
| { | |
| "epoch": 1.6808769792935445, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.0987882230248816e-06, | |
| "loss": 0.4335, | |
| "num_input_tokens_seen": 180848640, | |
| "step": 1380, | |
| "train_runtime": 6024.0561, | |
| "train_tokens_per_second": 30021.075 | |
| }, | |
| { | |
| "epoch": 1.686967113276492, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 2.9844641367233834e-06, | |
| "loss": 0.4241, | |
| "num_input_tokens_seen": 181504000, | |
| "step": 1385, | |
| "train_runtime": 6045.7757, | |
| "train_tokens_per_second": 30021.623 | |
| }, | |
| { | |
| "epoch": 1.6930572472594396, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.8721547922234055e-06, | |
| "loss": 0.4206, | |
| "num_input_tokens_seen": 182159360, | |
| "step": 1390, | |
| "train_runtime": 6067.6225, | |
| "train_tokens_per_second": 30021.538 | |
| }, | |
| { | |
| "epoch": 1.6991473812423874, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 2.761870467463784e-06, | |
| "loss": 0.4284, | |
| "num_input_tokens_seen": 182814720, | |
| "step": 1395, | |
| "train_runtime": 6089.3947, | |
| "train_tokens_per_second": 30021.822 | |
| }, | |
| { | |
| "epoch": 1.705237515225335, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 2.6536212550645977e-06, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 183470080, | |
| "step": 1400, | |
| "train_runtime": 6111.127, | |
| "train_tokens_per_second": 30022.299 | |
| }, | |
| { | |
| "epoch": 1.7113276492082825, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.547417061403523e-06, | |
| "loss": 0.4351, | |
| "num_input_tokens_seen": 184125440, | |
| "step": 1405, | |
| "train_runtime": 6132.8723, | |
| "train_tokens_per_second": 30022.709 | |
| }, | |
| { | |
| "epoch": 1.7174177831912303, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.4432676057092818e-06, | |
| "loss": 0.42, | |
| "num_input_tokens_seen": 184780800, | |
| "step": 1410, | |
| "train_runtime": 6154.6229, | |
| "train_tokens_per_second": 30023.091 | |
| }, | |
| { | |
| "epoch": 1.7235079171741778, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.3411824191721887e-06, | |
| "loss": 0.4214, | |
| "num_input_tokens_seen": 185436160, | |
| "step": 1415, | |
| "train_runtime": 6176.372, | |
| "train_tokens_per_second": 30023.476 | |
| }, | |
| { | |
| "epoch": 1.7295980511571254, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.24117084407188e-06, | |
| "loss": 0.4281, | |
| "num_input_tokens_seen": 186091520, | |
| "step": 1420, | |
| "train_runtime": 6198.2101, | |
| "train_tokens_per_second": 30023.429 | |
| }, | |
| { | |
| "epoch": 1.7356881851400732, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.143242032922396e-06, | |
| "loss": 0.4217, | |
| "num_input_tokens_seen": 186746880, | |
| "step": 1425, | |
| "train_runtime": 6219.929, | |
| "train_tokens_per_second": 30023.957 | |
| }, | |
| { | |
| "epoch": 1.7417783191230207, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.0474049476345737e-06, | |
| "loss": 0.4236, | |
| "num_input_tokens_seen": 187402240, | |
| "step": 1430, | |
| "train_runtime": 6241.6962, | |
| "train_tokens_per_second": 30024.249 | |
| }, | |
| { | |
| "epoch": 1.7478684531059683, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.953668358695901e-06, | |
| "loss": 0.4193, | |
| "num_input_tokens_seen": 188057600, | |
| "step": 1435, | |
| "train_runtime": 6263.445, | |
| "train_tokens_per_second": 30024.627 | |
| }, | |
| { | |
| "epoch": 1.753958587088916, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.8620408443678904e-06, | |
| "loss": 0.4328, | |
| "num_input_tokens_seen": 188712960, | |
| "step": 1440, | |
| "train_runtime": 6285.263, | |
| "train_tokens_per_second": 30024.672 | |
| }, | |
| { | |
| "epoch": 1.7600487210718636, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.7725307899010586e-06, | |
| "loss": 0.4322, | |
| "num_input_tokens_seen": 189368320, | |
| "step": 1445, | |
| "train_runtime": 6306.9813, | |
| "train_tokens_per_second": 30025.192 | |
| }, | |
| { | |
| "epoch": 1.7661388550548112, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.6851463867675305e-06, | |
| "loss": 0.4276, | |
| "num_input_tokens_seen": 190023680, | |
| "step": 1450, | |
| "train_runtime": 6328.7332, | |
| "train_tokens_per_second": 30025.548 | |
| }, | |
| { | |
| "epoch": 1.772228989037759, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.599895631911405e-06, | |
| "loss": 0.4266, | |
| "num_input_tokens_seen": 190679040, | |
| "step": 1455, | |
| "train_runtime": 6350.4722, | |
| "train_tokens_per_second": 30025.962 | |
| }, | |
| { | |
| "epoch": 1.7783191230207065, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.5167863270169448e-06, | |
| "loss": 0.4233, | |
| "num_input_tokens_seen": 191334400, | |
| "step": 1460, | |
| "train_runtime": 6372.1834, | |
| "train_tokens_per_second": 30026.506 | |
| }, | |
| { | |
| "epoch": 1.784409257003654, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.435826077794572e-06, | |
| "loss": 0.4202, | |
| "num_input_tokens_seen": 191989760, | |
| "step": 1465, | |
| "train_runtime": 6393.9217, | |
| "train_tokens_per_second": 30026.918 | |
| }, | |
| { | |
| "epoch": 1.7904993909866018, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.3570222932848514e-06, | |
| "loss": 0.429, | |
| "num_input_tokens_seen": 192645120, | |
| "step": 1470, | |
| "train_runtime": 6415.6575, | |
| "train_tokens_per_second": 30027.339 | |
| }, | |
| { | |
| "epoch": 1.7965895249695494, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.2803821851804677e-06, | |
| "loss": 0.4373, | |
| "num_input_tokens_seen": 193300480, | |
| "step": 1475, | |
| "train_runtime": 6437.4201, | |
| "train_tokens_per_second": 30027.632 | |
| }, | |
| { | |
| "epoch": 1.802679658952497, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 1.2059127671662285e-06, | |
| "loss": 0.4318, | |
| "num_input_tokens_seen": 193955840, | |
| "step": 1480, | |
| "train_runtime": 6459.1614, | |
| "train_tokens_per_second": 30028.022 | |
| }, | |
| { | |
| "epoch": 1.8087697929354447, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.1336208542772147e-06, | |
| "loss": 0.4266, | |
| "num_input_tokens_seen": 194611200, | |
| "step": 1485, | |
| "train_runtime": 6480.9309, | |
| "train_tokens_per_second": 30028.279 | |
| }, | |
| { | |
| "epoch": 1.814859926918392, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1.0635130622751343e-06, | |
| "loss": 0.4203, | |
| "num_input_tokens_seen": 195266560, | |
| "step": 1490, | |
| "train_runtime": 6502.6538, | |
| "train_tokens_per_second": 30028.749 | |
| }, | |
| { | |
| "epoch": 1.8209500609013398, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.955958070428344e-07, | |
| "loss": 0.4189, | |
| "num_input_tokens_seen": 195921920, | |
| "step": 1495, | |
| "train_runtime": 6524.4119, | |
| "train_tokens_per_second": 30029.055 | |
| }, | |
| { | |
| "epoch": 1.8270401948842876, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 9.298753039971964e-07, | |
| "loss": 0.431, | |
| "num_input_tokens_seen": 196577280, | |
| "step": 1500, | |
| "train_runtime": 6546.1214, | |
| "train_tokens_per_second": 30029.58 | |
| }, | |
| { | |
| "epoch": 1.833130328867235, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 8.663575675203151e-07, | |
| "loss": 0.4204, | |
| "num_input_tokens_seen": 197232640, | |
| "step": 1505, | |
| "train_runtime": 6575.1193, | |
| "train_tokens_per_second": 29996.815 | |
| }, | |
| { | |
| "epoch": 1.8392204628501827, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 8.050484104090927e-07, | |
| "loss": 0.4226, | |
| "num_input_tokens_seen": 197888000, | |
| "step": 1510, | |
| "train_runtime": 6596.8218, | |
| "train_tokens_per_second": 29997.475 | |
| }, | |
| { | |
| "epoch": 1.8453105968331305, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 7.459534433433085e-07, | |
| "loss": 0.4262, | |
| "num_input_tokens_seen": 198543360, | |
| "step": 1515, | |
| "train_runtime": 6618.5414, | |
| "train_tokens_per_second": 29998.054 | |
| }, | |
| { | |
| "epoch": 1.8514007308160778, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 6.890780743721209e-07, | |
| "loss": 0.4272, | |
| "num_input_tokens_seen": 199198720, | |
| "step": 1520, | |
| "train_runtime": 6640.1962, | |
| "train_tokens_per_second": 29998.921 | |
| }, | |
| { | |
| "epoch": 1.8574908647990256, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 6.344275084191886e-07, | |
| "loss": 0.4257, | |
| "num_input_tokens_seen": 199854080, | |
| "step": 1525, | |
| "train_runtime": 6661.9288, | |
| "train_tokens_per_second": 29999.432 | |
| }, | |
| { | |
| "epoch": 1.8635809987819734, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 5.820067468063212e-07, | |
| "loss": 0.4249, | |
| "num_input_tokens_seen": 200509440, | |
| "step": 1530, | |
| "train_runtime": 6683.6664, | |
| "train_tokens_per_second": 29999.917 | |
| }, | |
| { | |
| "epoch": 1.8696711327649207, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 5.318205867957893e-07, | |
| "loss": 0.4277, | |
| "num_input_tokens_seen": 201164800, | |
| "step": 1535, | |
| "train_runtime": 6705.4025, | |
| "train_tokens_per_second": 30000.406 | |
| }, | |
| { | |
| "epoch": 1.8757612667478685, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.838736211513233e-07, | |
| "loss": 0.4282, | |
| "num_input_tokens_seen": 201820160, | |
| "step": 1540, | |
| "train_runtime": 6727.1629, | |
| "train_tokens_per_second": 30000.784 | |
| }, | |
| { | |
| "epoch": 1.881851400730816, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.3817023771778596e-07, | |
| "loss": 0.4298, | |
| "num_input_tokens_seen": 202475520, | |
| "step": 1545, | |
| "train_runtime": 6749.1883, | |
| "train_tokens_per_second": 29999.981 | |
| }, | |
| { | |
| "epoch": 1.8879415347137636, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.947146190196205e-07, | |
| "loss": 0.4261, | |
| "num_input_tokens_seen": 203130880, | |
| "step": 1550, | |
| "train_runtime": 6771.0741, | |
| "train_tokens_per_second": 29999.802 | |
| }, | |
| { | |
| "epoch": 1.8940316686967114, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.5351074187811586e-07, | |
| "loss": 0.4294, | |
| "num_input_tokens_seen": 203786240, | |
| "step": 1555, | |
| "train_runtime": 6793.1191, | |
| "train_tokens_per_second": 29998.921 | |
| }, | |
| { | |
| "epoch": 1.900121802679659, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.145623770474365e-07, | |
| "loss": 0.4286, | |
| "num_input_tokens_seen": 204441600, | |
| "step": 1560, | |
| "train_runtime": 6815.1261, | |
| "train_tokens_per_second": 29998.212 | |
| }, | |
| { | |
| "epoch": 1.9062119366626065, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.778730888695652e-07, | |
| "loss": 0.4269, | |
| "num_input_tokens_seen": 205096960, | |
| "step": 1565, | |
| "train_runtime": 6836.9748, | |
| "train_tokens_per_second": 29998.203 | |
| }, | |
| { | |
| "epoch": 1.9123020706455542, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.4344623494810814e-07, | |
| "loss": 0.4283, | |
| "num_input_tokens_seen": 205752320, | |
| "step": 1570, | |
| "train_runtime": 6859.0238, | |
| "train_tokens_per_second": 29997.318 | |
| }, | |
| { | |
| "epoch": 1.9183922046285018, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.1128496584102154e-07, | |
| "loss": 0.4308, | |
| "num_input_tokens_seen": 206407680, | |
| "step": 1575, | |
| "train_runtime": 6880.8878, | |
| "train_tokens_per_second": 29997.245 | |
| }, | |
| { | |
| "epoch": 1.9244823386114494, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.8139222477229212e-07, | |
| "loss": 0.4435, | |
| "num_input_tokens_seen": 207063040, | |
| "step": 1580, | |
| "train_runtime": 6902.6722, | |
| "train_tokens_per_second": 29997.519 | |
| }, | |
| { | |
| "epoch": 1.9305724725943971, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.5377074736259155e-07, | |
| "loss": 0.427, | |
| "num_input_tokens_seen": 207718400, | |
| "step": 1585, | |
| "train_runtime": 6924.4409, | |
| "train_tokens_per_second": 29997.859 | |
| }, | |
| { | |
| "epoch": 1.9366626065773447, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.2842306137892392e-07, | |
| "loss": 0.4222, | |
| "num_input_tokens_seen": 208373760, | |
| "step": 1590, | |
| "train_runtime": 6946.2191, | |
| "train_tokens_per_second": 29998.156 | |
| }, | |
| { | |
| "epoch": 1.9427527405602922, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.0535148650330228e-07, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 209029120, | |
| "step": 1595, | |
| "train_runtime": 6968.3008, | |
| "train_tokens_per_second": 29997.144 | |
| }, | |
| { | |
| "epoch": 1.94884287454324, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 8.455813412046042e-08, | |
| "loss": 0.4268, | |
| "num_input_tokens_seen": 209684480, | |
| "step": 1600, | |
| "train_runtime": 6990.0361, | |
| "train_tokens_per_second": 29997.625 | |
| }, | |
| { | |
| "epoch": 1.9549330085261876, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 6.604490712463218e-08, | |
| "loss": 0.4307, | |
| "num_input_tokens_seen": 210339840, | |
| "step": 1605, | |
| "train_runtime": 7011.7811, | |
| "train_tokens_per_second": 29998.062 | |
| }, | |
| { | |
| "epoch": 1.9610231425091351, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.98134997454075e-08, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 210995200, | |
| "step": 1610, | |
| "train_runtime": 7033.5419, | |
| "train_tokens_per_second": 29998.428 | |
| }, | |
| { | |
| "epoch": 1.967113276492083, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.5865397392684244e-08, | |
| "loss": 0.4349, | |
| "num_input_tokens_seen": 211650560, | |
| "step": 1615, | |
| "train_runtime": 7055.3037, | |
| "train_tokens_per_second": 29998.788 | |
| }, | |
| { | |
| "epoch": 1.9732034104750305, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.420187652074357e-08, | |
| "loss": 0.4288, | |
| "num_input_tokens_seen": 212305920, | |
| "step": 1620, | |
| "train_runtime": 7077.0456, | |
| "train_tokens_per_second": 29999.23 | |
| }, | |
| { | |
| "epoch": 1.979293544457978, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.4824004511415634e-08, | |
| "loss": 0.4133, | |
| "num_input_tokens_seen": 212961280, | |
| "step": 1625, | |
| "train_runtime": 7098.7568, | |
| "train_tokens_per_second": 29999.799 | |
| }, | |
| { | |
| "epoch": 1.9853836784409258, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 7.732639576413236e-09, | |
| "loss": 0.4267, | |
| "num_input_tokens_seen": 213616640, | |
| "step": 1630, | |
| "train_runtime": 7120.4765, | |
| "train_tokens_per_second": 30000.329 | |
| }, | |
| { | |
| "epoch": 1.9914738124238733, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.9284306787918937e-09, | |
| "loss": 0.4308, | |
| "num_input_tokens_seen": 214272000, | |
| "step": 1635, | |
| "train_runtime": 7142.2064, | |
| "train_tokens_per_second": 30000.813 | |
| }, | |
| { | |
| "epoch": 1.997563946406821, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.118174735529001e-10, | |
| "loss": 0.4244, | |
| "num_input_tokens_seen": 214927360, | |
| "step": 1640, | |
| "train_runtime": 7163.9456, | |
| "train_tokens_per_second": 30001.255 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "num_input_tokens_seen": 215158784, | |
| "step": 1642, | |
| "total_flos": 9.00950810931757e+17, | |
| "train_loss": 0.6156596739239872, | |
| "train_runtime": 7180.2112, | |
| "train_samples_per_second": 14.632, | |
| "train_steps_per_second": 0.229 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1642, | |
| "num_input_tokens_seen": 215158784, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.00950810931757e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |