| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 6576, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0006083650190114068, | |
| "grad_norm": 194.0, | |
| "learning_rate": 0.0, | |
| "loss": 13.3208, | |
| "num_input_tokens_seen": 131072, | |
| "step": 1, | |
| "train_runtime": 41.7221, | |
| "train_tokens_per_second": 3141.549 | |
| }, | |
| { | |
| "epoch": 0.006083650190114068, | |
| "grad_norm": 15.875, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 12.4449, | |
| "num_input_tokens_seen": 1310720, | |
| "step": 10, | |
| "train_runtime": 360.3457, | |
| "train_tokens_per_second": 3637.396 | |
| }, | |
| { | |
| "epoch": 0.012167300380228136, | |
| "grad_norm": 2.359375, | |
| "learning_rate": 4.797979797979798e-05, | |
| "loss": 11.0058, | |
| "num_input_tokens_seen": 2621440, | |
| "step": 20, | |
| "train_runtime": 844.7738, | |
| "train_tokens_per_second": 3103.126 | |
| }, | |
| { | |
| "epoch": 0.018250950570342206, | |
| "grad_norm": 4.75, | |
| "learning_rate": 7.323232323232324e-05, | |
| "loss": 9.7781, | |
| "num_input_tokens_seen": 3932160, | |
| "step": 30, | |
| "train_runtime": 1327.8156, | |
| "train_tokens_per_second": 2961.375 | |
| }, | |
| { | |
| "epoch": 0.024334600760456272, | |
| "grad_norm": 4.9375, | |
| "learning_rate": 9.848484848484848e-05, | |
| "loss": 8.6293, | |
| "num_input_tokens_seen": 5242880, | |
| "step": 40, | |
| "train_runtime": 1818.9813, | |
| "train_tokens_per_second": 2882.317 | |
| }, | |
| { | |
| "epoch": 0.030418250950570342, | |
| "grad_norm": 5.75, | |
| "learning_rate": 0.00012373737373737374, | |
| "loss": 8.3717, | |
| "num_input_tokens_seen": 6553600, | |
| "step": 50, | |
| "train_runtime": 2311.0689, | |
| "train_tokens_per_second": 2835.744 | |
| }, | |
| { | |
| "epoch": 0.03650190114068441, | |
| "grad_norm": 4.25, | |
| "learning_rate": 0.00014898989898989897, | |
| "loss": 8.144, | |
| "num_input_tokens_seen": 7864320, | |
| "step": 60, | |
| "train_runtime": 2799.9586, | |
| "train_tokens_per_second": 2808.727 | |
| }, | |
| { | |
| "epoch": 0.04258555133079848, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 0.00017424242424242425, | |
| "loss": 7.9474, | |
| "num_input_tokens_seen": 9175040, | |
| "step": 70, | |
| "train_runtime": 3293.677, | |
| "train_tokens_per_second": 2785.653 | |
| }, | |
| { | |
| "epoch": 0.048669201520912544, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 0.0001994949494949495, | |
| "loss": 7.6349, | |
| "num_input_tokens_seen": 10485760, | |
| "step": 80, | |
| "train_runtime": 3748.5901, | |
| "train_tokens_per_second": 2797.254 | |
| }, | |
| { | |
| "epoch": 0.05475285171102662, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 0.00022474747474747475, | |
| "loss": 7.4784, | |
| "num_input_tokens_seen": 11796480, | |
| "step": 90, | |
| "train_runtime": 4186.4419, | |
| "train_tokens_per_second": 2817.782 | |
| }, | |
| { | |
| "epoch": 0.060836501901140684, | |
| "grad_norm": 2.28125, | |
| "learning_rate": 0.00025, | |
| "loss": 7.136, | |
| "num_input_tokens_seen": 13107200, | |
| "step": 100, | |
| "train_runtime": 4674.9867, | |
| "train_tokens_per_second": 2803.687 | |
| }, | |
| { | |
| "epoch": 0.06692015209125475, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 0.00027525252525252526, | |
| "loss": 6.9612, | |
| "num_input_tokens_seen": 14417920, | |
| "step": 110, | |
| "train_runtime": 5176.2495, | |
| "train_tokens_per_second": 2785.399 | |
| }, | |
| { | |
| "epoch": 0.07300380228136882, | |
| "grad_norm": 3.015625, | |
| "learning_rate": 0.0003005050505050505, | |
| "loss": 6.8282, | |
| "num_input_tokens_seen": 15728640, | |
| "step": 120, | |
| "train_runtime": 5683.7739, | |
| "train_tokens_per_second": 2767.288 | |
| }, | |
| { | |
| "epoch": 0.07908745247148288, | |
| "grad_norm": 1.9140625, | |
| "learning_rate": 0.00032575757575757576, | |
| "loss": 6.6848, | |
| "num_input_tokens_seen": 17039360, | |
| "step": 130, | |
| "train_runtime": 6191.256, | |
| "train_tokens_per_second": 2752.165 | |
| }, | |
| { | |
| "epoch": 0.08517110266159696, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 0.000351010101010101, | |
| "loss": 6.4192, | |
| "num_input_tokens_seen": 18350080, | |
| "step": 140, | |
| "train_runtime": 6698.3992, | |
| "train_tokens_per_second": 2739.472 | |
| }, | |
| { | |
| "epoch": 0.09125475285171103, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 0.00037626262626262627, | |
| "loss": 6.1528, | |
| "num_input_tokens_seen": 19660800, | |
| "step": 150, | |
| "train_runtime": 7206.1898, | |
| "train_tokens_per_second": 2728.321 | |
| }, | |
| { | |
| "epoch": 0.09733840304182509, | |
| "grad_norm": 2.953125, | |
| "learning_rate": 0.0004015151515151515, | |
| "loss": 6.1066, | |
| "num_input_tokens_seen": 20971520, | |
| "step": 160, | |
| "train_runtime": 7714.79, | |
| "train_tokens_per_second": 2718.353 | |
| }, | |
| { | |
| "epoch": 0.10342205323193916, | |
| "grad_norm": 2.609375, | |
| "learning_rate": 0.00042676767676767677, | |
| "loss": 5.8847, | |
| "num_input_tokens_seen": 22282240, | |
| "step": 170, | |
| "train_runtime": 8220.9522, | |
| "train_tokens_per_second": 2710.421 | |
| }, | |
| { | |
| "epoch": 0.10950570342205324, | |
| "grad_norm": 6.0, | |
| "learning_rate": 0.0004520202020202021, | |
| "loss": 6.1618, | |
| "num_input_tokens_seen": 23592960, | |
| "step": 180, | |
| "train_runtime": 8731.5822, | |
| "train_tokens_per_second": 2702.026 | |
| }, | |
| { | |
| "epoch": 0.1155893536121673, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 0.0004772727272727273, | |
| "loss": 6.0654, | |
| "num_input_tokens_seen": 24903680, | |
| "step": 190, | |
| "train_runtime": 9242.7977, | |
| "train_tokens_per_second": 2694.388 | |
| }, | |
| { | |
| "epoch": 0.12167300380228137, | |
| "grad_norm": 3.25, | |
| "learning_rate": 0.0004999999696722131, | |
| "loss": 5.6755, | |
| "num_input_tokens_seen": 26214400, | |
| "step": 200, | |
| "train_runtime": 9756.404, | |
| "train_tokens_per_second": 2686.892 | |
| }, | |
| { | |
| "epoch": 0.12775665399239544, | |
| "grad_norm": 123.5, | |
| "learning_rate": 0.0004999963303466888, | |
| "loss": 5.8267, | |
| "num_input_tokens_seen": 27525120, | |
| "step": 210, | |
| "train_runtime": 10265.9757, | |
| "train_tokens_per_second": 2681.199 | |
| }, | |
| { | |
| "epoch": 0.1338403041825095, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 0.0004999866255649598, | |
| "loss": 5.7143, | |
| "num_input_tokens_seen": 28835840, | |
| "step": 220, | |
| "train_runtime": 10768.3145, | |
| "train_tokens_per_second": 2677.842 | |
| }, | |
| { | |
| "epoch": 0.1399239543726236, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 0.000499970855562485, | |
| "loss": 5.4202, | |
| "num_input_tokens_seen": 30146560, | |
| "step": 230, | |
| "train_runtime": 11276.6, | |
| "train_tokens_per_second": 2673.373 | |
| }, | |
| { | |
| "epoch": 0.14600760456273765, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 0.0004999490207218795, | |
| "loss": 5.2555, | |
| "num_input_tokens_seen": 31457280, | |
| "step": 240, | |
| "train_runtime": 11787.6318, | |
| "train_tokens_per_second": 2668.668 | |
| }, | |
| { | |
| "epoch": 0.1520912547528517, | |
| "grad_norm": 1.875, | |
| "learning_rate": 0.0004999211215729038, | |
| "loss": 5.1247, | |
| "num_input_tokens_seen": 32768000, | |
| "step": 250, | |
| "train_runtime": 12298.0499, | |
| "train_tokens_per_second": 2664.487 | |
| }, | |
| { | |
| "epoch": 0.15817490494296577, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 0.0004998871587924522, | |
| "loss": 5.1095, | |
| "num_input_tokens_seen": 34078720, | |
| "step": 260, | |
| "train_runtime": 12764.4114, | |
| "train_tokens_per_second": 2669.823 | |
| }, | |
| { | |
| "epoch": 0.16425855513307985, | |
| "grad_norm": 2.53125, | |
| "learning_rate": 0.000499847133204536, | |
| "loss": 4.9468, | |
| "num_input_tokens_seen": 35389440, | |
| "step": 270, | |
| "train_runtime": 13201.0465, | |
| "train_tokens_per_second": 2680.806 | |
| }, | |
| { | |
| "epoch": 0.1703422053231939, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 0.000499801045780263, | |
| "loss": 4.8639, | |
| "num_input_tokens_seen": 36700160, | |
| "step": 280, | |
| "train_runtime": 13643.729, | |
| "train_tokens_per_second": 2689.892 | |
| }, | |
| { | |
| "epoch": 0.17642585551330797, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 0.0004997488976378147, | |
| "loss": 4.785, | |
| "num_input_tokens_seen": 38010880, | |
| "step": 290, | |
| "train_runtime": 14087.3187, | |
| "train_tokens_per_second": 2698.234 | |
| }, | |
| { | |
| "epoch": 0.18250950570342206, | |
| "grad_norm": 1.9140625, | |
| "learning_rate": 0.0004996906900424189, | |
| "loss": 4.7422, | |
| "num_input_tokens_seen": 39321600, | |
| "step": 300, | |
| "train_runtime": 14530.9563, | |
| "train_tokens_per_second": 2706.057 | |
| }, | |
| { | |
| "epoch": 0.18859315589353612, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 0.0004996264244063187, | |
| "loss": 4.6158, | |
| "num_input_tokens_seen": 40632320, | |
| "step": 310, | |
| "train_runtime": 14975.051, | |
| "train_tokens_per_second": 2713.334 | |
| }, | |
| { | |
| "epoch": 0.19467680608365018, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 0.0004995561022887386, | |
| "loss": 4.5245, | |
| "num_input_tokens_seen": 41943040, | |
| "step": 320, | |
| "train_runtime": 15419.5208, | |
| "train_tokens_per_second": 2720.126 | |
| }, | |
| { | |
| "epoch": 0.20076045627376427, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 0.0004994797253958465, | |
| "loss": 4.4933, | |
| "num_input_tokens_seen": 43253760, | |
| "step": 330, | |
| "train_runtime": 15930.5389, | |
| "train_tokens_per_second": 2715.147 | |
| }, | |
| { | |
| "epoch": 0.20684410646387832, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 0.0004993972955807125, | |
| "loss": 4.4701, | |
| "num_input_tokens_seen": 44564480, | |
| "step": 340, | |
| "train_runtime": 16443.622, | |
| "train_tokens_per_second": 2710.138 | |
| }, | |
| { | |
| "epoch": 0.21292775665399238, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 0.0004993088148432637, | |
| "loss": 4.4604, | |
| "num_input_tokens_seen": 45875200, | |
| "step": 350, | |
| "train_runtime": 16954.6894, | |
| "train_tokens_per_second": 2705.753 | |
| }, | |
| { | |
| "epoch": 0.21901140684410647, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 0.0004992142853302356, | |
| "loss": 4.3468, | |
| "num_input_tokens_seen": 47185920, | |
| "step": 360, | |
| "train_runtime": 17465.395, | |
| "train_tokens_per_second": 2701.681 | |
| }, | |
| { | |
| "epoch": 0.22509505703422053, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 0.0004991137093351205, | |
| "loss": 4.3054, | |
| "num_input_tokens_seen": 48496640, | |
| "step": 370, | |
| "train_runtime": 17969.6261, | |
| "train_tokens_per_second": 2698.812 | |
| }, | |
| { | |
| "epoch": 0.2311787072243346, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 0.0004990070892981112, | |
| "loss": 4.2011, | |
| "num_input_tokens_seen": 49807360, | |
| "step": 380, | |
| "train_runtime": 18474.7265, | |
| "train_tokens_per_second": 2695.973 | |
| }, | |
| { | |
| "epoch": 0.23726235741444868, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 0.0004988944278060427, | |
| "loss": 4.3475, | |
| "num_input_tokens_seen": 51118080, | |
| "step": 390, | |
| "train_runtime": 18979.6402, | |
| "train_tokens_per_second": 2693.311 | |
| }, | |
| { | |
| "epoch": 0.24334600760456274, | |
| "grad_norm": 3.65625, | |
| "learning_rate": 0.0004987757275923281, | |
| "loss": 4.3356, | |
| "num_input_tokens_seen": 52428800, | |
| "step": 400, | |
| "train_runtime": 19484.7862, | |
| "train_tokens_per_second": 2690.756 | |
| }, | |
| { | |
| "epoch": 0.2494296577946768, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 0.0004986509915368937, | |
| "loss": 4.186, | |
| "num_input_tokens_seen": 53739520, | |
| "step": 410, | |
| "train_runtime": 19989.039, | |
| "train_tokens_per_second": 2688.449 | |
| }, | |
| { | |
| "epoch": 0.2555133079847909, | |
| "grad_norm": 51.0, | |
| "learning_rate": 0.0004985202226661082, | |
| "loss": 4.6079, | |
| "num_input_tokens_seen": 55050240, | |
| "step": 420, | |
| "train_runtime": 20493.286, | |
| "train_tokens_per_second": 2686.257 | |
| }, | |
| { | |
| "epoch": 0.26159695817490497, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 0.0004983834241527096, | |
| "loss": 4.3251, | |
| "num_input_tokens_seen": 56360960, | |
| "step": 430, | |
| "train_runtime": 20997.3631, | |
| "train_tokens_per_second": 2684.192 | |
| }, | |
| { | |
| "epoch": 0.267680608365019, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 0.0004982405993157283, | |
| "loss": 4.0773, | |
| "num_input_tokens_seen": 57671680, | |
| "step": 440, | |
| "train_runtime": 21503.8583, | |
| "train_tokens_per_second": 2681.922 | |
| }, | |
| { | |
| "epoch": 0.2737642585551331, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 0.0004980917516204061, | |
| "loss": 4.0371, | |
| "num_input_tokens_seen": 58982400, | |
| "step": 450, | |
| "train_runtime": 22012.6337, | |
| "train_tokens_per_second": 2679.479 | |
| }, | |
| { | |
| "epoch": 0.2798479087452472, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 0.0004979368846781129, | |
| "loss": 4.065, | |
| "num_input_tokens_seen": 60293120, | |
| "step": 460, | |
| "train_runtime": 22521.0154, | |
| "train_tokens_per_second": 2677.194 | |
| }, | |
| { | |
| "epoch": 0.2859315589353612, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 0.0004977760022462584, | |
| "loss": 4.0267, | |
| "num_input_tokens_seen": 61603840, | |
| "step": 470, | |
| "train_runtime": 23025.7928, | |
| "train_tokens_per_second": 2675.428 | |
| }, | |
| { | |
| "epoch": 0.2920152091254753, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 0.0004976091082282013, | |
| "loss": 3.9963, | |
| "num_input_tokens_seen": 62914560, | |
| "step": 480, | |
| "train_runtime": 23530.8991, | |
| "train_tokens_per_second": 2673.7 | |
| }, | |
| { | |
| "epoch": 0.2980988593155893, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 0.0004974362066731545, | |
| "loss": 4.0162, | |
| "num_input_tokens_seen": 64225280, | |
| "step": 490, | |
| "train_runtime": 24031.3065, | |
| "train_tokens_per_second": 2672.567 | |
| }, | |
| { | |
| "epoch": 0.3041825095057034, | |
| "grad_norm": 1.125, | |
| "learning_rate": 0.0004972573017760867, | |
| "loss": 3.9844, | |
| "num_input_tokens_seen": 65536000, | |
| "step": 500, | |
| "train_runtime": 24533.3589, | |
| "train_tokens_per_second": 2671.302 | |
| }, | |
| { | |
| "epoch": 0.3102661596958175, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 0.000497072397877621, | |
| "loss": 3.999, | |
| "num_input_tokens_seen": 66846720, | |
| "step": 510, | |
| "train_runtime": 25032.6558, | |
| "train_tokens_per_second": 2670.381 | |
| }, | |
| { | |
| "epoch": 0.31634980988593153, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 0.0004968814994639292, | |
| "loss": 3.9714, | |
| "num_input_tokens_seen": 68157440, | |
| "step": 520, | |
| "train_runtime": 25526.0535, | |
| "train_tokens_per_second": 2670.113 | |
| }, | |
| { | |
| "epoch": 0.3224334600760456, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 0.0004966846111666232, | |
| "loss": 3.9438, | |
| "num_input_tokens_seen": 69468160, | |
| "step": 530, | |
| "train_runtime": 26024.3771, | |
| "train_tokens_per_second": 2669.35 | |
| }, | |
| { | |
| "epoch": 0.3285171102661597, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 0.0004964817377626425, | |
| "loss": 3.8985, | |
| "num_input_tokens_seen": 70778880, | |
| "step": 540, | |
| "train_runtime": 26527.3154, | |
| "train_tokens_per_second": 2668.151 | |
| }, | |
| { | |
| "epoch": 0.33460076045627374, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 0.0004962728841741383, | |
| "loss": 3.926, | |
| "num_input_tokens_seen": 72089600, | |
| "step": 550, | |
| "train_runtime": 27037.0928, | |
| "train_tokens_per_second": 2666.322 | |
| }, | |
| { | |
| "epoch": 0.3406844106463878, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 0.000496058055468354, | |
| "loss": 3.8868, | |
| "num_input_tokens_seen": 73400320, | |
| "step": 560, | |
| "train_runtime": 27544.463, | |
| "train_tokens_per_second": 2664.794 | |
| }, | |
| { | |
| "epoch": 0.3467680608365019, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 0.0004958372568575029, | |
| "loss": 3.877, | |
| "num_input_tokens_seen": 74711040, | |
| "step": 570, | |
| "train_runtime": 28055.7086, | |
| "train_tokens_per_second": 2662.953 | |
| }, | |
| { | |
| "epoch": 0.35285171102661594, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 0.0004956104936986405, | |
| "loss": 3.8675, | |
| "num_input_tokens_seen": 76021760, | |
| "step": 580, | |
| "train_runtime": 28563.1363, | |
| "train_tokens_per_second": 2661.534 | |
| }, | |
| { | |
| "epoch": 0.35893536121673003, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 0.0004953777714935359, | |
| "loss": 3.8062, | |
| "num_input_tokens_seen": 77332480, | |
| "step": 590, | |
| "train_runtime": 29085.1121, | |
| "train_tokens_per_second": 2658.834 | |
| }, | |
| { | |
| "epoch": 0.3650190114068441, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 0.000495139095888537, | |
| "loss": 3.7848, | |
| "num_input_tokens_seen": 78643200, | |
| "step": 600, | |
| "train_runtime": 29603.854, | |
| "train_tokens_per_second": 2656.519 | |
| }, | |
| { | |
| "epoch": 0.37110266159695815, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 0.0004948944726744348, | |
| "loss": 3.8038, | |
| "num_input_tokens_seen": 79953920, | |
| "step": 610, | |
| "train_runtime": 30112.8786, | |
| "train_tokens_per_second": 2655.14 | |
| }, | |
| { | |
| "epoch": 0.37718631178707224, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 0.0004946439077863219, | |
| "loss": 3.8141, | |
| "num_input_tokens_seen": 81264640, | |
| "step": 620, | |
| "train_runtime": 30628.1397, | |
| "train_tokens_per_second": 2653.267 | |
| }, | |
| { | |
| "epoch": 0.3832699619771863, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 0.000494387407303449, | |
| "loss": 3.7289, | |
| "num_input_tokens_seen": 82575360, | |
| "step": 630, | |
| "train_runtime": 31145.1722, | |
| "train_tokens_per_second": 2651.305 | |
| }, | |
| { | |
| "epoch": 0.38935361216730036, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 0.0004941249774490771, | |
| "loss": 3.7867, | |
| "num_input_tokens_seen": 83886080, | |
| "step": 640, | |
| "train_runtime": 31656.3857, | |
| "train_tokens_per_second": 2649.894 | |
| }, | |
| { | |
| "epoch": 0.39543726235741444, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 0.0004938566245903269, | |
| "loss": 3.7019, | |
| "num_input_tokens_seen": 85196800, | |
| "step": 650, | |
| "train_runtime": 32167.9519, | |
| "train_tokens_per_second": 2648.499 | |
| }, | |
| { | |
| "epoch": 0.40152091254752853, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 0.000493582355238024, | |
| "loss": 3.7298, | |
| "num_input_tokens_seen": 86507520, | |
| "step": 660, | |
| "train_runtime": 32673.7302, | |
| "train_tokens_per_second": 2647.617 | |
| }, | |
| { | |
| "epoch": 0.40760456273764256, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 0.0004933021760465411, | |
| "loss": 3.6524, | |
| "num_input_tokens_seen": 87818240, | |
| "step": 670, | |
| "train_runtime": 33180.4976, | |
| "train_tokens_per_second": 2646.682 | |
| }, | |
| { | |
| "epoch": 0.41368821292775665, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 0.0004930160938136364, | |
| "loss": 3.6602, | |
| "num_input_tokens_seen": 89128960, | |
| "step": 680, | |
| "train_runtime": 33688.0024, | |
| "train_tokens_per_second": 2645.718 | |
| }, | |
| { | |
| "epoch": 0.41977186311787074, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 0.0004927241154802888, | |
| "loss": 3.6685, | |
| "num_input_tokens_seen": 90439680, | |
| "step": 690, | |
| "train_runtime": 34211.2177, | |
| "train_tokens_per_second": 2643.568 | |
| }, | |
| { | |
| "epoch": 0.42585551330798477, | |
| "grad_norm": 0.5, | |
| "learning_rate": 0.0004924262481305295, | |
| "loss": 3.6544, | |
| "num_input_tokens_seen": 91750400, | |
| "step": 700, | |
| "train_runtime": 34733.1748, | |
| "train_tokens_per_second": 2641.578 | |
| }, | |
| { | |
| "epoch": 0.43193916349809885, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 0.0004921224989912701, | |
| "loss": 3.6194, | |
| "num_input_tokens_seen": 93061120, | |
| "step": 710, | |
| "train_runtime": 35254.3191, | |
| "train_tokens_per_second": 2639.708 | |
| }, | |
| { | |
| "epoch": 0.43802281368821294, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 0.000491812875432127, | |
| "loss": 3.6242, | |
| "num_input_tokens_seen": 94371840, | |
| "step": 720, | |
| "train_runtime": 35767.721, | |
| "train_tokens_per_second": 2638.464 | |
| }, | |
| { | |
| "epoch": 0.444106463878327, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 0.0004914973849652431, | |
| "loss": 3.6366, | |
| "num_input_tokens_seen": 95682560, | |
| "step": 730, | |
| "train_runtime": 36285.9776, | |
| "train_tokens_per_second": 2636.902 | |
| }, | |
| { | |
| "epoch": 0.45019011406844106, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 0.0004911760352451052, | |
| "loss": 3.5994, | |
| "num_input_tokens_seen": 96993280, | |
| "step": 740, | |
| "train_runtime": 36794.874, | |
| "train_tokens_per_second": 2636.054 | |
| }, | |
| { | |
| "epoch": 0.45627376425855515, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 0.0004908488340683583, | |
| "loss": 3.5819, | |
| "num_input_tokens_seen": 98304000, | |
| "step": 750, | |
| "train_runtime": 37301.9123, | |
| "train_tokens_per_second": 2635.361 | |
| }, | |
| { | |
| "epoch": 0.4623574144486692, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 0.0004905157893736169, | |
| "loss": 3.5663, | |
| "num_input_tokens_seen": 99614720, | |
| "step": 760, | |
| "train_runtime": 37810.1562, | |
| "train_tokens_per_second": 2634.602 | |
| }, | |
| { | |
| "epoch": 0.46844106463878327, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 0.0004901769092412711, | |
| "loss": 3.5701, | |
| "num_input_tokens_seen": 100925440, | |
| "step": 770, | |
| "train_runtime": 38317.6133, | |
| "train_tokens_per_second": 2633.918 | |
| }, | |
| { | |
| "epoch": 0.47452471482889735, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 0.0004898322018932924, | |
| "loss": 3.5661, | |
| "num_input_tokens_seen": 102236160, | |
| "step": 780, | |
| "train_runtime": 38820.0186, | |
| "train_tokens_per_second": 2633.594 | |
| }, | |
| { | |
| "epoch": 0.4806083650190114, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 0.0004894816756930327, | |
| "loss": 3.5268, | |
| "num_input_tokens_seen": 103546880, | |
| "step": 790, | |
| "train_runtime": 39328.8031, | |
| "train_tokens_per_second": 2632.851 | |
| }, | |
| { | |
| "epoch": 0.4866920152091255, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 0.0004891253391450222, | |
| "loss": 3.5552, | |
| "num_input_tokens_seen": 104857600, | |
| "step": 800, | |
| "train_runtime": 39836.4687, | |
| "train_tokens_per_second": 2632.201 | |
| }, | |
| { | |
| "epoch": 0.49277566539923956, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.0004887632008947625, | |
| "loss": 3.5054, | |
| "num_input_tokens_seen": 106168320, | |
| "step": 810, | |
| "train_runtime": 40344.8403, | |
| "train_tokens_per_second": 2631.522 | |
| }, | |
| { | |
| "epoch": 0.4988593155893536, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 0.0004883952697285176, | |
| "loss": 3.5364, | |
| "num_input_tokens_seen": 107479040, | |
| "step": 820, | |
| "train_runtime": 40853.9425, | |
| "train_tokens_per_second": 2630.812 | |
| }, | |
| { | |
| "epoch": 0.5049429657794677, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.0004880215545730999, | |
| "loss": 3.5194, | |
| "num_input_tokens_seen": 108789760, | |
| "step": 830, | |
| "train_runtime": 41371.0384, | |
| "train_tokens_per_second": 2629.612 | |
| }, | |
| { | |
| "epoch": 0.5110266159695818, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 0.00048764206449565414, | |
| "loss": 3.5011, | |
| "num_input_tokens_seen": 110100480, | |
| "step": 840, | |
| "train_runtime": 41878.8852, | |
| "train_tokens_per_second": 2629.021 | |
| }, | |
| { | |
| "epoch": 0.5171102661596958, | |
| "grad_norm": 0.5, | |
| "learning_rate": 0.00048725680870343726, | |
| "loss": 3.4902, | |
| "num_input_tokens_seen": 111411200, | |
| "step": 850, | |
| "train_runtime": 42387.2869, | |
| "train_tokens_per_second": 2628.411 | |
| }, | |
| { | |
| "epoch": 0.5231939163498099, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 0.000486865796543595, | |
| "loss": 3.4779, | |
| "num_input_tokens_seen": 112721920, | |
| "step": 860, | |
| "train_runtime": 42896.0857, | |
| "train_tokens_per_second": 2627.79 | |
| }, | |
| { | |
| "epoch": 0.529277566539924, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 0.0004864690375029351, | |
| "loss": 3.4634, | |
| "num_input_tokens_seen": 114032640, | |
| "step": 870, | |
| "train_runtime": 43408.6763, | |
| "train_tokens_per_second": 2626.955 | |
| }, | |
| { | |
| "epoch": 0.535361216730038, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 0.0004860665412076973, | |
| "loss": 3.4385, | |
| "num_input_tokens_seen": 115343360, | |
| "step": 880, | |
| "train_runtime": 43915.7309, | |
| "train_tokens_per_second": 2626.47 | |
| }, | |
| { | |
| "epoch": 0.5414448669201521, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.00048565831742331954, | |
| "loss": 3.4531, | |
| "num_input_tokens_seen": 116654080, | |
| "step": 890, | |
| "train_runtime": 44431.4678, | |
| "train_tokens_per_second": 2625.483 | |
| }, | |
| { | |
| "epoch": 0.5475285171102662, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 0.00048524437605420103, | |
| "loss": 3.4657, | |
| "num_input_tokens_seen": 117964800, | |
| "step": 900, | |
| "train_runtime": 44936.808, | |
| "train_tokens_per_second": 2625.126 | |
| }, | |
| { | |
| "epoch": 0.5536121673003802, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 0.00048482472714346227, | |
| "loss": 3.4579, | |
| "num_input_tokens_seen": 119275520, | |
| "step": 910, | |
| "train_runtime": 45444.6195, | |
| "train_tokens_per_second": 2624.635 | |
| }, | |
| { | |
| "epoch": 0.5596958174904944, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 0.000484399380872701, | |
| "loss": 3.4262, | |
| "num_input_tokens_seen": 120586240, | |
| "step": 920, | |
| "train_runtime": 45957.1389, | |
| "train_tokens_per_second": 2623.885 | |
| }, | |
| { | |
| "epoch": 0.5657794676806084, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 0.0004839683475617455, | |
| "loss": 3.4126, | |
| "num_input_tokens_seen": 121896960, | |
| "step": 930, | |
| "train_runtime": 46465.2735, | |
| "train_tokens_per_second": 2623.399 | |
| }, | |
| { | |
| "epoch": 0.5718631178707224, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 0.00048353163766840385, | |
| "loss": 3.4239, | |
| "num_input_tokens_seen": 123207680, | |
| "step": 940, | |
| "train_runtime": 46973.782, | |
| "train_tokens_per_second": 2622.903 | |
| }, | |
| { | |
| "epoch": 0.5779467680608364, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 0.00048308926178821054, | |
| "loss": 3.3839, | |
| "num_input_tokens_seen": 124518400, | |
| "step": 950, | |
| "train_runtime": 47481.0819, | |
| "train_tokens_per_second": 2622.484 | |
| }, | |
| { | |
| "epoch": 0.5840304182509506, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 0.00048264123065416934, | |
| "loss": 3.441, | |
| "num_input_tokens_seen": 125829120, | |
| "step": 960, | |
| "train_runtime": 47997.7462, | |
| "train_tokens_per_second": 2621.563 | |
| }, | |
| { | |
| "epoch": 0.5901140684410646, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 0.00048218755513649257, | |
| "loss": 3.4104, | |
| "num_input_tokens_seen": 127139840, | |
| "step": 970, | |
| "train_runtime": 48505.8936, | |
| "train_tokens_per_second": 2621.121 | |
| }, | |
| { | |
| "epoch": 0.5961977186311787, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 0.0004817282462423378, | |
| "loss": 3.3967, | |
| "num_input_tokens_seen": 128450560, | |
| "step": 980, | |
| "train_runtime": 49013.9882, | |
| "train_tokens_per_second": 2620.692 | |
| }, | |
| { | |
| "epoch": 0.6022813688212928, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 0.00048126331511554044, | |
| "loss": 3.4011, | |
| "num_input_tokens_seen": 129761280, | |
| "step": 990, | |
| "train_runtime": 49525.127, | |
| "train_tokens_per_second": 2620.11 | |
| }, | |
| { | |
| "epoch": 0.6083650190114068, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.0004807927730363437, | |
| "loss": 3.365, | |
| "num_input_tokens_seen": 131072000, | |
| "step": 1000, | |
| "train_runtime": 50032.5177, | |
| "train_tokens_per_second": 2619.736 | |
| }, | |
| { | |
| "epoch": 0.6144486692015209, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 0.00048031663142112464, | |
| "loss": 3.3622, | |
| "num_input_tokens_seen": 132382720, | |
| "step": 1010, | |
| "train_runtime": 50541.1055, | |
| "train_tokens_per_second": 2619.308 | |
| }, | |
| { | |
| "epoch": 0.620532319391635, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 0.0004798349018221173, | |
| "loss": 3.386, | |
| "num_input_tokens_seen": 133693440, | |
| "step": 1020, | |
| "train_runtime": 51052.0538, | |
| "train_tokens_per_second": 2618.767 | |
| }, | |
| { | |
| "epoch": 0.626615969581749, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 0.0004793475959271323, | |
| "loss": 3.3605, | |
| "num_input_tokens_seen": 135004160, | |
| "step": 1030, | |
| "train_runtime": 51561.7454, | |
| "train_tokens_per_second": 2618.301 | |
| }, | |
| { | |
| "epoch": 0.6326996197718631, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 0.0004788547255592736, | |
| "loss": 3.3946, | |
| "num_input_tokens_seen": 136314880, | |
| "step": 1040, | |
| "train_runtime": 52071.6302, | |
| "train_tokens_per_second": 2617.834 | |
| }, | |
| { | |
| "epoch": 0.6387832699619772, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 0.00047835630267665114, | |
| "loss": 3.3573, | |
| "num_input_tokens_seen": 137625600, | |
| "step": 1050, | |
| "train_runtime": 52586.3476, | |
| "train_tokens_per_second": 2617.136 | |
| }, | |
| { | |
| "epoch": 0.6448669201520912, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 0.0004778523393720911, | |
| "loss": 3.3555, | |
| "num_input_tokens_seen": 138936320, | |
| "step": 1060, | |
| "train_runtime": 53105.6264, | |
| "train_tokens_per_second": 2616.226 | |
| }, | |
| { | |
| "epoch": 0.6509505703422053, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 0.00047734284787284237, | |
| "loss": 3.3005, | |
| "num_input_tokens_seen": 140247040, | |
| "step": 1070, | |
| "train_runtime": 53616.3439, | |
| "train_tokens_per_second": 2615.752 | |
| }, | |
| { | |
| "epoch": 0.6570342205323194, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 0.00047682784054027994, | |
| "loss": 3.3505, | |
| "num_input_tokens_seen": 141557760, | |
| "step": 1080, | |
| "train_runtime": 54124.0806, | |
| "train_tokens_per_second": 2615.43 | |
| }, | |
| { | |
| "epoch": 0.6631178707224334, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00047630732986960486, | |
| "loss": 3.3432, | |
| "num_input_tokens_seen": 142868480, | |
| "step": 1090, | |
| "train_runtime": 54632.5779, | |
| "train_tokens_per_second": 2615.079 | |
| }, | |
| { | |
| "epoch": 0.6692015209125475, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 0.0004757813284895412, | |
| "loss": 3.329, | |
| "num_input_tokens_seen": 144179200, | |
| "step": 1100, | |
| "train_runtime": 55142.8421, | |
| "train_tokens_per_second": 2614.649 | |
| }, | |
| { | |
| "epoch": 0.6752851711026616, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 0.0004752498491620296, | |
| "loss": 3.3267, | |
| "num_input_tokens_seen": 145489920, | |
| "step": 1110, | |
| "train_runtime": 55654.8916, | |
| "train_tokens_per_second": 2614.144 | |
| }, | |
| { | |
| "epoch": 0.6813688212927757, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 0.00047471290478191786, | |
| "loss": 3.3118, | |
| "num_input_tokens_seen": 146800640, | |
| "step": 1120, | |
| "train_runtime": 56162.9248, | |
| "train_tokens_per_second": 2613.835 | |
| }, | |
| { | |
| "epoch": 0.6874524714828897, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 0.0004741705083766475, | |
| "loss": 3.357, | |
| "num_input_tokens_seen": 148111360, | |
| "step": 1130, | |
| "train_runtime": 56673.7299, | |
| "train_tokens_per_second": 2613.404 | |
| }, | |
| { | |
| "epoch": 0.6935361216730038, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 0.0004736226731059383, | |
| "loss": 3.3046, | |
| "num_input_tokens_seen": 149422080, | |
| "step": 1140, | |
| "train_runtime": 57182.6726, | |
| "train_tokens_per_second": 2613.066 | |
| }, | |
| { | |
| "epoch": 0.6996197718631179, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 0.00047306941226146884, | |
| "loss": 3.3162, | |
| "num_input_tokens_seen": 150732800, | |
| "step": 1150, | |
| "train_runtime": 57691.3075, | |
| "train_tokens_per_second": 2612.747 | |
| }, | |
| { | |
| "epoch": 0.7057034220532319, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 0.0004725107392665536, | |
| "loss": 3.2922, | |
| "num_input_tokens_seen": 152043520, | |
| "step": 1160, | |
| "train_runtime": 58195.4974, | |
| "train_tokens_per_second": 2612.634 | |
| }, | |
| { | |
| "epoch": 0.711787072243346, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 0.0004719466676758181, | |
| "loss": 3.3052, | |
| "num_input_tokens_seen": 153354240, | |
| "step": 1170, | |
| "train_runtime": 58702.65, | |
| "train_tokens_per_second": 2612.39 | |
| }, | |
| { | |
| "epoch": 0.7178707224334601, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 0.0004713772111748693, | |
| "loss": 3.3115, | |
| "num_input_tokens_seen": 154664960, | |
| "step": 1180, | |
| "train_runtime": 59213.6331, | |
| "train_tokens_per_second": 2611.982 | |
| }, | |
| { | |
| "epoch": 0.7239543726235741, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.0004708023835799638, | |
| "loss": 3.2765, | |
| "num_input_tokens_seen": 155975680, | |
| "step": 1190, | |
| "train_runtime": 59725.0865, | |
| "train_tokens_per_second": 2611.561 | |
| }, | |
| { | |
| "epoch": 0.7300380228136882, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 0.0004702221988376728, | |
| "loss": 3.2856, | |
| "num_input_tokens_seen": 157286400, | |
| "step": 1200, | |
| "train_runtime": 60234.323, | |
| "train_tokens_per_second": 2611.242 | |
| }, | |
| { | |
| "epoch": 0.7361216730038023, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00046963667102454344, | |
| "loss": 3.2904, | |
| "num_input_tokens_seen": 158597120, | |
| "step": 1210, | |
| "train_runtime": 60742.9574, | |
| "train_tokens_per_second": 2610.955 | |
| }, | |
| { | |
| "epoch": 0.7422053231939163, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 0.00046904581434675753, | |
| "loss": 3.2525, | |
| "num_input_tokens_seen": 159907840, | |
| "step": 1220, | |
| "train_runtime": 61259.4126, | |
| "train_tokens_per_second": 2610.339 | |
| }, | |
| { | |
| "epoch": 0.7482889733840304, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 0.00046844964313978667, | |
| "loss": 3.2743, | |
| "num_input_tokens_seen": 161218560, | |
| "step": 1230, | |
| "train_runtime": 61770.0765, | |
| "train_tokens_per_second": 2609.978 | |
| }, | |
| { | |
| "epoch": 0.7543726235741445, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 0.00046784817186804463, | |
| "loss": 3.2488, | |
| "num_input_tokens_seen": 162529280, | |
| "step": 1240, | |
| "train_runtime": 62278.1018, | |
| "train_tokens_per_second": 2609.734 | |
| }, | |
| { | |
| "epoch": 0.7604562737642585, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 0.0004672414151245361, | |
| "loss": 3.2617, | |
| "num_input_tokens_seen": 163840000, | |
| "step": 1250, | |
| "train_runtime": 62782.4473, | |
| "train_tokens_per_second": 2609.647 | |
| }, | |
| { | |
| "epoch": 0.7665399239543726, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 0.0004666293876305031, | |
| "loss": 3.2405, | |
| "num_input_tokens_seen": 165150720, | |
| "step": 1260, | |
| "train_runtime": 63294.8295, | |
| "train_tokens_per_second": 2609.229 | |
| }, | |
| { | |
| "epoch": 0.7726235741444867, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.00046601210423506743, | |
| "loss": 3.2632, | |
| "num_input_tokens_seen": 166461440, | |
| "step": 1270, | |
| "train_runtime": 63800.2584, | |
| "train_tokens_per_second": 2609.103 | |
| }, | |
| { | |
| "epoch": 0.7787072243346007, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.0004653895799148706, | |
| "loss": 3.2089, | |
| "num_input_tokens_seen": 167772160, | |
| "step": 1280, | |
| "train_runtime": 64315.6024, | |
| "train_tokens_per_second": 2608.576 | |
| }, | |
| { | |
| "epoch": 0.7847908745247149, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.00046476182977371014, | |
| "loss": 3.2395, | |
| "num_input_tokens_seen": 169082880, | |
| "step": 1290, | |
| "train_runtime": 64830.3388, | |
| "train_tokens_per_second": 2608.083 | |
| }, | |
| { | |
| "epoch": 0.7908745247148289, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.0004641288690421737, | |
| "loss": 3.2493, | |
| "num_input_tokens_seen": 170393600, | |
| "step": 1300, | |
| "train_runtime": 65347.315, | |
| "train_tokens_per_second": 2607.507 | |
| }, | |
| { | |
| "epoch": 0.7969581749049429, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.000463490713077269, | |
| "loss": 3.2235, | |
| "num_input_tokens_seen": 171704320, | |
| "step": 1310, | |
| "train_runtime": 65855.9256, | |
| "train_tokens_per_second": 2607.272 | |
| }, | |
| { | |
| "epoch": 0.8030418250950571, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.00046284737736205155, | |
| "loss": 3.2458, | |
| "num_input_tokens_seen": 173015040, | |
| "step": 1320, | |
| "train_runtime": 66363.9708, | |
| "train_tokens_per_second": 2607.063 | |
| }, | |
| { | |
| "epoch": 0.8091254752851711, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00046219887750524895, | |
| "loss": 3.2229, | |
| "num_input_tokens_seen": 174325760, | |
| "step": 1330, | |
| "train_runtime": 66872.6674, | |
| "train_tokens_per_second": 2606.831 | |
| }, | |
| { | |
| "epoch": 0.8152091254752851, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00046154522924088203, | |
| "loss": 3.2096, | |
| "num_input_tokens_seen": 175636480, | |
| "step": 1340, | |
| "train_runtime": 67386.1096, | |
| "train_tokens_per_second": 2606.42 | |
| }, | |
| { | |
| "epoch": 0.8212927756653993, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00046088644842788335, | |
| "loss": 3.1919, | |
| "num_input_tokens_seen": 176947200, | |
| "step": 1350, | |
| "train_runtime": 67893.1543, | |
| "train_tokens_per_second": 2606.26 | |
| }, | |
| { | |
| "epoch": 0.8273764258555133, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 0.0004602225510497121, | |
| "loss": 3.2157, | |
| "num_input_tokens_seen": 178257920, | |
| "step": 1360, | |
| "train_runtime": 68401.5702, | |
| "train_tokens_per_second": 2606.05 | |
| }, | |
| { | |
| "epoch": 0.8334600760456273, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 0.00045955355321396663, | |
| "loss": 3.1801, | |
| "num_input_tokens_seen": 179568640, | |
| "step": 1370, | |
| "train_runtime": 68913.0159, | |
| "train_tokens_per_second": 2605.729 | |
| }, | |
| { | |
| "epoch": 0.8395437262357415, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.0004588794711519935, | |
| "loss": 3.1953, | |
| "num_input_tokens_seen": 180879360, | |
| "step": 1380, | |
| "train_runtime": 69424.3103, | |
| "train_tokens_per_second": 2605.418 | |
| }, | |
| { | |
| "epoch": 0.8456273764258555, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.00045820032121849367, | |
| "loss": 3.2086, | |
| "num_input_tokens_seen": 182190080, | |
| "step": 1390, | |
| "train_runtime": 69934.3095, | |
| "train_tokens_per_second": 2605.16 | |
| }, | |
| { | |
| "epoch": 0.8517110266159695, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.0004575161198911256, | |
| "loss": 3.1908, | |
| "num_input_tokens_seen": 183500800, | |
| "step": 1400, | |
| "train_runtime": 70445.1369, | |
| "train_tokens_per_second": 2604.875 | |
| }, | |
| { | |
| "epoch": 0.8577946768060837, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00045682688377010595, | |
| "loss": 3.193, | |
| "num_input_tokens_seen": 184811520, | |
| "step": 1410, | |
| "train_runtime": 70955.9519, | |
| "train_tokens_per_second": 2604.595 | |
| }, | |
| { | |
| "epoch": 0.8638783269961977, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.000456132629577806, | |
| "loss": 3.1847, | |
| "num_input_tokens_seen": 186122240, | |
| "step": 1420, | |
| "train_runtime": 71465.9376, | |
| "train_tokens_per_second": 2604.349 | |
| }, | |
| { | |
| "epoch": 0.8699619771863117, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 0.00045543337415834676, | |
| "loss": 3.1901, | |
| "num_input_tokens_seen": 187432960, | |
| "step": 1430, | |
| "train_runtime": 71976.5411, | |
| "train_tokens_per_second": 2604.084 | |
| }, | |
| { | |
| "epoch": 0.8760456273764259, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.0004547291344771896, | |
| "loss": 3.2061, | |
| "num_input_tokens_seen": 188743680, | |
| "step": 1440, | |
| "train_runtime": 72494.8149, | |
| "train_tokens_per_second": 2603.547 | |
| }, | |
| { | |
| "epoch": 0.8821292775665399, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.0004540199276207252, | |
| "loss": 3.2118, | |
| "num_input_tokens_seen": 190054400, | |
| "step": 1450, | |
| "train_runtime": 73006.8923, | |
| "train_tokens_per_second": 2603.239 | |
| }, | |
| { | |
| "epoch": 0.888212927756654, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00045330577079585855, | |
| "loss": 3.1828, | |
| "num_input_tokens_seen": 191365120, | |
| "step": 1460, | |
| "train_runtime": 73514.9522, | |
| "train_tokens_per_second": 2603.078 | |
| }, | |
| { | |
| "epoch": 0.8942965779467681, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 0.0004525866813295919, | |
| "loss": 3.2098, | |
| "num_input_tokens_seen": 192675840, | |
| "step": 1470, | |
| "train_runtime": 74022.1124, | |
| "train_tokens_per_second": 2602.95 | |
| }, | |
| { | |
| "epoch": 0.9003802281368821, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 0.00045186267666860394, | |
| "loss": 3.176, | |
| "num_input_tokens_seen": 193986560, | |
| "step": 1480, | |
| "train_runtime": 74530.9401, | |
| "train_tokens_per_second": 2602.766 | |
| }, | |
| { | |
| "epoch": 0.9064638783269962, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.0004511337743788266, | |
| "loss": 3.1603, | |
| "num_input_tokens_seen": 195297280, | |
| "step": 1490, | |
| "train_runtime": 75039.6371, | |
| "train_tokens_per_second": 2602.588 | |
| }, | |
| { | |
| "epoch": 0.9125475285171103, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 0.00045039999214501944, | |
| "loss": 3.1512, | |
| "num_input_tokens_seen": 196608000, | |
| "step": 1500, | |
| "train_runtime": 75560.5392, | |
| "train_tokens_per_second": 2601.993 | |
| }, | |
| { | |
| "epoch": 0.9186311787072243, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 0.00044966134777033956, | |
| "loss": 3.1639, | |
| "num_input_tokens_seen": 197918720, | |
| "step": 1510, | |
| "train_runtime": 76079.4478, | |
| "train_tokens_per_second": 2601.474 | |
| }, | |
| { | |
| "epoch": 0.9247148288973384, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 0.00044891785917591055, | |
| "loss": 3.1389, | |
| "num_input_tokens_seen": 199229440, | |
| "step": 1520, | |
| "train_runtime": 76592.6237, | |
| "train_tokens_per_second": 2601.157 | |
| }, | |
| { | |
| "epoch": 0.9307984790874525, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.0004481695444003871, | |
| "loss": 3.1475, | |
| "num_input_tokens_seen": 200540160, | |
| "step": 1530, | |
| "train_runtime": 77104.1479, | |
| "train_tokens_per_second": 2600.9 | |
| }, | |
| { | |
| "epoch": 0.9368821292775665, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 0.00044741642159951795, | |
| "loss": 3.1367, | |
| "num_input_tokens_seen": 201850880, | |
| "step": 1540, | |
| "train_runtime": 77615.4676, | |
| "train_tokens_per_second": 2600.653 | |
| }, | |
| { | |
| "epoch": 0.9429657794676806, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.0004466585090457046, | |
| "loss": 3.1162, | |
| "num_input_tokens_seen": 203161600, | |
| "step": 1550, | |
| "train_runtime": 78127.8408, | |
| "train_tokens_per_second": 2600.374 | |
| }, | |
| { | |
| "epoch": 0.9490494296577947, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 0.00044589582512755854, | |
| "loss": 3.136, | |
| "num_input_tokens_seen": 204472320, | |
| "step": 1560, | |
| "train_runtime": 78643.2833, | |
| "train_tokens_per_second": 2599.997 | |
| }, | |
| { | |
| "epoch": 0.9551330798479087, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 0.0004451283883494551, | |
| "loss": 3.1143, | |
| "num_input_tokens_seen": 205783040, | |
| "step": 1570, | |
| "train_runtime": 79162.5028, | |
| "train_tokens_per_second": 2599.501 | |
| }, | |
| { | |
| "epoch": 0.9612167300380228, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 0.0004443562173310842, | |
| "loss": 3.1332, | |
| "num_input_tokens_seen": 207093760, | |
| "step": 1580, | |
| "train_runtime": 79673.0445, | |
| "train_tokens_per_second": 2599.295 | |
| }, | |
| { | |
| "epoch": 0.9673003802281369, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00044357933080699886, | |
| "loss": 3.1363, | |
| "num_input_tokens_seen": 208404480, | |
| "step": 1590, | |
| "train_runtime": 80187.555, | |
| "train_tokens_per_second": 2598.963 | |
| }, | |
| { | |
| "epoch": 0.973384030418251, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00044279774762616056, | |
| "loss": 3.1414, | |
| "num_input_tokens_seen": 209715200, | |
| "step": 1600, | |
| "train_runtime": 80700.7756, | |
| "train_tokens_per_second": 2598.676 | |
| }, | |
| { | |
| "epoch": 0.979467680608365, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 0.00044201148675148175, | |
| "loss": 3.1076, | |
| "num_input_tokens_seen": 211025920, | |
| "step": 1610, | |
| "train_runtime": 81218.0495, | |
| "train_tokens_per_second": 2598.264 | |
| }, | |
| { | |
| "epoch": 0.9855513307984791, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.000441220567259366, | |
| "loss": 3.1153, | |
| "num_input_tokens_seen": 212336640, | |
| "step": 1620, | |
| "train_runtime": 81730.0271, | |
| "train_tokens_per_second": 2598.025 | |
| }, | |
| { | |
| "epoch": 0.9916349809885932, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00044042500833924535, | |
| "loss": 3.1097, | |
| "num_input_tokens_seen": 213647360, | |
| "step": 1630, | |
| "train_runtime": 82244.6699, | |
| "train_tokens_per_second": 2597.705 | |
| }, | |
| { | |
| "epoch": 0.9977186311787072, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 0.0004396248292931141, | |
| "loss": 3.0839, | |
| "num_input_tokens_seen": 214958080, | |
| "step": 1640, | |
| "train_runtime": 82763.7003, | |
| "train_tokens_per_second": 2597.251 | |
| }, | |
| { | |
| "epoch": 1.0036501901140684, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 0.0004388200495350612, | |
| "loss": 2.9969, | |
| "num_input_tokens_seen": 216227840, | |
| "step": 1650, | |
| "train_runtime": 83205.3748, | |
| "train_tokens_per_second": 2598.724 | |
| }, | |
| { | |
| "epoch": 1.0097338403041825, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.0004380106885907987, | |
| "loss": 2.9191, | |
| "num_input_tokens_seen": 217538560, | |
| "step": 1660, | |
| "train_runtime": 83652.3328, | |
| "train_tokens_per_second": 2600.508 | |
| }, | |
| { | |
| "epoch": 1.0158174904942965, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 0.0004371967660971883, | |
| "loss": 2.897, | |
| "num_input_tokens_seen": 218849280, | |
| "step": 1670, | |
| "train_runtime": 84099.6608, | |
| "train_tokens_per_second": 2602.261 | |
| }, | |
| { | |
| "epoch": 1.0219011406844107, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.00043637830180176475, | |
| "loss": 2.8774, | |
| "num_input_tokens_seen": 220160000, | |
| "step": 1680, | |
| "train_runtime": 84541.9367, | |
| "train_tokens_per_second": 2604.151 | |
| }, | |
| { | |
| "epoch": 1.0279847908745248, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.00043555531556225695, | |
| "loss": 2.9148, | |
| "num_input_tokens_seen": 221470720, | |
| "step": 1690, | |
| "train_runtime": 84990.5402, | |
| "train_tokens_per_second": 2605.828 | |
| }, | |
| { | |
| "epoch": 1.0340684410646388, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.00043472782734610605, | |
| "loss": 2.8941, | |
| "num_input_tokens_seen": 222781440, | |
| "step": 1700, | |
| "train_runtime": 85437.717, | |
| "train_tokens_per_second": 2607.53 | |
| }, | |
| { | |
| "epoch": 1.0401520912547528, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 0.0004338958572299807, | |
| "loss": 2.9016, | |
| "num_input_tokens_seen": 224092160, | |
| "step": 1710, | |
| "train_runtime": 85885.9521, | |
| "train_tokens_per_second": 2609.183 | |
| }, | |
| { | |
| "epoch": 1.0462357414448669, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 0.00043305942539929057, | |
| "loss": 2.904, | |
| "num_input_tokens_seen": 225402880, | |
| "step": 1720, | |
| "train_runtime": 86334.27, | |
| "train_tokens_per_second": 2610.816 | |
| }, | |
| { | |
| "epoch": 1.052319391634981, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 0.00043221855214769606, | |
| "loss": 2.9289, | |
| "num_input_tokens_seen": 226713600, | |
| "step": 1730, | |
| "train_runtime": 86781.5406, | |
| "train_tokens_per_second": 2612.463 | |
| }, | |
| { | |
| "epoch": 1.0584030418250951, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.0004313732578766161, | |
| "loss": 2.9148, | |
| "num_input_tokens_seen": 228024320, | |
| "step": 1740, | |
| "train_runtime": 87228.9311, | |
| "train_tokens_per_second": 2614.09 | |
| }, | |
| { | |
| "epoch": 1.0644866920152092, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.00043052356309473326, | |
| "loss": 2.9057, | |
| "num_input_tokens_seen": 229335040, | |
| "step": 1750, | |
| "train_runtime": 87675.2394, | |
| "train_tokens_per_second": 2615.733 | |
| }, | |
| { | |
| "epoch": 1.0705703422053232, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.0004296694884174962, | |
| "loss": 2.9202, | |
| "num_input_tokens_seen": 230645760, | |
| "step": 1760, | |
| "train_runtime": 88121.946, | |
| "train_tokens_per_second": 2617.348 | |
| }, | |
| { | |
| "epoch": 1.0766539923954372, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 0.00042881105456661915, | |
| "loss": 2.9456, | |
| "num_input_tokens_seen": 231956480, | |
| "step": 1770, | |
| "train_runtime": 88570.7729, | |
| "train_tokens_per_second": 2618.883 | |
| }, | |
| { | |
| "epoch": 1.0827376425855513, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.0004279482823695797, | |
| "loss": 2.9425, | |
| "num_input_tokens_seen": 233267200, | |
| "step": 1780, | |
| "train_runtime": 89019.7651, | |
| "train_tokens_per_second": 2620.398 | |
| }, | |
| { | |
| "epoch": 1.0888212927756653, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 0.00042708119275911276, | |
| "loss": 2.9194, | |
| "num_input_tokens_seen": 234577920, | |
| "step": 1790, | |
| "train_runtime": 89469.0148, | |
| "train_tokens_per_second": 2621.89 | |
| }, | |
| { | |
| "epoch": 1.0949049429657796, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.0004262098067727036, | |
| "loss": 2.8975, | |
| "num_input_tokens_seen": 235888640, | |
| "step": 1800, | |
| "train_runtime": 89917.0004, | |
| "train_tokens_per_second": 2623.404 | |
| }, | |
| { | |
| "epoch": 1.1009885931558936, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 0.0004253341455520766, | |
| "loss": 2.9038, | |
| "num_input_tokens_seen": 237199360, | |
| "step": 1810, | |
| "train_runtime": 90363.5211, | |
| "train_tokens_per_second": 2624.946 | |
| }, | |
| { | |
| "epoch": 1.1070722433460076, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.0004244542303426828, | |
| "loss": 2.9133, | |
| "num_input_tokens_seen": 238510080, | |
| "step": 1820, | |
| "train_runtime": 90809.803, | |
| "train_tokens_per_second": 2626.479 | |
| }, | |
| { | |
| "epoch": 1.1131558935361217, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.0004235700824931841, | |
| "loss": 2.9203, | |
| "num_input_tokens_seen": 239820800, | |
| "step": 1830, | |
| "train_runtime": 91256.862, | |
| "train_tokens_per_second": 2627.976 | |
| }, | |
| { | |
| "epoch": 1.1192395437262357, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.0004226817234549358, | |
| "loss": 2.8757, | |
| "num_input_tokens_seen": 241131520, | |
| "step": 1840, | |
| "train_runtime": 91704.3196, | |
| "train_tokens_per_second": 2629.446 | |
| }, | |
| { | |
| "epoch": 1.1253231939163497, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.0004217891747814656, | |
| "loss": 2.915, | |
| "num_input_tokens_seen": 242442240, | |
| "step": 1850, | |
| "train_runtime": 92152.2557, | |
| "train_tokens_per_second": 2630.888 | |
| }, | |
| { | |
| "epoch": 1.131406844106464, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 0.00042089245812795085, | |
| "loss": 2.9151, | |
| "num_input_tokens_seen": 243752960, | |
| "step": 1860, | |
| "train_runtime": 92600.8297, | |
| "train_tokens_per_second": 2632.298 | |
| }, | |
| { | |
| "epoch": 1.137490494296578, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 0.0004199915952506933, | |
| "loss": 2.8949, | |
| "num_input_tokens_seen": 245063680, | |
| "step": 1870, | |
| "train_runtime": 93047.388, | |
| "train_tokens_per_second": 2633.751 | |
| }, | |
| { | |
| "epoch": 1.143574144486692, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 0.00041908660800659103, | |
| "loss": 2.9332, | |
| "num_input_tokens_seen": 246374400, | |
| "step": 1880, | |
| "train_runtime": 93494.6982, | |
| "train_tokens_per_second": 2635.17 | |
| }, | |
| { | |
| "epoch": 1.149657794676806, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 0.00041817751835260844, | |
| "loss": 2.9199, | |
| "num_input_tokens_seen": 247685120, | |
| "step": 1890, | |
| "train_runtime": 93942.5611, | |
| "train_tokens_per_second": 2636.559 | |
| }, | |
| { | |
| "epoch": 1.15574144486692, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.0004172643483452431, | |
| "loss": 2.9073, | |
| "num_input_tokens_seen": 248995840, | |
| "step": 1900, | |
| "train_runtime": 94390.1139, | |
| "train_tokens_per_second": 2637.944 | |
| }, | |
| { | |
| "epoch": 1.1618250950570341, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 0.00041634712013999107, | |
| "loss": 2.9329, | |
| "num_input_tokens_seen": 250306560, | |
| "step": 1910, | |
| "train_runtime": 94837.9896, | |
| "train_tokens_per_second": 2639.307 | |
| }, | |
| { | |
| "epoch": 1.1679087452471484, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 0.00041542585599080897, | |
| "loss": 2.8958, | |
| "num_input_tokens_seen": 251617280, | |
| "step": 1920, | |
| "train_runtime": 95285.7174, | |
| "train_tokens_per_second": 2640.661 | |
| }, | |
| { | |
| "epoch": 1.1739923954372624, | |
| "grad_norm": 57.75, | |
| "learning_rate": 0.0004145005782495743, | |
| "loss": 2.9621, | |
| "num_input_tokens_seen": 252928000, | |
| "step": 1930, | |
| "train_runtime": 95734.3074, | |
| "train_tokens_per_second": 2641.979 | |
| }, | |
| { | |
| "epoch": 1.1800760456273764, | |
| "grad_norm": 4.25, | |
| "learning_rate": 0.0004135713093655431, | |
| "loss": 2.9531, | |
| "num_input_tokens_seen": 254238720, | |
| "step": 1940, | |
| "train_runtime": 96179.9871, | |
| "train_tokens_per_second": 2643.364 | |
| }, | |
| { | |
| "epoch": 1.1861596958174905, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 0.0004126380718848052, | |
| "loss": 2.9254, | |
| "num_input_tokens_seen": 255549440, | |
| "step": 1950, | |
| "train_runtime": 96629.2522, | |
| "train_tokens_per_second": 2644.638 | |
| }, | |
| { | |
| "epoch": 1.1922433460076045, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 0.00041170088844973715, | |
| "loss": 2.9068, | |
| "num_input_tokens_seen": 256860160, | |
| "step": 1960, | |
| "train_runtime": 97084.0321, | |
| "train_tokens_per_second": 2645.751 | |
| }, | |
| { | |
| "epoch": 1.1983269961977185, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 0.0004107597817984531, | |
| "loss": 2.9029, | |
| "num_input_tokens_seen": 258170880, | |
| "step": 1970, | |
| "train_runtime": 97539.64, | |
| "train_tokens_per_second": 2646.83 | |
| }, | |
| { | |
| "epoch": 1.2044106463878328, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 0.0004098147747642529, | |
| "loss": 2.892, | |
| "num_input_tokens_seen": 259481600, | |
| "step": 1980, | |
| "train_runtime": 97994.7012, | |
| "train_tokens_per_second": 2647.915 | |
| }, | |
| { | |
| "epoch": 1.2104942965779468, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 0.00040886589027506814, | |
| "loss": 2.8836, | |
| "num_input_tokens_seen": 260792320, | |
| "step": 1990, | |
| "train_runtime": 98450.2354, | |
| "train_tokens_per_second": 2648.976 | |
| }, | |
| { | |
| "epoch": 1.2165779467680609, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 0.0004079131513529062, | |
| "loss": 2.897, | |
| "num_input_tokens_seen": 262103040, | |
| "step": 2000, | |
| "train_runtime": 98899.4674, | |
| "train_tokens_per_second": 2650.197 | |
| }, | |
| { | |
| "epoch": 1.222661596958175, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 0.00040695658111329104, | |
| "loss": 2.8802, | |
| "num_input_tokens_seen": 263413760, | |
| "step": 2010, | |
| "train_runtime": 99347.728, | |
| "train_tokens_per_second": 2651.432 | |
| }, | |
| { | |
| "epoch": 1.228745247148289, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 0.00040599620276470297, | |
| "loss": 2.8942, | |
| "num_input_tokens_seen": 264724480, | |
| "step": 2020, | |
| "train_runtime": 99795.6938, | |
| "train_tokens_per_second": 2652.664 | |
| }, | |
| { | |
| "epoch": 1.234828897338403, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 0.00040503203960801516, | |
| "loss": 2.8637, | |
| "num_input_tokens_seen": 266035200, | |
| "step": 2030, | |
| "train_runtime": 100246.5331, | |
| "train_tokens_per_second": 2653.809 | |
| }, | |
| { | |
| "epoch": 1.2409125475285172, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 0.0004040641150359288, | |
| "loss": 2.8851, | |
| "num_input_tokens_seen": 267345920, | |
| "step": 2040, | |
| "train_runtime": 100696.8528, | |
| "train_tokens_per_second": 2654.958 | |
| }, | |
| { | |
| "epoch": 1.2469961977186312, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 0.0004030924525324049, | |
| "loss": 2.8836, | |
| "num_input_tokens_seen": 268656640, | |
| "step": 2050, | |
| "train_runtime": 101148.4193, | |
| "train_tokens_per_second": 2656.064 | |
| }, | |
| { | |
| "epoch": 1.2530798479087453, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.0004021170756720949, | |
| "loss": 2.8915, | |
| "num_input_tokens_seen": 269967360, | |
| "step": 2060, | |
| "train_runtime": 101597.8838, | |
| "train_tokens_per_second": 2657.214 | |
| }, | |
| { | |
| "epoch": 1.2591634980988593, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 0.0004011380081197687, | |
| "loss": 2.9017, | |
| "num_input_tokens_seen": 271278080, | |
| "step": 2070, | |
| "train_runtime": 102048.5088, | |
| "train_tokens_per_second": 2658.325 | |
| }, | |
| { | |
| "epoch": 1.2652471482889733, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.00040015527362974047, | |
| "loss": 2.8829, | |
| "num_input_tokens_seen": 272588800, | |
| "step": 2080, | |
| "train_runtime": 102496.6301, | |
| "train_tokens_per_second": 2659.49 | |
| }, | |
| { | |
| "epoch": 1.2713307984790876, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00039916889604529215, | |
| "loss": 2.8651, | |
| "num_input_tokens_seen": 273899520, | |
| "step": 2090, | |
| "train_runtime": 102945.3454, | |
| "train_tokens_per_second": 2660.63 | |
| }, | |
| { | |
| "epoch": 1.2774144486692016, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00039817889929809536, | |
| "loss": 2.9117, | |
| "num_input_tokens_seen": 275210240, | |
| "step": 2100, | |
| "train_runtime": 103394.12, | |
| "train_tokens_per_second": 2661.759 | |
| }, | |
| { | |
| "epoch": 1.2834980988593157, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 0.00039718530740763015, | |
| "loss": 2.8734, | |
| "num_input_tokens_seen": 276520960, | |
| "step": 2110, | |
| "train_runtime": 103841.6282, | |
| "train_tokens_per_second": 2662.91 | |
| }, | |
| { | |
| "epoch": 1.2895817490494297, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 0.0003961881444806028, | |
| "loss": 2.8978, | |
| "num_input_tokens_seen": 277831680, | |
| "step": 2120, | |
| "train_runtime": 104294.6414, | |
| "train_tokens_per_second": 2663.911 | |
| }, | |
| { | |
| "epoch": 1.2956653992395437, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.0003951874347103607, | |
| "loss": 2.8918, | |
| "num_input_tokens_seen": 279142400, | |
| "step": 2130, | |
| "train_runtime": 104743.1678, | |
| "train_tokens_per_second": 2665.018 | |
| }, | |
| { | |
| "epoch": 1.3017490494296577, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00039418320237630543, | |
| "loss": 2.8742, | |
| "num_input_tokens_seen": 280453120, | |
| "step": 2140, | |
| "train_runtime": 105191.5091, | |
| "train_tokens_per_second": 2666.119 | |
| }, | |
| { | |
| "epoch": 1.3078326996197718, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.00039317547184330347, | |
| "loss": 2.8766, | |
| "num_input_tokens_seen": 281763840, | |
| "step": 2150, | |
| "train_runtime": 105640.1283, | |
| "train_tokens_per_second": 2667.205 | |
| }, | |
| { | |
| "epoch": 1.3139163498098858, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 0.0003921642675610956, | |
| "loss": 2.8618, | |
| "num_input_tokens_seen": 283074560, | |
| "step": 2160, | |
| "train_runtime": 106090.5411, | |
| "train_tokens_per_second": 2668.236 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 0.0003911496140637028, | |
| "loss": 2.9176, | |
| "num_input_tokens_seen": 284385280, | |
| "step": 2170, | |
| "train_runtime": 106543.4269, | |
| "train_tokens_per_second": 2669.196 | |
| }, | |
| { | |
| "epoch": 1.326083650190114, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 0.00039013153596883203, | |
| "loss": 2.8832, | |
| "num_input_tokens_seen": 285696000, | |
| "step": 2180, | |
| "train_runtime": 106996.4598, | |
| "train_tokens_per_second": 2670.144 | |
| }, | |
| { | |
| "epoch": 1.3321673003802281, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 0.00038911005797727816, | |
| "loss": 2.8554, | |
| "num_input_tokens_seen": 287006720, | |
| "step": 2190, | |
| "train_runtime": 107450.6455, | |
| "train_tokens_per_second": 2671.056 | |
| }, | |
| { | |
| "epoch": 1.3382509505703422, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 0.00038808520487232514, | |
| "loss": 2.9136, | |
| "num_input_tokens_seen": 288317440, | |
| "step": 2200, | |
| "train_runtime": 107900.1405, | |
| "train_tokens_per_second": 2672.077 | |
| }, | |
| { | |
| "epoch": 1.3443346007604564, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.00038705700151914446, | |
| "loss": 2.9164, | |
| "num_input_tokens_seen": 289628160, | |
| "step": 2210, | |
| "train_runtime": 108349.4576, | |
| "train_tokens_per_second": 2673.093 | |
| }, | |
| { | |
| "epoch": 1.3504182509505704, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.0003860254728641918, | |
| "loss": 2.8747, | |
| "num_input_tokens_seen": 290938880, | |
| "step": 2220, | |
| "train_runtime": 108798.3026, | |
| "train_tokens_per_second": 2674.112 | |
| }, | |
| { | |
| "epoch": 1.3565019011406845, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00038499064393460213, | |
| "loss": 2.8488, | |
| "num_input_tokens_seen": 292249600, | |
| "step": 2230, | |
| "train_runtime": 109248.832, | |
| "train_tokens_per_second": 2675.082 | |
| }, | |
| { | |
| "epoch": 1.3625855513307985, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.00038395253983758226, | |
| "loss": 2.8678, | |
| "num_input_tokens_seen": 293560320, | |
| "step": 2240, | |
| "train_runtime": 109700.3985, | |
| "train_tokens_per_second": 2676.019 | |
| }, | |
| { | |
| "epoch": 1.3686692015209125, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 0.00038291118575980147, | |
| "loss": 2.8556, | |
| "num_input_tokens_seen": 294871040, | |
| "step": 2250, | |
| "train_runtime": 110151.5807, | |
| "train_tokens_per_second": 2676.957 | |
| }, | |
| { | |
| "epoch": 1.3747528517110266, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.0003818666069667811, | |
| "loss": 2.8687, | |
| "num_input_tokens_seen": 296181760, | |
| "step": 2260, | |
| "train_runtime": 110602.5281, | |
| "train_tokens_per_second": 2677.893 | |
| }, | |
| { | |
| "epoch": 1.3808365019011406, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.0003808188288022806, | |
| "loss": 2.8703, | |
| "num_input_tokens_seen": 297492480, | |
| "step": 2270, | |
| "train_runtime": 111055.5295, | |
| "train_tokens_per_second": 2678.772 | |
| }, | |
| { | |
| "epoch": 1.3869201520912546, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.00037976787668768353, | |
| "loss": 2.8479, | |
| "num_input_tokens_seen": 298803200, | |
| "step": 2280, | |
| "train_runtime": 111506.098, | |
| "train_tokens_per_second": 2679.703 | |
| }, | |
| { | |
| "epoch": 1.3930038022813689, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.00037871377612138014, | |
| "loss": 2.8656, | |
| "num_input_tokens_seen": 300113920, | |
| "step": 2290, | |
| "train_runtime": 111958.2058, | |
| "train_tokens_per_second": 2680.589 | |
| }, | |
| { | |
| "epoch": 1.399087452471483, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00037765655267814926, | |
| "loss": 2.854, | |
| "num_input_tokens_seen": 301424640, | |
| "step": 2300, | |
| "train_runtime": 112412.2892, | |
| "train_tokens_per_second": 2681.421 | |
| }, | |
| { | |
| "epoch": 1.405171102661597, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.0003765962320085373, | |
| "loss": 2.8524, | |
| "num_input_tokens_seen": 302735360, | |
| "step": 2310, | |
| "train_runtime": 112865.0758, | |
| "train_tokens_per_second": 2682.277 | |
| }, | |
| { | |
| "epoch": 1.411254752851711, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00037553283983823615, | |
| "loss": 2.8348, | |
| "num_input_tokens_seen": 304046080, | |
| "step": 2320, | |
| "train_runtime": 113318.0386, | |
| "train_tokens_per_second": 2683.122 | |
| }, | |
| { | |
| "epoch": 1.417338403041825, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 0.00037446640196745927, | |
| "loss": 2.8776, | |
| "num_input_tokens_seen": 305356800, | |
| "step": 2330, | |
| "train_runtime": 113769.7765, | |
| "train_tokens_per_second": 2683.989 | |
| }, | |
| { | |
| "epoch": 1.4234220532319393, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 0.0003733969442703151, | |
| "loss": 2.8572, | |
| "num_input_tokens_seen": 306667520, | |
| "step": 2340, | |
| "train_runtime": 114222.6283, | |
| "train_tokens_per_second": 2684.823 | |
| }, | |
| { | |
| "epoch": 1.4295057034220533, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00037232449269417983, | |
| "loss": 2.8366, | |
| "num_input_tokens_seen": 307978240, | |
| "step": 2350, | |
| "train_runtime": 114676.9671, | |
| "train_tokens_per_second": 2685.615 | |
| }, | |
| { | |
| "epoch": 1.4355893536121673, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.0003712490732590677, | |
| "loss": 2.837, | |
| "num_input_tokens_seen": 309288960, | |
| "step": 2360, | |
| "train_runtime": 115129.6387, | |
| "train_tokens_per_second": 2686.441 | |
| }, | |
| { | |
| "epoch": 1.4416730038022814, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 0.0003701707120569997, | |
| "loss": 2.8182, | |
| "num_input_tokens_seen": 310599680, | |
| "step": 2370, | |
| "train_runtime": 115583.0774, | |
| "train_tokens_per_second": 2687.242 | |
| }, | |
| { | |
| "epoch": 1.4477566539923954, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 0.0003690894352513703, | |
| "loss": 2.8374, | |
| "num_input_tokens_seen": 311910400, | |
| "step": 2380, | |
| "train_runtime": 116035.6048, | |
| "train_tokens_per_second": 2688.058 | |
| }, | |
| { | |
| "epoch": 1.4538403041825094, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.0003680052690763131, | |
| "loss": 2.8273, | |
| "num_input_tokens_seen": 313221120, | |
| "step": 2390, | |
| "train_runtime": 116489.8127, | |
| "train_tokens_per_second": 2688.828 | |
| }, | |
| { | |
| "epoch": 1.4599239543726235, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 0.0003669182398360641, | |
| "loss": 2.8182, | |
| "num_input_tokens_seen": 314531840, | |
| "step": 2400, | |
| "train_runtime": 116943.0324, | |
| "train_tokens_per_second": 2689.616 | |
| }, | |
| { | |
| "epoch": 1.4660076045627377, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.00036582837390432326, | |
| "loss": 2.8156, | |
| "num_input_tokens_seen": 315842560, | |
| "step": 2410, | |
| "train_runtime": 117395.359, | |
| "train_tokens_per_second": 2690.418 | |
| }, | |
| { | |
| "epoch": 1.4720912547528517, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 0.00036473569772361514, | |
| "loss": 2.852, | |
| "num_input_tokens_seen": 317153280, | |
| "step": 2420, | |
| "train_runtime": 117851.896, | |
| "train_tokens_per_second": 2691.117 | |
| }, | |
| { | |
| "epoch": 1.4781749049429658, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 0.00036364023780464694, | |
| "loss": 2.8335, | |
| "num_input_tokens_seen": 318464000, | |
| "step": 2430, | |
| "train_runtime": 118307.3385, | |
| "train_tokens_per_second": 2691.836 | |
| }, | |
| { | |
| "epoch": 1.4842585551330798, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 0.0003625420207256656, | |
| "loss": 2.8399, | |
| "num_input_tokens_seen": 319774720, | |
| "step": 2440, | |
| "train_runtime": 118760.9215, | |
| "train_tokens_per_second": 2692.592 | |
| }, | |
| { | |
| "epoch": 1.4903422053231938, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00036144107313181224, | |
| "loss": 2.8545, | |
| "num_input_tokens_seen": 321085440, | |
| "step": 2450, | |
| "train_runtime": 119212.8391, | |
| "train_tokens_per_second": 2693.38 | |
| }, | |
| { | |
| "epoch": 1.496425855513308, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00036033742173447695, | |
| "loss": 2.8383, | |
| "num_input_tokens_seen": 322396160, | |
| "step": 2460, | |
| "train_runtime": 119664.4567, | |
| "train_tokens_per_second": 2694.168 | |
| }, | |
| { | |
| "epoch": 1.5025095057034221, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.0003592310933106495, | |
| "loss": 2.8197, | |
| "num_input_tokens_seen": 323706880, | |
| "step": 2470, | |
| "train_runtime": 120117.6232, | |
| "train_tokens_per_second": 2694.916 | |
| }, | |
| { | |
| "epoch": 1.5085931558935362, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 0.0003581221147022703, | |
| "loss": 2.8484, | |
| "num_input_tokens_seen": 325017600, | |
| "step": 2480, | |
| "train_runtime": 120570.0469, | |
| "train_tokens_per_second": 2695.674 | |
| }, | |
| { | |
| "epoch": 1.5146768060836502, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 0.0003570105128155791, | |
| "loss": 2.8361, | |
| "num_input_tokens_seen": 326328320, | |
| "step": 2490, | |
| "train_runtime": 121024.5878, | |
| "train_tokens_per_second": 2696.38 | |
| }, | |
| { | |
| "epoch": 1.5207604562737642, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.0003558963146204619, | |
| "loss": 2.7998, | |
| "num_input_tokens_seen": 327639040, | |
| "step": 2500, | |
| "train_runtime": 121477.7685, | |
| "train_tokens_per_second": 2697.111 | |
| }, | |
| { | |
| "epoch": 1.5268441064638782, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 0.0003547795471497971, | |
| "loss": 2.8491, | |
| "num_input_tokens_seen": 328949760, | |
| "step": 2510, | |
| "train_runtime": 121932.2878, | |
| "train_tokens_per_second": 2697.807 | |
| }, | |
| { | |
| "epoch": 1.5329277566539923, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 0.00035366023749879895, | |
| "loss": 2.8505, | |
| "num_input_tokens_seen": 330260480, | |
| "step": 2520, | |
| "train_runtime": 122381.2631, | |
| "train_tokens_per_second": 2698.62 | |
| }, | |
| { | |
| "epoch": 1.5390114068441063, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 0.0003525384128243609, | |
| "loss": 2.8021, | |
| "num_input_tokens_seen": 331571200, | |
| "step": 2530, | |
| "train_runtime": 122830.6404, | |
| "train_tokens_per_second": 2699.418 | |
| }, | |
| { | |
| "epoch": 1.5450950570342206, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 0.00035141410034439613, | |
| "loss": 2.8371, | |
| "num_input_tokens_seen": 332881920, | |
| "step": 2540, | |
| "train_runtime": 123278.651, | |
| "train_tokens_per_second": 2700.24 | |
| }, | |
| { | |
| "epoch": 1.5511787072243346, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 0.0003502873273371775, | |
| "loss": 2.8455, | |
| "num_input_tokens_seen": 334192640, | |
| "step": 2550, | |
| "train_runtime": 123733.2531, | |
| "train_tokens_per_second": 2700.912 | |
| }, | |
| { | |
| "epoch": 1.5572623574144486, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 0.0003491581211406756, | |
| "loss": 2.8163, | |
| "num_input_tokens_seen": 335503360, | |
| "step": 2560, | |
| "train_runtime": 124189.0309, | |
| "train_tokens_per_second": 2701.554 | |
| }, | |
| { | |
| "epoch": 1.5633460076045629, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 0.00034802650915189554, | |
| "loss": 2.8193, | |
| "num_input_tokens_seen": 336814080, | |
| "step": 2570, | |
| "train_runtime": 124645.0232, | |
| "train_tokens_per_second": 2702.186 | |
| }, | |
| { | |
| "epoch": 1.569429657794677, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 0.0003468925188262121, | |
| "loss": 2.8321, | |
| "num_input_tokens_seen": 338124800, | |
| "step": 2580, | |
| "train_runtime": 125098.7972, | |
| "train_tokens_per_second": 2702.862 | |
| }, | |
| { | |
| "epoch": 1.575513307984791, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.00034575617767670377, | |
| "loss": 2.8267, | |
| "num_input_tokens_seen": 339435520, | |
| "step": 2590, | |
| "train_runtime": 125553.6767, | |
| "train_tokens_per_second": 2703.509 | |
| }, | |
| { | |
| "epoch": 1.581596958174905, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 0.0003446175132734852, | |
| "loss": 2.8025, | |
| "num_input_tokens_seen": 340746240, | |
| "step": 2600, | |
| "train_runtime": 126010.9594, | |
| "train_tokens_per_second": 2704.1 | |
| }, | |
| { | |
| "epoch": 1.587680608365019, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 0.0003434765532430382, | |
| "loss": 2.7821, | |
| "num_input_tokens_seen": 342056960, | |
| "step": 2610, | |
| "train_runtime": 126467.8994, | |
| "train_tokens_per_second": 2704.694 | |
| }, | |
| { | |
| "epoch": 1.593764258555133, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.00034233332526754137, | |
| "loss": 2.8006, | |
| "num_input_tokens_seen": 343367680, | |
| "step": 2620, | |
| "train_runtime": 126924.495, | |
| "train_tokens_per_second": 2705.291 | |
| }, | |
| { | |
| "epoch": 1.599847908745247, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.0003411878570841988, | |
| "loss": 2.8201, | |
| "num_input_tokens_seen": 344678400, | |
| "step": 2630, | |
| "train_runtime": 127377.5833, | |
| "train_tokens_per_second": 2705.958 | |
| }, | |
| { | |
| "epoch": 1.605931558935361, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00034004017648456693, | |
| "loss": 2.8014, | |
| "num_input_tokens_seen": 345989120, | |
| "step": 2640, | |
| "train_runtime": 127833.5239, | |
| "train_tokens_per_second": 2706.56 | |
| }, | |
| { | |
| "epoch": 1.6120152091254751, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 0.00033889031131387995, | |
| "loss": 2.8407, | |
| "num_input_tokens_seen": 347299840, | |
| "step": 2650, | |
| "train_runtime": 128291.2954, | |
| "train_tokens_per_second": 2707.119 | |
| }, | |
| { | |
| "epoch": 1.6180988593155894, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 0.0003377382894703748, | |
| "loss": 2.8222, | |
| "num_input_tokens_seen": 348610560, | |
| "step": 2660, | |
| "train_runtime": 128747.4205, | |
| "train_tokens_per_second": 2707.709 | |
| }, | |
| { | |
| "epoch": 1.6241825095057034, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 0.0003365841389046139, | |
| "loss": 2.8098, | |
| "num_input_tokens_seen": 349921280, | |
| "step": 2670, | |
| "train_runtime": 129204.1147, | |
| "train_tokens_per_second": 2708.283 | |
| }, | |
| { | |
| "epoch": 1.6302661596958175, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00033542788761880713, | |
| "loss": 2.8376, | |
| "num_input_tokens_seen": 351232000, | |
| "step": 2680, | |
| "train_runtime": 129660.8296, | |
| "train_tokens_per_second": 2708.852 | |
| }, | |
| { | |
| "epoch": 1.6363498098859317, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 0.00033426956366613254, | |
| "loss": 2.8017, | |
| "num_input_tokens_seen": 352542720, | |
| "step": 2690, | |
| "train_runtime": 130118.2978, | |
| "train_tokens_per_second": 2709.402 | |
| }, | |
| { | |
| "epoch": 1.6424334600760457, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 0.00033310919515005524, | |
| "loss": 2.8027, | |
| "num_input_tokens_seen": 353853440, | |
| "step": 2700, | |
| "train_runtime": 130575.9315, | |
| "train_tokens_per_second": 2709.944 | |
| }, | |
| { | |
| "epoch": 1.6485171102661598, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 0.00033194681022364627, | |
| "loss": 2.8082, | |
| "num_input_tokens_seen": 355164160, | |
| "step": 2710, | |
| "train_runtime": 131034.1343, | |
| "train_tokens_per_second": 2710.471 | |
| }, | |
| { | |
| "epoch": 1.6546007604562738, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.00033078243708889904, | |
| "loss": 2.7959, | |
| "num_input_tokens_seen": 356474880, | |
| "step": 2720, | |
| "train_runtime": 131492.6924, | |
| "train_tokens_per_second": 2710.986 | |
| }, | |
| { | |
| "epoch": 1.6606844106463878, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 0.00032961610399604525, | |
| "loss": 2.7826, | |
| "num_input_tokens_seen": 357785600, | |
| "step": 2730, | |
| "train_runtime": 131952.3753, | |
| "train_tokens_per_second": 2711.475 | |
| }, | |
| { | |
| "epoch": 1.6667680608365019, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.0003284478392428695, | |
| "loss": 2.7911, | |
| "num_input_tokens_seen": 359096320, | |
| "step": 2740, | |
| "train_runtime": 132412.9832, | |
| "train_tokens_per_second": 2711.942 | |
| }, | |
| { | |
| "epoch": 1.672851711026616, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 0.0003272776711740224, | |
| "loss": 2.8126, | |
| "num_input_tokens_seen": 360407040, | |
| "step": 2750, | |
| "train_runtime": 132871.6286, | |
| "train_tokens_per_second": 2712.445 | |
| }, | |
| { | |
| "epoch": 1.67893536121673, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 0.00032610562818033374, | |
| "loss": 2.7689, | |
| "num_input_tokens_seen": 361717760, | |
| "step": 2760, | |
| "train_runtime": 133332.2259, | |
| "train_tokens_per_second": 2712.906 | |
| }, | |
| { | |
| "epoch": 1.685019011406844, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 0.0003249317386981224, | |
| "loss": 2.773, | |
| "num_input_tokens_seen": 363028480, | |
| "step": 2770, | |
| "train_runtime": 133792.8657, | |
| "train_tokens_per_second": 2713.362 | |
| }, | |
| { | |
| "epoch": 1.6911026615969582, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.00032375603120850764, | |
| "loss": 2.7964, | |
| "num_input_tokens_seen": 364339200, | |
| "step": 2780, | |
| "train_runtime": 134252.2551, | |
| "train_tokens_per_second": 2713.84 | |
| }, | |
| { | |
| "epoch": 1.6971863117870722, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.0003225785342367172, | |
| "loss": 2.8291, | |
| "num_input_tokens_seen": 365649920, | |
| "step": 2790, | |
| "train_runtime": 134711.3995, | |
| "train_tokens_per_second": 2714.321 | |
| }, | |
| { | |
| "epoch": 1.7032699619771863, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.0003213992763513958, | |
| "loss": 2.7957, | |
| "num_input_tokens_seen": 366960640, | |
| "step": 2800, | |
| "train_runtime": 135171.8739, | |
| "train_tokens_per_second": 2714.771 | |
| }, | |
| { | |
| "epoch": 1.7093536121673005, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 0.00032021828616391146, | |
| "loss": 2.8162, | |
| "num_input_tokens_seen": 368271360, | |
| "step": 2810, | |
| "train_runtime": 135631.7281, | |
| "train_tokens_per_second": 2715.23 | |
| }, | |
| { | |
| "epoch": 1.7154372623574146, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.000319035592327662, | |
| "loss": 2.7847, | |
| "num_input_tokens_seen": 369582080, | |
| "step": 2820, | |
| "train_runtime": 136091.9477, | |
| "train_tokens_per_second": 2715.679 | |
| }, | |
| { | |
| "epoch": 1.7215209125475286, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 0.0003178512235373791, | |
| "loss": 2.7952, | |
| "num_input_tokens_seen": 370892800, | |
| "step": 2830, | |
| "train_runtime": 136551.491, | |
| "train_tokens_per_second": 2716.139 | |
| }, | |
| { | |
| "epoch": 1.7276045627376426, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 0.00031666520852843285, | |
| "loss": 2.778, | |
| "num_input_tokens_seen": 372203520, | |
| "step": 2840, | |
| "train_runtime": 137010.4585, | |
| "train_tokens_per_second": 2716.607 | |
| }, | |
| { | |
| "epoch": 1.7336882129277567, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.000315477576076134, | |
| "loss": 2.7778, | |
| "num_input_tokens_seen": 373514240, | |
| "step": 2850, | |
| "train_runtime": 137473.9898, | |
| "train_tokens_per_second": 2716.981 | |
| }, | |
| { | |
| "epoch": 1.7397718631178707, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 0.00031428835499503583, | |
| "loss": 2.7703, | |
| "num_input_tokens_seen": 374824960, | |
| "step": 2860, | |
| "train_runtime": 137935.0519, | |
| "train_tokens_per_second": 2717.402 | |
| }, | |
| { | |
| "epoch": 1.7458555133079847, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 0.00031309757413823557, | |
| "loss": 2.7657, | |
| "num_input_tokens_seen": 376135680, | |
| "step": 2870, | |
| "train_runtime": 138396.7751, | |
| "train_tokens_per_second": 2717.807 | |
| }, | |
| { | |
| "epoch": 1.7519391634980988, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.0003119052623966738, | |
| "loss": 2.7978, | |
| "num_input_tokens_seen": 377446400, | |
| "step": 2880, | |
| "train_runtime": 138858.8271, | |
| "train_tokens_per_second": 2718.202 | |
| }, | |
| { | |
| "epoch": 1.7580228136882128, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 0.00031071144869843374, | |
| "loss": 2.7668, | |
| "num_input_tokens_seen": 378757120, | |
| "step": 2890, | |
| "train_runtime": 139319.9239, | |
| "train_tokens_per_second": 2718.614 | |
| }, | |
| { | |
| "epoch": 1.764106463878327, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 0.0003095161620080394, | |
| "loss": 2.8009, | |
| "num_input_tokens_seen": 380067840, | |
| "step": 2900, | |
| "train_runtime": 139782.9031, | |
| "train_tokens_per_second": 2718.987 | |
| }, | |
| { | |
| "epoch": 1.770190114068441, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 0.00030831943132575266, | |
| "loss": 2.7412, | |
| "num_input_tokens_seen": 381378560, | |
| "step": 2910, | |
| "train_runtime": 140241.6223, | |
| "train_tokens_per_second": 2719.439 | |
| }, | |
| { | |
| "epoch": 1.776273764258555, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 0.00030712128568687, | |
| "loss": 2.7809, | |
| "num_input_tokens_seen": 382689280, | |
| "step": 2920, | |
| "train_runtime": 140700.6087, | |
| "train_tokens_per_second": 2719.884 | |
| }, | |
| { | |
| "epoch": 1.7823574144486694, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.00030592175416101767, | |
| "loss": 2.7602, | |
| "num_input_tokens_seen": 384000000, | |
| "step": 2930, | |
| "train_runtime": 141160.4567, | |
| "train_tokens_per_second": 2720.309 | |
| }, | |
| { | |
| "epoch": 1.7884410646387834, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.0003047208658514466, | |
| "loss": 2.7824, | |
| "num_input_tokens_seen": 385310720, | |
| "step": 2940, | |
| "train_runtime": 141622.4855, | |
| "train_tokens_per_second": 2720.689 | |
| }, | |
| { | |
| "epoch": 1.7945247148288974, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.0003035186498943262, | |
| "loss": 2.7823, | |
| "num_input_tokens_seen": 386621440, | |
| "step": 2950, | |
| "train_runtime": 142082.5243, | |
| "train_tokens_per_second": 2721.105 | |
| }, | |
| { | |
| "epoch": 1.8006083650190114, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 0.0003023151354580378, | |
| "loss": 2.7379, | |
| "num_input_tokens_seen": 387932160, | |
| "step": 2960, | |
| "train_runtime": 142540.4824, | |
| "train_tokens_per_second": 2721.558 | |
| }, | |
| { | |
| "epoch": 1.8066920152091255, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 0.00030111035174246615, | |
| "loss": 2.7631, | |
| "num_input_tokens_seen": 389242880, | |
| "step": 2970, | |
| "train_runtime": 142998.9456, | |
| "train_tokens_per_second": 2721.998 | |
| }, | |
| { | |
| "epoch": 1.8127756653992395, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 0.0002999043279782919, | |
| "loss": 2.7424, | |
| "num_input_tokens_seen": 390553600, | |
| "step": 2980, | |
| "train_runtime": 143457.6941, | |
| "train_tokens_per_second": 2722.43 | |
| }, | |
| { | |
| "epoch": 1.8188593155893535, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.00029869709342628185, | |
| "loss": 2.7629, | |
| "num_input_tokens_seen": 391864320, | |
| "step": 2990, | |
| "train_runtime": 143917.5699, | |
| "train_tokens_per_second": 2722.839 | |
| }, | |
| { | |
| "epoch": 1.8249429657794676, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 0.0002974886773765792, | |
| "loss": 2.7772, | |
| "num_input_tokens_seen": 393175040, | |
| "step": 3000, | |
| "train_runtime": 144378.1838, | |
| "train_tokens_per_second": 2723.23 | |
| }, | |
| { | |
| "epoch": 1.8310266159695816, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 0.00029627910914799266, | |
| "loss": 2.7388, | |
| "num_input_tokens_seen": 394485760, | |
| "step": 3010, | |
| "train_runtime": 144840.5, | |
| "train_tokens_per_second": 2723.587 | |
| }, | |
| { | |
| "epoch": 1.8371102661596959, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 0.00029506841808728566, | |
| "loss": 2.7696, | |
| "num_input_tokens_seen": 395796480, | |
| "step": 3020, | |
| "train_runtime": 145299.0583, | |
| "train_tokens_per_second": 2724.013 | |
| }, | |
| { | |
| "epoch": 1.84319391634981, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 0.0002938566335684637, | |
| "loss": 2.7519, | |
| "num_input_tokens_seen": 397107200, | |
| "step": 3030, | |
| "train_runtime": 145760.0107, | |
| "train_tokens_per_second": 2724.391 | |
| }, | |
| { | |
| "epoch": 1.849277566539924, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.00029264378499206206, | |
| "loss": 2.7506, | |
| "num_input_tokens_seen": 398417920, | |
| "step": 3040, | |
| "train_runtime": 146221.9838, | |
| "train_tokens_per_second": 2724.747 | |
| }, | |
| { | |
| "epoch": 1.8553612167300382, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.0002914299017844324, | |
| "loss": 2.7425, | |
| "num_input_tokens_seen": 399728640, | |
| "step": 3050, | |
| "train_runtime": 146684.6189, | |
| "train_tokens_per_second": 2725.089 | |
| }, | |
| { | |
| "epoch": 1.8614448669201522, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.00029021501339702874, | |
| "loss": 2.7421, | |
| "num_input_tokens_seen": 401039360, | |
| "step": 3060, | |
| "train_runtime": 147145.5408, | |
| "train_tokens_per_second": 2725.461 | |
| }, | |
| { | |
| "epoch": 1.8675285171102662, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 0.0002889991493056929, | |
| "loss": 2.7454, | |
| "num_input_tokens_seen": 402350080, | |
| "step": 3070, | |
| "train_runtime": 147606.196, | |
| "train_tokens_per_second": 2725.835 | |
| }, | |
| { | |
| "epoch": 1.8736121673003803, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.0002877823390099395, | |
| "loss": 2.7738, | |
| "num_input_tokens_seen": 403660800, | |
| "step": 3080, | |
| "train_runtime": 148066.6266, | |
| "train_tokens_per_second": 2726.211 | |
| }, | |
| { | |
| "epoch": 1.8796958174904943, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 0.00028656461203224027, | |
| "loss": 2.7429, | |
| "num_input_tokens_seen": 404971520, | |
| "step": 3090, | |
| "train_runtime": 148528.8366, | |
| "train_tokens_per_second": 2726.551 | |
| }, | |
| { | |
| "epoch": 1.8857794676806083, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 0.0002853459979173074, | |
| "loss": 2.7618, | |
| "num_input_tokens_seen": 406282240, | |
| "step": 3100, | |
| "train_runtime": 148990.5945, | |
| "train_tokens_per_second": 2726.899 | |
| }, | |
| { | |
| "epoch": 1.8918631178707224, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 0.000284126526231377, | |
| "loss": 2.7174, | |
| "num_input_tokens_seen": 407592960, | |
| "step": 3110, | |
| "train_runtime": 149437.5489, | |
| "train_tokens_per_second": 2727.514 | |
| }, | |
| { | |
| "epoch": 1.8979467680608364, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.00028290622656149177, | |
| "loss": 2.7381, | |
| "num_input_tokens_seen": 408903680, | |
| "step": 3120, | |
| "train_runtime": 149746.6696, | |
| "train_tokens_per_second": 2730.636 | |
| }, | |
| { | |
| "epoch": 1.9040304182509504, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 0.00028168512851478334, | |
| "loss": 2.7346, | |
| "num_input_tokens_seen": 410214400, | |
| "step": 3130, | |
| "train_runtime": 150045.1286, | |
| "train_tokens_per_second": 2733.94 | |
| }, | |
| { | |
| "epoch": 1.9101140684410647, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 0.00028046326171775326, | |
| "loss": 2.7144, | |
| "num_input_tokens_seen": 411525120, | |
| "step": 3140, | |
| "train_runtime": 150338.7507, | |
| "train_tokens_per_second": 2737.319 | |
| }, | |
| { | |
| "epoch": 1.9161977186311787, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.0002792406558155549, | |
| "loss": 2.7335, | |
| "num_input_tokens_seen": 412835840, | |
| "step": 3150, | |
| "train_runtime": 150626.8637, | |
| "train_tokens_per_second": 2740.785 | |
| }, | |
| { | |
| "epoch": 1.9222813688212927, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 0.00027801734047127404, | |
| "loss": 2.734, | |
| "num_input_tokens_seen": 414146560, | |
| "step": 3160, | |
| "train_runtime": 150915.2559, | |
| "train_tokens_per_second": 2744.233 | |
| }, | |
| { | |
| "epoch": 1.928365019011407, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 0.0002767933453652086, | |
| "loss": 2.7415, | |
| "num_input_tokens_seen": 415457280, | |
| "step": 3170, | |
| "train_runtime": 151202.2316, | |
| "train_tokens_per_second": 2747.693 | |
| }, | |
| { | |
| "epoch": 1.934448669201521, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 0.0002755687001941497, | |
| "loss": 2.7616, | |
| "num_input_tokens_seen": 416768000, | |
| "step": 3180, | |
| "train_runtime": 151489.837, | |
| "train_tokens_per_second": 2751.128 | |
| }, | |
| { | |
| "epoch": 1.940532319391635, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 0.0002743434346706599, | |
| "loss": 2.721, | |
| "num_input_tokens_seen": 418078720, | |
| "step": 3190, | |
| "train_runtime": 151776.2917, | |
| "train_tokens_per_second": 2754.572 | |
| }, | |
| { | |
| "epoch": 1.946615969581749, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 0.0002731175785223531, | |
| "loss": 2.7402, | |
| "num_input_tokens_seen": 419389440, | |
| "step": 3200, | |
| "train_runtime": 152063.2975, | |
| "train_tokens_per_second": 2757.993 | |
| }, | |
| { | |
| "epoch": 1.9526996197718631, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 0.0002718911614911729, | |
| "loss": 2.7048, | |
| "num_input_tokens_seen": 420700160, | |
| "step": 3210, | |
| "train_runtime": 152351.3851, | |
| "train_tokens_per_second": 2761.381 | |
| }, | |
| { | |
| "epoch": 1.9587832699619772, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 0.00027066421333267135, | |
| "loss": 2.7136, | |
| "num_input_tokens_seen": 422010880, | |
| "step": 3220, | |
| "train_runtime": 152637.9334, | |
| "train_tokens_per_second": 2764.784 | |
| }, | |
| { | |
| "epoch": 1.9648669201520912, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 0.0002694367638152866, | |
| "loss": 2.7499, | |
| "num_input_tokens_seen": 423321600, | |
| "step": 3230, | |
| "train_runtime": 152921.9479, | |
| "train_tokens_per_second": 2768.22 | |
| }, | |
| { | |
| "epoch": 1.9709505703422052, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 0.00026820884271962106, | |
| "loss": 2.7487, | |
| "num_input_tokens_seen": 424632320, | |
| "step": 3240, | |
| "train_runtime": 153205.3322, | |
| "train_tokens_per_second": 2771.655 | |
| }, | |
| { | |
| "epoch": 1.9770342205323193, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 0.00026698047983771844, | |
| "loss": 2.7135, | |
| "num_input_tokens_seen": 425943040, | |
| "step": 3250, | |
| "train_runtime": 153491.4373, | |
| "train_tokens_per_second": 2775.028 | |
| }, | |
| { | |
| "epoch": 1.9831178707224335, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 0.00026575170497234135, | |
| "loss": 2.7164, | |
| "num_input_tokens_seen": 427253760, | |
| "step": 3260, | |
| "train_runtime": 153778.9801, | |
| "train_tokens_per_second": 2778.363 | |
| }, | |
| { | |
| "epoch": 1.9892015209125475, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 0.00026452254793624786, | |
| "loss": 2.7397, | |
| "num_input_tokens_seen": 428564480, | |
| "step": 3270, | |
| "train_runtime": 154066.1194, | |
| "train_tokens_per_second": 2781.692 | |
| }, | |
| { | |
| "epoch": 1.9952851711026616, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 0.0002632930385514684, | |
| "loss": 2.7232, | |
| "num_input_tokens_seen": 429875200, | |
| "step": 3280, | |
| "train_runtime": 154352.7772, | |
| "train_tokens_per_second": 2785.018 | |
| }, | |
| { | |
| "epoch": 2.001216730038023, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 0.00026206320664858224, | |
| "loss": 2.654, | |
| "num_input_tokens_seen": 431144960, | |
| "step": 3290, | |
| "train_runtime": 154631.677, | |
| "train_tokens_per_second": 2788.206 | |
| }, | |
| { | |
| "epoch": 2.007300380228137, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 0.00026083308206599335, | |
| "loss": 2.2631, | |
| "num_input_tokens_seen": 432455680, | |
| "step": 3300, | |
| "train_runtime": 154916.1657, | |
| "train_tokens_per_second": 2791.546 | |
| }, | |
| { | |
| "epoch": 2.013384030418251, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 0.0002596026946492073, | |
| "loss": 2.2231, | |
| "num_input_tokens_seen": 433766400, | |
| "step": 3310, | |
| "train_runtime": 155201.2246, | |
| "train_tokens_per_second": 2794.865 | |
| }, | |
| { | |
| "epoch": 2.019467680608365, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 0.00025837207425010583, | |
| "loss": 2.259, | |
| "num_input_tokens_seen": 435077120, | |
| "step": 3320, | |
| "train_runtime": 155486.5979, | |
| "train_tokens_per_second": 2798.165 | |
| }, | |
| { | |
| "epoch": 2.025551330798479, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.0002571412507262241, | |
| "loss": 2.2532, | |
| "num_input_tokens_seen": 436387840, | |
| "step": 3330, | |
| "train_runtime": 155770.6072, | |
| "train_tokens_per_second": 2801.477 | |
| }, | |
| { | |
| "epoch": 2.031634980988593, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 0.00025591025394002486, | |
| "loss": 2.2686, | |
| "num_input_tokens_seen": 437698560, | |
| "step": 3340, | |
| "train_runtime": 156054.2351, | |
| "train_tokens_per_second": 2804.785 | |
| }, | |
| { | |
| "epoch": 2.0377186311787074, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00025467911375817514, | |
| "loss": 2.2856, | |
| "num_input_tokens_seen": 439009280, | |
| "step": 3350, | |
| "train_runtime": 156338.1333, | |
| "train_tokens_per_second": 2808.075 | |
| }, | |
| { | |
| "epoch": 2.0438022813688215, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.0002534478600508207, | |
| "loss": 2.2971, | |
| "num_input_tokens_seen": 440320000, | |
| "step": 3360, | |
| "train_runtime": 156622.5971, | |
| "train_tokens_per_second": 2811.344 | |
| }, | |
| { | |
| "epoch": 2.0498859315589355, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 0.00025221652269086177, | |
| "loss": 2.251, | |
| "num_input_tokens_seen": 441630720, | |
| "step": 3370, | |
| "train_runtime": 156907.7276, | |
| "train_tokens_per_second": 2814.589 | |
| }, | |
| { | |
| "epoch": 2.0559695817490495, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.0002509851315532283, | |
| "loss": 2.2432, | |
| "num_input_tokens_seen": 442941440, | |
| "step": 3380, | |
| "train_runtime": 157192.8368, | |
| "train_tokens_per_second": 2817.822 | |
| }, | |
| { | |
| "epoch": 2.0620532319391636, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 0.0002497537165141547, | |
| "loss": 2.2619, | |
| "num_input_tokens_seen": 444252160, | |
| "step": 3390, | |
| "train_runtime": 157475.922, | |
| "train_tokens_per_second": 2821.08 | |
| }, | |
| { | |
| "epoch": 2.0681368821292776, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 0.00024852230745045566, | |
| "loss": 2.2688, | |
| "num_input_tokens_seen": 445562880, | |
| "step": 3400, | |
| "train_runtime": 157759.8482, | |
| "train_tokens_per_second": 2824.311 | |
| }, | |
| { | |
| "epoch": 2.0742205323193916, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 0.0002472909342388007, | |
| "loss": 2.2595, | |
| "num_input_tokens_seen": 446873600, | |
| "step": 3410, | |
| "train_runtime": 158043.6125, | |
| "train_tokens_per_second": 2827.533 | |
| }, | |
| { | |
| "epoch": 2.0803041825095057, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 0.0002460596267549894, | |
| "loss": 2.2606, | |
| "num_input_tokens_seen": 448184320, | |
| "step": 3420, | |
| "train_runtime": 158327.7009, | |
| "train_tokens_per_second": 2830.739 | |
| }, | |
| { | |
| "epoch": 2.0863878326996197, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00024482841487322675, | |
| "loss": 2.237, | |
| "num_input_tokens_seen": 449495040, | |
| "step": 3430, | |
| "train_runtime": 158615.4509, | |
| "train_tokens_per_second": 2833.867 | |
| }, | |
| { | |
| "epoch": 2.0924714828897337, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 0.0002435973284653984, | |
| "loss": 2.2754, | |
| "num_input_tokens_seen": 450805760, | |
| "step": 3440, | |
| "train_runtime": 158896.2661, | |
| "train_tokens_per_second": 2837.107 | |
| }, | |
| { | |
| "epoch": 2.0985551330798478, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 0.00024236639740034531, | |
| "loss": 2.2738, | |
| "num_input_tokens_seen": 452116480, | |
| "step": 3450, | |
| "train_runtime": 159179.2731, | |
| "train_tokens_per_second": 2840.297 | |
| }, | |
| { | |
| "epoch": 2.104638783269962, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 0.0002411356515431398, | |
| "loss": 2.3094, | |
| "num_input_tokens_seen": 453427200, | |
| "step": 3460, | |
| "train_runtime": 159462.1853, | |
| "train_tokens_per_second": 2843.478 | |
| }, | |
| { | |
| "epoch": 2.110722433460076, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 0.00023990512075436072, | |
| "loss": 2.2732, | |
| "num_input_tokens_seen": 454737920, | |
| "step": 3470, | |
| "train_runtime": 159746.6662, | |
| "train_tokens_per_second": 2846.619 | |
| }, | |
| { | |
| "epoch": 2.1168060836501903, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.00023867483488936847, | |
| "loss": 2.2837, | |
| "num_input_tokens_seen": 456048640, | |
| "step": 3480, | |
| "train_runtime": 160029.9994, | |
| "train_tokens_per_second": 2849.77 | |
| }, | |
| { | |
| "epoch": 2.1228897338403043, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.00023744482379758151, | |
| "loss": 2.291, | |
| "num_input_tokens_seen": 457359360, | |
| "step": 3490, | |
| "train_runtime": 160313.8968, | |
| "train_tokens_per_second": 2852.899 | |
| }, | |
| { | |
| "epoch": 2.1289733840304184, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 0.00023621511732175156, | |
| "loss": 2.2601, | |
| "num_input_tokens_seen": 458670080, | |
| "step": 3500, | |
| "train_runtime": 160597.2641, | |
| "train_tokens_per_second": 2856.027 | |
| }, | |
| { | |
| "epoch": 2.1350570342205324, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.0002349857452972395, | |
| "loss": 2.2518, | |
| "num_input_tokens_seen": 459980800, | |
| "step": 3510, | |
| "train_runtime": 160884.5887, | |
| "train_tokens_per_second": 2859.073 | |
| }, | |
| { | |
| "epoch": 2.1411406844106464, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 0.00023375673755129196, | |
| "loss": 2.2733, | |
| "num_input_tokens_seen": 461291520, | |
| "step": 3520, | |
| "train_runtime": 161164.9586, | |
| "train_tokens_per_second": 2862.232 | |
| }, | |
| { | |
| "epoch": 2.1472243346007605, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.00023252812390231725, | |
| "loss": 2.2898, | |
| "num_input_tokens_seen": 462602240, | |
| "step": 3530, | |
| "train_runtime": 161447.4137, | |
| "train_tokens_per_second": 2865.343 | |
| }, | |
| { | |
| "epoch": 2.1533079847908745, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 0.000231299934159162, | |
| "loss": 2.3022, | |
| "num_input_tokens_seen": 463912960, | |
| "step": 3540, | |
| "train_runtime": 161731.5367, | |
| "train_tokens_per_second": 2868.414 | |
| }, | |
| { | |
| "epoch": 2.1593916349809885, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.000230072198120388, | |
| "loss": 2.2692, | |
| "num_input_tokens_seen": 465223680, | |
| "step": 3550, | |
| "train_runtime": 162015.8766, | |
| "train_tokens_per_second": 2871.47 | |
| }, | |
| { | |
| "epoch": 2.1654752851711025, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00022884494557354938, | |
| "loss": 2.2573, | |
| "num_input_tokens_seen": 466534400, | |
| "step": 3560, | |
| "train_runtime": 162300.8248, | |
| "train_tokens_per_second": 2874.504 | |
| }, | |
| { | |
| "epoch": 2.1715589353612166, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00022761820629446915, | |
| "loss": 2.3002, | |
| "num_input_tokens_seen": 467845120, | |
| "step": 3570, | |
| "train_runtime": 162584.845, | |
| "train_tokens_per_second": 2877.544 | |
| }, | |
| { | |
| "epoch": 2.1776425855513306, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 0.00022639201004651796, | |
| "loss": 2.2796, | |
| "num_input_tokens_seen": 469155840, | |
| "step": 3580, | |
| "train_runtime": 162865.8342, | |
| "train_tokens_per_second": 2880.628 | |
| }, | |
| { | |
| "epoch": 2.183726235741445, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 0.00022516638657989104, | |
| "loss": 2.2629, | |
| "num_input_tokens_seen": 470466560, | |
| "step": 3590, | |
| "train_runtime": 163150.1327, | |
| "train_tokens_per_second": 2883.642 | |
| }, | |
| { | |
| "epoch": 2.189809885931559, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 0.00022394136563088664, | |
| "loss": 2.2593, | |
| "num_input_tokens_seen": 471777280, | |
| "step": 3600, | |
| "train_runtime": 163433.4214, | |
| "train_tokens_per_second": 2886.663 | |
| }, | |
| { | |
| "epoch": 2.195893536121673, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 0.00022271697692118474, | |
| "loss": 2.2636, | |
| "num_input_tokens_seen": 473088000, | |
| "step": 3610, | |
| "train_runtime": 163714.21, | |
| "train_tokens_per_second": 2889.719 | |
| }, | |
| { | |
| "epoch": 2.201977186311787, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00022149325015712572, | |
| "loss": 2.3206, | |
| "num_input_tokens_seen": 474398720, | |
| "step": 3620, | |
| "train_runtime": 163996.6786, | |
| "train_tokens_per_second": 2892.734 | |
| }, | |
| { | |
| "epoch": 2.208060836501901, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.0002202702150289898, | |
| "loss": 2.2987, | |
| "num_input_tokens_seen": 475709440, | |
| "step": 3630, | |
| "train_runtime": 164277.0917, | |
| "train_tokens_per_second": 2895.775 | |
| }, | |
| { | |
| "epoch": 2.2141444866920152, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.0002190479012102766, | |
| "loss": 2.304, | |
| "num_input_tokens_seen": 477020160, | |
| "step": 3640, | |
| "train_runtime": 164560.103, | |
| "train_tokens_per_second": 2898.759 | |
| }, | |
| { | |
| "epoch": 2.2202281368821293, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 0.00021782633835698513, | |
| "loss": 2.303, | |
| "num_input_tokens_seen": 478330880, | |
| "step": 3650, | |
| "train_runtime": 164843.6011, | |
| "train_tokens_per_second": 2901.725 | |
| }, | |
| { | |
| "epoch": 2.2263117870722433, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00021660555610689432, | |
| "loss": 2.297, | |
| "num_input_tokens_seen": 479641600, | |
| "step": 3660, | |
| "train_runtime": 165127.0401, | |
| "train_tokens_per_second": 2904.682 | |
| }, | |
| { | |
| "epoch": 2.2323954372623573, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00021538558407884428, | |
| "loss": 2.3029, | |
| "num_input_tokens_seen": 480952320, | |
| "step": 3670, | |
| "train_runtime": 165410.064, | |
| "train_tokens_per_second": 2907.636 | |
| }, | |
| { | |
| "epoch": 2.2384790874524714, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 0.0002141664518720168, | |
| "loss": 2.2902, | |
| "num_input_tokens_seen": 482263040, | |
| "step": 3680, | |
| "train_runtime": 165690.9192, | |
| "train_tokens_per_second": 2910.618 | |
| }, | |
| { | |
| "epoch": 2.2445627376425854, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 0.00021294818906521826, | |
| "loss": 2.2598, | |
| "num_input_tokens_seen": 483573760, | |
| "step": 3690, | |
| "train_runtime": 165969.7636, | |
| "train_tokens_per_second": 2913.626 | |
| }, | |
| { | |
| "epoch": 2.2506463878326994, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.0002117308252161614, | |
| "loss": 2.2521, | |
| "num_input_tokens_seen": 484884480, | |
| "step": 3700, | |
| "train_runtime": 166249.1147, | |
| "train_tokens_per_second": 2916.614 | |
| }, | |
| { | |
| "epoch": 2.2567300380228135, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.000210514389860748, | |
| "loss": 2.298, | |
| "num_input_tokens_seen": 486195200, | |
| "step": 3710, | |
| "train_runtime": 166531.0263, | |
| "train_tokens_per_second": 2919.547 | |
| }, | |
| { | |
| "epoch": 2.262813688212928, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00020929891251235288, | |
| "loss": 2.2704, | |
| "num_input_tokens_seen": 487505920, | |
| "step": 3720, | |
| "train_runtime": 166816.4218, | |
| "train_tokens_per_second": 2922.41 | |
| }, | |
| { | |
| "epoch": 2.268897338403042, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00020808442266110754, | |
| "loss": 2.2685, | |
| "num_input_tokens_seen": 488816640, | |
| "step": 3730, | |
| "train_runtime": 167139.2928, | |
| "train_tokens_per_second": 2924.606 | |
| }, | |
| { | |
| "epoch": 2.274980988593156, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00020687094977318426, | |
| "loss": 2.2528, | |
| "num_input_tokens_seen": 490127360, | |
| "step": 3740, | |
| "train_runtime": 167541.2573, | |
| "train_tokens_per_second": 2925.413 | |
| }, | |
| { | |
| "epoch": 2.28106463878327, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 0.00020565852329008198, | |
| "loss": 2.2887, | |
| "num_input_tokens_seen": 491438080, | |
| "step": 3750, | |
| "train_runtime": 167945.8138, | |
| "train_tokens_per_second": 2926.17 | |
| }, | |
| { | |
| "epoch": 2.287148288973384, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 0.00020444717262791155, | |
| "loss": 2.2814, | |
| "num_input_tokens_seen": 492748800, | |
| "step": 3760, | |
| "train_runtime": 168342.5277, | |
| "train_tokens_per_second": 2927.061 | |
| }, | |
| { | |
| "epoch": 2.293231939163498, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00020323692717668163, | |
| "loss": 2.2865, | |
| "num_input_tokens_seen": 494059520, | |
| "step": 3770, | |
| "train_runtime": 168747.5236, | |
| "train_tokens_per_second": 2927.803 | |
| }, | |
| { | |
| "epoch": 2.299315589353612, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 0.00020202781629958654, | |
| "loss": 2.286, | |
| "num_input_tokens_seen": 495370240, | |
| "step": 3780, | |
| "train_runtime": 169145.8982, | |
| "train_tokens_per_second": 2928.657 | |
| }, | |
| { | |
| "epoch": 2.305399239543726, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 0.0002008198693322933, | |
| "loss": 2.2767, | |
| "num_input_tokens_seen": 496680960, | |
| "step": 3790, | |
| "train_runtime": 169485.8374, | |
| "train_tokens_per_second": 2930.516 | |
| }, | |
| { | |
| "epoch": 2.31148288973384, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00019961311558222957, | |
| "loss": 2.2803, | |
| "num_input_tokens_seen": 497991680, | |
| "step": 3800, | |
| "train_runtime": 169783.4763, | |
| "train_tokens_per_second": 2933.099 | |
| }, | |
| { | |
| "epoch": 2.3175665399239542, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00019840758432787325, | |
| "loss": 2.2858, | |
| "num_input_tokens_seen": 499302400, | |
| "step": 3810, | |
| "train_runtime": 170120.6901, | |
| "train_tokens_per_second": 2934.989 | |
| }, | |
| { | |
| "epoch": 2.3236501901140683, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.00019720330481804194, | |
| "loss": 2.2956, | |
| "num_input_tokens_seen": 500613120, | |
| "step": 3820, | |
| "train_runtime": 170509.9773, | |
| "train_tokens_per_second": 2935.976 | |
| }, | |
| { | |
| "epoch": 2.3297338403041827, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 0.00019600030627118286, | |
| "loss": 2.2794, | |
| "num_input_tokens_seen": 501923840, | |
| "step": 3830, | |
| "train_runtime": 170909.5246, | |
| "train_tokens_per_second": 2936.781 | |
| }, | |
| { | |
| "epoch": 2.3358174904942968, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00019479861787466456, | |
| "loss": 2.2827, | |
| "num_input_tokens_seen": 503234560, | |
| "step": 3840, | |
| "train_runtime": 171311.8975, | |
| "train_tokens_per_second": 2937.534 | |
| }, | |
| { | |
| "epoch": 2.341901140684411, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00019359826878406823, | |
| "loss": 2.2841, | |
| "num_input_tokens_seen": 504545280, | |
| "step": 3850, | |
| "train_runtime": 171715.6137, | |
| "train_tokens_per_second": 2938.261 | |
| }, | |
| { | |
| "epoch": 2.347984790874525, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00019239928812248073, | |
| "loss": 2.2691, | |
| "num_input_tokens_seen": 505856000, | |
| "step": 3860, | |
| "train_runtime": 172119.6283, | |
| "train_tokens_per_second": 2938.979 | |
| }, | |
| { | |
| "epoch": 2.354068441064639, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00019120170497978806, | |
| "loss": 2.2681, | |
| "num_input_tokens_seen": 507166720, | |
| "step": 3870, | |
| "train_runtime": 172448.2534, | |
| "train_tokens_per_second": 2940.979 | |
| }, | |
| { | |
| "epoch": 2.360152091254753, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 0.0001900055484119688, | |
| "loss": 2.2728, | |
| "num_input_tokens_seen": 508477440, | |
| "step": 3880, | |
| "train_runtime": 172754.8765, | |
| "train_tokens_per_second": 2943.346 | |
| }, | |
| { | |
| "epoch": 2.366235741444867, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.0001888108474403903, | |
| "loss": 2.2809, | |
| "num_input_tokens_seen": 509788160, | |
| "step": 3890, | |
| "train_runtime": 173072.7558, | |
| "train_tokens_per_second": 2945.514 | |
| }, | |
| { | |
| "epoch": 2.372319391634981, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 0.00018761763105110368, | |
| "loss": 2.2759, | |
| "num_input_tokens_seen": 511098880, | |
| "step": 3900, | |
| "train_runtime": 173461.4391, | |
| "train_tokens_per_second": 2946.47 | |
| }, | |
| { | |
| "epoch": 2.378403041825095, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.0001864259281941408, | |
| "loss": 2.2714, | |
| "num_input_tokens_seen": 512409600, | |
| "step": 3910, | |
| "train_runtime": 173862.6385, | |
| "train_tokens_per_second": 2947.209 | |
| }, | |
| { | |
| "epoch": 2.384486692015209, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00018523576778281203, | |
| "loss": 2.3315, | |
| "num_input_tokens_seen": 513720320, | |
| "step": 3920, | |
| "train_runtime": 174260.0105, | |
| "train_tokens_per_second": 2948.01 | |
| }, | |
| { | |
| "epoch": 2.390570342205323, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00018404717869300474, | |
| "loss": 2.2851, | |
| "num_input_tokens_seen": 515031040, | |
| "step": 3930, | |
| "train_runtime": 174662.9573, | |
| "train_tokens_per_second": 2948.714 | |
| }, | |
| { | |
| "epoch": 2.396653992395437, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00018286018976248241, | |
| "loss": 2.2702, | |
| "num_input_tokens_seen": 516341760, | |
| "step": 3940, | |
| "train_runtime": 175072.4087, | |
| "train_tokens_per_second": 2949.304 | |
| }, | |
| { | |
| "epoch": 2.402737642585551, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 0.00018167482979018524, | |
| "loss": 2.2418, | |
| "num_input_tokens_seen": 517652480, | |
| "step": 3950, | |
| "train_runtime": 175479.8988, | |
| "train_tokens_per_second": 2949.925 | |
| }, | |
| { | |
| "epoch": 2.4088212927756656, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00018049112753553155, | |
| "loss": 2.2468, | |
| "num_input_tokens_seen": 518963200, | |
| "step": 3960, | |
| "train_runtime": 175887.4195, | |
| "train_tokens_per_second": 2950.542 | |
| }, | |
| { | |
| "epoch": 2.4149049429657796, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00017930911171771936, | |
| "loss": 2.2478, | |
| "num_input_tokens_seen": 520273920, | |
| "step": 3970, | |
| "train_runtime": 176291.8991, | |
| "train_tokens_per_second": 2951.207 | |
| }, | |
| { | |
| "epoch": 2.4209885931558937, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00017812881101503055, | |
| "loss": 2.2586, | |
| "num_input_tokens_seen": 521584640, | |
| "step": 3980, | |
| "train_runtime": 176698.2006, | |
| "train_tokens_per_second": 2951.839 | |
| }, | |
| { | |
| "epoch": 2.4270722433460077, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00017695025406413433, | |
| "loss": 2.2691, | |
| "num_input_tokens_seen": 522895360, | |
| "step": 3990, | |
| "train_runtime": 177101.7892, | |
| "train_tokens_per_second": 2952.513 | |
| }, | |
| { | |
| "epoch": 2.4331558935361217, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00017577346945939266, | |
| "loss": 2.2809, | |
| "num_input_tokens_seen": 524206080, | |
| "step": 4000, | |
| "train_runtime": 177508.9278, | |
| "train_tokens_per_second": 2953.125 | |
| }, | |
| { | |
| "epoch": 2.4392395437262357, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00017459848575216653, | |
| "loss": 2.26, | |
| "num_input_tokens_seen": 525516800, | |
| "step": 4010, | |
| "train_runtime": 177913.3706, | |
| "train_tokens_per_second": 2953.779 | |
| }, | |
| { | |
| "epoch": 2.44532319391635, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00017342533145012334, | |
| "loss": 2.2354, | |
| "num_input_tokens_seen": 526827520, | |
| "step": 4020, | |
| "train_runtime": 178319.7473, | |
| "train_tokens_per_second": 2954.398 | |
| }, | |
| { | |
| "epoch": 2.451406844106464, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00017225403501654497, | |
| "loss": 2.2772, | |
| "num_input_tokens_seen": 528138240, | |
| "step": 4030, | |
| "train_runtime": 178695.0138, | |
| "train_tokens_per_second": 2955.529 | |
| }, | |
| { | |
| "epoch": 2.457490494296578, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00017108462486963743, | |
| "loss": 2.2981, | |
| "num_input_tokens_seen": 529448960, | |
| "step": 4040, | |
| "train_runtime": 179008.7387, | |
| "train_tokens_per_second": 2957.671 | |
| }, | |
| { | |
| "epoch": 2.463574144486692, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00016991712938184154, | |
| "loss": 2.2367, | |
| "num_input_tokens_seen": 530759680, | |
| "step": 4050, | |
| "train_runtime": 179328.5813, | |
| "train_tokens_per_second": 2959.705 | |
| }, | |
| { | |
| "epoch": 2.469657794676806, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00016875157687914374, | |
| "loss": 2.2536, | |
| "num_input_tokens_seen": 532070400, | |
| "step": 4060, | |
| "train_runtime": 179694.1448, | |
| "train_tokens_per_second": 2960.978 | |
| }, | |
| { | |
| "epoch": 2.4757414448669204, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00016758799564039011, | |
| "loss": 2.253, | |
| "num_input_tokens_seen": 533381120, | |
| "step": 4070, | |
| "train_runtime": 180006.4269, | |
| "train_tokens_per_second": 2963.123 | |
| }, | |
| { | |
| "epoch": 2.4818250950570344, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00016642641389659891, | |
| "loss": 2.2397, | |
| "num_input_tokens_seen": 534691840, | |
| "step": 4080, | |
| "train_runtime": 180374.7182, | |
| "train_tokens_per_second": 2964.339 | |
| }, | |
| { | |
| "epoch": 2.4879087452471484, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00016526685983027676, | |
| "loss": 2.2834, | |
| "num_input_tokens_seen": 536002560, | |
| "step": 4090, | |
| "train_runtime": 180763.2045, | |
| "train_tokens_per_second": 2965.219 | |
| }, | |
| { | |
| "epoch": 2.4939923954372625, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00016410936157473428, | |
| "loss": 2.2324, | |
| "num_input_tokens_seen": 537313280, | |
| "step": 4100, | |
| "train_runtime": 181162.6654, | |
| "train_tokens_per_second": 2965.916 | |
| }, | |
| { | |
| "epoch": 2.5000760456273765, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00016295394721340352, | |
| "loss": 2.2596, | |
| "num_input_tokens_seen": 538624000, | |
| "step": 4110, | |
| "train_runtime": 181549.9797, | |
| "train_tokens_per_second": 2966.808 | |
| }, | |
| { | |
| "epoch": 2.5061596958174905, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.0001618006447791568, | |
| "loss": 2.2531, | |
| "num_input_tokens_seen": 539934720, | |
| "step": 4120, | |
| "train_runtime": 181949.9324, | |
| "train_tokens_per_second": 2967.491 | |
| }, | |
| { | |
| "epoch": 2.5122433460076046, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00016064948225362653, | |
| "loss": 2.2416, | |
| "num_input_tokens_seen": 541245440, | |
| "step": 4130, | |
| "train_runtime": 182347.8217, | |
| "train_tokens_per_second": 2968.203 | |
| }, | |
| { | |
| "epoch": 2.5183269961977186, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.00015950048756652613, | |
| "loss": 2.2866, | |
| "num_input_tokens_seen": 542556160, | |
| "step": 4140, | |
| "train_runtime": 182739.9129, | |
| "train_tokens_per_second": 2969.007 | |
| }, | |
| { | |
| "epoch": 2.5244106463878326, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 0.00015835368859497262, | |
| "loss": 2.2565, | |
| "num_input_tokens_seen": 543866880, | |
| "step": 4150, | |
| "train_runtime": 183056.8824, | |
| "train_tokens_per_second": 2971.027 | |
| }, | |
| { | |
| "epoch": 2.5304942965779467, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00015720911316281035, | |
| "loss": 2.278, | |
| "num_input_tokens_seen": 545177600, | |
| "step": 4160, | |
| "train_runtime": 183352.523, | |
| "train_tokens_per_second": 2973.385 | |
| }, | |
| { | |
| "epoch": 2.5365779467680607, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00015606678903993526, | |
| "loss": 2.2587, | |
| "num_input_tokens_seen": 546488320, | |
| "step": 4170, | |
| "train_runtime": 183645.3012, | |
| "train_tokens_per_second": 2975.782 | |
| }, | |
| { | |
| "epoch": 2.542661596958175, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00015492674394162212, | |
| "loss": 2.2541, | |
| "num_input_tokens_seen": 547799040, | |
| "step": 4180, | |
| "train_runtime": 183930.1192, | |
| "train_tokens_per_second": 2978.3 | |
| }, | |
| { | |
| "epoch": 2.5487452471482888, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00015378900552785147, | |
| "loss": 2.2852, | |
| "num_input_tokens_seen": 549109760, | |
| "step": 4190, | |
| "train_runtime": 184214.7892, | |
| "train_tokens_per_second": 2980.813 | |
| }, | |
| { | |
| "epoch": 2.5548288973384032, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 0.0001526536014026384, | |
| "loss": 2.2351, | |
| "num_input_tokens_seen": 550420480, | |
| "step": 4200, | |
| "train_runtime": 184498.9282, | |
| "train_tokens_per_second": 2983.326 | |
| }, | |
| { | |
| "epoch": 2.5609125475285173, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.00015152055911336331, | |
| "loss": 2.2378, | |
| "num_input_tokens_seen": 551731200, | |
| "step": 4210, | |
| "train_runtime": 184784.9113, | |
| "train_tokens_per_second": 2985.802 | |
| }, | |
| { | |
| "epoch": 2.5669961977186313, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 0.0001503899061501034, | |
| "loss": 2.2727, | |
| "num_input_tokens_seen": 553041920, | |
| "step": 4220, | |
| "train_runtime": 185071.4629, | |
| "train_tokens_per_second": 2988.261 | |
| }, | |
| { | |
| "epoch": 2.5730798479087453, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00014926166994496526, | |
| "loss": 2.2601, | |
| "num_input_tokens_seen": 554352640, | |
| "step": 4230, | |
| "train_runtime": 185356.131, | |
| "train_tokens_per_second": 2990.743 | |
| }, | |
| { | |
| "epoch": 2.5791634980988594, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00014813587787141985, | |
| "loss": 2.2717, | |
| "num_input_tokens_seen": 555663360, | |
| "step": 4240, | |
| "train_runtime": 185642.9747, | |
| "train_tokens_per_second": 2993.183 | |
| }, | |
| { | |
| "epoch": 2.5852471482889734, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 0.00014701255724363846, | |
| "loss": 2.2139, | |
| "num_input_tokens_seen": 556974080, | |
| "step": 4250, | |
| "train_runtime": 185930.8672, | |
| "train_tokens_per_second": 2995.598 | |
| }, | |
| { | |
| "epoch": 2.5913307984790874, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 0.0001458917353158291, | |
| "loss": 2.2556, | |
| "num_input_tokens_seen": 558284800, | |
| "step": 4260, | |
| "train_runtime": 186218.2786, | |
| "train_tokens_per_second": 2998.013 | |
| }, | |
| { | |
| "epoch": 2.5974144486692015, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00014477343928157637, | |
| "loss": 2.2514, | |
| "num_input_tokens_seen": 559595520, | |
| "step": 4270, | |
| "train_runtime": 186506.0602, | |
| "train_tokens_per_second": 3000.415 | |
| }, | |
| { | |
| "epoch": 2.6034980988593155, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 0.00014365769627318095, | |
| "loss": 2.2772, | |
| "num_input_tokens_seen": 560906240, | |
| "step": 4280, | |
| "train_runtime": 186795.2444, | |
| "train_tokens_per_second": 3002.787 | |
| }, | |
| { | |
| "epoch": 2.6095817490494295, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00014254453336100164, | |
| "loss": 2.2335, | |
| "num_input_tokens_seen": 562216960, | |
| "step": 4290, | |
| "train_runtime": 187082.6925, | |
| "train_tokens_per_second": 3005.179 | |
| }, | |
| { | |
| "epoch": 2.6156653992395436, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00014143397755279853, | |
| "loss": 2.2124, | |
| "num_input_tokens_seen": 563527680, | |
| "step": 4300, | |
| "train_runtime": 187370.1439, | |
| "train_tokens_per_second": 3007.564 | |
| }, | |
| { | |
| "epoch": 2.621749049429658, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00014032605579307746, | |
| "loss": 2.242, | |
| "num_input_tokens_seen": 564838400, | |
| "step": 4310, | |
| "train_runtime": 187656.4196, | |
| "train_tokens_per_second": 3009.96 | |
| }, | |
| { | |
| "epoch": 2.6278326996197716, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00013922079496243652, | |
| "loss": 2.2601, | |
| "num_input_tokens_seen": 566149120, | |
| "step": 4320, | |
| "train_runtime": 187943.3185, | |
| "train_tokens_per_second": 3012.34 | |
| }, | |
| { | |
| "epoch": 2.633916349809886, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 0.00013811822187691442, | |
| "loss": 2.2565, | |
| "num_input_tokens_seen": 567459840, | |
| "step": 4330, | |
| "train_runtime": 188229.6957, | |
| "train_tokens_per_second": 3014.72 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00013701836328733848, | |
| "loss": 2.2579, | |
| "num_input_tokens_seen": 568770560, | |
| "step": 4340, | |
| "train_runtime": 188511.174, | |
| "train_tokens_per_second": 3017.172 | |
| }, | |
| { | |
| "epoch": 2.646083650190114, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 0.00013592124587867699, | |
| "loss": 2.2485, | |
| "num_input_tokens_seen": 570081280, | |
| "step": 4350, | |
| "train_runtime": 188795.3102, | |
| "train_tokens_per_second": 3019.573 | |
| }, | |
| { | |
| "epoch": 2.652167300380228, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00013482689626939098, | |
| "loss": 2.2866, | |
| "num_input_tokens_seen": 571392000, | |
| "step": 4360, | |
| "train_runtime": 189080.8835, | |
| "train_tokens_per_second": 3021.945 | |
| }, | |
| { | |
| "epoch": 2.658250950570342, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.00013373534101078879, | |
| "loss": 2.2841, | |
| "num_input_tokens_seen": 572702720, | |
| "step": 4370, | |
| "train_runtime": 189363.4231, | |
| "train_tokens_per_second": 3024.358 | |
| }, | |
| { | |
| "epoch": 2.6643346007604563, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 0.00013264660658638156, | |
| "loss": 2.2631, | |
| "num_input_tokens_seen": 574013440, | |
| "step": 4380, | |
| "train_runtime": 189647.906, | |
| "train_tokens_per_second": 3026.732 | |
| }, | |
| { | |
| "epoch": 2.6704182509505703, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00013156071941124097, | |
| "loss": 2.2472, | |
| "num_input_tokens_seen": 575324160, | |
| "step": 4390, | |
| "train_runtime": 189933.2699, | |
| "train_tokens_per_second": 3029.086 | |
| }, | |
| { | |
| "epoch": 2.6765019011406843, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 0.00013047770583135806, | |
| "loss": 2.2455, | |
| "num_input_tokens_seen": 576634880, | |
| "step": 4400, | |
| "train_runtime": 190217.0142, | |
| "train_tokens_per_second": 3031.458 | |
| }, | |
| { | |
| "epoch": 2.6825855513307983, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 0.00012939759212300423, | |
| "loss": 2.2443, | |
| "num_input_tokens_seen": 577945600, | |
| "step": 4410, | |
| "train_runtime": 190502.7513, | |
| "train_tokens_per_second": 3033.791 | |
| }, | |
| { | |
| "epoch": 2.688669201520913, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00012832040449209397, | |
| "loss": 2.235, | |
| "num_input_tokens_seen": 579256320, | |
| "step": 4420, | |
| "train_runtime": 190784.7777, | |
| "train_tokens_per_second": 3036.177 | |
| }, | |
| { | |
| "epoch": 2.6947528517110264, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 0.00012724616907354831, | |
| "loss": 2.254, | |
| "num_input_tokens_seen": 580567040, | |
| "step": 4430, | |
| "train_runtime": 191069.3239, | |
| "train_tokens_per_second": 3038.515 | |
| }, | |
| { | |
| "epoch": 2.700836501901141, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00012617491193066138, | |
| "loss": 2.234, | |
| "num_input_tokens_seen": 581877760, | |
| "step": 4440, | |
| "train_runtime": 191351.4149, | |
| "train_tokens_per_second": 3040.886 | |
| }, | |
| { | |
| "epoch": 2.706920152091255, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 0.00012510665905446786, | |
| "loss": 2.2605, | |
| "num_input_tokens_seen": 583188480, | |
| "step": 4450, | |
| "train_runtime": 191635.3856, | |
| "train_tokens_per_second": 3043.219 | |
| }, | |
| { | |
| "epoch": 2.713003802281369, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 0.00012404143636311234, | |
| "loss": 2.2754, | |
| "num_input_tokens_seen": 584499200, | |
| "step": 4460, | |
| "train_runtime": 191919.5565, | |
| "train_tokens_per_second": 3045.543 | |
| }, | |
| { | |
| "epoch": 2.719087452471483, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 0.0001229792697012205, | |
| "loss": 2.2325, | |
| "num_input_tokens_seen": 585809920, | |
| "step": 4470, | |
| "train_runtime": 192203.536, | |
| "train_tokens_per_second": 3047.862 | |
| }, | |
| { | |
| "epoch": 2.725171102661597, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 0.0001219201848392722, | |
| "loss": 2.2679, | |
| "num_input_tokens_seen": 587120640, | |
| "step": 4480, | |
| "train_runtime": 192486.4046, | |
| "train_tokens_per_second": 3050.193 | |
| }, | |
| { | |
| "epoch": 2.731254752851711, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 0.0001208642074729758, | |
| "loss": 2.2326, | |
| "num_input_tokens_seen": 588431360, | |
| "step": 4490, | |
| "train_runtime": 192772.2556, | |
| "train_tokens_per_second": 3052.469 | |
| }, | |
| { | |
| "epoch": 2.737338403041825, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00011981136322264527, | |
| "loss": 2.2257, | |
| "num_input_tokens_seen": 589742080, | |
| "step": 4500, | |
| "train_runtime": 193056.7552, | |
| "train_tokens_per_second": 3054.76 | |
| }, | |
| { | |
| "epoch": 2.743422053231939, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 0.00011876167763257859, | |
| "loss": 2.2207, | |
| "num_input_tokens_seen": 591052800, | |
| "step": 4510, | |
| "train_runtime": 193339.6912, | |
| "train_tokens_per_second": 3057.069 | |
| }, | |
| { | |
| "epoch": 2.749505703422053, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 0.00011771517617043723, | |
| "loss": 2.2161, | |
| "num_input_tokens_seen": 592363520, | |
| "step": 4520, | |
| "train_runtime": 193619.5072, | |
| "train_tokens_per_second": 3059.421 | |
| }, | |
| { | |
| "epoch": 2.755589353612167, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.0001166718842266292, | |
| "loss": 2.2141, | |
| "num_input_tokens_seen": 593674240, | |
| "step": 4530, | |
| "train_runtime": 193900.0918, | |
| "train_tokens_per_second": 3061.753 | |
| }, | |
| { | |
| "epoch": 2.761673003802281, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 0.00011563182711369216, | |
| "loss": 2.2488, | |
| "num_input_tokens_seen": 594984960, | |
| "step": 4540, | |
| "train_runtime": 194181.2319, | |
| "train_tokens_per_second": 3064.07 | |
| }, | |
| { | |
| "epoch": 2.7677566539923957, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 0.00011459503006568026, | |
| "loss": 2.236, | |
| "num_input_tokens_seen": 596295680, | |
| "step": 4550, | |
| "train_runtime": 194461.9664, | |
| "train_tokens_per_second": 3066.387 | |
| }, | |
| { | |
| "epoch": 2.7738403041825093, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 0.00011356151823755109, | |
| "loss": 2.2273, | |
| "num_input_tokens_seen": 597606400, | |
| "step": 4560, | |
| "train_runtime": 194741.6653, | |
| "train_tokens_per_second": 3068.714 | |
| }, | |
| { | |
| "epoch": 2.7799239543726237, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 0.00011253131670455547, | |
| "loss": 2.2011, | |
| "num_input_tokens_seen": 598917120, | |
| "step": 4570, | |
| "train_runtime": 195021.7225, | |
| "train_tokens_per_second": 3071.028 | |
| }, | |
| { | |
| "epoch": 2.7860076045627378, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 0.00011150445046162949, | |
| "loss": 2.2139, | |
| "num_input_tokens_seen": 600227840, | |
| "step": 4580, | |
| "train_runtime": 195305.1746, | |
| "train_tokens_per_second": 3073.282 | |
| }, | |
| { | |
| "epoch": 2.792091254752852, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 0.00011048094442278775, | |
| "loss": 2.2377, | |
| "num_input_tokens_seen": 601538560, | |
| "step": 4590, | |
| "train_runtime": 195586.2309, | |
| "train_tokens_per_second": 3075.567 | |
| }, | |
| { | |
| "epoch": 2.798174904942966, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 0.000109460823420519, | |
| "loss": 2.2328, | |
| "num_input_tokens_seen": 602849280, | |
| "step": 4600, | |
| "train_runtime": 195869.0835, | |
| "train_tokens_per_second": 3077.817 | |
| }, | |
| { | |
| "epoch": 2.80425855513308, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 0.00010844411220518357, | |
| "loss": 2.2193, | |
| "num_input_tokens_seen": 604160000, | |
| "step": 4610, | |
| "train_runtime": 196149.6476, | |
| "train_tokens_per_second": 3080.097 | |
| }, | |
| { | |
| "epoch": 2.810342205323194, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 0.00010743083544441304, | |
| "loss": 2.2007, | |
| "num_input_tokens_seen": 605470720, | |
| "step": 4620, | |
| "train_runtime": 196431.3842, | |
| "train_tokens_per_second": 3082.352 | |
| }, | |
| { | |
| "epoch": 2.816425855513308, | |
| "grad_norm": 0.375, | |
| "learning_rate": 0.00010642101772251126, | |
| "loss": 2.2269, | |
| "num_input_tokens_seen": 606781440, | |
| "step": 4630, | |
| "train_runtime": 196713.1962, | |
| "train_tokens_per_second": 3084.6 | |
| }, | |
| { | |
| "epoch": 2.822509505703422, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00010541468353985878, | |
| "loss": 2.2308, | |
| "num_input_tokens_seen": 608092160, | |
| "step": 4640, | |
| "train_runtime": 196995.2329, | |
| "train_tokens_per_second": 3086.837 | |
| }, | |
| { | |
| "epoch": 2.828593155893536, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 0.0001044118573123177, | |
| "loss": 2.2212, | |
| "num_input_tokens_seen": 609402880, | |
| "step": 4650, | |
| "train_runtime": 197277.9382, | |
| "train_tokens_per_second": 3089.057 | |
| }, | |
| { | |
| "epoch": 2.83467680608365, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 0.00010341256337063917, | |
| "loss": 2.251, | |
| "num_input_tokens_seen": 610713600, | |
| "step": 4660, | |
| "train_runtime": 197559.8157, | |
| "train_tokens_per_second": 3091.285 | |
| }, | |
| { | |
| "epoch": 2.840760456273764, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 0.00010241682595987378, | |
| "loss": 2.232, | |
| "num_input_tokens_seen": 612024320, | |
| "step": 4670, | |
| "train_runtime": 197841.9689, | |
| "train_tokens_per_second": 3093.501 | |
| }, | |
| { | |
| "epoch": 2.8468441064638785, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 0.00010142466923878274, | |
| "loss": 2.2362, | |
| "num_input_tokens_seen": 613335040, | |
| "step": 4680, | |
| "train_runtime": 198123.472, | |
| "train_tokens_per_second": 3095.721 | |
| }, | |
| { | |
| "epoch": 2.852927756653992, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 0.00010043611727925194, | |
| "loss": 2.2195, | |
| "num_input_tokens_seen": 614645760, | |
| "step": 4690, | |
| "train_runtime": 198406.0097, | |
| "train_tokens_per_second": 3097.919 | |
| }, | |
| { | |
| "epoch": 2.8590114068441066, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 9.945119406570793e-05, | |
| "loss": 2.2289, | |
| "num_input_tokens_seen": 615956480, | |
| "step": 4700, | |
| "train_runtime": 198686.4134, | |
| "train_tokens_per_second": 3100.144 | |
| }, | |
| { | |
| "epoch": 2.8650950570342206, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 9.846992349453602e-05, | |
| "loss": 2.2286, | |
| "num_input_tokens_seen": 617267200, | |
| "step": 4710, | |
| "train_runtime": 198964.9676, | |
| "train_tokens_per_second": 3102.391 | |
| }, | |
| { | |
| "epoch": 2.8711787072243347, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 9.749232937350011e-05, | |
| "loss": 2.2129, | |
| "num_input_tokens_seen": 618577920, | |
| "step": 4720, | |
| "train_runtime": 199243.6329, | |
| "train_tokens_per_second": 3104.631 | |
| }, | |
| { | |
| "epoch": 2.8772623574144487, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 9.651843542116592e-05, | |
| "loss": 2.206, | |
| "num_input_tokens_seen": 619888640, | |
| "step": 4730, | |
| "train_runtime": 199525.2636, | |
| "train_tokens_per_second": 3106.818 | |
| }, | |
| { | |
| "epoch": 2.8833460076045627, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 9.554826526632476e-05, | |
| "loss": 2.1995, | |
| "num_input_tokens_seen": 621199360, | |
| "step": 4740, | |
| "train_runtime": 199805.6604, | |
| "train_tokens_per_second": 3109.018 | |
| }, | |
| { | |
| "epoch": 2.8894296577946768, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 9.458184244742033e-05, | |
| "loss": 2.231, | |
| "num_input_tokens_seen": 622510080, | |
| "step": 4750, | |
| "train_runtime": 200085.6118, | |
| "train_tokens_per_second": 3111.219 | |
| }, | |
| { | |
| "epoch": 2.895513307984791, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 9.3619190411978e-05, | |
| "loss": 2.2128, | |
| "num_input_tokens_seen": 623820800, | |
| "step": 4760, | |
| "train_runtime": 200366.2522, | |
| "train_tokens_per_second": 3113.403 | |
| }, | |
| { | |
| "epoch": 2.901596958174905, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 9.266033251603564e-05, | |
| "loss": 2.2127, | |
| "num_input_tokens_seen": 625131520, | |
| "step": 4770, | |
| "train_runtime": 200645.7105, | |
| "train_tokens_per_second": 3115.599 | |
| }, | |
| { | |
| "epoch": 2.907680608365019, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 9.170529202357705e-05, | |
| "loss": 2.2048, | |
| "num_input_tokens_seen": 626442240, | |
| "step": 4780, | |
| "train_runtime": 200924.6962, | |
| "train_tokens_per_second": 3117.796 | |
| }, | |
| { | |
| "epoch": 2.9137642585551333, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 9.075409210596758e-05, | |
| "loss": 2.2148, | |
| "num_input_tokens_seen": 627752960, | |
| "step": 4790, | |
| "train_runtime": 201204.8499, | |
| "train_tokens_per_second": 3119.969 | |
| }, | |
| { | |
| "epoch": 2.919847908745247, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8.980675584139152e-05, | |
| "loss": 2.1909, | |
| "num_input_tokens_seen": 629063680, | |
| "step": 4800, | |
| "train_runtime": 201490.3533, | |
| "train_tokens_per_second": 3122.054 | |
| }, | |
| { | |
| "epoch": 2.9259315589353614, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8.886330621429283e-05, | |
| "loss": 2.1946, | |
| "num_input_tokens_seen": 630374400, | |
| "step": 4810, | |
| "train_runtime": 201771.7145, | |
| "train_tokens_per_second": 3124.196 | |
| }, | |
| { | |
| "epoch": 2.9320152091254754, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8.792376611481729e-05, | |
| "loss": 2.2246, | |
| "num_input_tokens_seen": 631685120, | |
| "step": 4820, | |
| "train_runtime": 202053.0225, | |
| "train_tokens_per_second": 3126.333 | |
| }, | |
| { | |
| "epoch": 2.9380988593155895, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8.698815833825654e-05, | |
| "loss": 2.2071, | |
| "num_input_tokens_seen": 632995840, | |
| "step": 4830, | |
| "train_runtime": 202335.6315, | |
| "train_tokens_per_second": 3128.445 | |
| }, | |
| { | |
| "epoch": 2.9441825095057035, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8.605650558449588e-05, | |
| "loss": 2.1973, | |
| "num_input_tokens_seen": 634306560, | |
| "step": 4840, | |
| "train_runtime": 202616.803, | |
| "train_tokens_per_second": 3130.572 | |
| }, | |
| { | |
| "epoch": 2.9502661596958175, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8.512883045746297e-05, | |
| "loss": 2.2143, | |
| "num_input_tokens_seen": 635617280, | |
| "step": 4850, | |
| "train_runtime": 202898.4133, | |
| "train_tokens_per_second": 3132.687 | |
| }, | |
| { | |
| "epoch": 2.9563498098859315, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8.420515546457966e-05, | |
| "loss": 2.2054, | |
| "num_input_tokens_seen": 636928000, | |
| "step": 4860, | |
| "train_runtime": 203178.4938, | |
| "train_tokens_per_second": 3134.82 | |
| }, | |
| { | |
| "epoch": 2.9624334600760456, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8.32855030162157e-05, | |
| "loss": 2.2096, | |
| "num_input_tokens_seen": 638238720, | |
| "step": 4870, | |
| "train_runtime": 203457.2711, | |
| "train_tokens_per_second": 3136.967 | |
| }, | |
| { | |
| "epoch": 2.9685171102661596, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8.236989542514536e-05, | |
| "loss": 2.2334, | |
| "num_input_tokens_seen": 639549440, | |
| "step": 4880, | |
| "train_runtime": 203736.9483, | |
| "train_tokens_per_second": 3139.094 | |
| }, | |
| { | |
| "epoch": 2.9746007604562736, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8.145835490600548e-05, | |
| "loss": 2.2161, | |
| "num_input_tokens_seen": 640860160, | |
| "step": 4890, | |
| "train_runtime": 204018.1461, | |
| "train_tokens_per_second": 3141.192 | |
| }, | |
| { | |
| "epoch": 2.9806844106463877, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8.055090357475711e-05, | |
| "loss": 2.2009, | |
| "num_input_tokens_seen": 642170880, | |
| "step": 4900, | |
| "train_runtime": 204300.372, | |
| "train_tokens_per_second": 3143.268 | |
| }, | |
| { | |
| "epoch": 2.9867680608365017, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 7.964756344814892e-05, | |
| "loss": 2.2255, | |
| "num_input_tokens_seen": 643481600, | |
| "step": 4910, | |
| "train_runtime": 204582.9097, | |
| "train_tokens_per_second": 3145.334 | |
| }, | |
| { | |
| "epoch": 2.992851711026616, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.87483564431822e-05, | |
| "loss": 2.1963, | |
| "num_input_tokens_seen": 644792320, | |
| "step": 4920, | |
| "train_runtime": 204865.3517, | |
| "train_tokens_per_second": 3147.396 | |
| }, | |
| { | |
| "epoch": 2.9989353612167298, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 7.78533043765802e-05, | |
| "loss": 2.2164, | |
| "num_input_tokens_seen": 646103040, | |
| "step": 4930, | |
| "train_runtime": 205148.8225, | |
| "train_tokens_per_second": 3149.436 | |
| }, | |
| { | |
| "epoch": 3.0048669201520912, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 7.696242896425806e-05, | |
| "loss": 1.8253, | |
| "num_input_tokens_seen": 647372800, | |
| "step": 4940, | |
| "train_runtime": 205422.0038, | |
| "train_tokens_per_second": 3151.429 | |
| }, | |
| { | |
| "epoch": 3.0109505703422053, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 7.607575182079628e-05, | |
| "loss": 1.6715, | |
| "num_input_tokens_seen": 648683520, | |
| "step": 4950, | |
| "train_runtime": 205704.4519, | |
| "train_tokens_per_second": 3153.473 | |
| }, | |
| { | |
| "epoch": 3.0170342205323193, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 7.519329445891612e-05, | |
| "loss": 1.657, | |
| "num_input_tokens_seen": 649994240, | |
| "step": 4960, | |
| "train_runtime": 205986.9223, | |
| "train_tokens_per_second": 3155.512 | |
| }, | |
| { | |
| "epoch": 3.0231178707224333, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 7.431507828895793e-05, | |
| "loss": 1.6522, | |
| "num_input_tokens_seen": 651304960, | |
| "step": 4970, | |
| "train_runtime": 206268.5761, | |
| "train_tokens_per_second": 3157.558 | |
| }, | |
| { | |
| "epoch": 3.0292015209125474, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 7.344112461836108e-05, | |
| "loss": 1.6416, | |
| "num_input_tokens_seen": 652615680, | |
| "step": 4980, | |
| "train_runtime": 206547.7316, | |
| "train_tokens_per_second": 3159.636 | |
| }, | |
| { | |
| "epoch": 3.0352851711026614, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 7.25714546511478e-05, | |
| "loss": 1.6381, | |
| "num_input_tokens_seen": 653926400, | |
| "step": 4990, | |
| "train_runtime": 206828.4798, | |
| "train_tokens_per_second": 3161.685 | |
| }, | |
| { | |
| "epoch": 3.041368821292776, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 7.170608948740825e-05, | |
| "loss": 1.6629, | |
| "num_input_tokens_seen": 655237120, | |
| "step": 5000, | |
| "train_runtime": 207110.1126, | |
| "train_tokens_per_second": 3163.714 | |
| }, | |
| { | |
| "epoch": 3.04745247148289, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 7.084505012278857e-05, | |
| "loss": 1.6385, | |
| "num_input_tokens_seen": 656547840, | |
| "step": 5010, | |
| "train_runtime": 207400.0315, | |
| "train_tokens_per_second": 3165.611 | |
| }, | |
| { | |
| "epoch": 3.053536121673004, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 6.998835744798173e-05, | |
| "loss": 1.6357, | |
| "num_input_tokens_seen": 657858560, | |
| "step": 5020, | |
| "train_runtime": 207682.2079, | |
| "train_tokens_per_second": 3167.621 | |
| }, | |
| { | |
| "epoch": 3.059619771863118, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 6.91360322482202e-05, | |
| "loss": 1.6394, | |
| "num_input_tokens_seen": 659169280, | |
| "step": 5030, | |
| "train_runtime": 207963.6755, | |
| "train_tokens_per_second": 3169.637 | |
| }, | |
| { | |
| "epoch": 3.065703422053232, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 6.82880952027724e-05, | |
| "loss": 1.6288, | |
| "num_input_tokens_seen": 660480000, | |
| "step": 5040, | |
| "train_runtime": 208244.7333, | |
| "train_tokens_per_second": 3171.653 | |
| }, | |
| { | |
| "epoch": 3.071787072243346, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 6.744456688444042e-05, | |
| "loss": 1.614, | |
| "num_input_tokens_seen": 661790720, | |
| "step": 5050, | |
| "train_runtime": 208526.526, | |
| "train_tokens_per_second": 3173.652 | |
| }, | |
| { | |
| "epoch": 3.07787072243346, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 6.660546775906079e-05, | |
| "loss": 1.6317, | |
| "num_input_tokens_seen": 663101440, | |
| "step": 5060, | |
| "train_runtime": 208806.0903, | |
| "train_tokens_per_second": 3175.681 | |
| }, | |
| { | |
| "epoch": 3.083954372623574, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 6.577081818500838e-05, | |
| "loss": 1.6632, | |
| "num_input_tokens_seen": 664412160, | |
| "step": 5070, | |
| "train_runtime": 209085.8717, | |
| "train_tokens_per_second": 3177.7 | |
| }, | |
| { | |
| "epoch": 3.090038022813688, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 6.494063841270223e-05, | |
| "loss": 1.6428, | |
| "num_input_tokens_seen": 665722880, | |
| "step": 5080, | |
| "train_runtime": 209364.6273, | |
| "train_tokens_per_second": 3179.729 | |
| }, | |
| { | |
| "epoch": 3.096121673003802, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 6.411494858411415e-05, | |
| "loss": 1.6511, | |
| "num_input_tokens_seen": 667033600, | |
| "step": 5090, | |
| "train_runtime": 209643.9003, | |
| "train_tokens_per_second": 3181.746 | |
| }, | |
| { | |
| "epoch": 3.102205323193916, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 6.329376873228022e-05, | |
| "loss": 1.659, | |
| "num_input_tokens_seen": 668344320, | |
| "step": 5100, | |
| "train_runtime": 209922.9535, | |
| "train_tokens_per_second": 3183.76 | |
| }, | |
| { | |
| "epoch": 3.10828897338403, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 6.247711878081463e-05, | |
| "loss": 1.6451, | |
| "num_input_tokens_seen": 669655040, | |
| "step": 5110, | |
| "train_runtime": 210201.0119, | |
| "train_tokens_per_second": 3185.784 | |
| }, | |
| { | |
| "epoch": 3.1143726235741447, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 6.16650185434261e-05, | |
| "loss": 1.6196, | |
| "num_input_tokens_seen": 670965760, | |
| "step": 5120, | |
| "train_runtime": 210479.9689, | |
| "train_tokens_per_second": 3187.789 | |
| }, | |
| { | |
| "epoch": 3.1204562737642587, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 6.085748772343772e-05, | |
| "loss": 1.6467, | |
| "num_input_tokens_seen": 672276480, | |
| "step": 5130, | |
| "train_runtime": 210757.9701, | |
| "train_tokens_per_second": 3189.803 | |
| }, | |
| { | |
| "epoch": 3.1265399239543727, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 6.005454591330842e-05, | |
| "loss": 1.6439, | |
| "num_input_tokens_seen": 673587200, | |
| "step": 5140, | |
| "train_runtime": 211036.4458, | |
| "train_tokens_per_second": 3191.805 | |
| }, | |
| { | |
| "epoch": 3.1326235741444868, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 5.9256212594157505e-05, | |
| "loss": 1.6398, | |
| "num_input_tokens_seen": 674897920, | |
| "step": 5150, | |
| "train_runtime": 211317.0466, | |
| "train_tokens_per_second": 3193.769 | |
| }, | |
| { | |
| "epoch": 3.138707224334601, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 5.8462507135292524e-05, | |
| "loss": 1.6413, | |
| "num_input_tokens_seen": 676208640, | |
| "step": 5160, | |
| "train_runtime": 211596.8045, | |
| "train_tokens_per_second": 3195.741 | |
| }, | |
| { | |
| "epoch": 3.144790874524715, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 5.7673448793738956e-05, | |
| "loss": 1.6247, | |
| "num_input_tokens_seen": 677519360, | |
| "step": 5170, | |
| "train_runtime": 211876.5776, | |
| "train_tokens_per_second": 3197.708 | |
| }, | |
| { | |
| "epoch": 3.150874524714829, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 5.688905671377304e-05, | |
| "loss": 1.6543, | |
| "num_input_tokens_seen": 678830080, | |
| "step": 5180, | |
| "train_runtime": 212157.0767, | |
| "train_tokens_per_second": 3199.658 | |
| }, | |
| { | |
| "epoch": 3.156958174904943, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 5.610934992645736e-05, | |
| "loss": 1.6249, | |
| "num_input_tokens_seen": 680140800, | |
| "step": 5190, | |
| "train_runtime": 212435.2324, | |
| "train_tokens_per_second": 3201.638 | |
| }, | |
| { | |
| "epoch": 3.163041825095057, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 5.533434734917914e-05, | |
| "loss": 1.6163, | |
| "num_input_tokens_seen": 681451520, | |
| "step": 5200, | |
| "train_runtime": 212713.6486, | |
| "train_tokens_per_second": 3203.61 | |
| }, | |
| { | |
| "epoch": 3.169125475285171, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 5.456406778519099e-05, | |
| "loss": 1.6824, | |
| "num_input_tokens_seen": 682762240, | |
| "step": 5210, | |
| "train_runtime": 212991.8229, | |
| "train_tokens_per_second": 3205.58 | |
| }, | |
| { | |
| "epoch": 3.175209125475285, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 5.3798529923155286e-05, | |
| "loss": 1.6088, | |
| "num_input_tokens_seen": 684072960, | |
| "step": 5220, | |
| "train_runtime": 213269.9689, | |
| "train_tokens_per_second": 3207.545 | |
| }, | |
| { | |
| "epoch": 3.1812927756653995, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 5.303775233669003e-05, | |
| "loss": 1.6733, | |
| "num_input_tokens_seen": 685383680, | |
| "step": 5230, | |
| "train_runtime": 213547.7284, | |
| "train_tokens_per_second": 3209.511 | |
| }, | |
| { | |
| "epoch": 3.1873764258555135, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 5.22817534839187e-05, | |
| "loss": 1.648, | |
| "num_input_tokens_seen": 686694400, | |
| "step": 5240, | |
| "train_runtime": 213827.0794, | |
| "train_tokens_per_second": 3211.447 | |
| }, | |
| { | |
| "epoch": 3.1934600760456275, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 5.153055170702231e-05, | |
| "loss": 1.6205, | |
| "num_input_tokens_seen": 688005120, | |
| "step": 5250, | |
| "train_runtime": 214106.7604, | |
| "train_tokens_per_second": 3213.374 | |
| }, | |
| { | |
| "epoch": 3.1995437262357416, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 5.078416523179427e-05, | |
| "loss": 1.6327, | |
| "num_input_tokens_seen": 689315840, | |
| "step": 5260, | |
| "train_runtime": 214384.661, | |
| "train_tokens_per_second": 3215.323 | |
| }, | |
| { | |
| "epoch": 3.2056273764258556, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 5.00426121671983e-05, | |
| "loss": 1.6355, | |
| "num_input_tokens_seen": 690626560, | |
| "step": 5270, | |
| "train_runtime": 214663.7036, | |
| "train_tokens_per_second": 3217.249 | |
| }, | |
| { | |
| "epoch": 3.2117110266159696, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 4.930591050492913e-05, | |
| "loss": 1.6226, | |
| "num_input_tokens_seen": 691937280, | |
| "step": 5280, | |
| "train_runtime": 214941.4149, | |
| "train_tokens_per_second": 3219.19 | |
| }, | |
| { | |
| "epoch": 3.2177946768060837, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 4.857407811897566e-05, | |
| "loss": 1.6398, | |
| "num_input_tokens_seen": 693248000, | |
| "step": 5290, | |
| "train_runtime": 215219.1478, | |
| "train_tokens_per_second": 3221.126 | |
| }, | |
| { | |
| "epoch": 3.2238783269961977, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 4.7847132765187676e-05, | |
| "loss": 1.6255, | |
| "num_input_tokens_seen": 694558720, | |
| "step": 5300, | |
| "train_runtime": 215497.1854, | |
| "train_tokens_per_second": 3223.052 | |
| }, | |
| { | |
| "epoch": 3.2299619771863117, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 4.712509208084506e-05, | |
| "loss": 1.6393, | |
| "num_input_tokens_seen": 695869440, | |
| "step": 5310, | |
| "train_runtime": 215774.8903, | |
| "train_tokens_per_second": 3224.979 | |
| }, | |
| { | |
| "epoch": 3.2360456273764258, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 4.640797358422938e-05, | |
| "loss": 1.6536, | |
| "num_input_tokens_seen": 697180160, | |
| "step": 5320, | |
| "train_runtime": 216052.2699, | |
| "train_tokens_per_second": 3226.905 | |
| }, | |
| { | |
| "epoch": 3.24212927756654, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 4.569579467419943e-05, | |
| "loss": 1.6502, | |
| "num_input_tokens_seen": 698490880, | |
| "step": 5330, | |
| "train_runtime": 216334.0737, | |
| "train_tokens_per_second": 3228.76 | |
| }, | |
| { | |
| "epoch": 3.248212927756654, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 4.49885726297688e-05, | |
| "loss": 1.6125, | |
| "num_input_tokens_seen": 699801600, | |
| "step": 5340, | |
| "train_runtime": 216611.371, | |
| "train_tokens_per_second": 3230.678 | |
| }, | |
| { | |
| "epoch": 3.254296577946768, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 4.428632460968671e-05, | |
| "loss": 1.6049, | |
| "num_input_tokens_seen": 701112320, | |
| "step": 5350, | |
| "train_runtime": 216889.4967, | |
| "train_tokens_per_second": 3232.578 | |
| }, | |
| { | |
| "epoch": 3.2603802281368823, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 4.358906765202167e-05, | |
| "loss": 1.6302, | |
| "num_input_tokens_seen": 702423040, | |
| "step": 5360, | |
| "train_runtime": 217167.1521, | |
| "train_tokens_per_second": 3234.481 | |
| }, | |
| { | |
| "epoch": 3.2664638783269964, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 4.2896818673748256e-05, | |
| "loss": 1.6092, | |
| "num_input_tokens_seen": 703733760, | |
| "step": 5370, | |
| "train_runtime": 217444.6002, | |
| "train_tokens_per_second": 3236.382 | |
| }, | |
| { | |
| "epoch": 3.2725475285171104, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 4.220959447033629e-05, | |
| "loss": 1.668, | |
| "num_input_tokens_seen": 705044480, | |
| "step": 5380, | |
| "train_runtime": 217722.1571, | |
| "train_tokens_per_second": 3238.276 | |
| }, | |
| { | |
| "epoch": 3.2786311787072244, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 4.1527411715343774e-05, | |
| "loss": 1.6002, | |
| "num_input_tokens_seen": 706355200, | |
| "step": 5390, | |
| "train_runtime": 218000.8989, | |
| "train_tokens_per_second": 3240.148 | |
| }, | |
| { | |
| "epoch": 3.2847148288973385, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 4.0850286960012335e-05, | |
| "loss": 1.6425, | |
| "num_input_tokens_seen": 707665920, | |
| "step": 5400, | |
| "train_runtime": 218279.0655, | |
| "train_tokens_per_second": 3242.024 | |
| }, | |
| { | |
| "epoch": 3.2907984790874525, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 4.01782366328651e-05, | |
| "loss": 1.6527, | |
| "num_input_tokens_seen": 708976640, | |
| "step": 5410, | |
| "train_runtime": 218558.3443, | |
| "train_tokens_per_second": 3243.878 | |
| }, | |
| { | |
| "epoch": 3.2968821292775665, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 3.951127703930882e-05, | |
| "loss": 1.6389, | |
| "num_input_tokens_seen": 710287360, | |
| "step": 5420, | |
| "train_runtime": 218838.5125, | |
| "train_tokens_per_second": 3245.715 | |
| }, | |
| { | |
| "epoch": 3.3029657794676806, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 3.884942436123784e-05, | |
| "loss": 1.625, | |
| "num_input_tokens_seen": 711598080, | |
| "step": 5430, | |
| "train_runtime": 219119.7533, | |
| "train_tokens_per_second": 3247.53 | |
| }, | |
| { | |
| "epoch": 3.3090494296577946, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 3.819269465664157e-05, | |
| "loss": 1.6137, | |
| "num_input_tokens_seen": 712908800, | |
| "step": 5440, | |
| "train_runtime": 219397.4568, | |
| "train_tokens_per_second": 3249.394 | |
| }, | |
| { | |
| "epoch": 3.3151330798479086, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 3.754110385921502e-05, | |
| "loss": 1.6283, | |
| "num_input_tokens_seen": 714219520, | |
| "step": 5450, | |
| "train_runtime": 219677.914, | |
| "train_tokens_per_second": 3251.212 | |
| }, | |
| { | |
| "epoch": 3.3212167300380226, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 3.68946677779719e-05, | |
| "loss": 1.6457, | |
| "num_input_tokens_seen": 715530240, | |
| "step": 5460, | |
| "train_runtime": 219958.1204, | |
| "train_tokens_per_second": 3253.029 | |
| }, | |
| { | |
| "epoch": 3.327300380228137, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 3.6253402096861456e-05, | |
| "loss": 1.6237, | |
| "num_input_tokens_seen": 716840960, | |
| "step": 5470, | |
| "train_runtime": 220239.0455, | |
| "train_tokens_per_second": 3254.831 | |
| }, | |
| { | |
| "epoch": 3.3333840304182507, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 3.56173223743877e-05, | |
| "loss": 1.6404, | |
| "num_input_tokens_seen": 718151680, | |
| "step": 5480, | |
| "train_runtime": 220519.7735, | |
| "train_tokens_per_second": 3256.632 | |
| }, | |
| { | |
| "epoch": 3.339467680608365, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 3.4986444043231926e-05, | |
| "loss": 1.6324, | |
| "num_input_tokens_seen": 719462400, | |
| "step": 5490, | |
| "train_runtime": 220800.5642, | |
| "train_tokens_per_second": 3258.426 | |
| }, | |
| { | |
| "epoch": 3.345551330798479, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 3.436078240987836e-05, | |
| "loss": 1.6478, | |
| "num_input_tokens_seen": 720773120, | |
| "step": 5500, | |
| "train_runtime": 221081.8118, | |
| "train_tokens_per_second": 3260.21 | |
| }, | |
| { | |
| "epoch": 3.3516349809885932, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 3.3740352654242856e-05, | |
| "loss": 1.6567, | |
| "num_input_tokens_seen": 722083840, | |
| "step": 5510, | |
| "train_runtime": 221367.5106, | |
| "train_tokens_per_second": 3261.923 | |
| }, | |
| { | |
| "epoch": 3.3577186311787073, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 3.312516982930422e-05, | |
| "loss": 1.6376, | |
| "num_input_tokens_seen": 723394560, | |
| "step": 5520, | |
| "train_runtime": 221645.3627, | |
| "train_tokens_per_second": 3263.748 | |
| }, | |
| { | |
| "epoch": 3.3638022813688213, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 3.251524886073964e-05, | |
| "loss": 1.6641, | |
| "num_input_tokens_seen": 724705280, | |
| "step": 5530, | |
| "train_runtime": 221924.8737, | |
| "train_tokens_per_second": 3265.543 | |
| }, | |
| { | |
| "epoch": 3.3698859315589353, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 3.191060454656205e-05, | |
| "loss": 1.6531, | |
| "num_input_tokens_seen": 726016000, | |
| "step": 5540, | |
| "train_runtime": 222203.4442, | |
| "train_tokens_per_second": 3267.348 | |
| }, | |
| { | |
| "epoch": 3.3759695817490494, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 3.1311251556761064e-05, | |
| "loss": 1.6111, | |
| "num_input_tokens_seen": 727326720, | |
| "step": 5550, | |
| "train_runtime": 222483.2619, | |
| "train_tokens_per_second": 3269.13 | |
| }, | |
| { | |
| "epoch": 3.3820532319391634, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 3.071720443294748e-05, | |
| "loss": 1.6169, | |
| "num_input_tokens_seen": 728637440, | |
| "step": 5560, | |
| "train_runtime": 222764.1882, | |
| "train_tokens_per_second": 3270.891 | |
| }, | |
| { | |
| "epoch": 3.3881368821292774, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 3.0128477588000052e-05, | |
| "loss": 1.6792, | |
| "num_input_tokens_seen": 729948160, | |
| "step": 5570, | |
| "train_runtime": 223046.4055, | |
| "train_tokens_per_second": 3272.629 | |
| }, | |
| { | |
| "epoch": 3.3942205323193915, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 2.954508530571609e-05, | |
| "loss": 1.6474, | |
| "num_input_tokens_seen": 731258880, | |
| "step": 5580, | |
| "train_runtime": 223331.3629, | |
| "train_tokens_per_second": 3274.322 | |
| }, | |
| { | |
| "epoch": 3.4003041825095055, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 2.8967041740464633e-05, | |
| "loss": 1.6186, | |
| "num_input_tokens_seen": 732569600, | |
| "step": 5590, | |
| "train_runtime": 223620.5675, | |
| "train_tokens_per_second": 3275.949 | |
| }, | |
| { | |
| "epoch": 3.40638783269962, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 2.8394360916843388e-05, | |
| "loss": 1.6077, | |
| "num_input_tokens_seen": 733880320, | |
| "step": 5600, | |
| "train_runtime": 224039.407, | |
| "train_tokens_per_second": 3275.675 | |
| }, | |
| { | |
| "epoch": 3.412471482889734, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 2.782705672933794e-05, | |
| "loss": 1.6165, | |
| "num_input_tokens_seen": 735191040, | |
| "step": 5610, | |
| "train_runtime": 224502.8879, | |
| "train_tokens_per_second": 3274.751 | |
| }, | |
| { | |
| "epoch": 3.418555133079848, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 2.7265142941985316e-05, | |
| "loss": 1.6244, | |
| "num_input_tokens_seen": 736501760, | |
| "step": 5620, | |
| "train_runtime": 224972.3357, | |
| "train_tokens_per_second": 3273.744 | |
| }, | |
| { | |
| "epoch": 3.424638783269962, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 2.6708633188039455e-05, | |
| "loss": 1.6326, | |
| "num_input_tokens_seen": 737812480, | |
| "step": 5630, | |
| "train_runtime": 225440.8866, | |
| "train_tokens_per_second": 3272.754 | |
| }, | |
| { | |
| "epoch": 3.430722433460076, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 2.6157540969640652e-05, | |
| "loss": 1.6292, | |
| "num_input_tokens_seen": 739123200, | |
| "step": 5640, | |
| "train_runtime": 225911.9651, | |
| "train_tokens_per_second": 3271.731 | |
| }, | |
| { | |
| "epoch": 3.43680608365019, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 2.5611879657488096e-05, | |
| "loss": 1.6367, | |
| "num_input_tokens_seen": 740433920, | |
| "step": 5650, | |
| "train_runtime": 226382.517, | |
| "train_tokens_per_second": 3270.72 | |
| }, | |
| { | |
| "epoch": 3.442889733840304, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 2.5071662490515233e-05, | |
| "loss": 1.6283, | |
| "num_input_tokens_seen": 741744640, | |
| "step": 5660, | |
| "train_runtime": 226853.4792, | |
| "train_tokens_per_second": 3269.708 | |
| }, | |
| { | |
| "epoch": 3.448973384030418, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 2.4536902575568758e-05, | |
| "loss": 1.642, | |
| "num_input_tokens_seen": 743055360, | |
| "step": 5670, | |
| "train_runtime": 227323.0676, | |
| "train_tokens_per_second": 3268.72 | |
| }, | |
| { | |
| "epoch": 3.4550570342205322, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 2.4007612887090524e-05, | |
| "loss": 1.6326, | |
| "num_input_tokens_seen": 744366080, | |
| "step": 5680, | |
| "train_runtime": 227792.7324, | |
| "train_tokens_per_second": 3267.734 | |
| }, | |
| { | |
| "epoch": 3.4611406844106463, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 2.3483806266802632e-05, | |
| "loss": 1.6121, | |
| "num_input_tokens_seen": 745676800, | |
| "step": 5690, | |
| "train_runtime": 228265.1981, | |
| "train_tokens_per_second": 3266.713 | |
| }, | |
| { | |
| "epoch": 3.4672243346007603, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 2.2965495423396126e-05, | |
| "loss": 1.6207, | |
| "num_input_tokens_seen": 746987520, | |
| "step": 5700, | |
| "train_runtime": 228741.9712, | |
| "train_tokens_per_second": 3265.634 | |
| }, | |
| { | |
| "epoch": 3.4733079847908748, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 2.2452692932222575e-05, | |
| "loss": 1.6266, | |
| "num_input_tokens_seen": 748298240, | |
| "step": 5710, | |
| "train_runtime": 229218.7728, | |
| "train_tokens_per_second": 3264.559 | |
| }, | |
| { | |
| "epoch": 3.4793916349809884, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 2.194541123498872e-05, | |
| "loss": 1.6278, | |
| "num_input_tokens_seen": 749608960, | |
| "step": 5720, | |
| "train_runtime": 229696.1749, | |
| "train_tokens_per_second": 3263.48 | |
| }, | |
| { | |
| "epoch": 3.485475285171103, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 2.144366263945488e-05, | |
| "loss": 1.6114, | |
| "num_input_tokens_seen": 750919680, | |
| "step": 5730, | |
| "train_runtime": 230173.4943, | |
| "train_tokens_per_second": 3262.407 | |
| }, | |
| { | |
| "epoch": 3.491558935361217, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 2.094745931913633e-05, | |
| "loss": 1.6453, | |
| "num_input_tokens_seen": 752230400, | |
| "step": 5740, | |
| "train_runtime": 230651.4755, | |
| "train_tokens_per_second": 3261.329 | |
| }, | |
| { | |
| "epoch": 3.497642585551331, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 2.0456813313007777e-05, | |
| "loss": 1.6273, | |
| "num_input_tokens_seen": 753541120, | |
| "step": 5750, | |
| "train_runtime": 231130.0952, | |
| "train_tokens_per_second": 3260.247 | |
| }, | |
| { | |
| "epoch": 3.503726235741445, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 1.997173652521142e-05, | |
| "loss": 1.6474, | |
| "num_input_tokens_seen": 754851840, | |
| "step": 5760, | |
| "train_runtime": 231605.5777, | |
| "train_tokens_per_second": 3259.213 | |
| }, | |
| { | |
| "epoch": 3.509809885931559, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 1.9492240724768128e-05, | |
| "loss": 1.6206, | |
| "num_input_tokens_seen": 756162560, | |
| "step": 5770, | |
| "train_runtime": 232080.7709, | |
| "train_tokens_per_second": 3258.187 | |
| }, | |
| { | |
| "epoch": 3.515893536121673, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.9018337545291668e-05, | |
| "loss": 1.6258, | |
| "num_input_tokens_seen": 757473280, | |
| "step": 5780, | |
| "train_runtime": 232556.6923, | |
| "train_tokens_per_second": 3257.155 | |
| }, | |
| { | |
| "epoch": 3.521977186311787, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 1.8550038484706738e-05, | |
| "loss": 1.6021, | |
| "num_input_tokens_seen": 758784000, | |
| "step": 5790, | |
| "train_runtime": 233033.1187, | |
| "train_tokens_per_second": 3256.121 | |
| }, | |
| { | |
| "epoch": 3.528060836501901, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 1.8087354904969987e-05, | |
| "loss": 1.6396, | |
| "num_input_tokens_seen": 760094720, | |
| "step": 5800, | |
| "train_runtime": 233507.6875, | |
| "train_tokens_per_second": 3255.116 | |
| }, | |
| { | |
| "epoch": 3.534144486692015, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.7630298031794084e-05, | |
| "loss": 1.6261, | |
| "num_input_tokens_seen": 761405440, | |
| "step": 5810, | |
| "train_runtime": 233983.9121, | |
| "train_tokens_per_second": 3254.093 | |
| }, | |
| { | |
| "epoch": 3.540228136882129, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 1.717887895437559e-05, | |
| "loss": 1.6206, | |
| "num_input_tokens_seen": 762716160, | |
| "step": 5820, | |
| "train_runtime": 234460.178, | |
| "train_tokens_per_second": 3253.073 | |
| }, | |
| { | |
| "epoch": 3.546311787072243, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 1.6733108625125866e-05, | |
| "loss": 1.6487, | |
| "num_input_tokens_seen": 764026880, | |
| "step": 5830, | |
| "train_runtime": 234938.1446, | |
| "train_tokens_per_second": 3252.034 | |
| }, | |
| { | |
| "epoch": 3.5523954372623576, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 1.62929978594053e-05, | |
| "loss": 1.6688, | |
| "num_input_tokens_seen": 765337600, | |
| "step": 5840, | |
| "train_runtime": 235413.0674, | |
| "train_tokens_per_second": 3251.041 | |
| }, | |
| { | |
| "epoch": 3.558479087452471, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 1.5858557335260926e-05, | |
| "loss": 1.6187, | |
| "num_input_tokens_seen": 766648320, | |
| "step": 5850, | |
| "train_runtime": 235888.1153, | |
| "train_tokens_per_second": 3250.051 | |
| }, | |
| { | |
| "epoch": 3.5645627376425857, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 1.5429797593167372e-05, | |
| "loss": 1.5958, | |
| "num_input_tokens_seen": 767959040, | |
| "step": 5860, | |
| "train_runtime": 236366.2531, | |
| "train_tokens_per_second": 3249.022 | |
| }, | |
| { | |
| "epoch": 3.5706463878326997, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 1.5006729035771078e-05, | |
| "loss": 1.6523, | |
| "num_input_tokens_seen": 769269760, | |
| "step": 5870, | |
| "train_runtime": 236845.1897, | |
| "train_tokens_per_second": 3247.986 | |
| }, | |
| { | |
| "epoch": 3.5767300380228138, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 1.4589361927637907e-05, | |
| "loss": 1.6243, | |
| "num_input_tokens_seen": 770580480, | |
| "step": 5880, | |
| "train_runtime": 237322.8914, | |
| "train_tokens_per_second": 3246.971 | |
| }, | |
| { | |
| "epoch": 3.582813688212928, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.417770639500418e-05, | |
| "loss": 1.6093, | |
| "num_input_tokens_seen": 771891200, | |
| "step": 5890, | |
| "train_runtime": 237801.8938, | |
| "train_tokens_per_second": 3245.942 | |
| }, | |
| { | |
| "epoch": 3.588897338403042, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 1.3771772425530932e-05, | |
| "loss": 1.6162, | |
| "num_input_tokens_seen": 773201920, | |
| "step": 5900, | |
| "train_runtime": 238279.7947, | |
| "train_tokens_per_second": 3244.933 | |
| }, | |
| { | |
| "epoch": 3.594980988593156, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.3371569868061622e-05, | |
| "loss": 1.6657, | |
| "num_input_tokens_seen": 774512640, | |
| "step": 5910, | |
| "train_runtime": 238759.5079, | |
| "train_tokens_per_second": 3243.903 | |
| }, | |
| { | |
| "epoch": 3.60106463878327, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.2977108432383006e-05, | |
| "loss": 1.5992, | |
| "num_input_tokens_seen": 775823360, | |
| "step": 5920, | |
| "train_runtime": 239239.9149, | |
| "train_tokens_per_second": 3242.868 | |
| }, | |
| { | |
| "epoch": 3.607148288973384, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.2588397688989923e-05, | |
| "loss": 1.6096, | |
| "num_input_tokens_seen": 777134080, | |
| "step": 5930, | |
| "train_runtime": 239721.0782, | |
| "train_tokens_per_second": 3241.826 | |
| }, | |
| { | |
| "epoch": 3.613231939163498, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.2205447068852766e-05, | |
| "loss": 1.6149, | |
| "num_input_tokens_seen": 778444800, | |
| "step": 5940, | |
| "train_runtime": 240200.8963, | |
| "train_tokens_per_second": 3240.807 | |
| }, | |
| { | |
| "epoch": 3.6193155893536124, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 1.1828265863188692e-05, | |
| "loss": 1.6129, | |
| "num_input_tokens_seen": 779755520, | |
| "step": 5950, | |
| "train_runtime": 240678.7967, | |
| "train_tokens_per_second": 3239.818 | |
| }, | |
| { | |
| "epoch": 3.625399239543726, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.145686322323644e-05, | |
| "loss": 1.6548, | |
| "num_input_tokens_seen": 781066240, | |
| "step": 5960, | |
| "train_runtime": 241156.9834, | |
| "train_tokens_per_second": 3238.829 | |
| }, | |
| { | |
| "epoch": 3.6314828897338405, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.109124816003404e-05, | |
| "loss": 1.6589, | |
| "num_input_tokens_seen": 782376960, | |
| "step": 5970, | |
| "train_runtime": 241634.9202, | |
| "train_tokens_per_second": 3237.847 | |
| }, | |
| { | |
| "epoch": 3.6375665399239545, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.0731429544200311e-05, | |
| "loss": 1.667, | |
| "num_input_tokens_seen": 783687680, | |
| "step": 5980, | |
| "train_runtime": 242110.8067, | |
| "train_tokens_per_second": 3236.897 | |
| }, | |
| { | |
| "epoch": 3.6436501901140685, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.0377416105719628e-05, | |
| "loss": 1.6387, | |
| "num_input_tokens_seen": 784998400, | |
| "step": 5990, | |
| "train_runtime": 242587.3722, | |
| "train_tokens_per_second": 3235.941 | |
| }, | |
| { | |
| "epoch": 3.6497338403041826, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.0029216433730115e-05, | |
| "loss": 1.6584, | |
| "num_input_tokens_seen": 786309120, | |
| "step": 6000, | |
| "train_runtime": 243064.283, | |
| "train_tokens_per_second": 3234.984 | |
| }, | |
| { | |
| "epoch": 3.6558174904942966, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 9.686838976315172e-06, | |
| "loss": 1.6483, | |
| "num_input_tokens_seen": 787619840, | |
| "step": 6010, | |
| "train_runtime": 243540.2153, | |
| "train_tokens_per_second": 3234.044 | |
| }, | |
| { | |
| "epoch": 3.6619011406844106, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 9.350292040298664e-06, | |
| "loss": 1.6513, | |
| "num_input_tokens_seen": 788930560, | |
| "step": 6020, | |
| "train_runtime": 244016.4696, | |
| "train_tokens_per_second": 3233.104 | |
| }, | |
| { | |
| "epoch": 3.6679847908745247, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 9.019583791043284e-06, | |
| "loss": 1.6027, | |
| "num_input_tokens_seen": 790241280, | |
| "step": 6030, | |
| "train_runtime": 244495.7045, | |
| "train_tokens_per_second": 3232.127 | |
| }, | |
| { | |
| "epoch": 3.6740684410646387, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8.694722252252346e-06, | |
| "loss": 1.618, | |
| "num_input_tokens_seen": 791552000, | |
| "step": 6040, | |
| "train_runtime": 244974.1179, | |
| "train_tokens_per_second": 3231.166 | |
| }, | |
| { | |
| "epoch": 3.6801520912547527, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8.375715305775327e-06, | |
| "loss": 1.6454, | |
| "num_input_tokens_seen": 792862720, | |
| "step": 6050, | |
| "train_runtime": 245450.5375, | |
| "train_tokens_per_second": 3230.234 | |
| }, | |
| { | |
| "epoch": 3.6862357414448668, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8.06257069141647e-06, | |
| "loss": 1.5994, | |
| "num_input_tokens_seen": 794173440, | |
| "step": 6060, | |
| "train_runtime": 245927.9251, | |
| "train_tokens_per_second": 3229.293 | |
| }, | |
| { | |
| "epoch": 3.692319391634981, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 7.755296006747098e-06, | |
| "loss": 1.6254, | |
| "num_input_tokens_seen": 795484160, | |
| "step": 6070, | |
| "train_runtime": 246405.2565, | |
| "train_tokens_per_second": 3228.357 | |
| }, | |
| { | |
| "epoch": 3.6984030418250953, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 7.453898706921203e-06, | |
| "loss": 1.6246, | |
| "num_input_tokens_seen": 796794880, | |
| "step": 6080, | |
| "train_runtime": 246883.6093, | |
| "train_tokens_per_second": 3227.411 | |
| }, | |
| { | |
| "epoch": 3.704486692015209, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 7.1583861044946515e-06, | |
| "loss": 1.6219, | |
| "num_input_tokens_seen": 798105600, | |
| "step": 6090, | |
| "train_runtime": 247363.466, | |
| "train_tokens_per_second": 3226.449 | |
| }, | |
| { | |
| "epoch": 3.7105703422053233, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 6.868765369247604e-06, | |
| "loss": 1.6234, | |
| "num_input_tokens_seen": 799416320, | |
| "step": 6100, | |
| "train_runtime": 247842.3464, | |
| "train_tokens_per_second": 3225.503 | |
| }, | |
| { | |
| "epoch": 3.7166539923954374, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 6.585043528010759e-06, | |
| "loss": 1.6664, | |
| "num_input_tokens_seen": 800727040, | |
| "step": 6110, | |
| "train_runtime": 248320.6683, | |
| "train_tokens_per_second": 3224.569 | |
| }, | |
| { | |
| "epoch": 3.7227376425855514, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 6.30722746449483e-06, | |
| "loss": 1.6222, | |
| "num_input_tokens_seen": 802037760, | |
| "step": 6120, | |
| "train_runtime": 248798.6552, | |
| "train_tokens_per_second": 3223.642 | |
| }, | |
| { | |
| "epoch": 3.7288212927756654, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 6.035323919123342e-06, | |
| "loss": 1.605, | |
| "num_input_tokens_seen": 803348480, | |
| "step": 6130, | |
| "train_runtime": 249277.2337, | |
| "train_tokens_per_second": 3222.711 | |
| }, | |
| { | |
| "epoch": 3.7349049429657795, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 5.769339488869374e-06, | |
| "loss": 1.6482, | |
| "num_input_tokens_seen": 804659200, | |
| "step": 6140, | |
| "train_runtime": 249757.3827, | |
| "train_tokens_per_second": 3221.763 | |
| }, | |
| { | |
| "epoch": 3.7409885931558935, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 5.509280627095298e-06, | |
| "loss": 1.6258, | |
| "num_input_tokens_seen": 805969920, | |
| "step": 6150, | |
| "train_runtime": 250236.5617, | |
| "train_tokens_per_second": 3220.832 | |
| }, | |
| { | |
| "epoch": 3.7470722433460075, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 5.2551536433962655e-06, | |
| "loss": 1.6281, | |
| "num_input_tokens_seen": 807280640, | |
| "step": 6160, | |
| "train_runtime": 250715.0321, | |
| "train_tokens_per_second": 3219.913 | |
| }, | |
| { | |
| "epoch": 3.7531558935361216, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 5.006964703447137e-06, | |
| "loss": 1.6278, | |
| "num_input_tokens_seen": 808591360, | |
| "step": 6170, | |
| "train_runtime": 251193.7755, | |
| "train_tokens_per_second": 3218.994 | |
| }, | |
| { | |
| "epoch": 3.7592395437262356, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 4.764719828852876e-06, | |
| "loss": 1.6374, | |
| "num_input_tokens_seen": 809902080, | |
| "step": 6180, | |
| "train_runtime": 251672.2591, | |
| "train_tokens_per_second": 3218.082 | |
| }, | |
| { | |
| "epoch": 3.76532319391635, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 4.5284248970025035e-06, | |
| "loss": 1.627, | |
| "num_input_tokens_seen": 811212800, | |
| "step": 6190, | |
| "train_runtime": 252151.617, | |
| "train_tokens_per_second": 3217.163 | |
| }, | |
| { | |
| "epoch": 3.7714068441064637, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 4.2980856409263466e-06, | |
| "loss": 1.6303, | |
| "num_input_tokens_seen": 812523520, | |
| "step": 6200, | |
| "train_runtime": 252628.2077, | |
| "train_tokens_per_second": 3216.282 | |
| }, | |
| { | |
| "epoch": 3.777490494296578, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 4.073707649157182e-06, | |
| "loss": 1.6597, | |
| "num_input_tokens_seen": 813834240, | |
| "step": 6210, | |
| "train_runtime": 253105.2476, | |
| "train_tokens_per_second": 3215.399 | |
| }, | |
| { | |
| "epoch": 3.783574144486692, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 3.855296365594424e-06, | |
| "loss": 1.5916, | |
| "num_input_tokens_seen": 815144960, | |
| "step": 6220, | |
| "train_runtime": 253583.0587, | |
| "train_tokens_per_second": 3214.509 | |
| }, | |
| { | |
| "epoch": 3.789657794676806, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 3.642857089372148e-06, | |
| "loss": 1.6516, | |
| "num_input_tokens_seen": 816455680, | |
| "step": 6230, | |
| "train_runtime": 254061.1301, | |
| "train_tokens_per_second": 3213.619 | |
| }, | |
| { | |
| "epoch": 3.7957414448669202, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 3.436394974730528e-06, | |
| "loss": 1.6169, | |
| "num_input_tokens_seen": 817766400, | |
| "step": 6240, | |
| "train_runtime": 254539.0149, | |
| "train_tokens_per_second": 3212.735 | |
| }, | |
| { | |
| "epoch": 3.8018250950570343, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 3.23591503089074e-06, | |
| "loss": 1.6236, | |
| "num_input_tokens_seen": 819077120, | |
| "step": 6250, | |
| "train_runtime": 255018.4108, | |
| "train_tokens_per_second": 3211.835 | |
| }, | |
| { | |
| "epoch": 3.8079087452471483, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 3.041422121933507e-06, | |
| "loss": 1.6347, | |
| "num_input_tokens_seen": 820387840, | |
| "step": 6260, | |
| "train_runtime": 255495.0232, | |
| "train_tokens_per_second": 3210.974 | |
| }, | |
| { | |
| "epoch": 3.8139923954372623, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 2.852920966680883e-06, | |
| "loss": 1.6444, | |
| "num_input_tokens_seen": 821698560, | |
| "step": 6270, | |
| "train_runtime": 255977.0025, | |
| "train_tokens_per_second": 3210.048 | |
| }, | |
| { | |
| "epoch": 3.8200760456273763, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 2.670416138582099e-06, | |
| "loss": 1.657, | |
| "num_input_tokens_seen": 823009280, | |
| "step": 6280, | |
| "train_runtime": 256458.9092, | |
| "train_tokens_per_second": 3209.127 | |
| }, | |
| { | |
| "epoch": 3.8261596958174904, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 2.4939120656022607e-06, | |
| "loss": 1.6014, | |
| "num_input_tokens_seen": 824320000, | |
| "step": 6290, | |
| "train_runtime": 256940.8942, | |
| "train_tokens_per_second": 3208.209 | |
| }, | |
| { | |
| "epoch": 3.8322433460076044, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 2.3234130301151568e-06, | |
| "loss": 1.6304, | |
| "num_input_tokens_seen": 825630720, | |
| "step": 6300, | |
| "train_runtime": 257422.8169, | |
| "train_tokens_per_second": 3207.294 | |
| }, | |
| { | |
| "epoch": 3.8383269961977184, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 2.158923168799232e-06, | |
| "loss": 1.6337, | |
| "num_input_tokens_seen": 826941440, | |
| "step": 6310, | |
| "train_runtime": 257901.9133, | |
| "train_tokens_per_second": 3206.418 | |
| }, | |
| { | |
| "epoch": 3.844410646387833, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 2.0004464725372763e-06, | |
| "loss": 1.6302, | |
| "num_input_tokens_seen": 828252160, | |
| "step": 6320, | |
| "train_runtime": 258382.2286, | |
| "train_tokens_per_second": 3205.531 | |
| }, | |
| { | |
| "epoch": 3.8504942965779465, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.8479867863195333e-06, | |
| "loss": 1.6333, | |
| "num_input_tokens_seen": 829562880, | |
| "step": 6330, | |
| "train_runtime": 258861.7851, | |
| "train_tokens_per_second": 3204.656 | |
| }, | |
| { | |
| "epoch": 3.856577946768061, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 1.701547809150522e-06, | |
| "loss": 1.6353, | |
| "num_input_tokens_seen": 830873600, | |
| "step": 6340, | |
| "train_runtime": 259339.8156, | |
| "train_tokens_per_second": 3203.803 | |
| }, | |
| { | |
| "epoch": 3.862661596958175, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.5611330939592494e-06, | |
| "loss": 1.6396, | |
| "num_input_tokens_seen": 832184320, | |
| "step": 6350, | |
| "train_runtime": 259817.9997, | |
| "train_tokens_per_second": 3202.951 | |
| }, | |
| { | |
| "epoch": 3.868745247148289, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 1.4267460475128614e-06, | |
| "loss": 1.6672, | |
| "num_input_tokens_seen": 833495040, | |
| "step": 6360, | |
| "train_runtime": 260295.7867, | |
| "train_tokens_per_second": 3202.107 | |
| }, | |
| { | |
| "epoch": 3.874828897338403, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.298389930334265e-06, | |
| "loss": 1.6369, | |
| "num_input_tokens_seen": 834805760, | |
| "step": 6370, | |
| "train_runtime": 260772.7931, | |
| "train_tokens_per_second": 3201.276 | |
| }, | |
| { | |
| "epoch": 3.880912547528517, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.1760678566227479e-06, | |
| "loss": 1.61, | |
| "num_input_tokens_seen": 836116480, | |
| "step": 6380, | |
| "train_runtime": 261249.3034, | |
| "train_tokens_per_second": 3200.454 | |
| }, | |
| { | |
| "epoch": 3.886996197718631, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 1.0597827941786486e-06, | |
| "loss": 1.6572, | |
| "num_input_tokens_seen": 837427200, | |
| "step": 6390, | |
| "train_runtime": 261725.5029, | |
| "train_tokens_per_second": 3199.639 | |
| }, | |
| { | |
| "epoch": 3.893079847908745, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 9.495375643311655e-07, | |
| "loss": 1.6225, | |
| "num_input_tokens_seen": 838737920, | |
| "step": 6400, | |
| "train_runtime": 262202.5532, | |
| "train_tokens_per_second": 3198.817 | |
| }, | |
| { | |
| "epoch": 3.899163498098859, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8.453348418700213e-07, | |
| "loss": 1.6146, | |
| "num_input_tokens_seen": 840048640, | |
| "step": 6410, | |
| "train_runtime": 262679.9257, | |
| "train_tokens_per_second": 3197.993 | |
| }, | |
| { | |
| "epoch": 3.9052471482889732, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 7.471771549805162e-07, | |
| "loss": 1.6213, | |
| "num_input_tokens_seen": 841359360, | |
| "step": 6420, | |
| "train_runtime": 263158.6243, | |
| "train_tokens_per_second": 3197.157 | |
| }, | |
| { | |
| "epoch": 3.9113307984790877, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 6.55066885182215e-07, | |
| "loss": 1.6522, | |
| "num_input_tokens_seen": 842670080, | |
| "step": 6430, | |
| "train_runtime": 263636.9111, | |
| "train_tokens_per_second": 3196.328 | |
| }, | |
| { | |
| "epoch": 3.9174144486692013, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 5.690062672711605e-07, | |
| "loss": 1.6368, | |
| "num_input_tokens_seen": 843980800, | |
| "step": 6440, | |
| "train_runtime": 264114.9361, | |
| "train_tokens_per_second": 3195.506 | |
| }, | |
| { | |
| "epoch": 3.9234980988593158, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 4.889973892656107e-07, | |
| "loss": 1.6258, | |
| "num_input_tokens_seen": 845291520, | |
| "step": 6450, | |
| "train_runtime": 264593.569, | |
| "train_tokens_per_second": 3194.679 | |
| }, | |
| { | |
| "epoch": 3.92958174904943, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 4.1504219235546923e-07, | |
| "loss": 1.6317, | |
| "num_input_tokens_seen": 846602240, | |
| "step": 6460, | |
| "train_runtime": 265072.6691, | |
| "train_tokens_per_second": 3193.85 | |
| }, | |
| { | |
| "epoch": 3.935665399239544, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 3.471424708551274e-07, | |
| "loss": 1.6442, | |
| "num_input_tokens_seen": 847912960, | |
| "step": 6470, | |
| "train_runtime": 265551.9975, | |
| "train_tokens_per_second": 3193.02 | |
| }, | |
| { | |
| "epoch": 3.941749049429658, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 2.8529987215986096e-07, | |
| "loss": 1.6448, | |
| "num_input_tokens_seen": 849223680, | |
| "step": 6480, | |
| "train_runtime": 266031.0263, | |
| "train_tokens_per_second": 3192.198 | |
| }, | |
| { | |
| "epoch": 3.947832699619772, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 2.295158967060007e-07, | |
| "loss": 1.6409, | |
| "num_input_tokens_seen": 850534400, | |
| "step": 6490, | |
| "train_runtime": 266511.6606, | |
| "train_tokens_per_second": 3191.359 | |
| }, | |
| { | |
| "epoch": 3.953916349809886, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 1.7979189793448925e-07, | |
| "loss": 1.6319, | |
| "num_input_tokens_seen": 851845120, | |
| "step": 6500, | |
| "train_runtime": 266991.1154, | |
| "train_tokens_per_second": 3190.537 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.3612908225796306e-07, | |
| "loss": 1.6569, | |
| "num_input_tokens_seen": 853155840, | |
| "step": 6510, | |
| "train_runtime": 267470.5957, | |
| "train_tokens_per_second": 3189.718 | |
| }, | |
| { | |
| "epoch": 3.966083650190114, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 9.852850903155353e-08, | |
| "loss": 1.6181, | |
| "num_input_tokens_seen": 854466560, | |
| "step": 6520, | |
| "train_runtime": 267949.2792, | |
| "train_tokens_per_second": 3188.912 | |
| }, | |
| { | |
| "epoch": 3.972167300380228, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 6.699109052715758e-08, | |
| "loss": 1.6236, | |
| "num_input_tokens_seen": 855777280, | |
| "step": 6530, | |
| "train_runtime": 268429.2552, | |
| "train_tokens_per_second": 3188.092 | |
| }, | |
| { | |
| "epoch": 3.978250950570342, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 4.151759191137194e-08, | |
| "loss": 1.6233, | |
| "num_input_tokens_seen": 857088000, | |
| "step": 6540, | |
| "train_runtime": 268908.9629, | |
| "train_tokens_per_second": 3187.279 | |
| }, | |
| { | |
| "epoch": 3.984334600760456, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 2.210863122678597e-08, | |
| "loss": 1.6551, | |
| "num_input_tokens_seen": 858398720, | |
| "step": 6550, | |
| "train_runtime": 269388.3681, | |
| "train_tokens_per_second": 3186.473 | |
| }, | |
| { | |
| "epoch": 3.9904182509505706, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8.764679377132324e-09, | |
| "loss": 1.6475, | |
| "num_input_tokens_seen": 859709440, | |
| "step": 6560, | |
| "train_runtime": 269866.3772, | |
| "train_tokens_per_second": 3185.686 | |
| }, | |
| { | |
| "epoch": 3.996501901140684, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.4860601157962173e-09, | |
| "loss": 1.6436, | |
| "num_input_tokens_seen": 861020160, | |
| "step": 6570, | |
| "train_runtime": 270344.7392, | |
| "train_tokens_per_second": 3184.897 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "num_input_tokens_seen": 861765632, | |
| "step": 6576, | |
| "total_flos": 1.457459776629139e+19, | |
| "train_loss": 2.710902440866995, | |
| "train_runtime": 270642.3692, | |
| "train_samples_per_second": 1.555, | |
| "train_steps_per_second": 0.024, | |
| "train_tokens_per_second": 3184.149 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 6576, | |
| "num_input_tokens_seen": 861765632, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.457459776629139e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |