| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1638, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030525030525030525, | |
| "grad_norm": 0.5091429778387513, | |
| "learning_rate": 4.999926430159223e-05, | |
| "loss": 0.2014, | |
| "num_input_tokens_seen": 749328, | |
| "step": 5, | |
| "train_runtime": 25.8738, | |
| "train_tokens_per_second": 28960.872 | |
| }, | |
| { | |
| "epoch": 0.006105006105006105, | |
| "grad_norm": 0.37153788985641606, | |
| "learning_rate": 4.999627560102124e-05, | |
| "loss": 0.1612, | |
| "num_input_tokens_seen": 1509456, | |
| "step": 10, | |
| "train_runtime": 50.2738, | |
| "train_tokens_per_second": 30024.699 | |
| }, | |
| { | |
| "epoch": 0.009157509157509158, | |
| "grad_norm": 0.32109722228510573, | |
| "learning_rate": 4.999098819177214e-05, | |
| "loss": 0.1526, | |
| "num_input_tokens_seen": 2259568, | |
| "step": 15, | |
| "train_runtime": 74.2458, | |
| "train_tokens_per_second": 30433.595 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 0.30203981898876364, | |
| "learning_rate": 4.998340256008591e-05, | |
| "loss": 0.1573, | |
| "num_input_tokens_seen": 2984432, | |
| "step": 20, | |
| "train_runtime": 97.4248, | |
| "train_tokens_per_second": 30633.183 | |
| }, | |
| { | |
| "epoch": 0.015262515262515262, | |
| "grad_norm": 0.2589325538124247, | |
| "learning_rate": 4.997351940355277e-05, | |
| "loss": 0.1597, | |
| "num_input_tokens_seen": 3738880, | |
| "step": 25, | |
| "train_runtime": 121.1802, | |
| "train_tokens_per_second": 30853.89 | |
| }, | |
| { | |
| "epoch": 0.018315018315018316, | |
| "grad_norm": 0.3009276209015333, | |
| "learning_rate": 4.9961339631048035e-05, | |
| "loss": 0.1463, | |
| "num_input_tokens_seen": 4472992, | |
| "step": 30, | |
| "train_runtime": 144.8751, | |
| "train_tokens_per_second": 30874.816 | |
| }, | |
| { | |
| "epoch": 0.021367521367521368, | |
| "grad_norm": 0.25926474513366904, | |
| "learning_rate": 4.9946864362648506e-05, | |
| "loss": 0.1452, | |
| "num_input_tokens_seen": 5238224, | |
| "step": 35, | |
| "train_runtime": 169.174, | |
| "train_tokens_per_second": 30963.524 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 0.27429735952269424, | |
| "learning_rate": 4.9930094929529506e-05, | |
| "loss": 0.1469, | |
| "num_input_tokens_seen": 5990176, | |
| "step": 40, | |
| "train_runtime": 193.1788, | |
| "train_tokens_per_second": 31008.453 | |
| }, | |
| { | |
| "epoch": 0.027472527472527472, | |
| "grad_norm": 0.27515156097109633, | |
| "learning_rate": 4.991103287384244e-05, | |
| "loss": 0.1412, | |
| "num_input_tokens_seen": 6755296, | |
| "step": 45, | |
| "train_runtime": 217.8417, | |
| "train_tokens_per_second": 31010.118 | |
| }, | |
| { | |
| "epoch": 0.030525030525030524, | |
| "grad_norm": 0.2614218428872808, | |
| "learning_rate": 4.9889679948572974e-05, | |
| "loss": 0.1573, | |
| "num_input_tokens_seen": 7500160, | |
| "step": 50, | |
| "train_runtime": 242.0828, | |
| "train_tokens_per_second": 30981.798 | |
| }, | |
| { | |
| "epoch": 0.033577533577533576, | |
| "grad_norm": 0.2896795630264156, | |
| "learning_rate": 4.9866038117379824e-05, | |
| "loss": 0.1423, | |
| "num_input_tokens_seen": 8243712, | |
| "step": 55, | |
| "train_runtime": 265.7427, | |
| "train_tokens_per_second": 31021.404 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.26033736377204597, | |
| "learning_rate": 4.984010955441418e-05, | |
| "loss": 0.1419, | |
| "num_input_tokens_seen": 8997632, | |
| "step": 60, | |
| "train_runtime": 289.3728, | |
| "train_tokens_per_second": 31093.57 | |
| }, | |
| { | |
| "epoch": 0.03968253968253968, | |
| "grad_norm": 0.24860558418494275, | |
| "learning_rate": 4.981189664411981e-05, | |
| "loss": 0.1515, | |
| "num_input_tokens_seen": 9749984, | |
| "step": 65, | |
| "train_runtime": 313.138, | |
| "train_tokens_per_second": 31136.379 | |
| }, | |
| { | |
| "epoch": 0.042735042735042736, | |
| "grad_norm": 0.25656696592553796, | |
| "learning_rate": 4.978140198101366e-05, | |
| "loss": 0.1436, | |
| "num_input_tokens_seen": 10486624, | |
| "step": 70, | |
| "train_runtime": 337.1059, | |
| "train_tokens_per_second": 31107.803 | |
| }, | |
| { | |
| "epoch": 0.045787545787545784, | |
| "grad_norm": 0.25476921571577515, | |
| "learning_rate": 4.97486283694474e-05, | |
| "loss": 0.1403, | |
| "num_input_tokens_seen": 11247040, | |
| "step": 75, | |
| "train_runtime": 360.7995, | |
| "train_tokens_per_second": 31172.547 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 0.26405759747056734, | |
| "learning_rate": 4.9713578823349456e-05, | |
| "loss": 0.141, | |
| "num_input_tokens_seen": 11981936, | |
| "step": 80, | |
| "train_runtime": 384.4122, | |
| "train_tokens_per_second": 31169.5 | |
| }, | |
| { | |
| "epoch": 0.051892551892551896, | |
| "grad_norm": 0.26526559880084205, | |
| "learning_rate": 4.967625656594782e-05, | |
| "loss": 0.1462, | |
| "num_input_tokens_seen": 12711360, | |
| "step": 85, | |
| "train_runtime": 407.7426, | |
| "train_tokens_per_second": 31174.963 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.25978925421475424, | |
| "learning_rate": 4.9636665029473714e-05, | |
| "loss": 0.1359, | |
| "num_input_tokens_seen": 13457024, | |
| "step": 90, | |
| "train_runtime": 431.5263, | |
| "train_tokens_per_second": 31184.714 | |
| }, | |
| { | |
| "epoch": 0.057997557997558, | |
| "grad_norm": 0.24691016702106858, | |
| "learning_rate": 4.959480785484587e-05, | |
| "loss": 0.1376, | |
| "num_input_tokens_seen": 14221888, | |
| "step": 95, | |
| "train_runtime": 455.0506, | |
| "train_tokens_per_second": 31253.424 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 0.27050249577811053, | |
| "learning_rate": 4.955068889133576e-05, | |
| "loss": 0.1423, | |
| "num_input_tokens_seen": 14949152, | |
| "step": 100, | |
| "train_runtime": 478.9409, | |
| "train_tokens_per_second": 31212.939 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 0.2440274532919512, | |
| "learning_rate": 4.9504312196213596e-05, | |
| "loss": 0.141, | |
| "num_input_tokens_seen": 15702960, | |
| "step": 105, | |
| "train_runtime": 502.5202, | |
| "train_tokens_per_second": 31248.414 | |
| }, | |
| { | |
| "epoch": 0.06715506715506715, | |
| "grad_norm": 0.23245091024854858, | |
| "learning_rate": 4.945568203437521e-05, | |
| "loss": 0.135, | |
| "num_input_tokens_seen": 16451632, | |
| "step": 110, | |
| "train_runtime": 526.9505, | |
| "train_tokens_per_second": 31220.451 | |
| }, | |
| { | |
| "epoch": 0.07020757020757021, | |
| "grad_norm": 0.24753876184592694, | |
| "learning_rate": 4.9404802877949843e-05, | |
| "loss": 0.1326, | |
| "num_input_tokens_seen": 17212336, | |
| "step": 115, | |
| "train_runtime": 551.5575, | |
| "train_tokens_per_second": 31206.784 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.2414155399744996, | |
| "learning_rate": 4.935167940588887e-05, | |
| "loss": 0.1394, | |
| "num_input_tokens_seen": 17970192, | |
| "step": 120, | |
| "train_runtime": 575.0237, | |
| "train_tokens_per_second": 31251.218 | |
| }, | |
| { | |
| "epoch": 0.07631257631257632, | |
| "grad_norm": 0.22372397383209364, | |
| "learning_rate": 4.929631650353555e-05, | |
| "loss": 0.1322, | |
| "num_input_tokens_seen": 18747280, | |
| "step": 125, | |
| "train_runtime": 599.4096, | |
| "train_tokens_per_second": 31276.244 | |
| }, | |
| { | |
| "epoch": 0.07936507936507936, | |
| "grad_norm": 0.2373700099696729, | |
| "learning_rate": 4.9238719262175724e-05, | |
| "loss": 0.1425, | |
| "num_input_tokens_seen": 19506368, | |
| "step": 130, | |
| "train_runtime": 623.9095, | |
| "train_tokens_per_second": 31264.742 | |
| }, | |
| { | |
| "epoch": 0.08241758241758242, | |
| "grad_norm": 0.21599296160712622, | |
| "learning_rate": 4.9178892978569625e-05, | |
| "loss": 0.1383, | |
| "num_input_tokens_seen": 20258064, | |
| "step": 135, | |
| "train_runtime": 648.1332, | |
| "train_tokens_per_second": 31256.021 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 0.2912031364564355, | |
| "learning_rate": 4.911684315446477e-05, | |
| "loss": 0.1354, | |
| "num_input_tokens_seen": 21009984, | |
| "step": 140, | |
| "train_runtime": 672.8293, | |
| "train_tokens_per_second": 31226.32 | |
| }, | |
| { | |
| "epoch": 0.08852258852258853, | |
| "grad_norm": 0.249251976312198, | |
| "learning_rate": 4.9052575496090016e-05, | |
| "loss": 0.1385, | |
| "num_input_tokens_seen": 21749312, | |
| "step": 145, | |
| "train_runtime": 696.591, | |
| "train_tokens_per_second": 31222.497 | |
| }, | |
| { | |
| "epoch": 0.09157509157509157, | |
| "grad_norm": 0.24591736710120005, | |
| "learning_rate": 4.8986095913630806e-05, | |
| "loss": 0.1353, | |
| "num_input_tokens_seen": 22476624, | |
| "step": 150, | |
| "train_runtime": 719.465, | |
| "train_tokens_per_second": 31240.748 | |
| }, | |
| { | |
| "epoch": 0.09462759462759462, | |
| "grad_norm": 0.21998725587064735, | |
| "learning_rate": 4.8917410520685635e-05, | |
| "loss": 0.1255, | |
| "num_input_tokens_seen": 23237296, | |
| "step": 155, | |
| "train_runtime": 743.4105, | |
| "train_tokens_per_second": 31257.692 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 0.25821879440087686, | |
| "learning_rate": 4.884652563370385e-05, | |
| "loss": 0.1386, | |
| "num_input_tokens_seen": 23955088, | |
| "step": 160, | |
| "train_runtime": 766.316, | |
| "train_tokens_per_second": 31260.065 | |
| }, | |
| { | |
| "epoch": 0.10073260073260074, | |
| "grad_norm": 0.23439028302996529, | |
| "learning_rate": 4.87734477714048e-05, | |
| "loss": 0.1434, | |
| "num_input_tokens_seen": 24708720, | |
| "step": 165, | |
| "train_runtime": 790.2737, | |
| "train_tokens_per_second": 31266.03 | |
| }, | |
| { | |
| "epoch": 0.10378510378510379, | |
| "grad_norm": 0.2793662467655784, | |
| "learning_rate": 4.86981836541783e-05, | |
| "loss": 0.133, | |
| "num_input_tokens_seen": 25432544, | |
| "step": 170, | |
| "train_runtime": 813.7349, | |
| "train_tokens_per_second": 31254.092 | |
| }, | |
| { | |
| "epoch": 0.10683760683760683, | |
| "grad_norm": 0.25653181332891495, | |
| "learning_rate": 4.862074020346664e-05, | |
| "loss": 0.1383, | |
| "num_input_tokens_seen": 26206496, | |
| "step": 175, | |
| "train_runtime": 837.2491, | |
| "train_tokens_per_second": 31300.716 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.2248694154567311, | |
| "learning_rate": 4.854112454112811e-05, | |
| "loss": 0.1417, | |
| "num_input_tokens_seen": 26925872, | |
| "step": 180, | |
| "train_runtime": 860.8597, | |
| "train_tokens_per_second": 31277.886 | |
| }, | |
| { | |
| "epoch": 0.11294261294261294, | |
| "grad_norm": 0.248643467694514, | |
| "learning_rate": 4.845934398878202e-05, | |
| "loss": 0.1387, | |
| "num_input_tokens_seen": 27669680, | |
| "step": 185, | |
| "train_runtime": 884.7778, | |
| "train_tokens_per_second": 31273.028 | |
| }, | |
| { | |
| "epoch": 0.115995115995116, | |
| "grad_norm": 0.24260342645126912, | |
| "learning_rate": 4.837540606713538e-05, | |
| "loss": 0.142, | |
| "num_input_tokens_seen": 28423664, | |
| "step": 190, | |
| "train_runtime": 908.6487, | |
| "train_tokens_per_second": 31281.246 | |
| }, | |
| { | |
| "epoch": 0.11904761904761904, | |
| "grad_norm": 0.28207740110466256, | |
| "learning_rate": 4.828931849529129e-05, | |
| "loss": 0.1442, | |
| "num_input_tokens_seen": 29159680, | |
| "step": 195, | |
| "train_runtime": 931.8189, | |
| "train_tokens_per_second": 31293.292 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 0.2697921906800307, | |
| "learning_rate": 4.820108919003913e-05, | |
| "loss": 0.1449, | |
| "num_input_tokens_seen": 29878656, | |
| "step": 200, | |
| "train_runtime": 955.5111, | |
| "train_tokens_per_second": 31269.815 | |
| }, | |
| { | |
| "epoch": 0.12515262515262515, | |
| "grad_norm": 0.2430879959353569, | |
| "learning_rate": 4.811072626512642e-05, | |
| "loss": 0.1355, | |
| "num_input_tokens_seen": 30615968, | |
| "step": 205, | |
| "train_runtime": 979.7609, | |
| "train_tokens_per_second": 31248.407 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.20885779090171266, | |
| "learning_rate": 4.801823803051274e-05, | |
| "loss": 0.1294, | |
| "num_input_tokens_seen": 31365168, | |
| "step": 210, | |
| "train_runtime": 1003.7274, | |
| "train_tokens_per_second": 31248.692 | |
| }, | |
| { | |
| "epoch": 0.13125763125763126, | |
| "grad_norm": 0.23175722091194897, | |
| "learning_rate": 4.79236329916055e-05, | |
| "loss": 0.1458, | |
| "num_input_tokens_seen": 32138352, | |
| "step": 215, | |
| "train_runtime": 1028.345, | |
| "train_tokens_per_second": 31252.499 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 0.2494655624233967, | |
| "learning_rate": 4.782691984847773e-05, | |
| "loss": 0.1282, | |
| "num_input_tokens_seen": 32869952, | |
| "step": 220, | |
| "train_runtime": 1051.8138, | |
| "train_tokens_per_second": 31250.732 | |
| }, | |
| { | |
| "epoch": 0.13736263736263737, | |
| "grad_norm": 0.24578709797217577, | |
| "learning_rate": 4.77281074950681e-05, | |
| "loss": 0.1333, | |
| "num_input_tokens_seen": 33632032, | |
| "step": 225, | |
| "train_runtime": 1076.5664, | |
| "train_tokens_per_second": 31240.09 | |
| }, | |
| { | |
| "epoch": 0.14041514041514042, | |
| "grad_norm": 0.21699309117189078, | |
| "learning_rate": 4.76272050183629e-05, | |
| "loss": 0.1418, | |
| "num_input_tokens_seen": 34365776, | |
| "step": 230, | |
| "train_runtime": 1099.9303, | |
| "train_tokens_per_second": 31243.595 | |
| }, | |
| { | |
| "epoch": 0.14346764346764346, | |
| "grad_norm": 0.2733740667028303, | |
| "learning_rate": 4.752422169756048e-05, | |
| "loss": 0.1361, | |
| "num_input_tokens_seen": 35108640, | |
| "step": 235, | |
| "train_runtime": 1123.4729, | |
| "train_tokens_per_second": 31250.099 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.2758317817941706, | |
| "learning_rate": 4.741916700321785e-05, | |
| "loss": 0.1318, | |
| "num_input_tokens_seen": 35865600, | |
| "step": 240, | |
| "train_runtime": 1147.1077, | |
| "train_tokens_per_second": 31266.115 | |
| }, | |
| { | |
| "epoch": 0.14957264957264957, | |
| "grad_norm": 0.24922734918178294, | |
| "learning_rate": 4.7312050596379764e-05, | |
| "loss": 0.149, | |
| "num_input_tokens_seen": 36614272, | |
| "step": 245, | |
| "train_runtime": 1171.2783, | |
| "train_tokens_per_second": 31260.095 | |
| }, | |
| { | |
| "epoch": 0.15262515262515264, | |
| "grad_norm": 0.2727897024232562, | |
| "learning_rate": 4.7202882327690314e-05, | |
| "loss": 0.1333, | |
| "num_input_tokens_seen": 37340528, | |
| "step": 250, | |
| "train_runtime": 1194.4436, | |
| "train_tokens_per_second": 31261.859 | |
| }, | |
| { | |
| "epoch": 0.15567765567765568, | |
| "grad_norm": 0.22569055835795562, | |
| "learning_rate": 4.709167223648695e-05, | |
| "loss": 0.1315, | |
| "num_input_tokens_seen": 38075840, | |
| "step": 255, | |
| "train_runtime": 1217.917, | |
| "train_tokens_per_second": 31263.082 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 0.26029689401583483, | |
| "learning_rate": 4.697843054987737e-05, | |
| "loss": 0.1372, | |
| "num_input_tokens_seen": 38817888, | |
| "step": 260, | |
| "train_runtime": 1241.4176, | |
| "train_tokens_per_second": 31269.0 | |
| }, | |
| { | |
| "epoch": 0.1617826617826618, | |
| "grad_norm": 0.22643316824690848, | |
| "learning_rate": 4.686316768179889e-05, | |
| "loss": 0.1436, | |
| "num_input_tokens_seen": 39556304, | |
| "step": 265, | |
| "train_runtime": 1264.7839, | |
| "train_tokens_per_second": 31275.147 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.2245608456027274, | |
| "learning_rate": 4.674589423206083e-05, | |
| "loss": 0.1326, | |
| "num_input_tokens_seen": 40299952, | |
| "step": 270, | |
| "train_runtime": 1288.778, | |
| "train_tokens_per_second": 31269.895 | |
| }, | |
| { | |
| "epoch": 0.16788766788766787, | |
| "grad_norm": 0.28640791945271393, | |
| "learning_rate": 4.6626620985369724e-05, | |
| "loss": 0.1395, | |
| "num_input_tokens_seen": 41042560, | |
| "step": 275, | |
| "train_runtime": 1312.5624, | |
| "train_tokens_per_second": 31269.035 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 0.24136380215038572, | |
| "learning_rate": 4.650535891033752e-05, | |
| "loss": 0.1381, | |
| "num_input_tokens_seen": 41779920, | |
| "step": 280, | |
| "train_runtime": 1335.7411, | |
| "train_tokens_per_second": 31278.457 | |
| }, | |
| { | |
| "epoch": 0.17399267399267399, | |
| "grad_norm": 0.23490077378170912, | |
| "learning_rate": 4.6382119158472895e-05, | |
| "loss": 0.1359, | |
| "num_input_tokens_seen": 42537040, | |
| "step": 285, | |
| "train_runtime": 1360.6058, | |
| "train_tokens_per_second": 31263.309 | |
| }, | |
| { | |
| "epoch": 0.17704517704517705, | |
| "grad_norm": 0.2499204534251018, | |
| "learning_rate": 4.625691306315572e-05, | |
| "loss": 0.1309, | |
| "num_input_tokens_seen": 43279440, | |
| "step": 290, | |
| "train_runtime": 1384.6488, | |
| "train_tokens_per_second": 31256.619 | |
| }, | |
| { | |
| "epoch": 0.1800976800976801, | |
| "grad_norm": 0.24434225661817544, | |
| "learning_rate": 4.6129752138594874e-05, | |
| "loss": 0.134, | |
| "num_input_tokens_seen": 43999504, | |
| "step": 295, | |
| "train_runtime": 1408.0185, | |
| "train_tokens_per_second": 31249.237 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.22296781686606312, | |
| "learning_rate": 4.600064807876929e-05, | |
| "loss": 0.1272, | |
| "num_input_tokens_seen": 44745040, | |
| "step": 300, | |
| "train_runtime": 1431.9681, | |
| "train_tokens_per_second": 31247.232 | |
| }, | |
| { | |
| "epoch": 0.1862026862026862, | |
| "grad_norm": 0.2490768586972167, | |
| "learning_rate": 4.586961275635263e-05, | |
| "loss": 0.1294, | |
| "num_input_tokens_seen": 45495264, | |
| "step": 305, | |
| "train_runtime": 1455.8244, | |
| "train_tokens_per_second": 31250.516 | |
| }, | |
| { | |
| "epoch": 0.18925518925518925, | |
| "grad_norm": 0.23248133635067036, | |
| "learning_rate": 4.57366582216214e-05, | |
| "loss": 0.1363, | |
| "num_input_tokens_seen": 46249280, | |
| "step": 310, | |
| "train_runtime": 1479.5306, | |
| "train_tokens_per_second": 31259.427 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.21738235792627855, | |
| "learning_rate": 4.560179670134681e-05, | |
| "loss": 0.1319, | |
| "num_input_tokens_seen": 46983664, | |
| "step": 315, | |
| "train_runtime": 1503.5606, | |
| "train_tokens_per_second": 31248.268 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 0.22387346751098233, | |
| "learning_rate": 4.546504059767035e-05, | |
| "loss": 0.1284, | |
| "num_input_tokens_seen": 47753184, | |
| "step": 320, | |
| "train_runtime": 1527.8348, | |
| "train_tokens_per_second": 31255.462 | |
| }, | |
| { | |
| "epoch": 0.1984126984126984, | |
| "grad_norm": 0.28365615607427336, | |
| "learning_rate": 4.532640248696331e-05, | |
| "loss": 0.1317, | |
| "num_input_tokens_seen": 48503792, | |
| "step": 325, | |
| "train_runtime": 1551.7588, | |
| "train_tokens_per_second": 31257.302 | |
| }, | |
| { | |
| "epoch": 0.20146520146520147, | |
| "grad_norm": 0.2289878454689289, | |
| "learning_rate": 4.518589511867017e-05, | |
| "loss": 0.1371, | |
| "num_input_tokens_seen": 49255984, | |
| "step": 330, | |
| "train_runtime": 1575.8405, | |
| "train_tokens_per_second": 31256.96 | |
| }, | |
| { | |
| "epoch": 0.2045177045177045, | |
| "grad_norm": 0.27466494725952967, | |
| "learning_rate": 4.504353141413616e-05, | |
| "loss": 0.141, | |
| "num_input_tokens_seen": 49998832, | |
| "step": 335, | |
| "train_runtime": 1599.0021, | |
| "train_tokens_per_second": 31268.771 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 0.20943164116621985, | |
| "learning_rate": 4.4899324465419036e-05, | |
| "loss": 0.135, | |
| "num_input_tokens_seen": 50749456, | |
| "step": 340, | |
| "train_runtime": 1623.1812, | |
| "train_tokens_per_second": 31265.429 | |
| }, | |
| { | |
| "epoch": 0.21062271062271062, | |
| "grad_norm": 0.2750155646844687, | |
| "learning_rate": 4.475328753408499e-05, | |
| "loss": 0.1311, | |
| "num_input_tokens_seen": 51499824, | |
| "step": 345, | |
| "train_runtime": 1647.3605, | |
| "train_tokens_per_second": 31262.025 | |
| }, | |
| { | |
| "epoch": 0.21367521367521367, | |
| "grad_norm": 0.24180273413605358, | |
| "learning_rate": 4.460543404998924e-05, | |
| "loss": 0.1308, | |
| "num_input_tokens_seen": 52242768, | |
| "step": 350, | |
| "train_runtime": 1671.6663, | |
| "train_tokens_per_second": 31251.913 | |
| }, | |
| { | |
| "epoch": 0.21672771672771673, | |
| "grad_norm": 0.25172416850678214, | |
| "learning_rate": 4.4455777610040846e-05, | |
| "loss": 0.1277, | |
| "num_input_tokens_seen": 52986432, | |
| "step": 355, | |
| "train_runtime": 1695.1183, | |
| "train_tokens_per_second": 31258.251 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.3464432362591003, | |
| "learning_rate": 4.4304331976952426e-05, | |
| "loss": 0.1446, | |
| "num_input_tokens_seen": 53716960, | |
| "step": 360, | |
| "train_runtime": 1718.7359, | |
| "train_tokens_per_second": 31253.761 | |
| }, | |
| { | |
| "epoch": 0.22283272283272285, | |
| "grad_norm": 0.21545381131189395, | |
| "learning_rate": 4.415111107797445e-05, | |
| "loss": 0.1321, | |
| "num_input_tokens_seen": 54467760, | |
| "step": 365, | |
| "train_runtime": 1742.4062, | |
| "train_tokens_per_second": 31260.082 | |
| }, | |
| { | |
| "epoch": 0.2258852258852259, | |
| "grad_norm": 0.21665890149832603, | |
| "learning_rate": 4.3996129003614476e-05, | |
| "loss": 0.1336, | |
| "num_input_tokens_seen": 55205056, | |
| "step": 370, | |
| "train_runtime": 1766.4818, | |
| "train_tokens_per_second": 31251.415 | |
| }, | |
| { | |
| "epoch": 0.22893772893772893, | |
| "grad_norm": 0.2089557199469907, | |
| "learning_rate": 4.3839400006341335e-05, | |
| "loss": 0.1242, | |
| "num_input_tokens_seen": 55926512, | |
| "step": 375, | |
| "train_runtime": 1789.3591, | |
| "train_tokens_per_second": 31255.052 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 0.2302385481226358, | |
| "learning_rate": 4.3680938499274485e-05, | |
| "loss": 0.13, | |
| "num_input_tokens_seen": 56649424, | |
| "step": 380, | |
| "train_runtime": 1812.8285, | |
| "train_tokens_per_second": 31249.191 | |
| }, | |
| { | |
| "epoch": 0.23504273504273504, | |
| "grad_norm": 0.21290493618363043, | |
| "learning_rate": 4.352075905485854e-05, | |
| "loss": 0.1343, | |
| "num_input_tokens_seen": 57414592, | |
| "step": 385, | |
| "train_runtime": 1836.6218, | |
| "train_tokens_per_second": 31260.978 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 0.23528778271395331, | |
| "learning_rate": 4.335887640352312e-05, | |
| "loss": 0.1317, | |
| "num_input_tokens_seen": 58141200, | |
| "step": 390, | |
| "train_runtime": 1860.0856, | |
| "train_tokens_per_second": 31257.271 | |
| }, | |
| { | |
| "epoch": 0.24114774114774115, | |
| "grad_norm": 0.20144377799183244, | |
| "learning_rate": 4.319530543232827e-05, | |
| "loss": 0.1232, | |
| "num_input_tokens_seen": 58893360, | |
| "step": 395, | |
| "train_runtime": 1884.0294, | |
| "train_tokens_per_second": 31259.258 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 0.19320014877583194, | |
| "learning_rate": 4.303006118359537e-05, | |
| "loss": 0.1264, | |
| "num_input_tokens_seen": 59629152, | |
| "step": 400, | |
| "train_runtime": 1907.2548, | |
| "train_tokens_per_second": 31264.388 | |
| }, | |
| { | |
| "epoch": 0.24725274725274726, | |
| "grad_norm": 0.21784261752278658, | |
| "learning_rate": 4.286315885352382e-05, | |
| "loss": 0.128, | |
| "num_input_tokens_seen": 60368640, | |
| "step": 405, | |
| "train_runtime": 1930.8481, | |
| "train_tokens_per_second": 31265.35 | |
| }, | |
| { | |
| "epoch": 0.2503052503052503, | |
| "grad_norm": 0.24941877414236435, | |
| "learning_rate": 4.2694613790793604e-05, | |
| "loss": 0.1287, | |
| "num_input_tokens_seen": 61126592, | |
| "step": 410, | |
| "train_runtime": 1955.0184, | |
| "train_tokens_per_second": 31266.505 | |
| }, | |
| { | |
| "epoch": 0.25335775335775335, | |
| "grad_norm": 0.23538658703077275, | |
| "learning_rate": 4.252444149515374e-05, | |
| "loss": 0.1251, | |
| "num_input_tokens_seen": 61847552, | |
| "step": 415, | |
| "train_runtime": 1978.373, | |
| "train_tokens_per_second": 31261.825 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.2395535225862291, | |
| "learning_rate": 4.235265761599691e-05, | |
| "loss": 0.1295, | |
| "num_input_tokens_seen": 62599056, | |
| "step": 420, | |
| "train_runtime": 2002.1678, | |
| "train_tokens_per_second": 31265.639 | |
| }, | |
| { | |
| "epoch": 0.2594627594627595, | |
| "grad_norm": 0.22036167615999408, | |
| "learning_rate": 4.217927795092034e-05, | |
| "loss": 0.1294, | |
| "num_input_tokens_seen": 63343712, | |
| "step": 425, | |
| "train_runtime": 2026.5077, | |
| "train_tokens_per_second": 31257.573 | |
| }, | |
| { | |
| "epoch": 0.2625152625152625, | |
| "grad_norm": 0.18982203120111718, | |
| "learning_rate": 4.2004318444272985e-05, | |
| "loss": 0.1249, | |
| "num_input_tokens_seen": 64088256, | |
| "step": 430, | |
| "train_runtime": 2050.3985, | |
| "train_tokens_per_second": 31256.489 | |
| }, | |
| { | |
| "epoch": 0.26556776556776557, | |
| "grad_norm": 0.31205764274040426, | |
| "learning_rate": 4.182779518568926e-05, | |
| "loss": 0.1292, | |
| "num_input_tokens_seen": 64832512, | |
| "step": 435, | |
| "train_runtime": 2074.3468, | |
| "train_tokens_per_second": 31254.423 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 0.25298567616378814, | |
| "learning_rate": 4.1649724408609406e-05, | |
| "loss": 0.1276, | |
| "num_input_tokens_seen": 65576640, | |
| "step": 440, | |
| "train_runtime": 2097.764, | |
| "train_tokens_per_second": 31260.256 | |
| }, | |
| { | |
| "epoch": 0.27167277167277165, | |
| "grad_norm": 0.26554637254819285, | |
| "learning_rate": 4.1470122488786645e-05, | |
| "loss": 0.1225, | |
| "num_input_tokens_seen": 66320960, | |
| "step": 445, | |
| "train_runtime": 2121.4747, | |
| "train_tokens_per_second": 31261.726 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.2462921125528256, | |
| "learning_rate": 4.128900594278122e-05, | |
| "loss": 0.1324, | |
| "num_input_tokens_seen": 67041040, | |
| "step": 450, | |
| "train_runtime": 2144.8539, | |
| "train_tokens_per_second": 31256.692 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 0.2388680393149134, | |
| "learning_rate": 4.110639142644149e-05, | |
| "loss": 0.1277, | |
| "num_input_tokens_seen": 67784672, | |
| "step": 455, | |
| "train_runtime": 2168.8036, | |
| "train_tokens_per_second": 31254.408 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 0.2139224871525343, | |
| "learning_rate": 4.092229573337223e-05, | |
| "loss": 0.1276, | |
| "num_input_tokens_seen": 68528416, | |
| "step": 460, | |
| "train_runtime": 2192.4013, | |
| "train_tokens_per_second": 31257.241 | |
| }, | |
| { | |
| "epoch": 0.2838827838827839, | |
| "grad_norm": 0.22358130066172044, | |
| "learning_rate": 4.073673579339028e-05, | |
| "loss": 0.1324, | |
| "num_input_tokens_seen": 69293616, | |
| "step": 465, | |
| "train_runtime": 2217.3127, | |
| "train_tokens_per_second": 31251.169 | |
| }, | |
| { | |
| "epoch": 0.2869352869352869, | |
| "grad_norm": 0.18419433578824992, | |
| "learning_rate": 4.05497286709676e-05, | |
| "loss": 0.1333, | |
| "num_input_tokens_seen": 70026160, | |
| "step": 470, | |
| "train_runtime": 2241.1152, | |
| "train_tokens_per_second": 31246.123 | |
| }, | |
| { | |
| "epoch": 0.28998778998779, | |
| "grad_norm": 0.2173257720524292, | |
| "learning_rate": 4.036129156366203e-05, | |
| "loss": 0.1371, | |
| "num_input_tokens_seen": 70771216, | |
| "step": 475, | |
| "train_runtime": 2264.8588, | |
| "train_tokens_per_second": 31247.517 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.2197726158732718, | |
| "learning_rate": 4.017144180053572e-05, | |
| "loss": 0.1292, | |
| "num_input_tokens_seen": 71516128, | |
| "step": 480, | |
| "train_runtime": 2289.2943, | |
| "train_tokens_per_second": 31239.377 | |
| }, | |
| { | |
| "epoch": 0.2960927960927961, | |
| "grad_norm": 0.2935811926755833, | |
| "learning_rate": 3.998019684056158e-05, | |
| "loss": 0.1291, | |
| "num_input_tokens_seen": 72262192, | |
| "step": 485, | |
| "train_runtime": 2313.0088, | |
| "train_tokens_per_second": 31241.642 | |
| }, | |
| { | |
| "epoch": 0.29914529914529914, | |
| "grad_norm": 0.19680666241660333, | |
| "learning_rate": 3.978757427101764e-05, | |
| "loss": 0.1236, | |
| "num_input_tokens_seen": 73028336, | |
| "step": 490, | |
| "train_runtime": 2337.0883, | |
| "train_tokens_per_second": 31247.573 | |
| }, | |
| { | |
| "epoch": 0.3021978021978022, | |
| "grad_norm": 0.21991876746308214, | |
| "learning_rate": 3.959359180586975e-05, | |
| "loss": 0.116, | |
| "num_input_tokens_seen": 73793216, | |
| "step": 495, | |
| "train_runtime": 2361.731, | |
| "train_tokens_per_second": 31245.394 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 0.20934981167550853, | |
| "learning_rate": 3.939826728414254e-05, | |
| "loss": 0.1284, | |
| "num_input_tokens_seen": 74526624, | |
| "step": 500, | |
| "train_runtime": 2384.976, | |
| "train_tokens_per_second": 31248.375 | |
| }, | |
| { | |
| "epoch": 0.3083028083028083, | |
| "grad_norm": 0.1988331167025918, | |
| "learning_rate": 3.920161866827889e-05, | |
| "loss": 0.1194, | |
| "num_input_tokens_seen": 75239984, | |
| "step": 505, | |
| "train_runtime": 2407.9151, | |
| "train_tokens_per_second": 31246.942 | |
| }, | |
| { | |
| "epoch": 0.31135531135531136, | |
| "grad_norm": 0.25378650307905176, | |
| "learning_rate": 3.9003664042488144e-05, | |
| "loss": 0.1303, | |
| "num_input_tokens_seen": 75969680, | |
| "step": 510, | |
| "train_runtime": 2431.9622, | |
| "train_tokens_per_second": 31238.018 | |
| }, | |
| { | |
| "epoch": 0.3144078144078144, | |
| "grad_norm": 0.22021670338613852, | |
| "learning_rate": 3.8804421611082916e-05, | |
| "loss": 0.128, | |
| "num_input_tokens_seen": 76699136, | |
| "step": 515, | |
| "train_runtime": 2455.9626, | |
| "train_tokens_per_second": 31229.766 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 0.21715876384422564, | |
| "learning_rate": 3.8603909696805104e-05, | |
| "loss": 0.1186, | |
| "num_input_tokens_seen": 77431088, | |
| "step": 520, | |
| "train_runtime": 2479.5911, | |
| "train_tokens_per_second": 31227.361 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 0.21687092030874366, | |
| "learning_rate": 3.8402146739140874e-05, | |
| "loss": 0.1277, | |
| "num_input_tokens_seen": 78191456, | |
| "step": 525, | |
| "train_runtime": 2503.8314, | |
| "train_tokens_per_second": 31228.723 | |
| }, | |
| { | |
| "epoch": 0.3235653235653236, | |
| "grad_norm": 0.22920755237983448, | |
| "learning_rate": 3.819915129262484e-05, | |
| "loss": 0.1289, | |
| "num_input_tokens_seen": 78920656, | |
| "step": 530, | |
| "train_runtime": 2527.6636, | |
| "train_tokens_per_second": 31222.769 | |
| }, | |
| { | |
| "epoch": 0.3266178266178266, | |
| "grad_norm": 0.20468673040463367, | |
| "learning_rate": 3.799494202513386e-05, | |
| "loss": 0.1259, | |
| "num_input_tokens_seen": 79659328, | |
| "step": 535, | |
| "train_runtime": 2550.9059, | |
| "train_tokens_per_second": 31227.858 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.21487648312333246, | |
| "learning_rate": 3.7789537716170256e-05, | |
| "loss": 0.1245, | |
| "num_input_tokens_seen": 80403760, | |
| "step": 540, | |
| "train_runtime": 2574.6742, | |
| "train_tokens_per_second": 31228.713 | |
| }, | |
| { | |
| "epoch": 0.3327228327228327, | |
| "grad_norm": 0.23959027355174933, | |
| "learning_rate": 3.7582957255134765e-05, | |
| "loss": 0.1237, | |
| "num_input_tokens_seen": 81145952, | |
| "step": 545, | |
| "train_runtime": 2598.2472, | |
| "train_tokens_per_second": 31231.037 | |
| }, | |
| { | |
| "epoch": 0.33577533577533575, | |
| "grad_norm": 0.20886983292243563, | |
| "learning_rate": 3.7375219639589536e-05, | |
| "loss": 0.1275, | |
| "num_input_tokens_seen": 81894240, | |
| "step": 550, | |
| "train_runtime": 2622.4152, | |
| "train_tokens_per_second": 31228.556 | |
| }, | |
| { | |
| "epoch": 0.33882783882783885, | |
| "grad_norm": 0.24142531223687672, | |
| "learning_rate": 3.716634397351097e-05, | |
| "loss": 0.1325, | |
| "num_input_tokens_seen": 82629360, | |
| "step": 555, | |
| "train_runtime": 2645.5546, | |
| "train_tokens_per_second": 31233.285 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 0.20768412269548192, | |
| "learning_rate": 3.695634946553296e-05, | |
| "loss": 0.1262, | |
| "num_input_tokens_seen": 83382976, | |
| "step": 560, | |
| "train_runtime": 2669.8972, | |
| "train_tokens_per_second": 31230.781 | |
| }, | |
| { | |
| "epoch": 0.34493284493284493, | |
| "grad_norm": 0.19630996215409985, | |
| "learning_rate": 3.674525542718035e-05, | |
| "loss": 0.1341, | |
| "num_input_tokens_seen": 84124464, | |
| "step": 565, | |
| "train_runtime": 2693.4313, | |
| "train_tokens_per_second": 31233.194 | |
| }, | |
| { | |
| "epoch": 0.34798534798534797, | |
| "grad_norm": 0.19874056348871574, | |
| "learning_rate": 3.653308127109309e-05, | |
| "loss": 0.1253, | |
| "num_input_tokens_seen": 84863584, | |
| "step": 570, | |
| "train_runtime": 2717.3609, | |
| "train_tokens_per_second": 31230.149 | |
| }, | |
| { | |
| "epoch": 0.351037851037851, | |
| "grad_norm": 0.21665396551989632, | |
| "learning_rate": 3.631984650924094e-05, | |
| "loss": 0.1294, | |
| "num_input_tokens_seen": 85589552, | |
| "step": 575, | |
| "train_runtime": 2740.6271, | |
| "train_tokens_per_second": 31229.916 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 0.18724948594556168, | |
| "learning_rate": 3.610557075112914e-05, | |
| "loss": 0.1231, | |
| "num_input_tokens_seen": 86354208, | |
| "step": 580, | |
| "train_runtime": 2764.345, | |
| "train_tokens_per_second": 31238.579 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 0.2202900548353958, | |
| "learning_rate": 3.58902737019951e-05, | |
| "loss": 0.1258, | |
| "num_input_tokens_seen": 87086304, | |
| "step": 585, | |
| "train_runtime": 2787.8025, | |
| "train_tokens_per_second": 31238.333 | |
| }, | |
| { | |
| "epoch": 0.3601953601953602, | |
| "grad_norm": 0.20619605507158134, | |
| "learning_rate": 3.567397516099621e-05, | |
| "loss": 0.126, | |
| "num_input_tokens_seen": 87856016, | |
| "step": 590, | |
| "train_runtime": 2811.8181, | |
| "train_tokens_per_second": 31245.27 | |
| }, | |
| { | |
| "epoch": 0.36324786324786323, | |
| "grad_norm": 0.2073317626118498, | |
| "learning_rate": 3.545669501938913e-05, | |
| "loss": 0.1212, | |
| "num_input_tokens_seen": 88583632, | |
| "step": 595, | |
| "train_runtime": 2835.562, | |
| "train_tokens_per_second": 31240.238 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.20050334660313449, | |
| "learning_rate": 3.5238453258700514e-05, | |
| "loss": 0.1201, | |
| "num_input_tokens_seen": 89328928, | |
| "step": 600, | |
| "train_runtime": 2859.8853, | |
| "train_tokens_per_second": 31235.144 | |
| }, | |
| { | |
| "epoch": 0.3693528693528694, | |
| "grad_norm": 0.19619041030752735, | |
| "learning_rate": 3.501926994888946e-05, | |
| "loss": 0.1249, | |
| "num_input_tokens_seen": 90063824, | |
| "step": 605, | |
| "train_runtime": 2883.3725, | |
| "train_tokens_per_second": 31235.584 | |
| }, | |
| { | |
| "epoch": 0.3724053724053724, | |
| "grad_norm": 0.22247005969852648, | |
| "learning_rate": 3.479916524650188e-05, | |
| "loss": 0.1251, | |
| "num_input_tokens_seen": 90761024, | |
| "step": 610, | |
| "train_runtime": 2906.2071, | |
| "train_tokens_per_second": 31230.06 | |
| }, | |
| { | |
| "epoch": 0.37545787545787546, | |
| "grad_norm": 0.20583255395391806, | |
| "learning_rate": 3.45781593928168e-05, | |
| "loss": 0.1178, | |
| "num_input_tokens_seen": 91512496, | |
| "step": 615, | |
| "train_runtime": 2930.3305, | |
| "train_tokens_per_second": 31229.411 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 0.18562515742016406, | |
| "learning_rate": 3.4356272711984994e-05, | |
| "loss": 0.1199, | |
| "num_input_tokens_seen": 92248080, | |
| "step": 620, | |
| "train_runtime": 2954.1132, | |
| "train_tokens_per_second": 31226.996 | |
| }, | |
| { | |
| "epoch": 0.38156288156288154, | |
| "grad_norm": 0.21782057661344542, | |
| "learning_rate": 3.413352560915988e-05, | |
| "loss": 0.124, | |
| "num_input_tokens_seen": 92989936, | |
| "step": 625, | |
| "train_runtime": 2978.3068, | |
| "train_tokens_per_second": 31222.417 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.19573701245190292, | |
| "learning_rate": 3.390993856862106e-05, | |
| "loss": 0.1264, | |
| "num_input_tokens_seen": 93726464, | |
| "step": 630, | |
| "train_runtime": 3001.3205, | |
| "train_tokens_per_second": 31228.409 | |
| }, | |
| { | |
| "epoch": 0.3876678876678877, | |
| "grad_norm": 0.17976909902173854, | |
| "learning_rate": 3.368553215189052e-05, | |
| "loss": 0.1186, | |
| "num_input_tokens_seen": 94492800, | |
| "step": 635, | |
| "train_runtime": 3025.7327, | |
| "train_tokens_per_second": 31229.725 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 0.20215210432873576, | |
| "learning_rate": 3.346032699584176e-05, | |
| "loss": 0.1278, | |
| "num_input_tokens_seen": 95231392, | |
| "step": 640, | |
| "train_runtime": 3049.3935, | |
| "train_tokens_per_second": 31229.618 | |
| }, | |
| { | |
| "epoch": 0.39377289377289376, | |
| "grad_norm": 0.20929349823672183, | |
| "learning_rate": 3.323434381080199e-05, | |
| "loss": 0.1247, | |
| "num_input_tokens_seen": 95982544, | |
| "step": 645, | |
| "train_runtime": 3073.2829, | |
| "train_tokens_per_second": 31231.276 | |
| }, | |
| { | |
| "epoch": 0.3968253968253968, | |
| "grad_norm": 0.20447599448794396, | |
| "learning_rate": 3.300760337864755e-05, | |
| "loss": 0.128, | |
| "num_input_tokens_seen": 96701152, | |
| "step": 650, | |
| "train_runtime": 3096.405, | |
| "train_tokens_per_second": 31230.137 | |
| }, | |
| { | |
| "epoch": 0.3998778998778999, | |
| "grad_norm": 0.2575268707412887, | |
| "learning_rate": 3.278012655089277e-05, | |
| "loss": 0.1283, | |
| "num_input_tokens_seen": 97439328, | |
| "step": 655, | |
| "train_runtime": 3119.8702, | |
| "train_tokens_per_second": 31231.853 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.204537648671508, | |
| "learning_rate": 3.255193424677244e-05, | |
| "loss": 0.1248, | |
| "num_input_tokens_seen": 98166064, | |
| "step": 660, | |
| "train_runtime": 3143.4312, | |
| "train_tokens_per_second": 31228.953 | |
| }, | |
| { | |
| "epoch": 0.405982905982906, | |
| "grad_norm": 0.2023202942048695, | |
| "learning_rate": 3.2323047451318023e-05, | |
| "loss": 0.111, | |
| "num_input_tokens_seen": 98902864, | |
| "step": 665, | |
| "train_runtime": 3167.4208, | |
| "train_tokens_per_second": 31225.047 | |
| }, | |
| { | |
| "epoch": 0.409035409035409, | |
| "grad_norm": 0.19709317600990275, | |
| "learning_rate": 3.209348721342781e-05, | |
| "loss": 0.1187, | |
| "num_input_tokens_seen": 99653088, | |
| "step": 670, | |
| "train_runtime": 3191.6163, | |
| "train_tokens_per_second": 31223.392 | |
| }, | |
| { | |
| "epoch": 0.41208791208791207, | |
| "grad_norm": 0.17414631496473276, | |
| "learning_rate": 3.1863274643931244e-05, | |
| "loss": 0.1186, | |
| "num_input_tokens_seen": 100446640, | |
| "step": 675, | |
| "train_runtime": 3216.3709, | |
| "train_tokens_per_second": 31229.806 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 0.19849915867951054, | |
| "learning_rate": 3.163243091364752e-05, | |
| "loss": 0.1226, | |
| "num_input_tokens_seen": 101212368, | |
| "step": 680, | |
| "train_runtime": 3240.5911, | |
| "train_tokens_per_second": 31232.687 | |
| }, | |
| { | |
| "epoch": 0.4181929181929182, | |
| "grad_norm": 0.22353842230490337, | |
| "learning_rate": 3.140097725143868e-05, | |
| "loss": 0.1262, | |
| "num_input_tokens_seen": 101947248, | |
| "step": 685, | |
| "train_runtime": 3264.9914, | |
| "train_tokens_per_second": 31224.354 | |
| }, | |
| { | |
| "epoch": 0.42124542124542125, | |
| "grad_norm": 0.21753012175757472, | |
| "learning_rate": 3.116893494225734e-05, | |
| "loss": 0.1266, | |
| "num_input_tokens_seen": 102702528, | |
| "step": 690, | |
| "train_runtime": 3288.4757, | |
| "train_tokens_per_second": 31231.044 | |
| }, | |
| { | |
| "epoch": 0.4242979242979243, | |
| "grad_norm": 0.21359194589119807, | |
| "learning_rate": 3.093632532518931e-05, | |
| "loss": 0.1191, | |
| "num_input_tokens_seen": 103463872, | |
| "step": 695, | |
| "train_runtime": 3312.4087, | |
| "train_tokens_per_second": 31235.237 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 0.2016755261363845, | |
| "learning_rate": 3.0703169791491184e-05, | |
| "loss": 0.1206, | |
| "num_input_tokens_seen": 104209488, | |
| "step": 700, | |
| "train_runtime": 3335.6362, | |
| "train_tokens_per_second": 31241.263 | |
| }, | |
| { | |
| "epoch": 0.43040293040293043, | |
| "grad_norm": 0.19651662325310928, | |
| "learning_rate": 3.0469489782623163e-05, | |
| "loss": 0.1125, | |
| "num_input_tokens_seen": 104951200, | |
| "step": 705, | |
| "train_runtime": 3359.3503, | |
| "train_tokens_per_second": 31241.517 | |
| }, | |
| { | |
| "epoch": 0.43345543345543347, | |
| "grad_norm": 0.21382101846342014, | |
| "learning_rate": 3.0235306788277275e-05, | |
| "loss": 0.1203, | |
| "num_input_tokens_seen": 105713856, | |
| "step": 710, | |
| "train_runtime": 3383.3862, | |
| "train_tokens_per_second": 31244.986 | |
| }, | |
| { | |
| "epoch": 0.4365079365079365, | |
| "grad_norm": 0.19090499867460292, | |
| "learning_rate": 3.0000642344401113e-05, | |
| "loss": 0.1163, | |
| "num_input_tokens_seen": 106470640, | |
| "step": 715, | |
| "train_runtime": 3407.4748, | |
| "train_tokens_per_second": 31246.2 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.19619920941448632, | |
| "learning_rate": 2.9765518031217353e-05, | |
| "loss": 0.1128, | |
| "num_input_tokens_seen": 107210304, | |
| "step": 720, | |
| "train_runtime": 3431.3044, | |
| "train_tokens_per_second": 31244.766 | |
| }, | |
| { | |
| "epoch": 0.4426129426129426, | |
| "grad_norm": 0.1833246022032629, | |
| "learning_rate": 2.952995547123919e-05, | |
| "loss": 0.1159, | |
| "num_input_tokens_seen": 107959696, | |
| "step": 725, | |
| "train_runtime": 3455.9023, | |
| "train_tokens_per_second": 31239.221 | |
| }, | |
| { | |
| "epoch": 0.4456654456654457, | |
| "grad_norm": 0.21349272224922136, | |
| "learning_rate": 2.9293976327281908e-05, | |
| "loss": 0.131, | |
| "num_input_tokens_seen": 108695072, | |
| "step": 730, | |
| "train_runtime": 3479.3306, | |
| "train_tokens_per_second": 31240.225 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 0.21790534063384087, | |
| "learning_rate": 2.905760230047068e-05, | |
| "loss": 0.1198, | |
| "num_input_tokens_seen": 109430784, | |
| "step": 735, | |
| "train_runtime": 3503.6405, | |
| "train_tokens_per_second": 31233.451 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 0.187424280063185, | |
| "learning_rate": 2.882085512824495e-05, | |
| "loss": 0.1135, | |
| "num_input_tokens_seen": 110179680, | |
| "step": 740, | |
| "train_runtime": 3527.9623, | |
| "train_tokens_per_second": 31230.402 | |
| }, | |
| { | |
| "epoch": 0.4548229548229548, | |
| "grad_norm": 0.22185544255865774, | |
| "learning_rate": 2.8583756582359338e-05, | |
| "loss": 0.1272, | |
| "num_input_tokens_seen": 110924720, | |
| "step": 745, | |
| "train_runtime": 3551.6855, | |
| "train_tokens_per_second": 31231.572 | |
| }, | |
| { | |
| "epoch": 0.45787545787545786, | |
| "grad_norm": 0.18854068626118867, | |
| "learning_rate": 2.8346328466881545e-05, | |
| "loss": 0.1233, | |
| "num_input_tokens_seen": 111655808, | |
| "step": 750, | |
| "train_runtime": 3574.7075, | |
| "train_tokens_per_second": 31234.95 | |
| }, | |
| { | |
| "epoch": 0.4609279609279609, | |
| "grad_norm": 0.1787742433542255, | |
| "learning_rate": 2.8108592616187133e-05, | |
| "loss": 0.1178, | |
| "num_input_tokens_seen": 112410512, | |
| "step": 755, | |
| "train_runtime": 3598.7083, | |
| "train_tokens_per_second": 31236.35 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 0.22324380599041693, | |
| "learning_rate": 2.7870570892951642e-05, | |
| "loss": 0.121, | |
| "num_input_tokens_seen": 113144208, | |
| "step": 760, | |
| "train_runtime": 3622.2954, | |
| "train_tokens_per_second": 31235.5 | |
| }, | |
| { | |
| "epoch": 0.46703296703296704, | |
| "grad_norm": 0.19361401702432798, | |
| "learning_rate": 2.763228518614004e-05, | |
| "loss": 0.1194, | |
| "num_input_tokens_seen": 113890288, | |
| "step": 765, | |
| "train_runtime": 3645.5237, | |
| "train_tokens_per_second": 31241.132 | |
| }, | |
| { | |
| "epoch": 0.4700854700854701, | |
| "grad_norm": 0.19968273620755148, | |
| "learning_rate": 2.739375740899375e-05, | |
| "loss": 0.1158, | |
| "num_input_tokens_seen": 114624800, | |
| "step": 770, | |
| "train_runtime": 3669.3523, | |
| "train_tokens_per_second": 31238.429 | |
| }, | |
| { | |
| "epoch": 0.4731379731379731, | |
| "grad_norm": 0.20408585428254647, | |
| "learning_rate": 2.715500949701549e-05, | |
| "loss": 0.1182, | |
| "num_input_tokens_seen": 115354240, | |
| "step": 775, | |
| "train_runtime": 3693.277, | |
| "train_tokens_per_second": 31233.574 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.2174159465883422, | |
| "learning_rate": 2.6916063405952026e-05, | |
| "loss": 0.1221, | |
| "num_input_tokens_seen": 116095344, | |
| "step": 780, | |
| "train_runtime": 3716.8137, | |
| "train_tokens_per_second": 31235.18 | |
| }, | |
| { | |
| "epoch": 0.47924297924297926, | |
| "grad_norm": 0.21384700668290071, | |
| "learning_rate": 2.667694110977506e-05, | |
| "loss": 0.119, | |
| "num_input_tokens_seen": 116848064, | |
| "step": 785, | |
| "train_runtime": 3740.9686, | |
| "train_tokens_per_second": 31234.709 | |
| }, | |
| { | |
| "epoch": 0.4822954822954823, | |
| "grad_norm": 0.18873107987062665, | |
| "learning_rate": 2.6437664598660516e-05, | |
| "loss": 0.124, | |
| "num_input_tokens_seen": 117585264, | |
| "step": 790, | |
| "train_runtime": 3764.6964, | |
| "train_tokens_per_second": 31233.664 | |
| }, | |
| { | |
| "epoch": 0.48534798534798534, | |
| "grad_norm": 0.17903308008040358, | |
| "learning_rate": 2.6198255876966204e-05, | |
| "loss": 0.1181, | |
| "num_input_tokens_seen": 118323136, | |
| "step": 795, | |
| "train_runtime": 3788.6994, | |
| "train_tokens_per_second": 31230.542 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 0.18401792765892658, | |
| "learning_rate": 2.5958736961208314e-05, | |
| "loss": 0.1197, | |
| "num_input_tokens_seen": 119069008, | |
| "step": 800, | |
| "train_runtime": 3812.7634, | |
| "train_tokens_per_second": 31229.057 | |
| }, | |
| { | |
| "epoch": 0.49145299145299143, | |
| "grad_norm": 0.1854520049082589, | |
| "learning_rate": 2.5719129878036686e-05, | |
| "loss": 0.1067, | |
| "num_input_tokens_seen": 119794944, | |
| "step": 805, | |
| "train_runtime": 3836.5253, | |
| "train_tokens_per_second": 31224.854 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.19767556170945152, | |
| "learning_rate": 2.547945666220923e-05, | |
| "loss": 0.1213, | |
| "num_input_tokens_seen": 120536208, | |
| "step": 810, | |
| "train_runtime": 3859.9944, | |
| "train_tokens_per_second": 31227.042 | |
| }, | |
| { | |
| "epoch": 0.49755799755799757, | |
| "grad_norm": 0.20390176780643668, | |
| "learning_rate": 2.523973935456554e-05, | |
| "loss": 0.1216, | |
| "num_input_tokens_seen": 121263008, | |
| "step": 815, | |
| "train_runtime": 3883.7222, | |
| "train_tokens_per_second": 31223.399 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 0.1917820793311171, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.1152, | |
| "num_input_tokens_seen": 122000960, | |
| "step": 820, | |
| "train_runtime": 3907.2967, | |
| "train_tokens_per_second": 31223.879 | |
| }, | |
| { | |
| "epoch": 0.5036630036630036, | |
| "grad_norm": 0.1934270965898771, | |
| "learning_rate": 2.4760260645434462e-05, | |
| "loss": 0.1277, | |
| "num_input_tokens_seen": 122758368, | |
| "step": 825, | |
| "train_runtime": 3931.5115, | |
| "train_tokens_per_second": 31224.217 | |
| }, | |
| { | |
| "epoch": 0.5067155067155067, | |
| "grad_norm": 0.20490340371559168, | |
| "learning_rate": 2.452054333779078e-05, | |
| "loss": 0.1209, | |
| "num_input_tokens_seen": 123478800, | |
| "step": 830, | |
| "train_runtime": 3954.8707, | |
| "train_tokens_per_second": 31221.956 | |
| }, | |
| { | |
| "epoch": 0.5097680097680097, | |
| "grad_norm": 0.19981433936080123, | |
| "learning_rate": 2.4280870121963323e-05, | |
| "loss": 0.1202, | |
| "num_input_tokens_seen": 124232736, | |
| "step": 835, | |
| "train_runtime": 3978.6161, | |
| "train_tokens_per_second": 31225.113 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.21883346459426525, | |
| "learning_rate": 2.404126303879169e-05, | |
| "loss": 0.1263, | |
| "num_input_tokens_seen": 124967824, | |
| "step": 840, | |
| "train_runtime": 4003.1394, | |
| "train_tokens_per_second": 31217.455 | |
| }, | |
| { | |
| "epoch": 0.5158730158730159, | |
| "grad_norm": 0.223762879159024, | |
| "learning_rate": 2.38017441230338e-05, | |
| "loss": 0.1157, | |
| "num_input_tokens_seen": 125720640, | |
| "step": 845, | |
| "train_runtime": 4027.5319, | |
| "train_tokens_per_second": 31215.306 | |
| }, | |
| { | |
| "epoch": 0.518925518925519, | |
| "grad_norm": 0.20218976688130538, | |
| "learning_rate": 2.3562335401339486e-05, | |
| "loss": 0.112, | |
| "num_input_tokens_seen": 126482544, | |
| "step": 850, | |
| "train_runtime": 4051.6061, | |
| "train_tokens_per_second": 31217.878 | |
| }, | |
| { | |
| "epoch": 0.521978021978022, | |
| "grad_norm": 0.19347030172727372, | |
| "learning_rate": 2.3323058890224938e-05, | |
| "loss": 0.1183, | |
| "num_input_tokens_seen": 127241776, | |
| "step": 855, | |
| "train_runtime": 4075.8036, | |
| "train_tokens_per_second": 31218.819 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 0.18704391513542812, | |
| "learning_rate": 2.3083936594047983e-05, | |
| "loss": 0.1111, | |
| "num_input_tokens_seen": 128032656, | |
| "step": 860, | |
| "train_runtime": 4100.6525, | |
| "train_tokens_per_second": 31222.508 | |
| }, | |
| { | |
| "epoch": 0.5280830280830281, | |
| "grad_norm": 0.2013558526767148, | |
| "learning_rate": 2.2844990502984513e-05, | |
| "loss": 0.1247, | |
| "num_input_tokens_seen": 128759440, | |
| "step": 865, | |
| "train_runtime": 4123.9619, | |
| "train_tokens_per_second": 31222.267 | |
| }, | |
| { | |
| "epoch": 0.5311355311355311, | |
| "grad_norm": 0.2171753927516438, | |
| "learning_rate": 2.2606242591006253e-05, | |
| "loss": 0.1144, | |
| "num_input_tokens_seen": 129520032, | |
| "step": 870, | |
| "train_runtime": 4147.8679, | |
| "train_tokens_per_second": 31225.689 | |
| }, | |
| { | |
| "epoch": 0.5341880341880342, | |
| "grad_norm": 0.19798696616834766, | |
| "learning_rate": 2.2367714813859967e-05, | |
| "loss": 0.115, | |
| "num_input_tokens_seen": 130265744, | |
| "step": 875, | |
| "train_runtime": 4171.3174, | |
| "train_tokens_per_second": 31228.922 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 0.18314437140647155, | |
| "learning_rate": 2.2129429107048364e-05, | |
| "loss": 0.1195, | |
| "num_input_tokens_seen": 131007104, | |
| "step": 880, | |
| "train_runtime": 4195.2949, | |
| "train_tokens_per_second": 31227.15 | |
| }, | |
| { | |
| "epoch": 0.5402930402930403, | |
| "grad_norm": 0.20714245389854075, | |
| "learning_rate": 2.189140738381288e-05, | |
| "loss": 0.1264, | |
| "num_input_tokens_seen": 131729648, | |
| "step": 885, | |
| "train_runtime": 4218.5313, | |
| "train_tokens_per_second": 31226.424 | |
| }, | |
| { | |
| "epoch": 0.5433455433455433, | |
| "grad_norm": 0.19693823237763247, | |
| "learning_rate": 2.1653671533118468e-05, | |
| "loss": 0.1116, | |
| "num_input_tokens_seen": 132482960, | |
| "step": 890, | |
| "train_runtime": 4242.4717, | |
| "train_tokens_per_second": 31227.777 | |
| }, | |
| { | |
| "epoch": 0.5463980463980463, | |
| "grad_norm": 0.2096492967043494, | |
| "learning_rate": 2.1416243417640668e-05, | |
| "loss": 0.123, | |
| "num_input_tokens_seen": 133239472, | |
| "step": 895, | |
| "train_runtime": 4266.6466, | |
| "train_tokens_per_second": 31228.148 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.19355768030516468, | |
| "learning_rate": 2.1179144871755056e-05, | |
| "loss": 0.115, | |
| "num_input_tokens_seen": 133981072, | |
| "step": 900, | |
| "train_runtime": 4290.5714, | |
| "train_tokens_per_second": 31226.86 | |
| }, | |
| { | |
| "epoch": 0.5525030525030525, | |
| "grad_norm": 0.2024970018812971, | |
| "learning_rate": 2.0942397699529325e-05, | |
| "loss": 0.1117, | |
| "num_input_tokens_seen": 134739520, | |
| "step": 905, | |
| "train_runtime": 4314.8164, | |
| "train_tokens_per_second": 31227.174 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 0.17844654021795547, | |
| "learning_rate": 2.0706023672718098e-05, | |
| "loss": 0.1177, | |
| "num_input_tokens_seen": 135479872, | |
| "step": 910, | |
| "train_runtime": 4338.6497, | |
| "train_tokens_per_second": 31226.276 | |
| }, | |
| { | |
| "epoch": 0.5586080586080586, | |
| "grad_norm": 0.19405206525033525, | |
| "learning_rate": 2.047004452876081e-05, | |
| "loss": 0.1133, | |
| "num_input_tokens_seen": 136213072, | |
| "step": 915, | |
| "train_runtime": 4362.3786, | |
| "train_tokens_per_second": 31224.496 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 0.1838871460077683, | |
| "learning_rate": 2.0234481968782653e-05, | |
| "loss": 0.1102, | |
| "num_input_tokens_seen": 136958032, | |
| "step": 920, | |
| "train_runtime": 4385.869, | |
| "train_tokens_per_second": 31227.114 | |
| }, | |
| { | |
| "epoch": 0.5647130647130647, | |
| "grad_norm": 0.1795274118999085, | |
| "learning_rate": 1.9999357655598893e-05, | |
| "loss": 0.1148, | |
| "num_input_tokens_seen": 137681904, | |
| "step": 925, | |
| "train_runtime": 4408.9297, | |
| "train_tokens_per_second": 31227.965 | |
| }, | |
| { | |
| "epoch": 0.5677655677655677, | |
| "grad_norm": 0.18873100972065715, | |
| "learning_rate": 1.9764693211722727e-05, | |
| "loss": 0.1191, | |
| "num_input_tokens_seen": 138418096, | |
| "step": 930, | |
| "train_runtime": 4433.1164, | |
| "train_tokens_per_second": 31223.655 | |
| }, | |
| { | |
| "epoch": 0.5708180708180708, | |
| "grad_norm": 0.20355481920042287, | |
| "learning_rate": 1.9530510217376843e-05, | |
| "loss": 0.1194, | |
| "num_input_tokens_seen": 139172816, | |
| "step": 935, | |
| "train_runtime": 4456.6898, | |
| "train_tokens_per_second": 31227.844 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 0.19657277008573146, | |
| "learning_rate": 1.929683020850883e-05, | |
| "loss": 0.1105, | |
| "num_input_tokens_seen": 139907184, | |
| "step": 940, | |
| "train_runtime": 4479.8469, | |
| "train_tokens_per_second": 31230.349 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.18907325722938767, | |
| "learning_rate": 1.9063674674810696e-05, | |
| "loss": 0.1172, | |
| "num_input_tokens_seen": 140656832, | |
| "step": 945, | |
| "train_runtime": 4503.8625, | |
| "train_tokens_per_second": 31230.268 | |
| }, | |
| { | |
| "epoch": 0.57997557997558, | |
| "grad_norm": 0.19881146792371512, | |
| "learning_rate": 1.8831065057742657e-05, | |
| "loss": 0.1169, | |
| "num_input_tokens_seen": 141397952, | |
| "step": 950, | |
| "train_runtime": 4527.8579, | |
| "train_tokens_per_second": 31228.443 | |
| }, | |
| { | |
| "epoch": 0.5830280830280831, | |
| "grad_norm": 0.20546717039279117, | |
| "learning_rate": 1.8599022748561325e-05, | |
| "loss": 0.1205, | |
| "num_input_tokens_seen": 142152656, | |
| "step": 955, | |
| "train_runtime": 4552.0604, | |
| "train_tokens_per_second": 31228.201 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.18706320870845003, | |
| "learning_rate": 1.8367569086352483e-05, | |
| "loss": 0.1078, | |
| "num_input_tokens_seen": 142883936, | |
| "step": 960, | |
| "train_runtime": 4575.4177, | |
| "train_tokens_per_second": 31228.61 | |
| }, | |
| { | |
| "epoch": 0.5891330891330891, | |
| "grad_norm": 0.1977859789179324, | |
| "learning_rate": 1.8136725356068762e-05, | |
| "loss": 0.1208, | |
| "num_input_tokens_seen": 143644704, | |
| "step": 965, | |
| "train_runtime": 4599.2685, | |
| "train_tokens_per_second": 31232.076 | |
| }, | |
| { | |
| "epoch": 0.5921855921855922, | |
| "grad_norm": 0.19466234465200857, | |
| "learning_rate": 1.7906512786572198e-05, | |
| "loss": 0.1181, | |
| "num_input_tokens_seen": 144375984, | |
| "step": 970, | |
| "train_runtime": 4622.8901, | |
| "train_tokens_per_second": 31230.677 | |
| }, | |
| { | |
| "epoch": 0.5952380952380952, | |
| "grad_norm": 0.19177543805173208, | |
| "learning_rate": 1.767695254868198e-05, | |
| "loss": 0.1137, | |
| "num_input_tokens_seen": 145118576, | |
| "step": 975, | |
| "train_runtime": 4646.52, | |
| "train_tokens_per_second": 31231.67 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 0.20105614821200013, | |
| "learning_rate": 1.744806575322756e-05, | |
| "loss": 0.1181, | |
| "num_input_tokens_seen": 145859024, | |
| "step": 980, | |
| "train_runtime": 4670.5989, | |
| "train_tokens_per_second": 31229.191 | |
| }, | |
| { | |
| "epoch": 0.6013431013431013, | |
| "grad_norm": 0.1734938645633314, | |
| "learning_rate": 1.7219873449107233e-05, | |
| "loss": 0.1175, | |
| "num_input_tokens_seen": 146612992, | |
| "step": 985, | |
| "train_runtime": 4694.6339, | |
| "train_tokens_per_second": 31229.91 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.1913248884669364, | |
| "learning_rate": 1.699239662135246e-05, | |
| "loss": 0.1136, | |
| "num_input_tokens_seen": 147331680, | |
| "step": 990, | |
| "train_runtime": 4717.5667, | |
| "train_tokens_per_second": 31230.439 | |
| }, | |
| { | |
| "epoch": 0.6074481074481074, | |
| "grad_norm": 0.1771679404488154, | |
| "learning_rate": 1.6765656189198013e-05, | |
| "loss": 0.1134, | |
| "num_input_tokens_seen": 148081424, | |
| "step": 995, | |
| "train_runtime": 4741.5363, | |
| "train_tokens_per_second": 31230.684 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 0.2027735803632452, | |
| "learning_rate": 1.653967300415824e-05, | |
| "loss": 0.1147, | |
| "num_input_tokens_seen": 148805696, | |
| "step": 1000, | |
| "train_runtime": 4764.8845, | |
| "train_tokens_per_second": 31229.655 | |
| }, | |
| { | |
| "epoch": 0.6135531135531136, | |
| "grad_norm": 0.18538724479293786, | |
| "learning_rate": 1.6314467848109483e-05, | |
| "loss": 0.1194, | |
| "num_input_tokens_seen": 149555088, | |
| "step": 1005, | |
| "train_runtime": 25.5648, | |
| "train_tokens_per_second": 5850048.044 | |
| }, | |
| { | |
| "epoch": 0.6166056166056166, | |
| "grad_norm": 0.1934429088985181, | |
| "learning_rate": 1.609006143137895e-05, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 150280064, | |
| "step": 1010, | |
| "train_runtime": 49.6761, | |
| "train_tokens_per_second": 3025200.606 | |
| }, | |
| { | |
| "epoch": 0.6196581196581197, | |
| "grad_norm": 0.16765257936821912, | |
| "learning_rate": 1.5866474390840125e-05, | |
| "loss": 0.1098, | |
| "num_input_tokens_seen": 151000304, | |
| "step": 1015, | |
| "train_runtime": 73.0673, | |
| "train_tokens_per_second": 2066592.968 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.17674789456341927, | |
| "learning_rate": 1.564372728801501e-05, | |
| "loss": 0.1085, | |
| "num_input_tokens_seen": 151754352, | |
| "step": 1020, | |
| "train_runtime": 97.041, | |
| "train_tokens_per_second": 1563816.371 | |
| }, | |
| { | |
| "epoch": 0.6257631257631258, | |
| "grad_norm": 0.220812455479937, | |
| "learning_rate": 1.5421840607183203e-05, | |
| "loss": 0.1078, | |
| "num_input_tokens_seen": 152529360, | |
| "step": 1025, | |
| "train_runtime": 121.5101, | |
| "train_tokens_per_second": 1255280.935 | |
| }, | |
| { | |
| "epoch": 0.6288156288156288, | |
| "grad_norm": 0.19180349508234404, | |
| "learning_rate": 1.5200834753498128e-05, | |
| "loss": 0.112, | |
| "num_input_tokens_seen": 153265552, | |
| "step": 1030, | |
| "train_runtime": 144.9574, | |
| "train_tokens_per_second": 1057314.276 | |
| }, | |
| { | |
| "epoch": 0.6318681318681318, | |
| "grad_norm": 0.17410351424139398, | |
| "learning_rate": 1.4980730051110541e-05, | |
| "loss": 0.1121, | |
| "num_input_tokens_seen": 154019536, | |
| "step": 1035, | |
| "train_runtime": 169.0183, | |
| "train_tokens_per_second": 911259.53 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 0.19396480518137035, | |
| "learning_rate": 1.4761546741299495e-05, | |
| "loss": 0.1149, | |
| "num_input_tokens_seen": 154765472, | |
| "step": 1040, | |
| "train_runtime": 193.2243, | |
| "train_tokens_per_second": 800962.937 | |
| }, | |
| { | |
| "epoch": 0.6379731379731379, | |
| "grad_norm": 0.19446739256593015, | |
| "learning_rate": 1.4543304980610878e-05, | |
| "loss": 0.1181, | |
| "num_input_tokens_seen": 155509408, | |
| "step": 1045, | |
| "train_runtime": 217.4505, | |
| "train_tokens_per_second": 715148.617 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.19960142083739169, | |
| "learning_rate": 1.4326024839003804e-05, | |
| "loss": 0.1149, | |
| "num_input_tokens_seen": 156264480, | |
| "step": 1050, | |
| "train_runtime": 241.8673, | |
| "train_tokens_per_second": 646075.247 | |
| }, | |
| { | |
| "epoch": 0.6440781440781441, | |
| "grad_norm": 0.17331942565311023, | |
| "learning_rate": 1.4109726298004911e-05, | |
| "loss": 0.1134, | |
| "num_input_tokens_seen": 157027088, | |
| "step": 1055, | |
| "train_runtime": 266.0002, | |
| "train_tokens_per_second": 590326.888 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 0.19236431044025754, | |
| "learning_rate": 1.3894429248870866e-05, | |
| "loss": 0.1094, | |
| "num_input_tokens_seen": 157758048, | |
| "step": 1060, | |
| "train_runtime": 289.4973, | |
| "train_tokens_per_second": 544937.92 | |
| }, | |
| { | |
| "epoch": 0.6501831501831502, | |
| "grad_norm": 0.18106817069538997, | |
| "learning_rate": 1.3680153490759073e-05, | |
| "loss": 0.1138, | |
| "num_input_tokens_seen": 158510720, | |
| "step": 1065, | |
| "train_runtime": 313.3309, | |
| "train_tokens_per_second": 505889.253 | |
| }, | |
| { | |
| "epoch": 0.6532356532356532, | |
| "grad_norm": 0.19244418546935363, | |
| "learning_rate": 1.3466918728906919e-05, | |
| "loss": 0.1149, | |
| "num_input_tokens_seen": 159293376, | |
| "step": 1070, | |
| "train_runtime": 337.014, | |
| "train_tokens_per_second": 472661.066 | |
| }, | |
| { | |
| "epoch": 0.6562881562881563, | |
| "grad_norm": 0.18158189376043865, | |
| "learning_rate": 1.3254744572819658e-05, | |
| "loss": 0.1116, | |
| "num_input_tokens_seen": 160033920, | |
| "step": 1075, | |
| "train_runtime": 360.3719, | |
| "train_tokens_per_second": 444079.923 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.19244456879921595, | |
| "learning_rate": 1.3043650534467053e-05, | |
| "loss": 0.1126, | |
| "num_input_tokens_seen": 160775472, | |
| "step": 1080, | |
| "train_runtime": 383.8174, | |
| "train_tokens_per_second": 418885.277 | |
| }, | |
| { | |
| "epoch": 0.6623931623931624, | |
| "grad_norm": 0.17577856984557433, | |
| "learning_rate": 1.2833656026489028e-05, | |
| "loss": 0.107, | |
| "num_input_tokens_seen": 161494640, | |
| "step": 1085, | |
| "train_runtime": 406.6648, | |
| "train_tokens_per_second": 397119.792 | |
| }, | |
| { | |
| "epoch": 0.6654456654456654, | |
| "grad_norm": 0.17959267840002394, | |
| "learning_rate": 1.2624780360410466e-05, | |
| "loss": 0.1218, | |
| "num_input_tokens_seen": 162241408, | |
| "step": 1090, | |
| "train_runtime": 431.2415, | |
| "train_tokens_per_second": 376219.381 | |
| }, | |
| { | |
| "epoch": 0.6684981684981685, | |
| "grad_norm": 0.1847239274585138, | |
| "learning_rate": 1.2417042744865237e-05, | |
| "loss": 0.1105, | |
| "num_input_tokens_seen": 162990224, | |
| "step": 1095, | |
| "train_runtime": 455.3619, | |
| "train_tokens_per_second": 357935.613 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 0.19884446253341626, | |
| "learning_rate": 1.2210462283829755e-05, | |
| "loss": 0.1102, | |
| "num_input_tokens_seen": 163757808, | |
| "step": 1100, | |
| "train_runtime": 479.7341, | |
| "train_tokens_per_second": 341351.189 | |
| }, | |
| { | |
| "epoch": 0.6746031746031746, | |
| "grad_norm": 0.19296172293392244, | |
| "learning_rate": 1.2005057974866135e-05, | |
| "loss": 0.1159, | |
| "num_input_tokens_seen": 164501232, | |
| "step": 1105, | |
| "train_runtime": 503.0655, | |
| "train_tokens_per_second": 326997.617 | |
| }, | |
| { | |
| "epoch": 0.6776556776556777, | |
| "grad_norm": 0.1998262400802252, | |
| "learning_rate": 1.180084870737516e-05, | |
| "loss": 0.1122, | |
| "num_input_tokens_seen": 165238192, | |
| "step": 1110, | |
| "train_runtime": 526.612, | |
| "train_tokens_per_second": 313775.981 | |
| }, | |
| { | |
| "epoch": 0.6807081807081807, | |
| "grad_norm": 0.20281243961269585, | |
| "learning_rate": 1.1597853260859128e-05, | |
| "loss": 0.109, | |
| "num_input_tokens_seen": 165972784, | |
| "step": 1115, | |
| "train_runtime": 550.0413, | |
| "train_tokens_per_second": 301746.042 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 0.17103712645724142, | |
| "learning_rate": 1.1396090303194893e-05, | |
| "loss": 0.1179, | |
| "num_input_tokens_seen": 166698432, | |
| "step": 1120, | |
| "train_runtime": 574.0303, | |
| "train_tokens_per_second": 290400.036 | |
| }, | |
| { | |
| "epoch": 0.6868131868131868, | |
| "grad_norm": 0.20076646818296295, | |
| "learning_rate": 1.1195578388917092e-05, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 167445392, | |
| "step": 1125, | |
| "train_runtime": 597.9066, | |
| "train_tokens_per_second": 280052.736 | |
| }, | |
| { | |
| "epoch": 0.6898656898656899, | |
| "grad_norm": 0.20216409746677386, | |
| "learning_rate": 1.0996335957511867e-05, | |
| "loss": 0.1113, | |
| "num_input_tokens_seen": 168191200, | |
| "step": 1130, | |
| "train_runtime": 621.5416, | |
| "train_tokens_per_second": 270603.289 | |
| }, | |
| { | |
| "epoch": 0.6929181929181929, | |
| "grad_norm": 0.18888869947602685, | |
| "learning_rate": 1.0798381331721109e-05, | |
| "loss": 0.1039, | |
| "num_input_tokens_seen": 168933088, | |
| "step": 1135, | |
| "train_runtime": 645.2438, | |
| "train_tokens_per_second": 261812.807 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.18865185597332762, | |
| "learning_rate": 1.060173271585747e-05, | |
| "loss": 0.1118, | |
| "num_input_tokens_seen": 169691472, | |
| "step": 1140, | |
| "train_runtime": 669.5538, | |
| "train_tokens_per_second": 253439.619 | |
| }, | |
| { | |
| "epoch": 0.699023199023199, | |
| "grad_norm": 0.2055676497654679, | |
| "learning_rate": 1.0406408194130259e-05, | |
| "loss": 0.1174, | |
| "num_input_tokens_seen": 170450960, | |
| "step": 1145, | |
| "train_runtime": 693.571, | |
| "train_tokens_per_second": 245758.488 | |
| }, | |
| { | |
| "epoch": 0.702075702075702, | |
| "grad_norm": 0.1797733076441166, | |
| "learning_rate": 1.021242572898237e-05, | |
| "loss": 0.1082, | |
| "num_input_tokens_seen": 171199728, | |
| "step": 1150, | |
| "train_runtime": 717.3337, | |
| "train_tokens_per_second": 238661.213 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 0.18564281961486254, | |
| "learning_rate": 1.0019803159438423e-05, | |
| "loss": 0.1089, | |
| "num_input_tokens_seen": 171929312, | |
| "step": 1155, | |
| "train_runtime": 740.7725, | |
| "train_tokens_per_second": 232094.636 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 0.2022740264006, | |
| "learning_rate": 9.82855819946428e-06, | |
| "loss": 0.1121, | |
| "num_input_tokens_seen": 172663152, | |
| "step": 1160, | |
| "train_runtime": 764.1473, | |
| "train_tokens_per_second": 225955.327 | |
| }, | |
| { | |
| "epoch": 0.7112332112332113, | |
| "grad_norm": 0.18410038031563444, | |
| "learning_rate": 9.638708436337976e-06, | |
| "loss": 0.1147, | |
| "num_input_tokens_seen": 173407584, | |
| "step": 1165, | |
| "train_runtime": 788.8497, | |
| "train_tokens_per_second": 219823.348 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.1818866026978008, | |
| "learning_rate": 9.450271329032404e-06, | |
| "loss": 0.1105, | |
| "num_input_tokens_seen": 174168016, | |
| "step": 1170, | |
| "train_runtime": 813.1805, | |
| "train_tokens_per_second": 214181.244 | |
| }, | |
| { | |
| "epoch": 0.7173382173382173, | |
| "grad_norm": 0.17218088112662416, | |
| "learning_rate": 9.263264206609726e-06, | |
| "loss": 0.1063, | |
| "num_input_tokens_seen": 174926656, | |
| "step": 1175, | |
| "train_runtime": 837.0944, | |
| "train_tokens_per_second": 208968.859 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 0.16452070915424657, | |
| "learning_rate": 9.077704266627776e-06, | |
| "loss": 0.099, | |
| "num_input_tokens_seen": 175650240, | |
| "step": 1180, | |
| "train_runtime": 860.1376, | |
| "train_tokens_per_second": 204211.789 | |
| }, | |
| { | |
| "epoch": 0.7234432234432234, | |
| "grad_norm": 0.2027716202688436, | |
| "learning_rate": 8.893608573558515e-06, | |
| "loss": 0.1061, | |
| "num_input_tokens_seen": 176397552, | |
| "step": 1185, | |
| "train_runtime": 884.3119, | |
| "train_tokens_per_second": 199474.366 | |
| }, | |
| { | |
| "epoch": 0.7264957264957265, | |
| "grad_norm": 0.17510357067652002, | |
| "learning_rate": 8.710994057218782e-06, | |
| "loss": 0.1078, | |
| "num_input_tokens_seen": 177140960, | |
| "step": 1190, | |
| "train_runtime": 908.5023, | |
| "train_tokens_per_second": 194981.306 | |
| }, | |
| { | |
| "epoch": 0.7295482295482295, | |
| "grad_norm": 0.18807427083210113, | |
| "learning_rate": 8.529877511213357e-06, | |
| "loss": 0.1165, | |
| "num_input_tokens_seen": 177893904, | |
| "step": 1195, | |
| "train_runtime": 932.5961, | |
| "train_tokens_per_second": 190751.292 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.17837489043029936, | |
| "learning_rate": 8.3502755913906e-06, | |
| "loss": 0.1072, | |
| "num_input_tokens_seen": 178643056, | |
| "step": 1200, | |
| "train_runtime": 956.3943, | |
| "train_tokens_per_second": 186788.086 | |
| }, | |
| { | |
| "epoch": 0.7356532356532357, | |
| "grad_norm": 0.17394135307692066, | |
| "learning_rate": 8.172204814310742e-06, | |
| "loss": 0.1039, | |
| "num_input_tokens_seen": 179400992, | |
| "step": 1205, | |
| "train_runtime": 980.017, | |
| "train_tokens_per_second": 183059.057 | |
| }, | |
| { | |
| "epoch": 0.7387057387057387, | |
| "grad_norm": 0.17137545583214386, | |
| "learning_rate": 7.99568155572701e-06, | |
| "loss": 0.1076, | |
| "num_input_tokens_seen": 180147952, | |
| "step": 1210, | |
| "train_runtime": 1003.7589, | |
| "train_tokens_per_second": 179473.327 | |
| }, | |
| { | |
| "epoch": 0.7417582417582418, | |
| "grad_norm": 0.17504618079149348, | |
| "learning_rate": 7.820722049079653e-06, | |
| "loss": 0.1069, | |
| "num_input_tokens_seen": 180883088, | |
| "step": 1215, | |
| "train_runtime": 1027.0432, | |
| "train_tokens_per_second": 176120.238 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 0.1951670021121092, | |
| "learning_rate": 7.647342384003087e-06, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 181612288, | |
| "step": 1220, | |
| "train_runtime": 1050.7348, | |
| "train_tokens_per_second": 172843.133 | |
| }, | |
| { | |
| "epoch": 0.7478632478632479, | |
| "grad_norm": 0.19013647109819243, | |
| "learning_rate": 7.475558504846264e-06, | |
| "loss": 0.1106, | |
| "num_input_tokens_seen": 182345168, | |
| "step": 1225, | |
| "train_runtime": 1074.1177, | |
| "train_tokens_per_second": 169762.738 | |
| }, | |
| { | |
| "epoch": 0.7509157509157509, | |
| "grad_norm": 0.18791492860019623, | |
| "learning_rate": 7.305386209206397e-06, | |
| "loss": 0.1044, | |
| "num_input_tokens_seen": 183100912, | |
| "step": 1230, | |
| "train_runtime": 1097.446, | |
| "train_tokens_per_second": 166842.747 | |
| }, | |
| { | |
| "epoch": 0.753968253968254, | |
| "grad_norm": 0.175040313646104, | |
| "learning_rate": 7.136841146476181e-06, | |
| "loss": 0.1104, | |
| "num_input_tokens_seen": 183851904, | |
| "step": 1235, | |
| "train_runtime": 1120.9048, | |
| "train_tokens_per_second": 164020.978 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 0.18920567878002045, | |
| "learning_rate": 6.969938816404639e-06, | |
| "loss": 0.105, | |
| "num_input_tokens_seen": 184595216, | |
| "step": 1240, | |
| "train_runtime": 1144.6234, | |
| "train_tokens_per_second": 161271.576 | |
| }, | |
| { | |
| "epoch": 0.76007326007326, | |
| "grad_norm": 0.19803248478850427, | |
| "learning_rate": 6.8046945676717375e-06, | |
| "loss": 0.1074, | |
| "num_input_tokens_seen": 185327184, | |
| "step": 1245, | |
| "train_runtime": 1168.3367, | |
| "train_tokens_per_second": 158624.807 | |
| }, | |
| { | |
| "epoch": 0.7631257631257631, | |
| "grad_norm": 0.19558020277347915, | |
| "learning_rate": 6.641123596476889e-06, | |
| "loss": 0.1041, | |
| "num_input_tokens_seen": 186065952, | |
| "step": 1250, | |
| "train_runtime": 1192.0283, | |
| "train_tokens_per_second": 156091.896 | |
| }, | |
| { | |
| "epoch": 0.7661782661782662, | |
| "grad_norm": 0.1733505959719854, | |
| "learning_rate": 6.4792409451414735e-06, | |
| "loss": 0.102, | |
| "num_input_tokens_seen": 186800528, | |
| "step": 1255, | |
| "train_runtime": 1215.4946, | |
| "train_tokens_per_second": 153682.735 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.1579471538252503, | |
| "learning_rate": 6.319061500725515e-06, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 187557520, | |
| "step": 1260, | |
| "train_runtime": 1239.1164, | |
| "train_tokens_per_second": 151363.923 | |
| }, | |
| { | |
| "epoch": 0.7722832722832723, | |
| "grad_norm": 0.1946862696439488, | |
| "learning_rate": 6.1605999936586725e-06, | |
| "loss": 0.1137, | |
| "num_input_tokens_seen": 188316848, | |
| "step": 1265, | |
| "train_runtime": 1263.0496, | |
| "train_tokens_per_second": 149096.958 | |
| }, | |
| { | |
| "epoch": 0.7753357753357754, | |
| "grad_norm": 0.1774333688441627, | |
| "learning_rate": 6.003870996385533e-06, | |
| "loss": 0.1025, | |
| "num_input_tokens_seen": 189066896, | |
| "step": 1270, | |
| "train_runtime": 1286.7326, | |
| "train_tokens_per_second": 146935.653 | |
| }, | |
| { | |
| "epoch": 0.7783882783882784, | |
| "grad_norm": 0.1759016160568281, | |
| "learning_rate": 5.848888922025553e-06, | |
| "loss": 0.1051, | |
| "num_input_tokens_seen": 189803936, | |
| "step": 1275, | |
| "train_runtime": 1310.5911, | |
| "train_tokens_per_second": 144823.151 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 0.20590084016837348, | |
| "learning_rate": 5.695668023047579e-06, | |
| "loss": 0.1109, | |
| "num_input_tokens_seen": 190535536, | |
| "step": 1280, | |
| "train_runtime": 1333.4095, | |
| "train_tokens_per_second": 142893.487 | |
| }, | |
| { | |
| "epoch": 0.7844932844932845, | |
| "grad_norm": 0.19479428342414234, | |
| "learning_rate": 5.544222389959164e-06, | |
| "loss": 0.1128, | |
| "num_input_tokens_seen": 191260240, | |
| "step": 1285, | |
| "train_runtime": 1357.1396, | |
| "train_tokens_per_second": 140928.938 | |
| }, | |
| { | |
| "epoch": 0.7875457875457875, | |
| "grad_norm": 0.19694909267237828, | |
| "learning_rate": 5.394565950010769e-06, | |
| "loss": 0.1095, | |
| "num_input_tokens_seen": 192011984, | |
| "step": 1290, | |
| "train_runtime": 1380.9938, | |
| "train_tokens_per_second": 139038.989 | |
| }, | |
| { | |
| "epoch": 0.7905982905982906, | |
| "grad_norm": 0.1832591111163321, | |
| "learning_rate": 5.246712465915011e-06, | |
| "loss": 0.1073, | |
| "num_input_tokens_seen": 192759088, | |
| "step": 1295, | |
| "train_runtime": 1405.1765, | |
| "train_tokens_per_second": 137177.851 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 0.187465006369937, | |
| "learning_rate": 5.100675534580973e-06, | |
| "loss": 0.105, | |
| "num_input_tokens_seen": 193513760, | |
| "step": 1300, | |
| "train_runtime": 1428.9412, | |
| "train_tokens_per_second": 135424.583 | |
| }, | |
| { | |
| "epoch": 0.7967032967032966, | |
| "grad_norm": 0.1968745130664331, | |
| "learning_rate": 4.956468585863835e-06, | |
| "loss": 0.109, | |
| "num_input_tokens_seen": 194263568, | |
| "step": 1305, | |
| "train_runtime": 1453.363, | |
| "train_tokens_per_second": 133664.867 | |
| }, | |
| { | |
| "epoch": 0.7997557997557998, | |
| "grad_norm": 0.19765784327721697, | |
| "learning_rate": 4.814104881329828e-06, | |
| "loss": 0.1117, | |
| "num_input_tokens_seen": 195013952, | |
| "step": 1310, | |
| "train_runtime": 1477.9827, | |
| "train_tokens_per_second": 131946.03 | |
| }, | |
| { | |
| "epoch": 0.8028083028083028, | |
| "grad_norm": 0.17496994678208205, | |
| "learning_rate": 4.673597513036684e-06, | |
| "loss": 0.1062, | |
| "num_input_tokens_seen": 195750912, | |
| "step": 1315, | |
| "train_runtime": 1501.5023, | |
| "train_tokens_per_second": 130370.04 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.17065780815922202, | |
| "learning_rate": 4.5349594023296446e-06, | |
| "loss": 0.1022, | |
| "num_input_tokens_seen": 196505680, | |
| "step": 1320, | |
| "train_runtime": 1525.1053, | |
| "train_tokens_per_second": 128847.284 | |
| }, | |
| { | |
| "epoch": 0.8089133089133089, | |
| "grad_norm": 0.16256600519816009, | |
| "learning_rate": 4.398203298653195e-06, | |
| "loss": 0.1007, | |
| "num_input_tokens_seen": 197249152, | |
| "step": 1325, | |
| "train_runtime": 1549.7437, | |
| "train_tokens_per_second": 127278.56 | |
| }, | |
| { | |
| "epoch": 0.811965811965812, | |
| "grad_norm": 0.19747561990614157, | |
| "learning_rate": 4.263341778378608e-06, | |
| "loss": 0.1029, | |
| "num_input_tokens_seen": 197988416, | |
| "step": 1330, | |
| "train_runtime": 1573.1372, | |
| "train_tokens_per_second": 125855.788 | |
| }, | |
| { | |
| "epoch": 0.815018315018315, | |
| "grad_norm": 0.18039332530334673, | |
| "learning_rate": 4.130387243647377e-06, | |
| "loss": 0.1118, | |
| "num_input_tokens_seen": 198719120, | |
| "step": 1335, | |
| "train_runtime": 1596.9321, | |
| "train_tokens_per_second": 124438.056 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 0.19045486866954875, | |
| "learning_rate": 3.9993519212307154e-06, | |
| "loss": 0.1084, | |
| "num_input_tokens_seen": 199458576, | |
| "step": 1340, | |
| "train_runtime": 1620.2055, | |
| "train_tokens_per_second": 123106.962 | |
| }, | |
| { | |
| "epoch": 0.8211233211233211, | |
| "grad_norm": 0.18474904246763035, | |
| "learning_rate": 3.8702478614051355e-06, | |
| "loss": 0.1037, | |
| "num_input_tokens_seen": 200186640, | |
| "step": 1345, | |
| "train_runtime": 1642.963, | |
| "train_tokens_per_second": 121844.889 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.19988581366450398, | |
| "learning_rate": 3.7430869368442837e-06, | |
| "loss": 0.1101, | |
| "num_input_tokens_seen": 200921440, | |
| "step": 1350, | |
| "train_runtime": 1666.6682, | |
| "train_tokens_per_second": 120552.751 | |
| }, | |
| { | |
| "epoch": 0.8272283272283272, | |
| "grad_norm": 0.18088192477078352, | |
| "learning_rate": 3.6178808415271158e-06, | |
| "loss": 0.1049, | |
| "num_input_tokens_seen": 201666624, | |
| "step": 1355, | |
| "train_runtime": 1690.8981, | |
| "train_tokens_per_second": 119265.983 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 0.21055860383867625, | |
| "learning_rate": 3.4946410896624817e-06, | |
| "loss": 0.1086, | |
| "num_input_tokens_seen": 202417152, | |
| "step": 1360, | |
| "train_runtime": 1714.7749, | |
| "train_tokens_per_second": 118042.987 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.17657618585374651, | |
| "learning_rate": 3.373379014630279e-06, | |
| "loss": 0.1116, | |
| "num_input_tokens_seen": 203154720, | |
| "step": 1365, | |
| "train_runtime": 1738.3279, | |
| "train_tokens_per_second": 116867.892 | |
| }, | |
| { | |
| "epoch": 0.8363858363858364, | |
| "grad_norm": 0.1674680559452127, | |
| "learning_rate": 3.254105767939175e-06, | |
| "loss": 0.1108, | |
| "num_input_tokens_seen": 203918256, | |
| "step": 1370, | |
| "train_runtime": 1762.6422, | |
| "train_tokens_per_second": 115688.967 | |
| }, | |
| { | |
| "epoch": 0.8394383394383395, | |
| "grad_norm": 0.2062576670804984, | |
| "learning_rate": 3.136832318201119e-06, | |
| "loss": 0.1136, | |
| "num_input_tokens_seen": 204669728, | |
| "step": 1375, | |
| "train_runtime": 1786.2848, | |
| "train_tokens_per_second": 114578.442 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.17294057066788282, | |
| "learning_rate": 3.0215694501226384e-06, | |
| "loss": 0.1179, | |
| "num_input_tokens_seen": 205404912, | |
| "step": 1380, | |
| "train_runtime": 1809.8132, | |
| "train_tokens_per_second": 113495.093 | |
| }, | |
| { | |
| "epoch": 0.8455433455433455, | |
| "grad_norm": 0.19577572730064424, | |
| "learning_rate": 2.9083277635130523e-06, | |
| "loss": 0.107, | |
| "num_input_tokens_seen": 206131552, | |
| "step": 1385, | |
| "train_runtime": 1833.4279, | |
| "train_tokens_per_second": 112429.591 | |
| }, | |
| { | |
| "epoch": 0.8485958485958486, | |
| "grad_norm": 0.1793400226164633, | |
| "learning_rate": 2.7971176723096986e-06, | |
| "loss": 0.1036, | |
| "num_input_tokens_seen": 206875600, | |
| "step": 1390, | |
| "train_runtime": 1856.8767, | |
| "train_tokens_per_second": 111410.523 | |
| }, | |
| { | |
| "epoch": 0.8516483516483516, | |
| "grad_norm": 0.1842128906352051, | |
| "learning_rate": 2.687949403620235e-06, | |
| "loss": 0.1029, | |
| "num_input_tokens_seen": 207611712, | |
| "step": 1395, | |
| "train_runtime": 1880.2999, | |
| "train_tokens_per_second": 110414.148 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 0.18622082174256185, | |
| "learning_rate": 2.5808329967821563e-06, | |
| "loss": 0.1059, | |
| "num_input_tokens_seen": 208359376, | |
| "step": 1400, | |
| "train_runtime": 1903.7158, | |
| "train_tokens_per_second": 109448.785 | |
| }, | |
| { | |
| "epoch": 0.8577533577533577, | |
| "grad_norm": 0.17929812350697033, | |
| "learning_rate": 2.475778302439524e-06, | |
| "loss": 0.1075, | |
| "num_input_tokens_seen": 209108560, | |
| "step": 1405, | |
| "train_runtime": 1927.8947, | |
| "train_tokens_per_second": 108464.722 | |
| }, | |
| { | |
| "epoch": 0.8608058608058609, | |
| "grad_norm": 0.1728922703261754, | |
| "learning_rate": 2.3727949816371e-06, | |
| "loss": 0.1051, | |
| "num_input_tokens_seen": 209857904, | |
| "step": 1410, | |
| "train_runtime": 1951.8471, | |
| "train_tokens_per_second": 107517.596 | |
| }, | |
| { | |
| "epoch": 0.8638583638583639, | |
| "grad_norm": 0.1963181209348031, | |
| "learning_rate": 2.271892504931905e-06, | |
| "loss": 0.1134, | |
| "num_input_tokens_seen": 210611184, | |
| "step": 1415, | |
| "train_runtime": 1975.8194, | |
| "train_tokens_per_second": 106594.349 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 0.20330441600380375, | |
| "learning_rate": 2.173080151522272e-06, | |
| "loss": 0.1103, | |
| "num_input_tokens_seen": 211378688, | |
| "step": 1420, | |
| "train_runtime": 2000.3146, | |
| "train_tokens_per_second": 105672.721 | |
| }, | |
| { | |
| "epoch": 0.86996336996337, | |
| "grad_norm": 0.20191863786022263, | |
| "learning_rate": 2.0763670083945114e-06, | |
| "loss": 0.1046, | |
| "num_input_tokens_seen": 212108720, | |
| "step": 1425, | |
| "train_runtime": 2023.7548, | |
| "train_tokens_per_second": 104809.494 | |
| }, | |
| { | |
| "epoch": 0.873015873015873, | |
| "grad_norm": 0.16923393977402115, | |
| "learning_rate": 1.9817619694872614e-06, | |
| "loss": 0.0968, | |
| "num_input_tokens_seen": 212823344, | |
| "step": 1430, | |
| "train_runtime": 2046.8086, | |
| "train_tokens_per_second": 103978.137 | |
| }, | |
| { | |
| "epoch": 0.8760683760683761, | |
| "grad_norm": 0.20476539175205516, | |
| "learning_rate": 1.8892737348735812e-06, | |
| "loss": 0.1049, | |
| "num_input_tokens_seen": 213542016, | |
| "step": 1435, | |
| "train_runtime": 2070.2089, | |
| "train_tokens_per_second": 103149.985 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.1724666650664272, | |
| "learning_rate": 1.7989108099608742e-06, | |
| "loss": 0.1052, | |
| "num_input_tokens_seen": 214312144, | |
| "step": 1440, | |
| "train_runtime": 2094.4548, | |
| "train_tokens_per_second": 102323.594 | |
| }, | |
| { | |
| "epoch": 0.8821733821733821, | |
| "grad_norm": 0.19498369123874792, | |
| "learning_rate": 1.710681504708711e-06, | |
| "loss": 0.1088, | |
| "num_input_tokens_seen": 215050304, | |
| "step": 1445, | |
| "train_runtime": 2118.208, | |
| "train_tokens_per_second": 101524.638 | |
| }, | |
| { | |
| "epoch": 0.8852258852258852, | |
| "grad_norm": 0.19562986396976825, | |
| "learning_rate": 1.624593932864632e-06, | |
| "loss": 0.1145, | |
| "num_input_tokens_seen": 215771968, | |
| "step": 1450, | |
| "train_runtime": 2141.5992, | |
| "train_tokens_per_second": 100752.732 | |
| }, | |
| { | |
| "epoch": 0.8882783882783882, | |
| "grad_norm": 0.1996337724860765, | |
| "learning_rate": 1.5406560112179864e-06, | |
| "loss": 0.1132, | |
| "num_input_tokens_seen": 216536640, | |
| "step": 1455, | |
| "train_runtime": 2165.529, | |
| "train_tokens_per_second": 99992.491 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 0.1719436350942291, | |
| "learning_rate": 1.4588754588718862e-06, | |
| "loss": 0.1166, | |
| "num_input_tokens_seen": 217266608, | |
| "step": 1460, | |
| "train_runtime": 2188.7733, | |
| "train_tokens_per_second": 99264.098 | |
| }, | |
| { | |
| "epoch": 0.8943833943833944, | |
| "grad_norm": 0.17430034822723606, | |
| "learning_rate": 1.3792597965333581e-06, | |
| "loss": 0.1027, | |
| "num_input_tokens_seen": 218012736, | |
| "step": 1465, | |
| "train_runtime": 2212.814, | |
| "train_tokens_per_second": 98522.849 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.17956274894437826, | |
| "learning_rate": 1.3018163458217076e-06, | |
| "loss": 0.1046, | |
| "num_input_tokens_seen": 218754448, | |
| "step": 1470, | |
| "train_runtime": 2236.3294, | |
| "train_tokens_per_second": 97818.525 | |
| }, | |
| { | |
| "epoch": 0.9004884004884005, | |
| "grad_norm": 0.1704009409660837, | |
| "learning_rate": 1.2265522285952013e-06, | |
| "loss": 0.1075, | |
| "num_input_tokens_seen": 219497360, | |
| "step": 1475, | |
| "train_runtime": 2260.1231, | |
| "train_tokens_per_second": 97117.435 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 0.1874020307618338, | |
| "learning_rate": 1.1534743662961477e-06, | |
| "loss": 0.1008, | |
| "num_input_tokens_seen": 220228720, | |
| "step": 1480, | |
| "train_runtime": 2283.7479, | |
| "train_tokens_per_second": 96433.027 | |
| }, | |
| { | |
| "epoch": 0.9065934065934066, | |
| "grad_norm": 0.1760433678710787, | |
| "learning_rate": 1.0825894793143721e-06, | |
| "loss": 0.0993, | |
| "num_input_tokens_seen": 220991776, | |
| "step": 1485, | |
| "train_runtime": 2307.3025, | |
| "train_tokens_per_second": 95779.283 | |
| }, | |
| { | |
| "epoch": 0.9096459096459096, | |
| "grad_norm": 0.18111730094889816, | |
| "learning_rate": 1.0139040863692023e-06, | |
| "loss": 0.1115, | |
| "num_input_tokens_seen": 221720064, | |
| "step": 1490, | |
| "train_runtime": 2330.4419, | |
| "train_tokens_per_second": 95140.78 | |
| }, | |
| { | |
| "epoch": 0.9126984126984127, | |
| "grad_norm": 0.18509605591607733, | |
| "learning_rate": 9.474245039099882e-07, | |
| "loss": 0.1071, | |
| "num_input_tokens_seen": 222445072, | |
| "step": 1495, | |
| "train_runtime": 2353.292, | |
| "train_tokens_per_second": 94525.061 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.18777176668237425, | |
| "learning_rate": 8.831568455352352e-07, | |
| "loss": 0.1101, | |
| "num_input_tokens_seen": 223186944, | |
| "step": 1500, | |
| "train_runtime": 2376.7298, | |
| "train_tokens_per_second": 93905.056 | |
| }, | |
| { | |
| "epoch": 0.9188034188034188, | |
| "grad_norm": 0.17194653176652688, | |
| "learning_rate": 8.211070214303812e-07, | |
| "loss": 0.1135, | |
| "num_input_tokens_seen": 223916464, | |
| "step": 1505, | |
| "train_runtime": 2400.5305, | |
| "train_tokens_per_second": 93277.909 | |
| }, | |
| { | |
| "epoch": 0.9218559218559218, | |
| "grad_norm": 0.17885493292976987, | |
| "learning_rate": 7.612807378242798e-07, | |
| "loss": 0.1014, | |
| "num_input_tokens_seen": 224658112, | |
| "step": 1510, | |
| "train_runtime": 2423.995, | |
| "train_tokens_per_second": 92680.93 | |
| }, | |
| { | |
| "epoch": 0.924908424908425, | |
| "grad_norm": 0.1693207277479127, | |
| "learning_rate": 7.036834964644523e-07, | |
| "loss": 0.1004, | |
| "num_input_tokens_seen": 225410512, | |
| "step": 1515, | |
| "train_runtime": 2448.5245, | |
| "train_tokens_per_second": 92059.733 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 0.20343104828153044, | |
| "learning_rate": 6.483205941111348e-07, | |
| "loss": 0.1137, | |
| "num_input_tokens_seen": 226157008, | |
| "step": 1520, | |
| "train_runtime": 2472.4417, | |
| "train_tokens_per_second": 91471.118 | |
| }, | |
| { | |
| "epoch": 0.931013431013431, | |
| "grad_norm": 0.18201843551119887, | |
| "learning_rate": 5.951971220501645e-07, | |
| "loss": 0.103, | |
| "num_input_tokens_seen": 226914128, | |
| "step": 1525, | |
| "train_runtime": 2496.0089, | |
| "train_tokens_per_second": 90910.784 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.17329078478166665, | |
| "learning_rate": 5.44317965624791e-07, | |
| "loss": 0.115, | |
| "num_input_tokens_seen": 227663280, | |
| "step": 1530, | |
| "train_runtime": 2519.4914, | |
| "train_tokens_per_second": 90360.807 | |
| }, | |
| { | |
| "epoch": 0.9371184371184371, | |
| "grad_norm": 0.18474864271351296, | |
| "learning_rate": 4.956878037864043e-07, | |
| "loss": 0.1122, | |
| "num_input_tokens_seen": 228413792, | |
| "step": 1535, | |
| "train_runtime": 2543.9995, | |
| "train_tokens_per_second": 89785.314 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 0.18473914185393342, | |
| "learning_rate": 4.4931110866424375e-07, | |
| "loss": 0.1088, | |
| "num_input_tokens_seen": 229167792, | |
| "step": 1540, | |
| "train_runtime": 2567.5024, | |
| "train_tokens_per_second": 89257.091 | |
| }, | |
| { | |
| "epoch": 0.9432234432234432, | |
| "grad_norm": 0.1974718261479754, | |
| "learning_rate": 4.0519214515413463e-07, | |
| "loss": 0.1039, | |
| "num_input_tokens_seen": 229916896, | |
| "step": 1545, | |
| "train_runtime": 2591.77, | |
| "train_tokens_per_second": 88710.377 | |
| }, | |
| { | |
| "epoch": 0.9462759462759462, | |
| "grad_norm": 0.1924445400465147, | |
| "learning_rate": 3.6333497052629115e-07, | |
| "loss": 0.101, | |
| "num_input_tokens_seen": 230648560, | |
| "step": 1550, | |
| "train_runtime": 2615.1464, | |
| "train_tokens_per_second": 88197.19 | |
| }, | |
| { | |
| "epoch": 0.9493284493284493, | |
| "grad_norm": 0.18927801239681435, | |
| "learning_rate": 3.237434340521789e-07, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 231420400, | |
| "step": 1555, | |
| "train_runtime": 2639.7326, | |
| "train_tokens_per_second": 87668.123 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.1738699800560557, | |
| "learning_rate": 2.8642117665055034e-07, | |
| "loss": 0.1005, | |
| "num_input_tokens_seen": 232166064, | |
| "step": 1560, | |
| "train_runtime": 2663.5311, | |
| "train_tokens_per_second": 87164.766 | |
| }, | |
| { | |
| "epoch": 0.9554334554334555, | |
| "grad_norm": 0.18748809492911608, | |
| "learning_rate": 2.5137163055259926e-07, | |
| "loss": 0.1143, | |
| "num_input_tokens_seen": 232916848, | |
| "step": 1565, | |
| "train_runtime": 2687.5189, | |
| "train_tokens_per_second": 86666.125 | |
| }, | |
| { | |
| "epoch": 0.9584859584859585, | |
| "grad_norm": 0.19839518757189034, | |
| "learning_rate": 2.1859801898634347e-07, | |
| "loss": 0.1135, | |
| "num_input_tokens_seen": 233659040, | |
| "step": 1570, | |
| "train_runtime": 2711.5598, | |
| "train_tokens_per_second": 86171.45 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.16971733407968162, | |
| "learning_rate": 1.881033558802009e-07, | |
| "loss": 0.1032, | |
| "num_input_tokens_seen": 234381584, | |
| "step": 1575, | |
| "train_runtime": 2734.9922, | |
| "train_tokens_per_second": 85697.35 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 0.19498878670338224, | |
| "learning_rate": 1.598904455858169e-07, | |
| "loss": 0.1128, | |
| "num_input_tokens_seen": 235104208, | |
| "step": 1580, | |
| "train_runtime": 2758.2912, | |
| "train_tokens_per_second": 85235.455 | |
| }, | |
| { | |
| "epoch": 0.9676434676434676, | |
| "grad_norm": 0.19082375605703147, | |
| "learning_rate": 1.3396188262018438e-07, | |
| "loss": 0.1011, | |
| "num_input_tokens_seen": 235824192, | |
| "step": 1585, | |
| "train_runtime": 2781.0692, | |
| "train_tokens_per_second": 84796.234 | |
| }, | |
| { | |
| "epoch": 0.9706959706959707, | |
| "grad_norm": 0.19647551153981094, | |
| "learning_rate": 1.1032005142703195e-07, | |
| "loss": 0.1099, | |
| "num_input_tokens_seen": 236586656, | |
| "step": 1590, | |
| "train_runtime": 2805.341, | |
| "train_tokens_per_second": 84334.367 | |
| }, | |
| { | |
| "epoch": 0.9737484737484737, | |
| "grad_norm": 0.22901989403021827, | |
| "learning_rate": 8.896712615756308e-08, | |
| "loss": 0.107, | |
| "num_input_tokens_seen": 237333776, | |
| "step": 1595, | |
| "train_runtime": 2829.4237, | |
| "train_tokens_per_second": 83880.605 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 0.1668688782723868, | |
| "learning_rate": 6.990507047049676e-08, | |
| "loss": 0.0996, | |
| "num_input_tokens_seen": 238071712, | |
| "step": 1600, | |
| "train_runtime": 2853.2825, | |
| "train_tokens_per_second": 83437.834 | |
| }, | |
| { | |
| "epoch": 0.9798534798534798, | |
| "grad_norm": 0.17655129222267127, | |
| "learning_rate": 5.313563735149796e-08, | |
| "loss": 0.1036, | |
| "num_input_tokens_seen": 238803952, | |
| "step": 1605, | |
| "train_runtime": 2876.8261, | |
| "train_tokens_per_second": 83009.518 | |
| }, | |
| { | |
| "epoch": 0.9829059829059829, | |
| "grad_norm": 0.19201723178645366, | |
| "learning_rate": 3.8660368951973224e-08, | |
| "loss": 0.1043, | |
| "num_input_tokens_seen": 239534384, | |
| "step": 1610, | |
| "train_runtime": 2900.0489, | |
| "train_tokens_per_second": 82596.671 | |
| }, | |
| { | |
| "epoch": 0.985958485958486, | |
| "grad_norm": 0.19124664714613782, | |
| "learning_rate": 2.648059644723144e-08, | |
| "loss": 0.1058, | |
| "num_input_tokens_seen": 240269104, | |
| "step": 1615, | |
| "train_runtime": 2923.5905, | |
| "train_tokens_per_second": 82182.886 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.19181515403854157, | |
| "learning_rate": 1.6597439914092794e-08, | |
| "loss": 0.1051, | |
| "num_input_tokens_seen": 241022096, | |
| "step": 1620, | |
| "train_runtime": 2947.6626, | |
| "train_tokens_per_second": 81767.193 | |
| }, | |
| { | |
| "epoch": 0.9920634920634921, | |
| "grad_norm": 0.19615025842124598, | |
| "learning_rate": 9.011808227865625e-09, | |
| "loss": 0.104, | |
| "num_input_tokens_seen": 241767488, | |
| "step": 1625, | |
| "train_runtime": 2971.4787, | |
| "train_tokens_per_second": 81362.685 | |
| }, | |
| { | |
| "epoch": 0.9951159951159951, | |
| "grad_norm": 0.1740769548161794, | |
| "learning_rate": 3.7243989787633105e-09, | |
| "loss": 0.1056, | |
| "num_input_tokens_seen": 242515568, | |
| "step": 1630, | |
| "train_runtime": 2995.3381, | |
| "train_tokens_per_second": 80964.338 | |
| }, | |
| { | |
| "epoch": 0.9981684981684982, | |
| "grad_norm": 0.19285773837576708, | |
| "learning_rate": 7.356984077722117e-10, | |
| "loss": 0.1037, | |
| "num_input_tokens_seen": 243258848, | |
| "step": 1635, | |
| "train_runtime": 3019.0018, | |
| "train_tokens_per_second": 80575.921 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "num_input_tokens_seen": 243707408, | |
| "step": 1638, | |
| "total_flos": 487302408765440.0, | |
| "train_loss": 0.04227780149533199, | |
| "train_runtime": 3149.3455, | |
| "train_samples_per_second": 66.569, | |
| "train_steps_per_second": 0.52 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1638, | |
| "num_input_tokens_seen": 243707408, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 487302408765440.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |