| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6472491909385113, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006472491909385114, |
| "grad_norm": 2.0135042667388916, |
| "learning_rate": 1.8e-05, |
| "loss": 1.5311, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012944983818770227, |
| "grad_norm": 1.0551478862762451, |
| "learning_rate": 3.8e-05, |
| "loss": 1.2229, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 1.101570725440979, |
| "learning_rate": 5.8e-05, |
| "loss": 0.9845, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.025889967637540454, |
| "grad_norm": 0.9952124357223511, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.8599, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.032362459546925564, |
| "grad_norm": 0.8940704464912415, |
| "learning_rate": 9.8e-05, |
| "loss": 0.8401, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 0.8487522602081299, |
| "learning_rate": 9.93979933110368e-05, |
| "loss": 0.776, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.045307443365695796, |
| "grad_norm": 0.7864758968353271, |
| "learning_rate": 9.872909698996656e-05, |
| "loss": 0.8153, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05177993527508091, |
| "grad_norm": 0.7295967936515808, |
| "learning_rate": 9.806020066889633e-05, |
| "loss": 0.8251, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 0.7075174450874329, |
| "learning_rate": 9.739130434782609e-05, |
| "loss": 0.749, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 0.6422586441040039, |
| "learning_rate": 9.672240802675586e-05, |
| "loss": 0.7428, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07119741100323625, |
| "grad_norm": 0.6745761632919312, |
| "learning_rate": 9.605351170568563e-05, |
| "loss": 0.7806, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.6769530177116394, |
| "learning_rate": 9.53846153846154e-05, |
| "loss": 0.796, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08414239482200647, |
| "grad_norm": 0.5150454044342041, |
| "learning_rate": 9.471571906354515e-05, |
| "loss": 0.7486, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09061488673139159, |
| "grad_norm": 0.6074182391166687, |
| "learning_rate": 9.404682274247491e-05, |
| "loss": 0.7783, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 0.6745702624320984, |
| "learning_rate": 9.337792642140469e-05, |
| "loss": 0.7503, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10355987055016182, |
| "grad_norm": 0.6088436841964722, |
| "learning_rate": 9.270903010033445e-05, |
| "loss": 0.7771, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11003236245954692, |
| "grad_norm": 0.6351744532585144, |
| "learning_rate": 9.204013377926422e-05, |
| "loss": 0.7554, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.5281522274017334, |
| "learning_rate": 9.137123745819398e-05, |
| "loss": 0.7635, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12297734627831715, |
| "grad_norm": 0.6833154559135437, |
| "learning_rate": 9.070234113712374e-05, |
| "loss": 0.753, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 0.5272049307823181, |
| "learning_rate": 9.003344481605351e-05, |
| "loss": 0.7384, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 0.6461851596832275, |
| "learning_rate": 8.936454849498329e-05, |
| "loss": 0.7548, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1423948220064725, |
| "grad_norm": 0.5979950428009033, |
| "learning_rate": 8.869565217391305e-05, |
| "loss": 0.7534, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1488673139158576, |
| "grad_norm": 0.6396514773368835, |
| "learning_rate": 8.802675585284281e-05, |
| "loss": 0.7436, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 0.6104384064674377, |
| "learning_rate": 8.735785953177258e-05, |
| "loss": 0.7345, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16181229773462782, |
| "grad_norm": 0.5535375475883484, |
| "learning_rate": 8.668896321070234e-05, |
| "loss": 0.7293, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16828478964401294, |
| "grad_norm": 0.5984886884689331, |
| "learning_rate": 8.602006688963212e-05, |
| "loss": 0.7173, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 0.517716646194458, |
| "learning_rate": 8.535117056856188e-05, |
| "loss": 0.7694, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18122977346278318, |
| "grad_norm": 0.5802426934242249, |
| "learning_rate": 8.468227424749165e-05, |
| "loss": 0.7505, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18770226537216828, |
| "grad_norm": 0.5289272665977478, |
| "learning_rate": 8.401337792642141e-05, |
| "loss": 0.7678, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 0.5293733477592468, |
| "learning_rate": 8.334448160535117e-05, |
| "loss": 0.7242, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20064724919093851, |
| "grad_norm": 0.566146969795227, |
| "learning_rate": 8.267558528428094e-05, |
| "loss": 0.7248, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20711974110032363, |
| "grad_norm": 0.5496853590011597, |
| "learning_rate": 8.200668896321071e-05, |
| "loss": 0.7588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 0.5420507192611694, |
| "learning_rate": 8.133779264214048e-05, |
| "loss": 0.7673, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22006472491909385, |
| "grad_norm": 0.582582950592041, |
| "learning_rate": 8.066889632107024e-05, |
| "loss": 0.7341, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22653721682847897, |
| "grad_norm": 0.5332103371620178, |
| "learning_rate": 8e-05, |
| "loss": 0.7518, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 0.5132858157157898, |
| "learning_rate": 7.933110367892977e-05, |
| "loss": 0.752, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23948220064724918, |
| "grad_norm": 0.5356752276420593, |
| "learning_rate": 7.866220735785953e-05, |
| "loss": 0.7432, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2459546925566343, |
| "grad_norm": 0.5592623353004456, |
| "learning_rate": 7.79933110367893e-05, |
| "loss": 0.7396, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 0.5103404521942139, |
| "learning_rate": 7.732441471571906e-05, |
| "loss": 0.7253, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 0.5380160212516785, |
| "learning_rate": 7.665551839464883e-05, |
| "loss": 0.7464, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26537216828478966, |
| "grad_norm": 0.5373779535293579, |
| "learning_rate": 7.598662207357859e-05, |
| "loss": 0.7377, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.523170530796051, |
| "learning_rate": 7.531772575250837e-05, |
| "loss": 0.7413, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2783171521035599, |
| "grad_norm": 0.5368985533714294, |
| "learning_rate": 7.464882943143813e-05, |
| "loss": 0.7227, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.284789644012945, |
| "grad_norm": 0.587778627872467, |
| "learning_rate": 7.39799331103679e-05, |
| "loss": 0.7289, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 0.5471896529197693, |
| "learning_rate": 7.331103678929766e-05, |
| "loss": 0.7301, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2977346278317152, |
| "grad_norm": 0.5446394085884094, |
| "learning_rate": 7.264214046822742e-05, |
| "loss": 0.7325, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3042071197411003, |
| "grad_norm": 0.5761464238166809, |
| "learning_rate": 7.197324414715719e-05, |
| "loss": 0.7583, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.5082632899284363, |
| "learning_rate": 7.130434782608696e-05, |
| "loss": 0.7382, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.31715210355987056, |
| "grad_norm": 0.5441333651542664, |
| "learning_rate": 7.063545150501673e-05, |
| "loss": 0.7411, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 0.5039440393447876, |
| "learning_rate": 6.996655518394649e-05, |
| "loss": 0.7332, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 0.5474230051040649, |
| "learning_rate": 6.929765886287626e-05, |
| "loss": 0.7216, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3365695792880259, |
| "grad_norm": 0.5532674789428711, |
| "learning_rate": 6.862876254180602e-05, |
| "loss": 0.7389, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.343042071197411, |
| "grad_norm": 0.5631316900253296, |
| "learning_rate": 6.79598662207358e-05, |
| "loss": 0.7065, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 0.45850899815559387, |
| "learning_rate": 6.729096989966556e-05, |
| "loss": 0.7472, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3559870550161812, |
| "grad_norm": 0.48253533244132996, |
| "learning_rate": 6.662207357859532e-05, |
| "loss": 0.7312, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.36245954692556637, |
| "grad_norm": 0.6004955172538757, |
| "learning_rate": 6.595317725752509e-05, |
| "loss": 0.7681, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 0.517497181892395, |
| "learning_rate": 6.528428093645485e-05, |
| "loss": 0.7581, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.37540453074433655, |
| "grad_norm": 0.5473213195800781, |
| "learning_rate": 6.461538461538462e-05, |
| "loss": 0.7459, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3818770226537217, |
| "grad_norm": 0.5596595406532288, |
| "learning_rate": 6.39464882943144e-05, |
| "loss": 0.7322, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 0.5507789254188538, |
| "learning_rate": 6.327759197324416e-05, |
| "loss": 0.7324, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3948220064724919, |
| "grad_norm": 0.5934524536132812, |
| "learning_rate": 6.260869565217392e-05, |
| "loss": 0.7072, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.40129449838187703, |
| "grad_norm": 0.5208216905593872, |
| "learning_rate": 6.193979933110368e-05, |
| "loss": 0.755, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 0.506360650062561, |
| "learning_rate": 6.127090301003345e-05, |
| "loss": 0.7244, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.41423948220064727, |
| "grad_norm": 0.5485314130783081, |
| "learning_rate": 6.0602006688963206e-05, |
| "loss": 0.7284, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.42071197411003236, |
| "grad_norm": 0.5979824066162109, |
| "learning_rate": 5.9933110367892977e-05, |
| "loss": 0.7065, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 0.5200499892234802, |
| "learning_rate": 5.926421404682274e-05, |
| "loss": 0.7124, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4336569579288026, |
| "grad_norm": 0.5502509474754333, |
| "learning_rate": 5.859531772575251e-05, |
| "loss": 0.7493, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4401294498381877, |
| "grad_norm": 0.4699322581291199, |
| "learning_rate": 5.7926421404682275e-05, |
| "loss": 0.7588, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 0.4809620678424835, |
| "learning_rate": 5.725752508361204e-05, |
| "loss": 0.7463, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.45307443365695793, |
| "grad_norm": 0.5801950693130493, |
| "learning_rate": 5.658862876254181e-05, |
| "loss": 0.7206, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.459546925566343, |
| "grad_norm": 0.5444105267524719, |
| "learning_rate": 5.591973244147157e-05, |
| "loss": 0.7197, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 0.4769653379917145, |
| "learning_rate": 5.525083612040134e-05, |
| "loss": 0.7359, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.47249190938511326, |
| "grad_norm": 0.5103979706764221, |
| "learning_rate": 5.458193979933111e-05, |
| "loss": 0.769, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.47896440129449835, |
| "grad_norm": 0.543662965297699, |
| "learning_rate": 5.391304347826087e-05, |
| "loss": 0.7618, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 0.5364013314247131, |
| "learning_rate": 5.3244147157190635e-05, |
| "loss": 0.72, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4919093851132686, |
| "grad_norm": 0.6091406345367432, |
| "learning_rate": 5.2575250836120406e-05, |
| "loss": 0.6965, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.49838187702265374, |
| "grad_norm": 0.5977626442909241, |
| "learning_rate": 5.190635451505017e-05, |
| "loss": 0.7172, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 0.45984843373298645, |
| "learning_rate": 5.1237458193979934e-05, |
| "loss": 0.7391, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.511326860841424, |
| "grad_norm": 0.49561065435409546, |
| "learning_rate": 5.0568561872909704e-05, |
| "loss": 0.7526, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 0.4905160367488861, |
| "learning_rate": 4.989966555183947e-05, |
| "loss": 0.7278, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 0.5338721871376038, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 0.7297, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5307443365695793, |
| "grad_norm": 0.5137319564819336, |
| "learning_rate": 4.8561872909698996e-05, |
| "loss": 0.7863, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5372168284789643, |
| "grad_norm": 0.4649944603443146, |
| "learning_rate": 4.789297658862876e-05, |
| "loss": 0.7525, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 0.5467334985733032, |
| "learning_rate": 4.722408026755853e-05, |
| "loss": 0.7144, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5501618122977346, |
| "grad_norm": 0.47389763593673706, |
| "learning_rate": 4.6555183946488294e-05, |
| "loss": 0.7217, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5566343042071198, |
| "grad_norm": 0.45604580640792847, |
| "learning_rate": 4.5886287625418065e-05, |
| "loss": 0.7243, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 0.548736572265625, |
| "learning_rate": 4.521739130434783e-05, |
| "loss": 0.7078, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.56957928802589, |
| "grad_norm": 0.5387297868728638, |
| "learning_rate": 4.454849498327759e-05, |
| "loss": 0.6778, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5760517799352751, |
| "grad_norm": 0.5060684680938721, |
| "learning_rate": 4.3879598662207363e-05, |
| "loss": 0.7088, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 0.5241347551345825, |
| "learning_rate": 4.321070234113713e-05, |
| "loss": 0.7232, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5889967637540453, |
| "grad_norm": 0.5504106283187866, |
| "learning_rate": 4.254180602006689e-05, |
| "loss": 0.7118, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5954692556634305, |
| "grad_norm": 0.47307100892066956, |
| "learning_rate": 4.187290969899666e-05, |
| "loss": 0.7377, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 0.5132865905761719, |
| "learning_rate": 4.1204013377926426e-05, |
| "loss": 0.7237, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6084142394822006, |
| "grad_norm": 0.5692107677459717, |
| "learning_rate": 4.053511705685619e-05, |
| "loss": 0.6933, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6148867313915858, |
| "grad_norm": 0.5088536143302917, |
| "learning_rate": 3.986622073578595e-05, |
| "loss": 0.7094, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.47623276710510254, |
| "learning_rate": 3.919732441471572e-05, |
| "loss": 0.7219, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.627831715210356, |
| "grad_norm": 0.48927319049835205, |
| "learning_rate": 3.852842809364549e-05, |
| "loss": 0.7324, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6343042071197411, |
| "grad_norm": 0.4764053523540497, |
| "learning_rate": 3.785953177257525e-05, |
| "loss": 0.7286, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 0.4962717592716217, |
| "learning_rate": 3.7190635451505016e-05, |
| "loss": 0.7471, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 0.5014144778251648, |
| "learning_rate": 3.6521739130434786e-05, |
| "loss": 0.6966, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1545, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.626219816245002e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|