| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.970873786407767, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006472491909385114, |
| "grad_norm": 2.0135042667388916, |
| "learning_rate": 1.8e-05, |
| "loss": 1.5311, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012944983818770227, |
| "grad_norm": 1.0551478862762451, |
| "learning_rate": 3.8e-05, |
| "loss": 1.2229, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 1.101570725440979, |
| "learning_rate": 5.8e-05, |
| "loss": 0.9845, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.025889967637540454, |
| "grad_norm": 0.9952124357223511, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.8599, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.032362459546925564, |
| "grad_norm": 0.8940704464912415, |
| "learning_rate": 9.8e-05, |
| "loss": 0.8401, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 0.8487522602081299, |
| "learning_rate": 9.93979933110368e-05, |
| "loss": 0.776, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.045307443365695796, |
| "grad_norm": 0.7864758968353271, |
| "learning_rate": 9.872909698996656e-05, |
| "loss": 0.8153, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05177993527508091, |
| "grad_norm": 0.7295967936515808, |
| "learning_rate": 9.806020066889633e-05, |
| "loss": 0.8251, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 0.7075174450874329, |
| "learning_rate": 9.739130434782609e-05, |
| "loss": 0.749, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 0.6422586441040039, |
| "learning_rate": 9.672240802675586e-05, |
| "loss": 0.7428, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07119741100323625, |
| "grad_norm": 0.6745761632919312, |
| "learning_rate": 9.605351170568563e-05, |
| "loss": 0.7806, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.6769530177116394, |
| "learning_rate": 9.53846153846154e-05, |
| "loss": 0.796, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08414239482200647, |
| "grad_norm": 0.5150454044342041, |
| "learning_rate": 9.471571906354515e-05, |
| "loss": 0.7486, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09061488673139159, |
| "grad_norm": 0.6074182391166687, |
| "learning_rate": 9.404682274247491e-05, |
| "loss": 0.7783, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 0.6745702624320984, |
| "learning_rate": 9.337792642140469e-05, |
| "loss": 0.7503, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10355987055016182, |
| "grad_norm": 0.6088436841964722, |
| "learning_rate": 9.270903010033445e-05, |
| "loss": 0.7771, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11003236245954692, |
| "grad_norm": 0.6351744532585144, |
| "learning_rate": 9.204013377926422e-05, |
| "loss": 0.7554, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.5281522274017334, |
| "learning_rate": 9.137123745819398e-05, |
| "loss": 0.7635, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12297734627831715, |
| "grad_norm": 0.6833154559135437, |
| "learning_rate": 9.070234113712374e-05, |
| "loss": 0.753, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 0.5272049307823181, |
| "learning_rate": 9.003344481605351e-05, |
| "loss": 0.7384, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 0.6461851596832275, |
| "learning_rate": 8.936454849498329e-05, |
| "loss": 0.7548, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1423948220064725, |
| "grad_norm": 0.5979950428009033, |
| "learning_rate": 8.869565217391305e-05, |
| "loss": 0.7534, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1488673139158576, |
| "grad_norm": 0.6396514773368835, |
| "learning_rate": 8.802675585284281e-05, |
| "loss": 0.7436, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 0.6104384064674377, |
| "learning_rate": 8.735785953177258e-05, |
| "loss": 0.7345, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16181229773462782, |
| "grad_norm": 0.5535375475883484, |
| "learning_rate": 8.668896321070234e-05, |
| "loss": 0.7293, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16828478964401294, |
| "grad_norm": 0.5984886884689331, |
| "learning_rate": 8.602006688963212e-05, |
| "loss": 0.7173, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 0.517716646194458, |
| "learning_rate": 8.535117056856188e-05, |
| "loss": 0.7694, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18122977346278318, |
| "grad_norm": 0.5802426934242249, |
| "learning_rate": 8.468227424749165e-05, |
| "loss": 0.7505, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18770226537216828, |
| "grad_norm": 0.5289272665977478, |
| "learning_rate": 8.401337792642141e-05, |
| "loss": 0.7678, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 0.5293733477592468, |
| "learning_rate": 8.334448160535117e-05, |
| "loss": 0.7242, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20064724919093851, |
| "grad_norm": 0.566146969795227, |
| "learning_rate": 8.267558528428094e-05, |
| "loss": 0.7248, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20711974110032363, |
| "grad_norm": 0.5496853590011597, |
| "learning_rate": 8.200668896321071e-05, |
| "loss": 0.7588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 0.5420507192611694, |
| "learning_rate": 8.133779264214048e-05, |
| "loss": 0.7673, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22006472491909385, |
| "grad_norm": 0.582582950592041, |
| "learning_rate": 8.066889632107024e-05, |
| "loss": 0.7341, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22653721682847897, |
| "grad_norm": 0.5332103371620178, |
| "learning_rate": 8e-05, |
| "loss": 0.7518, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 0.5132858157157898, |
| "learning_rate": 7.933110367892977e-05, |
| "loss": 0.752, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23948220064724918, |
| "grad_norm": 0.5356752276420593, |
| "learning_rate": 7.866220735785953e-05, |
| "loss": 0.7432, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2459546925566343, |
| "grad_norm": 0.5592623353004456, |
| "learning_rate": 7.79933110367893e-05, |
| "loss": 0.7396, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 0.5103404521942139, |
| "learning_rate": 7.732441471571906e-05, |
| "loss": 0.7253, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 0.5380160212516785, |
| "learning_rate": 7.665551839464883e-05, |
| "loss": 0.7464, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26537216828478966, |
| "grad_norm": 0.5373779535293579, |
| "learning_rate": 7.598662207357859e-05, |
| "loss": 0.7377, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.523170530796051, |
| "learning_rate": 7.531772575250837e-05, |
| "loss": 0.7413, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2783171521035599, |
| "grad_norm": 0.5368985533714294, |
| "learning_rate": 7.464882943143813e-05, |
| "loss": 0.7227, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.284789644012945, |
| "grad_norm": 0.587778627872467, |
| "learning_rate": 7.39799331103679e-05, |
| "loss": 0.7289, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 0.5471896529197693, |
| "learning_rate": 7.331103678929766e-05, |
| "loss": 0.7301, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2977346278317152, |
| "grad_norm": 0.5446394085884094, |
| "learning_rate": 7.264214046822742e-05, |
| "loss": 0.7325, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3042071197411003, |
| "grad_norm": 0.5761464238166809, |
| "learning_rate": 7.197324414715719e-05, |
| "loss": 0.7583, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.5082632899284363, |
| "learning_rate": 7.130434782608696e-05, |
| "loss": 0.7382, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.31715210355987056, |
| "grad_norm": 0.5441333651542664, |
| "learning_rate": 7.063545150501673e-05, |
| "loss": 0.7411, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 0.5039440393447876, |
| "learning_rate": 6.996655518394649e-05, |
| "loss": 0.7332, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 0.5474230051040649, |
| "learning_rate": 6.929765886287626e-05, |
| "loss": 0.7216, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3365695792880259, |
| "grad_norm": 0.5532674789428711, |
| "learning_rate": 6.862876254180602e-05, |
| "loss": 0.7389, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.343042071197411, |
| "grad_norm": 0.5631316900253296, |
| "learning_rate": 6.79598662207358e-05, |
| "loss": 0.7065, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 0.45850899815559387, |
| "learning_rate": 6.729096989966556e-05, |
| "loss": 0.7472, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3559870550161812, |
| "grad_norm": 0.48253533244132996, |
| "learning_rate": 6.662207357859532e-05, |
| "loss": 0.7312, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.36245954692556637, |
| "grad_norm": 0.6004955172538757, |
| "learning_rate": 6.595317725752509e-05, |
| "loss": 0.7681, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 0.517497181892395, |
| "learning_rate": 6.528428093645485e-05, |
| "loss": 0.7581, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.37540453074433655, |
| "grad_norm": 0.5473213195800781, |
| "learning_rate": 6.461538461538462e-05, |
| "loss": 0.7459, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3818770226537217, |
| "grad_norm": 0.5596595406532288, |
| "learning_rate": 6.39464882943144e-05, |
| "loss": 0.7322, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 0.5507789254188538, |
| "learning_rate": 6.327759197324416e-05, |
| "loss": 0.7324, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3948220064724919, |
| "grad_norm": 0.5934524536132812, |
| "learning_rate": 6.260869565217392e-05, |
| "loss": 0.7072, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.40129449838187703, |
| "grad_norm": 0.5208216905593872, |
| "learning_rate": 6.193979933110368e-05, |
| "loss": 0.755, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 0.506360650062561, |
| "learning_rate": 6.127090301003345e-05, |
| "loss": 0.7244, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.41423948220064727, |
| "grad_norm": 0.5485314130783081, |
| "learning_rate": 6.0602006688963206e-05, |
| "loss": 0.7284, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.42071197411003236, |
| "grad_norm": 0.5979824066162109, |
| "learning_rate": 5.9933110367892977e-05, |
| "loss": 0.7065, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 0.5200499892234802, |
| "learning_rate": 5.926421404682274e-05, |
| "loss": 0.7124, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4336569579288026, |
| "grad_norm": 0.5502509474754333, |
| "learning_rate": 5.859531772575251e-05, |
| "loss": 0.7493, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4401294498381877, |
| "grad_norm": 0.4699322581291199, |
| "learning_rate": 5.7926421404682275e-05, |
| "loss": 0.7588, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 0.4809620678424835, |
| "learning_rate": 5.725752508361204e-05, |
| "loss": 0.7463, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.45307443365695793, |
| "grad_norm": 0.5801950693130493, |
| "learning_rate": 5.658862876254181e-05, |
| "loss": 0.7206, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.459546925566343, |
| "grad_norm": 0.5444105267524719, |
| "learning_rate": 5.591973244147157e-05, |
| "loss": 0.7197, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 0.4769653379917145, |
| "learning_rate": 5.525083612040134e-05, |
| "loss": 0.7359, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.47249190938511326, |
| "grad_norm": 0.5103979706764221, |
| "learning_rate": 5.458193979933111e-05, |
| "loss": 0.769, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.47896440129449835, |
| "grad_norm": 0.543662965297699, |
| "learning_rate": 5.391304347826087e-05, |
| "loss": 0.7618, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 0.5364013314247131, |
| "learning_rate": 5.3244147157190635e-05, |
| "loss": 0.72, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4919093851132686, |
| "grad_norm": 0.6091406345367432, |
| "learning_rate": 5.2575250836120406e-05, |
| "loss": 0.6965, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.49838187702265374, |
| "grad_norm": 0.5977626442909241, |
| "learning_rate": 5.190635451505017e-05, |
| "loss": 0.7172, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 0.45984843373298645, |
| "learning_rate": 5.1237458193979934e-05, |
| "loss": 0.7391, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.511326860841424, |
| "grad_norm": 0.49561065435409546, |
| "learning_rate": 5.0568561872909704e-05, |
| "loss": 0.7526, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 0.4905160367488861, |
| "learning_rate": 4.989966555183947e-05, |
| "loss": 0.7278, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 0.5338721871376038, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 0.7297, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5307443365695793, |
| "grad_norm": 0.5137319564819336, |
| "learning_rate": 4.8561872909698996e-05, |
| "loss": 0.7863, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5372168284789643, |
| "grad_norm": 0.4649944603443146, |
| "learning_rate": 4.789297658862876e-05, |
| "loss": 0.7525, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 0.5467334985733032, |
| "learning_rate": 4.722408026755853e-05, |
| "loss": 0.7144, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5501618122977346, |
| "grad_norm": 0.47389763593673706, |
| "learning_rate": 4.6555183946488294e-05, |
| "loss": 0.7217, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5566343042071198, |
| "grad_norm": 0.45604580640792847, |
| "learning_rate": 4.5886287625418065e-05, |
| "loss": 0.7243, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 0.548736572265625, |
| "learning_rate": 4.521739130434783e-05, |
| "loss": 0.7078, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.56957928802589, |
| "grad_norm": 0.5387297868728638, |
| "learning_rate": 4.454849498327759e-05, |
| "loss": 0.6778, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5760517799352751, |
| "grad_norm": 0.5060684680938721, |
| "learning_rate": 4.3879598662207363e-05, |
| "loss": 0.7088, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 0.5241347551345825, |
| "learning_rate": 4.321070234113713e-05, |
| "loss": 0.7232, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5889967637540453, |
| "grad_norm": 0.5504106283187866, |
| "learning_rate": 4.254180602006689e-05, |
| "loss": 0.7118, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5954692556634305, |
| "grad_norm": 0.47307100892066956, |
| "learning_rate": 4.187290969899666e-05, |
| "loss": 0.7377, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 0.5132865905761719, |
| "learning_rate": 4.1204013377926426e-05, |
| "loss": 0.7237, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6084142394822006, |
| "grad_norm": 0.5692107677459717, |
| "learning_rate": 4.053511705685619e-05, |
| "loss": 0.6933, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6148867313915858, |
| "grad_norm": 0.5088536143302917, |
| "learning_rate": 3.986622073578595e-05, |
| "loss": 0.7094, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.47623276710510254, |
| "learning_rate": 3.919732441471572e-05, |
| "loss": 0.7219, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.627831715210356, |
| "grad_norm": 0.48927319049835205, |
| "learning_rate": 3.852842809364549e-05, |
| "loss": 0.7324, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6343042071197411, |
| "grad_norm": 0.4764053523540497, |
| "learning_rate": 3.785953177257525e-05, |
| "loss": 0.7286, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 0.4962717592716217, |
| "learning_rate": 3.7190635451505016e-05, |
| "loss": 0.7471, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 0.5014144778251648, |
| "learning_rate": 3.6521739130434786e-05, |
| "loss": 0.6966, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6537216828478964, |
| "grad_norm": 0.44654181599617004, |
| "learning_rate": 3.585284280936455e-05, |
| "loss": 0.7057, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 0.49160680174827576, |
| "learning_rate": 3.5183946488294314e-05, |
| "loss": 0.7016, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.5010440349578857, |
| "learning_rate": 3.4515050167224085e-05, |
| "loss": 0.7132, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6731391585760518, |
| "grad_norm": 0.4979737102985382, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 0.7243, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 0.45698127150535583, |
| "learning_rate": 3.317725752508362e-05, |
| "loss": 0.7535, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.686084142394822, |
| "grad_norm": 0.47817227244377136, |
| "learning_rate": 3.250836120401338e-05, |
| "loss": 0.7749, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6925566343042071, |
| "grad_norm": 0.4754548966884613, |
| "learning_rate": 3.183946488294314e-05, |
| "loss": 0.7229, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 0.5484136343002319, |
| "learning_rate": 3.117056856187291e-05, |
| "loss": 0.7191, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7055016181229773, |
| "grad_norm": 0.5162729620933533, |
| "learning_rate": 3.0501672240802674e-05, |
| "loss": 0.7293, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7119741100323624, |
| "grad_norm": 0.5177437663078308, |
| "learning_rate": 2.9832775919732442e-05, |
| "loss": 0.7254, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7184466019417476, |
| "grad_norm": 0.5128371119499207, |
| "learning_rate": 2.916387959866221e-05, |
| "loss": 0.728, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7249190938511327, |
| "grad_norm": 0.5296499729156494, |
| "learning_rate": 2.8494983277591973e-05, |
| "loss": 0.6976, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7313915857605178, |
| "grad_norm": 0.5406925678253174, |
| "learning_rate": 2.782608695652174e-05, |
| "loss": 0.7313, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 0.5372524261474609, |
| "learning_rate": 2.7157190635451507e-05, |
| "loss": 0.7126, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7443365695792881, |
| "grad_norm": 0.5130836367607117, |
| "learning_rate": 2.6488294314381275e-05, |
| "loss": 0.7178, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7508090614886731, |
| "grad_norm": 0.636240541934967, |
| "learning_rate": 2.581939799331104e-05, |
| "loss": 0.6861, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7572815533980582, |
| "grad_norm": 0.4897923767566681, |
| "learning_rate": 2.5150501672240806e-05, |
| "loss": 0.6889, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7637540453074434, |
| "grad_norm": 0.4422953128814697, |
| "learning_rate": 2.448160535117057e-05, |
| "loss": 0.7314, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7702265372168284, |
| "grad_norm": 0.4565708041191101, |
| "learning_rate": 2.3812709030100337e-05, |
| "loss": 0.7111, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 0.5054651498794556, |
| "learning_rate": 2.31438127090301e-05, |
| "loss": 0.742, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7831715210355987, |
| "grad_norm": 0.5192699432373047, |
| "learning_rate": 2.2474916387959868e-05, |
| "loss": 0.7211, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7896440129449838, |
| "grad_norm": 0.4859030842781067, |
| "learning_rate": 2.1806020066889632e-05, |
| "loss": 0.6962, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7961165048543689, |
| "grad_norm": 0.4740995466709137, |
| "learning_rate": 2.11371237458194e-05, |
| "loss": 0.6868, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8025889967637541, |
| "grad_norm": 0.4981386959552765, |
| "learning_rate": 2.0468227424749163e-05, |
| "loss": 0.7222, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8090614886731392, |
| "grad_norm": 0.5139773488044739, |
| "learning_rate": 1.979933110367893e-05, |
| "loss": 0.7247, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 0.5492443442344666, |
| "learning_rate": 1.9130434782608697e-05, |
| "loss": 0.7195, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8220064724919094, |
| "grad_norm": 0.46029913425445557, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.6993, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8284789644012945, |
| "grad_norm": 0.45861348509788513, |
| "learning_rate": 1.779264214046823e-05, |
| "loss": 0.7214, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8349514563106796, |
| "grad_norm": 0.48776987195014954, |
| "learning_rate": 1.7123745819397992e-05, |
| "loss": 0.7554, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8414239482200647, |
| "grad_norm": 0.44210392236709595, |
| "learning_rate": 1.645484949832776e-05, |
| "loss": 0.7155, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8478964401294499, |
| "grad_norm": 0.45907357335090637, |
| "learning_rate": 1.5785953177257527e-05, |
| "loss": 0.7345, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8543689320388349, |
| "grad_norm": 0.47504889965057373, |
| "learning_rate": 1.5117056856187292e-05, |
| "loss": 0.6969, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.86084142394822, |
| "grad_norm": 0.553654670715332, |
| "learning_rate": 1.4448160535117058e-05, |
| "loss": 0.6983, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8673139158576052, |
| "grad_norm": 0.5136590003967285, |
| "learning_rate": 1.3779264214046825e-05, |
| "loss": 0.7423, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 0.44719818234443665, |
| "learning_rate": 1.3110367892976589e-05, |
| "loss": 0.6991, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8802588996763754, |
| "grad_norm": 0.5215861797332764, |
| "learning_rate": 1.2441471571906355e-05, |
| "loss": 0.7171, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8867313915857605, |
| "grad_norm": 0.48722243309020996, |
| "learning_rate": 1.177257525083612e-05, |
| "loss": 0.7072, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8932038834951457, |
| "grad_norm": 0.46748003363609314, |
| "learning_rate": 1.1103678929765887e-05, |
| "loss": 0.7326, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8996763754045307, |
| "grad_norm": 0.47341325879096985, |
| "learning_rate": 1.0434782608695651e-05, |
| "loss": 0.7437, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9061488673139159, |
| "grad_norm": 0.7238942980766296, |
| "learning_rate": 9.765886287625419e-06, |
| "loss": 0.6979, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.912621359223301, |
| "grad_norm": 0.4732665717601776, |
| "learning_rate": 9.096989966555184e-06, |
| "loss": 0.7304, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.919093851132686, |
| "grad_norm": 0.5634979009628296, |
| "learning_rate": 8.42809364548495e-06, |
| "loss": 0.7154, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9255663430420712, |
| "grad_norm": 0.5499462485313416, |
| "learning_rate": 7.759197324414715e-06, |
| "loss": 0.7049, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 0.4869629144668579, |
| "learning_rate": 7.090301003344482e-06, |
| "loss": 0.7045, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.9385113268608414, |
| "grad_norm": 0.4725572168827057, |
| "learning_rate": 6.421404682274247e-06, |
| "loss": 0.7077, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9449838187702265, |
| "grad_norm": 0.46987947821617126, |
| "learning_rate": 5.7525083612040135e-06, |
| "loss": 0.7217, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9514563106796117, |
| "grad_norm": 0.5289896130561829, |
| "learning_rate": 5.08361204013378e-06, |
| "loss": 0.6953, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9579288025889967, |
| "grad_norm": 0.545849621295929, |
| "learning_rate": 4.4147157190635455e-06, |
| "loss": 0.7042, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9644012944983819, |
| "grad_norm": 0.556922197341919, |
| "learning_rate": 3.745819397993311e-06, |
| "loss": 0.7215, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 0.4752863645553589, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 0.705, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1545, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.4469720641542554e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|