{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 506,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019762845849802372,
      "grad_norm": 1.1964468955993652,
      "learning_rate": 1.875e-06,
      "loss": 1.3514,
      "step": 5
    },
    {
      "epoch": 0.039525691699604744,
      "grad_norm": 0.7849017381668091,
      "learning_rate": 4.21875e-06,
      "loss": 1.3223,
      "step": 10
    },
    {
      "epoch": 0.05928853754940711,
      "grad_norm": 0.6334398984909058,
      "learning_rate": 6.5625e-06,
      "loss": 1.3177,
      "step": 15
    },
    {
      "epoch": 0.07905138339920949,
      "grad_norm": 0.611282467842102,
      "learning_rate": 8.90625e-06,
      "loss": 1.325,
      "step": 20
    },
    {
      "epoch": 0.09881422924901186,
      "grad_norm": 0.5317814350128174,
      "learning_rate": 1.125e-05,
      "loss": 1.2733,
      "step": 25
    },
    {
      "epoch": 0.11857707509881422,
      "grad_norm": 0.567980170249939,
      "learning_rate": 1.359375e-05,
      "loss": 1.2692,
      "step": 30
    },
    {
      "epoch": 0.1383399209486166,
      "grad_norm": 0.5414619445800781,
      "learning_rate": 1.59375e-05,
      "loss": 1.2723,
      "step": 35
    },
    {
      "epoch": 0.15810276679841898,
      "grad_norm": 0.6008857488632202,
      "learning_rate": 1.828125e-05,
      "loss": 1.254,
      "step": 40
    },
    {
      "epoch": 0.17786561264822134,
      "grad_norm": 0.44546812772750854,
      "learning_rate": 2.0625e-05,
      "loss": 1.2629,
      "step": 45
    },
    {
      "epoch": 0.1976284584980237,
      "grad_norm": 0.4174504280090332,
      "learning_rate": 2.296875e-05,
      "loss": 1.1975,
      "step": 50
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 0.4895493686199188,
      "learning_rate": 2.5312500000000002e-05,
      "loss": 1.2347,
      "step": 55
    },
    {
      "epoch": 0.23715415019762845,
      "grad_norm": 0.47120198607444763,
      "learning_rate": 2.765625e-05,
      "loss": 1.1846,
      "step": 60
    },
    {
      "epoch": 0.25691699604743085,
      "grad_norm": 0.5295073986053467,
      "learning_rate": 3e-05,
      "loss": 1.1923,
      "step": 65
    },
    {
      "epoch": 0.2766798418972332,
      "grad_norm": 0.5054467916488647,
      "learning_rate": 2.999871705271597e-05,
      "loss": 1.1632,
      "step": 70
    },
    {
      "epoch": 0.2964426877470356,
      "grad_norm": 0.5323383808135986,
      "learning_rate": 2.9994868430324378e-05,
      "loss": 1.1303,
      "step": 75
    },
    {
      "epoch": 0.31620553359683795,
      "grad_norm": 0.5701656341552734,
      "learning_rate": 2.9988454791169173e-05,
      "loss": 1.11,
      "step": 80
    },
    {
      "epoch": 0.3359683794466403,
      "grad_norm": 0.5317907333374023,
      "learning_rate": 2.9979477232365156e-05,
      "loss": 1.1302,
      "step": 85
    },
    {
      "epoch": 0.3557312252964427,
      "grad_norm": 0.609390139579773,
      "learning_rate": 2.996793728961027e-05,
      "loss": 1.0821,
      "step": 90
    },
    {
      "epoch": 0.37549407114624506,
      "grad_norm": 0.5419305562973022,
      "learning_rate": 2.995383693692296e-05,
      "loss": 1.0277,
      "step": 95
    },
    {
      "epoch": 0.3952569169960474,
      "grad_norm": 0.5599208474159241,
      "learning_rate": 2.9937178586304434e-05,
      "loss": 1.0886,
      "step": 100
    },
    {
      "epoch": 0.4150197628458498,
      "grad_norm": 0.6469520330429077,
      "learning_rate": 2.991796508732613e-05,
      "loss": 1.0063,
      "step": 105
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.6948160529136658,
      "learning_rate": 2.989619972664222e-05,
      "loss": 0.9859,
      "step": 110
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.6252328753471375,
      "learning_rate": 2.9871886227427417e-05,
      "loss": 1.0894,
      "step": 115
    },
    {
      "epoch": 0.4743083003952569,
      "grad_norm": 0.6875498294830322,
      "learning_rate": 2.9845028748740103e-05,
      "loss": 0.9858,
      "step": 120
    },
    {
      "epoch": 0.49407114624505927,
      "grad_norm": 0.741546630859375,
      "learning_rate": 2.9815631884810848e-05,
      "loss": 1.0052,
      "step": 125
    },
    {
      "epoch": 0.5138339920948617,
      "grad_norm": 0.7393259406089783,
      "learning_rate": 2.978370066425655e-05,
      "loss": 0.9504,
      "step": 130
    },
    {
      "epoch": 0.5335968379446641,
      "grad_norm": 0.751603364944458,
      "learning_rate": 2.9749240549220235e-05,
      "loss": 0.9486,
      "step": 135
    },
    {
      "epoch": 0.5533596837944664,
      "grad_norm": 0.7952867150306702,
      "learning_rate": 2.97122574344367e-05,
      "loss": 0.9134,
      "step": 140
    },
    {
      "epoch": 0.5731225296442688,
      "grad_norm": 0.8205041289329529,
      "learning_rate": 2.9672757646224177e-05,
      "loss": 0.9495,
      "step": 145
    },
    {
      "epoch": 0.5928853754940712,
      "grad_norm": 0.8304847478866577,
      "learning_rate": 2.963074794140212e-05,
      "loss": 0.9298,
      "step": 150
    },
    {
      "epoch": 0.6126482213438735,
      "grad_norm": 0.8987076878547668,
      "learning_rate": 2.9586235506135434e-05,
      "loss": 0.8791,
      "step": 155
    },
    {
      "epoch": 0.6324110671936759,
      "grad_norm": 0.842399001121521,
      "learning_rate": 2.9539227954705167e-05,
      "loss": 0.8508,
      "step": 160
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 0.8677818179130554,
      "learning_rate": 2.948973332820605e-05,
      "loss": 0.9138,
      "step": 165
    },
    {
      "epoch": 0.6719367588932806,
      "grad_norm": 0.8640912175178528,
      "learning_rate": 2.943776009317097e-05,
      "loss": 0.9166,
      "step": 170
    },
    {
      "epoch": 0.691699604743083,
      "grad_norm": 0.7900509238243103,
      "learning_rate": 2.9383317140122685e-05,
      "loss": 0.8462,
      "step": 175
    },
    {
      "epoch": 0.7114624505928854,
      "grad_norm": 0.8746598362922668,
      "learning_rate": 2.9326413782053035e-05,
      "loss": 0.8937,
      "step": 180
    },
    {
      "epoch": 0.7312252964426877,
      "grad_norm": 0.868657648563385,
      "learning_rate": 2.926705975282984e-05,
      "loss": 0.8369,
      "step": 185
    },
    {
      "epoch": 0.7509881422924901,
      "grad_norm": 0.8763641715049744,
      "learning_rate": 2.9205265205531844e-05,
      "loss": 0.8472,
      "step": 190
    },
    {
      "epoch": 0.7707509881422925,
      "grad_norm": 0.881182074546814,
      "learning_rate": 2.9141040710711933e-05,
      "loss": 0.8424,
      "step": 195
    },
    {
      "epoch": 0.7905138339920948,
      "grad_norm": 0.9215484261512756,
      "learning_rate": 2.9074397254588935e-05,
      "loss": 0.7925,
      "step": 200
    },
    {
      "epoch": 0.8102766798418972,
      "grad_norm": 1.0325407981872559,
      "learning_rate": 2.9005346237168318e-05,
      "loss": 0.7892,
      "step": 205
    },
    {
      "epoch": 0.8300395256916996,
      "grad_norm": 1.0284020900726318,
      "learning_rate": 2.8933899470292115e-05,
      "loss": 0.7682,
      "step": 210
    },
    {
      "epoch": 0.849802371541502,
      "grad_norm": 1.1372671127319336,
      "learning_rate": 2.88600691756184e-05,
      "loss": 0.7338,
      "step": 215
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.98224937915802,
      "learning_rate": 2.878386798253064e-05,
      "loss": 0.7693,
      "step": 220
    },
    {
      "epoch": 0.8893280632411067,
      "grad_norm": 0.9914873838424683,
      "learning_rate": 2.870530892597733e-05,
      "loss": 0.7752,
      "step": 225
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 1.075027585029602,
      "learning_rate": 2.862440544424224e-05,
      "loss": 0.7618,
      "step": 230
    },
    {
      "epoch": 0.9288537549407114,
      "grad_norm": 0.9851027727127075,
      "learning_rate": 2.854117137664565e-05,
      "loss": 0.7553,
      "step": 235
    },
    {
      "epoch": 0.9486166007905138,
      "grad_norm": 1.161730170249939,
      "learning_rate": 2.8455620961177027e-05,
      "loss": 0.7143,
      "step": 240
    },
    {
      "epoch": 0.9683794466403162,
      "grad_norm": 0.9710651636123657,
      "learning_rate": 2.8367768832059457e-05,
      "loss": 0.7061,
      "step": 245
    },
    {
      "epoch": 0.9881422924901185,
      "grad_norm": 1.1001405715942383,
      "learning_rate": 2.8277630017246343e-05,
      "loss": 0.6668,
      "step": 250
    },
    {
      "epoch": 1.007905138339921,
      "grad_norm": 1.0591286420822144,
      "learning_rate": 2.8185219935850692e-05,
      "loss": 0.6975,
      "step": 255
    },
    {
      "epoch": 1.0276679841897234,
      "grad_norm": 0.9946438670158386,
      "learning_rate": 2.809055439550757e-05,
      "loss": 0.6225,
      "step": 260
    },
    {
      "epoch": 1.0474308300395256,
      "grad_norm": 1.138195276260376,
      "learning_rate": 2.7993649589670026e-05,
      "loss": 0.5956,
      "step": 265
    },
    {
      "epoch": 1.0671936758893281,
      "grad_norm": 1.0832058191299438,
      "learning_rate": 2.7894522094839056e-05,
      "loss": 0.6119,
      "step": 270
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 1.0984829664230347,
      "learning_rate": 2.7793188867728026e-05,
      "loss": 0.6447,
      "step": 275
    },
    {
      "epoch": 1.1067193675889329,
      "grad_norm": 1.2497950792312622,
      "learning_rate": 2.7689667242362072e-05,
      "loss": 0.5223,
      "step": 280
    },
    {
      "epoch": 1.1264822134387351,
      "grad_norm": 1.0441067218780518,
      "learning_rate": 2.758397492711294e-05,
      "loss": 0.5637,
      "step": 285
    },
    {
      "epoch": 1.1462450592885376,
      "grad_norm": 1.0681952238082886,
      "learning_rate": 2.74761300016698e-05,
      "loss": 0.5286,
      "step": 290
    },
    {
      "epoch": 1.1660079051383399,
      "grad_norm": 1.1822644472122192,
      "learning_rate": 2.7366150913946547e-05,
      "loss": 0.5217,
      "step": 295
    },
    {
      "epoch": 1.1857707509881423,
      "grad_norm": 1.249902367591858,
      "learning_rate": 2.72540564769261e-05,
      "loss": 0.5591,
      "step": 300
    },
    {
      "epoch": 1.2055335968379446,
      "grad_norm": 1.4187573194503784,
      "learning_rate": 2.7139865865442265e-05,
      "loss": 0.5904,
      "step": 305
    },
    {
      "epoch": 1.225296442687747,
      "grad_norm": 1.1566301584243774,
      "learning_rate": 2.7023598612899686e-05,
      "loss": 0.518,
      "step": 310
    },
    {
      "epoch": 1.2450592885375493,
      "grad_norm": 1.1384133100509644,
      "learning_rate": 2.690527460793248e-05,
      "loss": 0.5171,
      "step": 315
    },
    {
      "epoch": 1.2648221343873518,
      "grad_norm": 1.2240594625473022,
      "learning_rate": 2.6784914091002096e-05,
      "loss": 0.5475,
      "step": 320
    },
    {
      "epoch": 1.2845849802371543,
      "grad_norm": 1.144561529159546,
      "learning_rate": 2.6662537650934966e-05,
      "loss": 0.5308,
      "step": 325
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 1.1269924640655518,
      "learning_rate": 2.6538166221400612e-05,
      "loss": 0.5528,
      "step": 330
    },
    {
      "epoch": 1.3241106719367588,
      "grad_norm": 1.1573723554611206,
      "learning_rate": 2.6411821077330734e-05,
      "loss": 0.5193,
      "step": 335
    },
    {
      "epoch": 1.3438735177865613,
      "grad_norm": 1.1979612112045288,
      "learning_rate": 2.6283523831279923e-05,
      "loss": 0.5359,
      "step": 340
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 1.1988822221755981,
      "learning_rate": 2.615329642972863e-05,
      "loss": 0.5649,
      "step": 345
    },
    {
      "epoch": 1.383399209486166,
      "grad_norm": 1.0966296195983887,
      "learning_rate": 2.6021161149328996e-05,
      "loss": 0.5794,
      "step": 350
    },
    {
      "epoch": 1.4031620553359683,
      "grad_norm": 1.145140528678894,
      "learning_rate": 2.588714059309425e-05,
      "loss": 0.489,
      "step": 355
    },
    {
      "epoch": 1.4229249011857708,
      "grad_norm": 1.246820330619812,
      "learning_rate": 2.5751257686532202e-05,
      "loss": 0.4758,
      "step": 360
    },
    {
      "epoch": 1.4426877470355732,
      "grad_norm": 1.2031733989715576,
      "learning_rate": 2.5613535673723637e-05,
      "loss": 0.5147,
      "step": 365
    },
    {
      "epoch": 1.4624505928853755,
      "grad_norm": 1.108897089958191,
      "learning_rate": 2.54739981133462e-05,
      "loss": 0.4368,
      "step": 370
    },
    {
      "epoch": 1.4822134387351777,
      "grad_norm": 1.2132493257522583,
      "learning_rate": 2.5332668874644434e-05,
      "loss": 0.4989,
      "step": 375
    },
    {
      "epoch": 1.5019762845849802,
      "grad_norm": 1.1977864503860474,
      "learning_rate": 2.5189572133346737e-05,
      "loss": 0.4776,
      "step": 380
    },
    {
      "epoch": 1.5217391304347827,
      "grad_norm": 1.316010594367981,
      "learning_rate": 2.5044732367529847e-05,
      "loss": 0.4607,
      "step": 385
    },
    {
      "epoch": 1.541501976284585,
      "grad_norm": 1.1140236854553223,
      "learning_rate": 2.4898174353431664e-05,
      "loss": 0.4765,
      "step": 390
    },
    {
      "epoch": 1.5612648221343872,
      "grad_norm": 1.267500400543213,
      "learning_rate": 2.4749923161213e-05,
      "loss": 0.4874,
      "step": 395
    },
    {
      "epoch": 1.5810276679841897,
      "grad_norm": 1.2721202373504639,
      "learning_rate": 2.4600004150669112e-05,
      "loss": 0.4785,
      "step": 400
    },
    {
      "epoch": 1.6007905138339922,
      "grad_norm": 1.388615608215332,
      "learning_rate": 2.4448442966891653e-05,
      "loss": 0.4042,
      "step": 405
    },
    {
      "epoch": 1.6205533596837944,
      "grad_norm": 1.2269567251205444,
      "learning_rate": 2.4295265535881837e-05,
      "loss": 0.4907,
      "step": 410
    },
    {
      "epoch": 1.6403162055335967,
      "grad_norm": 1.2590751647949219,
      "learning_rate": 2.4140498060115536e-05,
      "loss": 0.4494,
      "step": 415
    },
    {
      "epoch": 1.6600790513833992,
      "grad_norm": 1.1927860975265503,
      "learning_rate": 2.3984167014061123e-05,
      "loss": 0.4119,
      "step": 420
    },
    {
      "epoch": 1.6798418972332017,
      "grad_norm": 1.2527580261230469,
      "learning_rate": 2.3826299139650705e-05,
      "loss": 0.4518,
      "step": 425
    },
    {
      "epoch": 1.699604743083004,
      "grad_norm": 1.0854295492172241,
      "learning_rate": 2.3666921441705726e-05,
      "loss": 0.4573,
      "step": 430
    },
    {
      "epoch": 1.7193675889328062,
      "grad_norm": 1.2952898740768433,
      "learning_rate": 2.3506061183317474e-05,
      "loss": 0.4337,
      "step": 435
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 1.2577977180480957,
      "learning_rate": 2.3343745881183498e-05,
      "loss": 0.4392,
      "step": 440
    },
    {
      "epoch": 1.7588932806324111,
      "grad_norm": 1.357822060585022,
      "learning_rate": 2.3180003300900597e-05,
      "loss": 0.4092,
      "step": 445
    },
    {
      "epoch": 1.7786561264822134,
      "grad_norm": 1.1936920881271362,
      "learning_rate": 2.3014861452215273e-05,
      "loss": 0.4459,
      "step": 450
    },
    {
      "epoch": 1.7984189723320159,
      "grad_norm": 1.1854654550552368,
      "learning_rate": 2.284834858423234e-05,
      "loss": 0.4227,
      "step": 455
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.1320987939834595,
      "learning_rate": 2.268049318058271e-05,
      "loss": 0.3757,
      "step": 460
    },
    {
      "epoch": 1.8379446640316206,
      "grad_norm": 1.1270341873168945,
      "learning_rate": 2.2511323954550938e-05,
      "loss": 0.4168,
      "step": 465
    },
    {
      "epoch": 1.8577075098814229,
      "grad_norm": 1.254346251487732,
      "learning_rate": 2.2340869844163572e-05,
      "loss": 0.4159,
      "step": 470
    },
    {
      "epoch": 1.8774703557312253,
      "grad_norm": 1.1780612468719482,
      "learning_rate": 2.216916000723901e-05,
      "loss": 0.3815,
      "step": 475
    },
    {
      "epoch": 1.8972332015810278,
      "grad_norm": 1.2447336912155151,
      "learning_rate": 2.199622381639977e-05,
      "loss": 0.3958,
      "step": 480
    },
    {
      "epoch": 1.91699604743083,
      "grad_norm": 1.3927847146987915,
      "learning_rate": 2.182209085404804e-05,
      "loss": 0.4206,
      "step": 485
    },
    {
      "epoch": 1.9367588932806323,
      "grad_norm": 1.2636570930480957,
      "learning_rate": 2.1646790907305288e-05,
      "loss": 0.3803,
      "step": 490
    },
    {
      "epoch": 1.9565217391304348,
      "grad_norm": 1.38932466506958,
      "learning_rate": 2.1470353962916933e-05,
      "loss": 0.3986,
      "step": 495
    },
    {
      "epoch": 1.9762845849802373,
      "grad_norm": 1.162876009941101,
      "learning_rate": 2.1292810202122784e-05,
      "loss": 0.3422,
      "step": 500
    },
    {
      "epoch": 1.9960474308300395,
      "grad_norm": 1.3223176002502441,
      "learning_rate": 2.1114189995494274e-05,
      "loss": 0.3549,
      "step": 505
    }
  ],
  "logging_steps": 5,
  "max_steps": 1265,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.34236863669207e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}