| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.9893390191897655, | |
| "eval_steps": 60, | |
| "global_step": 232, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 664.3515930175781, | |
| "epoch": 0.017057569296375266, | |
| "grad_norm": 0.11565207690000534, | |
| "kl": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.1249, | |
| "reward": 0.8191964775323868, | |
| "reward_std": 0.1755836745724082, | |
| "rewards/accuracy_reward": 0.8191964775323868, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 661.427487373352, | |
| "epoch": 0.08528784648187633, | |
| "grad_norm": 0.1410389542579651, | |
| "kl": 0.00010322034358978271, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0815, | |
| "reward": 0.7940848618745804, | |
| "reward_std": 0.16921476647257805, | |
| "rewards/accuracy_reward": 0.7940848618745804, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 648.45962600708, | |
| "epoch": 0.17057569296375266, | |
| "grad_norm": 0.09059495478868484, | |
| "kl": 0.00012706518173217775, | |
| "learning_rate": 5e-07, | |
| "loss": 0.092, | |
| "reward": 0.8165178954601288, | |
| "reward_std": 0.1695016896352172, | |
| "rewards/accuracy_reward": 0.8165178954601288, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 635.6861877441406, | |
| "epoch": 0.255863539445629, | |
| "grad_norm": 0.23655401170253754, | |
| "kl": 0.0001537799835205078, | |
| "learning_rate": 5e-07, | |
| "loss": 0.1002, | |
| "reward": 0.8232143238186836, | |
| "reward_std": 0.17031898349523544, | |
| "rewards/accuracy_reward": 0.8232143238186836, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 636.7087341308594, | |
| "epoch": 0.3411513859275053, | |
| "grad_norm": 0.15169048309326172, | |
| "kl": 0.0003520965576171875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0965, | |
| "reward": 0.8183036118745803, | |
| "reward_std": 0.16691437950357796, | |
| "rewards/accuracy_reward": 0.8183036118745803, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 660.6172164916992, | |
| "epoch": 0.42643923240938164, | |
| "grad_norm": 0.10549971461296082, | |
| "kl": 0.00020837783813476562, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0838, | |
| "reward": 0.813392898440361, | |
| "reward_std": 0.17468413366004826, | |
| "rewards/accuracy_reward": 0.813392898440361, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 659.3672187805175, | |
| "epoch": 0.511727078891258, | |
| "grad_norm": 0.13681049644947052, | |
| "kl": 0.00038820505142211914, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0786, | |
| "reward": 0.80357146859169, | |
| "reward_std": 0.17490468453615904, | |
| "rewards/accuracy_reward": 0.80357146859169, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 660.5917663574219, | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.09065572917461395, | |
| "kl": 0.0004504680633544922, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0824, | |
| "reward": 0.8071428924798966, | |
| "reward_std": 0.1621523329988122, | |
| "rewards/accuracy_reward": 0.8071428924798966, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 638.1212364196778, | |
| "epoch": 0.6823027718550106, | |
| "grad_norm": 0.09261901676654816, | |
| "kl": 0.0005172014236450196, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0585, | |
| "reward": 0.8138393208384513, | |
| "reward_std": 0.1534264313057065, | |
| "rewards/accuracy_reward": 0.8138393208384513, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 664.7774856567382, | |
| "epoch": 0.767590618336887, | |
| "grad_norm": 0.09141222387552261, | |
| "kl": 0.00053253173828125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0687, | |
| "reward": 0.8071428909897804, | |
| "reward_std": 0.16072208830155432, | |
| "rewards/accuracy_reward": 0.8071428909897804, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 662.5498062133789, | |
| "epoch": 0.8528784648187633, | |
| "grad_norm": 0.24577292799949646, | |
| "kl": 0.0011467933654785156, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0649, | |
| "reward": 0.8042411059141159, | |
| "reward_std": 0.16364638023078443, | |
| "rewards/accuracy_reward": 0.8042411059141159, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 643.9475730895996, | |
| "epoch": 0.9381663113006397, | |
| "grad_norm": 0.10820304602384567, | |
| "kl": 0.000740814208984375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0622, | |
| "reward": 0.8261161133646965, | |
| "reward_std": 0.15972621561959385, | |
| "rewards/accuracy_reward": 0.8261161133646965, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.0341151385927505, | |
| "grad_norm": 0.1097937524318695, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0662, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.0341151385927505, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 638.2177686691284, | |
| "eval_kl": 0.0012085437774658203, | |
| "eval_loss": 0.027663394808769226, | |
| "eval_reward": 0.7151227928698063, | |
| "eval_reward_std": 0.2182473847642541, | |
| "eval_rewards/accuracy_reward": 0.7151227928698063, | |
| "eval_runtime": 835.396, | |
| "eval_samples_per_second": 0.599, | |
| "eval_steps_per_second": 0.006, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 630.2452568054199, | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 0.08139240741729736, | |
| "kl": 0.0015056610107421875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0579, | |
| "reward": 0.8170759312808513, | |
| "reward_std": 0.16009651254862547, | |
| "rewards/accuracy_reward": 0.8170759312808513, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 620.5440017700196, | |
| "epoch": 1.2046908315565032, | |
| "grad_norm": 0.10313019156455994, | |
| "kl": 0.0016027450561523437, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0586, | |
| "reward": 0.8310268223285675, | |
| "reward_std": 0.1424413041677326, | |
| "rewards/accuracy_reward": 0.8310268223285675, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 649.887752532959, | |
| "epoch": 1.2899786780383795, | |
| "grad_norm": 0.09998168796300888, | |
| "kl": 0.00717315673828125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0564, | |
| "reward": 0.8100446775555611, | |
| "reward_std": 0.1757219755090773, | |
| "rewards/accuracy_reward": 0.8100446775555611, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 614.6263717651367, | |
| "epoch": 1.375266524520256, | |
| "grad_norm": 0.08961261808872223, | |
| "kl": 0.0022918701171875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0376, | |
| "reward": 0.8328125387430191, | |
| "reward_std": 0.13861298179253936, | |
| "rewards/accuracy_reward": 0.8328125387430191, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 631.008511352539, | |
| "epoch": 1.4605543710021323, | |
| "grad_norm": 0.1273442804813385, | |
| "kl": 0.002947235107421875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0541, | |
| "reward": 0.8229911118745804, | |
| "reward_std": 0.14886255729943515, | |
| "rewards/accuracy_reward": 0.8229911118745804, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 646.872346496582, | |
| "epoch": 1.5458422174840085, | |
| "grad_norm": 0.15443700551986694, | |
| "kl": 0.0033596038818359377, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0595, | |
| "reward": 0.809821467101574, | |
| "reward_std": 0.15138995712623, | |
| "rewards/accuracy_reward": 0.809821467101574, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 631.2808311462402, | |
| "epoch": 1.6311300639658848, | |
| "grad_norm": 0.09066915512084961, | |
| "kl": 0.004022216796875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0418, | |
| "reward": 0.8258928924798965, | |
| "reward_std": 0.1533732468262315, | |
| "rewards/accuracy_reward": 0.8258928924798965, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 627.2167701721191, | |
| "epoch": 1.716417910447761, | |
| "grad_norm": 0.10236337780952454, | |
| "kl": 0.011969375610351562, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0372, | |
| "reward": 0.8267857551574707, | |
| "reward_std": 0.13705341126769782, | |
| "rewards/accuracy_reward": 0.8267857551574707, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 643.0727920532227, | |
| "epoch": 1.8017057569296375, | |
| "grad_norm": 0.09229780733585358, | |
| "kl": 0.00559539794921875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0289, | |
| "reward": 0.8116071805357933, | |
| "reward_std": 0.147033178107813, | |
| "rewards/accuracy_reward": 0.8116071805357933, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 634.0770401000976, | |
| "epoch": 1.886993603411514, | |
| "grad_norm": 0.1279992163181305, | |
| "kl": 0.006862640380859375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0364, | |
| "reward": 0.8312500357627869, | |
| "reward_std": 0.14459644490852952, | |
| "rewards/accuracy_reward": 0.8312500357627869, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 643.7125259399414, | |
| "epoch": 1.9722814498933903, | |
| "grad_norm": 0.12527693808078766, | |
| "kl": 0.00738983154296875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0394, | |
| "reward": 0.8138393253087998, | |
| "reward_std": 0.15650860401801764, | |
| "rewards/accuracy_reward": 0.8138393253087998, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.068230277185501, | |
| "grad_norm": 0.13853299617767334, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0354, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.068230277185501, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 627.0459775924683, | |
| "eval_kl": 0.009876251220703125, | |
| "eval_loss": 0.023924430832266808, | |
| "eval_reward": 0.7343750353902578, | |
| "eval_reward_std": 0.19236661097966135, | |
| "eval_rewards/accuracy_reward": 0.7343750353902578, | |
| "eval_runtime": 697.2301, | |
| "eval_samples_per_second": 0.717, | |
| "eval_steps_per_second": 0.007, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 621.6253170013428, | |
| "epoch": 2.1535181236673773, | |
| "grad_norm": 0.11815498024225235, | |
| "kl": 0.01000518798828125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0358, | |
| "reward": 0.8255580753087998, | |
| "reward_std": 0.14198732506483794, | |
| "rewards/accuracy_reward": 0.8255580753087998, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 621.314752960205, | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.113522969186306, | |
| "kl": 0.0125152587890625, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0269, | |
| "reward": 0.8386161178350449, | |
| "reward_std": 0.14197837365791202, | |
| "rewards/accuracy_reward": 0.8386161178350449, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 642.9511466979981, | |
| "epoch": 2.3240938166311302, | |
| "grad_norm": 0.14222967624664307, | |
| "kl": 0.01525726318359375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0476, | |
| "reward": 0.7872768193483353, | |
| "reward_std": 0.14514056108891965, | |
| "rewards/accuracy_reward": 0.7872768193483353, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 628.785961151123, | |
| "epoch": 2.4093816631130065, | |
| "grad_norm": 0.13704024255275726, | |
| "kl": 0.01926116943359375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0403, | |
| "reward": 0.8256696745753288, | |
| "reward_std": 0.14226720854640007, | |
| "rewards/accuracy_reward": 0.8256696745753288, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 614.7006958007812, | |
| "epoch": 2.4946695095948828, | |
| "grad_norm": 0.19874536991119385, | |
| "kl": 0.0266326904296875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0278, | |
| "reward": 0.8165178939700126, | |
| "reward_std": 0.16517118187621235, | |
| "rewards/accuracy_reward": 0.8165178939700126, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 649.61431350708, | |
| "epoch": 2.579957356076759, | |
| "grad_norm": 0.40368160605430603, | |
| "kl": 0.0365936279296875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0341, | |
| "reward": 0.7767857447266578, | |
| "reward_std": 0.1682931227609515, | |
| "rewards/accuracy_reward": 0.7767857447266578, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 620.872575378418, | |
| "epoch": 2.6652452025586353, | |
| "grad_norm": 0.37761253118515015, | |
| "kl": 0.049951171875, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0415, | |
| "reward": 0.7785714641213417, | |
| "reward_std": 0.19512954521924258, | |
| "rewards/accuracy_reward": 0.7785714641213417, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 617.9544944763184, | |
| "epoch": 2.750533049040512, | |
| "grad_norm": 0.44903331995010376, | |
| "kl": 0.0691650390625, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0422, | |
| "reward": 0.7671875387430191, | |
| "reward_std": 0.19579849690198897, | |
| "rewards/accuracy_reward": 0.7671875387430191, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 616.282169342041, | |
| "epoch": 2.835820895522388, | |
| "grad_norm": 0.7222861647605896, | |
| "kl": 0.10626220703125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0487, | |
| "reward": 0.7156250298023223, | |
| "reward_std": 0.2289330180734396, | |
| "rewards/accuracy_reward": 0.7156250298023223, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 649.1158721923828, | |
| "epoch": 2.9211087420042645, | |
| "grad_norm": 1.717586636543274, | |
| "kl": 0.194580078125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0679, | |
| "reward": 0.614955385774374, | |
| "reward_std": 0.2752906741574407, | |
| "rewards/accuracy_reward": 0.614955385774374, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 786.9139389038086, | |
| "epoch": 3.0170575692963753, | |
| "grad_norm": 1.529920220375061, | |
| "kl": 0.439892578125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.1198, | |
| "reward": 0.368080372735858, | |
| "reward_std": 0.29059169851243494, | |
| "rewards/accuracy_reward": 0.368080372735858, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.1023454157782515, | |
| "grad_norm": 1.5960689783096313, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0887, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.1023454157782515, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 791.9263305664062, | |
| "eval_kl": 2.4365234375, | |
| "eval_loss": 0.06115880608558655, | |
| "eval_reward": 0.08565848605940118, | |
| "eval_reward_std": 0.13261561130639166, | |
| "eval_rewards/accuracy_reward": 0.08565848605940118, | |
| "eval_runtime": 821.1595, | |
| "eval_samples_per_second": 0.609, | |
| "eval_steps_per_second": 0.006, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 818.8542762756348, | |
| "epoch": 3.1876332622601278, | |
| "grad_norm": 11.959312438964844, | |
| "kl": 2.426806640625, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0522, | |
| "reward": 0.10647321877768263, | |
| "reward_std": 0.14978813820052891, | |
| "rewards/accuracy_reward": 0.10647321877768263, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 679.0966827392579, | |
| "epoch": 3.272921108742004, | |
| "grad_norm": 19.53175163269043, | |
| "kl": 3.7345703125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0359, | |
| "reward": 0.039732144516892734, | |
| "reward_std": 0.07693687449209392, | |
| "rewards/accuracy_reward": 0.039732144516892734, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 561.9143058776856, | |
| "epoch": 3.3582089552238807, | |
| "grad_norm": 8.676216125488281, | |
| "kl": 5.15078125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0272, | |
| "reward": 0.026116072735749184, | |
| "reward_std": 0.05382296503521502, | |
| "rewards/accuracy_reward": 0.026116072735749184, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 466.9301559448242, | |
| "epoch": 3.443496801705757, | |
| "grad_norm": 16.412755966186523, | |
| "kl": 7.519140625, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0222, | |
| "reward": 0.02700892973225564, | |
| "reward_std": 0.058131046639755365, | |
| "rewards/accuracy_reward": 0.02700892973225564, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 409.43640365600584, | |
| "epoch": 3.5287846481876333, | |
| "grad_norm": 10.202103614807129, | |
| "kl": 4.88203125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0116, | |
| "reward": 0.02410714393481612, | |
| "reward_std": 0.051837433129549026, | |
| "rewards/accuracy_reward": 0.02410714393481612, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 366.6959976196289, | |
| "epoch": 3.6140724946695095, | |
| "grad_norm": 33.47189712524414, | |
| "kl": 6.95859375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0144, | |
| "reward": 0.026116072852164506, | |
| "reward_std": 0.04902788205072284, | |
| "rewards/accuracy_reward": 0.026116072852164506, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 356.4863983154297, | |
| "epoch": 3.699360341151386, | |
| "grad_norm": 7.119285583496094, | |
| "kl": 3.1953125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0123, | |
| "reward": 0.0292410729220137, | |
| "reward_std": 0.05716597293503582, | |
| "rewards/accuracy_reward": 0.0292410729220137, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 343.4513526916504, | |
| "epoch": 3.7846481876332625, | |
| "grad_norm": 15.441688537597656, | |
| "kl": 4.012109375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0177, | |
| "reward": 0.028571429941803218, | |
| "reward_std": 0.058770314510911706, | |
| "rewards/accuracy_reward": 0.028571429941803218, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 310.74108505249023, | |
| "epoch": 3.8699360341151388, | |
| "grad_norm": 7.061368942260742, | |
| "kl": 5.580859375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0081, | |
| "reward": 0.02857143001165241, | |
| "reward_std": 0.06492680269293487, | |
| "rewards/accuracy_reward": 0.02857143001165241, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 314.79555015563966, | |
| "epoch": 3.955223880597015, | |
| "grad_norm": 22.824426651000977, | |
| "kl": 8.496484375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0108, | |
| "reward": 0.033258930104784666, | |
| "reward_std": 0.06678469418548047, | |
| "rewards/accuracy_reward": 0.033258930104784666, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 311.46373558044434, | |
| "epoch": 3.9893390191897655, | |
| "kl": 4.9599609375, | |
| "reward": 0.042968751688022166, | |
| "reward_std": 0.08240398659836501, | |
| "rewards/accuracy_reward": 0.042968751688022166, | |
| "step": 232, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0500773029434013, | |
| "train_runtime": 52194.0457, | |
| "train_samples_per_second": 0.575, | |
| "train_steps_per_second": 0.004 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 232, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |