| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9554140127388535, | |
| "eval_steps": 500, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8057.0, | |
| "completions/mean_length": 6127.464599609375, | |
| "completions/min_length": 1222.5, | |
| "epoch": 0.0031847133757961785, | |
| "grad_norm": 0.4097345173358917, | |
| "kl": 0.0001392364501953125, | |
| "learning_rate": 2.5e-07, | |
| "loss": -0.22166921198368073, | |
| "memory(GiB)": 142.96, | |
| "reward": 0.3214285969734192, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3214285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4609040319919586, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.002718 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8075.0, | |
| "completions/mean_length": 6311.14306640625, | |
| "completions/min_length": 1474.0, | |
| "epoch": 0.006369426751592357, | |
| "grad_norm": 0.19287629425525665, | |
| "kl": 0.00012159347534179688, | |
| "learning_rate": 5e-07, | |
| "loss": -0.09268201887607574, | |
| "memory(GiB)": 157.67, | |
| "reward": 0.5357142984867096, | |
| "reward_std": 0.25552501529455185, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5357142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 2, | |
| "train_speed(iter/s)": 0.002969 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8049.5, | |
| "completions/mean_length": 6904.018310546875, | |
| "completions/min_length": 1641.0, | |
| "epoch": 0.009554140127388535, | |
| "grad_norm": 0.19940905272960663, | |
| "kl": 4.482269287109375e-05, | |
| "learning_rate": 7.5e-07, | |
| "loss": -0.1511116474866867, | |
| "memory(GiB)": 157.67, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 3, | |
| "train_speed(iter/s)": 0.003082 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8045.0, | |
| "completions/mean_length": 6924.089599609375, | |
| "completions/min_length": 2193.5, | |
| "epoch": 0.012738853503184714, | |
| "grad_norm": 0.6029608845710754, | |
| "kl": 0.00013637542724609375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.41891923546791077, | |
| "memory(GiB)": 157.67, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.2253357544541359, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4389495849609375, | |
| "step": 4, | |
| "train_speed(iter/s)": 0.003138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8054.0, | |
| "completions/mean_length": 7148.3037109375, | |
| "completions/min_length": 2388.5, | |
| "epoch": 0.01592356687898089, | |
| "grad_norm": 0.2799333333969116, | |
| "kl": 0.00018405914306640625, | |
| "learning_rate": 9.999743248701019e-07, | |
| "loss": -0.09833915531635284, | |
| "memory(GiB)": 157.67, | |
| "reward": 0.3214285969734192, | |
| "reward_std": 0.32695358991622925, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3214285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4755948781967163, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.003168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7857142857142857, | |
| "completions/max_length": 8034.5, | |
| "completions/mean_length": 7279.303955078125, | |
| "completions/min_length": 3276.0, | |
| "epoch": 0.01910828025477707, | |
| "grad_norm": 0.2061210572719574, | |
| "kl": 0.00016450881958007812, | |
| "learning_rate": 9.998973021172564e-07, | |
| "loss": 0.03570747375488281, | |
| "memory(GiB)": 157.75, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.23086077719926834, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.46781930327415466, | |
| "step": 6, | |
| "train_speed(iter/s)": 0.003171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7857142857142857, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 7143.1611328125, | |
| "completions/min_length": 2512.5, | |
| "epoch": 0.022292993630573247, | |
| "grad_norm": 0.5545817017555237, | |
| "kl": 9.918212890625e-05, | |
| "learning_rate": 9.997689396517406e-07, | |
| "loss": -0.36179664731025696, | |
| "memory(GiB)": 157.86, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.33800362795591354, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4389495849609375, | |
| "step": 7, | |
| "train_speed(iter/s)": 0.003182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857142, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 6924.375244140625, | |
| "completions/min_length": 1848.0, | |
| "epoch": 0.025477707006369428, | |
| "grad_norm": 0.3819968104362488, | |
| "kl": 0.0002493858337402344, | |
| "learning_rate": 9.99589250656446e-07, | |
| "loss": -0.23172156512737274, | |
| "memory(GiB)": 157.86, | |
| "reward": 0.2678571566939354, | |
| "reward_std": 0.21981073915958405, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571566939354, | |
| "rewards/AnswerTagAccuracyORM/std": 0.426847904920578, | |
| "step": 8, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6457.82177734375, | |
| "completions/min_length": 1266.0, | |
| "epoch": 0.028662420382165606, | |
| "grad_norm": 0.4406679570674896, | |
| "kl": 1.0362602143682022e-41, | |
| "learning_rate": 9.993582535855263e-07, | |
| "loss": -0.26652470231056213, | |
| "memory(GiB)": 157.86, | |
| "reward": 0.3928571492433548, | |
| "reward_std": 0.2253357619047165, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3928571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4846093952655792, | |
| "step": 9, | |
| "train_speed(iter/s)": 0.003209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8019.0, | |
| "completions/mean_length": 6910.01806640625, | |
| "completions/min_length": 1922.5, | |
| "epoch": 0.03184713375796178, | |
| "grad_norm": 0.22980190813541412, | |
| "kl": 0.0003070831298828125, | |
| "learning_rate": 9.990759721625005e-07, | |
| "loss": 0.014422202482819557, | |
| "memory(GiB)": 157.89, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.33800362050533295, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513247013092, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.003216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.8035714285714286, | |
| "completions/max_length": 8046.0, | |
| "completions/mean_length": 7162.714599609375, | |
| "completions/min_length": 3054.0, | |
| "epoch": 0.03503184713375796, | |
| "grad_norm": 0.13719218969345093, | |
| "kl": 5.929594451790463e-42, | |
| "learning_rate": 9.98742435377817e-07, | |
| "loss": -0.0598757378757, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.2500000186264515, | |
| "reward_std": 0.19514648616313934, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000186264515, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3831089437007904, | |
| "step": 11, | |
| "train_speed(iter/s)": 0.003205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8062.0, | |
| "completions/mean_length": 6935.589599609375, | |
| "completions/min_length": 1498.0, | |
| "epoch": 0.03821656050955414, | |
| "grad_norm": 0.3652593195438385, | |
| "kl": 5.194803875951948e-08, | |
| "learning_rate": 9.983576774858775e-07, | |
| "loss": -0.02829546295106411, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.3681928962469101, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.458276703953743, | |
| "step": 12, | |
| "train_speed(iter/s)": 0.0032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857143, | |
| "completions/max_length": 8041.0, | |
| "completions/mean_length": 7132.393310546875, | |
| "completions/min_length": 2462.0, | |
| "epoch": 0.041401273885350316, | |
| "grad_norm": 0.2656404376029968, | |
| "kl": 0.00013446807861328125, | |
| "learning_rate": 9.979217380015173e-07, | |
| "loss": -0.09244758635759354, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.2253357619047165, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4600437134504318, | |
| "step": 13, | |
| "train_speed(iter/s)": 0.003197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.8214285714285714, | |
| "completions/max_length": 8044.5, | |
| "completions/mean_length": 7196.393310546875, | |
| "completions/min_length": 1879.0, | |
| "epoch": 0.044585987261146494, | |
| "grad_norm": 0.5126783847808838, | |
| "kl": 0.00011968612670898438, | |
| "learning_rate": 9.974346616959475e-07, | |
| "loss": -0.23802340030670166, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1428571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3524957150220871, | |
| "step": 14, | |
| "train_speed(iter/s)": 0.003188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857143, | |
| "completions/max_length": 8062.5, | |
| "completions/mean_length": 7267.26806640625, | |
| "completions/min_length": 2604.5, | |
| "epoch": 0.04777070063694268, | |
| "grad_norm": 0.6457058787345886, | |
| "kl": 0.000148773193359375, | |
| "learning_rate": 9.968964985921581e-07, | |
| "loss": -0.41891902685165405, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.3750000298023224, | |
| "reward_std": 0.3324785977602005, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8051.5, | |
| "completions/mean_length": 7219.57177734375, | |
| "completions/min_length": 3177.0, | |
| "epoch": 0.050955414012738856, | |
| "grad_norm": 0.29857584834098816, | |
| "kl": 0.0001468658447265625, | |
| "learning_rate": 9.963073039597796e-07, | |
| "loss": -0.1913096308708191, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.46781930327415466, | |
| "step": 16, | |
| "train_speed(iter/s)": 0.003187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857142, | |
| "completions/max_length": 8070.0, | |
| "completions/mean_length": 7195.3037109375, | |
| "completions/min_length": 2742.0, | |
| "epoch": 0.054140127388535034, | |
| "grad_norm": 0.5090013742446899, | |
| "kl": 1.1901227857510671e-41, | |
| "learning_rate": 9.956671383094068e-07, | |
| "loss": -0.181706503033638, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.2678571566939354, | |
| "reward_std": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571566939354, | |
| "rewards/AnswerTagAccuracyORM/std": 0.426847904920578, | |
| "step": 17, | |
| "train_speed(iter/s)": 0.003177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8023.5, | |
| "completions/mean_length": 6882.964599609375, | |
| "completions/min_length": 1486.5, | |
| "epoch": 0.05732484076433121, | |
| "grad_norm": 0.24539124965667725, | |
| "kl": 0.0003590583801269531, | |
| "learning_rate": 9.949760673863846e-07, | |
| "loss": -0.04751904308795929, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.37371790409088135, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4469868242740631, | |
| "step": 18, | |
| "train_speed(iter/s)": 0.003182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8032.0, | |
| "completions/mean_length": 6654.82177734375, | |
| "completions/min_length": 1637.0, | |
| "epoch": 0.06050955414012739, | |
| "grad_norm": 0.2233801782131195, | |
| "kl": 0.0003261566162109375, | |
| "learning_rate": 9.942341621640557e-07, | |
| "loss": -0.11176574230194092, | |
| "memory(GiB)": 158.35, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5032612681388855, | |
| "step": 19, | |
| "train_speed(iter/s)": 0.003186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8052.5, | |
| "completions/mean_length": 6537.839599609375, | |
| "completions/min_length": 1345.0, | |
| "epoch": 0.06369426751592357, | |
| "grad_norm": 0.2528194189071655, | |
| "kl": 0.0002574920654296875, | |
| "learning_rate": 9.934414988364722e-07, | |
| "loss": -0.1300119161605835, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.3078143745660782, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165241718292, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.003187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857143, | |
| "completions/max_length": 8054.5, | |
| "completions/mean_length": 7098.500244140625, | |
| "completions/min_length": 2948.5, | |
| "epoch": 0.06687898089171974, | |
| "grad_norm": 0.35901573300361633, | |
| "kl": 0.0002613067626953125, | |
| "learning_rate": 9.925981588105694e-07, | |
| "loss": 0.0739278644323349, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.3078143820166588, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4635152816772461, | |
| "step": 21, | |
| "train_speed(iter/s)": 0.003186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8043.0, | |
| "completions/mean_length": 6802.339599609375, | |
| "completions/min_length": 2142.5, | |
| "epoch": 0.07006369426751592, | |
| "grad_norm": 0.31139233708381653, | |
| "kl": 0.0002899169921875, | |
| "learning_rate": 9.917042286978063e-07, | |
| "loss": -0.23081684112548828, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786955863237381, | |
| "step": 22, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8046.5, | |
| "completions/mean_length": 6078.089599609375, | |
| "completions/min_length": 1577.5, | |
| "epoch": 0.0732484076433121, | |
| "grad_norm": 0.3595212697982788, | |
| "kl": 0.06200312077999115, | |
| "learning_rate": 9.9075980030527e-07, | |
| "loss": -0.26652395725250244, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.6250000298023224, | |
| "reward_std": 0.30228935182094574, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.6250000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4750668406486511, | |
| "step": 23, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8046.0, | |
| "completions/mean_length": 6813.75048828125, | |
| "completions/min_length": 1453.0, | |
| "epoch": 0.07643312101910828, | |
| "grad_norm": 0.3403284549713135, | |
| "kl": 0.0003566741943359375, | |
| "learning_rate": 9.897649706262473e-07, | |
| "loss": -0.11760014295578003, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.31333938241004944, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4469868242740631, | |
| "step": 24, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8073.5, | |
| "completions/mean_length": 6462.9111328125, | |
| "completions/min_length": 474.5, | |
| "epoch": 0.07961783439490445, | |
| "grad_norm": 0.5593795776367188, | |
| "kl": 25.92991018295288, | |
| "learning_rate": 9.887198418302628e-07, | |
| "loss": -0.2458166778087616, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.3681928962469101, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.502610981464386, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7678571428571428, | |
| "completions/max_length": 8025.0, | |
| "completions/mean_length": 7177.714599609375, | |
| "completions/min_length": 1826.5, | |
| "epoch": 0.08280254777070063, | |
| "grad_norm": 0.3172476291656494, | |
| "kl": 0.0003204345703125, | |
| "learning_rate": 9.87624521252587e-07, | |
| "loss": 0.09654846042394638, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.3214285969734192, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.40946151316165924, | |
| "step": 26, | |
| "train_speed(iter/s)": 0.003186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 8030.5, | |
| "completions/mean_length": 7575.19677734375, | |
| "completions/min_length": 5771.5, | |
| "epoch": 0.08598726114649681, | |
| "grad_norm": 0.24004344642162323, | |
| "kl": 0.0003147125244140625, | |
| "learning_rate": 9.864791213832125e-07, | |
| "loss": -0.0921306237578392, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.1964285857975483, | |
| "reward_std": 0.23086078464984894, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1964285857975483, | |
| "rewards/AnswerTagAccuracyORM/std": 0.36893007159233093, | |
| "step": 27, | |
| "train_speed(iter/s)": 0.003183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7321428571428571, | |
| "completions/max_length": 8037.0, | |
| "completions/mean_length": 7293.5361328125, | |
| "completions/min_length": 3104.0, | |
| "epoch": 0.08917197452229299, | |
| "grad_norm": 0.46742865443229675, | |
| "kl": 0.0008754730224609375, | |
| "learning_rate": 9.852837598553008e-07, | |
| "loss": -0.4069012999534607, | |
| "memory(GiB)": 158.38, | |
| "reward": 0.2142857313156128, | |
| "reward_std": 0.27762509882450104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.417855441570282, | |
| "step": 28, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8046.5, | |
| "completions/mean_length": 6678.232421875, | |
| "completions/min_length": 2332.5, | |
| "epoch": 0.09235668789808917, | |
| "grad_norm": 0.4905704855918884, | |
| "kl": 1.6553538759069064e-41, | |
| "learning_rate": 9.840385594331022e-07, | |
| "loss": -0.22333285212516785, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.2321428656578064, | |
| "reward_std": 0.37371791899204254, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2321428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.429407000541687, | |
| "step": 29, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8048.5, | |
| "completions/mean_length": 6784.232421875, | |
| "completions/min_length": 1839.5, | |
| "epoch": 0.09554140127388536, | |
| "grad_norm": 0.2499741017818451, | |
| "kl": 0.0011577606201171875, | |
| "learning_rate": 9.827436479993468e-07, | |
| "loss": -0.10406609624624252, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.3324786201119423, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4576014578342438, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.00318 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 8053.5, | |
| "completions/mean_length": 7322.607666015625, | |
| "completions/min_length": 3317.5, | |
| "epoch": 0.09872611464968153, | |
| "grad_norm": 0.22784624993801117, | |
| "kl": 0.0009670257568359375, | |
| "learning_rate": 9.813991585421116e-07, | |
| "loss": -0.1821945756673813, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.25, | |
| "reward_std": 0.23638580739498138, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.25, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44095855951309204, | |
| "step": 31, | |
| "train_speed(iter/s)": 0.003176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7321428571428572, | |
| "completions/max_length": 8045.5, | |
| "completions/mean_length": 7113.89306640625, | |
| "completions/min_length": 2601.0, | |
| "epoch": 0.10191082802547771, | |
| "grad_norm": 0.27732953429222107, | |
| "kl": 0.000583648681640625, | |
| "learning_rate": 9.800052291411629e-07, | |
| "loss": -0.1299564391374588, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.23086076974868774, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050112903118134, | |
| "step": 32, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857143, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 6726.625244140625, | |
| "completions/min_length": 1058.0, | |
| "epoch": 0.10509554140127389, | |
| "grad_norm": 0.4480609595775604, | |
| "kl": 0.000911712646484375, | |
| "learning_rate": 9.78562002953774e-07, | |
| "loss": -0.21174360811710358, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.2321428656578064, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2321428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.425032377243042, | |
| "step": 33, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8025.0, | |
| "completions/mean_length": 6545.64306640625, | |
| "completions/min_length": 1659.0, | |
| "epoch": 0.10828025477707007, | |
| "grad_norm": 0.2851637005805969, | |
| "kl": 0.00090789794921875, | |
| "learning_rate": 9.770696282000244e-07, | |
| "loss": -0.11861994117498398, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.3928571492433548, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3928571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4846093952655792, | |
| "step": 34, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 6577.803955078125, | |
| "completions/min_length": 1984.5, | |
| "epoch": 0.11146496815286625, | |
| "grad_norm": 0.32200559973716736, | |
| "kl": 0.00152587890625, | |
| "learning_rate": 9.755282581475767e-07, | |
| "loss": -0.16413375735282898, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.003164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8069.0, | |
| "completions/mean_length": 6215.32177734375, | |
| "completions/min_length": 921.0, | |
| "epoch": 0.11464968152866242, | |
| "grad_norm": 0.3323407471179962, | |
| "kl": 0.00124359130859375, | |
| "learning_rate": 9.739380510959364e-07, | |
| "loss": -0.22452522814273834, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.5000000298023224, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44095855951309204, | |
| "step": 36, | |
| "train_speed(iter/s)": 0.003167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.8035714285714286, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 7640.928955078125, | |
| "completions/min_length": 4844.0, | |
| "epoch": 0.1178343949044586, | |
| "grad_norm": 0.22709085047245026, | |
| "kl": 9.104936771950499e-42, | |
| "learning_rate": 9.722991703601935e-07, | |
| "loss": -0.06687425076961517, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.160714291036129, | |
| "reward_std": 0.23086076974868774, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.160714291036129, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3731846660375595, | |
| "step": 37, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8069.0, | |
| "completions/mean_length": 6600.446533203125, | |
| "completions/min_length": 2290.0, | |
| "epoch": 0.12101910828025478, | |
| "grad_norm": 0.4062687158584595, | |
| "kl": 0.002471923828125, | |
| "learning_rate": 9.706117842542516e-07, | |
| "loss": -0.22931034862995148, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.33800362050533295, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48795004189014435, | |
| "step": 38, | |
| "train_speed(iter/s)": 0.003164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7678571428571428, | |
| "completions/max_length": 8054.5, | |
| "completions/mean_length": 7287.62548828125, | |
| "completions/min_length": 2747.5, | |
| "epoch": 0.12420382165605096, | |
| "grad_norm": 0.306149959564209, | |
| "kl": 0.000904083251953125, | |
| "learning_rate": 9.688760660735402e-07, | |
| "loss": -0.11831033229827881, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.1785714328289032, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1785714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.39002102613449097, | |
| "step": 39, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7857142857142858, | |
| "completions/max_length": 8063.5, | |
| "completions/mean_length": 7219.143310546875, | |
| "completions/min_length": 3600.0, | |
| "epoch": 0.12738853503184713, | |
| "grad_norm": 0.29684150218963623, | |
| "kl": 0.000675201416015625, | |
| "learning_rate": 9.670921940772186e-07, | |
| "loss": -0.16423112154006958, | |
| "memory(GiB)": 169.42, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.2721000760793686, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4469868242740631, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.003158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8044.5, | |
| "completions/mean_length": 5554.14306640625, | |
| "completions/min_length": 769.5, | |
| "epoch": 0.1305732484076433, | |
| "grad_norm": 0.34710463881492615, | |
| "kl": 0.00290679931640625, | |
| "learning_rate": 9.652603514698673e-07, | |
| "loss": -0.11703464388847351, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.379242941737175, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5065638720989227, | |
| "step": 41, | |
| "train_speed(iter/s)": 0.003157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.23214285714285715, | |
| "completions/max_length": 8039.5, | |
| "completions/mean_length": 4597.910888671875, | |
| "completions/min_length": 949.0, | |
| "epoch": 0.1337579617834395, | |
| "grad_norm": 0.33481189608573914, | |
| "kl": 0.00357818603515625, | |
| "learning_rate": 9.633807263826744e-07, | |
| "loss": -0.02082175388932228, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.5892857313156128, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5892857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 42, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 7991.5, | |
| "completions/mean_length": 6022.285888671875, | |
| "completions/min_length": 1342.0, | |
| "epoch": 0.13694267515923567, | |
| "grad_norm": 0.321031391620636, | |
| "kl": 0.003082275390625, | |
| "learning_rate": 9.614535118541125e-07, | |
| "loss": -0.06112157553434372, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.14838216826319695, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.46781930327415466, | |
| "step": 43, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 8059.5, | |
| "completions/mean_length": 7382.285888671875, | |
| "completions/min_length": 3295.0, | |
| "epoch": 0.14012738853503184, | |
| "grad_norm": 0.33003029227256775, | |
| "kl": 0.00177764892578125, | |
| "learning_rate": 9.594789058101153e-07, | |
| "loss": -0.22145554423332214, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.3035714328289032, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4644543081521988, | |
| "step": 44, | |
| "train_speed(iter/s)": 0.003155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8033.0, | |
| "completions/mean_length": 5761.893310546875, | |
| "completions/min_length": 988.5, | |
| "epoch": 0.14331210191082802, | |
| "grad_norm": 0.3568911552429199, | |
| "kl": 0.00131988525390625, | |
| "learning_rate": 9.574571110437496e-07, | |
| "loss": -0.1308659315109253, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.535714328289032, | |
| "reward_std": 0.19514648616313934, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.535714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.003156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7857142857142858, | |
| "completions/max_length": 8067.0, | |
| "completions/mean_length": 7363.285888671875, | |
| "completions/min_length": 3301.0, | |
| "epoch": 0.1464968152866242, | |
| "grad_norm": 0.4601195454597473, | |
| "kl": 0.0016326904296875, | |
| "learning_rate": 9.55388335194388e-07, | |
| "loss": -0.4354901909828186, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.40943221747875214, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165241718292, | |
| "step": 46, | |
| "train_speed(iter/s)": 0.003157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8056.5, | |
| "completions/mean_length": 6383.57177734375, | |
| "completions/min_length": 1777.5, | |
| "epoch": 0.14968152866242038, | |
| "grad_norm": 0.32571953535079956, | |
| "kl": 0.00157928466796875, | |
| "learning_rate": 9.532727907263859e-07, | |
| "loss": -0.08489307761192322, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.32695360481739044, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4061589241027832, | |
| "step": 47, | |
| "train_speed(iter/s)": 0.003158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8029.0, | |
| "completions/mean_length": 6672.178955078125, | |
| "completions/min_length": 1226.0, | |
| "epoch": 0.15286624203821655, | |
| "grad_norm": 0.2804900109767914, | |
| "kl": 0.001705169677734375, | |
| "learning_rate": 9.511106949072587e-07, | |
| "loss": -0.07148804515600204, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.4285714328289032, | |
| "reward_std": 0.2142857238650322, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4985625892877579, | |
| "step": 48, | |
| "train_speed(iter/s)": 0.003156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857142, | |
| "completions/max_length": 8053.5, | |
| "completions/mean_length": 6692.678955078125, | |
| "completions/min_length": 2195.5, | |
| "epoch": 0.15605095541401273, | |
| "grad_norm": 0.43050575256347656, | |
| "kl": 0.00308990478515625, | |
| "learning_rate": 9.489022697853708e-07, | |
| "loss": -0.3393845856189728, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.3750000298023224, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737354040146, | |
| "step": 49, | |
| "train_speed(iter/s)": 0.003156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142857, | |
| "completions/max_length": 8043.0, | |
| "completions/mean_length": 6784.5537109375, | |
| "completions/min_length": 2953.5, | |
| "epoch": 0.1592356687898089, | |
| "grad_norm": 0.3050151765346527, | |
| "kl": 0.00262451171875, | |
| "learning_rate": 9.466477421671295e-07, | |
| "loss": -0.11103521287441254, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.23214287497103214, | |
| "reward_std": 0.1785714365541935, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.23214287497103214, | |
| "rewards/AnswerTagAccuracyORM/std": 0.34646742790937424, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.8035714285714286, | |
| "completions/max_length": 8053.0, | |
| "completions/mean_length": 7592.14306640625, | |
| "completions/min_length": 4495.5, | |
| "epoch": 0.1624203821656051, | |
| "grad_norm": 0.2979692220687866, | |
| "kl": 9.197422470595937e-42, | |
| "learning_rate": 9.443473435936927e-07, | |
| "loss": -0.14950904250144958, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1428571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.35634833574295044, | |
| "step": 51, | |
| "train_speed(iter/s)": 0.003147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8060.5, | |
| "completions/mean_length": 6415.9111328125, | |
| "completions/min_length": 1379.5, | |
| "epoch": 0.16560509554140126, | |
| "grad_norm": 0.19361363351345062, | |
| "kl": 965.6489562988281, | |
| "learning_rate": 9.420013103171891e-07, | |
| "loss": -0.16895829141139984, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.14838215708732605, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 52, | |
| "train_speed(iter/s)": 0.003141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8066.5, | |
| "completions/mean_length": 6355.839599609375, | |
| "completions/min_length": 1968.0, | |
| "epoch": 0.16878980891719744, | |
| "grad_norm": 0.3892778158187866, | |
| "kl": 0.0029144287109375, | |
| "learning_rate": 9.396098832764554e-07, | |
| "loss": -0.2460786998271942, | |
| "memory(GiB)": 169.48, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.30228935182094574, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 53, | |
| "train_speed(iter/s)": 0.003138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8032.5, | |
| "completions/mean_length": 6231.6787109375, | |
| "completions/min_length": 1345.0, | |
| "epoch": 0.17197452229299362, | |
| "grad_norm": 0.5784780383110046, | |
| "kl": 0.00372314453125, | |
| "learning_rate": 9.37173308072291e-07, | |
| "loss": -0.3872688412666321, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.25552503019571304, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199816048145294, | |
| "step": 54, | |
| "train_speed(iter/s)": 0.003136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8043.5, | |
| "completions/mean_length": 5922.51806640625, | |
| "completions/min_length": 920.5, | |
| "epoch": 0.1751592356687898, | |
| "grad_norm": 0.2570510804653168, | |
| "kl": 0.0035492679744493216, | |
| "learning_rate": 9.346918349422355e-07, | |
| "loss": -0.05529748648405075, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.2006715089082718, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4469868391752243, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.003134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8032.0, | |
| "completions/mean_length": 6564.035888671875, | |
| "completions/min_length": 1182.0, | |
| "epoch": 0.17834394904458598, | |
| "grad_norm": 0.526690661907196, | |
| "kl": 0.0024261474609375, | |
| "learning_rate": 9.321657187348688e-07, | |
| "loss": -0.20846779644489288, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.3681928962469101, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4489477872848511, | |
| "step": 56, | |
| "train_speed(iter/s)": 0.003129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7857142857142857, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 7038.2861328125, | |
| "completions/min_length": 4610.0, | |
| "epoch": 0.18152866242038215, | |
| "grad_norm": 0.15928295254707336, | |
| "kl": 0.0027923583984375, | |
| "learning_rate": 9.295952188836378e-07, | |
| "loss": 0.021755440160632133, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.1250000074505806, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1250000074505806, | |
| "rewards/AnswerTagAccuracyORM/std": 0.33565935492515564, | |
| "step": 57, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8024.0, | |
| "completions/mean_length": 6775.214599609375, | |
| "completions/min_length": 1785.0, | |
| "epoch": 0.18471337579617833, | |
| "grad_norm": 0.5611134767532349, | |
| "kl": 7.1264767029788345e-06, | |
| "learning_rate": 9.269805993802128e-07, | |
| "loss": -0.15547017753124237, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.29123931378126144, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4628649652004242, | |
| "step": 58, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8037.5, | |
| "completions/mean_length": 6358.4111328125, | |
| "completions/min_length": 1612.5, | |
| "epoch": 0.18789808917197454, | |
| "grad_norm": 0.15718594193458557, | |
| "kl": 0.004180908203125, | |
| "learning_rate": 9.243221287473755e-07, | |
| "loss": 0.03484642878174782, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.11266787722706795, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165539741516, | |
| "step": 59, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142858, | |
| "completions/max_length": 8040.0, | |
| "completions/mean_length": 6776.6787109375, | |
| "completions/min_length": 1845.0, | |
| "epoch": 0.1910828025477707, | |
| "grad_norm": 0.36074620485305786, | |
| "kl": 0.0035858154296875, | |
| "learning_rate": 9.216200800114411e-07, | |
| "loss": -0.19808490574359894, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.2142857164144516, | |
| "reward_std": 0.19514648616313934, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4154897928237915, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.003113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142857, | |
| "completions/max_length": 8027.0, | |
| "completions/mean_length": 6271.26806640625, | |
| "completions/min_length": 1195.5, | |
| "epoch": 0.1942675159235669, | |
| "grad_norm": 0.39658188819885254, | |
| "kl": 1.2364356999970023e-41, | |
| "learning_rate": 9.188747306742189e-07, | |
| "loss": -0.1974707692861557, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968627691269, | |
| "step": 61, | |
| "train_speed(iter/s)": 0.003113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.42857142857142855, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 5597.178955078125, | |
| "completions/min_length": 1193.0, | |
| "epoch": 0.19745222929936307, | |
| "grad_norm": 0.20000393688678741, | |
| "kl": 0.0042724609375, | |
| "learning_rate": 9.160863626845119e-07, | |
| "loss": -0.044626351445913315, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.1539071872830391, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4635152518749237, | |
| "step": 62, | |
| "train_speed(iter/s)": 0.003113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7142857142857142, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6918.053955078125, | |
| "completions/min_length": 1768.0, | |
| "epoch": 0.20063694267515925, | |
| "grad_norm": 0.6508347988128662, | |
| "kl": 0.003936767578125, | |
| "learning_rate": 9.132552624091619e-07, | |
| "loss": -0.21150559186935425, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.1607142984867096, | |
| "reward_std": 0.1785714402794838, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1607142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3664129227399826, | |
| "step": 63, | |
| "train_speed(iter/s)": 0.003111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8040.5, | |
| "completions/mean_length": 6990.643310546875, | |
| "completions/min_length": 2698.5, | |
| "epoch": 0.20382165605095542, | |
| "grad_norm": 0.44794604182243347, | |
| "kl": 0.00531005859375, | |
| "learning_rate": 9.103817206036382e-07, | |
| "loss": -0.43547752499580383, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591383934021, | |
| "step": 64, | |
| "train_speed(iter/s)": 0.003107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7678571428571428, | |
| "completions/max_length": 8033.0, | |
| "completions/mean_length": 6935.285888671875, | |
| "completions/min_length": 1890.5, | |
| "epoch": 0.2070063694267516, | |
| "grad_norm": 1.0245670080184937, | |
| "kl": 0.0034942626953125, | |
| "learning_rate": 9.07466032382177e-07, | |
| "loss": -0.16633543372154236, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968627691269, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.003103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8060.5, | |
| "completions/mean_length": 6056.26806640625, | |
| "completions/min_length": 1797.0, | |
| "epoch": 0.21019108280254778, | |
| "grad_norm": 0.26687300205230713, | |
| "kl": 0.005523681640625, | |
| "learning_rate": 9.045084971874737e-07, | |
| "loss": -0.17107920348644257, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.464285746216774, | |
| "reward_std": 0.26657507568597794, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.464285746216774, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 66, | |
| "train_speed(iter/s)": 0.003104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8050.0, | |
| "completions/mean_length": 7268.107421875, | |
| "completions/min_length": 1853.5, | |
| "epoch": 0.21337579617834396, | |
| "grad_norm": 0.39480164647102356, | |
| "kl": 0.00036018589162267745, | |
| "learning_rate": 9.015094187599296e-07, | |
| "loss": -0.2682667374610901, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.2142857238650322, | |
| "reward_std": 0.2253357544541359, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857238650322, | |
| "rewards/AnswerTagAccuracyORM/std": 0.40819603204727173, | |
| "step": 67, | |
| "train_speed(iter/s)": 0.003105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8069.5, | |
| "completions/mean_length": 5839.25048828125, | |
| "completions/min_length": 776.0, | |
| "epoch": 0.21656050955414013, | |
| "grad_norm": 0.37252077460289, | |
| "kl": 0.0123291015625, | |
| "learning_rate": 8.984691051064574e-07, | |
| "loss": -0.14431779086589813, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.4107142984867096, | |
| "reward_std": 0.21981073170900345, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.497912272810936, | |
| "step": 68, | |
| "train_speed(iter/s)": 0.003108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 5993.5361328125, | |
| "completions/min_length": 1093.0, | |
| "epoch": 0.2197452229299363, | |
| "grad_norm": 0.3685239553451538, | |
| "kl": 0.01165771484375, | |
| "learning_rate": 8.953878684688492e-07, | |
| "loss": -0.1928965002298355, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968627691269, | |
| "step": 69, | |
| "train_speed(iter/s)": 0.003111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 6114.82177734375, | |
| "completions/min_length": 1662.5, | |
| "epoch": 0.2229299363057325, | |
| "grad_norm": 0.5651078224182129, | |
| "kl": 0.013641357421875, | |
| "learning_rate": 8.922660252917087e-07, | |
| "loss": -0.3238877058029175, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48647116124629974, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.003113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8029.0, | |
| "completions/mean_length": 6308.1787109375, | |
| "completions/min_length": 1943.5, | |
| "epoch": 0.22611464968152867, | |
| "grad_norm": 0.41033416986465454, | |
| "kl": 0.0162353515625, | |
| "learning_rate": 8.891038961899519e-07, | |
| "loss": -0.17454411089420319, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.3392857387661934, | |
| "reward_std": 0.1896214708685875, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857387661934, | |
| "rewards/AnswerTagAccuracyORM/std": 0.43211139738559723, | |
| "step": 71, | |
| "train_speed(iter/s)": 0.003115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8051.5, | |
| "completions/mean_length": 5688.107177734375, | |
| "completions/min_length": 1738.5, | |
| "epoch": 0.22929936305732485, | |
| "grad_norm": 0.4853333830833435, | |
| "kl": 0.015869140625, | |
| "learning_rate": 8.859018059158809e-07, | |
| "loss": -0.2635350823402405, | |
| "memory(GiB)": 175.0, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.3792429566383362, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 72, | |
| "train_speed(iter/s)": 0.003117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428571, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 6535.660888671875, | |
| "completions/min_length": 2464.0, | |
| "epoch": 0.23248407643312102, | |
| "grad_norm": 0.18295517563819885, | |
| "kl": 0.00787353515625, | |
| "learning_rate": 8.826600833258307e-07, | |
| "loss": -0.05387752130627632, | |
| "memory(GiB)": 175.14, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.460043728351593, | |
| "step": 73, | |
| "train_speed(iter/s)": 0.003115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.44642857142857145, | |
| "completions/max_length": 8065.5, | |
| "completions/mean_length": 5659.46435546875, | |
| "completions/min_length": 1074.5, | |
| "epoch": 0.2356687898089172, | |
| "grad_norm": 0.27235084772109985, | |
| "kl": 0.01446533203125, | |
| "learning_rate": 8.793790613463954e-07, | |
| "loss": -0.07979360222816467, | |
| "memory(GiB)": 175.14, | |
| "reward": 0.375, | |
| "reward_std": 0.23086076974868774, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.375, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4750668406486511, | |
| "step": 74, | |
| "train_speed(iter/s)": 0.003117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8040.5, | |
| "completions/mean_length": 6586.518310546875, | |
| "completions/min_length": 1586.5, | |
| "epoch": 0.23885350318471338, | |
| "grad_norm": 0.4448719322681427, | |
| "kl": 1.2463148541704923e-41, | |
| "learning_rate": 8.760590769402371e-07, | |
| "loss": -0.14089421927928925, | |
| "memory(GiB)": 175.14, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.27762509882450104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4635152518749237, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 7960.5, | |
| "completions/mean_length": 5350.035888671875, | |
| "completions/min_length": 1008.5, | |
| "epoch": 0.24203821656050956, | |
| "grad_norm": 0.3529123067855835, | |
| "kl": 0.007568359375, | |
| "learning_rate": 8.727004710714798e-07, | |
| "loss": -0.06856054067611694, | |
| "memory(GiB)": 175.14, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5026109665632248, | |
| "step": 76, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3928571428571429, | |
| "completions/max_length": 8051.5, | |
| "completions/mean_length": 5923.339599609375, | |
| "completions/min_length": 1552.0, | |
| "epoch": 0.24522292993630573, | |
| "grad_norm": 0.4466400742530823, | |
| "kl": 0.01898193359375, | |
| "learning_rate": 8.693035886706907e-07, | |
| "loss": -0.28264355659484863, | |
| "memory(GiB)": 175.14, | |
| "reward": 0.4821428954601288, | |
| "reward_std": 0.29123931378126144, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428954601288, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 77, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 6365.14306640625, | |
| "completions/min_length": 1254.0, | |
| "epoch": 0.2484076433121019, | |
| "grad_norm": 0.42635998129844666, | |
| "kl": 0.0216064453125, | |
| "learning_rate": 8.658687785994578e-07, | |
| "loss": -0.2270558625459671, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.1896214708685875, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050114393234253, | |
| "step": 78, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428571, | |
| "completions/max_length": 8055.5, | |
| "completions/mean_length": 6524.4111328125, | |
| "completions/min_length": 2012.5, | |
| "epoch": 0.2515923566878981, | |
| "grad_norm": 0.2484586536884308, | |
| "kl": 0.00823974609375, | |
| "learning_rate": 8.623963936145599e-07, | |
| "loss": -0.10795820504426956, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.07695359364151955, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 79, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8033.0, | |
| "completions/mean_length": 5579.375244140625, | |
| "completions/min_length": 1700.5, | |
| "epoch": 0.25477707006369427, | |
| "grad_norm": 0.4129941463470459, | |
| "kl": 0.02008056640625, | |
| "learning_rate": 8.588867903317394e-07, | |
| "loss": -0.17563505470752716, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.4821428656578064, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5085247755050659, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.003123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8030.5, | |
| "completions/mean_length": 5784.410888671875, | |
| "completions/min_length": 1787.0, | |
| "epoch": 0.25796178343949044, | |
| "grad_norm": 0.44289883971214294, | |
| "kl": 0.0218505859375, | |
| "learning_rate": 8.553403291890767e-07, | |
| "loss": -0.2814217805862427, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.4821428805589676, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500482559204, | |
| "step": 81, | |
| "train_speed(iter/s)": 0.003126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.39285714285714285, | |
| "completions/max_length": 8028.5, | |
| "completions/mean_length": 5141.1787109375, | |
| "completions/min_length": 1243.5, | |
| "epoch": 0.2611464968152866, | |
| "grad_norm": 0.556716799736023, | |
| "kl": 0.0203857421875, | |
| "learning_rate": 8.51757374409974e-07, | |
| "loss": -0.24188844859600067, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.2967643439769745, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 82, | |
| "train_speed(iter/s)": 0.003129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8033.5, | |
| "completions/mean_length": 5490.625244140625, | |
| "completions/min_length": 1102.5, | |
| "epoch": 0.2643312101910828, | |
| "grad_norm": 0.43611571192741394, | |
| "kl": 0.01397705078125, | |
| "learning_rate": 8.481382939657488e-07, | |
| "loss": -0.06833790987730026, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.21981073170900345, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500482559204, | |
| "step": 83, | |
| "train_speed(iter/s)": 0.003131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6488.946533203125, | |
| "completions/min_length": 2348.5, | |
| "epoch": 0.267515923566879, | |
| "grad_norm": 0.5264121890068054, | |
| "kl": 0.0264892578125, | |
| "learning_rate": 8.444834595378433e-07, | |
| "loss": -0.28356218338012695, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.30228933691978455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.46781930327415466, | |
| "step": 84, | |
| "train_speed(iter/s)": 0.003133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8023.5, | |
| "completions/mean_length": 5915.893310546875, | |
| "completions/min_length": 1153.0, | |
| "epoch": 0.27070063694267515, | |
| "grad_norm": 0.852773129940033, | |
| "kl": 0.01702880859375, | |
| "learning_rate": 8.407932464796521e-07, | |
| "loss": -0.2204403430223465, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.29123931378126144, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.003133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.35714285714285715, | |
| "completions/max_length": 8021.5, | |
| "completions/mean_length": 5667.4111328125, | |
| "completions/min_length": 1922.0, | |
| "epoch": 0.27388535031847133, | |
| "grad_norm": 0.41208142042160034, | |
| "kl": 0.02435302734375, | |
| "learning_rate": 8.370680337779736e-07, | |
| "loss": -0.09956123679876328, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.29123932123184204, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135854244232, | |
| "step": 86, | |
| "train_speed(iter/s)": 0.003136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142857, | |
| "completions/max_length": 8047.0, | |
| "completions/mean_length": 6915.0361328125, | |
| "completions/min_length": 2560.5, | |
| "epoch": 0.2770700636942675, | |
| "grad_norm": 0.5504844188690186, | |
| "kl": 0.021484375, | |
| "learning_rate": 8.333082040140882e-07, | |
| "loss": -0.33471736311912537, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.2610500305891037, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49264875054359436, | |
| "step": 87, | |
| "train_speed(iter/s)": 0.003136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8103.0, | |
| "completions/mean_length": 5899.035888671875, | |
| "completions/min_length": 1566.0, | |
| "epoch": 0.2802547770700637, | |
| "grad_norm": 0.6481395959854126, | |
| "kl": 0.023193359375, | |
| "learning_rate": 8.295141433244659e-07, | |
| "loss": -0.3118484616279602, | |
| "memory(GiB)": 175.23, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 88, | |
| "train_speed(iter/s)": 0.003138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8064.5, | |
| "completions/mean_length": 5719.839599609375, | |
| "completions/min_length": 1776.0, | |
| "epoch": 0.28343949044585987, | |
| "grad_norm": 0.3430127203464508, | |
| "kl": 0.01434326171875, | |
| "learning_rate": 8.256862413611112e-07, | |
| "loss": 0.07326733320951462, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4285714328289032, | |
| "reward_std": 0.40943220257759094, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4985625892877579, | |
| "step": 89, | |
| "train_speed(iter/s)": 0.003141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8071.0, | |
| "completions/mean_length": 6036.107421875, | |
| "completions/min_length": 1410.5, | |
| "epoch": 0.28662420382165604, | |
| "grad_norm": 0.9747743010520935, | |
| "kl": 0.0157470703125, | |
| "learning_rate": 8.218248912515442e-07, | |
| "loss": -0.32189831137657166, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.379242941737175, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199817538261414, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.003143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8030.0, | |
| "completions/mean_length": 5853.910888671875, | |
| "completions/min_length": 1923.5, | |
| "epoch": 0.2898089171974522, | |
| "grad_norm": 0.6417209506034851, | |
| "kl": 0.0321044921875, | |
| "learning_rate": 8.179304895584281e-07, | |
| "loss": -0.20055779814720154, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.40943220257759094, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.502610981464386, | |
| "step": 91, | |
| "train_speed(iter/s)": 0.003145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8069.0, | |
| "completions/mean_length": 6345.964599609375, | |
| "completions/min_length": 1514.0, | |
| "epoch": 0.2929936305732484, | |
| "grad_norm": 0.157789409160614, | |
| "kl": 0.01397705078125, | |
| "learning_rate": 8.140034362388397e-07, | |
| "loss": -0.0897713378071785, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4285714328289032, | |
| "reward_std": 0.2142857238650322, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4985625743865967, | |
| "step": 92, | |
| "train_speed(iter/s)": 0.003146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8048.0, | |
| "completions/mean_length": 6575.268310546875, | |
| "completions/min_length": 1400.5, | |
| "epoch": 0.2961783439490446, | |
| "grad_norm": 0.19858166575431824, | |
| "kl": 0.02850341796875, | |
| "learning_rate": 8.100441346031958e-07, | |
| "loss": -0.004586466588079929, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.1785714365541935, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4628649652004242, | |
| "step": 93, | |
| "train_speed(iter/s)": 0.003146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8063.5, | |
| "completions/mean_length": 5983.518310546875, | |
| "completions/min_length": 1320.5, | |
| "epoch": 0.29936305732484075, | |
| "grad_norm": 0.5492147207260132, | |
| "kl": 796.5401611328125, | |
| "learning_rate": 8.060529912738314e-07, | |
| "loss": -0.20520265400409698, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.31333939731121063, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4979122579097748, | |
| "step": 94, | |
| "train_speed(iter/s)": 0.003147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7678571428571428, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 6954.500244140625, | |
| "completions/min_length": 2610.0, | |
| "epoch": 0.30254777070063693, | |
| "grad_norm": 0.38487347960472107, | |
| "kl": 1.0918917634018975e-41, | |
| "learning_rate": 8.020304161432403e-07, | |
| "loss": -0.061524372547864914, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.1785714402794838, | |
| "reward_std": 0.1539071798324585, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1785714402794838, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3871018886566162, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.003145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 5583.64306640625, | |
| "completions/min_length": 1472.5, | |
| "epoch": 0.3057324840764331, | |
| "grad_norm": 0.3516985774040222, | |
| "kl": 0.018798828125, | |
| "learning_rate": 7.979768223319785e-07, | |
| "loss": -0.08431195467710495, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4973474591970444, | |
| "step": 96, | |
| "train_speed(iter/s)": 0.003147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8028.5, | |
| "completions/mean_length": 5748.21435546875, | |
| "completions/min_length": 1341.0, | |
| "epoch": 0.3089171974522293, | |
| "grad_norm": 0.4274459779262543, | |
| "kl": 0.0357666015625, | |
| "learning_rate": 7.938926261462365e-07, | |
| "loss": -0.17042537033557892, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2253357619047165, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4691530019044876, | |
| "step": 97, | |
| "train_speed(iter/s)": 0.003149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8063.5, | |
| "completions/mean_length": 6201.875244140625, | |
| "completions/min_length": 1993.5, | |
| "epoch": 0.31210191082802546, | |
| "grad_norm": 0.32356998324394226, | |
| "kl": 1.5625879175686035e-41, | |
| "learning_rate": 7.897782470350849e-07, | |
| "loss": -0.13458389043807983, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513247013092, | |
| "step": 98, | |
| "train_speed(iter/s)": 0.003151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 6730.339599609375, | |
| "completions/min_length": 2250.0, | |
| "epoch": 0.31528662420382164, | |
| "grad_norm": 0.7448033690452576, | |
| "kl": 0.00970458984375, | |
| "learning_rate": 7.856341075473961e-07, | |
| "loss": -0.19996413588523865, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 99, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3214285714285714, | |
| "completions/max_length": 8037.5, | |
| "completions/mean_length": 5035.035888671875, | |
| "completions/min_length": 932.0, | |
| "epoch": 0.3184713375796178, | |
| "grad_norm": 0.5652780532836914, | |
| "kl": 0.04150390625, | |
| "learning_rate": 7.814606332884488e-07, | |
| "loss": -0.10035921633243561, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.5714285969734192, | |
| "reward_std": 0.32695361226797104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5714285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4985625892877579, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.003155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8061.0, | |
| "completions/mean_length": 6390.910888671875, | |
| "completions/min_length": 1904.0, | |
| "epoch": 0.321656050955414, | |
| "grad_norm": 0.4127773344516754, | |
| "kl": 0.038330078125, | |
| "learning_rate": 7.772582528762178e-07, | |
| "loss": -0.16605715453624725, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.23086078464984894, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4979122579097748, | |
| "step": 101, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8040.0, | |
| "completions/mean_length": 6349.01806640625, | |
| "completions/min_length": 1580.0, | |
| "epoch": 0.3248407643312102, | |
| "grad_norm": 0.4982652962207794, | |
| "kl": 22.047773361206055, | |
| "learning_rate": 7.730273978973552e-07, | |
| "loss": -0.2726196050643921, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49791230261325836, | |
| "step": 102, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 5684.39306640625, | |
| "completions/min_length": 1680.0, | |
| "epoch": 0.32802547770700635, | |
| "grad_norm": 0.6498633027076721, | |
| "kl": 0.082763671875, | |
| "learning_rate": 7.687685028628652e-07, | |
| "loss": -0.21366629004478455, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.1896214783191681, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 103, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8018.5, | |
| "completions/mean_length": 6570.8037109375, | |
| "completions/min_length": 2079.0, | |
| "epoch": 0.33121019108280253, | |
| "grad_norm": 0.7568531632423401, | |
| "kl": 0.0177001953125, | |
| "learning_rate": 7.644820051634812e-07, | |
| "loss": -0.23152737319469452, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.196428582072258, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.196428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4039382338523865, | |
| "step": 104, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 5679.3037109375, | |
| "completions/min_length": 1807.0, | |
| "epoch": 0.3343949044585987, | |
| "grad_norm": 0.48169225454330444, | |
| "kl": 0.0501708984375, | |
| "learning_rate": 7.60168345024744e-07, | |
| "loss": -0.10911425948143005, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.4149572253227234, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500482559204, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8050.5, | |
| "completions/mean_length": 6120.9111328125, | |
| "completions/min_length": 1163.5, | |
| "epoch": 0.3375796178343949, | |
| "grad_norm": 0.41283684968948364, | |
| "kl": 0.013671875, | |
| "learning_rate": 7.558279654617912e-07, | |
| "loss": -0.15918892621994019, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.307814359664917, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199817538261414, | |
| "step": 106, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 6480.857421875, | |
| "completions/min_length": 1826.5, | |
| "epoch": 0.34076433121019106, | |
| "grad_norm": 0.5248873829841614, | |
| "kl": 0.03424072265625, | |
| "learning_rate": 7.514613122338589e-07, | |
| "loss": -0.17167718708515167, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 107, | |
| "train_speed(iter/s)": 0.003154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8055.0, | |
| "completions/mean_length": 6560.357421875, | |
| "completions/min_length": 1519.0, | |
| "epoch": 0.34394904458598724, | |
| "grad_norm": 0.7283872365951538, | |
| "kl": 0.01104736328125, | |
| "learning_rate": 7.470688337985029e-07, | |
| "loss": -0.3855530917644501, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3035714328289032, | |
| "reward_std": 0.21981074661016464, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4644543081521988, | |
| "step": 108, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6183.893310546875, | |
| "completions/min_length": 1794.5, | |
| "epoch": 0.3471337579617834, | |
| "grad_norm": 0.6313491463661194, | |
| "kl": 0.0653076171875, | |
| "learning_rate": 7.426509812655405e-07, | |
| "loss": -0.2580084502696991, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4436842352151871, | |
| "step": 109, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8056.5, | |
| "completions/mean_length": 6767.071533203125, | |
| "completions/min_length": 1435.5, | |
| "epoch": 0.3503184713375796, | |
| "grad_norm": 0.45339030027389526, | |
| "kl": 0.030517578125, | |
| "learning_rate": 7.382082083507225e-07, | |
| "loss": -0.16426539421081543, | |
| "memory(GiB)": 175.55, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.2721000909805298, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786956012248993, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8042.0, | |
| "completions/mean_length": 6121.696533203125, | |
| "completions/min_length": 1248.0, | |
| "epoch": 0.3535031847133758, | |
| "grad_norm": 0.44927987456321716, | |
| "kl": 1.280361533164978, | |
| "learning_rate": 7.337409713291355e-07, | |
| "loss": -0.032493047416210175, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 111, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8030.5, | |
| "completions/mean_length": 6174.0361328125, | |
| "completions/min_length": 1399.0, | |
| "epoch": 0.35668789808917195, | |
| "grad_norm": 0.451442688703537, | |
| "kl": 0.1644287109375, | |
| "learning_rate": 7.292497289883432e-07, | |
| "loss": 0.022751769050955772, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2142857201397419, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4691530168056488, | |
| "step": 112, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8033.5, | |
| "completions/mean_length": 6115.357421875, | |
| "completions/min_length": 1842.5, | |
| "epoch": 0.35987261146496813, | |
| "grad_norm": 0.5630915760993958, | |
| "kl": 0.02783203125, | |
| "learning_rate": 7.24734942581267e-07, | |
| "loss": -0.30773723125457764, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3035714402794838, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714402794838, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4321114122867584, | |
| "step": 113, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8043.0, | |
| "completions/mean_length": 6186.803955078125, | |
| "completions/min_length": 1296.5, | |
| "epoch": 0.3630573248407643, | |
| "grad_norm": 0.49433982372283936, | |
| "kl": 0.0421142578125, | |
| "learning_rate": 7.201970757788171e-07, | |
| "loss": -0.055917054414749146, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.3792429566383362, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165241718292, | |
| "step": 114, | |
| "train_speed(iter/s)": 0.003153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8041.0, | |
| "completions/mean_length": 6332.6611328125, | |
| "completions/min_length": 1067.0, | |
| "epoch": 0.3662420382165605, | |
| "grad_norm": 0.2365662157535553, | |
| "kl": 0.02978515625, | |
| "learning_rate": 7.15636594622272e-07, | |
| "loss": 0.03591850399971008, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.1964285857975483, | |
| "reward_std": 0.07695359364151955, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1964285857975483, | |
| "rewards/AnswerTagAccuracyORM/std": 0.36893007159233093, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.003154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428572, | |
| "completions/max_length": 8068.0, | |
| "completions/mean_length": 5660.196533203125, | |
| "completions/min_length": 1355.0, | |
| "epoch": 0.36942675159235666, | |
| "grad_norm": 0.8137925863265991, | |
| "kl": 0.21121002733707428, | |
| "learning_rate": 7.110539674754159e-07, | |
| "loss": -0.3542415499687195, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.375, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.375, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47506681084632874, | |
| "step": 116, | |
| "train_speed(iter/s)": 0.003154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8045.0, | |
| "completions/mean_length": 5915.482421875, | |
| "completions/min_length": 1733.0, | |
| "epoch": 0.37261146496815284, | |
| "grad_norm": 71.4358139038086, | |
| "kl": 18.5830078125, | |
| "learning_rate": 7.06449664976438e-07, | |
| "loss": -0.01425784919410944, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050112903118134, | |
| "step": 117, | |
| "train_speed(iter/s)": 0.003154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3214285714285714, | |
| "completions/max_length": 8045.5, | |
| "completions/mean_length": 5088.660888671875, | |
| "completions/min_length": 908.5, | |
| "epoch": 0.37579617834394907, | |
| "grad_norm": 0.42832183837890625, | |
| "kl": 0.0203857421875, | |
| "learning_rate": 7.018241599895973e-07, | |
| "loss": -0.0722283348441124, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 118, | |
| "train_speed(iter/s)": 0.003156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8042.0, | |
| "completions/mean_length": 6498.76806640625, | |
| "completions/min_length": 2193.0, | |
| "epoch": 0.37898089171974525, | |
| "grad_norm": 0.5034194588661194, | |
| "kl": 0.0457763671875, | |
| "learning_rate": 6.971779275566593e-07, | |
| "loss": -0.14812861382961273, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.1785714328289032, | |
| "reward_std": 0.2253357470035553, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1785714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.37796448171138763, | |
| "step": 119, | |
| "train_speed(iter/s)": 0.003156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.35714285714285715, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 4917.14306640625, | |
| "completions/min_length": 1183.5, | |
| "epoch": 0.3821656050955414, | |
| "grad_norm": 0.7444789409637451, | |
| "kl": 0.0513916015625, | |
| "learning_rate": 6.925114448481088e-07, | |
| "loss": -0.26919350028038025, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.642857164144516, | |
| "reward_std": 0.3681929111480713, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.642857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199817538261414, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.003157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8067.5, | |
| "completions/mean_length": 6616.857421875, | |
| "completions/min_length": 1642.0, | |
| "epoch": 0.3853503184713376, | |
| "grad_norm": 0.4549780786037445, | |
| "kl": 0.0621337890625, | |
| "learning_rate": 6.87825191114145e-07, | |
| "loss": -0.012696207500994205, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.27762509882450104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45290274918079376, | |
| "step": 121, | |
| "train_speed(iter/s)": 0.003157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8049.5, | |
| "completions/mean_length": 6262.821533203125, | |
| "completions/min_length": 1352.0, | |
| "epoch": 0.3885350318471338, | |
| "grad_norm": 0.6874606013298035, | |
| "kl": 0.025634765625, | |
| "learning_rate": 6.831196476354614e-07, | |
| "loss": -0.19306860864162445, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.37371791899204254, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 122, | |
| "train_speed(iter/s)": 0.003157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3392857142857143, | |
| "completions/max_length": 8052.5, | |
| "completions/mean_length": 5354.339599609375, | |
| "completions/min_length": 1056.5, | |
| "epoch": 0.39171974522292996, | |
| "grad_norm": 0.5245152711868286, | |
| "kl": 0.041748046875, | |
| "learning_rate": 6.783952976738178e-07, | |
| "loss": -0.24547605216503143, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.5, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5091750919818878, | |
| "step": 123, | |
| "train_speed(iter/s)": 0.003158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8028.5, | |
| "completions/mean_length": 4598.0179443359375, | |
| "completions/min_length": 803.0, | |
| "epoch": 0.39490445859872614, | |
| "grad_norm": 0.31059756875038147, | |
| "kl": 0.03350830078125, | |
| "learning_rate": 6.7365262642241e-07, | |
| "loss": -0.010546373203396797, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.5357142984867096, | |
| "reward_std": 0.2142857238650322, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5357142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5065638422966003, | |
| "step": 124, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 6164.375244140625, | |
| "completions/min_length": 1348.5, | |
| "epoch": 0.3980891719745223, | |
| "grad_norm": 0.36853721737861633, | |
| "kl": 0.0347900390625, | |
| "learning_rate": 6.688921209560403e-07, | |
| "loss": -0.15686392784118652, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857164144516, | |
| "reward_std": 0.2610500305891037, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4495980441570282, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8056.5, | |
| "completions/mean_length": 6088.44677734375, | |
| "completions/min_length": 1482.5, | |
| "epoch": 0.4012738853503185, | |
| "grad_norm": 0.4430883228778839, | |
| "kl": 0.0606689453125, | |
| "learning_rate": 6.641142701810931e-07, | |
| "loss": -0.24629399180412292, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3928571492433548, | |
| "reward_std": 0.20619653165340424, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3928571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4846093952655792, | |
| "step": 126, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8057.5, | |
| "completions/mean_length": 6104.5361328125, | |
| "completions/min_length": 2068.5, | |
| "epoch": 0.40445859872611467, | |
| "grad_norm": 0.6949173808097839, | |
| "kl": 0.106201171875, | |
| "learning_rate": 6.593195647853258e-07, | |
| "loss": -0.3462884724140167, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.4039071798324585, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4576014429330826, | |
| "step": 127, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8028.5, | |
| "completions/mean_length": 5471.26806640625, | |
| "completions/min_length": 798.5, | |
| "epoch": 0.40764331210191085, | |
| "grad_norm": 0.4856749176979065, | |
| "kl": 0.0289306640625, | |
| "learning_rate": 6.545084971874736e-07, | |
| "loss": -0.14708048105239868, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.1896214708685875, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 128, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8055.0, | |
| "completions/mean_length": 5762.50048828125, | |
| "completions/min_length": 1270.0, | |
| "epoch": 0.410828025477707, | |
| "grad_norm": 0.5902029275894165, | |
| "kl": 0.03662109375, | |
| "learning_rate": 6.496815614866791e-07, | |
| "loss": -0.08738569170236588, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4821428805589676, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 129, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8043.5, | |
| "completions/mean_length": 6093.4287109375, | |
| "completions/min_length": 1134.0, | |
| "epoch": 0.4140127388535032, | |
| "grad_norm": 0.3767712712287903, | |
| "kl": 0.006173963658511639, | |
| "learning_rate": 6.448392534117461e-07, | |
| "loss": -0.17554393410682678, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.23086077719926834, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 6678.94677734375, | |
| "completions/min_length": 2177.0, | |
| "epoch": 0.4171974522292994, | |
| "grad_norm": 0.7001217603683472, | |
| "kl": 0.0213623046875, | |
| "learning_rate": 6.399820702702304e-07, | |
| "loss": -0.2599309980869293, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.37371791899204254, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49264875054359436, | |
| "step": 131, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142857, | |
| "completions/max_length": 8036.5, | |
| "completions/mean_length": 6684.57177734375, | |
| "completions/min_length": 1943.5, | |
| "epoch": 0.42038216560509556, | |
| "grad_norm": 0.5899467468261719, | |
| "kl": 0.05859375, | |
| "learning_rate": 6.351105108973644e-07, | |
| "loss": -0.17598707973957062, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 132, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8050.0, | |
| "completions/mean_length": 6337.08935546875, | |
| "completions/min_length": 1953.0, | |
| "epoch": 0.42356687898089174, | |
| "grad_norm": 0.4514944553375244, | |
| "kl": 0.0384521484375, | |
| "learning_rate": 6.302250756048267e-07, | |
| "loss": -0.23234152793884277, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.25552502274513245, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165241718292, | |
| "step": 133, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8029.0, | |
| "completions/mean_length": 5813.14306640625, | |
| "completions/min_length": 780.0, | |
| "epoch": 0.4267515923566879, | |
| "grad_norm": 0.5305230617523193, | |
| "kl": 0.084716796875, | |
| "learning_rate": 6.253262661293602e-07, | |
| "loss": -0.16682913899421692, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.2363857924938202, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173471331596375, | |
| "step": 134, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142858, | |
| "completions/max_length": 8067.0, | |
| "completions/mean_length": 6596.125244140625, | |
| "completions/min_length": 1434.0, | |
| "epoch": 0.4299363057324841, | |
| "grad_norm": 6475.49609375, | |
| "kl": 2048.0140380859375, | |
| "learning_rate": 6.204145855812438e-07, | |
| "loss": 3.8663876056671143, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48647116124629974, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3392857142857143, | |
| "completions/max_length": 8056.0, | |
| "completions/mean_length": 4872.14306640625, | |
| "completions/min_length": 1017.0, | |
| "epoch": 0.43312101910828027, | |
| "grad_norm": 0.26369690895080566, | |
| "kl": 0.047607421875, | |
| "learning_rate": 6.154905383926216e-07, | |
| "loss": -0.016700170934200287, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.21981074661016464, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5085247755050659, | |
| "step": 136, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571429, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 6843.303955078125, | |
| "completions/min_length": 2125.5, | |
| "epoch": 0.43630573248407645, | |
| "grad_norm": 0.48116692900657654, | |
| "kl": 0.04315185546875, | |
| "learning_rate": 6.105546302656986e-07, | |
| "loss": -0.08012282848358154, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2142857201397419, | |
| "reward_std": 0.2967643365263939, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857201397419, | |
| "rewards/AnswerTagAccuracyORM/std": 0.37510766088962555, | |
| "step": 137, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6785714285714286, | |
| "completions/max_length": 8045.5, | |
| "completions/mean_length": 6495.821533203125, | |
| "completions/min_length": 1893.5, | |
| "epoch": 0.4394904458598726, | |
| "grad_norm": 0.39181163907051086, | |
| "kl": 289.435302734375, | |
| "learning_rate": 6.056073681208037e-07, | |
| "loss": -0.048386890441179276, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.2721000984311104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4576014429330826, | |
| "step": 138, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8016.0, | |
| "completions/mean_length": 5637.39306640625, | |
| "completions/min_length": 1424.5, | |
| "epoch": 0.4426751592356688, | |
| "grad_norm": 0.2754450738430023, | |
| "kl": 0.046875, | |
| "learning_rate": 6.0064926004433e-07, | |
| "loss": -0.08374255150556564, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.1785714365541935, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050114393234253, | |
| "step": 139, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 5262.000244140625, | |
| "completions/min_length": 1212.5, | |
| "epoch": 0.445859872611465, | |
| "grad_norm": 0.641979455947876, | |
| "kl": 0.07440185546875, | |
| "learning_rate": 5.956808152365532e-07, | |
| "loss": -0.11813775449991226, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3214285746216774, | |
| "reward_std": 0.25552503019571304, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3214285746216774, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4327617287635803, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8020.0, | |
| "completions/mean_length": 5747.625244140625, | |
| "completions/min_length": 1448.5, | |
| "epoch": 0.44904458598726116, | |
| "grad_norm": 0.30898842215538025, | |
| "kl": 0.0184326171875, | |
| "learning_rate": 5.907025439593365e-07, | |
| "loss": -0.03316553309559822, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.2610500305891037, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 141, | |
| "train_speed(iter/s)": 0.003164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8040.5, | |
| "completions/mean_length": 5516.482177734375, | |
| "completions/min_length": 1085.5, | |
| "epoch": 0.45222929936305734, | |
| "grad_norm": 0.5380728244781494, | |
| "kl": 0.0506591796875, | |
| "learning_rate": 5.857149574837268e-07, | |
| "loss": -0.25722232460975647, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.30228933691978455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.497912272810936, | |
| "step": 142, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8030.0, | |
| "completions/mean_length": 5218.1611328125, | |
| "completions/min_length": 1178.5, | |
| "epoch": 0.4554140127388535, | |
| "grad_norm": 0.4074670672416687, | |
| "kl": 0.0584716796875, | |
| "learning_rate": 5.807185680374467e-07, | |
| "loss": -0.0464337095618248, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3035714328289032, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4644543081521988, | |
| "step": 143, | |
| "train_speed(iter/s)": 0.003166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8053.5, | |
| "completions/mean_length": 6286.125244140625, | |
| "completions/min_length": 1153.5, | |
| "epoch": 0.4585987261146497, | |
| "grad_norm": 0.4895031154155731, | |
| "kl": 0.0423583984375, | |
| "learning_rate": 5.757138887522883e-07, | |
| "loss": -0.06021181866526604, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.26657506823539734, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4389495849609375, | |
| "step": 144, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8067.0, | |
| "completions/mean_length": 6192.5537109375, | |
| "completions/min_length": 1276.5, | |
| "epoch": 0.46178343949044587, | |
| "grad_norm": 0.40592557191848755, | |
| "kl": 0.0712890625, | |
| "learning_rate": 5.707014336114146e-07, | |
| "loss": -0.048989661037921906, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.27762511372566223, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5026109665632248, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8031.0, | |
| "completions/mean_length": 6168.714599609375, | |
| "completions/min_length": 1853.0, | |
| "epoch": 0.46496815286624205, | |
| "grad_norm": 0.4599376618862152, | |
| "kl": 0.051025390625, | |
| "learning_rate": 5.656817173965732e-07, | |
| "loss": -0.06779219955205917, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4489477574825287, | |
| "step": 146, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8048.0, | |
| "completions/mean_length": 6351.232421875, | |
| "completions/min_length": 2068.5, | |
| "epoch": 0.4681528662420382, | |
| "grad_norm": 0.3567911684513092, | |
| "kl": 0.0421142578125, | |
| "learning_rate": 5.606552556352274e-07, | |
| "loss": -0.09004680067300797, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 147, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8061.5, | |
| "completions/mean_length": 6130.57177734375, | |
| "completions/min_length": 1451.0, | |
| "epoch": 0.4713375796178344, | |
| "grad_norm": 0.5385421514511108, | |
| "kl": 0.0621337890625, | |
| "learning_rate": 5.556225645476118e-07, | |
| "loss": -0.12423180043697357, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513247013092, | |
| "step": 148, | |
| "train_speed(iter/s)": 0.003165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3035714285714286, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 4891.875244140625, | |
| "completions/min_length": 1196.5, | |
| "epoch": 0.4745222929936306, | |
| "grad_norm": 0.39023974537849426, | |
| "kl": 0.0255126953125, | |
| "learning_rate": 5.505841609937161e-07, | |
| "loss": -0.18357349932193756, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.29123931378126144, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 149, | |
| "train_speed(iter/s)": 0.003166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8016.0, | |
| "completions/mean_length": 4689.14306640625, | |
| "completions/min_length": 833.5, | |
| "epoch": 0.47770700636942676, | |
| "grad_norm": 0.5281595587730408, | |
| "kl": 0.052490234375, | |
| "learning_rate": 5.455405624202032e-07, | |
| "loss": -0.15326838195323944, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3928571492433548, | |
| "reward_std": 0.1539071872830391, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3928571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4846093952655792, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.003167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8023.0, | |
| "completions/mean_length": 5550.000244140625, | |
| "completions/min_length": 1138.5, | |
| "epoch": 0.48089171974522293, | |
| "grad_norm": 0.2876298427581787, | |
| "kl": 0.03759765625, | |
| "learning_rate": 5.404922868072672e-07, | |
| "loss": 0.08055908977985382, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4107142984867096, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4576014578342438, | |
| "step": 151, | |
| "train_speed(iter/s)": 0.003168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.26785714285714285, | |
| "completions/max_length": 8016.0, | |
| "completions/mean_length": 3929.5538330078125, | |
| "completions/min_length": 986.0, | |
| "epoch": 0.4840764331210191, | |
| "grad_norm": 1.2029175758361816, | |
| "kl": 0.45068359375, | |
| "learning_rate": 5.354398526154365e-07, | |
| "loss": -0.09824319183826447, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.5357142984867096, | |
| "reward_std": 0.25552502274513245, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5357142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5026109963655472, | |
| "step": 152, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8062.0, | |
| "completions/mean_length": 5090.232421875, | |
| "completions/min_length": 1046.0, | |
| "epoch": 0.4872611464968153, | |
| "grad_norm": 0.44915616512298584, | |
| "kl": 0.0460205078125, | |
| "learning_rate": 5.30383778732328e-07, | |
| "loss": -0.08847501873970032, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.34905366599559784, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.502610981464386, | |
| "step": 153, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8054.0, | |
| "completions/mean_length": 5964.4287109375, | |
| "completions/min_length": 1407.5, | |
| "epoch": 0.49044585987261147, | |
| "grad_norm": 0.7181938886642456, | |
| "kl": 0.0526123046875, | |
| "learning_rate": 5.253245844193564e-07, | |
| "loss": -0.25980204343795776, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.4727715849876404, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513396024704, | |
| "step": 154, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 6524.339599609375, | |
| "completions/min_length": 1993.0, | |
| "epoch": 0.49363057324840764, | |
| "grad_norm": 0.6525817513465881, | |
| "kl": 0.08203125, | |
| "learning_rate": 5.202627892584065e-07, | |
| "loss": -0.14491651952266693, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.3324785977602005, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8034.5, | |
| "completions/mean_length": 5646.178955078125, | |
| "completions/min_length": 1162.5, | |
| "epoch": 0.4968152866242038, | |
| "grad_norm": 0.7706606388092041, | |
| "kl": 0.07666015625, | |
| "learning_rate": 5.151989130984714e-07, | |
| "loss": -0.09087943285703659, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.32695358991622925, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.460043728351593, | |
| "step": 156, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428572, | |
| "completions/max_length": 8046.5, | |
| "completions/mean_length": 5837.58935546875, | |
| "completions/min_length": 1139.5, | |
| "epoch": 0.5, | |
| "grad_norm": 0.4703036844730377, | |
| "kl": 0.03955078125, | |
| "learning_rate": 5.101334760022639e-07, | |
| "loss": -0.15095964074134827, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.25552502274513245, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48647117614746094, | |
| "step": 157, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3214285714285714, | |
| "completions/max_length": 8043.5, | |
| "completions/mean_length": 5477.0361328125, | |
| "completions/min_length": 1285.0, | |
| "epoch": 0.5031847133757962, | |
| "grad_norm": 0.975563645362854, | |
| "kl": 0.197265625, | |
| "learning_rate": 5.050669981928055e-07, | |
| "loss": -0.09782678633928299, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.4396214783191681, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4973474591970444, | |
| "step": 158, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8048.5, | |
| "completions/mean_length": 5391.803955078125, | |
| "completions/min_length": 1264.5, | |
| "epoch": 0.5063694267515924, | |
| "grad_norm": 0.6060774922370911, | |
| "kl": 0.044677734375, | |
| "learning_rate": 5e-07, | |
| "loss": -0.30903252959251404, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4821428656578064, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5085247755050659, | |
| "step": 159, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8036.5, | |
| "completions/mean_length": 6244.39306640625, | |
| "completions/min_length": 1104.5, | |
| "epoch": 0.5095541401273885, | |
| "grad_norm": 0.3611801564693451, | |
| "kl": 0.032743350418968475, | |
| "learning_rate": 4.949330018071946e-07, | |
| "loss": -0.14383217692375183, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.2253357656300068, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 6116.875244140625, | |
| "completions/min_length": 1644.5, | |
| "epoch": 0.5127388535031847, | |
| "grad_norm": 0.3732077479362488, | |
| "kl": 0.07177734375, | |
| "learning_rate": 4.898665239977362e-07, | |
| "loss": 0.008805501274764538, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3035714402794838, | |
| "reward_std": 0.23086079210042953, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714402794838, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4321114122867584, | |
| "step": 161, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8043.5, | |
| "completions/mean_length": 6551.1611328125, | |
| "completions/min_length": 2065.5, | |
| "epoch": 0.5159235668789809, | |
| "grad_norm": 0.40818125009536743, | |
| "kl": 0.0394287109375, | |
| "learning_rate": 4.848010869015287e-07, | |
| "loss": -0.172414168715477, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.1428571529686451, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4061589390039444, | |
| "step": 162, | |
| "train_speed(iter/s)": 0.003168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.2857142857142857, | |
| "completions/max_length": 8026.5, | |
| "completions/mean_length": 5044.535888671875, | |
| "completions/min_length": 1356.5, | |
| "epoch": 0.5191082802547771, | |
| "grad_norm": 1.3621019124984741, | |
| "kl": 0.4180908203125, | |
| "learning_rate": 4.797372107415935e-07, | |
| "loss": -0.2163340151309967, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.5357142984867096, | |
| "reward_std": 0.42048224806785583, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5357142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5065638720989227, | |
| "step": 163, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.2857142857142857, | |
| "completions/max_length": 8046.5, | |
| "completions/mean_length": 5025.2861328125, | |
| "completions/min_length": 1086.0, | |
| "epoch": 0.5222929936305732, | |
| "grad_norm": 0.5102728605270386, | |
| "kl": 0.0535888671875, | |
| "learning_rate": 4.746754155806437e-07, | |
| "loss": -0.23832020163536072, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.2721000909805298, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 164, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 5601.375244140625, | |
| "completions/min_length": 1904.5, | |
| "epoch": 0.5254777070063694, | |
| "grad_norm": 0.7859033942222595, | |
| "kl": 0.1533203125, | |
| "learning_rate": 4.69616221267672e-07, | |
| "loss": -0.3384000360965729, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.42048226296901703, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513396024704, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8026.0, | |
| "completions/mean_length": 5810.482421875, | |
| "completions/min_length": 1074.0, | |
| "epoch": 0.5286624203821656, | |
| "grad_norm": 0.5610930323600769, | |
| "kl": 0.06005859375, | |
| "learning_rate": 4.645601473845635e-07, | |
| "loss": -0.1656591296195984, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.307814359664917, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48795005679130554, | |
| "step": 166, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8055.5, | |
| "completions/mean_length": 5735.08935546875, | |
| "completions/min_length": 1439.0, | |
| "epoch": 0.5318471337579618, | |
| "grad_norm": 0.912844717502594, | |
| "kl": 0.1314697265625, | |
| "learning_rate": 4.5950771319273296e-07, | |
| "loss": -0.04387956112623215, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.21981074661016464, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 167, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8049.0, | |
| "completions/mean_length": 6489.285888671875, | |
| "completions/min_length": 1574.5, | |
| "epoch": 0.535031847133758, | |
| "grad_norm": 0.3967674970626831, | |
| "kl": 1.4857299551628638e+34, | |
| "learning_rate": 4.544594375797968e-07, | |
| "loss": 0.07432208955287933, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.357142873108387, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.357142873108387, | |
| "rewards/AnswerTagAccuracyORM/std": 0.43015046417713165, | |
| "step": 168, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8061.0, | |
| "completions/mean_length": 5971.535888671875, | |
| "completions/min_length": 867.0, | |
| "epoch": 0.5382165605095541, | |
| "grad_norm": 3.6906192302703857, | |
| "kl": 0.22985238194814883, | |
| "learning_rate": 4.4941583900628393e-07, | |
| "loss": -0.2558591365814209, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.23086077719926834, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 169, | |
| "train_speed(iter/s)": 0.003169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.2857142857142857, | |
| "completions/max_length": 8057.5, | |
| "completions/mean_length": 4240.500244140625, | |
| "completions/min_length": 650.0, | |
| "epoch": 0.5414012738853503, | |
| "grad_norm": 0.5648459196090698, | |
| "kl": 0.0484619140625, | |
| "learning_rate": 4.443774354523882e-07, | |
| "loss": -0.026298800483345985, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.5535714626312256, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.003171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8025.0, | |
| "completions/mean_length": 5686.607421875, | |
| "completions/min_length": 1502.0, | |
| "epoch": 0.5445859872611465, | |
| "grad_norm": 0.7092170715332031, | |
| "kl": 0.0477294921875, | |
| "learning_rate": 4.3934474436477253e-07, | |
| "loss": -0.25052332878112793, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.23086077719926834, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 171, | |
| "train_speed(iter/s)": 0.003172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8053.0, | |
| "completions/mean_length": 5796.964599609375, | |
| "completions/min_length": 1074.0, | |
| "epoch": 0.5477707006369427, | |
| "grad_norm": 0.448478639125824, | |
| "kl": 0.0557861328125, | |
| "learning_rate": 4.3431828260342675e-07, | |
| "loss": -0.27337488532066345, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.27762509882450104, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4635152518749237, | |
| "step": 172, | |
| "train_speed(iter/s)": 0.003172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8049.5, | |
| "completions/mean_length": 6193.589599609375, | |
| "completions/min_length": 1322.0, | |
| "epoch": 0.5509554140127388, | |
| "grad_norm": 0.4961269795894623, | |
| "kl": 0.0238037109375, | |
| "learning_rate": 4.292985663885853e-07, | |
| "loss": -0.1575016975402832, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 173, | |
| "train_speed(iter/s)": 0.003172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8028.0, | |
| "completions/mean_length": 6504.14306640625, | |
| "completions/min_length": 1982.0, | |
| "epoch": 0.554140127388535, | |
| "grad_norm": 0.8586503267288208, | |
| "kl": 0.0418701171875, | |
| "learning_rate": 4.242861112477118e-07, | |
| "loss": -0.26731064915657043, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.4149572402238846, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786956012248993, | |
| "step": 174, | |
| "train_speed(iter/s)": 0.003172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8055.0, | |
| "completions/mean_length": 5515.000244140625, | |
| "completions/min_length": 1278.0, | |
| "epoch": 0.5573248407643312, | |
| "grad_norm": 0.5378698110580444, | |
| "kl": 3752.9690551757812, | |
| "learning_rate": 4.192814319625533e-07, | |
| "loss": -0.10730624943971634, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.4285714328289032, | |
| "reward_std": 0.2967643290758133, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4985625743865967, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.003173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3392857142857143, | |
| "completions/max_length": 8059.0, | |
| "completions/mean_length": 5019.607421875, | |
| "completions/min_length": 1034.0, | |
| "epoch": 0.5605095541401274, | |
| "grad_norm": 0.39968299865722656, | |
| "kl": 0.0423583984375, | |
| "learning_rate": 4.1428504251627325e-07, | |
| "loss": 0.0431935153901577, | |
| "memory(GiB)": 175.77, | |
| "reward": 0.535714328289032, | |
| "reward_std": 0.19514648616313934, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.535714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078745484352112, | |
| "step": 176, | |
| "train_speed(iter/s)": 0.003175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8054.5, | |
| "completions/mean_length": 5915.3037109375, | |
| "completions/min_length": 1198.5, | |
| "epoch": 0.5636942675159236, | |
| "grad_norm": 0.39187169075012207, | |
| "kl": 3.19933819770813, | |
| "learning_rate": 4.0929745604066343e-07, | |
| "loss": -0.09193204343318939, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000149011612, | |
| "reward_std": 0.2967643290758133, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4973474442958832, | |
| "step": 177, | |
| "train_speed(iter/s)": 0.003175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3928571428571429, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 5273.268310546875, | |
| "completions/min_length": 1048.5, | |
| "epoch": 0.5668789808917197, | |
| "grad_norm": 0.6660857796669006, | |
| "kl": 0.1064453125, | |
| "learning_rate": 4.0431918476344685e-07, | |
| "loss": -0.19565197825431824, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.535714328289032, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.535714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513396024704, | |
| "step": 178, | |
| "train_speed(iter/s)": 0.003177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428571, | |
| "completions/max_length": 8059.5, | |
| "completions/mean_length": 6514.857421875, | |
| "completions/min_length": 1473.0, | |
| "epoch": 0.5700636942675159, | |
| "grad_norm": 0.698070228099823, | |
| "kl": 0.085205078125, | |
| "learning_rate": 3.9935073995566987e-07, | |
| "loss": -0.09061294049024582, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.25552503019571304, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.460043728351593, | |
| "step": 179, | |
| "train_speed(iter/s)": 0.003177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8024.0, | |
| "completions/mean_length": 5687.553955078125, | |
| "completions/min_length": 1122.0, | |
| "epoch": 0.5732484076433121, | |
| "grad_norm": 0.31134656071662903, | |
| "kl": 0.03778076171875, | |
| "learning_rate": 3.943926318791963e-07, | |
| "loss": -0.14330630004405975, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4821428954601288, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428954601288, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.003178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8041.5, | |
| "completions/mean_length": 6372.910888671875, | |
| "completions/min_length": 1543.5, | |
| "epoch": 0.5764331210191083, | |
| "grad_norm": 0.5216385126113892, | |
| "kl": 0.023681640625, | |
| "learning_rate": 3.8944536973430156e-07, | |
| "loss": -0.11478479206562042, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.18409644439816475, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4582767188549042, | |
| "step": 181, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8024.5, | |
| "completions/mean_length": 6145.232421875, | |
| "completions/min_length": 1394.5, | |
| "epoch": 0.5796178343949044, | |
| "grad_norm": 0.4548920691013336, | |
| "kl": 0.0491943359375, | |
| "learning_rate": 3.845094616073783e-07, | |
| "loss": -0.04961169883608818, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.18409645557403564, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44368425011634827, | |
| "step": 182, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.35714285714285715, | |
| "completions/max_length": 8025.0, | |
| "completions/mean_length": 4988.08935546875, | |
| "completions/min_length": 1431.0, | |
| "epoch": 0.5828025477707006, | |
| "grad_norm": 0.45550552010536194, | |
| "kl": 0.0574951171875, | |
| "learning_rate": 3.7958541441875627e-07, | |
| "loss": -0.0993984192609787, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714626312256, | |
| "reward_std": 0.21981073170900345, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 183, | |
| "train_speed(iter/s)": 0.003181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8052.5, | |
| "completions/mean_length": 6105.107421875, | |
| "completions/min_length": 1428.0, | |
| "epoch": 0.5859872611464968, | |
| "grad_norm": 0.4129337668418884, | |
| "kl": 0.03155517578125, | |
| "learning_rate": 3.7467373387063964e-07, | |
| "loss": -0.18813078105449677, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107142984867096, | |
| "reward_std": 0.2610500305891037, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.497912272810936, | |
| "step": 184, | |
| "train_speed(iter/s)": 0.003181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8026.5, | |
| "completions/mean_length": 5963.446533203125, | |
| "completions/min_length": 1203.5, | |
| "epoch": 0.589171974522293, | |
| "grad_norm": 0.4038497805595398, | |
| "kl": 0.0673828125, | |
| "learning_rate": 3.6977492439517346e-07, | |
| "loss": -0.08315330743789673, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786956012248993, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.003182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8037.5, | |
| "completions/mean_length": 6319.321533203125, | |
| "completions/min_length": 1822.0, | |
| "epoch": 0.5923566878980892, | |
| "grad_norm": 1.8196825981140137, | |
| "kl": 0.7462158203125, | |
| "learning_rate": 3.648894891026358e-07, | |
| "loss": -0.03824207931756973, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3035714328289032, | |
| "reward_std": 0.1785714402794838, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4644542932510376, | |
| "step": 186, | |
| "train_speed(iter/s)": 0.003183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8033.5, | |
| "completions/mean_length": 5305.8037109375, | |
| "completions/min_length": 942.0, | |
| "epoch": 0.5955414012738853, | |
| "grad_norm": 0.41588449478149414, | |
| "kl": 0.025146484375, | |
| "learning_rate": 3.600179297297695e-07, | |
| "loss": -0.10517025738954544, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 187, | |
| "train_speed(iter/s)": 0.003184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 5222.250244140625, | |
| "completions/min_length": 1221.5, | |
| "epoch": 0.5987261146496815, | |
| "grad_norm": 0.42883527278900146, | |
| "kl": 0.033935546875, | |
| "learning_rate": 3.5516074658825397e-07, | |
| "loss": -0.04254484549164772, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.2967643141746521, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49734747409820557, | |
| "step": 188, | |
| "train_speed(iter/s)": 0.003185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.35714285714285715, | |
| "completions/max_length": 8027.5, | |
| "completions/mean_length": 4837.39306640625, | |
| "completions/min_length": 1332.0, | |
| "epoch": 0.6019108280254777, | |
| "grad_norm": 0.6678107380867004, | |
| "kl": 0.08056640625, | |
| "learning_rate": 3.50318438513321e-07, | |
| "loss": -0.23368430137634277, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.3681928962469101, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173466861248016, | |
| "step": 189, | |
| "train_speed(iter/s)": 0.003187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 6229.303955078125, | |
| "completions/min_length": 2022.0, | |
| "epoch": 0.6050955414012739, | |
| "grad_norm": 0.6864339113235474, | |
| "kl": 1.5787729148315552e-41, | |
| "learning_rate": 3.454915028125263e-07, | |
| "loss": -0.17627106606960297, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.3078143820166588, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5065638422966003, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.003187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8023.5, | |
| "completions/mean_length": 5785.803955078125, | |
| "completions/min_length": 1411.0, | |
| "epoch": 0.60828025477707, | |
| "grad_norm": 0.4775516092777252, | |
| "kl": 0.027587890625, | |
| "learning_rate": 3.406804352146742e-07, | |
| "loss": -0.1085924282670021, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.1896214708685875, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 191, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8018.0, | |
| "completions/mean_length": 6275.1611328125, | |
| "completions/min_length": 1364.0, | |
| "epoch": 0.6114649681528662, | |
| "grad_norm": 0.3889318108558655, | |
| "kl": 0.0579833984375, | |
| "learning_rate": 3.3588572981890684e-07, | |
| "loss": -0.09743942320346832, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2321428656578064, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2321428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.429407000541687, | |
| "step": 192, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8026.0, | |
| "completions/mean_length": 6002.982421875, | |
| "completions/min_length": 805.0, | |
| "epoch": 0.6146496815286624, | |
| "grad_norm": 0.6355292201042175, | |
| "kl": 0.0274658203125, | |
| "learning_rate": 3.311078790439598e-07, | |
| "loss": -0.29961735010147095, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.3078143820166588, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5039526224136353, | |
| "step": 193, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8070.0, | |
| "completions/mean_length": 6182.6787109375, | |
| "completions/min_length": 1595.0, | |
| "epoch": 0.6178343949044586, | |
| "grad_norm": 0.5008262395858765, | |
| "kl": 0.0277099609375, | |
| "learning_rate": 3.263473735775899e-07, | |
| "loss": -0.17556621134281158, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000298023224, | |
| "reward_std": 0.3243894428014755, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 194, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.48214285714285715, | |
| "completions/max_length": 8046.5, | |
| "completions/mean_length": 6006.875244140625, | |
| "completions/min_length": 1325.0, | |
| "epoch": 0.6210191082802548, | |
| "grad_norm": 0.6817752122879028, | |
| "kl": 0.0562744140625, | |
| "learning_rate": 3.2160470232618225e-07, | |
| "loss": -0.2873086631298065, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 5104.1787109375, | |
| "completions/min_length": 1014.0, | |
| "epoch": 0.6242038216560509, | |
| "grad_norm": 0.40815719962120056, | |
| "kl": 22.176969528198242, | |
| "learning_rate": 3.1688035236453865e-07, | |
| "loss": 0.02237345091998577, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.23086077719926834, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 196, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8032.0, | |
| "completions/mean_length": 6547.00048828125, | |
| "completions/min_length": 1434.5, | |
| "epoch": 0.6273885350318471, | |
| "grad_norm": 0.4397486746311188, | |
| "kl": 0.02099609375, | |
| "learning_rate": 3.121748088858549e-07, | |
| "loss": -0.10549207031726837, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.3681929111480713, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4114224463701248, | |
| "step": 197, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8035.0, | |
| "completions/mean_length": 5620.51806640625, | |
| "completions/min_length": 597.0, | |
| "epoch": 0.6305732484076433, | |
| "grad_norm": 27.555814743041992, | |
| "kl": 2.0523681640625, | |
| "learning_rate": 3.0748855515189096e-07, | |
| "loss": -0.24800211191177368, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.464285746216774, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.464285746216774, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968776702881, | |
| "step": 198, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.42857142857142855, | |
| "completions/max_length": 8045.5, | |
| "completions/mean_length": 5171.607421875, | |
| "completions/min_length": 424.0, | |
| "epoch": 0.6337579617834395, | |
| "grad_norm": 0.4833768606185913, | |
| "kl": 1.5786327849851227e-41, | |
| "learning_rate": 3.028220724433408e-07, | |
| "loss": -0.06498469412326813, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.375, | |
| "reward_std": 0.30228936672210693, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.375, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4750668406486511, | |
| "step": 199, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8049.0, | |
| "completions/mean_length": 5697.178955078125, | |
| "completions/min_length": 1219.0, | |
| "epoch": 0.6369426751592356, | |
| "grad_norm": 1.165695071220398, | |
| "kl": 1483.6046142578125, | |
| "learning_rate": 2.981758400104028e-07, | |
| "loss": -0.07902750372886658, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.2967643439769745, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4973474442958832, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 6021.26806640625, | |
| "completions/min_length": 1541.5, | |
| "epoch": 0.6401273885350318, | |
| "grad_norm": 0.3752177655696869, | |
| "kl": 0.0467529296875, | |
| "learning_rate": 2.9355033502356194e-07, | |
| "loss": -0.0795094221830368, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.21981073915958405, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 201, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571429, | |
| "completions/max_length": 8032.5, | |
| "completions/mean_length": 6366.946533203125, | |
| "completions/min_length": 1789.0, | |
| "epoch": 0.643312101910828, | |
| "grad_norm": 0.5095097422599792, | |
| "kl": 0.042724609375, | |
| "learning_rate": 2.8894603252458403e-07, | |
| "loss": -0.24165716767311096, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.30228933691978455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050114393234253, | |
| "step": 202, | |
| "train_speed(iter/s)": 0.003189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8052.5, | |
| "completions/mean_length": 6997.57177734375, | |
| "completions/min_length": 1818.5, | |
| "epoch": 0.6464968152866242, | |
| "grad_norm": 0.716253399848938, | |
| "kl": 0.0411376953125, | |
| "learning_rate": 2.8436340537772794e-07, | |
| "loss": -0.3257341980934143, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4389495849609375, | |
| "step": 203, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 6138.303955078125, | |
| "completions/min_length": 1044.5, | |
| "epoch": 0.6496815286624203, | |
| "grad_norm": 0.6485111713409424, | |
| "kl": 0.0439453125, | |
| "learning_rate": 2.7980292422118277e-07, | |
| "loss": -0.20691558718681335, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.535714328289032, | |
| "reward_std": 0.3902929872274399, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.535714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 204, | |
| "train_speed(iter/s)": 0.00319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.30357142857142855, | |
| "completions/max_length": 8048.0, | |
| "completions/mean_length": 5014.732177734375, | |
| "completions/min_length": 1183.5, | |
| "epoch": 0.6528662420382165, | |
| "grad_norm": 0.4565260410308838, | |
| "kl": 0.0528564453125, | |
| "learning_rate": 2.75265057418733e-07, | |
| "loss": -0.19542962312698364, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.21981074661016464, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5032612979412079, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.003192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 6365.250244140625, | |
| "completions/min_length": 2251.0, | |
| "epoch": 0.6560509554140127, | |
| "grad_norm": 0.7114933729171753, | |
| "kl": 0.169677734375, | |
| "learning_rate": 2.70750271011657e-07, | |
| "loss": -0.12340293079614639, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4691530168056488, | |
| "step": 206, | |
| "train_speed(iter/s)": 0.003192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8031.5, | |
| "completions/mean_length": 5673.46435546875, | |
| "completions/min_length": 1684.0, | |
| "epoch": 0.6592356687898089, | |
| "grad_norm": 0.5769023299217224, | |
| "kl": 0.03271484375, | |
| "learning_rate": 2.6625902867086447e-07, | |
| "loss": -0.10204778611660004, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4821428656578064, | |
| "reward_std": 0.2721000798046589, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5085248351097107, | |
| "step": 207, | |
| "train_speed(iter/s)": 0.003193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.35714285714285715, | |
| "completions/max_length": 8049.0, | |
| "completions/mean_length": 5337.607421875, | |
| "completions/min_length": 1091.5, | |
| "epoch": 0.6624203821656051, | |
| "grad_norm": 7.4786057472229, | |
| "kl": 4.21484375, | |
| "learning_rate": 2.6179179164927754e-07, | |
| "loss": 0.045502904802560806, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.14838217198848724, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49264875054359436, | |
| "step": 208, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8050.5, | |
| "completions/mean_length": 6416.553955078125, | |
| "completions/min_length": 2526.0, | |
| "epoch": 0.6656050955414012, | |
| "grad_norm": 0.5907986760139465, | |
| "kl": NaN, | |
| "learning_rate": 2.5734901873445956e-07, | |
| "loss": -0.22783614695072174, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.32695358991622925, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 209, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8041.0, | |
| "completions/mean_length": 5512.57177734375, | |
| "completions/min_length": 1217.5, | |
| "epoch": 0.6687898089171974, | |
| "grad_norm": 6.297171115875244, | |
| "kl": 1.9635009765625, | |
| "learning_rate": 2.529311662014972e-07, | |
| "loss": -0.09180951863527298, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.30228934437036514, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571428, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 6852.94677734375, | |
| "completions/min_length": 2993.5, | |
| "epoch": 0.6719745222929936, | |
| "grad_norm": 0.5260760188102722, | |
| "kl": 0.0394287109375, | |
| "learning_rate": 2.485386877661411e-07, | |
| "loss": -0.13763374090194702, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.32695360481739044, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.458276703953743, | |
| "step": 211, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428572, | |
| "completions/max_length": 8028.0, | |
| "completions/mean_length": 6248.76806640625, | |
| "completions/min_length": 1682.0, | |
| "epoch": 0.6751592356687898, | |
| "grad_norm": 2.8984975814819336, | |
| "kl": 0.337890625, | |
| "learning_rate": 2.441720345382089e-07, | |
| "loss": -0.07186296582221985, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050112903118134, | |
| "step": 212, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8037.0, | |
| "completions/mean_length": 5828.625244140625, | |
| "completions/min_length": 1334.0, | |
| "epoch": 0.678343949044586, | |
| "grad_norm": 47.080162048339844, | |
| "kl": 19.67626953125, | |
| "learning_rate": 2.3983165497525596e-07, | |
| "loss": 0.14914795756340027, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.32695358991622925, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4600437134504318, | |
| "step": 213, | |
| "train_speed(iter/s)": 0.003193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.39285714285714285, | |
| "completions/max_length": 8034.5, | |
| "completions/mean_length": 5060.107421875, | |
| "completions/min_length": 1518.5, | |
| "epoch": 0.6815286624203821, | |
| "grad_norm": 0.5471820831298828, | |
| "kl": 0.0250244140625, | |
| "learning_rate": 2.355179948365189e-07, | |
| "loss": -0.1757272332906723, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513247013092, | |
| "step": 214, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571429, | |
| "completions/max_length": 8038.5, | |
| "completions/mean_length": 5785.375244140625, | |
| "completions/min_length": 1677.0, | |
| "epoch": 0.6847133757961783, | |
| "grad_norm": 0.6869480013847351, | |
| "kl": 0.026123046875, | |
| "learning_rate": 2.3123149713713468e-07, | |
| "loss": -0.2949236035346985, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.307814359664917, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4609040319919586, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8027.5, | |
| "completions/mean_length": 6098.482421875, | |
| "completions/min_length": 1733.5, | |
| "epoch": 0.6878980891719745, | |
| "grad_norm": 0.38128015398979187, | |
| "kl": 0.0662841796875, | |
| "learning_rate": 2.26972602102645e-07, | |
| "loss": 0.0502837672829628, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000298023224, | |
| "reward_std": 0.2253357619047165, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48795005679130554, | |
| "step": 216, | |
| "train_speed(iter/s)": 0.003194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8033.5, | |
| "completions/mean_length": 6116.053955078125, | |
| "completions/min_length": 1466.5, | |
| "epoch": 0.6910828025477707, | |
| "grad_norm": 0.926658034324646, | |
| "kl": 0.076416015625, | |
| "learning_rate": 2.2274174712378207e-07, | |
| "loss": -0.12618423998355865, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 217, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8053.5, | |
| "completions/mean_length": 5445.69677734375, | |
| "completions/min_length": 853.0, | |
| "epoch": 0.6942675159235668, | |
| "grad_norm": 0.6047350764274597, | |
| "kl": 51.23466873168945, | |
| "learning_rate": 2.1853936671155127e-07, | |
| "loss": -0.11778053641319275, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.49191083014011383, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 218, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8047.0, | |
| "completions/mean_length": 6047.035888671875, | |
| "completions/min_length": 1355.5, | |
| "epoch": 0.697452229299363, | |
| "grad_norm": 0.9835929274559021, | |
| "kl": 0.0267333984375, | |
| "learning_rate": 2.1436589245260372e-07, | |
| "loss": -0.29198572039604187, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.37371790409088135, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 219, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8063.5, | |
| "completions/mean_length": 5899.7861328125, | |
| "completions/min_length": 1418.5, | |
| "epoch": 0.7006369426751592, | |
| "grad_norm": 0.48660025000572205, | |
| "kl": 0.0634765625, | |
| "learning_rate": 2.1022175296491512e-07, | |
| "loss": -0.18401746451854706, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8063.5, | |
| "completions/mean_length": 5425.0003662109375, | |
| "completions/min_length": 1516.5, | |
| "epoch": 0.7038216560509554, | |
| "grad_norm": 0.38574594259262085, | |
| "kl": 0.03814697265625, | |
| "learning_rate": 2.0610737385376348e-07, | |
| "loss": -0.04302213340997696, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.25552501529455185, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4389495849609375, | |
| "step": 221, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3035714285714286, | |
| "completions/max_length": 8049.5, | |
| "completions/mean_length": 5389.4287109375, | |
| "completions/min_length": 1246.5, | |
| "epoch": 0.7070063694267515, | |
| "grad_norm": 5124.12841796875, | |
| "kl": 274.0233154296875, | |
| "learning_rate": 2.0202317766802152e-07, | |
| "loss": 0.4420851469039917, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.3078143745660782, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4973474591970444, | |
| "step": 222, | |
| "train_speed(iter/s)": 0.003196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.26785714285714285, | |
| "completions/max_length": 8044.5, | |
| "completions/mean_length": 4609.19677734375, | |
| "completions/min_length": 923.5, | |
| "epoch": 0.7101910828025477, | |
| "grad_norm": 4.193013668060303, | |
| "kl": 2.1361083984375, | |
| "learning_rate": 1.9796958385675965e-07, | |
| "loss": 0.02818513847887516, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.6071428954601288, | |
| "reward_std": 0.2253357544541359, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.6071428954601288, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4959513247013092, | |
| "step": 223, | |
| "train_speed(iter/s)": 0.003197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8045.5, | |
| "completions/mean_length": 6820.643310546875, | |
| "completions/min_length": 1928.0, | |
| "epoch": 0.7133757961783439, | |
| "grad_norm": 0.7545607089996338, | |
| "kl": 0.048828125, | |
| "learning_rate": 1.9394700872616853e-07, | |
| "loss": -0.3313080370426178, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.45619654655456543, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.47245559096336365, | |
| "step": 224, | |
| "train_speed(iter/s)": 0.003197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 5816.178955078125, | |
| "completions/min_length": 1331.5, | |
| "epoch": 0.7165605095541401, | |
| "grad_norm": 0.531544029712677, | |
| "kl": 0.094482421875, | |
| "learning_rate": 1.899558653968042e-07, | |
| "loss": -0.11124253273010254, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500482559204, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.003197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.39285714285714285, | |
| "completions/max_length": 8038.5, | |
| "completions/mean_length": 5465.143310546875, | |
| "completions/min_length": 1681.0, | |
| "epoch": 0.7197452229299363, | |
| "grad_norm": 0.45902571082115173, | |
| "kl": 0.0625, | |
| "learning_rate": 1.8599656376116024e-07, | |
| "loss": -0.2800550162792206, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000149011612, | |
| "reward_std": 0.3490536957979202, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49734747409820557, | |
| "step": 226, | |
| "train_speed(iter/s)": 0.003198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8061.0, | |
| "completions/mean_length": 5977.035888671875, | |
| "completions/min_length": 1405.5, | |
| "epoch": 0.7229299363057324, | |
| "grad_norm": 0.40895432233810425, | |
| "kl": 0.06298828125, | |
| "learning_rate": 1.820695104415721e-07, | |
| "loss": -0.07818439602851868, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.44672515988349915, | |
| "step": 227, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8046.0, | |
| "completions/mean_length": 6236.82177734375, | |
| "completions/min_length": 2348.5, | |
| "epoch": 0.7261146496815286, | |
| "grad_norm": 1.3278292417526245, | |
| "kl": 0.188232421875, | |
| "learning_rate": 1.7817510874845582e-07, | |
| "loss": -0.1289130449295044, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.19514649361371994, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 228, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8058.0, | |
| "completions/mean_length": 6219.535888671875, | |
| "completions/min_length": 2286.5, | |
| "epoch": 0.7292993630573248, | |
| "grad_norm": 2.8997809886932373, | |
| "kl": 1.2813720703125, | |
| "learning_rate": 1.7431375863888898e-07, | |
| "loss": -0.2479753941297531, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.32695358991622925, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4489477574825287, | |
| "step": 229, | |
| "train_speed(iter/s)": 0.0032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8028.5, | |
| "completions/mean_length": 5949.357421875, | |
| "completions/min_length": 1292.5, | |
| "epoch": 0.732484076433121, | |
| "grad_norm": 0.37234172224998474, | |
| "kl": 0.020263671875, | |
| "learning_rate": 1.7048585667553412e-07, | |
| "loss": 0.023813849315047264, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.375, | |
| "reward_std": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.375, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4750668406486511, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.0032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8021.5, | |
| "completions/mean_length": 5838.39306640625, | |
| "completions/min_length": 1460.0, | |
| "epoch": 0.7356687898089171, | |
| "grad_norm": 0.3578107953071594, | |
| "kl": 0.1229248046875, | |
| "learning_rate": 1.6669179598591183e-07, | |
| "loss": -0.21507693827152252, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714626312256, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49791228771209717, | |
| "step": 231, | |
| "train_speed(iter/s)": 0.0032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3035714285714286, | |
| "completions/max_length": 8058.5, | |
| "completions/mean_length": 4920.39306640625, | |
| "completions/min_length": 1596.0, | |
| "epoch": 0.7388535031847133, | |
| "grad_norm": 0.4056342840194702, | |
| "kl": 0.05517578125, | |
| "learning_rate": 1.6293196622202632e-07, | |
| "loss": 0.024090681225061417, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.2721000760793686, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 232, | |
| "train_speed(iter/s)": 0.003201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8033.5, | |
| "completions/mean_length": 5438.732421875, | |
| "completions/min_length": 2205.0, | |
| "epoch": 0.7420382165605095, | |
| "grad_norm": 0.5016524195671082, | |
| "kl": 0.0782470703125, | |
| "learning_rate": 1.592067535203479e-07, | |
| "loss": -0.18710020184516907, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.429407000541687, | |
| "step": 233, | |
| "train_speed(iter/s)": 0.003202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8018.0, | |
| "completions/mean_length": 5327.535888671875, | |
| "completions/min_length": 1035.5, | |
| "epoch": 0.7452229299363057, | |
| "grad_norm": 0.6563963294029236, | |
| "kl": 0.0595703125, | |
| "learning_rate": 1.555165404621567e-07, | |
| "loss": -0.38865458965301514, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5, | |
| "reward_std": 0.40943218767642975, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5091750919818878, | |
| "step": 234, | |
| "train_speed(iter/s)": 0.003202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6607142857142857, | |
| "completions/max_length": 8040.5, | |
| "completions/mean_length": 6642.339599609375, | |
| "completions/min_length": 1762.5, | |
| "epoch": 0.7484076433121019, | |
| "grad_norm": 0.815308153629303, | |
| "kl": 0.0169677734375, | |
| "learning_rate": 1.518617060342513e-07, | |
| "loss": -0.25266438722610474, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.3078143820166588, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.003202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8059.5, | |
| "completions/mean_length": 5736.250244140625, | |
| "completions/min_length": 998.0, | |
| "epoch": 0.7515923566878981, | |
| "grad_norm": 0.5217832922935486, | |
| "kl": 0.04364013671875, | |
| "learning_rate": 1.4824262559002592e-07, | |
| "loss": -0.21336083114147186, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000298023224, | |
| "reward_std": 0.30228935927152634, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4628649652004242, | |
| "step": 236, | |
| "train_speed(iter/s)": 0.003203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8044.5, | |
| "completions/mean_length": 5882.714599609375, | |
| "completions/min_length": 1346.5, | |
| "epoch": 0.7547770700636943, | |
| "grad_norm": 0.5135810971260071, | |
| "kl": 0.0560302734375, | |
| "learning_rate": 1.4465967081092345e-07, | |
| "loss": -0.15495836734771729, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.26657506823539734, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4582767188549042, | |
| "step": 237, | |
| "train_speed(iter/s)": 0.003203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6428571428571429, | |
| "completions/max_length": 8056.5, | |
| "completions/mean_length": 6505.57177734375, | |
| "completions/min_length": 1349.0, | |
| "epoch": 0.7579617834394905, | |
| "grad_norm": 0.20377548038959503, | |
| "kl": 1.419865668977121e-41, | |
| "learning_rate": 1.4111320966826057e-07, | |
| "loss": -0.06175333261489868, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2253357470035553, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4691530168056488, | |
| "step": 238, | |
| "train_speed(iter/s)": 0.003203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 5583.607421875, | |
| "completions/min_length": 1573.5, | |
| "epoch": 0.7611464968152867, | |
| "grad_norm": 0.7509793639183044, | |
| "kl": 0.03759765625, | |
| "learning_rate": 1.376036063854401e-07, | |
| "loss": -0.21666017174720764, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.32695360481739044, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5026109665632248, | |
| "step": 239, | |
| "train_speed(iter/s)": 0.003204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.39285714285714285, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 5945.732421875, | |
| "completions/min_length": 1600.0, | |
| "epoch": 0.7643312101910829, | |
| "grad_norm": 0.36760520935058594, | |
| "kl": 0.0548095703125, | |
| "learning_rate": 1.3413122140054217e-07, | |
| "loss": -0.12507101893424988, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3571428656578064, | |
| "reward_std": 0.2967643365263939, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48795004189014435, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.003205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8023.5, | |
| "completions/mean_length": 5641.893310546875, | |
| "completions/min_length": 1097.5, | |
| "epoch": 0.767515923566879, | |
| "grad_norm": 0.5026525259017944, | |
| "kl": 0.0391845703125, | |
| "learning_rate": 1.3069641132930926e-07, | |
| "loss": -0.16692417860031128, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.2967643365263939, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5065638720989227, | |
| "step": 241, | |
| "train_speed(iter/s)": 0.003206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8050.0, | |
| "completions/mean_length": 6501.6787109375, | |
| "completions/min_length": 2805.0, | |
| "epoch": 0.7707006369426752, | |
| "grad_norm": 0.4156489968299866, | |
| "kl": 0.023803617145823353, | |
| "learning_rate": 1.272995289285202e-07, | |
| "loss": -0.18214215338230133, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857142984867096, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45290274918079376, | |
| "step": 242, | |
| "train_speed(iter/s)": 0.003206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8062.0, | |
| "completions/mean_length": 6547.01806640625, | |
| "completions/min_length": 1906.5, | |
| "epoch": 0.7738853503184714, | |
| "grad_norm": 0.29880252480506897, | |
| "kl": 0.0003601927019190043, | |
| "learning_rate": 1.2394092305976272e-07, | |
| "loss": -0.12600275874137878, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.267857164144516, | |
| "reward_std": 0.23086078837513924, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.267857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.40946151316165924, | |
| "step": 243, | |
| "train_speed(iter/s)": 0.003206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8044.5, | |
| "completions/mean_length": 6625.714599609375, | |
| "completions/min_length": 1873.5, | |
| "epoch": 0.7770700636942676, | |
| "grad_norm": 0.6597678661346436, | |
| "kl": 556.0035400390625, | |
| "learning_rate": 1.2062093865360457e-07, | |
| "loss": -0.10461865365505219, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.160714291036129, | |
| "reward_std": 0.29123931378126144, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.160714291036129, | |
| "rewards/AnswerTagAccuracyORM/std": 0.3731846809387207, | |
| "step": 244, | |
| "train_speed(iter/s)": 0.003206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8078.0, | |
| "completions/mean_length": 5632.14306640625, | |
| "completions/min_length": 2093.0, | |
| "epoch": 0.7802547770700637, | |
| "grad_norm": 0.4589294493198395, | |
| "kl": 0.025390625, | |
| "learning_rate": 1.1733991667416926e-07, | |
| "loss": 0.09096799790859222, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000298023224, | |
| "reward_std": 0.33800365030765533, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.003207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 5813.6611328125, | |
| "completions/min_length": 1675.5, | |
| "epoch": 0.7834394904458599, | |
| "grad_norm": 0.5432755947113037, | |
| "kl": 0.048095703125, | |
| "learning_rate": 1.1409819408411897e-07, | |
| "loss": -0.0925760492682457, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.25552502274513245, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4582767188549042, | |
| "step": 246, | |
| "train_speed(iter/s)": 0.003207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8064.0, | |
| "completions/mean_length": 5805.607421875, | |
| "completions/min_length": 1816.5, | |
| "epoch": 0.7866242038216561, | |
| "grad_norm": 0.5733675360679626, | |
| "kl": 0.028795340098440647, | |
| "learning_rate": 1.108961038100481e-07, | |
| "loss": 0.0018586559453979135, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.23086076974868774, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786955714225769, | |
| "step": 247, | |
| "train_speed(iter/s)": 0.003208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3392857142857143, | |
| "completions/max_length": 8042.0, | |
| "completions/mean_length": 5231.625244140625, | |
| "completions/min_length": 1361.0, | |
| "epoch": 0.7898089171974523, | |
| "grad_norm": 0.3587133586406708, | |
| "kl": 2.521376371383667, | |
| "learning_rate": 1.0773397470829143e-07, | |
| "loss": -0.1912791132926941, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.6071428954601288, | |
| "reward_std": 0.34905368089675903, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.6071428954601288, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4609040319919586, | |
| "step": 248, | |
| "train_speed(iter/s)": 0.003209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8039.0, | |
| "completions/mean_length": 5721.500244140625, | |
| "completions/min_length": 1892.0, | |
| "epoch": 0.7929936305732485, | |
| "grad_norm": 0.48776480555534363, | |
| "kl": 0.077392578125, | |
| "learning_rate": 1.0461213153115079e-07, | |
| "loss": -0.07181628048419952, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000298023224, | |
| "reward_std": 0.34905368089675903, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078745186328888, | |
| "step": 249, | |
| "train_speed(iter/s)": 0.00321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8027.5, | |
| "completions/mean_length": 5601.21435546875, | |
| "completions/min_length": 1040.0, | |
| "epoch": 0.7961783439490446, | |
| "grad_norm": 0.2700536251068115, | |
| "kl": 0.0224609375, | |
| "learning_rate": 1.0153089489354256e-07, | |
| "loss": -0.038272541016340256, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.11266787722706795, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4061589390039444, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.003211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8045.0, | |
| "completions/mean_length": 6220.410888671875, | |
| "completions/min_length": 1321.5, | |
| "epoch": 0.7993630573248408, | |
| "grad_norm": 0.547614574432373, | |
| "kl": 0.04052734375, | |
| "learning_rate": 9.849058124007043e-08, | |
| "loss": -0.17825300991535187, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107142984867096, | |
| "reward_std": 0.37371791899204254, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107142984867096, | |
| "rewards/AnswerTagAccuracyORM/std": 0.497912272810936, | |
| "step": 251, | |
| "train_speed(iter/s)": 0.003211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4464285714285714, | |
| "completions/max_length": 8047.5, | |
| "completions/mean_length": 6005.39306640625, | |
| "completions/min_length": 1652.0, | |
| "epoch": 0.802547770700637, | |
| "grad_norm": 0.3931257426738739, | |
| "kl": 0.0443115234375, | |
| "learning_rate": 9.549150281252632e-08, | |
| "loss": 0.03433360904455185, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.2253357470035553, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 252, | |
| "train_speed(iter/s)": 0.003212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6229.875244140625, | |
| "completions/min_length": 1784.5, | |
| "epoch": 0.8057324840764332, | |
| "grad_norm": 0.9034252166748047, | |
| "kl": 0.07275390625, | |
| "learning_rate": 9.253396761782306e-08, | |
| "loss": 0.06343323737382889, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.3324786275625229, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 253, | |
| "train_speed(iter/s)": 0.003212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3214285714285714, | |
| "completions/max_length": 8007.0, | |
| "completions/mean_length": 4358.8216552734375, | |
| "completions/min_length": 924.5, | |
| "epoch": 0.8089171974522293, | |
| "grad_norm": 0.36683788895606995, | |
| "kl": 0.0548095703125, | |
| "learning_rate": 8.961827939636196e-08, | |
| "loss": -0.12078238278627396, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.21981072798371315, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49264873564243317, | |
| "step": 254, | |
| "train_speed(iter/s)": 0.003213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.26785714285714285, | |
| "completions/max_length": 8039.5, | |
| "completions/mean_length": 4816.250244140625, | |
| "completions/min_length": 1190.0, | |
| "epoch": 0.8121019108280255, | |
| "grad_norm": 0.5869824886322021, | |
| "kl": 0.0762939453125, | |
| "learning_rate": 8.6744737590838e-08, | |
| "loss": -0.1321803778409958, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714328289032, | |
| "reward_std": 0.3324786275625229, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5032612830400467, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.003214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.30357142857142855, | |
| "completions/max_length": 8047.0, | |
| "completions/mean_length": 5286.732421875, | |
| "completions/min_length": 1331.5, | |
| "epoch": 0.8152866242038217, | |
| "grad_norm": 0.562877893447876, | |
| "kl": 0.0516357421875, | |
| "learning_rate": 8.391363731548811e-08, | |
| "loss": -0.20328308641910553, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714477300644, | |
| "reward_std": 0.38476796448230743, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786955863237381, | |
| "step": 256, | |
| "train_speed(iter/s)": 0.003215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8021.0, | |
| "completions/mean_length": 5327.553955078125, | |
| "completions/min_length": 1074.0, | |
| "epoch": 0.8184713375796179, | |
| "grad_norm": 0.4446698725223541, | |
| "kl": 0.0565185546875, | |
| "learning_rate": 8.112526932578117e-08, | |
| "loss": -0.039517223834991455, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4821428656578064, | |
| "reward_std": 0.37371791899204254, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5085247755050659, | |
| "step": 257, | |
| "train_speed(iter/s)": 0.003215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8030.5, | |
| "completions/mean_length": 5912.285888671875, | |
| "completions/min_length": 1241.0, | |
| "epoch": 0.821656050955414, | |
| "grad_norm": 0.6693341732025146, | |
| "kl": 0.130615234375, | |
| "learning_rate": 7.837991998855897e-08, | |
| "loss": -0.08013840764760971, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3214285969734192, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3214285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4609040319919586, | |
| "step": 258, | |
| "train_speed(iter/s)": 0.003216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8041.5, | |
| "completions/mean_length": 5953.500244140625, | |
| "completions/min_length": 1419.0, | |
| "epoch": 0.8248407643312102, | |
| "grad_norm": 0.6183769702911377, | |
| "kl": 0.085205078125, | |
| "learning_rate": 7.567787125262449e-08, | |
| "loss": -0.2941018044948578, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.19514649361371994, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199816048145294, | |
| "step": 259, | |
| "train_speed(iter/s)": 0.003216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8020.0, | |
| "completions/mean_length": 6054.143310546875, | |
| "completions/min_length": 1255.0, | |
| "epoch": 0.8280254777070064, | |
| "grad_norm": 0.35015594959259033, | |
| "kl": 0.024169921875, | |
| "learning_rate": 7.301940061978722e-08, | |
| "loss": 0.021139614284038544, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2321428656578064, | |
| "reward_std": 0.29123930633068085, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2321428656578064, | |
| "rewards/AnswerTagAccuracyORM/std": 0.425032377243042, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.003215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 8042.5, | |
| "completions/mean_length": 6572.178955078125, | |
| "completions/min_length": 2292.5, | |
| "epoch": 0.8312101910828026, | |
| "grad_norm": 0.41886037588119507, | |
| "kl": 0.01409912109375, | |
| "learning_rate": 7.040478111636228e-08, | |
| "loss": -0.14545117318630219, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.45050114393234253, | |
| "step": 261, | |
| "train_speed(iter/s)": 0.003215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8034.5, | |
| "completions/mean_length": 5691.9287109375, | |
| "completions/min_length": 1192.0, | |
| "epoch": 0.8343949044585988, | |
| "grad_norm": 1.4889556169509888, | |
| "kl": 0.3389892578125, | |
| "learning_rate": 6.783428126513125e-08, | |
| "loss": -0.09831805527210236, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2142857201397419, | |
| "reward_std": 0.19514648616313934, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857201397419, | |
| "rewards/AnswerTagAccuracyORM/std": 0.37510764598846436, | |
| "step": 262, | |
| "train_speed(iter/s)": 0.003216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8028.0, | |
| "completions/mean_length": 5496.607421875, | |
| "completions/min_length": 1581.0, | |
| "epoch": 0.8375796178343949, | |
| "grad_norm": 0.2921965718269348, | |
| "kl": 0.0533447265625, | |
| "learning_rate": 6.530816505776443e-08, | |
| "loss": -0.031957417726516724, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.1896214708685875, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4839591085910797, | |
| "step": 263, | |
| "train_speed(iter/s)": 0.003217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8054.5, | |
| "completions/mean_length": 5091.857421875, | |
| "completions/min_length": 1231.0, | |
| "epoch": 0.8407643312101911, | |
| "grad_norm": 0.24407930672168732, | |
| "kl": 0.06640625, | |
| "learning_rate": 6.282669192770895e-08, | |
| "loss": -0.011408509686589241, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000149011612, | |
| "reward_std": 0.19514649361371994, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.460043728351593, | |
| "step": 264, | |
| "train_speed(iter/s)": 0.003217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.7321428571428572, | |
| "completions/max_length": 8064.0, | |
| "completions/mean_length": 7200.107666015625, | |
| "completions/min_length": 3825.5, | |
| "epoch": 0.8439490445859873, | |
| "grad_norm": 0.6117250919342041, | |
| "kl": 0.03643798828125, | |
| "learning_rate": 6.039011672354455e-08, | |
| "loss": -0.25999927520751953, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.3078143745660782, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2500000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.43280795216560364, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.003217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.2857142857142857, | |
| "completions/max_length": 8020.5, | |
| "completions/mean_length": 4265.500244140625, | |
| "completions/min_length": 979.0, | |
| "epoch": 0.8471337579617835, | |
| "grad_norm": 8.540395736694336, | |
| "kl": 3.001220703125, | |
| "learning_rate": 5.799868968281074e-08, | |
| "loss": -0.06987228244543076, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.6250000298023224, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.6250000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4897737503051758, | |
| "step": 266, | |
| "train_speed(iter/s)": 0.003219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8050.5, | |
| "completions/mean_length": 6214.964599609375, | |
| "completions/min_length": 1640.0, | |
| "epoch": 0.8503184713375797, | |
| "grad_norm": 0.4220220446586609, | |
| "kl": 0.03466796875, | |
| "learning_rate": 5.565265640630723e-08, | |
| "loss": -0.0669555515050888, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.1785714328289032, | |
| "reward_std": 0.2253357470035553, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.1785714328289032, | |
| "rewards/AnswerTagAccuracyORM/std": 0.37796446681022644, | |
| "step": 267, | |
| "train_speed(iter/s)": 0.003219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 8021.0, | |
| "completions/mean_length": 5096.89306640625, | |
| "completions/min_length": 1902.5, | |
| "epoch": 0.8535031847133758, | |
| "grad_norm": 562.29443359375, | |
| "kl": 174.029296875, | |
| "learning_rate": 5.335225783287051e-08, | |
| "loss": 0.30988335609436035, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.2610500454902649, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5032612681388855, | |
| "step": 268, | |
| "train_speed(iter/s)": 0.00322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8059.5, | |
| "completions/mean_length": 5627.44677734375, | |
| "completions/min_length": 1379.0, | |
| "epoch": 0.856687898089172, | |
| "grad_norm": 0.6505483984947205, | |
| "kl": 0.04296875, | |
| "learning_rate": 5.109773021462921e-08, | |
| "loss": -0.24035362899303436, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.3435286581516266, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 269, | |
| "train_speed(iter/s)": 0.00322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8059.0, | |
| "completions/mean_length": 5999.339599609375, | |
| "completions/min_length": 2076.0, | |
| "epoch": 0.8598726114649682, | |
| "grad_norm": 1.334702730178833, | |
| "kl": 1.001953125, | |
| "learning_rate": 4.888930509274125e-08, | |
| "loss": 0.008383408188819885, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.2253357619047165, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968925714493, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.00322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8038.5, | |
| "completions/mean_length": 5342.410888671875, | |
| "completions/min_length": 1213.5, | |
| "epoch": 0.8630573248407644, | |
| "grad_norm": 0.2792721390724182, | |
| "kl": 0.041748046875, | |
| "learning_rate": 4.6727209273614124e-08, | |
| "loss": -0.08284494280815125, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4107143133878708, | |
| "reward_std": 0.21981074661016464, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4107143133878708, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5006500333547592, | |
| "step": 271, | |
| "train_speed(iter/s)": 0.003221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4821428571428571, | |
| "completions/max_length": 8046.0, | |
| "completions/mean_length": 5400.964599609375, | |
| "completions/min_length": 739.0, | |
| "epoch": 0.8662420382165605, | |
| "grad_norm": 0.5285795331001282, | |
| "kl": 21.917213439941406, | |
| "learning_rate": 4.4611664805611794e-08, | |
| "loss": -0.2615126073360443, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.2610500529408455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5032612979412079, | |
| "step": 272, | |
| "train_speed(iter/s)": 0.003221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4285714285714286, | |
| "completions/max_length": 8027.5, | |
| "completions/mean_length": 5324.553955078125, | |
| "completions/min_length": 855.0, | |
| "epoch": 0.8694267515923567, | |
| "grad_norm": 0.3686632513999939, | |
| "kl": 0.06591796875, | |
| "learning_rate": 4.2542888956250464e-08, | |
| "loss": -0.04334472492337227, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714626312256, | |
| "reward_std": 0.30228933691978455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135854244232, | |
| "step": 273, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8016.5, | |
| "completions/mean_length": 5866.160888671875, | |
| "completions/min_length": 1242.5, | |
| "epoch": 0.8726114649681529, | |
| "grad_norm": 0.23201723396778107, | |
| "kl": 0.03265380859375, | |
| "learning_rate": 4.0521094189884696e-08, | |
| "loss": -0.03820869326591492, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2678571566939354, | |
| "reward_std": 0.14838216453790665, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571566939354, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4268478900194168, | |
| "step": 274, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5892857142857143, | |
| "completions/max_length": 8049.5, | |
| "completions/mean_length": 6177.125244140625, | |
| "completions/min_length": 1355.0, | |
| "epoch": 0.8757961783439491, | |
| "grad_norm": 0.4513340890407562, | |
| "kl": 0.0380859375, | |
| "learning_rate": 3.8546488145887624e-08, | |
| "loss": -0.08043000847101212, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.3078143745660782, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4489477574825287, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6964285714285714, | |
| "completions/max_length": 8028.0, | |
| "completions/mean_length": 6920.803955078125, | |
| "completions/min_length": 2485.0, | |
| "epoch": 0.8789808917197452, | |
| "grad_norm": 1.4028353691101074, | |
| "kl": 0.6268310546875, | |
| "learning_rate": 3.6619273617325695e-08, | |
| "loss": -0.16914184391498566, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.232142873108387, | |
| "reward_std": 0.23086076974868774, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.232142873108387, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4159715920686722, | |
| "step": 276, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8043.0, | |
| "completions/mean_length": 6354.357421875, | |
| "completions/min_length": 1492.5, | |
| "epoch": 0.8821656050955414, | |
| "grad_norm": 0.5193430185317993, | |
| "kl": 1.416993007125255e-41, | |
| "learning_rate": 3.473964853013273e-08, | |
| "loss": -0.09646855294704437, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.30228933691978455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.46781928837299347, | |
| "step": 277, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8023.0, | |
| "completions/mean_length": 5800.464599609375, | |
| "completions/min_length": 1759.0, | |
| "epoch": 0.8853503184713376, | |
| "grad_norm": 1.6207529306411743, | |
| "kl": 0.4185791015625, | |
| "learning_rate": 3.2907805922781476e-08, | |
| "loss": -0.2863817811012268, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714626312256, | |
| "reward_std": 0.26657505333423615, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5039526224136353, | |
| "step": 278, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8027.5, | |
| "completions/mean_length": 6033.375244140625, | |
| "completions/min_length": 1543.0, | |
| "epoch": 0.8885350318471338, | |
| "grad_norm": 0.6210139989852905, | |
| "kl": 0.0206298828125, | |
| "learning_rate": 3.1123933926459845e-08, | |
| "loss": -0.1716410219669342, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.446428582072258, | |
| "reward_std": 0.3324785977602005, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.446428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4786955863237381, | |
| "step": 279, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571428, | |
| "completions/max_length": 8034.0, | |
| "completions/mean_length": 6295.035888671875, | |
| "completions/min_length": 1541.5, | |
| "epoch": 0.89171974522293, | |
| "grad_norm": 0.38655340671539307, | |
| "kl": 0.0438232421875, | |
| "learning_rate": 2.9388215745748345e-08, | |
| "loss": -0.10300473123788834, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2142857238650322, | |
| "reward_std": 0.2967643216252327, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857238650322, | |
| "rewards/AnswerTagAccuracyORM/std": 0.40819603204727173, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8059.0, | |
| "completions/mean_length": 6408.1611328125, | |
| "completions/min_length": 1532.0, | |
| "epoch": 0.8949044585987261, | |
| "grad_norm": 0.5052747130393982, | |
| "kl": 0.039306640625, | |
| "learning_rate": 2.7700829639806465e-08, | |
| "loss": -0.17020417749881744, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.321428582072258, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.321428582072258, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4739968776702881, | |
| "step": 281, | |
| "train_speed(iter/s)": 0.003223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8031.5, | |
| "completions/mean_length": 5205.26806640625, | |
| "completions/min_length": 1111.0, | |
| "epoch": 0.8980891719745223, | |
| "grad_norm": 0.7017294764518738, | |
| "kl": 0.04931640625, | |
| "learning_rate": 2.6061948904063658e-08, | |
| "loss": -0.0019177369540557265, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 282, | |
| "train_speed(iter/s)": 0.003223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8032.0, | |
| "completions/mean_length": 6105.125244140625, | |
| "completions/min_length": 1046.5, | |
| "epoch": 0.9012738853503185, | |
| "grad_norm": 0.5392616391181946, | |
| "kl": 0.0572509765625, | |
| "learning_rate": 2.4471741852423233e-08, | |
| "loss": -0.040537793189287186, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.33800365030765533, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 283, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8060.0, | |
| "completions/mean_length": 6067.14306640625, | |
| "completions/min_length": 745.5, | |
| "epoch": 0.9044585987261147, | |
| "grad_norm": 0.5731423497200012, | |
| "kl": 0.05810546875, | |
| "learning_rate": 2.293037179997559e-08, | |
| "loss": -0.07903746515512466, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4642857313156128, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4642857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48647116124629974, | |
| "step": 284, | |
| "train_speed(iter/s)": 0.003223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8035.0, | |
| "completions/mean_length": 5818.035888671875, | |
| "completions/min_length": 1439.0, | |
| "epoch": 0.9076433121019108, | |
| "grad_norm": 0.6655234694480896, | |
| "kl": 0.0400390625, | |
| "learning_rate": 2.1437997046226008e-08, | |
| "loss": -0.2412337064743042, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.2610500380396843, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48177245259284973, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5714285714285714, | |
| "completions/max_length": 8061.0, | |
| "completions/mean_length": 6306.19677734375, | |
| "completions/min_length": 1288.0, | |
| "epoch": 0.910828025477707, | |
| "grad_norm": 0.8818116188049316, | |
| "kl": 0.0340576171875, | |
| "learning_rate": 1.9994770858837107e-08, | |
| "loss": -0.10960516333580017, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3571428805589676, | |
| "reward_std": 0.26657506078481674, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3571428805589676, | |
| "rewards/AnswerTagAccuracyORM/std": 0.48199817538261414, | |
| "step": 286, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8053.0, | |
| "completions/mean_length": 6440.303955078125, | |
| "completions/min_length": 1986.5, | |
| "epoch": 0.9140127388535032, | |
| "grad_norm": 0.39754489064216614, | |
| "kl": 0.03631591796875, | |
| "learning_rate": 1.860084145788826e-08, | |
| "loss": 0.06631383299827576, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2678571492433548, | |
| "reward_std": 0.3324785977602005, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2678571492433548, | |
| "rewards/AnswerTagAccuracyORM/std": 0.43898552656173706, | |
| "step": 287, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4642857142857143, | |
| "completions/max_length": 8059.0, | |
| "completions/mean_length": 5826.82177734375, | |
| "completions/min_length": 1675.0, | |
| "epoch": 0.9171974522292994, | |
| "grad_norm": 0.6225547194480896, | |
| "kl": 0.0421142578125, | |
| "learning_rate": 1.725635200065323e-08, | |
| "loss": -0.3206044137477875, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4285714477300644, | |
| "reward_std": 0.33800363540649414, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4285714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.502610981464386, | |
| "step": 288, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.39285714285714285, | |
| "completions/max_length": 8045.0, | |
| "completions/mean_length": 5416.107177734375, | |
| "completions/min_length": 1183.5, | |
| "epoch": 0.9203821656050956, | |
| "grad_norm": 0.47386959195137024, | |
| "kl": 0.115966796875, | |
| "learning_rate": 1.596144056689791e-08, | |
| "loss": -0.10028056055307388, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4821428954601288, | |
| "reward_std": 0.2610500305891037, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4821428954601288, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4817724674940109, | |
| "step": 289, | |
| "train_speed(iter/s)": 0.003225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5178571428571429, | |
| "completions/max_length": 8044.0, | |
| "completions/mean_length": 6445.464599609375, | |
| "completions/min_length": 1398.5, | |
| "epoch": 0.9235668789808917, | |
| "grad_norm": 0.49086901545524597, | |
| "kl": 0.0467529296875, | |
| "learning_rate": 1.4716240144699187e-08, | |
| "loss": -0.14612972736358643, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2142857313156128, | |
| "reward_std": 0.2253357470035553, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2142857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.39528264105319977, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.003225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 8029.5, | |
| "completions/mean_length": 5734.375244140625, | |
| "completions/min_length": 844.5, | |
| "epoch": 0.9267515923566879, | |
| "grad_norm": 0.5175443291664124, | |
| "kl": 0.0667724609375, | |
| "learning_rate": 1.3520878616787523e-08, | |
| "loss": 0.018897494301199913, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.3681929111480713, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4744165241718292, | |
| "step": 291, | |
| "train_speed(iter/s)": 0.003225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.42857142857142855, | |
| "completions/max_length": 8033.0, | |
| "completions/mean_length": 5356.482421875, | |
| "completions/min_length": 1571.0, | |
| "epoch": 0.9299363057324841, | |
| "grad_norm": 0.33042165637016296, | |
| "kl": 0.0430908203125, | |
| "learning_rate": 1.2375478747413015e-08, | |
| "loss": 0.04050120711326599, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5000000298023224, | |
| "reward_std": 0.2967643141746521, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5000000298023224, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5078744888305664, | |
| "step": 292, | |
| "train_speed(iter/s)": 0.003226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3214285714285714, | |
| "completions/max_length": 8054.5, | |
| "completions/mean_length": 5093.76806640625, | |
| "completions/min_length": 1073.5, | |
| "epoch": 0.9331210191082803, | |
| "grad_norm": 0.8467631936073303, | |
| "kl": 0.052978515625, | |
| "learning_rate": 1.1280158169737265e-08, | |
| "loss": -0.26039645075798035, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.5535714626312256, | |
| "reward_std": 0.3324785977602005, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.5535714626312256, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135556221008, | |
| "step": 293, | |
| "train_speed(iter/s)": 0.003226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.3392857142857143, | |
| "completions/max_length": 8050.0, | |
| "completions/mean_length": 4675.946533203125, | |
| "completions/min_length": 587.0, | |
| "epoch": 0.9363057324840764, | |
| "grad_norm": 0.6540654301643372, | |
| "kl": 0.0511474609375, | |
| "learning_rate": 1.0235029373752757e-08, | |
| "loss": -0.02355077676475048, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.4464285969734192, | |
| "reward_std": 0.3324786126613617, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.4464285969734192, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4979122579097748, | |
| "step": 294, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5535714285714286, | |
| "completions/max_length": 8030.5, | |
| "completions/mean_length": 6136.69677734375, | |
| "completions/min_length": 1404.0, | |
| "epoch": 0.9394904458598726, | |
| "grad_norm": 0.40905487537384033, | |
| "kl": 0.0523681640625, | |
| "learning_rate": 9.240199694729944e-09, | |
| "loss": 0.014087937772274017, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.392857164144516, | |
| "reward_std": 0.18409644439816475, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.392857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49173468351364136, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8051.0, | |
| "completions/mean_length": 5592.5537109375, | |
| "completions/min_length": 1196.0, | |
| "epoch": 0.9426751592356688, | |
| "grad_norm": 0.3601033091545105, | |
| "kl": 0.0228271484375, | |
| "learning_rate": 8.295771302193721e-09, | |
| "loss": -0.025663327425718307, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.2857143059372902, | |
| "reward_std": 0.11266788095235825, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.2857143059372902, | |
| "rewards/AnswerTagAccuracyORM/std": 0.43015047907829285, | |
| "step": 296, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.4107142857142857, | |
| "completions/max_length": 8053.0, | |
| "completions/mean_length": 5372.053955078125, | |
| "completions/min_length": 1333.0, | |
| "epoch": 0.945859872611465, | |
| "grad_norm": 0.5271080732345581, | |
| "kl": 0.040283203125, | |
| "learning_rate": 7.401841189430657e-09, | |
| "loss": -0.12217244505882263, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3750000149011612, | |
| "reward_std": 0.2721000909805298, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3750000149011612, | |
| "rewards/AnswerTagAccuracyORM/std": 0.49264876544475555, | |
| "step": 297, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.5357142857142857, | |
| "completions/max_length": 8052.0, | |
| "completions/mean_length": 5935.625244140625, | |
| "completions/min_length": 1075.5, | |
| "epoch": 0.9490445859872612, | |
| "grad_norm": 0.5965413451194763, | |
| "kl": 0.05285042445757426, | |
| "learning_rate": 6.558501163527963e-09, | |
| "loss": -0.2230261266231537, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3392857313156128, | |
| "reward_std": 0.2721000760793686, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3392857313156128, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4628649652004242, | |
| "step": 298, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 8037.0, | |
| "completions/mean_length": 5281.518310546875, | |
| "completions/min_length": 893.0, | |
| "epoch": 0.9522292993630573, | |
| "grad_norm": 0.32658663392066956, | |
| "kl": 0.07177734375, | |
| "learning_rate": 5.765837835944309e-09, | |
| "loss": 0.05634969845414162, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.517857164144516, | |
| "reward_std": 0.2610500529408455, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.517857164144516, | |
| "rewards/AnswerTagAccuracyORM/std": 0.5059135854244232, | |
| "step": 299, | |
| "train_speed(iter/s)": 0.003226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.6071428571428572, | |
| "completions/max_length": 8031.0, | |
| "completions/mean_length": 6464.035888671875, | |
| "completions/min_length": 1501.5, | |
| "epoch": 0.9554140127388535, | |
| "grad_norm": 0.33558884263038635, | |
| "kl": 1.261028488045903e-41, | |
| "learning_rate": 5.023932613615445e-09, | |
| "loss": -0.09524659812450409, | |
| "memory(GiB)": 176.78, | |
| "reward": 0.3035714477300644, | |
| "reward_std": 0.3435286656022072, | |
| "rewards/AnswerTagAccuracyORM/mean": 0.3035714477300644, | |
| "rewards/AnswerTagAccuracyORM/std": 0.4576014429330826, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.003225 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 314, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |