| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 935.5, | |
| "completions/mean_length": 571.30859375, | |
| "completions/min_length": 264.5, | |
| "epoch": 0.02, | |
| "grad_norm": 1.2956373691558838, | |
| "kl": 0.0006160736083984375, | |
| "learning_rate": 2e-07, | |
| "loss": 0.11099594086408615, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.18179254233837128, | |
| "reward_std": 0.021205796860158443, | |
| "rewards/MCQ_Reward/mean": 0.18179254233837128, | |
| "rewards/MCQ_Reward/std": 0.0575394481420517, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.017384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.04, | |
| "grad_norm": 1.2956030368804932, | |
| "kl": 0.0006160736083984375, | |
| "learning_rate": 4e-07, | |
| "loss": 0.11099594086408615, | |
| "memory(GiB)": 18.17, | |
| "step": 2, | |
| "train_speed(iter/s)": 0.033769 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1004.0, | |
| "completions/mean_length": 582.2890625, | |
| "completions/min_length": 126.5, | |
| "epoch": 0.06, | |
| "grad_norm": 1.1973260641098022, | |
| "kl": 0.00061798095703125, | |
| "learning_rate": 6e-07, | |
| "loss": 0.09401366859674454, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.1757229119539261, | |
| "reward_std": 0.02308646310120821, | |
| "rewards/MCQ_Reward/mean": 0.1757229119539261, | |
| "rewards/MCQ_Reward/std": 0.06555243954062462, | |
| "step": 3, | |
| "train_speed(iter/s)": 0.029478 | |
| }, | |
| { | |
| "clip_ratio": 0.0011098573449999094, | |
| "epoch": 0.08, | |
| "grad_norm": 1.206025242805481, | |
| "kl": 0.0006008148193359375, | |
| "learning_rate": 8e-07, | |
| "loss": 0.09423406422138214, | |
| "memory(GiB)": 18.17, | |
| "step": 4, | |
| "train_speed(iter/s)": 0.038797 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1025.0, | |
| "completions/mean_length": 587.22265625, | |
| "completions/min_length": 50.0, | |
| "epoch": 0.1, | |
| "grad_norm": 1.1425890922546387, | |
| "kl": 0.0006389617919921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.10835893452167511, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.20135290175676346, | |
| "reward_std": 0.026336468756198883, | |
| "rewards/MCQ_Reward/mean": 0.20135290175676346, | |
| "rewards/MCQ_Reward/std": 0.04013596661388874, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.033455 | |
| }, | |
| { | |
| "clip_ratio": 0.000744842371204868, | |
| "epoch": 0.12, | |
| "grad_norm": 1.1426688432693481, | |
| "kl": 0.0006389617919921875, | |
| "learning_rate": 9.999899300364532e-07, | |
| "loss": 0.10809706896543503, | |
| "memory(GiB)": 18.17, | |
| "step": 6, | |
| "train_speed(iter/s)": 0.039768 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 986.0, | |
| "completions/mean_length": 554.33203125, | |
| "completions/min_length": 187.5, | |
| "epoch": 0.14, | |
| "grad_norm": 1.2598297595977783, | |
| "kl": 0.000637054443359375, | |
| "learning_rate": 9.999597205514296e-07, | |
| "loss": 0.10747133195400238, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.18709591031074524, | |
| "reward_std": 0.022870728746056557, | |
| "rewards/MCQ_Reward/mean": 0.18709591031074524, | |
| "rewards/MCQ_Reward/std": 0.061255430802702904, | |
| "step": 7, | |
| "train_speed(iter/s)": 0.036272 | |
| }, | |
| { | |
| "clip_ratio": 0.0011600544094108045, | |
| "epoch": 0.16, | |
| "grad_norm": 1.2500499486923218, | |
| "kl": 0.0007114410400390625, | |
| "learning_rate": 9.999093727617628e-07, | |
| "loss": 0.10704316943883896, | |
| "memory(GiB)": 18.17, | |
| "step": 8, | |
| "train_speed(iter/s)": 0.041177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1011.5, | |
| "completions/mean_length": 562.61328125, | |
| "completions/min_length": 231.5, | |
| "epoch": 0.18, | |
| "grad_norm": 1.4137037992477417, | |
| "kl": 0.00092315673828125, | |
| "learning_rate": 9.998388886954545e-07, | |
| "loss": 0.1194264367222786, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.20057281106710434, | |
| "reward_std": 0.02457202784717083, | |
| "rewards/MCQ_Reward/mean": 0.20057281106710434, | |
| "rewards/MCQ_Reward/std": 0.0581410713493824, | |
| "step": 9, | |
| "train_speed(iter/s)": 0.037627 | |
| }, | |
| { | |
| "clip_ratio": 0.0008636733400635421, | |
| "epoch": 0.2, | |
| "grad_norm": 1.4122164249420166, | |
| "kl": 0.001087188720703125, | |
| "learning_rate": 9.997482711915925e-07, | |
| "loss": 0.11916504055261612, | |
| "memory(GiB)": 18.17, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.041584 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 988.0, | |
| "completions/mean_length": 545.3203125, | |
| "completions/min_length": 13.0, | |
| "epoch": 0.22, | |
| "grad_norm": 1.1587789058685303, | |
| "kl": 0.001285552978515625, | |
| "learning_rate": 9.996375239002368e-07, | |
| "loss": 0.06654135137796402, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.18803076446056366, | |
| "reward_std": 0.027116701006889343, | |
| "rewards/MCQ_Reward/mean": 0.18803076446056366, | |
| "rewards/MCQ_Reward/std": 0.06116201728582382, | |
| "step": 11, | |
| "train_speed(iter/s)": 0.037797 | |
| }, | |
| { | |
| "clip_ratio": 0.0012727798894047737, | |
| "epoch": 0.24, | |
| "grad_norm": 1.1393318176269531, | |
| "kl": 0.001819610595703125, | |
| "learning_rate": 9.995066512822718e-07, | |
| "loss": 0.0661393254995346, | |
| "memory(GiB)": 18.17, | |
| "step": 12, | |
| "train_speed(iter/s)": 0.041011 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 928.0, | |
| "completions/mean_length": 502.984375, | |
| "completions/min_length": 181.5, | |
| "epoch": 0.26, | |
| "grad_norm": 1.3736039400100708, | |
| "kl": 0.00341796875, | |
| "learning_rate": 9.99355658609228e-07, | |
| "loss": 0.09961968660354614, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.2046608179807663, | |
| "reward_std": 0.02339835651218891, | |
| "rewards/MCQ_Reward/mean": 0.2046608179807663, | |
| "rewards/MCQ_Reward/std": 0.07441236078739166, | |
| "step": 13, | |
| "train_speed(iter/s)": 0.038941 | |
| }, | |
| { | |
| "clip_ratio": 0.0013542931410484016, | |
| "epoch": 0.28, | |
| "grad_norm": 1.341399073600769, | |
| "kl": 0.004730224609375, | |
| "learning_rate": 9.991845519630676e-07, | |
| "loss": 0.09878668189048767, | |
| "memory(GiB)": 18.17, | |
| "step": 14, | |
| "train_speed(iter/s)": 0.041763 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 939.0, | |
| "completions/mean_length": 479.08984375, | |
| "completions/min_length": 201.5, | |
| "epoch": 0.3, | |
| "grad_norm": 1.2583457231521606, | |
| "kl": 0.005706787109375, | |
| "learning_rate": 9.989933382359422e-07, | |
| "loss": 0.09561844170093536, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.23959992825984955, | |
| "reward_std": 0.024829759262502193, | |
| "rewards/MCQ_Reward/mean": 0.23959992825984955, | |
| "rewards/MCQ_Reward/std": 0.059385696426033974, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.040033 | |
| }, | |
| { | |
| "clip_ratio": 0.0012090829550288618, | |
| "epoch": 0.32, | |
| "grad_norm": 1.2485970258712769, | |
| "kl": 0.0069122314453125, | |
| "learning_rate": 9.98782025129912e-07, | |
| "loss": 0.09502086043357849, | |
| "memory(GiB)": 18.17, | |
| "step": 16, | |
| "train_speed(iter/s)": 0.042555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 785.0, | |
| "completions/mean_length": 446.19140625, | |
| "completions/min_length": 186.5, | |
| "epoch": 0.34, | |
| "grad_norm": 1.4837766885757446, | |
| "kl": 0.0080718994140625, | |
| "learning_rate": 9.985506211566386e-07, | |
| "loss": 0.11237534880638123, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.204755961894989, | |
| "reward_std": 0.025960725732147694, | |
| "rewards/MCQ_Reward/mean": 0.204755961894989, | |
| "rewards/MCQ_Reward/std": 0.05882856249809265, | |
| "step": 17, | |
| "train_speed(iter/s)": 0.041421 | |
| }, | |
| { | |
| "clip_ratio": 0.0012163713108748198, | |
| "epoch": 0.36, | |
| "grad_norm": 1.4663207530975342, | |
| "kl": 0.00933837890625, | |
| "learning_rate": 9.982991356370403e-07, | |
| "loss": 0.11209464073181152, | |
| "memory(GiB)": 18.17, | |
| "step": 18, | |
| "train_speed(iter/s)": 0.043701 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 771.0, | |
| "completions/mean_length": 451.41015625, | |
| "completions/min_length": 101.0, | |
| "epoch": 0.38, | |
| "grad_norm": 1.2070645093917847, | |
| "kl": 0.010772705078125, | |
| "learning_rate": 9.98027578700917e-07, | |
| "loss": 0.0659424215555191, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.18814751505851746, | |
| "reward_std": 0.024471789598464966, | |
| "rewards/MCQ_Reward/mean": 0.18814751505851746, | |
| "rewards/MCQ_Reward/std": 0.062104713171720505, | |
| "step": 19, | |
| "train_speed(iter/s)": 0.042657 | |
| }, | |
| { | |
| "clip_ratio": 0.0017630973597988486, | |
| "epoch": 0.4, | |
| "grad_norm": 1.1632057428359985, | |
| "kl": 0.014007568359375, | |
| "learning_rate": 9.977359612865422e-07, | |
| "loss": 0.0650935024023056, | |
| "memory(GiB)": 18.17, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.044775 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 808.0, | |
| "completions/mean_length": 392.30078125, | |
| "completions/min_length": 84.0, | |
| "epoch": 0.42, | |
| "grad_norm": 1.313915491104126, | |
| "kl": 0.019775390625, | |
| "learning_rate": 9.974242951402235e-07, | |
| "loss": 0.07705788314342499, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.23380683362483978, | |
| "reward_std": 0.03150738961994648, | |
| "rewards/MCQ_Reward/mean": 0.23380683362483978, | |
| "rewards/MCQ_Reward/std": 0.057576023042201996, | |
| "step": 21, | |
| "train_speed(iter/s)": 0.043224 | |
| }, | |
| { | |
| "clip_ratio": 0.0028022455517202616, | |
| "epoch": 0.44, | |
| "grad_norm": 1.242121934890747, | |
| "kl": 0.02642822265625, | |
| "learning_rate": 9.970925928158272e-07, | |
| "loss": 0.07613129168748856, | |
| "memory(GiB)": 18.17, | |
| "step": 22, | |
| "train_speed(iter/s)": 0.045118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 621.0, | |
| "completions/mean_length": 355.48828125, | |
| "completions/min_length": 144.0, | |
| "epoch": 0.46, | |
| "grad_norm": 1.3318829536437988, | |
| "kl": 0.034423828125, | |
| "learning_rate": 9.967408676742751e-07, | |
| "loss": 0.07269842177629471, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.22312550246715546, | |
| "reward_std": 0.031231535598635674, | |
| "rewards/MCQ_Reward/mean": 0.22312550246715546, | |
| "rewards/MCQ_Reward/std": 0.05438939481973648, | |
| "step": 23, | |
| "train_speed(iter/s)": 0.044616 | |
| }, | |
| { | |
| "clip_ratio": 0.0020711172837764025, | |
| "epoch": 0.48, | |
| "grad_norm": 1.2974779605865479, | |
| "kl": 0.0413818359375, | |
| "learning_rate": 9.963691338830042e-07, | |
| "loss": 0.07173984497785568, | |
| "memory(GiB)": 18.17, | |
| "step": 24, | |
| "train_speed(iter/s)": 0.046444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 651.5, | |
| "completions/mean_length": 318.5234375, | |
| "completions/min_length": 92.0, | |
| "epoch": 0.5, | |
| "grad_norm": 1.397636890411377, | |
| "kl": 0.047119140625, | |
| "learning_rate": 9.959774064153975e-07, | |
| "loss": 0.03884683549404144, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.23498350381851196, | |
| "reward_std": 0.03053601924329996, | |
| "rewards/MCQ_Reward/mean": 0.23498350381851196, | |
| "rewards/MCQ_Reward/std": 0.05711263045668602, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.045888 | |
| }, | |
| { | |
| "clip_ratio": 0.0013737165136262774, | |
| "epoch": 0.52, | |
| "grad_norm": 1.379469394683838, | |
| "kl": 0.052734375, | |
| "learning_rate": 9.955657010501806e-07, | |
| "loss": 0.038122277706861496, | |
| "memory(GiB)": 18.17, | |
| "step": 26, | |
| "train_speed(iter/s)": 0.047611 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 611.5, | |
| "completions/mean_length": 293.42578125, | |
| "completions/min_length": 110.5, | |
| "epoch": 0.54, | |
| "grad_norm": 1.3771414756774902, | |
| "kl": 0.0574951171875, | |
| "learning_rate": 9.95134034370785e-07, | |
| "loss": 0.05064291134476662, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.257246270775795, | |
| "reward_std": 0.03051395993679762, | |
| "rewards/MCQ_Reward/mean": 0.257246270775795, | |
| "rewards/MCQ_Reward/std": 0.05405682139098644, | |
| "step": 27, | |
| "train_speed(iter/s)": 0.046967 | |
| }, | |
| { | |
| "clip_ratio": 0.0015082518220879138, | |
| "epoch": 0.56, | |
| "grad_norm": 1.3394073247909546, | |
| "kl": 0.063720703125, | |
| "learning_rate": 9.946824237646824e-07, | |
| "loss": 0.04972712695598602, | |
| "memory(GiB)": 18.17, | |
| "step": 28, | |
| "train_speed(iter/s)": 0.048554 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 505.5, | |
| "completions/mean_length": 259.3515625, | |
| "completions/min_length": 76.0, | |
| "epoch": 0.58, | |
| "grad_norm": 1.4677767753601074, | |
| "kl": 0.070556640625, | |
| "learning_rate": 9.94210887422681e-07, | |
| "loss": -0.01695432886481285, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.25767549127340317, | |
| "reward_std": 0.03901047818362713, | |
| "rewards/MCQ_Reward/mean": 0.25767549127340317, | |
| "rewards/MCQ_Reward/std": 0.05495491810142994, | |
| "step": 29, | |
| "train_speed(iter/s)": 0.048377 | |
| }, | |
| { | |
| "clip_ratio": 0.001286374346818775, | |
| "epoch": 0.6, | |
| "grad_norm": 1.4747378826141357, | |
| "kl": 0.076904296875, | |
| "learning_rate": 9.93719444338197e-07, | |
| "loss": -0.017460569739341736, | |
| "memory(GiB)": 18.17, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.04994 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 561.5, | |
| "completions/mean_length": 250.26171875, | |
| "completions/min_length": 96.5, | |
| "epoch": 0.62, | |
| "grad_norm": 1.6029585599899292, | |
| "kl": 0.07763671875, | |
| "learning_rate": 9.932081143064858e-07, | |
| "loss": 0.042436983436346054, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.23062269389629364, | |
| "reward_std": 0.036025889217853546, | |
| "rewards/MCQ_Reward/mean": 0.23062269389629364, | |
| "rewards/MCQ_Reward/std": 0.0671730749309063, | |
| "step": 31, | |
| "train_speed(iter/s)": 0.048974 | |
| }, | |
| { | |
| "clip_ratio": 0.00158036028733477, | |
| "epoch": 0.64, | |
| "grad_norm": 1.5435467958450317, | |
| "kl": 0.08349609375, | |
| "learning_rate": 9.926769179238464e-07, | |
| "loss": 0.04148583859205246, | |
| "memory(GiB)": 18.17, | |
| "step": 32, | |
| "train_speed(iter/s)": 0.050428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 522.5, | |
| "completions/mean_length": 246.3984375, | |
| "completions/min_length": 89.0, | |
| "epoch": 0.66, | |
| "grad_norm": 1.466068983078003, | |
| "kl": 0.093994140625, | |
| "learning_rate": 9.921258765867919e-07, | |
| "loss": 0.008220436982810497, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.22424693405628204, | |
| "reward_std": 0.03309958428144455, | |
| "rewards/MCQ_Reward/mean": 0.22424693405628204, | |
| "rewards/MCQ_Reward/std": 0.06848622299730778, | |
| "step": 33, | |
| "train_speed(iter/s)": 0.050299 | |
| }, | |
| { | |
| "clip_ratio": 0.0012578482856042683, | |
| "epoch": 0.68, | |
| "grad_norm": 1.4434019327163696, | |
| "kl": 0.10009765625, | |
| "learning_rate": 9.915550124911866e-07, | |
| "loss": 0.007482614368200302, | |
| "memory(GiB)": 18.17, | |
| "step": 34, | |
| "train_speed(iter/s)": 0.051722 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 520.0, | |
| "completions/mean_length": 226.12109375, | |
| "completions/min_length": 47.5, | |
| "epoch": 0.7, | |
| "grad_norm": 1.529449224472046, | |
| "kl": 0.10546875, | |
| "learning_rate": 9.909643486313533e-07, | |
| "loss": -0.024700753390789032, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.24431276321411133, | |
| "reward_std": 0.03709370456635952, | |
| "rewards/MCQ_Reward/mean": 0.24431276321411133, | |
| "rewards/MCQ_Reward/std": 0.06565525010228157, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.051572 | |
| }, | |
| { | |
| "clip_ratio": 0.0013001365587115288, | |
| "epoch": 0.72, | |
| "grad_norm": 1.524826169013977, | |
| "kl": 0.110595703125, | |
| "learning_rate": 9.903539087991461e-07, | |
| "loss": -0.025061530992388725, | |
| "memory(GiB)": 18.17, | |
| "step": 36, | |
| "train_speed(iter/s)": 0.052951 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 461.0, | |
| "completions/mean_length": 206.1328125, | |
| "completions/min_length": 63.0, | |
| "epoch": 0.74, | |
| "grad_norm": 1.5648741722106934, | |
| "kl": 0.11474609375, | |
| "learning_rate": 9.897237175829926e-07, | |
| "loss": -0.010986058972775936, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.26653096079826355, | |
| "reward_std": 0.03736630827188492, | |
| "rewards/MCQ_Reward/mean": 0.26653096079826355, | |
| "rewards/MCQ_Reward/std": 0.065978042781353, | |
| "step": 37, | |
| "train_speed(iter/s)": 0.052793 | |
| }, | |
| { | |
| "clip_ratio": 0.0015517690917477012, | |
| "epoch": 0.76, | |
| "grad_norm": 1.5597436428070068, | |
| "kl": 0.122802734375, | |
| "learning_rate": 9.890738003669027e-07, | |
| "loss": -0.011755033396184444, | |
| "memory(GiB)": 18.17, | |
| "step": 38, | |
| "train_speed(iter/s)": 0.054118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 412.5, | |
| "completions/mean_length": 203.34375, | |
| "completions/min_length": 35.5, | |
| "epoch": 0.78, | |
| "grad_norm": 1.6045058965682983, | |
| "kl": 0.125244140625, | |
| "learning_rate": 9.884041833294475e-07, | |
| "loss": -0.04164643585681915, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.2605663910508156, | |
| "reward_std": 0.03675983473658562, | |
| "rewards/MCQ_Reward/mean": 0.2605663910508156, | |
| "rewards/MCQ_Reward/std": 0.06591521203517914, | |
| "step": 39, | |
| "train_speed(iter/s)": 0.054082 | |
| }, | |
| { | |
| "clip_ratio": 0.0013205534196458757, | |
| "epoch": 0.8, | |
| "grad_norm": 1.608991265296936, | |
| "kl": 0.1337890625, | |
| "learning_rate": 9.877148934427035e-07, | |
| "loss": -0.042494483292102814, | |
| "memory(GiB)": 18.17, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.055369 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 385.0, | |
| "completions/mean_length": 189.2734375, | |
| "completions/min_length": 60.5, | |
| "epoch": 0.82, | |
| "grad_norm": 1.8442962169647217, | |
| "kl": 0.14208984375, | |
| "learning_rate": 9.870059584711668e-07, | |
| "loss": -0.07683762162923813, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.26815178990364075, | |
| "reward_std": 0.04410684481263161, | |
| "rewards/MCQ_Reward/mean": 0.26815178990364075, | |
| "rewards/MCQ_Reward/std": 0.06000189855694771, | |
| "step": 41, | |
| "train_speed(iter/s)": 0.055022 | |
| }, | |
| { | |
| "clip_ratio": 0.0013334141112864017, | |
| "epoch": 0.84, | |
| "grad_norm": 1.8422967195510864, | |
| "kl": 0.14599609375, | |
| "learning_rate": 9.862774069706345e-07, | |
| "loss": -0.0775442123413086, | |
| "memory(GiB)": 18.17, | |
| "step": 42, | |
| "train_speed(iter/s)": 0.056271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.5, | |
| "completions/mean_length": 185.765625, | |
| "completions/min_length": 65.5, | |
| "epoch": 0.86, | |
| "grad_norm": 1.7880198955535889, | |
| "kl": 0.14453125, | |
| "learning_rate": 9.85529268287055e-07, | |
| "loss": 0.009722323156893253, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.26024360954761505, | |
| "reward_std": 0.04201339744031429, | |
| "rewards/MCQ_Reward/mean": 0.26024360954761505, | |
| "rewards/MCQ_Reward/std": 0.0699400007724762, | |
| "step": 43, | |
| "train_speed(iter/s)": 0.056122 | |
| }, | |
| { | |
| "clip_ratio": 0.0013897960307076573, | |
| "epoch": 0.88, | |
| "grad_norm": 1.7613471746444702, | |
| "kl": 0.14599609375, | |
| "learning_rate": 9.847615725553455e-07, | |
| "loss": 0.008702307008206844, | |
| "memory(GiB)": 18.17, | |
| "step": 44, | |
| "train_speed(iter/s)": 0.057328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 329.5, | |
| "completions/mean_length": 180.44921875, | |
| "completions/min_length": 71.5, | |
| "epoch": 0.9, | |
| "grad_norm": 1.8986045122146606, | |
| "kl": 0.16357421875, | |
| "learning_rate": 9.83974350698178e-07, | |
| "loss": -0.01265439111739397, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.24561913311481476, | |
| "reward_std": 0.041749605908989906, | |
| "rewards/MCQ_Reward/mean": 0.24561913311481476, | |
| "rewards/MCQ_Reward/std": 0.0692291297018528, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.057564 | |
| }, | |
| { | |
| "clip_ratio": 0.0017767796525731683, | |
| "epoch": 0.92, | |
| "grad_norm": 1.8627526760101318, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.831676344247342e-07, | |
| "loss": -0.013573069125413895, | |
| "memory(GiB)": 18.17, | |
| "step": 46, | |
| "train_speed(iter/s)": 0.058753 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 360.5, | |
| "completions/mean_length": 181.046875, | |
| "completions/min_length": 58.0, | |
| "epoch": 0.94, | |
| "grad_norm": 1.8329010009765625, | |
| "kl": 0.1689453125, | |
| "learning_rate": 9.82341456229428e-07, | |
| "loss": -0.009910675697028637, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.2712182253599167, | |
| "reward_std": 0.03875480592250824, | |
| "rewards/MCQ_Reward/mean": 0.2712182253599167, | |
| "rewards/MCQ_Reward/std": 0.05874207057058811, | |
| "step": 47, | |
| "train_speed(iter/s)": 0.05881 | |
| }, | |
| { | |
| "clip_ratio": 0.0020254994742572308, | |
| "epoch": 0.96, | |
| "grad_norm": 1.7636630535125732, | |
| "kl": 0.17529296875, | |
| "learning_rate": 9.814958493905962e-07, | |
| "loss": -0.011010742746293545, | |
| "memory(GiB)": 18.17, | |
| "step": 48, | |
| "train_speed(iter/s)": 0.05997 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.0, | |
| "completions/mean_length": 198.5, | |
| "completions/min_length": 83.0, | |
| "epoch": 0.98, | |
| "grad_norm": 1.9754475355148315, | |
| "kl": 0.15625, | |
| "learning_rate": 9.806308479691594e-07, | |
| "loss": 0.026388226076960564, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.2969816029071808, | |
| "reward_std": 0.033485451713204384, | |
| "rewards/MCQ_Reward/mean": 0.2969816029071808, | |
| "rewards/MCQ_Reward/std": 0.06154371425509453, | |
| "step": 49, | |
| "train_speed(iter/s)": 0.059869 | |
| }, | |
| { | |
| "clip_ratio": 0.002143923775292933, | |
| "epoch": 1.0, | |
| "grad_norm": 1.9168144464492798, | |
| "kl": 0.16455078125, | |
| "learning_rate": 9.797464868072486e-07, | |
| "loss": 0.025302505120635033, | |
| "memory(GiB)": 18.17, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.060949 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 349.5, | |
| "completions/mean_length": 175.86328125, | |
| "completions/min_length": 67.5, | |
| "epoch": 1.02, | |
| "grad_norm": 1.949724793434143, | |
| "kl": 0.18359375, | |
| "learning_rate": 9.788428015268026e-07, | |
| "loss": 0.016914475709199905, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.28643812239170074, | |
| "reward_std": 0.038882166147232056, | |
| "rewards/MCQ_Reward/mean": 0.28643812239170074, | |
| "rewards/MCQ_Reward/std": 0.05762592889368534, | |
| "step": 51, | |
| "train_speed(iter/s)": 0.06051 | |
| }, | |
| { | |
| "clip_ratio": 0.0030939964344725013, | |
| "epoch": 1.04, | |
| "grad_norm": 1.873901128768921, | |
| "kl": 0.1962890625, | |
| "learning_rate": 9.779198285281326e-07, | |
| "loss": 0.015664130449295044, | |
| "memory(GiB)": 18.17, | |
| "step": 52, | |
| "train_speed(iter/s)": 0.061602 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 301.0, | |
| "completions/mean_length": 173.26953125, | |
| "completions/min_length": 50.5, | |
| "epoch": 1.06, | |
| "grad_norm": 1.748197317123413, | |
| "kl": 0.20361328125, | |
| "learning_rate": 9.769776049884563e-07, | |
| "loss": -0.012495264410972595, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.2694673240184784, | |
| "reward_std": 0.03306659869849682, | |
| "rewards/MCQ_Reward/mean": 0.2694673240184784, | |
| "rewards/MCQ_Reward/std": 0.06984242424368858, | |
| "step": 53, | |
| "train_speed(iter/s)": 0.061749 | |
| }, | |
| { | |
| "clip_ratio": 0.003254209994338453, | |
| "epoch": 1.08, | |
| "grad_norm": 1.7254936695098877, | |
| "kl": 0.22021484375, | |
| "learning_rate": 9.760161688604007e-07, | |
| "loss": -0.012979630380868912, | |
| "memory(GiB)": 18.17, | |
| "step": 54, | |
| "train_speed(iter/s)": 0.062813 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 323.0, | |
| "completions/mean_length": 164.23046875, | |
| "completions/min_length": 74.0, | |
| "epoch": 1.1, | |
| "grad_norm": 1.8942813873291016, | |
| "kl": 0.21044921875, | |
| "learning_rate": 9.750355588704727e-07, | |
| "loss": -0.009442738257348537, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.29137177765369415, | |
| "reward_std": 0.03919493593275547, | |
| "rewards/MCQ_Reward/mean": 0.29137177765369415, | |
| "rewards/MCQ_Reward/std": 0.055357255041599274, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.062825 | |
| }, | |
| { | |
| "clip_ratio": 0.0029244048055261374, | |
| "epoch": 1.12, | |
| "grad_norm": 1.8403282165527344, | |
| "kl": 0.2255859375, | |
| "learning_rate": 9.740358145174997e-07, | |
| "loss": -0.010412258096039295, | |
| "memory(GiB)": 18.17, | |
| "step": 56, | |
| "train_speed(iter/s)": 0.063885 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 291.5, | |
| "completions/mean_length": 159.5703125, | |
| "completions/min_length": 68.5, | |
| "epoch": 1.1400000000000001, | |
| "grad_norm": 1.9502640962600708, | |
| "kl": 0.24072265625, | |
| "learning_rate": 9.730169760710385e-07, | |
| "loss": -0.01350313052535057, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3086051344871521, | |
| "reward_std": 0.036856647580862045, | |
| "rewards/MCQ_Reward/mean": 0.3086051344871521, | |
| "rewards/MCQ_Reward/std": 0.05716245248913765, | |
| "step": 57, | |
| "train_speed(iter/s)": 0.064059 | |
| }, | |
| { | |
| "clip_ratio": 0.0026392132276669145, | |
| "epoch": 1.16, | |
| "grad_norm": 1.8639681339263916, | |
| "kl": 0.244140625, | |
| "learning_rate": 9.719790845697532e-07, | |
| "loss": -0.014377694576978683, | |
| "memory(GiB)": 18.17, | |
| "step": 58, | |
| "train_speed(iter/s)": 0.065093 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 236.0, | |
| "completions/mean_length": 133.83984375, | |
| "completions/min_length": 52.5, | |
| "epoch": 1.18, | |
| "grad_norm": 2.159579038619995, | |
| "kl": 0.2607421875, | |
| "learning_rate": 9.709221818197623e-07, | |
| "loss": -0.03235793486237526, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3192738890647888, | |
| "reward_std": 0.03647255524992943, | |
| "rewards/MCQ_Reward/mean": 0.3192738890647888, | |
| "rewards/MCQ_Reward/std": 0.04580973833799362, | |
| "step": 59, | |
| "train_speed(iter/s)": 0.065376 | |
| }, | |
| { | |
| "clip_ratio": 0.0033569036750122905, | |
| "epoch": 1.2, | |
| "grad_norm": 2.0858945846557617, | |
| "kl": 0.2685546875, | |
| "learning_rate": 9.698463103929541e-07, | |
| "loss": -0.03384597226977348, | |
| "memory(GiB)": 18.17, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.066397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 275.5, | |
| "completions/mean_length": 152.6640625, | |
| "completions/min_length": 54.0, | |
| "epoch": 1.22, | |
| "grad_norm": 1.9752745628356934, | |
| "kl": 0.2509765625, | |
| "learning_rate": 9.68751513625273e-07, | |
| "loss": -0.012610888108611107, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.30408790707588196, | |
| "reward_std": 0.03896576911211014, | |
| "rewards/MCQ_Reward/mean": 0.30408790707588196, | |
| "rewards/MCQ_Reward/std": 0.059865519404411316, | |
| "step": 61, | |
| "train_speed(iter/s)": 0.066047 | |
| }, | |
| { | |
| "clip_ratio": 0.0028306948952376842, | |
| "epoch": 1.24, | |
| "grad_norm": 1.8911457061767578, | |
| "kl": 0.2509765625, | |
| "learning_rate": 9.676378356149732e-07, | |
| "loss": -0.014004014432430267, | |
| "memory(GiB)": 18.17, | |
| "step": 62, | |
| "train_speed(iter/s)": 0.067044 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 275.5, | |
| "completions/mean_length": 147.6953125, | |
| "completions/min_length": 69.0, | |
| "epoch": 1.26, | |
| "grad_norm": 2.153862953186035, | |
| "kl": 0.265625, | |
| "learning_rate": 9.665053212208426e-07, | |
| "loss": -0.027626825496554375, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.31602054834365845, | |
| "reward_std": 0.03946657292544842, | |
| "rewards/MCQ_Reward/mean": 0.31602054834365845, | |
| "rewards/MCQ_Reward/std": 0.06625748611986637, | |
| "step": 63, | |
| "train_speed(iter/s)": 0.067162 | |
| }, | |
| { | |
| "clip_ratio": 0.004200217663310468, | |
| "epoch": 1.28, | |
| "grad_norm": 2.027595281600952, | |
| "kl": 0.2626953125, | |
| "learning_rate": 9.653540160603955e-07, | |
| "loss": -0.028667613863945007, | |
| "memory(GiB)": 18.17, | |
| "step": 64, | |
| "train_speed(iter/s)": 0.06814 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 300.5, | |
| "completions/mean_length": 153.3828125, | |
| "completions/min_length": 42.0, | |
| "epoch": 1.3, | |
| "grad_norm": 2.058096170425415, | |
| "kl": 0.26318359375, | |
| "learning_rate": 9.641839665080363e-07, | |
| "loss": 0.019130591303110123, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3058909475803375, | |
| "reward_std": 0.03743278048932552, | |
| "rewards/MCQ_Reward/mean": 0.3058909475803375, | |
| "rewards/MCQ_Reward/std": 0.06633425317704678, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.068294 | |
| }, | |
| { | |
| "clip_ratio": 0.0030368451261892915, | |
| "epoch": 1.32, | |
| "grad_norm": 2.0810675621032715, | |
| "kl": 0.26708984375, | |
| "learning_rate": 9.6299521969319e-07, | |
| "loss": 0.01858600787818432, | |
| "memory(GiB)": 18.17, | |
| "step": 66, | |
| "train_speed(iter/s)": 0.069245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 310.5, | |
| "completions/mean_length": 170.65625, | |
| "completions/min_length": 70.0, | |
| "epoch": 1.34, | |
| "grad_norm": 1.9177082777023315, | |
| "kl": 0.25390625, | |
| "learning_rate": 9.617878234984054e-07, | |
| "loss": 0.013776745647192001, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.32124653458595276, | |
| "reward_std": 0.03586815297603607, | |
| "rewards/MCQ_Reward/mean": 0.32124653458595276, | |
| "rewards/MCQ_Reward/std": 0.05279739946126938, | |
| "step": 67, | |
| "train_speed(iter/s)": 0.069258 | |
| }, | |
| { | |
| "clip_ratio": 0.003581640077754855, | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 1.800355076789856, | |
| "kl": 0.271484375, | |
| "learning_rate": 9.60561826557425e-07, | |
| "loss": 0.01218567043542862, | |
| "memory(GiB)": 18.17, | |
| "step": 68, | |
| "train_speed(iter/s)": 0.070198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 320.5, | |
| "completions/mean_length": 165.45703125, | |
| "completions/min_length": 84.5, | |
| "epoch": 1.38, | |
| "grad_norm": 1.9321861267089844, | |
| "kl": 0.2734375, | |
| "learning_rate": 9.593172782532267e-07, | |
| "loss": -0.06093820929527283, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.33785562217235565, | |
| "reward_std": 0.03626340813934803, | |
| "rewards/MCQ_Reward/mean": 0.33785562217235565, | |
| "rewards/MCQ_Reward/std": 0.04918426461517811, | |
| "step": 69, | |
| "train_speed(iter/s)": 0.070079 | |
| }, | |
| { | |
| "clip_ratio": 0.002684593666344881, | |
| "epoch": 1.4, | |
| "grad_norm": 1.9250681400299072, | |
| "kl": 0.2822265625, | |
| "learning_rate": 9.580542287160346e-07, | |
| "loss": -0.06187870353460312, | |
| "memory(GiB)": 18.17, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.071007 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 315.5, | |
| "completions/mean_length": 167.71875, | |
| "completions/min_length": 60.0, | |
| "epoch": 1.42, | |
| "grad_norm": 1.9310671091079712, | |
| "kl": 0.26953125, | |
| "learning_rate": 9.567727288213004e-07, | |
| "loss": -0.03052324429154396, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3391506224870682, | |
| "reward_std": 0.037205325439572334, | |
| "rewards/MCQ_Reward/mean": 0.3391506224870682, | |
| "rewards/MCQ_Reward/std": 0.06270403787493706, | |
| "step": 71, | |
| "train_speed(iter/s)": 0.070595 | |
| }, | |
| { | |
| "clip_ratio": 0.004182511591352522, | |
| "epoch": 1.44, | |
| "grad_norm": 1.808637261390686, | |
| "kl": 0.26953125, | |
| "learning_rate": 9.554728301876524e-07, | |
| "loss": -0.031438540667295456, | |
| "memory(GiB)": 18.17, | |
| "step": 72, | |
| "train_speed(iter/s)": 0.071499 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 330.0, | |
| "completions/mean_length": 171.5859375, | |
| "completions/min_length": 73.5, | |
| "epoch": 1.46, | |
| "grad_norm": 2.1356284618377686, | |
| "kl": 0.2666015625, | |
| "learning_rate": 9.541545851748185e-07, | |
| "loss": 0.06165466085076332, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3267658054828644, | |
| "reward_std": 0.03793729655444622, | |
| "rewards/MCQ_Reward/mean": 0.3267658054828644, | |
| "rewards/MCQ_Reward/std": 0.06866181083023548, | |
| "step": 73, | |
| "train_speed(iter/s)": 0.071359 | |
| }, | |
| { | |
| "clip_ratio": 0.0023740422911942005, | |
| "epoch": 1.48, | |
| "grad_norm": 2.081942319869995, | |
| "kl": 0.2724609375, | |
| "learning_rate": 9.528180468815154e-07, | |
| "loss": 0.06085401773452759, | |
| "memory(GiB)": 18.17, | |
| "step": 74, | |
| "train_speed(iter/s)": 0.072254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 388.0, | |
| "completions/mean_length": 176.4140625, | |
| "completions/min_length": 60.0, | |
| "epoch": 1.5, | |
| "grad_norm": 1.819736361503601, | |
| "kl": 0.291015625, | |
| "learning_rate": 9.514632691433106e-07, | |
| "loss": 0.041995078325271606, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.34543414413928986, | |
| "reward_std": 0.03658975474536419, | |
| "rewards/MCQ_Reward/mean": 0.34543414413928986, | |
| "rewards/MCQ_Reward/std": 0.0643342137336731, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.072103 | |
| }, | |
| { | |
| "clip_ratio": 0.0024005533196032047, | |
| "epoch": 1.52, | |
| "grad_norm": 1.7825483083724976, | |
| "kl": 0.302734375, | |
| "learning_rate": 9.500903065304539e-07, | |
| "loss": 0.04098404943943024, | |
| "memory(GiB)": 18.17, | |
| "step": 76, | |
| "train_speed(iter/s)": 0.072975 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 324.0, | |
| "completions/mean_length": 179.35546875, | |
| "completions/min_length": 71.5, | |
| "epoch": 1.54, | |
| "grad_norm": 1.83073091506958, | |
| "kl": 0.2919921875, | |
| "learning_rate": 9.486992143456791e-07, | |
| "loss": 0.026145532727241516, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.33697785437107086, | |
| "reward_std": 0.033385418355464935, | |
| "rewards/MCQ_Reward/mean": 0.33697785437107086, | |
| "rewards/MCQ_Reward/std": 0.06162330321967602, | |
| "step": 77, | |
| "train_speed(iter/s)": 0.072818 | |
| }, | |
| { | |
| "clip_ratio": 0.0029612210346385837, | |
| "epoch": 1.56, | |
| "grad_norm": 1.7568435668945312, | |
| "kl": 0.3046875, | |
| "learning_rate": 9.472900486219768e-07, | |
| "loss": 0.02535586804151535, | |
| "memory(GiB)": 18.17, | |
| "step": 78, | |
| "train_speed(iter/s)": 0.07364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 297.0, | |
| "completions/mean_length": 181.63671875, | |
| "completions/min_length": 86.0, | |
| "epoch": 1.58, | |
| "grad_norm": 1.763022541999817, | |
| "kl": 0.296875, | |
| "learning_rate": 9.458628661203366e-07, | |
| "loss": -0.016155043616890907, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3397578001022339, | |
| "reward_std": 0.030555096454918385, | |
| "rewards/MCQ_Reward/mean": 0.3397578001022339, | |
| "rewards/MCQ_Reward/std": 0.0736413523554802, | |
| "step": 79, | |
| "train_speed(iter/s)": 0.073639 | |
| }, | |
| { | |
| "clip_ratio": 0.003752505173906684, | |
| "epoch": 1.6, | |
| "grad_norm": 1.75266695022583, | |
| "kl": 0.314453125, | |
| "learning_rate": 9.444177243274617e-07, | |
| "loss": -0.016932127997279167, | |
| "memory(GiB)": 18.17, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.074482 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 316.0, | |
| "completions/mean_length": 173.53515625, | |
| "completions/min_length": 82.5, | |
| "epoch": 1.62, | |
| "grad_norm": 1.813202142715454, | |
| "kl": 0.3193359375, | |
| "learning_rate": 9.429546814534528e-07, | |
| "loss": 0.014175940304994583, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.35451021790504456, | |
| "reward_std": 0.0316955391317606, | |
| "rewards/MCQ_Reward/mean": 0.35451021790504456, | |
| "rewards/MCQ_Reward/std": 0.058956997469067574, | |
| "step": 81, | |
| "train_speed(iter/s)": 0.073923 | |
| }, | |
| { | |
| "clip_ratio": 0.003929685335606337, | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 1.7315208911895752, | |
| "kl": 0.337890625, | |
| "learning_rate": 9.414737964294634e-07, | |
| "loss": 0.013125661760568619, | |
| "memory(GiB)": 18.17, | |
| "step": 82, | |
| "train_speed(iter/s)": 0.074757 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 265.5, | |
| "completions/mean_length": 159.95703125, | |
| "completions/min_length": 68.5, | |
| "epoch": 1.6600000000000001, | |
| "grad_norm": 1.86507248878479, | |
| "kl": 0.333984375, | |
| "learning_rate": 9.399751289053266e-07, | |
| "loss": 0.0190749391913414, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.32107532024383545, | |
| "reward_std": 0.03531700000166893, | |
| "rewards/MCQ_Reward/mean": 0.32107532024383545, | |
| "rewards/MCQ_Reward/std": 0.06730588898062706, | |
| "step": 83, | |
| "train_speed(iter/s)": 0.074766 | |
| }, | |
| { | |
| "clip_ratio": 0.005602485965937376, | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 1.8452680110931396, | |
| "kl": 0.3515625, | |
| "learning_rate": 9.384587392471514e-07, | |
| "loss": 0.018391648307442665, | |
| "memory(GiB)": 18.17, | |
| "step": 84, | |
| "train_speed(iter/s)": 0.075562 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 274.5, | |
| "completions/mean_length": 146.36328125, | |
| "completions/min_length": 51.5, | |
| "epoch": 1.7, | |
| "grad_norm": 2.060523271560669, | |
| "kl": 0.3564453125, | |
| "learning_rate": 9.369246885348925e-07, | |
| "loss": 0.00966290757060051, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.34230072796344757, | |
| "reward_std": 0.03451686259359121, | |
| "rewards/MCQ_Reward/mean": 0.34230072796344757, | |
| "rewards/MCQ_Reward/std": 0.07506715506315231, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.075608 | |
| }, | |
| { | |
| "clip_ratio": 0.0025914940051734447, | |
| "epoch": 1.72, | |
| "grad_norm": 2.089233875274658, | |
| "kl": 0.357421875, | |
| "learning_rate": 9.353730385598886e-07, | |
| "loss": 0.008917246013879776, | |
| "memory(GiB)": 18.17, | |
| "step": 86, | |
| "train_speed(iter/s)": 0.076403 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 251.0, | |
| "completions/mean_length": 149.41796875, | |
| "completions/min_length": 72.0, | |
| "epoch": 1.74, | |
| "grad_norm": 2.100825071334839, | |
| "kl": 0.3642578125, | |
| "learning_rate": 9.338038518223745e-07, | |
| "loss": 0.0011688023805618286, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.29714760184288025, | |
| "reward_std": 0.03046888206154108, | |
| "rewards/MCQ_Reward/mean": 0.29714760184288025, | |
| "rewards/MCQ_Reward/std": 0.0724717304110527, | |
| "step": 87, | |
| "train_speed(iter/s)": 0.076468 | |
| }, | |
| { | |
| "clip_ratio": 0.0029116831719875336, | |
| "epoch": 1.76, | |
| "grad_norm": 2.091975688934326, | |
| "kl": 0.3740234375, | |
| "learning_rate": 9.322171915289633e-07, | |
| "loss": 0.0007365690544247627, | |
| "memory(GiB)": 18.17, | |
| "step": 88, | |
| "train_speed(iter/s)": 0.077267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 243.0, | |
| "completions/mean_length": 149.0546875, | |
| "completions/min_length": 74.5, | |
| "epoch": 1.78, | |
| "grad_norm": 2.0660133361816406, | |
| "kl": 0.5546875, | |
| "learning_rate": 9.306131215901003e-07, | |
| "loss": -0.002558637410402298, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3453996330499649, | |
| "reward_std": 0.030298423022031784, | |
| "rewards/MCQ_Reward/mean": 0.3453996330499649, | |
| "rewards/MCQ_Reward/std": 0.05576108209788799, | |
| "step": 89, | |
| "train_speed(iter/s)": 0.07741 | |
| }, | |
| { | |
| "clip_ratio": 0.0030759836081415415, | |
| "epoch": 1.8, | |
| "grad_norm": 1.9661788940429688, | |
| "kl": 0.5439453125, | |
| "learning_rate": 9.289917066174885e-07, | |
| "loss": -0.003219339996576309, | |
| "memory(GiB)": 18.17, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.078204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 279.0, | |
| "completions/mean_length": 137.28125, | |
| "completions/min_length": 57.0, | |
| "epoch": 1.8199999999999998, | |
| "grad_norm": 2.1432077884674072, | |
| "kl": 0.4169921875, | |
| "learning_rate": 9.273530119214867e-07, | |
| "loss": -0.019994597882032394, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3450734615325928, | |
| "reward_std": 0.03698188066482544, | |
| "rewards/MCQ_Reward/mean": 0.3450734615325928, | |
| "rewards/MCQ_Reward/std": 0.06834666058421135, | |
| "step": 91, | |
| "train_speed(iter/s)": 0.077823 | |
| }, | |
| { | |
| "clip_ratio": 0.006807451136410236, | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 2.026726484298706, | |
| "kl": 0.4423828125, | |
| "learning_rate": 9.256971035084784e-07, | |
| "loss": -0.02127775177359581, | |
| "memory(GiB)": 18.17, | |
| "step": 92, | |
| "train_speed(iter/s)": 0.078595 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 258.0, | |
| "completions/mean_length": 144.11328125, | |
| "completions/min_length": 62.5, | |
| "epoch": 1.8599999999999999, | |
| "grad_norm": 2.5080695152282715, | |
| "kl": 0.44140625, | |
| "learning_rate": 9.240240480782129e-07, | |
| "loss": 0.038984864950180054, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.34395235776901245, | |
| "reward_std": 0.030767593532800674, | |
| "rewards/MCQ_Reward/mean": 0.34395235776901245, | |
| "rewards/MCQ_Reward/std": 0.08772432059049606, | |
| "step": 93, | |
| "train_speed(iter/s)": 0.07864 | |
| }, | |
| { | |
| "clip_ratio": 0.0038948373403400183, | |
| "epoch": 1.88, | |
| "grad_norm": 2.293992042541504, | |
| "kl": 0.466796875, | |
| "learning_rate": 9.223339130211192e-07, | |
| "loss": 0.03854737430810928, | |
| "memory(GiB)": 18.17, | |
| "step": 94, | |
| "train_speed(iter/s)": 0.0794 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 288.0, | |
| "completions/mean_length": 144.3671875, | |
| "completions/min_length": 66.5, | |
| "epoch": 1.9, | |
| "grad_norm": 2.3717093467712402, | |
| "kl": 0.4423828125, | |
| "learning_rate": 9.206267664155906e-07, | |
| "loss": 0.02822975069284439, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.35692907869815826, | |
| "reward_std": 0.033766910433769226, | |
| "rewards/MCQ_Reward/mean": 0.35692907869815826, | |
| "rewards/MCQ_Reward/std": 0.055017637088894844, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.079264 | |
| }, | |
| { | |
| "clip_ratio": 0.01540788309648633, | |
| "epoch": 1.92, | |
| "grad_norm": 2.8082501888275146, | |
| "kl": 0.4873046875, | |
| "learning_rate": 9.189026770252436e-07, | |
| "loss": 0.027400558814406395, | |
| "memory(GiB)": 18.17, | |
| "step": 96, | |
| "train_speed(iter/s)": 0.080015 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 216.5, | |
| "completions/mean_length": 131.2265625, | |
| "completions/min_length": 64.0, | |
| "epoch": 1.94, | |
| "grad_norm": 2.578866481781006, | |
| "kl": 0.458984375, | |
| "learning_rate": 9.171617142961476e-07, | |
| "loss": -0.028647061437368393, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.35198159515857697, | |
| "reward_std": 0.036471933126449585, | |
| "rewards/MCQ_Reward/mean": 0.35198159515857697, | |
| "rewards/MCQ_Reward/std": 0.09679177403450012, | |
| "step": 97, | |
| "train_speed(iter/s)": 0.080136 | |
| }, | |
| { | |
| "clip_ratio": 0.007482210174202919, | |
| "epoch": 1.96, | |
| "grad_norm": 2.6245126724243164, | |
| "kl": 0.455078125, | |
| "learning_rate": 9.154039483540272e-07, | |
| "loss": -0.02990054339170456, | |
| "memory(GiB)": 18.17, | |
| "step": 98, | |
| "train_speed(iter/s)": 0.080877 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 254.5, | |
| "completions/mean_length": 140.546875, | |
| "completions/min_length": 70.0, | |
| "epoch": 1.98, | |
| "grad_norm": 2.0212841033935547, | |
| "kl": 0.4462890625, | |
| "learning_rate": 9.136294500014385e-07, | |
| "loss": 0.007645269390195608, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3687240034341812, | |
| "reward_std": 0.0377286896109581, | |
| "rewards/MCQ_Reward/mean": 0.3687240034341812, | |
| "rewards/MCQ_Reward/std": 0.09235312044620514, | |
| "step": 99, | |
| "train_speed(iter/s)": 0.080838 | |
| }, | |
| { | |
| "clip_ratio": 0.004757207585498691, | |
| "epoch": 2.0, | |
| "grad_norm": 1.9354287385940552, | |
| "kl": 0.4638671875, | |
| "learning_rate": 9.118382907149163e-07, | |
| "loss": 0.006971254944801331, | |
| "memory(GiB)": 18.17, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.08155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 252.5, | |
| "completions/mean_length": 123.4140625, | |
| "completions/min_length": 54.0, | |
| "epoch": 2.02, | |
| "grad_norm": 2.3176586627960205, | |
| "kl": 0.4755859375, | |
| "learning_rate": 9.100305426420956e-07, | |
| "loss": -0.016116395592689514, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38898809254169464, | |
| "reward_std": 0.038034453988075256, | |
| "rewards/MCQ_Reward/mean": 0.38898809254169464, | |
| "rewards/MCQ_Reward/std": 0.07776015624403954, | |
| "step": 101, | |
| "train_speed(iter/s)": 0.081234 | |
| }, | |
| { | |
| "clip_ratio": 0.004006300354376435, | |
| "epoch": 2.04, | |
| "grad_norm": 2.1871023178100586, | |
| "kl": 0.4931640625, | |
| "learning_rate": 9.082062785988048e-07, | |
| "loss": -0.01703297346830368, | |
| "memory(GiB)": 18.17, | |
| "step": 102, | |
| "train_speed(iter/s)": 0.081962 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 199.0, | |
| "completions/mean_length": 113.1484375, | |
| "completions/min_length": 56.5, | |
| "epoch": 2.06, | |
| "grad_norm": 2.5120768547058105, | |
| "kl": 0.517578125, | |
| "learning_rate": 9.06365572066134e-07, | |
| "loss": -0.027387384325265884, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.357058048248291, | |
| "reward_std": 0.031020362861454487, | |
| "rewards/MCQ_Reward/mean": 0.357058048248291, | |
| "rewards/MCQ_Reward/std": 0.06582547165453434, | |
| "step": 103, | |
| "train_speed(iter/s)": 0.082061 | |
| }, | |
| { | |
| "clip_ratio": 0.014288442209362984, | |
| "epoch": 2.08, | |
| "grad_norm": 3.2106845378875732, | |
| "kl": 0.5009765625, | |
| "learning_rate": 9.045084971874737e-07, | |
| "loss": -0.02823379635810852, | |
| "memory(GiB)": 18.17, | |
| "step": 104, | |
| "train_speed(iter/s)": 0.082761 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 211.0, | |
| "completions/mean_length": 126.953125, | |
| "completions/min_length": 70.0, | |
| "epoch": 2.1, | |
| "grad_norm": 2.2478950023651123, | |
| "kl": 0.48828125, | |
| "learning_rate": 9.026351287655293e-07, | |
| "loss": 0.02888938970863819, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3573220670223236, | |
| "reward_std": 0.03388269431889057, | |
| "rewards/MCQ_Reward/mean": 0.3573220670223236, | |
| "rewards/MCQ_Reward/std": 0.08621830865740776, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.082851 | |
| }, | |
| { | |
| "clip_ratio": 0.005271225702017546, | |
| "epoch": 2.12, | |
| "grad_norm": 2.07523250579834, | |
| "kl": 0.513671875, | |
| "learning_rate": 9.007455422593075e-07, | |
| "loss": 0.028001034632325172, | |
| "memory(GiB)": 18.17, | |
| "step": 106, | |
| "train_speed(iter/s)": 0.083561 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 244.0, | |
| "completions/mean_length": 143.50390625, | |
| "completions/min_length": 62.5, | |
| "epoch": 2.14, | |
| "grad_norm": 2.149932861328125, | |
| "kl": 0.474609375, | |
| "learning_rate": 8.988398137810776e-07, | |
| "loss": -0.0027789073064923286, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.37795157730579376, | |
| "reward_std": 0.03415030054748058, | |
| "rewards/MCQ_Reward/mean": 0.37795157730579376, | |
| "rewards/MCQ_Reward/std": 0.07794364914298058, | |
| "step": 107, | |
| "train_speed(iter/s)": 0.083617 | |
| }, | |
| { | |
| "clip_ratio": 0.008057619212195277, | |
| "epoch": 2.16, | |
| "grad_norm": 2.7377026081085205, | |
| "kl": 0.5078125, | |
| "learning_rate": 8.969180200933047e-07, | |
| "loss": -0.003491489216685295, | |
| "memory(GiB)": 18.17, | |
| "step": 108, | |
| "train_speed(iter/s)": 0.084274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 226.5, | |
| "completions/mean_length": 133.1875, | |
| "completions/min_length": 58.5, | |
| "epoch": 2.18, | |
| "grad_norm": 2.826488494873047, | |
| "kl": 0.5390625, | |
| "learning_rate": 8.94980238605558e-07, | |
| "loss": 0.02833351120352745, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39782722294330597, | |
| "reward_std": 0.031135279685258865, | |
| "rewards/MCQ_Reward/mean": 0.39782722294330597, | |
| "rewards/MCQ_Reward/std": 0.07045348361134529, | |
| "step": 109, | |
| "train_speed(iter/s)": 0.084336 | |
| }, | |
| { | |
| "clip_ratio": 0.00684792990796268, | |
| "epoch": 2.2, | |
| "grad_norm": 2.434086322784424, | |
| "kl": 0.5703125, | |
| "learning_rate": 8.930265473713937e-07, | |
| "loss": 0.027658611536026, | |
| "memory(GiB)": 18.17, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.085034 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 216.0, | |
| "completions/mean_length": 131.703125, | |
| "completions/min_length": 67.0, | |
| "epoch": 2.22, | |
| "grad_norm": 2.134516716003418, | |
| "kl": 0.48828125, | |
| "learning_rate": 8.910570250852096e-07, | |
| "loss": 0.006394753232598305, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3707956522703171, | |
| "reward_std": 0.03248129412531853, | |
| "rewards/MCQ_Reward/mean": 0.3707956522703171, | |
| "rewards/MCQ_Reward/std": 0.10541465878486633, | |
| "step": 111, | |
| "train_speed(iter/s)": 0.084685 | |
| }, | |
| { | |
| "clip_ratio": 0.00865771621465683, | |
| "epoch": 2.24, | |
| "grad_norm": 2.2900125980377197, | |
| "kl": 0.513671875, | |
| "learning_rate": 8.890717510790762e-07, | |
| "loss": 0.00539240799844265, | |
| "memory(GiB)": 18.17, | |
| "step": 112, | |
| "train_speed(iter/s)": 0.085353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 264.5, | |
| "completions/mean_length": 126.9140625, | |
| "completions/min_length": 62.0, | |
| "epoch": 2.26, | |
| "grad_norm": 2.6178812980651855, | |
| "kl": 0.546875, | |
| "learning_rate": 8.870708053195413e-07, | |
| "loss": 0.019267559051513672, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3922416865825653, | |
| "reward_std": 0.03025819268077612, | |
| "rewards/MCQ_Reward/mean": 0.3922416865825653, | |
| "rewards/MCQ_Reward/std": 0.08424495533108711, | |
| "step": 113, | |
| "train_speed(iter/s)": 0.085338 | |
| }, | |
| { | |
| "clip_ratio": 0.006454117828980088, | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 2.1509737968444824, | |
| "kl": 0.57421875, | |
| "learning_rate": 8.850542684044078e-07, | |
| "loss": 0.01820582151412964, | |
| "memory(GiB)": 18.17, | |
| "step": 114, | |
| "train_speed(iter/s)": 0.085985 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.5, | |
| "completions/mean_length": 118.85546875, | |
| "completions/min_length": 59.5, | |
| "epoch": 2.3, | |
| "grad_norm": 2.528681755065918, | |
| "kl": 0.525390625, | |
| "learning_rate": 8.83022221559489e-07, | |
| "loss": 0.008160990662872791, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.404242143034935, | |
| "reward_std": 0.03400178253650665, | |
| "rewards/MCQ_Reward/mean": 0.404242143034935, | |
| "rewards/MCQ_Reward/std": 0.09943690523505211, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.086069 | |
| }, | |
| { | |
| "clip_ratio": 0.005366077646613121, | |
| "epoch": 2.32, | |
| "grad_norm": 2.1966934204101562, | |
| "kl": 0.546875, | |
| "learning_rate": 8.809747466353355e-07, | |
| "loss": 0.007157166488468647, | |
| "memory(GiB)": 18.17, | |
| "step": 116, | |
| "train_speed(iter/s)": 0.086734 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 239.0, | |
| "completions/mean_length": 125.3359375, | |
| "completions/min_length": 59.5, | |
| "epoch": 2.34, | |
| "grad_norm": 2.4033124446868896, | |
| "kl": 0.537109375, | |
| "learning_rate": 8.789119261039384e-07, | |
| "loss": 0.017890973016619682, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.36347851157188416, | |
| "reward_std": 0.027591521851718426, | |
| "rewards/MCQ_Reward/mean": 0.36347851157188416, | |
| "rewards/MCQ_Reward/std": 0.09114562720060349, | |
| "step": 117, | |
| "train_speed(iter/s)": 0.086687 | |
| }, | |
| { | |
| "clip_ratio": 0.011405623517930508, | |
| "epoch": 2.36, | |
| "grad_norm": 2.8501975536346436, | |
| "kl": 0.587890625, | |
| "learning_rate": 8.768338430554082e-07, | |
| "loss": 0.016866052523255348, | |
| "memory(GiB)": 18.17, | |
| "step": 118, | |
| "train_speed(iter/s)": 0.08735 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 194.0, | |
| "completions/mean_length": 122.23046875, | |
| "completions/min_length": 65.0, | |
| "epoch": 2.38, | |
| "grad_norm": 2.5570151805877686, | |
| "kl": 0.5126953125, | |
| "learning_rate": 8.74740581194627e-07, | |
| "loss": -0.011926580220460892, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40480077266693115, | |
| "reward_std": 0.03289741463959217, | |
| "rewards/MCQ_Reward/mean": 0.40480077266693115, | |
| "rewards/MCQ_Reward/std": 0.08261778578162193, | |
| "step": 119, | |
| "train_speed(iter/s)": 0.087419 | |
| }, | |
| { | |
| "clip_ratio": 0.007963848765939474, | |
| "epoch": 2.4, | |
| "grad_norm": 2.1802773475646973, | |
| "kl": 0.5009765625, | |
| "learning_rate": 8.726322248378774e-07, | |
| "loss": -0.0127539848908782, | |
| "memory(GiB)": 18.17, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.088053 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 244.0, | |
| "completions/mean_length": 130.2421875, | |
| "completions/min_length": 60.5, | |
| "epoch": 2.42, | |
| "grad_norm": 2.4936065673828125, | |
| "kl": 0.537109375, | |
| "learning_rate": 8.705088589094458e-07, | |
| "loss": 0.008000252768397331, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.36072438955307007, | |
| "reward_std": 0.030319811776280403, | |
| "rewards/MCQ_Reward/mean": 0.36072438955307007, | |
| "rewards/MCQ_Reward/std": 0.1019350104033947, | |
| "step": 121, | |
| "train_speed(iter/s)": 0.08768 | |
| }, | |
| { | |
| "clip_ratio": 0.006943409331142902, | |
| "epoch": 2.44, | |
| "grad_norm": 2.4447567462921143, | |
| "kl": 0.544921875, | |
| "learning_rate": 8.683705689382024e-07, | |
| "loss": 0.0072016119956970215, | |
| "memory(GiB)": 18.17, | |
| "step": 122, | |
| "train_speed(iter/s)": 0.088326 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 191.5, | |
| "completions/mean_length": 112.40234375, | |
| "completions/min_length": 53.0, | |
| "epoch": 2.46, | |
| "grad_norm": 2.279759168624878, | |
| "kl": 0.55859375, | |
| "learning_rate": 8.662174410541554e-07, | |
| "loss": 0.00623547937721014, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3670702576637268, | |
| "reward_std": 0.02890967670828104, | |
| "rewards/MCQ_Reward/mean": 0.3670702576637268, | |
| "rewards/MCQ_Reward/std": 0.0740283839404583, | |
| "step": 123, | |
| "train_speed(iter/s)": 0.088484 | |
| }, | |
| { | |
| "clip_ratio": 0.007923177909106016, | |
| "epoch": 2.48, | |
| "grad_norm": 2.789609909057617, | |
| "kl": 0.587890625, | |
| "learning_rate": 8.64049561984982e-07, | |
| "loss": 0.005373558960855007, | |
| "memory(GiB)": 18.17, | |
| "step": 124, | |
| "train_speed(iter/s)": 0.089133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 194.5, | |
| "completions/mean_length": 124.91796875, | |
| "completions/min_length": 73.0, | |
| "epoch": 2.5, | |
| "grad_norm": 2.2765557765960693, | |
| "kl": 0.498046875, | |
| "learning_rate": 8.61867019052535e-07, | |
| "loss": -0.0031618811190128326, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3880574107170105, | |
| "reward_std": 0.02767461072653532, | |
| "rewards/MCQ_Reward/mean": 0.3880574107170105, | |
| "rewards/MCQ_Reward/std": 0.11312882974743843, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.089217 | |
| }, | |
| { | |
| "clip_ratio": 0.006887951632961631, | |
| "epoch": 2.52, | |
| "grad_norm": 2.2742230892181396, | |
| "kl": 0.509765625, | |
| "learning_rate": 8.596699001693255e-07, | |
| "loss": -0.004048643633723259, | |
| "memory(GiB)": 18.17, | |
| "step": 126, | |
| "train_speed(iter/s)": 0.089838 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 202.5, | |
| "completions/mean_length": 117.484375, | |
| "completions/min_length": 56.5, | |
| "epoch": 2.54, | |
| "grad_norm": 2.340428113937378, | |
| "kl": 0.546875, | |
| "learning_rate": 8.574582938349817e-07, | |
| "loss": -0.009344515390694141, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38609637320041656, | |
| "reward_std": 0.033216655254364014, | |
| "rewards/MCQ_Reward/mean": 0.38609637320041656, | |
| "rewards/MCQ_Reward/std": 0.09242032468318939, | |
| "step": 127, | |
| "train_speed(iter/s)": 0.089914 | |
| }, | |
| { | |
| "clip_ratio": 0.007429210003465414, | |
| "epoch": 2.56, | |
| "grad_norm": 2.3134751319885254, | |
| "kl": 0.57421875, | |
| "learning_rate": 8.552322891326844e-07, | |
| "loss": -0.010545218363404274, | |
| "memory(GiB)": 18.17, | |
| "step": 128, | |
| "train_speed(iter/s)": 0.090544 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/mean_length": 119.9765625, | |
| "completions/min_length": 57.0, | |
| "epoch": 2.58, | |
| "grad_norm": 2.265873670578003, | |
| "kl": 0.4931640625, | |
| "learning_rate": 8.529919757255781e-07, | |
| "loss": -0.007635302376002073, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41428878903388977, | |
| "reward_std": 0.028425303287804127, | |
| "rewards/MCQ_Reward/mean": 0.41428878903388977, | |
| "rewards/MCQ_Reward/std": 0.07786687836050987, | |
| "step": 129, | |
| "train_speed(iter/s)": 0.09048 | |
| }, | |
| { | |
| "clip_ratio": 0.006183756981045008, | |
| "epoch": 2.6, | |
| "grad_norm": 2.283554792404175, | |
| "kl": 0.498046875, | |
| "learning_rate": 8.507374438531606e-07, | |
| "loss": -0.008446864783763885, | |
| "memory(GiB)": 18.17, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.091107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 196.0, | |
| "completions/mean_length": 119.125, | |
| "completions/min_length": 59.0, | |
| "epoch": 2.62, | |
| "grad_norm": 2.8296353816986084, | |
| "kl": 0.525390625, | |
| "learning_rate": 8.484687843276468e-07, | |
| "loss": 0.003696079831570387, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40898391604423523, | |
| "reward_std": 0.02961808815598488, | |
| "rewards/MCQ_Reward/mean": 0.40898391604423523, | |
| "rewards/MCQ_Reward/std": 0.09117832407355309, | |
| "step": 131, | |
| "train_speed(iter/s)": 0.09081 | |
| }, | |
| { | |
| "clip_ratio": 0.010138689540326595, | |
| "epoch": 2.64, | |
| "grad_norm": 2.565761089324951, | |
| "kl": 0.53515625, | |
| "learning_rate": 8.461860885303113e-07, | |
| "loss": 0.003048412501811981, | |
| "memory(GiB)": 18.17, | |
| "step": 132, | |
| "train_speed(iter/s)": 0.091425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 209.0, | |
| "completions/mean_length": 129.40234375, | |
| "completions/min_length": 70.0, | |
| "epoch": 2.66, | |
| "grad_norm": 2.344294786453247, | |
| "kl": 0.513671875, | |
| "learning_rate": 8.438894484078085e-07, | |
| "loss": 0.005981519352644682, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40958625078201294, | |
| "reward_std": 0.027244774624705315, | |
| "rewards/MCQ_Reward/mean": 0.40958625078201294, | |
| "rewards/MCQ_Reward/std": 0.07108591124415398, | |
| "step": 133, | |
| "train_speed(iter/s)": 0.091506 | |
| }, | |
| { | |
| "clip_ratio": 0.006955728633329272, | |
| "epoch": 2.68, | |
| "grad_norm": 2.667799949645996, | |
| "kl": 0.50390625, | |
| "learning_rate": 8.415789564684673e-07, | |
| "loss": 0.0052396636456251144, | |
| "memory(GiB)": 18.17, | |
| "step": 134, | |
| "train_speed(iter/s)": 0.092113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 193.0, | |
| "completions/mean_length": 132.30859375, | |
| "completions/min_length": 79.0, | |
| "epoch": 2.7, | |
| "grad_norm": 2.6722846031188965, | |
| "kl": 0.5029296875, | |
| "learning_rate": 8.392547057785661e-07, | |
| "loss": 0.0176947470754385, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39249348640441895, | |
| "reward_std": 0.024370728991925716, | |
| "rewards/MCQ_Reward/mean": 0.39249348640441895, | |
| "rewards/MCQ_Reward/std": 0.10880232974886894, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.092158 | |
| }, | |
| { | |
| "clip_ratio": 0.009976111352443695, | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 2.80319881439209, | |
| "kl": 0.548828125, | |
| "learning_rate": 8.369167899585839e-07, | |
| "loss": 0.01698880083858967, | |
| "memory(GiB)": 18.17, | |
| "step": 136, | |
| "train_speed(iter/s)": 0.092755 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 186.0, | |
| "completions/mean_length": 117.91015625, | |
| "completions/min_length": 53.5, | |
| "epoch": 2.74, | |
| "grad_norm": 2.5274980068206787, | |
| "kl": 0.5087890625, | |
| "learning_rate": 8.34565303179429e-07, | |
| "loss": -0.004888280760496855, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3668254613876343, | |
| "reward_std": 0.02390660159289837, | |
| "rewards/MCQ_Reward/mean": 0.3668254613876343, | |
| "rewards/MCQ_Reward/std": 0.06858384422957897, | |
| "step": 137, | |
| "train_speed(iter/s)": 0.092788 | |
| }, | |
| { | |
| "clip_ratio": 0.00792233063839376, | |
| "epoch": 2.76, | |
| "grad_norm": 2.6973214149475098, | |
| "kl": 0.513671875, | |
| "learning_rate": 8.322003401586461e-07, | |
| "loss": -0.0054510245099663734, | |
| "memory(GiB)": 18.17, | |
| "step": 138, | |
| "train_speed(iter/s)": 0.093386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.5, | |
| "completions/mean_length": 128.76953125, | |
| "completions/min_length": 74.0, | |
| "epoch": 2.7800000000000002, | |
| "grad_norm": 2.22070574760437, | |
| "kl": 0.4912109375, | |
| "learning_rate": 8.298219961566008e-07, | |
| "loss": -0.001897591631859541, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3943639397621155, | |
| "reward_std": 0.021683918312191963, | |
| "rewards/MCQ_Reward/mean": 0.3943639397621155, | |
| "rewards/MCQ_Reward/std": 0.08081439509987831, | |
| "step": 139, | |
| "train_speed(iter/s)": 0.093426 | |
| }, | |
| { | |
| "clip_ratio": 0.005092586623504758, | |
| "epoch": 2.8, | |
| "grad_norm": 2.3254384994506836, | |
| "kl": 0.5009765625, | |
| "learning_rate": 8.274303669726426e-07, | |
| "loss": -0.0023171789944171906, | |
| "memory(GiB)": 18.17, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.094018 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 236.0, | |
| "completions/mean_length": 131.94140625, | |
| "completions/min_length": 76.0, | |
| "epoch": 2.82, | |
| "grad_norm": 2.8199474811553955, | |
| "kl": 0.513671875, | |
| "learning_rate": 8.250255489412462e-07, | |
| "loss": 0.03072257712483406, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4145784378051758, | |
| "reward_std": 0.026746340095996857, | |
| "rewards/MCQ_Reward/mean": 0.4145784378051758, | |
| "rewards/MCQ_Reward/std": 0.1253884807229042, | |
| "step": 141, | |
| "train_speed(iter/s)": 0.093563 | |
| }, | |
| { | |
| "clip_ratio": 0.01698949094861746, | |
| "epoch": 2.84, | |
| "grad_norm": 3.6371665000915527, | |
| "kl": 0.5654296875, | |
| "learning_rate": 8.226076389281314e-07, | |
| "loss": 0.030751001089811325, | |
| "memory(GiB)": 18.17, | |
| "step": 142, | |
| "train_speed(iter/s)": 0.094156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 222.0, | |
| "completions/mean_length": 122.05859375, | |
| "completions/min_length": 41.0, | |
| "epoch": 2.86, | |
| "grad_norm": 3.697355031967163, | |
| "kl": 0.529296875, | |
| "learning_rate": 8.201767343263611e-07, | |
| "loss": 0.001254035159945488, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4235128164291382, | |
| "reward_std": 0.02945070993155241, | |
| "rewards/MCQ_Reward/mean": 0.4235128164291382, | |
| "rewards/MCQ_Reward/std": 0.0826257448643446, | |
| "step": 143, | |
| "train_speed(iter/s)": 0.094158 | |
| }, | |
| { | |
| "clip_ratio": 0.010704205837100744, | |
| "epoch": 2.88, | |
| "grad_norm": 2.6047918796539307, | |
| "kl": 0.556640625, | |
| "learning_rate": 8.177329330524181e-07, | |
| "loss": 0.0003689592704176903, | |
| "memory(GiB)": 18.17, | |
| "step": 144, | |
| "train_speed(iter/s)": 0.09474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 314.5, | |
| "completions/mean_length": 147.65234375, | |
| "completions/min_length": 84.0, | |
| "epoch": 2.9, | |
| "grad_norm": 2.0444202423095703, | |
| "kl": 0.4521484375, | |
| "learning_rate": 8.152763335422612e-07, | |
| "loss": 0.009064443409442902, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38259103894233704, | |
| "reward_std": 0.023838728666305542, | |
| "rewards/MCQ_Reward/mean": 0.38259103894233704, | |
| "rewards/MCQ_Reward/std": 0.0847747940570116, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.09459 | |
| }, | |
| { | |
| "clip_ratio": 0.013846603687852621, | |
| "epoch": 2.92, | |
| "grad_norm": 3.0148403644561768, | |
| "kl": 0.47265625, | |
| "learning_rate": 8.128070347473608e-07, | |
| "loss": 0.008937995880842209, | |
| "memory(GiB)": 18.17, | |
| "step": 146, | |
| "train_speed(iter/s)": 0.095167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.0, | |
| "completions/mean_length": 131.4609375, | |
| "completions/min_length": 58.5, | |
| "epoch": 2.94, | |
| "grad_norm": 2.3035802841186523, | |
| "kl": 0.515625, | |
| "learning_rate": 8.103251361307118e-07, | |
| "loss": -0.003920593298971653, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.46591490507125854, | |
| "reward_std": 0.02803555503487587, | |
| "rewards/MCQ_Reward/mean": 0.46591490507125854, | |
| "rewards/MCQ_Reward/std": 0.08151933178305626, | |
| "step": 147, | |
| "train_speed(iter/s)": 0.095144 | |
| }, | |
| { | |
| "clip_ratio": 0.008604592643678188, | |
| "epoch": 2.96, | |
| "grad_norm": 3.269644021987915, | |
| "kl": 0.498046875, | |
| "learning_rate": 8.07830737662829e-07, | |
| "loss": -0.004623805172741413, | |
| "memory(GiB)": 18.17, | |
| "step": 148, | |
| "train_speed(iter/s)": 0.095712 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 229.0, | |
| "completions/mean_length": 115.5859375, | |
| "completions/min_length": 47.5, | |
| "epoch": 2.98, | |
| "grad_norm": 2.762554883956909, | |
| "kl": 0.55859375, | |
| "learning_rate": 8.053239398177191e-07, | |
| "loss": -0.002270375844091177, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40475866198539734, | |
| "reward_std": 0.02323055360466242, | |
| "rewards/MCQ_Reward/mean": 0.40475866198539734, | |
| "rewards/MCQ_Reward/std": 0.11423858627676964, | |
| "step": 149, | |
| "train_speed(iter/s)": 0.095646 | |
| }, | |
| { | |
| "clip_ratio": 0.005962205119431019, | |
| "epoch": 3.0, | |
| "grad_norm": 2.495875358581543, | |
| "kl": 0.5625, | |
| "learning_rate": 8.028048435688333e-07, | |
| "loss": -0.0031687067821621895, | |
| "memory(GiB)": 18.17, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.0962 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 167.5, | |
| "completions/mean_length": 117.21484375, | |
| "completions/min_length": 58.0, | |
| "epoch": 3.02, | |
| "grad_norm": 3.30179762840271, | |
| "kl": 0.572265625, | |
| "learning_rate": 8.002735503850015e-07, | |
| "loss": -0.0032917922362685204, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39226125180721283, | |
| "reward_std": 0.025511370040476322, | |
| "rewards/MCQ_Reward/mean": 0.39226125180721283, | |
| "rewards/MCQ_Reward/std": 0.08468513377010822, | |
| "step": 151, | |
| "train_speed(iter/s)": 0.095897 | |
| }, | |
| { | |
| "clip_ratio": 0.007298078387975693, | |
| "epoch": 3.04, | |
| "grad_norm": 2.3152873516082764, | |
| "kl": 0.56640625, | |
| "learning_rate": 7.97730162226344e-07, | |
| "loss": -0.004036391619592905, | |
| "memory(GiB)": 18.17, | |
| "step": 152, | |
| "train_speed(iter/s)": 0.096461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 200.0, | |
| "completions/mean_length": 121.8984375, | |
| "completions/min_length": 63.5, | |
| "epoch": 3.06, | |
| "grad_norm": 2.2318758964538574, | |
| "kl": 0.51171875, | |
| "learning_rate": 7.951747815401649e-07, | |
| "loss": 0.008308425545692444, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.425733745098114, | |
| "reward_std": 0.02289827074855566, | |
| "rewards/MCQ_Reward/mean": 0.425733745098114, | |
| "rewards/MCQ_Reward/std": 0.12863966077566147, | |
| "step": 153, | |
| "train_speed(iter/s)": 0.096546 | |
| }, | |
| { | |
| "clip_ratio": 0.009599440731108189, | |
| "epoch": 3.08, | |
| "grad_norm": 3.2350826263427734, | |
| "kl": 0.5009765625, | |
| "learning_rate": 7.926075112568258e-07, | |
| "loss": 0.00774328364059329, | |
| "memory(GiB)": 18.17, | |
| "step": 154, | |
| "train_speed(iter/s)": 0.0971 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 315.5, | |
| "completions/mean_length": 129.765625, | |
| "completions/min_length": 63.5, | |
| "epoch": 3.1, | |
| "grad_norm": 2.8958089351654053, | |
| "kl": 0.5146484375, | |
| "learning_rate": 7.900284547855991e-07, | |
| "loss": 0.005472003482282162, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3814770430326462, | |
| "reward_std": 0.021100854501128197, | |
| "rewards/MCQ_Reward/mean": 0.3814770430326462, | |
| "rewards/MCQ_Reward/std": 0.08354593068361282, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.096733 | |
| }, | |
| { | |
| "clip_ratio": 0.008797692600637674, | |
| "epoch": 3.12, | |
| "grad_norm": 2.330720901489258, | |
| "kl": 0.5107421875, | |
| "learning_rate": 7.874377160105036e-07, | |
| "loss": 0.00483354227617383, | |
| "memory(GiB)": 18.17, | |
| "step": 156, | |
| "train_speed(iter/s)": 0.097282 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 208.0, | |
| "completions/mean_length": 123.1640625, | |
| "completions/min_length": 68.0, | |
| "epoch": 3.14, | |
| "grad_norm": 2.1395411491394043, | |
| "kl": 0.515625, | |
| "learning_rate": 7.848353992861194e-07, | |
| "loss": 0.009709931910037994, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4426523745059967, | |
| "reward_std": 0.024569914676249027, | |
| "rewards/MCQ_Reward/mean": 0.4426523745059967, | |
| "rewards/MCQ_Reward/std": 0.10452848672866821, | |
| "step": 157, | |
| "train_speed(iter/s)": 0.097277 | |
| }, | |
| { | |
| "clip_ratio": 0.008177514653652906, | |
| "epoch": 3.16, | |
| "grad_norm": 2.8377902507781982, | |
| "kl": 0.49609375, | |
| "learning_rate": 7.822216094333847e-07, | |
| "loss": 0.00888834334909916, | |
| "memory(GiB)": 18.17, | |
| "step": 158, | |
| "train_speed(iter/s)": 0.097824 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 192.5, | |
| "completions/mean_length": 121.08203125, | |
| "completions/min_length": 59.0, | |
| "epoch": 3.18, | |
| "grad_norm": 2.439819574356079, | |
| "kl": 0.5009765625, | |
| "learning_rate": 7.795964517353733e-07, | |
| "loss": -0.005721232853829861, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4260745346546173, | |
| "reward_std": 0.024243751540780067, | |
| "rewards/MCQ_Reward/mean": 0.4260745346546173, | |
| "rewards/MCQ_Reward/std": 0.08284034207463264, | |
| "step": 159, | |
| "train_speed(iter/s)": 0.09781 | |
| }, | |
| { | |
| "clip_ratio": 0.006790396990254521, | |
| "epoch": 3.2, | |
| "grad_norm": 1.9817484617233276, | |
| "kl": 0.4970703125, | |
| "learning_rate": 7.769600319330552e-07, | |
| "loss": -0.006797813344746828, | |
| "memory(GiB)": 18.17, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.098355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 200.5, | |
| "completions/mean_length": 112.234375, | |
| "completions/min_length": 54.0, | |
| "epoch": 3.22, | |
| "grad_norm": 2.4277918338775635, | |
| "kl": 0.60546875, | |
| "learning_rate": 7.743124562210351e-07, | |
| "loss": 0.011250641196966171, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4286917597055435, | |
| "reward_std": 0.023968255147337914, | |
| "rewards/MCQ_Reward/mean": 0.4286917597055435, | |
| "rewards/MCQ_Reward/std": 0.08755803853273392, | |
| "step": 161, | |
| "train_speed(iter/s)": 0.097905 | |
| }, | |
| { | |
| "clip_ratio": 0.008228898979723454, | |
| "epoch": 3.24, | |
| "grad_norm": 2.4396235942840576, | |
| "kl": 0.63671875, | |
| "learning_rate": 7.716538312432765e-07, | |
| "loss": 0.009992354549467564, | |
| "memory(GiB)": 18.17, | |
| "step": 162, | |
| "train_speed(iter/s)": 0.098438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 182.0, | |
| "completions/mean_length": 128.484375, | |
| "completions/min_length": 65.5, | |
| "epoch": 3.26, | |
| "grad_norm": 2.378303289413452, | |
| "kl": 0.4560546875, | |
| "learning_rate": 7.689842640888063e-07, | |
| "loss": 0.014578643254935741, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4368235617876053, | |
| "reward_std": 0.024292019195854664, | |
| "rewards/MCQ_Reward/mean": 0.4368235617876053, | |
| "rewards/MCQ_Reward/std": 0.10128979757428169, | |
| "step": 163, | |
| "train_speed(iter/s)": 0.098485 | |
| }, | |
| { | |
| "clip_ratio": 0.006144619081169367, | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 2.336179733276367, | |
| "kl": 0.455078125, | |
| "learning_rate": 7.663038622873999e-07, | |
| "loss": 0.014264167286455631, | |
| "memory(GiB)": 18.17, | |
| "step": 164, | |
| "train_speed(iter/s)": 0.09902 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 190.0, | |
| "completions/mean_length": 127.5546875, | |
| "completions/min_length": 68.0, | |
| "epoch": 3.3, | |
| "grad_norm": 2.3888978958129883, | |
| "kl": 0.51953125, | |
| "learning_rate": 7.636127338052511e-07, | |
| "loss": 0.0008876635693013668, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3655773550271988, | |
| "reward_std": 0.023151511326432228, | |
| "rewards/MCQ_Reward/mean": 0.3655773550271988, | |
| "rewards/MCQ_Reward/std": 0.08209535107016563, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.099067 | |
| }, | |
| { | |
| "clip_ratio": 0.009708862751722336, | |
| "epoch": 3.32, | |
| "grad_norm": 2.849376678466797, | |
| "kl": 0.53515625, | |
| "learning_rate": 7.60910987040623e-07, | |
| "loss": 0.0005215085111558437, | |
| "memory(GiB)": 18.17, | |
| "step": 166, | |
| "train_speed(iter/s)": 0.099591 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 183.0, | |
| "completions/mean_length": 114.09375, | |
| "completions/min_length": 68.5, | |
| "epoch": 3.34, | |
| "grad_norm": 2.3568837642669678, | |
| "kl": 0.568359375, | |
| "learning_rate": 7.581987308194809e-07, | |
| "loss": 0.009412365034222603, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38831935822963715, | |
| "reward_std": 0.024401471950113773, | |
| "rewards/MCQ_Reward/mean": 0.38831935822963715, | |
| "rewards/MCQ_Reward/std": 0.07682501710951328, | |
| "step": 167, | |
| "train_speed(iter/s)": 0.099643 | |
| }, | |
| { | |
| "clip_ratio": 0.009874043520539999, | |
| "epoch": 3.36, | |
| "grad_norm": 4.141200542449951, | |
| "kl": 0.548828125, | |
| "learning_rate": 7.554760743911103e-07, | |
| "loss": 0.008638818748295307, | |
| "memory(GiB)": 18.17, | |
| "step": 168, | |
| "train_speed(iter/s)": 0.100139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 177.5, | |
| "completions/mean_length": 116.015625, | |
| "completions/min_length": 68.0, | |
| "epoch": 3.38, | |
| "grad_norm": 2.3995447158813477, | |
| "kl": 0.5390625, | |
| "learning_rate": 7.527431274237149e-07, | |
| "loss": 0.009148918092250824, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43169474601745605, | |
| "reward_std": 0.023636899888515472, | |
| "rewards/MCQ_Reward/mean": 0.43169474601745605, | |
| "rewards/MCQ_Reward/std": 0.08781928941607475, | |
| "step": 169, | |
| "train_speed(iter/s)": 0.100207 | |
| }, | |
| { | |
| "clip_ratio": 0.011634313501417637, | |
| "epoch": 3.4, | |
| "grad_norm": 3.3103132247924805, | |
| "kl": 0.580078125, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.008654891513288021, | |
| "memory(GiB)": 18.17, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.100725 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 198.5, | |
| "completions/mean_length": 116.5859375, | |
| "completions/min_length": 61.0, | |
| "epoch": 3.42, | |
| "grad_norm": 2.4376144409179688, | |
| "kl": 0.51171875, | |
| "learning_rate": 7.472468026127384e-07, | |
| "loss": 0.0037187309935688972, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4193449318408966, | |
| "reward_std": 0.024272997863590717, | |
| "rewards/MCQ_Reward/mean": 0.4193449318408966, | |
| "rewards/MCQ_Reward/std": 0.08024471625685692, | |
| "step": 171, | |
| "train_speed(iter/s)": 0.100337 | |
| }, | |
| { | |
| "clip_ratio": 0.004286584910005331, | |
| "epoch": 3.44, | |
| "grad_norm": 2.298527479171753, | |
| "kl": 0.501953125, | |
| "learning_rate": 7.444836461603194e-07, | |
| "loss": 0.0035052020102739334, | |
| "memory(GiB)": 18.17, | |
| "step": 172, | |
| "train_speed(iter/s)": 0.10083 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 192.0, | |
| "completions/mean_length": 107.78515625, | |
| "completions/min_length": 54.0, | |
| "epoch": 3.46, | |
| "grad_norm": 2.706815004348755, | |
| "kl": 0.572265625, | |
| "learning_rate": 7.417106419422818e-07, | |
| "loss": 0.001836567185819149, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4373796284198761, | |
| "reward_std": 0.024632513523101807, | |
| "rewards/MCQ_Reward/mean": 0.4373796284198761, | |
| "rewards/MCQ_Reward/std": 0.10328296199440956, | |
| "step": 173, | |
| "train_speed(iter/s)": 0.100842 | |
| }, | |
| { | |
| "clip_ratio": 0.00837572431191802, | |
| "epoch": 3.48, | |
| "grad_norm": 2.7765517234802246, | |
| "kl": 0.55859375, | |
| "learning_rate": 7.389279016548316e-07, | |
| "loss": 0.0008762972429394722, | |
| "memory(GiB)": 18.17, | |
| "step": 174, | |
| "train_speed(iter/s)": 0.10133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 235.5, | |
| "completions/mean_length": 141.59375, | |
| "completions/min_length": 93.0, | |
| "epoch": 3.5, | |
| "grad_norm": 2.0208756923675537, | |
| "kl": 0.494140625, | |
| "learning_rate": 7.361355373863413e-07, | |
| "loss": -0.0017252122052013874, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4430805742740631, | |
| "reward_std": 0.023134860210120678, | |
| "rewards/MCQ_Reward/mean": 0.4430805742740631, | |
| "rewards/MCQ_Reward/std": 0.10230642557144165, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.101269 | |
| }, | |
| { | |
| "clip_ratio": 0.008417821954935789, | |
| "epoch": 3.52, | |
| "grad_norm": 2.5541892051696777, | |
| "kl": 0.498046875, | |
| "learning_rate": 7.333336616128369e-07, | |
| "loss": -0.0020766020752489567, | |
| "memory(GiB)": 18.17, | |
| "step": 176, | |
| "train_speed(iter/s)": 0.101776 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 237.5, | |
| "completions/mean_length": 140.26953125, | |
| "completions/min_length": 61.5, | |
| "epoch": 3.54, | |
| "grad_norm": 2.090574264526367, | |
| "kl": 0.455078125, | |
| "learning_rate": 7.305223871934656e-07, | |
| "loss": -0.004062575753778219, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4077337831258774, | |
| "reward_std": 0.021388554014265537, | |
| "rewards/MCQ_Reward/mean": 0.4077337831258774, | |
| "rewards/MCQ_Reward/std": 0.1092216707766056, | |
| "step": 177, | |
| "train_speed(iter/s)": 0.101717 | |
| }, | |
| { | |
| "clip_ratio": 0.009097482077777386, | |
| "epoch": 3.56, | |
| "grad_norm": 2.031277894973755, | |
| "kl": 0.4638671875, | |
| "learning_rate": 7.277018273659516e-07, | |
| "loss": -0.005147318355739117, | |
| "memory(GiB)": 18.17, | |
| "step": 178, | |
| "train_speed(iter/s)": 0.102192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.5, | |
| "completions/mean_length": 103.77734375, | |
| "completions/min_length": 56.0, | |
| "epoch": 3.58, | |
| "grad_norm": 2.28383731842041, | |
| "kl": 0.55078125, | |
| "learning_rate": 7.248720957420329e-07, | |
| "loss": 0.0054731229320168495, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.37708504498004913, | |
| "reward_std": 0.022474835626780987, | |
| "rewards/MCQ_Reward/mean": 0.37708504498004913, | |
| "rewards/MCQ_Reward/std": 0.10817139223217964, | |
| "step": 179, | |
| "train_speed(iter/s)": 0.102207 | |
| }, | |
| { | |
| "clip_ratio": 0.005004609236493707, | |
| "epoch": 3.6, | |
| "grad_norm": 2.2720046043395996, | |
| "kl": 0.552734375, | |
| "learning_rate": 7.220333063028871e-07, | |
| "loss": 0.004853987134993076, | |
| "memory(GiB)": 18.17, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.10258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 267.0, | |
| "completions/mean_length": 135.375, | |
| "completions/min_length": 64.5, | |
| "epoch": 3.62, | |
| "grad_norm": 2.0278213024139404, | |
| "kl": 0.537109375, | |
| "learning_rate": 7.191855733945386e-07, | |
| "loss": 0.007204895373433828, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.37996095418930054, | |
| "reward_std": 0.024972867220640182, | |
| "rewards/MCQ_Reward/mean": 0.37996095418930054, | |
| "rewards/MCQ_Reward/std": 0.06211347132921219, | |
| "step": 181, | |
| "train_speed(iter/s)": 0.102022 | |
| }, | |
| { | |
| "clip_ratio": 0.0050066676922142506, | |
| "epoch": 3.64, | |
| "grad_norm": 2.026421308517456, | |
| "kl": 0.54296875, | |
| "learning_rate": 7.163290117232541e-07, | |
| "loss": 0.006550833582878113, | |
| "memory(GiB)": 18.17, | |
| "step": 182, | |
| "train_speed(iter/s)": 0.102515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 231.0, | |
| "completions/mean_length": 132.80078125, | |
| "completions/min_length": 70.0, | |
| "epoch": 3.66, | |
| "grad_norm": 2.322474479675293, | |
| "kl": 0.4560546875, | |
| "learning_rate": 7.134637363509209e-07, | |
| "loss": 0.00408747885376215, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42590010166168213, | |
| "reward_std": 0.02117757499217987, | |
| "rewards/MCQ_Reward/mean": 0.42590010166168213, | |
| "rewards/MCQ_Reward/std": 0.10450495779514313, | |
| "step": 183, | |
| "train_speed(iter/s)": 0.102439 | |
| }, | |
| { | |
| "clip_ratio": 0.005717001855373383, | |
| "epoch": 3.68, | |
| "grad_norm": 2.0725347995758057, | |
| "kl": 0.4501953125, | |
| "learning_rate": 7.105898626904134e-07, | |
| "loss": 0.003590245731174946, | |
| "memory(GiB)": 18.17, | |
| "step": 184, | |
| "train_speed(iter/s)": 0.10291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 185.0, | |
| "completions/mean_length": 107.73828125, | |
| "completions/min_length": 67.5, | |
| "epoch": 3.7, | |
| "grad_norm": 2.94624662399292, | |
| "kl": 0.578125, | |
| "learning_rate": 7.077075065009433e-07, | |
| "loss": -0.0015533820260316133, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4082287549972534, | |
| "reward_std": 0.023994137533009052, | |
| "rewards/MCQ_Reward/mean": 0.4082287549972534, | |
| "rewards/MCQ_Reward/std": 0.09996674209833145, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.102951 | |
| }, | |
| { | |
| "clip_ratio": 0.006125608924776316, | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 2.3971669673919678, | |
| "kl": 0.572265625, | |
| "learning_rate": 7.048167838833976e-07, | |
| "loss": -0.0021633533760905266, | |
| "memory(GiB)": 18.17, | |
| "step": 186, | |
| "train_speed(iter/s)": 0.103425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 227.5, | |
| "completions/mean_length": 131.453125, | |
| "completions/min_length": 59.0, | |
| "epoch": 3.74, | |
| "grad_norm": 2.0767407417297363, | |
| "kl": 0.513671875, | |
| "learning_rate": 7.019178112756625e-07, | |
| "loss": 0.005040531512349844, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43931877613067627, | |
| "reward_std": 0.02542781364172697, | |
| "rewards/MCQ_Reward/mean": 0.43931877613067627, | |
| "rewards/MCQ_Reward/std": 0.0755577739328146, | |
| "step": 187, | |
| "train_speed(iter/s)": 0.103367 | |
| }, | |
| { | |
| "clip_ratio": 0.007456609280779958, | |
| "epoch": 3.76, | |
| "grad_norm": 2.0555458068847656, | |
| "kl": 0.513671875, | |
| "learning_rate": 6.990107054479312e-07, | |
| "loss": 0.004873338155448437, | |
| "memory(GiB)": 18.17, | |
| "step": 188, | |
| "train_speed(iter/s)": 0.103852 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 187.0, | |
| "completions/mean_length": 120.015625, | |
| "completions/min_length": 56.0, | |
| "epoch": 3.7800000000000002, | |
| "grad_norm": 2.1511483192443848, | |
| "kl": 0.546875, | |
| "learning_rate": 6.960955834980027e-07, | |
| "loss": -0.007258214056491852, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3652060180902481, | |
| "reward_std": 0.023877170868217945, | |
| "rewards/MCQ_Reward/mean": 0.3652060180902481, | |
| "rewards/MCQ_Reward/std": 0.09329301491379738, | |
| "step": 189, | |
| "train_speed(iter/s)": 0.103851 | |
| }, | |
| { | |
| "clip_ratio": 0.006274498999118805, | |
| "epoch": 3.8, | |
| "grad_norm": 2.204212188720703, | |
| "kl": 0.5546875, | |
| "learning_rate": 6.931725628465642e-07, | |
| "loss": -0.0077828834764659405, | |
| "memory(GiB)": 18.17, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.104325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 179.5, | |
| "completions/mean_length": 119.02734375, | |
| "completions/min_length": 68.0, | |
| "epoch": 3.82, | |
| "grad_norm": 2.489328384399414, | |
| "kl": 0.5625, | |
| "learning_rate": 6.902417612324615e-07, | |
| "loss": -0.004156440030783415, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41069237887859344, | |
| "reward_std": 0.02522939257323742, | |
| "rewards/MCQ_Reward/mean": 0.41069237887859344, | |
| "rewards/MCQ_Reward/std": 0.10438777878880501, | |
| "step": 191, | |
| "train_speed(iter/s)": 0.103961 | |
| }, | |
| { | |
| "clip_ratio": 0.006902764085680246, | |
| "epoch": 3.84, | |
| "grad_norm": 2.573939085006714, | |
| "kl": 0.53125, | |
| "learning_rate": 6.87303296707956e-07, | |
| "loss": -0.004263042006641626, | |
| "memory(GiB)": 18.17, | |
| "step": 192, | |
| "train_speed(iter/s)": 0.104434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 199.0, | |
| "completions/mean_length": 119.2109375, | |
| "completions/min_length": 63.5, | |
| "epoch": 3.86, | |
| "grad_norm": 2.4605846405029297, | |
| "kl": 0.537109375, | |
| "learning_rate": 6.843572876339704e-07, | |
| "loss": -0.006107931490987539, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41506680846214294, | |
| "reward_std": 0.025901762768626213, | |
| "rewards/MCQ_Reward/mean": 0.41506680846214294, | |
| "rewards/MCQ_Reward/std": 0.11812347918748856, | |
| "step": 193, | |
| "train_speed(iter/s)": 0.104435 | |
| }, | |
| { | |
| "clip_ratio": 0.006947604939341545, | |
| "epoch": 3.88, | |
| "grad_norm": 2.9201459884643555, | |
| "kl": 0.533203125, | |
| "learning_rate": 6.814038526753204e-07, | |
| "loss": -0.006667410954833031, | |
| "memory(GiB)": 18.17, | |
| "step": 194, | |
| "train_speed(iter/s)": 0.104911 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 204.0, | |
| "completions/mean_length": 123.375, | |
| "completions/min_length": 58.5, | |
| "epoch": 3.9, | |
| "grad_norm": 2.481006145477295, | |
| "kl": 0.638671875, | |
| "learning_rate": 6.784431107959358e-07, | |
| "loss": -0.00256272591650486, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4147709757089615, | |
| "reward_std": 0.023487260565161705, | |
| "rewards/MCQ_Reward/mean": 0.4147709757089615, | |
| "rewards/MCQ_Reward/std": 0.08765164762735367, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.104938 | |
| }, | |
| { | |
| "clip_ratio": 0.00836537522263825, | |
| "epoch": 3.92, | |
| "grad_norm": 2.211996078491211, | |
| "kl": 0.62109375, | |
| "learning_rate": 6.754751812540679e-07, | |
| "loss": -0.0026485356502234936, | |
| "memory(GiB)": 18.17, | |
| "step": 196, | |
| "train_speed(iter/s)": 0.105375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 161.0, | |
| "completions/mean_length": 113.68359375, | |
| "completions/min_length": 58.0, | |
| "epoch": 3.94, | |
| "grad_norm": 2.5469682216644287, | |
| "kl": 0.556640625, | |
| "learning_rate": 6.725001835974852e-07, | |
| "loss": -0.005141774192452431, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39422211050987244, | |
| "reward_std": 0.022977779619395733, | |
| "rewards/MCQ_Reward/mean": 0.39422211050987244, | |
| "rewards/MCQ_Reward/std": 0.09659452736377716, | |
| "step": 197, | |
| "train_speed(iter/s)": 0.105428 | |
| }, | |
| { | |
| "clip_ratio": 0.007515270030125976, | |
| "epoch": 3.96, | |
| "grad_norm": 2.603193998336792, | |
| "kl": 0.57421875, | |
| "learning_rate": 6.695182376586602e-07, | |
| "loss": -0.00558980368077755, | |
| "memory(GiB)": 18.17, | |
| "step": 198, | |
| "train_speed(iter/s)": 0.105897 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 251.0, | |
| "completions/mean_length": 124.140625, | |
| "completions/min_length": 66.5, | |
| "epoch": 3.98, | |
| "grad_norm": 2.8109734058380127, | |
| "kl": 0.5703125, | |
| "learning_rate": 6.665294635499403e-07, | |
| "loss": -0.008472483605146408, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3954710364341736, | |
| "reward_std": 0.026893282309174538, | |
| "rewards/MCQ_Reward/mean": 0.3954710364341736, | |
| "rewards/MCQ_Reward/std": 0.07466300576925278, | |
| "step": 199, | |
| "train_speed(iter/s)": 0.10569 | |
| }, | |
| { | |
| "clip_ratio": 0.007555491756647825, | |
| "epoch": 4.0, | |
| "grad_norm": 3.981370687484741, | |
| "kl": 0.5625, | |
| "learning_rate": 6.635339816587108e-07, | |
| "loss": -0.008467345498502254, | |
| "memory(GiB)": 18.17, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.106122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 189.0, | |
| "completions/mean_length": 114.1640625, | |
| "completions/min_length": 67.0, | |
| "epoch": 4.02, | |
| "grad_norm": 3.464586019515991, | |
| "kl": 1.001953125, | |
| "learning_rate": 6.605319126425453e-07, | |
| "loss": 0.010952511802315712, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4330308884382248, | |
| "reward_std": 0.022406785748898983, | |
| "rewards/MCQ_Reward/mean": 0.4330308884382248, | |
| "rewards/MCQ_Reward/std": 0.09031685814261436, | |
| "step": 201, | |
| "train_speed(iter/s)": 0.10573 | |
| }, | |
| { | |
| "clip_ratio": 0.010695958975702524, | |
| "epoch": 4.04, | |
| "grad_norm": 3.2848002910614014, | |
| "kl": 1.3125, | |
| "learning_rate": 6.575233774243464e-07, | |
| "loss": 0.010859224945306778, | |
| "memory(GiB)": 18.17, | |
| "step": 202, | |
| "train_speed(iter/s)": 0.106187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 173.5, | |
| "completions/mean_length": 115.0625, | |
| "completions/min_length": 64.5, | |
| "epoch": 4.06, | |
| "grad_norm": 2.5354137420654297, | |
| "kl": 0.521484375, | |
| "learning_rate": 6.545084971874736e-07, | |
| "loss": 0.008116345852613449, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4043910503387451, | |
| "reward_std": 0.023216267116367817, | |
| "rewards/MCQ_Reward/mean": 0.4043910503387451, | |
| "rewards/MCQ_Reward/std": 0.09529644250869751, | |
| "step": 203, | |
| "train_speed(iter/s)": 0.106255 | |
| }, | |
| { | |
| "clip_ratio": 0.005409660283476114, | |
| "epoch": 4.08, | |
| "grad_norm": 2.4091176986694336, | |
| "kl": 0.52734375, | |
| "learning_rate": 6.514873933708637e-07, | |
| "loss": 0.007959958165884018, | |
| "memory(GiB)": 18.17, | |
| "step": 204, | |
| "train_speed(iter/s)": 0.10667 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 191.5, | |
| "completions/mean_length": 103.03515625, | |
| "completions/min_length": 53.0, | |
| "epoch": 4.1, | |
| "grad_norm": 2.983665704727173, | |
| "kl": 0.62109375, | |
| "learning_rate": 6.484601876641375e-07, | |
| "loss": -0.014035141095519066, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4240594506263733, | |
| "reward_std": 0.025937434285879135, | |
| "rewards/MCQ_Reward/mean": 0.4240594506263733, | |
| "rewards/MCQ_Reward/std": 0.07473786175251007, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.106723 | |
| }, | |
| { | |
| "clip_ratio": 0.018164899200201035, | |
| "epoch": 4.12, | |
| "grad_norm": 6.4920454025268555, | |
| "kl": 0.5859375, | |
| "learning_rate": 6.454270020026995e-07, | |
| "loss": -0.013708272948861122, | |
| "memory(GiB)": 18.17, | |
| "step": 206, | |
| "train_speed(iter/s)": 0.107162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 242.0, | |
| "completions/mean_length": 129.375, | |
| "completions/min_length": 58.5, | |
| "epoch": 4.14, | |
| "grad_norm": 2.714660882949829, | |
| "kl": 0.5625, | |
| "learning_rate": 6.423879585628261e-07, | |
| "loss": -0.014167927205562592, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.396339014172554, | |
| "reward_std": 0.02192540653049946, | |
| "rewards/MCQ_Reward/mean": 0.396339014172554, | |
| "rewards/MCQ_Reward/std": 0.11277944594621658, | |
| "step": 207, | |
| "train_speed(iter/s)": 0.106875 | |
| }, | |
| { | |
| "clip_ratio": 0.007178165018558502, | |
| "epoch": 4.16, | |
| "grad_norm": 2.4650375843048096, | |
| "kl": 0.560546875, | |
| "learning_rate": 6.393431797567439e-07, | |
| "loss": -0.014689125120639801, | |
| "memory(GiB)": 18.17, | |
| "step": 208, | |
| "train_speed(iter/s)": 0.107325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 255.0, | |
| "completions/mean_length": 131.01953125, | |
| "completions/min_length": 64.5, | |
| "epoch": 4.18, | |
| "grad_norm": 2.1339519023895264, | |
| "kl": 0.58203125, | |
| "learning_rate": 6.362927882276989e-07, | |
| "loss": -0.017007270827889442, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42686355113983154, | |
| "reward_std": 0.023915644735097885, | |
| "rewards/MCQ_Reward/mean": 0.42686355113983154, | |
| "rewards/MCQ_Reward/std": 0.10529575496912003, | |
| "step": 209, | |
| "train_speed(iter/s)": 0.107141 | |
| }, | |
| { | |
| "clip_ratio": 0.005084275268018246, | |
| "epoch": 4.2, | |
| "grad_norm": 2.0464680194854736, | |
| "kl": 0.59375, | |
| "learning_rate": 6.332369068450174e-07, | |
| "loss": -0.0175747312605381, | |
| "memory(GiB)": 18.17, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.107586 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 197.5, | |
| "completions/mean_length": 116.63671875, | |
| "completions/min_length": 61.5, | |
| "epoch": 4.22, | |
| "grad_norm": 2.4869492053985596, | |
| "kl": 0.544921875, | |
| "learning_rate": 6.30175658699156e-07, | |
| "loss": -0.0016960185021162033, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43242450058460236, | |
| "reward_std": 0.02396441251039505, | |
| "rewards/MCQ_Reward/mean": 0.43242450058460236, | |
| "rewards/MCQ_Reward/std": 0.07406600937247276, | |
| "step": 211, | |
| "train_speed(iter/s)": 0.107182 | |
| }, | |
| { | |
| "clip_ratio": 0.006936221849173307, | |
| "epoch": 4.24, | |
| "grad_norm": 2.2954320907592773, | |
| "kl": 0.5390625, | |
| "learning_rate": 6.271091670967436e-07, | |
| "loss": -0.001955235842615366, | |
| "memory(GiB)": 18.17, | |
| "step": 212, | |
| "train_speed(iter/s)": 0.10762 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.5, | |
| "completions/mean_length": 132.296875, | |
| "completions/min_length": 90.0, | |
| "epoch": 4.26, | |
| "grad_norm": 2.5567421913146973, | |
| "kl": 0.548828125, | |
| "learning_rate": 6.240375555556145e-07, | |
| "loss": -0.010683618485927582, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3712979108095169, | |
| "reward_std": 0.022392110899090767, | |
| "rewards/MCQ_Reward/mean": 0.3712979108095169, | |
| "rewards/MCQ_Reward/std": 0.0758376233279705, | |
| "step": 213, | |
| "train_speed(iter/s)": 0.107578 | |
| }, | |
| { | |
| "clip_ratio": 0.01051389379426837, | |
| "epoch": 4.28, | |
| "grad_norm": 3.9029605388641357, | |
| "kl": 0.529296875, | |
| "learning_rate": 6.209609477998338e-07, | |
| "loss": -0.010750237852334976, | |
| "memory(GiB)": 18.17, | |
| "step": 214, | |
| "train_speed(iter/s)": 0.108018 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 181.5, | |
| "completions/mean_length": 117.71875, | |
| "completions/min_length": 60.5, | |
| "epoch": 4.3, | |
| "grad_norm": 2.3913040161132812, | |
| "kl": 0.6015625, | |
| "learning_rate": 6.178794677547137e-07, | |
| "loss": -0.012967615388333797, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3914954960346222, | |
| "reward_std": 0.021691203117370605, | |
| "rewards/MCQ_Reward/mean": 0.3914954960346222, | |
| "rewards/MCQ_Reward/std": 0.10047328472137451, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.108034 | |
| }, | |
| { | |
| "clip_ratio": 0.005430733785033226, | |
| "epoch": 4.32, | |
| "grad_norm": 2.3732998371124268, | |
| "kl": 0.61328125, | |
| "learning_rate": 6.147932395418205e-07, | |
| "loss": -0.013309886679053307, | |
| "memory(GiB)": 18.17, | |
| "step": 216, | |
| "train_speed(iter/s)": 0.108474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 209.5, | |
| "completions/mean_length": 123.4765625, | |
| "completions/min_length": 65.0, | |
| "epoch": 4.34, | |
| "grad_norm": 2.7147343158721924, | |
| "kl": 0.552734375, | |
| "learning_rate": 6.117023874739771e-07, | |
| "loss": -0.0006074332632124424, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4220256060361862, | |
| "reward_std": 0.0257421238347888, | |
| "rewards/MCQ_Reward/mean": 0.4220256060361862, | |
| "rewards/MCQ_Reward/std": 0.12063978612422943, | |
| "step": 217, | |
| "train_speed(iter/s)": 0.10841 | |
| }, | |
| { | |
| "clip_ratio": 0.006779439281672239, | |
| "epoch": 4.36, | |
| "grad_norm": 2.3169238567352295, | |
| "kl": 0.544921875, | |
| "learning_rate": 6.086070360502539e-07, | |
| "loss": -0.0006955214776098728, | |
| "memory(GiB)": 18.17, | |
| "step": 218, | |
| "train_speed(iter/s)": 0.108822 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 203.5, | |
| "completions/mean_length": 116.0625, | |
| "completions/min_length": 53.5, | |
| "epoch": 4.38, | |
| "grad_norm": 2.7408437728881836, | |
| "kl": 0.615234375, | |
| "learning_rate": 6.055073099509549e-07, | |
| "loss": -0.007178765721619129, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41480791568756104, | |
| "reward_std": 0.028133179992437363, | |
| "rewards/MCQ_Reward/mean": 0.41480791568756104, | |
| "rewards/MCQ_Reward/std": 0.1095062680542469, | |
| "step": 219, | |
| "train_speed(iter/s)": 0.108796 | |
| }, | |
| { | |
| "clip_ratio": 0.007214481011033058, | |
| "epoch": 4.4, | |
| "grad_norm": 2.457122802734375, | |
| "kl": 0.6171875, | |
| "learning_rate": 6.024033340325954e-07, | |
| "loss": -0.008253653533756733, | |
| "memory(GiB)": 18.17, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.109227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 209.5, | |
| "completions/mean_length": 118.609375, | |
| "completions/min_length": 59.0, | |
| "epoch": 4.42, | |
| "grad_norm": 2.8679587841033936, | |
| "kl": 0.568359375, | |
| "learning_rate": 5.992952333228726e-07, | |
| "loss": 0.013627042062580585, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4350634217262268, | |
| "reward_std": 0.0218770457431674, | |
| "rewards/MCQ_Reward/mean": 0.4350634217262268, | |
| "rewards/MCQ_Reward/std": 0.07635831832885742, | |
| "step": 221, | |
| "train_speed(iter/s)": 0.108811 | |
| }, | |
| { | |
| "clip_ratio": 0.005678659770637751, | |
| "epoch": 4.44, | |
| "grad_norm": 2.187412738800049, | |
| "kl": 0.58203125, | |
| "learning_rate": 5.961831330156305e-07, | |
| "loss": 0.013213744387030602, | |
| "memory(GiB)": 18.17, | |
| "step": 222, | |
| "train_speed(iter/s)": 0.109221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 226.5, | |
| "completions/mean_length": 125.9765625, | |
| "completions/min_length": 48.5, | |
| "epoch": 4.46, | |
| "grad_norm": 3.5221126079559326, | |
| "kl": 0.587890625, | |
| "learning_rate": 5.93067158465815e-07, | |
| "loss": -0.0011408873833715916, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44135691225528717, | |
| "reward_std": 0.025366419926285744, | |
| "rewards/MCQ_Reward/mean": 0.44135691225528717, | |
| "rewards/MCQ_Reward/std": 0.07711124420166016, | |
| "step": 223, | |
| "train_speed(iter/s)": 0.109176 | |
| }, | |
| { | |
| "clip_ratio": 0.007937990361824632, | |
| "epoch": 4.48, | |
| "grad_norm": 2.513356924057007, | |
| "kl": 0.5703125, | |
| "learning_rate": 5.899474351844269e-07, | |
| "loss": -0.0011316398158669472, | |
| "memory(GiB)": 18.17, | |
| "step": 224, | |
| "train_speed(iter/s)": 0.109601 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 228.0, | |
| "completions/mean_length": 120.234375, | |
| "completions/min_length": 54.0, | |
| "epoch": 4.5, | |
| "grad_norm": 2.853579044342041, | |
| "kl": 0.744140625, | |
| "learning_rate": 5.868240888334652e-07, | |
| "loss": -0.0010898616164922714, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41750770807266235, | |
| "reward_std": 0.024566995911300182, | |
| "rewards/MCQ_Reward/mean": 0.41750770807266235, | |
| "rewards/MCQ_Reward/std": 0.09383138827979565, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.109546 | |
| }, | |
| { | |
| "clip_ratio": 0.012675716076046228, | |
| "epoch": 4.52, | |
| "grad_norm": 5.211337089538574, | |
| "kl": 0.658203125, | |
| "learning_rate": 5.836972452208654e-07, | |
| "loss": -0.001642034389078617, | |
| "memory(GiB)": 18.17, | |
| "step": 226, | |
| "train_speed(iter/s)": 0.109972 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 183.5, | |
| "completions/mean_length": 126.68359375, | |
| "completions/min_length": 64.0, | |
| "epoch": 4.54, | |
| "grad_norm": 2.3116183280944824, | |
| "kl": 0.505859375, | |
| "learning_rate": 5.805670302954321e-07, | |
| "loss": 0.017429981380701065, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41671665012836456, | |
| "reward_std": 0.02627546712756157, | |
| "rewards/MCQ_Reward/mean": 0.41671665012836456, | |
| "rewards/MCQ_Reward/std": 0.09354511648416519, | |
| "step": 227, | |
| "train_speed(iter/s)": 0.109937 | |
| }, | |
| { | |
| "clip_ratio": 0.005898691713809967, | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 2.306483507156372, | |
| "kl": 0.5087890625, | |
| "learning_rate": 5.774335701417662e-07, | |
| "loss": 0.016744598746299744, | |
| "memory(GiB)": 18.17, | |
| "step": 228, | |
| "train_speed(iter/s)": 0.110353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 213.5, | |
| "completions/mean_length": 124.5546875, | |
| "completions/min_length": 61.0, | |
| "epoch": 4.58, | |
| "grad_norm": 2.3084402084350586, | |
| "kl": 0.552734375, | |
| "learning_rate": 5.742969909751858e-07, | |
| "loss": -0.009621858596801758, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.45828977227211, | |
| "reward_std": 0.023471640422940254, | |
| "rewards/MCQ_Reward/mean": 0.45828977227211, | |
| "rewards/MCQ_Reward/std": 0.09269878640770912, | |
| "step": 229, | |
| "train_speed(iter/s)": 0.110326 | |
| }, | |
| { | |
| "clip_ratio": 0.005610911408439279, | |
| "epoch": 4.6, | |
| "grad_norm": 2.163801431655884, | |
| "kl": 0.552734375, | |
| "learning_rate": 5.711574191366427e-07, | |
| "loss": -0.010531945154070854, | |
| "memory(GiB)": 18.17, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.110743 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 187.5, | |
| "completions/mean_length": 116.93359375, | |
| "completions/min_length": 62.5, | |
| "epoch": 4.62, | |
| "grad_norm": 3.1812872886657715, | |
| "kl": 2.26171875, | |
| "learning_rate": 5.680149810876322e-07, | |
| "loss": 0.006941274274140596, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.45568907260894775, | |
| "reward_std": 0.023496804758906364, | |
| "rewards/MCQ_Reward/mean": 0.45568907260894775, | |
| "rewards/MCQ_Reward/std": 0.09556515514850616, | |
| "step": 231, | |
| "train_speed(iter/s)": 0.110377 | |
| }, | |
| { | |
| "clip_ratio": 0.006443677702918649, | |
| "epoch": 4.64, | |
| "grad_norm": 2.733854293823242, | |
| "kl": 2.2734375, | |
| "learning_rate": 5.648698034051008e-07, | |
| "loss": 0.006462510209530592, | |
| "memory(GiB)": 18.17, | |
| "step": 232, | |
| "train_speed(iter/s)": 0.110787 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 233.0, | |
| "completions/mean_length": 133.1015625, | |
| "completions/min_length": 70.5, | |
| "epoch": 4.66, | |
| "grad_norm": 2.4281585216522217, | |
| "kl": 0.55859375, | |
| "learning_rate": 5.617220127763474e-07, | |
| "loss": 0.013438165187835693, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43506887555122375, | |
| "reward_std": 0.025797616690397263, | |
| "rewards/MCQ_Reward/mean": 0.43506887555122375, | |
| "rewards/MCQ_Reward/std": 0.09859243780374527, | |
| "step": 233, | |
| "train_speed(iter/s)": 0.110691 | |
| }, | |
| { | |
| "clip_ratio": 0.0072706313803792, | |
| "epoch": 4.68, | |
| "grad_norm": 2.526357889175415, | |
| "kl": 0.55859375, | |
| "learning_rate": 5.585717359939192e-07, | |
| "loss": 0.012631012126803398, | |
| "memory(GiB)": 18.17, | |
| "step": 234, | |
| "train_speed(iter/s)": 0.111101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 214.5, | |
| "completions/mean_length": 133.1328125, | |
| "completions/min_length": 57.0, | |
| "epoch": 4.7, | |
| "grad_norm": 2.639338731765747, | |
| "kl": 0.552734375, | |
| "learning_rate": 5.554190999505055e-07, | |
| "loss": -0.008054563775658607, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40963128209114075, | |
| "reward_std": 0.024876238778233528, | |
| "rewards/MCQ_Reward/mean": 0.40963128209114075, | |
| "rewards/MCQ_Reward/std": 0.06643268279731274, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.111027 | |
| }, | |
| { | |
| "clip_ratio": 0.008271400351077318, | |
| "epoch": 4.72, | |
| "grad_norm": 2.7264564037323, | |
| "kl": 0.568359375, | |
| "learning_rate": 5.522642316338268e-07, | |
| "loss": -0.008453292772173882, | |
| "memory(GiB)": 18.17, | |
| "step": 236, | |
| "train_speed(iter/s)": 0.111434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 188.0, | |
| "completions/mean_length": 123.84765625, | |
| "completions/min_length": 65.5, | |
| "epoch": 4.74, | |
| "grad_norm": 2.405317544937134, | |
| "kl": 0.5400390625, | |
| "learning_rate": 5.491072581215186e-07, | |
| "loss": 0.00114892004057765, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4337426722049713, | |
| "reward_std": 0.020247386768460274, | |
| "rewards/MCQ_Reward/mean": 0.4337426722049713, | |
| "rewards/MCQ_Reward/std": 0.07973705604672432, | |
| "step": 237, | |
| "train_speed(iter/s)": 0.111369 | |
| }, | |
| { | |
| "clip_ratio": 0.006459691561758518, | |
| "epoch": 4.76, | |
| "grad_norm": 2.8662662506103516, | |
| "kl": 0.5400390625, | |
| "learning_rate": 5.459483065760138e-07, | |
| "loss": 0.0009391154162585735, | |
| "memory(GiB)": 18.17, | |
| "step": 238, | |
| "train_speed(iter/s)": 0.111775 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 244.0, | |
| "completions/mean_length": 133.96875, | |
| "completions/min_length": 75.0, | |
| "epoch": 4.78, | |
| "grad_norm": 2.400651216506958, | |
| "kl": 0.5078125, | |
| "learning_rate": 5.427875042394199e-07, | |
| "loss": 0.002962369006127119, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4192984253168106, | |
| "reward_std": 0.023103597573935986, | |
| "rewards/MCQ_Reward/mean": 0.4192984253168106, | |
| "rewards/MCQ_Reward/std": 0.08515846729278564, | |
| "step": 239, | |
| "train_speed(iter/s)": 0.11166 | |
| }, | |
| { | |
| "clip_ratio": 0.00794414198026061, | |
| "epoch": 4.8, | |
| "grad_norm": 3.1118853092193604, | |
| "kl": 0.5029296875, | |
| "learning_rate": 5.396249784283942e-07, | |
| "loss": 0.0026899795047938824, | |
| "memory(GiB)": 18.17, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.112066 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.5, | |
| "completions/mean_length": 114.4609375, | |
| "completions/min_length": 47.5, | |
| "epoch": 4.82, | |
| "grad_norm": 2.5313034057617188, | |
| "kl": 0.5390625, | |
| "learning_rate": 5.364608565290154e-07, | |
| "loss": -0.0074430471286177635, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4074428677558899, | |
| "reward_std": 0.02112921793013811, | |
| "rewards/MCQ_Reward/mean": 0.4074428677558899, | |
| "rewards/MCQ_Reward/std": 0.07994595915079117, | |
| "step": 241, | |
| "train_speed(iter/s)": 0.111745 | |
| }, | |
| { | |
| "clip_ratio": 0.007256179815158248, | |
| "epoch": 4.84, | |
| "grad_norm": 2.768711566925049, | |
| "kl": 0.5625, | |
| "learning_rate": 5.33295265991652e-07, | |
| "loss": -0.0077315750531852245, | |
| "memory(GiB)": 18.17, | |
| "step": 242, | |
| "train_speed(iter/s)": 0.112147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 167.0, | |
| "completions/mean_length": 115.47265625, | |
| "completions/min_length": 67.5, | |
| "epoch": 4.86, | |
| "grad_norm": 2.561013698577881, | |
| "kl": 0.57421875, | |
| "learning_rate": 5.301283343258292e-07, | |
| "loss": -0.0039140088483691216, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42967718839645386, | |
| "reward_std": 0.020259867422282696, | |
| "rewards/MCQ_Reward/mean": 0.42967718839645386, | |
| "rewards/MCQ_Reward/std": 0.09365658834576607, | |
| "step": 243, | |
| "train_speed(iter/s)": 0.112166 | |
| }, | |
| { | |
| "clip_ratio": 0.008353757206350565, | |
| "epoch": 4.88, | |
| "grad_norm": 3.9286372661590576, | |
| "kl": 0.560546875, | |
| "learning_rate": 5.26960189095093e-07, | |
| "loss": -0.003905682824552059, | |
| "memory(GiB)": 18.17, | |
| "step": 244, | |
| "train_speed(iter/s)": 0.112566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 206.0, | |
| "completions/mean_length": 130.84375, | |
| "completions/min_length": 77.0, | |
| "epoch": 4.9, | |
| "grad_norm": 2.3792028427124023, | |
| "kl": 0.515625, | |
| "learning_rate": 5.237909579118712e-07, | |
| "loss": 0.0075805773958563805, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.37578998506069183, | |
| "reward_std": 0.022264255210757256, | |
| "rewards/MCQ_Reward/mean": 0.37578998506069183, | |
| "rewards/MCQ_Reward/std": 0.09643128886818886, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.112504 | |
| }, | |
| { | |
| "clip_ratio": 0.006022685440257192, | |
| "epoch": 4.92, | |
| "grad_norm": 2.490131378173828, | |
| "kl": 0.501953125, | |
| "learning_rate": 5.206207684323335e-07, | |
| "loss": 0.007525968365371227, | |
| "memory(GiB)": 18.17, | |
| "step": 246, | |
| "train_speed(iter/s)": 0.112901 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 177.5, | |
| "completions/mean_length": 112.49609375, | |
| "completions/min_length": 62.5, | |
| "epoch": 4.9399999999999995, | |
| "grad_norm": 2.270827293395996, | |
| "kl": 0.580078125, | |
| "learning_rate": 5.174497483512505e-07, | |
| "loss": 0.011211629025638103, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39156346023082733, | |
| "reward_std": 0.02191222459077835, | |
| "rewards/MCQ_Reward/mean": 0.39156346023082733, | |
| "rewards/MCQ_Reward/std": 0.12107554450631142, | |
| "step": 247, | |
| "train_speed(iter/s)": 0.112883 | |
| }, | |
| { | |
| "clip_ratio": 0.006176856812089682, | |
| "epoch": 4.96, | |
| "grad_norm": 2.373053550720215, | |
| "kl": 0.57421875, | |
| "learning_rate": 5.142780253968481e-07, | |
| "loss": 0.010641951113939285, | |
| "memory(GiB)": 18.17, | |
| "step": 248, | |
| "train_speed(iter/s)": 0.11328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 204.0, | |
| "completions/mean_length": 131.14453125, | |
| "completions/min_length": 62.5, | |
| "epoch": 4.98, | |
| "grad_norm": 2.2482690811157227, | |
| "kl": 0.525390625, | |
| "learning_rate": 5.111057273256647e-07, | |
| "loss": 0.0050743343308568, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40770605206489563, | |
| "reward_std": 0.022150222212076187, | |
| "rewards/MCQ_Reward/mean": 0.40770605206489563, | |
| "rewards/MCQ_Reward/std": 0.11748149991035461, | |
| "step": 249, | |
| "train_speed(iter/s)": 0.113183 | |
| }, | |
| { | |
| "clip_ratio": 0.006638662423938513, | |
| "epoch": 5.0, | |
| "grad_norm": 2.2492520809173584, | |
| "kl": 0.5390625, | |
| "learning_rate": 5.07932981917404e-07, | |
| "loss": 0.004837746266275644, | |
| "memory(GiB)": 18.17, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.113563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 214.0, | |
| "completions/mean_length": 125.765625, | |
| "completions/min_length": 68.5, | |
| "epoch": 5.02, | |
| "grad_norm": 2.556406259536743, | |
| "kl": 0.5078125, | |
| "learning_rate": 5.047599169697883e-07, | |
| "loss": 0.017076797783374786, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4466231018304825, | |
| "reward_std": 0.0222383551299572, | |
| "rewards/MCQ_Reward/mean": 0.4466231018304825, | |
| "rewards/MCQ_Reward/std": 0.11308542639017105, | |
| "step": 251, | |
| "train_speed(iter/s)": 0.113109 | |
| }, | |
| { | |
| "clip_ratio": 0.007436602842062712, | |
| "epoch": 5.04, | |
| "grad_norm": 2.0482616424560547, | |
| "kl": 0.515625, | |
| "learning_rate": 5.015866602934111e-07, | |
| "loss": 0.01610303670167923, | |
| "memory(GiB)": 18.17, | |
| "step": 252, | |
| "train_speed(iter/s)": 0.113475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 168.0, | |
| "completions/mean_length": 109.3359375, | |
| "completions/min_length": 65.0, | |
| "epoch": 5.06, | |
| "grad_norm": 2.6583385467529297, | |
| "kl": 0.6015625, | |
| "learning_rate": 4.984133397065888e-07, | |
| "loss": 0.005715301260352135, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3956441879272461, | |
| "reward_std": 0.02386545669287443, | |
| "rewards/MCQ_Reward/mean": 0.3956441879272461, | |
| "rewards/MCQ_Reward/std": 0.0772719755768776, | |
| "step": 253, | |
| "train_speed(iter/s)": 0.113471 | |
| }, | |
| { | |
| "clip_ratio": 0.006691478192806244, | |
| "epoch": 5.08, | |
| "grad_norm": 2.478234052658081, | |
| "kl": 0.5859375, | |
| "learning_rate": 4.952400830302116e-07, | |
| "loss": 0.00553365983068943, | |
| "memory(GiB)": 18.17, | |
| "step": 254, | |
| "train_speed(iter/s)": 0.113858 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 235.0, | |
| "completions/mean_length": 144.1796875, | |
| "completions/min_length": 78.0, | |
| "epoch": 5.1, | |
| "grad_norm": 2.308807373046875, | |
| "kl": 0.5009765625, | |
| "learning_rate": 4.92067018082596e-07, | |
| "loss": -0.0058871605433523655, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4203776866197586, | |
| "reward_std": 0.022159602493047714, | |
| "rewards/MCQ_Reward/mean": 0.4203776866197586, | |
| "rewards/MCQ_Reward/std": 0.09526496008038521, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.113761 | |
| }, | |
| { | |
| "clip_ratio": 0.007533560739830136, | |
| "epoch": 5.12, | |
| "grad_norm": 2.9820773601531982, | |
| "kl": 0.4921875, | |
| "learning_rate": 4.888942726743353e-07, | |
| "loss": -0.006009383127093315, | |
| "memory(GiB)": 18.17, | |
| "step": 256, | |
| "train_speed(iter/s)": 0.114127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 189.5, | |
| "completions/mean_length": 115.0078125, | |
| "completions/min_length": 65.0, | |
| "epoch": 5.14, | |
| "grad_norm": 2.3862602710723877, | |
| "kl": 0.57421875, | |
| "learning_rate": 4.857219746031519e-07, | |
| "loss": -0.010767871513962746, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43338486552238464, | |
| "reward_std": 0.025110138580203056, | |
| "rewards/MCQ_Reward/mean": 0.43338486552238464, | |
| "rewards/MCQ_Reward/std": 0.08122389577329159, | |
| "step": 257, | |
| "train_speed(iter/s)": 0.114083 | |
| }, | |
| { | |
| "clip_ratio": 0.005816203076392412, | |
| "epoch": 5.16, | |
| "grad_norm": 2.2391088008880615, | |
| "kl": 0.57421875, | |
| "learning_rate": 4.825502516487496e-07, | |
| "loss": -0.011337889358401299, | |
| "memory(GiB)": 18.17, | |
| "step": 258, | |
| "train_speed(iter/s)": 0.11446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 241.0, | |
| "completions/mean_length": 121.109375, | |
| "completions/min_length": 68.0, | |
| "epoch": 5.18, | |
| "grad_norm": 3.2198102474212646, | |
| "kl": 0.642578125, | |
| "learning_rate": 4.793792315676664e-07, | |
| "loss": -0.0017241109162569046, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41922956705093384, | |
| "reward_std": 0.02394416555762291, | |
| "rewards/MCQ_Reward/mean": 0.41922956705093384, | |
| "rewards/MCQ_Reward/std": 0.08786309324204922, | |
| "step": 259, | |
| "train_speed(iter/s)": 0.11433 | |
| }, | |
| { | |
| "clip_ratio": 0.008633819408714771, | |
| "epoch": 5.2, | |
| "grad_norm": 2.5045688152313232, | |
| "kl": 0.611328125, | |
| "learning_rate": 4.762090420881288e-07, | |
| "loss": -0.0024092746898531914, | |
| "memory(GiB)": 18.17, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.11471 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 203.5, | |
| "completions/mean_length": 121.0, | |
| "completions/min_length": 59.5, | |
| "epoch": 5.22, | |
| "grad_norm": 3.3788204193115234, | |
| "kl": 0.65625, | |
| "learning_rate": 4.7303981090490706e-07, | |
| "loss": 0.0016009537503123283, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4228467643260956, | |
| "reward_std": 0.02382771298289299, | |
| "rewards/MCQ_Reward/mean": 0.4228467643260956, | |
| "rewards/MCQ_Reward/std": 0.08922314271330833, | |
| "step": 261, | |
| "train_speed(iter/s)": 0.114325 | |
| }, | |
| { | |
| "clip_ratio": 0.009796116035431623, | |
| "epoch": 5.24, | |
| "grad_norm": 3.2910051345825195, | |
| "kl": 0.603515625, | |
| "learning_rate": 4.698716656741708e-07, | |
| "loss": 0.0013471171259880066, | |
| "memory(GiB)": 18.17, | |
| "step": 262, | |
| "train_speed(iter/s)": 0.114703 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 178.0, | |
| "completions/mean_length": 117.85546875, | |
| "completions/min_length": 58.5, | |
| "epoch": 5.26, | |
| "grad_norm": 3.0833852291107178, | |
| "kl": 0.607421875, | |
| "learning_rate": 4.66704734008348e-07, | |
| "loss": 0.01880352757871151, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4038514196872711, | |
| "reward_std": 0.024144282564520836, | |
| "rewards/MCQ_Reward/mean": 0.4038514196872711, | |
| "rewards/MCQ_Reward/std": 0.11032669246196747, | |
| "step": 263, | |
| "train_speed(iter/s)": 0.114712 | |
| }, | |
| { | |
| "clip_ratio": 0.0071860982570797205, | |
| "epoch": 5.28, | |
| "grad_norm": 2.223651885986328, | |
| "kl": 0.62109375, | |
| "learning_rate": 4.6353914347098467e-07, | |
| "loss": 0.018028832972049713, | |
| "memory(GiB)": 18.17, | |
| "step": 264, | |
| "train_speed(iter/s)": 0.115068 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 178.0, | |
| "completions/mean_length": 126.16796875, | |
| "completions/min_length": 63.0, | |
| "epoch": 5.3, | |
| "grad_norm": 2.7954585552215576, | |
| "kl": 0.521484375, | |
| "learning_rate": 4.6037502157160567e-07, | |
| "loss": 0.008576348423957825, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4126065671443939, | |
| "reward_std": 0.02162686362862587, | |
| "rewards/MCQ_Reward/mean": 0.4126065671443939, | |
| "rewards/MCQ_Reward/std": 0.08540061488747597, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.115013 | |
| }, | |
| { | |
| "clip_ratio": 0.00956161879003048, | |
| "epoch": 5.32, | |
| "grad_norm": 4.209680557250977, | |
| "kl": 0.544921875, | |
| "learning_rate": 4.5721249576058027e-07, | |
| "loss": 0.009101202711462975, | |
| "memory(GiB)": 18.17, | |
| "step": 266, | |
| "train_speed(iter/s)": 0.115384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 181.5, | |
| "completions/mean_length": 113.3515625, | |
| "completions/min_length": 71.5, | |
| "epoch": 5.34, | |
| "grad_norm": 2.6387808322906494, | |
| "kl": 0.595703125, | |
| "learning_rate": 4.540516934239863e-07, | |
| "loss": 0.008354030549526215, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4057372510433197, | |
| "reward_std": 0.025215539149940014, | |
| "rewards/MCQ_Reward/mean": 0.4057372510433197, | |
| "rewards/MCQ_Reward/std": 0.10797113552689552, | |
| "step": 267, | |
| "train_speed(iter/s)": 0.115352 | |
| }, | |
| { | |
| "clip_ratio": 0.004749758169054985, | |
| "epoch": 5.36, | |
| "grad_norm": 2.726827383041382, | |
| "kl": 0.59765625, | |
| "learning_rate": 4.508927418784814e-07, | |
| "loss": 0.008263107389211655, | |
| "memory(GiB)": 18.17, | |
| "step": 268, | |
| "train_speed(iter/s)": 0.115721 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 253.0, | |
| "completions/mean_length": 128.75, | |
| "completions/min_length": 65.0, | |
| "epoch": 5.38, | |
| "grad_norm": 2.4489338397979736, | |
| "kl": 0.5859375, | |
| "learning_rate": 4.477357683661733e-07, | |
| "loss": 0.0003694836050271988, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39796915650367737, | |
| "reward_std": 0.0229190643876791, | |
| "rewards/MCQ_Reward/mean": 0.39796915650367737, | |
| "rewards/MCQ_Reward/std": 0.06984946131706238, | |
| "step": 269, | |
| "train_speed(iter/s)": 0.115538 | |
| }, | |
| { | |
| "clip_ratio": 0.0044297389686107635, | |
| "epoch": 5.4, | |
| "grad_norm": 2.187133312225342, | |
| "kl": 0.587890625, | |
| "learning_rate": 4.445809000494945e-07, | |
| "loss": 6.162561476230621e-06, | |
| "memory(GiB)": 18.17, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.115873 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 207.5, | |
| "completions/mean_length": 120.48046875, | |
| "completions/min_length": 76.5, | |
| "epoch": 5.42, | |
| "grad_norm": 2.354365348815918, | |
| "kl": 0.595703125, | |
| "learning_rate": 4.4142826400608085e-07, | |
| "loss": -0.011774084530770779, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4731539338827133, | |
| "reward_std": 0.025172382593154907, | |
| "rewards/MCQ_Reward/mean": 0.4731539338827133, | |
| "rewards/MCQ_Reward/std": 0.09358260780572891, | |
| "step": 271, | |
| "train_speed(iter/s)": 0.115479 | |
| }, | |
| { | |
| "clip_ratio": 0.007754836697131395, | |
| "epoch": 5.44, | |
| "grad_norm": 2.9754416942596436, | |
| "kl": 0.568359375, | |
| "learning_rate": 4.382779872236526e-07, | |
| "loss": -0.01219811663031578, | |
| "memory(GiB)": 18.17, | |
| "step": 272, | |
| "train_speed(iter/s)": 0.115843 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 193.0, | |
| "completions/mean_length": 127.671875, | |
| "completions/min_length": 82.0, | |
| "epoch": 5.46, | |
| "grad_norm": 2.66938853263855, | |
| "kl": 0.587890625, | |
| "learning_rate": 4.3513019659489906e-07, | |
| "loss": -0.01641671359539032, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3951749950647354, | |
| "reward_std": 0.026222089305520058, | |
| "rewards/MCQ_Reward/mean": 0.3951749950647354, | |
| "rewards/MCQ_Reward/std": 0.07432432845234871, | |
| "step": 273, | |
| "train_speed(iter/s)": 0.11581 | |
| }, | |
| { | |
| "clip_ratio": 0.006316621555015445, | |
| "epoch": 5.48, | |
| "grad_norm": 2.3686916828155518, | |
| "kl": 0.595703125, | |
| "learning_rate": 4.31985018912368e-07, | |
| "loss": -0.01686863601207733, | |
| "memory(GiB)": 18.17, | |
| "step": 274, | |
| "train_speed(iter/s)": 0.116173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.0, | |
| "completions/mean_length": 127.2578125, | |
| "completions/min_length": 64.5, | |
| "epoch": 5.5, | |
| "grad_norm": 2.3570117950439453, | |
| "kl": 0.5390625, | |
| "learning_rate": 4.2884258086335745e-07, | |
| "loss": 0.0007358621805906296, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44543667137622833, | |
| "reward_std": 0.024644173681735992, | |
| "rewards/MCQ_Reward/mean": 0.44543667137622833, | |
| "rewards/MCQ_Reward/std": 0.09130855649709702, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.116062 | |
| }, | |
| { | |
| "clip_ratio": 0.009702229872345924, | |
| "epoch": 5.52, | |
| "grad_norm": 4.230794906616211, | |
| "kl": 0.517578125, | |
| "learning_rate": 4.257030090248142e-07, | |
| "loss": 0.0004968619905412197, | |
| "memory(GiB)": 18.17, | |
| "step": 276, | |
| "train_speed(iter/s)": 0.116424 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 188.5, | |
| "completions/mean_length": 124.16796875, | |
| "completions/min_length": 66.5, | |
| "epoch": 5.54, | |
| "grad_norm": 2.1478097438812256, | |
| "kl": 0.607421875, | |
| "learning_rate": 4.2256642985823387e-07, | |
| "loss": 0.012350899167358875, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4112658351659775, | |
| "reward_std": 0.023498238995671272, | |
| "rewards/MCQ_Reward/mean": 0.4112658351659775, | |
| "rewards/MCQ_Reward/std": 0.08520639687776566, | |
| "step": 277, | |
| "train_speed(iter/s)": 0.116375 | |
| }, | |
| { | |
| "clip_ratio": 0.004101653583347797, | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 2.062098503112793, | |
| "kl": 0.62109375, | |
| "learning_rate": 4.19432969704568e-07, | |
| "loss": 0.012091840617358685, | |
| "memory(GiB)": 18.17, | |
| "step": 278, | |
| "train_speed(iter/s)": 0.116723 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 252.5, | |
| "completions/mean_length": 122.69921875, | |
| "completions/min_length": 59.0, | |
| "epoch": 5.58, | |
| "grad_norm": 2.9315075874328613, | |
| "kl": 0.5390625, | |
| "learning_rate": 4.1630275477913465e-07, | |
| "loss": -0.013242216780781746, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39477604627609253, | |
| "reward_std": 0.02283278852701187, | |
| "rewards/MCQ_Reward/mean": 0.39477604627609253, | |
| "rewards/MCQ_Reward/std": 0.09505810588598251, | |
| "step": 279, | |
| "train_speed(iter/s)": 0.116608 | |
| }, | |
| { | |
| "clip_ratio": 0.006070411531254649, | |
| "epoch": 5.6, | |
| "grad_norm": 2.2812304496765137, | |
| "kl": 0.53515625, | |
| "learning_rate": 4.131759111665348e-07, | |
| "loss": -0.013854868710041046, | |
| "memory(GiB)": 18.17, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.116971 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 206.5, | |
| "completions/mean_length": 129.95703125, | |
| "completions/min_length": 60.5, | |
| "epoch": 5.62, | |
| "grad_norm": 2.015717029571533, | |
| "kl": 0.513671875, | |
| "learning_rate": 4.1005256481557306e-07, | |
| "loss": 0.0003234475152567029, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.40168674290180206, | |
| "reward_std": 0.020120804198086262, | |
| "rewards/MCQ_Reward/mean": 0.40168674290180206, | |
| "rewards/MCQ_Reward/std": 0.09599081426858902, | |
| "step": 281, | |
| "train_speed(iter/s)": 0.116542 | |
| }, | |
| { | |
| "clip_ratio": 0.0076590063981711864, | |
| "epoch": 5.64, | |
| "grad_norm": 2.828334331512451, | |
| "kl": 0.5009765625, | |
| "learning_rate": 4.0693284153418497e-07, | |
| "loss": 0.00015916512347757816, | |
| "memory(GiB)": 18.17, | |
| "step": 282, | |
| "train_speed(iter/s)": 0.116903 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 201.5, | |
| "completions/mean_length": 121.16015625, | |
| "completions/min_length": 71.5, | |
| "epoch": 5.66, | |
| "grad_norm": 2.985908269882202, | |
| "kl": 0.58203125, | |
| "learning_rate": 4.038168669843697e-07, | |
| "loss": -0.0021479236893355846, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4441321939229965, | |
| "reward_std": 0.021154197864234447, | |
| "rewards/MCQ_Reward/mean": 0.4441321939229965, | |
| "rewards/MCQ_Reward/std": 0.10662735998630524, | |
| "step": 283, | |
| "train_speed(iter/s)": 0.116806 | |
| }, | |
| { | |
| "clip_ratio": 0.00845325831323862, | |
| "epoch": 5.68, | |
| "grad_norm": 2.2008328437805176, | |
| "kl": 0.5703125, | |
| "learning_rate": 4.0070476667712736e-07, | |
| "loss": -0.0024233213625848293, | |
| "memory(GiB)": 18.17, | |
| "step": 284, | |
| "train_speed(iter/s)": 0.117157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 192.5, | |
| "completions/mean_length": 131.40625, | |
| "completions/min_length": 65.5, | |
| "epoch": 5.7, | |
| "grad_norm": 2.1404271125793457, | |
| "kl": 0.609375, | |
| "learning_rate": 3.9759666596740473e-07, | |
| "loss": 0.009725593030452728, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4451696425676346, | |
| "reward_std": 0.02477285359054804, | |
| "rewards/MCQ_Reward/mean": 0.4451696425676346, | |
| "rewards/MCQ_Reward/std": 0.07242370769381523, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.117116 | |
| }, | |
| { | |
| "clip_ratio": 0.004681814229115844, | |
| "epoch": 5.72, | |
| "grad_norm": 2.289313316345215, | |
| "kl": 0.61328125, | |
| "learning_rate": 3.9449269004904516e-07, | |
| "loss": 0.009346994571387768, | |
| "memory(GiB)": 18.17, | |
| "step": 286, | |
| "train_speed(iter/s)": 0.117466 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 160.0, | |
| "completions/mean_length": 104.30078125, | |
| "completions/min_length": 51.0, | |
| "epoch": 5.74, | |
| "grad_norm": 2.770270347595215, | |
| "kl": 1.189453125, | |
| "learning_rate": 3.913929639497462e-07, | |
| "loss": 0.009477443993091583, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43081943690776825, | |
| "reward_std": 0.025431891903281212, | |
| "rewards/MCQ_Reward/mean": 0.43081943690776825, | |
| "rewards/MCQ_Reward/std": 0.10991119593381882, | |
| "step": 287, | |
| "train_speed(iter/s)": 0.117471 | |
| }, | |
| { | |
| "clip_ratio": 0.006838085595518351, | |
| "epoch": 5.76, | |
| "grad_norm": 2.8960061073303223, | |
| "kl": 1.087890625, | |
| "learning_rate": 3.882976125260229e-07, | |
| "loss": 0.008670520968735218, | |
| "memory(GiB)": 18.17, | |
| "step": 288, | |
| "train_speed(iter/s)": 0.117827 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 172.5, | |
| "completions/mean_length": 114.71484375, | |
| "completions/min_length": 56.5, | |
| "epoch": 5.78, | |
| "grad_norm": 2.4359030723571777, | |
| "kl": 0.552734375, | |
| "learning_rate": 3.852067604581794e-07, | |
| "loss": 0.006409616209566593, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41095563769340515, | |
| "reward_std": 0.02436618786305189, | |
| "rewards/MCQ_Reward/mean": 0.41095563769340515, | |
| "rewards/MCQ_Reward/std": 0.09878598526120186, | |
| "step": 289, | |
| "train_speed(iter/s)": 0.117814 | |
| }, | |
| { | |
| "clip_ratio": 0.007955410983413458, | |
| "epoch": 5.8, | |
| "grad_norm": 3.950528383255005, | |
| "kl": 0.5390625, | |
| "learning_rate": 3.821205322452863e-07, | |
| "loss": 0.0066283950582146645, | |
| "memory(GiB)": 18.17, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.118161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 225.5, | |
| "completions/mean_length": 134.59375, | |
| "completions/min_length": 63.0, | |
| "epoch": 5.82, | |
| "grad_norm": 2.4326717853546143, | |
| "kl": 0.5263671875, | |
| "learning_rate": 3.790390522001662e-07, | |
| "loss": 0.002648044377565384, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4533398002386093, | |
| "reward_std": 0.023892495781183243, | |
| "rewards/MCQ_Reward/mean": 0.4533398002386093, | |
| "rewards/MCQ_Reward/std": 0.08347899466753006, | |
| "step": 291, | |
| "train_speed(iter/s)": 0.117724 | |
| }, | |
| { | |
| "clip_ratio": 0.004736665170639753, | |
| "epoch": 5.84, | |
| "grad_norm": 2.2011497020721436, | |
| "kl": 0.541015625, | |
| "learning_rate": 3.7596244444438574e-07, | |
| "loss": 0.002431286498904228, | |
| "memory(GiB)": 18.17, | |
| "step": 292, | |
| "train_speed(iter/s)": 0.118068 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 191.5, | |
| "completions/mean_length": 117.24609375, | |
| "completions/min_length": 63.5, | |
| "epoch": 5.86, | |
| "grad_norm": 2.58125376701355, | |
| "kl": 0.541015625, | |
| "learning_rate": 3.728908329032566e-07, | |
| "loss": -0.003335139248520136, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4097088426351547, | |
| "reward_std": 0.022918211296200752, | |
| "rewards/MCQ_Reward/mean": 0.4097088426351547, | |
| "rewards/MCQ_Reward/std": 0.1199105829000473, | |
| "step": 293, | |
| "train_speed(iter/s)": 0.118029 | |
| }, | |
| { | |
| "clip_ratio": 0.007036251947283745, | |
| "epoch": 5.88, | |
| "grad_norm": 2.4533321857452393, | |
| "kl": 0.5625, | |
| "learning_rate": 3.6982434130084396e-07, | |
| "loss": -0.0037924423813819885, | |
| "memory(GiB)": 18.17, | |
| "step": 294, | |
| "train_speed(iter/s)": 0.118366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 180.5, | |
| "completions/mean_length": 127.00390625, | |
| "completions/min_length": 75.0, | |
| "epoch": 5.9, | |
| "grad_norm": 2.2269814014434814, | |
| "kl": 0.5, | |
| "learning_rate": 3.6676309315498255e-07, | |
| "loss": 0.012001181952655315, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42691150307655334, | |
| "reward_std": 0.021617514081299305, | |
| "rewards/MCQ_Reward/mean": 0.42691150307655334, | |
| "rewards/MCQ_Reward/std": 0.11347687244415283, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.11833 | |
| }, | |
| { | |
| "clip_ratio": 0.004536686465144157, | |
| "epoch": 5.92, | |
| "grad_norm": 2.593670129776001, | |
| "kl": 0.513671875, | |
| "learning_rate": 3.6370721177230115e-07, | |
| "loss": 0.011945893988013268, | |
| "memory(GiB)": 18.17, | |
| "step": 296, | |
| "train_speed(iter/s)": 0.118674 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 231.5, | |
| "completions/mean_length": 123.5234375, | |
| "completions/min_length": 71.5, | |
| "epoch": 5.9399999999999995, | |
| "grad_norm": 2.1928629875183105, | |
| "kl": 0.4970703125, | |
| "learning_rate": 3.6065682024325617e-07, | |
| "loss": 0.015498391352593899, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41268619894981384, | |
| "reward_std": 0.02419480960816145, | |
| "rewards/MCQ_Reward/mean": 0.41268619894981384, | |
| "rewards/MCQ_Reward/std": 0.09195958822965622, | |
| "step": 297, | |
| "train_speed(iter/s)": 0.118532 | |
| }, | |
| { | |
| "clip_ratio": 0.0050865779630839825, | |
| "epoch": 5.96, | |
| "grad_norm": 2.1392431259155273, | |
| "kl": 0.494140625, | |
| "learning_rate": 3.5761204143717385e-07, | |
| "loss": 0.014891544356942177, | |
| "memory(GiB)": 18.17, | |
| "step": 298, | |
| "train_speed(iter/s)": 0.118872 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 226.5, | |
| "completions/mean_length": 124.328125, | |
| "completions/min_length": 64.5, | |
| "epoch": 5.98, | |
| "grad_norm": 2.7249698638916016, | |
| "kl": 0.880859375, | |
| "learning_rate": 3.5457299799730045e-07, | |
| "loss": -0.010070513002574444, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4588439464569092, | |
| "reward_std": 0.029408703558146954, | |
| "rewards/MCQ_Reward/mean": 0.4588439464569092, | |
| "rewards/MCQ_Reward/std": 0.09774744883179665, | |
| "step": 299, | |
| "train_speed(iter/s)": 0.118723 | |
| }, | |
| { | |
| "clip_ratio": 0.01025686739012599, | |
| "epoch": 6.0, | |
| "grad_norm": 3.8231394290924072, | |
| "kl": 0.7529296875, | |
| "learning_rate": 3.5153981233586274e-07, | |
| "loss": -0.009807607159018517, | |
| "memory(GiB)": 18.17, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.119048 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 185.5, | |
| "completions/mean_length": 109.19140625, | |
| "completions/min_length": 56.0, | |
| "epoch": 6.02, | |
| "grad_norm": 2.6895663738250732, | |
| "kl": 0.599609375, | |
| "learning_rate": 3.485126066291364e-07, | |
| "loss": -0.010052207857370377, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4080576002597809, | |
| "reward_std": 0.02562197484076023, | |
| "rewards/MCQ_Reward/mean": 0.4080576002597809, | |
| "rewards/MCQ_Reward/std": 0.09971121698617935, | |
| "step": 301, | |
| "train_speed(iter/s)": 0.118697 | |
| }, | |
| { | |
| "clip_ratio": 0.005149862729012966, | |
| "epoch": 6.04, | |
| "grad_norm": 2.655897855758667, | |
| "kl": 0.607421875, | |
| "learning_rate": 3.454915028125263e-07, | |
| "loss": -0.010359197854995728, | |
| "memory(GiB)": 18.17, | |
| "step": 302, | |
| "train_speed(iter/s)": 0.11903 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 179.5, | |
| "completions/mean_length": 117.90234375, | |
| "completions/min_length": 56.5, | |
| "epoch": 6.06, | |
| "grad_norm": 2.423926591873169, | |
| "kl": 0.546875, | |
| "learning_rate": 3.4247662257565366e-07, | |
| "loss": 0.018125958740711212, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4407869875431061, | |
| "reward_std": 0.025757532566785812, | |
| "rewards/MCQ_Reward/mean": 0.4407869875431061, | |
| "rewards/MCQ_Reward/std": 0.12692639231681824, | |
| "step": 303, | |
| "train_speed(iter/s)": 0.118923 | |
| }, | |
| { | |
| "clip_ratio": 0.00550723378546536, | |
| "epoch": 6.08, | |
| "grad_norm": 2.2029030323028564, | |
| "kl": 0.5546875, | |
| "learning_rate": 3.394680873574546e-07, | |
| "loss": 0.017929650843143463, | |
| "memory(GiB)": 18.17, | |
| "step": 304, | |
| "train_speed(iter/s)": 0.119254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 195.0, | |
| "completions/mean_length": 124.44140625, | |
| "completions/min_length": 54.5, | |
| "epoch": 6.1, | |
| "grad_norm": 2.3613805770874023, | |
| "kl": 0.5703125, | |
| "learning_rate": 3.3646601834128916e-07, | |
| "loss": -0.007877168245613575, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.49866482615470886, | |
| "reward_std": 0.024780258536338806, | |
| "rewards/MCQ_Reward/mean": 0.49866482615470886, | |
| "rewards/MCQ_Reward/std": 0.07562171667814255, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.11921 | |
| }, | |
| { | |
| "clip_ratio": 0.004300985252484679, | |
| "epoch": 6.12, | |
| "grad_norm": 2.1242995262145996, | |
| "kl": 0.576171875, | |
| "learning_rate": 3.3347053645005965e-07, | |
| "loss": -0.008408917114138603, | |
| "memory(GiB)": 18.17, | |
| "step": 306, | |
| "train_speed(iter/s)": 0.119519 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 159.5, | |
| "completions/mean_length": 105.72265625, | |
| "completions/min_length": 64.5, | |
| "epoch": 6.14, | |
| "grad_norm": 2.5641608238220215, | |
| "kl": 0.560546875, | |
| "learning_rate": 3.3048176234133963e-07, | |
| "loss": 0.0034052138216793537, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.3926085978746414, | |
| "reward_std": 0.01911616325378418, | |
| "rewards/MCQ_Reward/mean": 0.3926085978746414, | |
| "rewards/MCQ_Reward/std": 0.06766298227012157, | |
| "step": 307, | |
| "train_speed(iter/s)": 0.119522 | |
| }, | |
| { | |
| "clip_ratio": 0.007244990672916174, | |
| "epoch": 6.16, | |
| "grad_norm": 2.7589051723480225, | |
| "kl": 0.572265625, | |
| "learning_rate": 3.274998164025148e-07, | |
| "loss": 0.0031583395320922136, | |
| "memory(GiB)": 18.17, | |
| "step": 308, | |
| "train_speed(iter/s)": 0.119856 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 236.0, | |
| "completions/mean_length": 119.86328125, | |
| "completions/min_length": 57.0, | |
| "epoch": 6.18, | |
| "grad_norm": 2.9221317768096924, | |
| "kl": 0.611328125, | |
| "learning_rate": 3.245248187459323e-07, | |
| "loss": -0.019380319863557816, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.386982798576355, | |
| "reward_std": 0.026672961190342903, | |
| "rewards/MCQ_Reward/mean": 0.386982798576355, | |
| "rewards/MCQ_Reward/std": 0.10517054051160812, | |
| "step": 309, | |
| "train_speed(iter/s)": 0.119747 | |
| }, | |
| { | |
| "clip_ratio": 0.005416512954980135, | |
| "epoch": 6.2, | |
| "grad_norm": 2.7965259552001953, | |
| "kl": 0.61328125, | |
| "learning_rate": 3.215568892040641e-07, | |
| "loss": -0.019356630742549896, | |
| "memory(GiB)": 18.17, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.120077 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 200.5, | |
| "completions/mean_length": 118.21484375, | |
| "completions/min_length": 57.0, | |
| "epoch": 6.22, | |
| "grad_norm": 2.8668336868286133, | |
| "kl": 0.607421875, | |
| "learning_rate": 3.1859614732467954e-07, | |
| "loss": -0.013122756965458393, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4595968574285507, | |
| "reward_std": 0.024624092504382133, | |
| "rewards/MCQ_Reward/mean": 0.4595968574285507, | |
| "rewards/MCQ_Reward/std": 0.08434771373867989, | |
| "step": 311, | |
| "train_speed(iter/s)": 0.119696 | |
| }, | |
| { | |
| "clip_ratio": 0.00573662668466568, | |
| "epoch": 6.24, | |
| "grad_norm": 2.4580280780792236, | |
| "kl": 0.609375, | |
| "learning_rate": 3.156427123660297e-07, | |
| "loss": -0.013560149818658829, | |
| "memory(GiB)": 18.17, | |
| "step": 312, | |
| "train_speed(iter/s)": 0.120023 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 202.5, | |
| "completions/mean_length": 121.68359375, | |
| "completions/min_length": 73.5, | |
| "epoch": 6.26, | |
| "grad_norm": 2.6274502277374268, | |
| "kl": 0.58984375, | |
| "learning_rate": 3.1269670329204393e-07, | |
| "loss": 0.0022671520709991455, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44664010405540466, | |
| "reward_std": 0.024377938359975815, | |
| "rewards/MCQ_Reward/mean": 0.44664010405540466, | |
| "rewards/MCQ_Reward/std": 0.08575410395860672, | |
| "step": 313, | |
| "train_speed(iter/s)": 0.119945 | |
| }, | |
| { | |
| "clip_ratio": 0.0052670135628432035, | |
| "epoch": 6.28, | |
| "grad_norm": 2.753713607788086, | |
| "kl": 0.578125, | |
| "learning_rate": 3.097582387675385e-07, | |
| "loss": 0.0018416689708828926, | |
| "memory(GiB)": 18.17, | |
| "step": 314, | |
| "train_speed(iter/s)": 0.120272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 207.5, | |
| "completions/mean_length": 127.57421875, | |
| "completions/min_length": 77.0, | |
| "epoch": 6.3, | |
| "grad_norm": 2.4003334045410156, | |
| "kl": 0.583984375, | |
| "learning_rate": 3.068274371534356e-07, | |
| "loss": 0.0005114064551889896, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44641484320163727, | |
| "reward_std": 0.024146192707121372, | |
| "rewards/MCQ_Reward/mean": 0.44641484320163727, | |
| "rewards/MCQ_Reward/std": 0.08713827468454838, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.120168 | |
| }, | |
| { | |
| "clip_ratio": 0.008136166725307703, | |
| "epoch": 6.32, | |
| "grad_norm": 2.3975117206573486, | |
| "kl": 0.619140625, | |
| "learning_rate": 3.039044165019972e-07, | |
| "loss": 0.0004498562775552273, | |
| "memory(GiB)": 18.17, | |
| "step": 316, | |
| "train_speed(iter/s)": 0.120495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 196.5, | |
| "completions/mean_length": 117.9609375, | |
| "completions/min_length": 58.5, | |
| "epoch": 6.34, | |
| "grad_norm": 2.348710060119629, | |
| "kl": 0.548828125, | |
| "learning_rate": 3.00989294552069e-07, | |
| "loss": 0.00850888341665268, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42280539870262146, | |
| "reward_std": 0.02416596282273531, | |
| "rewards/MCQ_Reward/mean": 0.42280539870262146, | |
| "rewards/MCQ_Reward/std": 0.0933729000389576, | |
| "step": 317, | |
| "train_speed(iter/s)": 0.120401 | |
| }, | |
| { | |
| "clip_ratio": 0.005974379135295749, | |
| "epoch": 6.36, | |
| "grad_norm": 2.630732774734497, | |
| "kl": 0.5390625, | |
| "learning_rate": 2.9808218872433766e-07, | |
| "loss": 0.008482606150209904, | |
| "memory(GiB)": 18.17, | |
| "step": 318, | |
| "train_speed(iter/s)": 0.120723 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 200.0, | |
| "completions/mean_length": 123.66796875, | |
| "completions/min_length": 75.5, | |
| "epoch": 6.38, | |
| "grad_norm": 2.1341052055358887, | |
| "kl": 0.517578125, | |
| "learning_rate": 2.9518321611660234e-07, | |
| "loss": -0.0021673766896128654, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4051154851913452, | |
| "reward_std": 0.020906205289065838, | |
| "rewards/MCQ_Reward/mean": 0.4051154851913452, | |
| "rewards/MCQ_Reward/std": 0.09874700754880905, | |
| "step": 319, | |
| "train_speed(iter/s)": 0.12062 | |
| }, | |
| { | |
| "clip_ratio": 0.00719631533138454, | |
| "epoch": 6.4, | |
| "grad_norm": 3.2350962162017822, | |
| "kl": 0.5390625, | |
| "learning_rate": 2.922924934990568e-07, | |
| "loss": -0.0024176109582185745, | |
| "memory(GiB)": 18.17, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.120919 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 170.0, | |
| "completions/mean_length": 117.234375, | |
| "completions/min_length": 69.0, | |
| "epoch": 6.42, | |
| "grad_norm": 74.83729553222656, | |
| "kl": 20.791015625, | |
| "learning_rate": 2.894101373095867e-07, | |
| "loss": 0.04349440336227417, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44527527689933777, | |
| "reward_std": 0.021908948197960854, | |
| "rewards/MCQ_Reward/mean": 0.44527527689933777, | |
| "rewards/MCQ_Reward/std": 0.08160104416310787, | |
| "step": 321, | |
| "train_speed(iter/s)": 0.120602 | |
| }, | |
| { | |
| "clip_ratio": 0.004950069589540362, | |
| "epoch": 6.44, | |
| "grad_norm": 99.64342498779297, | |
| "kl": 26.54296875, | |
| "learning_rate": 2.8653626364907914e-07, | |
| "loss": 0.04914519935846329, | |
| "memory(GiB)": 18.17, | |
| "step": 322, | |
| "train_speed(iter/s)": 0.120907 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 232.0, | |
| "completions/mean_length": 128.45703125, | |
| "completions/min_length": 52.5, | |
| "epoch": 6.46, | |
| "grad_norm": 2.5322988033294678, | |
| "kl": 0.529296875, | |
| "learning_rate": 2.8367098827674576e-07, | |
| "loss": 0.009952299296855927, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4740261733531952, | |
| "reward_std": 0.023401367478072643, | |
| "rewards/MCQ_Reward/mean": 0.4740261733531952, | |
| "rewards/MCQ_Reward/std": 0.08106581121683121, | |
| "step": 323, | |
| "train_speed(iter/s)": 0.12071 | |
| }, | |
| { | |
| "clip_ratio": 0.005782874301075935, | |
| "epoch": 6.48, | |
| "grad_norm": 2.591923952102661, | |
| "kl": 0.53125, | |
| "learning_rate": 2.808144266054612e-07, | |
| "loss": 0.009899303317070007, | |
| "memory(GiB)": 18.17, | |
| "step": 324, | |
| "train_speed(iter/s)": 0.121029 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 202.5, | |
| "completions/mean_length": 133.33203125, | |
| "completions/min_length": 81.5, | |
| "epoch": 6.5, | |
| "grad_norm": 2.113783121109009, | |
| "kl": 0.537109375, | |
| "learning_rate": 2.779666936971129e-07, | |
| "loss": -0.0006487010978162289, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39647024869918823, | |
| "reward_std": 0.02249709703028202, | |
| "rewards/MCQ_Reward/mean": 0.39647024869918823, | |
| "rewards/MCQ_Reward/std": 0.0880400650203228, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.120986 | |
| }, | |
| { | |
| "clip_ratio": 0.006350549403578043, | |
| "epoch": 6.52, | |
| "grad_norm": 2.4789633750915527, | |
| "kl": 0.525390625, | |
| "learning_rate": 2.751279042579672e-07, | |
| "loss": -0.0002095792442560196, | |
| "memory(GiB)": 18.17, | |
| "step": 326, | |
| "train_speed(iter/s)": 0.121304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 199.0, | |
| "completions/mean_length": 126.234375, | |
| "completions/min_length": 54.0, | |
| "epoch": 6.54, | |
| "grad_norm": 2.4260339736938477, | |
| "kl": 0.548828125, | |
| "learning_rate": 2.7229817263404864e-07, | |
| "loss": -0.0033088945783674717, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4554037004709244, | |
| "reward_std": 0.02187604457139969, | |
| "rewards/MCQ_Reward/mean": 0.4554037004709244, | |
| "rewards/MCQ_Reward/std": 0.09804989397525787, | |
| "step": 327, | |
| "train_speed(iter/s)": 0.121167 | |
| }, | |
| { | |
| "clip_ratio": 0.008008664939552546, | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 4.365505695343018, | |
| "kl": 0.533203125, | |
| "learning_rate": 2.6947761280653447e-07, | |
| "loss": -0.00283604022115469, | |
| "memory(GiB)": 18.17, | |
| "step": 328, | |
| "train_speed(iter/s)": 0.121483 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 180.0, | |
| "completions/mean_length": 117.9609375, | |
| "completions/min_length": 69.5, | |
| "epoch": 6.58, | |
| "grad_norm": 2.2564356327056885, | |
| "kl": 0.5283203125, | |
| "learning_rate": 2.6666633838716314e-07, | |
| "loss": -0.0077381255105137825, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4396722763776779, | |
| "reward_std": 0.022700872272253036, | |
| "rewards/MCQ_Reward/mean": 0.4396722763776779, | |
| "rewards/MCQ_Reward/std": 0.10192850604653358, | |
| "step": 329, | |
| "train_speed(iter/s)": 0.12143 | |
| }, | |
| { | |
| "clip_ratio": 0.0047557426150888205, | |
| "epoch": 6.6, | |
| "grad_norm": 2.172281503677368, | |
| "kl": 0.5322265625, | |
| "learning_rate": 2.638644626136587e-07, | |
| "loss": -0.008173219859600067, | |
| "memory(GiB)": 18.17, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.121737 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 183.0, | |
| "completions/mean_length": 126.0859375, | |
| "completions/min_length": 68.5, | |
| "epoch": 6.62, | |
| "grad_norm": 2.167248010635376, | |
| "kl": 0.4873046875, | |
| "learning_rate": 2.610720983451685e-07, | |
| "loss": 0.018461888656020164, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44843943417072296, | |
| "reward_std": 0.02303914539515972, | |
| "rewards/MCQ_Reward/mean": 0.44843943417072296, | |
| "rewards/MCQ_Reward/std": 0.08497340604662895, | |
| "step": 331, | |
| "train_speed(iter/s)": 0.121397 | |
| }, | |
| { | |
| "clip_ratio": 0.0052658268250525, | |
| "epoch": 6.64, | |
| "grad_norm": 2.136260509490967, | |
| "kl": 0.4921875, | |
| "learning_rate": 2.58289358057718e-07, | |
| "loss": 0.01842992939054966, | |
| "memory(GiB)": 18.17, | |
| "step": 332, | |
| "train_speed(iter/s)": 0.121707 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 225.5, | |
| "completions/mean_length": 127.5546875, | |
| "completions/min_length": 65.5, | |
| "epoch": 6.66, | |
| "grad_norm": 2.595977306365967, | |
| "kl": 0.578125, | |
| "learning_rate": 2.555163538396806e-07, | |
| "loss": -0.011687211692333221, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4103027582168579, | |
| "reward_std": 0.02552829496562481, | |
| "rewards/MCQ_Reward/mean": 0.4103027582168579, | |
| "rewards/MCQ_Reward/std": 0.0971563570201397, | |
| "step": 333, | |
| "train_speed(iter/s)": 0.1216 | |
| }, | |
| { | |
| "clip_ratio": 0.0067884225863963366, | |
| "epoch": 6.68, | |
| "grad_norm": 3.2224881649017334, | |
| "kl": 0.59765625, | |
| "learning_rate": 2.5275319738726165e-07, | |
| "loss": -0.011430272832512856, | |
| "memory(GiB)": 18.17, | |
| "step": 334, | |
| "train_speed(iter/s)": 0.121912 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 220.0, | |
| "completions/mean_length": 123.2578125, | |
| "completions/min_length": 75.0, | |
| "epoch": 6.7, | |
| "grad_norm": 2.387573480606079, | |
| "kl": 0.56640625, | |
| "learning_rate": 2.500000000000001e-07, | |
| "loss": -0.006422008387744427, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4134673774242401, | |
| "reward_std": 0.022745592519640923, | |
| "rewards/MCQ_Reward/mean": 0.4134673774242401, | |
| "rewards/MCQ_Reward/std": 0.10698199272155762, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.121789 | |
| }, | |
| { | |
| "clip_ratio": 0.007158383261412382, | |
| "epoch": 6.72, | |
| "grad_norm": 2.7240705490112305, | |
| "kl": 0.564453125, | |
| "learning_rate": 2.472568725762853e-07, | |
| "loss": -0.0065142130479216576, | |
| "memory(GiB)": 18.17, | |
| "step": 336, | |
| "train_speed(iter/s)": 0.122088 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 153.5, | |
| "completions/mean_length": 108.890625, | |
| "completions/min_length": 63.5, | |
| "epoch": 6.74, | |
| "grad_norm": 2.2466800212860107, | |
| "kl": 0.7421875, | |
| "learning_rate": 2.4452392560888976e-07, | |
| "loss": -0.00018489733338356018, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42812955379486084, | |
| "reward_std": 0.0208740271627903, | |
| "rewards/MCQ_Reward/mean": 0.42812955379486084, | |
| "rewards/MCQ_Reward/std": 0.08048268780112267, | |
| "step": 337, | |
| "train_speed(iter/s)": 0.12208 | |
| }, | |
| { | |
| "clip_ratio": 0.005281613674014807, | |
| "epoch": 6.76, | |
| "grad_norm": 2.0434200763702393, | |
| "kl": 0.771484375, | |
| "learning_rate": 2.418012691805191e-07, | |
| "loss": -0.0005159445572644472, | |
| "memory(GiB)": 18.17, | |
| "step": 338, | |
| "train_speed(iter/s)": 0.122388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 202.0, | |
| "completions/mean_length": 117.3984375, | |
| "completions/min_length": 65.0, | |
| "epoch": 6.78, | |
| "grad_norm": 2.669919729232788, | |
| "kl": 0.572265625, | |
| "learning_rate": 2.390890129593771e-07, | |
| "loss": -0.009503326378762722, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41273191571235657, | |
| "reward_std": 0.023225258104503155, | |
| "rewards/MCQ_Reward/mean": 0.41273191571235657, | |
| "rewards/MCQ_Reward/std": 0.08152876608073711, | |
| "step": 339, | |
| "train_speed(iter/s)": 0.122302 | |
| }, | |
| { | |
| "clip_ratio": 0.005108103854581714, | |
| "epoch": 6.8, | |
| "grad_norm": 2.5069973468780518, | |
| "kl": 0.576171875, | |
| "learning_rate": 2.3638726619474875e-07, | |
| "loss": -0.009927002713084221, | |
| "memory(GiB)": 18.17, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.122605 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 205.5, | |
| "completions/mean_length": 121.0703125, | |
| "completions/min_length": 66.0, | |
| "epoch": 6.82, | |
| "grad_norm": 2.5319740772247314, | |
| "kl": 0.59765625, | |
| "learning_rate": 2.3369613771260005e-07, | |
| "loss": 0.004871162120252848, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39162860810756683, | |
| "reward_std": 0.022268068976700306, | |
| "rewards/MCQ_Reward/mean": 0.39162860810756683, | |
| "rewards/MCQ_Reward/std": 0.07392172142863274, | |
| "step": 341, | |
| "train_speed(iter/s)": 0.12225 | |
| }, | |
| { | |
| "clip_ratio": 0.004840584937483072, | |
| "epoch": 6.84, | |
| "grad_norm": 2.547236204147339, | |
| "kl": 0.60546875, | |
| "learning_rate": 2.310157359111938e-07, | |
| "loss": 0.004931057803332806, | |
| "memory(GiB)": 18.17, | |
| "step": 342, | |
| "train_speed(iter/s)": 0.122534 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 201.5, | |
| "completions/mean_length": 124.6328125, | |
| "completions/min_length": 67.5, | |
| "epoch": 6.86, | |
| "grad_norm": 2.610426664352417, | |
| "kl": 0.5419921875, | |
| "learning_rate": 2.283461687567236e-07, | |
| "loss": 0.012133005075156689, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38104377686977386, | |
| "reward_std": 0.023476887494325638, | |
| "rewards/MCQ_Reward/mean": 0.38104377686977386, | |
| "rewards/MCQ_Reward/std": 0.13691367208957672, | |
| "step": 343, | |
| "train_speed(iter/s)": 0.122472 | |
| }, | |
| { | |
| "clip_ratio": 0.005503881955519319, | |
| "epoch": 6.88, | |
| "grad_norm": 2.517308473587036, | |
| "kl": 0.5458984375, | |
| "learning_rate": 2.2568754377896515e-07, | |
| "loss": 0.012206798419356346, | |
| "memory(GiB)": 18.17, | |
| "step": 344, | |
| "train_speed(iter/s)": 0.122771 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 191.0, | |
| "completions/mean_length": 122.8125, | |
| "completions/min_length": 54.0, | |
| "epoch": 6.9, | |
| "grad_norm": 2.268815517425537, | |
| "kl": 0.576171875, | |
| "learning_rate": 2.2303996806694486e-07, | |
| "loss": 0.005438795313239098, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41502565145492554, | |
| "reward_std": 0.021418385207653046, | |
| "rewards/MCQ_Reward/mean": 0.41502565145492554, | |
| "rewards/MCQ_Reward/std": 0.09508999437093735, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.122753 | |
| }, | |
| { | |
| "clip_ratio": 0.005775286350399256, | |
| "epoch": 6.92, | |
| "grad_norm": 2.83811616897583, | |
| "kl": 0.603515625, | |
| "learning_rate": 2.2040354826462664e-07, | |
| "loss": 0.005799311213195324, | |
| "memory(GiB)": 18.17, | |
| "step": 346, | |
| "train_speed(iter/s)": 0.123049 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 164.5, | |
| "completions/mean_length": 116.91796875, | |
| "completions/min_length": 65.5, | |
| "epoch": 6.9399999999999995, | |
| "grad_norm": 2.334526777267456, | |
| "kl": 0.564453125, | |
| "learning_rate": 2.177783905666155e-07, | |
| "loss": 0.0054929498583078384, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.39654283225536346, | |
| "reward_std": 0.022173049859702587, | |
| "rewards/MCQ_Reward/mean": 0.39654283225536346, | |
| "rewards/MCQ_Reward/std": 0.09505746513605118, | |
| "step": 347, | |
| "train_speed(iter/s)": 0.123026 | |
| }, | |
| { | |
| "clip_ratio": 0.0045166281051933765, | |
| "epoch": 6.96, | |
| "grad_norm": 2.271827220916748, | |
| "kl": 0.564453125, | |
| "learning_rate": 2.151646007138806e-07, | |
| "loss": 0.0055296882055699825, | |
| "memory(GiB)": 18.17, | |
| "step": 348, | |
| "train_speed(iter/s)": 0.123296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 219.0, | |
| "completions/mean_length": 130.65625, | |
| "completions/min_length": 77.5, | |
| "epoch": 6.98, | |
| "grad_norm": 2.0946249961853027, | |
| "kl": 0.55859375, | |
| "learning_rate": 2.125622839894964e-07, | |
| "loss": 0.003636482171714306, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43836964666843414, | |
| "reward_std": 0.021374424919486046, | |
| "rewards/MCQ_Reward/mean": 0.43836964666843414, | |
| "rewards/MCQ_Reward/std": 0.06100250408053398, | |
| "step": 349, | |
| "train_speed(iter/s)": 0.123225 | |
| }, | |
| { | |
| "clip_ratio": 0.0046428050845861435, | |
| "epoch": 7.0, | |
| "grad_norm": 2.23724365234375, | |
| "kl": 0.57421875, | |
| "learning_rate": 2.0997154521440097e-07, | |
| "loss": 0.004051330033689737, | |
| "memory(GiB)": 18.17, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.123516 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 188.0, | |
| "completions/mean_length": 121.21484375, | |
| "completions/min_length": 72.0, | |
| "epoch": 7.02, | |
| "grad_norm": 2.815627336502075, | |
| "kl": 0.5703125, | |
| "learning_rate": 2.0739248874317438e-07, | |
| "loss": -0.019233888015151024, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4290418028831482, | |
| "reward_std": 0.022210314869880676, | |
| "rewards/MCQ_Reward/mean": 0.4290418028831482, | |
| "rewards/MCQ_Reward/std": 0.06661852076649666, | |
| "step": 351, | |
| "train_speed(iter/s)": 0.123139 | |
| }, | |
| { | |
| "clip_ratio": 0.00514651439152658, | |
| "epoch": 7.04, | |
| "grad_norm": 3.0636136531829834, | |
| "kl": 0.576171875, | |
| "learning_rate": 2.048252184598352e-07, | |
| "loss": -0.01901531219482422, | |
| "memory(GiB)": 18.17, | |
| "step": 352, | |
| "train_speed(iter/s)": 0.12342 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 169.0, | |
| "completions/mean_length": 112.85546875, | |
| "completions/min_length": 62.5, | |
| "epoch": 7.06, | |
| "grad_norm": 2.700939178466797, | |
| "kl": 0.58203125, | |
| "learning_rate": 2.0226983777365603e-07, | |
| "loss": -0.007234710268676281, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43640220165252686, | |
| "reward_std": 0.022726435214281082, | |
| "rewards/MCQ_Reward/mean": 0.43640220165252686, | |
| "rewards/MCQ_Reward/std": 0.08832718059420586, | |
| "step": 353, | |
| "train_speed(iter/s)": 0.123424 | |
| }, | |
| { | |
| "clip_ratio": 0.00972440093755722, | |
| "epoch": 7.08, | |
| "grad_norm": 3.0179059505462646, | |
| "kl": 0.564453125, | |
| "learning_rate": 1.9972644961499853e-07, | |
| "loss": -0.007274748291820288, | |
| "memory(GiB)": 18.17, | |
| "step": 354, | |
| "train_speed(iter/s)": 0.123722 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 212.5, | |
| "completions/mean_length": 115.6328125, | |
| "completions/min_length": 68.0, | |
| "epoch": 7.1, | |
| "grad_norm": 2.484236240386963, | |
| "kl": 0.619140625, | |
| "learning_rate": 1.9719515643116674e-07, | |
| "loss": 0.015900151804089546, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.45114465057849884, | |
| "reward_std": 0.024738659150898457, | |
| "rewards/MCQ_Reward/mean": 0.45114465057849884, | |
| "rewards/MCQ_Reward/std": 0.10900644585490227, | |
| "step": 355, | |
| "train_speed(iter/s)": 0.123607 | |
| }, | |
| { | |
| "clip_ratio": 0.0064309455920010805, | |
| "epoch": 7.12, | |
| "grad_norm": 3.852499485015869, | |
| "kl": 0.607421875, | |
| "learning_rate": 1.9467606018228088e-07, | |
| "loss": 0.01630295254290104, | |
| "memory(GiB)": 18.17, | |
| "step": 356, | |
| "train_speed(iter/s)": 0.123891 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 195.0, | |
| "completions/mean_length": 128.88671875, | |
| "completions/min_length": 74.5, | |
| "epoch": 7.14, | |
| "grad_norm": 2.455781936645508, | |
| "kl": 0.5478515625, | |
| "learning_rate": 1.9216926233717084e-07, | |
| "loss": -0.00730013195425272, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4758221060037613, | |
| "reward_std": 0.024665928445756435, | |
| "rewards/MCQ_Reward/mean": 0.4758221060037613, | |
| "rewards/MCQ_Reward/std": 0.0809130035340786, | |
| "step": 357, | |
| "train_speed(iter/s)": 0.123852 | |
| }, | |
| { | |
| "clip_ratio": 0.00344535568729043, | |
| "epoch": 7.16, | |
| "grad_norm": 2.2257754802703857, | |
| "kl": 0.5576171875, | |
| "learning_rate": 1.8967486386928817e-07, | |
| "loss": -0.0074045369401574135, | |
| "memory(GiB)": 18.17, | |
| "step": 358, | |
| "train_speed(iter/s)": 0.124151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 217.0, | |
| "completions/mean_length": 130.06640625, | |
| "completions/min_length": 67.5, | |
| "epoch": 7.18, | |
| "grad_norm": 2.7154037952423096, | |
| "kl": 0.51171875, | |
| "learning_rate": 1.8719296525263923e-07, | |
| "loss": 0.019313501194119453, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4561205357313156, | |
| "reward_std": 0.023944508284330368, | |
| "rewards/MCQ_Reward/mean": 0.4561205357313156, | |
| "rewards/MCQ_Reward/std": 0.10000644996762276, | |
| "step": 359, | |
| "train_speed(iter/s)": 0.124074 | |
| }, | |
| { | |
| "clip_ratio": 0.006082270760089159, | |
| "epoch": 7.2, | |
| "grad_norm": 2.114431381225586, | |
| "kl": 0.5234375, | |
| "learning_rate": 1.847236664577389e-07, | |
| "loss": 0.01907144859433174, | |
| "memory(GiB)": 18.17, | |
| "step": 360, | |
| "train_speed(iter/s)": 0.124368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 223.5, | |
| "completions/mean_length": 130.765625, | |
| "completions/min_length": 79.0, | |
| "epoch": 7.22, | |
| "grad_norm": 2.2248895168304443, | |
| "kl": 0.5390625, | |
| "learning_rate": 1.8226706694758193e-07, | |
| "loss": 0.012620393186807632, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.44832468032836914, | |
| "reward_std": 0.025768463499844074, | |
| "rewards/MCQ_Reward/mean": 0.44832468032836914, | |
| "rewards/MCQ_Reward/std": 0.09799568355083466, | |
| "step": 361, | |
| "train_speed(iter/s)": 0.123928 | |
| }, | |
| { | |
| "clip_ratio": 0.006066091358661652, | |
| "epoch": 7.24, | |
| "grad_norm": 2.5757896900177, | |
| "kl": 0.53515625, | |
| "learning_rate": 1.7982326567363886e-07, | |
| "loss": 0.013028541579842567, | |
| "memory(GiB)": 18.17, | |
| "step": 362, | |
| "train_speed(iter/s)": 0.124219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 194.5, | |
| "completions/mean_length": 122.546875, | |
| "completions/min_length": 50.5, | |
| "epoch": 7.26, | |
| "grad_norm": 2.2651302814483643, | |
| "kl": 0.5322265625, | |
| "learning_rate": 1.7739236107186857e-07, | |
| "loss": 0.009481780230998993, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4318048655986786, | |
| "reward_std": 0.022731643170118332, | |
| "rewards/MCQ_Reward/mean": 0.4318048655986786, | |
| "rewards/MCQ_Reward/std": 0.09833444282412529, | |
| "step": 363, | |
| "train_speed(iter/s)": 0.124163 | |
| }, | |
| { | |
| "clip_ratio": 0.0038783656200394034, | |
| "epoch": 7.28, | |
| "grad_norm": 2.2316813468933105, | |
| "kl": 0.5302734375, | |
| "learning_rate": 1.7497445105875374e-07, | |
| "loss": 0.009487325325608253, | |
| "memory(GiB)": 18.17, | |
| "step": 364, | |
| "train_speed(iter/s)": 0.124456 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 236.5, | |
| "completions/mean_length": 131.63671875, | |
| "completions/min_length": 61.5, | |
| "epoch": 7.3, | |
| "grad_norm": 2.720024347305298, | |
| "kl": 0.5517578125, | |
| "learning_rate": 1.725696330273575e-07, | |
| "loss": 0.0073198857717216015, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4407372921705246, | |
| "reward_std": 0.019983571954071522, | |
| "rewards/MCQ_Reward/mean": 0.4407372921705246, | |
| "rewards/MCQ_Reward/std": 0.07775032892823219, | |
| "step": 365, | |
| "train_speed(iter/s)": 0.124298 | |
| }, | |
| { | |
| "clip_ratio": 0.005759742809459567, | |
| "epoch": 7.32, | |
| "grad_norm": 2.4700775146484375, | |
| "kl": 0.5556640625, | |
| "learning_rate": 1.7017800384339924e-07, | |
| "loss": 0.00751863420009613, | |
| "memory(GiB)": 18.17, | |
| "step": 366, | |
| "train_speed(iter/s)": 0.124588 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 240.5, | |
| "completions/mean_length": 122.73828125, | |
| "completions/min_length": 64.5, | |
| "epoch": 7.34, | |
| "grad_norm": 2.3976547718048096, | |
| "kl": 0.541015625, | |
| "learning_rate": 1.6779965984135374e-07, | |
| "loss": 0.015993405133485794, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41162461042404175, | |
| "reward_std": 0.020391933619976044, | |
| "rewards/MCQ_Reward/mean": 0.41162461042404175, | |
| "rewards/MCQ_Reward/std": 0.0841926857829094, | |
| "step": 367, | |
| "train_speed(iter/s)": 0.124346 | |
| }, | |
| { | |
| "clip_ratio": 0.005305928410962224, | |
| "epoch": 7.36, | |
| "grad_norm": 2.444512128829956, | |
| "kl": 0.546875, | |
| "learning_rate": 1.6543469682057104e-07, | |
| "loss": 0.016359636560082436, | |
| "memory(GiB)": 18.17, | |
| "step": 368, | |
| "train_speed(iter/s)": 0.124615 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 156.0, | |
| "completions/mean_length": 113.90234375, | |
| "completions/min_length": 68.5, | |
| "epoch": 7.38, | |
| "grad_norm": 3.490565299987793, | |
| "kl": 0.57421875, | |
| "learning_rate": 1.6308321004141607e-07, | |
| "loss": -0.0010942098451778293, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.38713136315345764, | |
| "reward_std": 0.021422830410301685, | |
| "rewards/MCQ_Reward/mean": 0.38713136315345764, | |
| "rewards/MCQ_Reward/std": 0.10617586970329285, | |
| "step": 369, | |
| "train_speed(iter/s)": 0.124639 | |
| }, | |
| { | |
| "clip_ratio": 0.005288022803142667, | |
| "epoch": 7.4, | |
| "grad_norm": 2.881525754928589, | |
| "kl": 0.564453125, | |
| "learning_rate": 1.6074529422143396e-07, | |
| "loss": -0.0009173217695206404, | |
| "memory(GiB)": 18.17, | |
| "step": 370, | |
| "train_speed(iter/s)": 0.124914 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.5, | |
| "completions/mean_length": 139.4375, | |
| "completions/min_length": 87.0, | |
| "epoch": 7.42, | |
| "grad_norm": 2.1569535732269287, | |
| "kl": 0.49609375, | |
| "learning_rate": 1.5842104353153285e-07, | |
| "loss": 0.014979809522628784, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4273018389940262, | |
| "reward_std": 0.02148488350212574, | |
| "rewards/MCQ_Reward/mean": 0.4273018389940262, | |
| "rewards/MCQ_Reward/std": 0.13347461819648743, | |
| "step": 371, | |
| "train_speed(iter/s)": 0.124503 | |
| }, | |
| { | |
| "clip_ratio": 0.006136654410511255, | |
| "epoch": 7.44, | |
| "grad_norm": 2.3948974609375, | |
| "kl": 0.486328125, | |
| "learning_rate": 1.561105515921915e-07, | |
| "loss": 0.015109008178114891, | |
| "memory(GiB)": 18.17, | |
| "step": 372, | |
| "train_speed(iter/s)": 0.124788 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.5, | |
| "completions/mean_length": 117.00390625, | |
| "completions/min_length": 69.5, | |
| "epoch": 7.46, | |
| "grad_norm": 2.3135647773742676, | |
| "kl": 0.669921875, | |
| "learning_rate": 1.5381391146968863e-07, | |
| "loss": 0.006555130705237389, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4488084018230438, | |
| "reward_std": 0.02006101794540882, | |
| "rewards/MCQ_Reward/mean": 0.4488084018230438, | |
| "rewards/MCQ_Reward/std": 0.07920502312481403, | |
| "step": 373, | |
| "train_speed(iter/s)": 0.124722 | |
| }, | |
| { | |
| "clip_ratio": 0.007013680646196008, | |
| "epoch": 7.48, | |
| "grad_norm": 2.962529420852661, | |
| "kl": 0.642578125, | |
| "learning_rate": 1.5153121567235333e-07, | |
| "loss": 0.006604420021176338, | |
| "memory(GiB)": 18.17, | |
| "step": 374, | |
| "train_speed(iter/s)": 0.125001 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 169.0, | |
| "completions/mean_length": 107.60546875, | |
| "completions/min_length": 53.5, | |
| "epoch": 7.5, | |
| "grad_norm": 2.731383800506592, | |
| "kl": 0.576171875, | |
| "learning_rate": 1.492625561468393e-07, | |
| "loss": -0.005473949480801821, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.41762372851371765, | |
| "reward_std": 0.019964593462646008, | |
| "rewards/MCQ_Reward/mean": 0.41762372851371765, | |
| "rewards/MCQ_Reward/std": 0.08107879385352135, | |
| "step": 375, | |
| "train_speed(iter/s)": 0.124937 | |
| }, | |
| { | |
| "clip_ratio": 0.004663396626710892, | |
| "epoch": 7.52, | |
| "grad_norm": 2.615187406539917, | |
| "kl": 0.576171875, | |
| "learning_rate": 1.4700802427442178e-07, | |
| "loss": -0.005246948450803757, | |
| "memory(GiB)": 18.17, | |
| "step": 376, | |
| "train_speed(iter/s)": 0.125201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.5, | |
| "completions/mean_length": 107.15625, | |
| "completions/min_length": 50.5, | |
| "epoch": 7.54, | |
| "grad_norm": 2.796724557876587, | |
| "kl": 0.640625, | |
| "learning_rate": 1.4476771086731565e-07, | |
| "loss": 0.01410718634724617, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.4095290005207062, | |
| "reward_std": 0.02420712448656559, | |
| "rewards/MCQ_Reward/mean": 0.4095290005207062, | |
| "rewards/MCQ_Reward/std": 0.07465272396802902, | |
| "step": 377, | |
| "train_speed(iter/s)": 0.125163 | |
| }, | |
| { | |
| "clip_ratio": 0.006976983975619078, | |
| "epoch": 7.5600000000000005, | |
| "grad_norm": 2.945889711380005, | |
| "kl": 0.66015625, | |
| "learning_rate": 1.4254170616501827e-07, | |
| "loss": 0.014726857654750347, | |
| "memory(GiB)": 18.17, | |
| "step": 378, | |
| "train_speed(iter/s)": 0.125433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 174.0, | |
| "completions/mean_length": 118.50390625, | |
| "completions/min_length": 63.5, | |
| "epoch": 7.58, | |
| "grad_norm": 2.9761271476745605, | |
| "kl": 0.607421875, | |
| "learning_rate": 1.4033009983067452e-07, | |
| "loss": -0.004153972025960684, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.42119112610816956, | |
| "reward_std": 0.02067422866821289, | |
| "rewards/MCQ_Reward/mean": 0.42119112610816956, | |
| "rewards/MCQ_Reward/std": 0.0681285560131073, | |
| "step": 379, | |
| "train_speed(iter/s)": 0.125369 | |
| }, | |
| { | |
| "clip_ratio": 0.0061764034908264875, | |
| "epoch": 7.6, | |
| "grad_norm": 3.6120944023132324, | |
| "kl": 0.6171875, | |
| "learning_rate": 1.381329809474649e-07, | |
| "loss": -0.0035073161125183105, | |
| "memory(GiB)": 18.17, | |
| "step": 380, | |
| "train_speed(iter/s)": 0.125649 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 194.0, | |
| "completions/mean_length": 130.67578125, | |
| "completions/min_length": 79.0, | |
| "epoch": 7.62, | |
| "grad_norm": 2.3507981300354004, | |
| "kl": 0.5419921875, | |
| "learning_rate": 1.3595043801501794e-07, | |
| "loss": -0.0032176347449421883, | |
| "memory(GiB)": 18.17, | |
| "reward": 0.43415170907974243, | |
| "reward_std": 0.021646766923367977, | |
| "rewards/MCQ_Reward/mean": 0.43415170907974243, | |
| "rewards/MCQ_Reward/std": 0.11485166102647781, | |
| "step": 381, | |
| "train_speed(iter/s)": 0.125308 | |
| }, | |
| { | |
| "clip_ratio": 0.006046550814062357, | |
| "epoch": 7.64, | |
| "grad_norm": 2.5917809009552, | |
| "kl": 0.541015625, | |
| "learning_rate": 1.3378255894584462e-07, | |
| "loss": -0.0032573172356933355, | |
| "memory(GiB)": 18.17, | |
| "step": 382, | |
| "train_speed(iter/s)": 0.125575 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 591.0, | |
| "completions/mean_length": 111.87109375, | |
| "completions/min_length": 62.5, | |
| "epoch": 7.66, | |
| "grad_norm": 3.2898316383361816, | |
| "kl": 0.84375, | |
| "learning_rate": 1.3162943106179748e-07, | |
| "loss": 0.05431316792964935, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4442131072282791, | |
| "reward_std": 0.02893070410937071, | |
| "rewards/MCQ_Reward/mean": 0.4442131072282791, | |
| "rewards/MCQ_Reward/std": 0.0882490873336792, | |
| "step": 383, | |
| "train_speed(iter/s)": 0.124772 | |
| }, | |
| { | |
| "clip_ratio": 0.005024469457566738, | |
| "epoch": 7.68, | |
| "grad_norm": 3.0035033226013184, | |
| "kl": 0.82421875, | |
| "learning_rate": 1.2949114109055414e-07, | |
| "loss": 0.054804857820272446, | |
| "memory(GiB)": 25.14, | |
| "step": 384, | |
| "train_speed(iter/s)": 0.125047 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 236.5, | |
| "completions/mean_length": 125.80078125, | |
| "completions/min_length": 67.0, | |
| "epoch": 7.7, | |
| "grad_norm": 2.8262860774993896, | |
| "kl": 0.55078125, | |
| "learning_rate": 1.2736777516212267e-07, | |
| "loss": -0.006510823965072632, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.40428027510643005, | |
| "reward_std": 0.025332522578537464, | |
| "rewards/MCQ_Reward/mean": 0.40428027510643005, | |
| "rewards/MCQ_Reward/std": 0.10921913757920265, | |
| "step": 385, | |
| "train_speed(iter/s)": 0.124957 | |
| }, | |
| { | |
| "clip_ratio": 0.005720158107578754, | |
| "epoch": 7.72, | |
| "grad_norm": 2.3165252208709717, | |
| "kl": 0.54296875, | |
| "learning_rate": 1.2525941880537304e-07, | |
| "loss": -0.006398671306669712, | |
| "memory(GiB)": 25.14, | |
| "step": 386, | |
| "train_speed(iter/s)": 0.125223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 171.5, | |
| "completions/mean_length": 115.546875, | |
| "completions/min_length": 68.5, | |
| "epoch": 7.74, | |
| "grad_norm": 2.5941028594970703, | |
| "kl": 0.650390625, | |
| "learning_rate": 1.2316615694459186e-07, | |
| "loss": 0.013789664953947067, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4454474151134491, | |
| "reward_std": 0.02376528736203909, | |
| "rewards/MCQ_Reward/mean": 0.4454474151134491, | |
| "rewards/MCQ_Reward/std": 0.07124818488955498, | |
| "step": 387, | |
| "train_speed(iter/s)": 0.125174 | |
| }, | |
| { | |
| "clip_ratio": 0.00573781062848866, | |
| "epoch": 7.76, | |
| "grad_norm": 2.886561393737793, | |
| "kl": 0.634765625, | |
| "learning_rate": 1.2108807389606158e-07, | |
| "loss": 0.014278584159910679, | |
| "memory(GiB)": 25.14, | |
| "step": 388, | |
| "train_speed(iter/s)": 0.125449 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 183.0, | |
| "completions/mean_length": 121.00390625, | |
| "completions/min_length": 57.5, | |
| "epoch": 7.78, | |
| "grad_norm": 2.2996103763580322, | |
| "kl": 0.6171875, | |
| "learning_rate": 1.1902525336466462e-07, | |
| "loss": 0.012145346030592918, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42450854182243347, | |
| "reward_std": 0.021244493313133717, | |
| "rewards/MCQ_Reward/mean": 0.42450854182243347, | |
| "rewards/MCQ_Reward/std": 0.09635130688548088, | |
| "step": 389, | |
| "train_speed(iter/s)": 0.125399 | |
| }, | |
| { | |
| "clip_ratio": 0.005426776595413685, | |
| "epoch": 7.8, | |
| "grad_norm": 2.1788930892944336, | |
| "kl": 0.62890625, | |
| "learning_rate": 1.1697777844051104e-07, | |
| "loss": 0.011829939670860767, | |
| "memory(GiB)": 25.14, | |
| "step": 390, | |
| "train_speed(iter/s)": 0.125672 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 216.5, | |
| "completions/mean_length": 129.03125, | |
| "completions/min_length": 70.0, | |
| "epoch": 7.82, | |
| "grad_norm": 2.2412619590759277, | |
| "kl": 0.53515625, | |
| "learning_rate": 1.1494573159559212e-07, | |
| "loss": 9.762030094861984e-05, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4155340790748596, | |
| "reward_std": 0.020521354861557484, | |
| "rewards/MCQ_Reward/mean": 0.4155340790748596, | |
| "rewards/MCQ_Reward/std": 0.12795967236161232, | |
| "step": 391, | |
| "train_speed(iter/s)": 0.125325 | |
| }, | |
| { | |
| "clip_ratio": 0.005442213034257293, | |
| "epoch": 7.84, | |
| "grad_norm": 2.445225954055786, | |
| "kl": 0.54296875, | |
| "learning_rate": 1.1292919468045875e-07, | |
| "loss": 0.0006964541971683502, | |
| "memory(GiB)": 25.14, | |
| "step": 392, | |
| "train_speed(iter/s)": 0.125594 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 180.5, | |
| "completions/mean_length": 129.45703125, | |
| "completions/min_length": 68.5, | |
| "epoch": 7.86, | |
| "grad_norm": 2.254128932952881, | |
| "kl": 0.607421875, | |
| "learning_rate": 1.1092824892092373e-07, | |
| "loss": -0.010345934890210629, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.40340456366539, | |
| "reward_std": 0.022636689245700836, | |
| "rewards/MCQ_Reward/mean": 0.40340456366539, | |
| "rewards/MCQ_Reward/std": 0.09724823385477066, | |
| "step": 393, | |
| "train_speed(iter/s)": 0.125579 | |
| }, | |
| { | |
| "clip_ratio": 0.004930965369567275, | |
| "epoch": 7.88, | |
| "grad_norm": 2.3455586433410645, | |
| "kl": 0.623046875, | |
| "learning_rate": 1.0894297491479043e-07, | |
| "loss": -0.009814320132136345, | |
| "memory(GiB)": 25.14, | |
| "step": 394, | |
| "train_speed(iter/s)": 0.125852 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 211.5, | |
| "completions/mean_length": 122.03125, | |
| "completions/min_length": 72.5, | |
| "epoch": 7.9, | |
| "grad_norm": 2.7601866722106934, | |
| "kl": 0.54296875, | |
| "learning_rate": 1.0697345262860635e-07, | |
| "loss": 0.011853070929646492, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.44544240832328796, | |
| "reward_std": 0.02559925615787506, | |
| "rewards/MCQ_Reward/mean": 0.44544240832328796, | |
| "rewards/MCQ_Reward/std": 0.09495911747217178, | |
| "step": 395, | |
| "train_speed(iter/s)": 0.125762 | |
| }, | |
| { | |
| "clip_ratio": 0.004873325582593679, | |
| "epoch": 7.92, | |
| "grad_norm": 3.1385254859924316, | |
| "kl": 0.541015625, | |
| "learning_rate": 1.0501976139444191e-07, | |
| "loss": 0.01212891936302185, | |
| "memory(GiB)": 25.14, | |
| "step": 396, | |
| "train_speed(iter/s)": 0.126021 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 186.0, | |
| "completions/mean_length": 131.75, | |
| "completions/min_length": 80.0, | |
| "epoch": 7.9399999999999995, | |
| "grad_norm": 2.280336380004883, | |
| "kl": 0.59765625, | |
| "learning_rate": 1.0308197990669537e-07, | |
| "loss": -0.0006723229307681322, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.3935137987136841, | |
| "reward_std": 0.0229948153719306, | |
| "rewards/MCQ_Reward/mean": 0.3935137987136841, | |
| "rewards/MCQ_Reward/std": 0.09170003235340118, | |
| "step": 397, | |
| "train_speed(iter/s)": 0.125959 | |
| }, | |
| { | |
| "clip_ratio": 0.009115117136389017, | |
| "epoch": 7.96, | |
| "grad_norm": 2.6576101779937744, | |
| "kl": 0.623046875, | |
| "learning_rate": 1.0116018621892236e-07, | |
| "loss": -0.0008128315676003695, | |
| "memory(GiB)": 25.14, | |
| "step": 398, | |
| "train_speed(iter/s)": 0.126231 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 195.5, | |
| "completions/mean_length": 125.65625, | |
| "completions/min_length": 67.0, | |
| "epoch": 7.98, | |
| "grad_norm": 2.7158310413360596, | |
| "kl": 0.58203125, | |
| "learning_rate": 9.92544577406923e-08, | |
| "loss": 0.006697420962154865, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.43207649886608124, | |
| "reward_std": 0.02400553785264492, | |
| "rewards/MCQ_Reward/mean": 0.43207649886608124, | |
| "rewards/MCQ_Reward/std": 0.0867740847170353, | |
| "step": 399, | |
| "train_speed(iter/s)": 0.126178 | |
| }, | |
| { | |
| "clip_ratio": 0.005927033722400665, | |
| "epoch": 8.0, | |
| "grad_norm": 2.416578769683838, | |
| "kl": 0.580078125, | |
| "learning_rate": 9.736487123447068e-08, | |
| "loss": 0.006666385568678379, | |
| "memory(GiB)": 25.14, | |
| "step": 400, | |
| "train_speed(iter/s)": 0.126428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 210.0, | |
| "completions/mean_length": 128.03515625, | |
| "completions/min_length": 68.0, | |
| "epoch": 8.02, | |
| "grad_norm": 2.4625000953674316, | |
| "kl": 0.55078125, | |
| "learning_rate": 9.549150281252632e-08, | |
| "loss": 0.019197747111320496, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.41131871938705444, | |
| "reward_std": 0.02179474849253893, | |
| "rewards/MCQ_Reward/mean": 0.41131871938705444, | |
| "rewards/MCQ_Reward/std": 0.0903569795191288, | |
| "step": 401, | |
| "train_speed(iter/s)": 0.12607 | |
| }, | |
| { | |
| "clip_ratio": 0.004682507831603289, | |
| "epoch": 8.04, | |
| "grad_norm": 2.4578921794891357, | |
| "kl": 0.556640625, | |
| "learning_rate": 9.363442793386606e-08, | |
| "loss": 0.019492177292704582, | |
| "memory(GiB)": 25.14, | |
| "step": 402, | |
| "train_speed(iter/s)": 0.126333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 179.5, | |
| "completions/mean_length": 124.9453125, | |
| "completions/min_length": 65.0, | |
| "epoch": 8.06, | |
| "grad_norm": 2.380934000015259, | |
| "kl": 0.595703125, | |
| "learning_rate": 9.179372140119524e-08, | |
| "loss": 0.00032033398747444153, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.45213624835014343, | |
| "reward_std": 0.019670803099870682, | |
| "rewards/MCQ_Reward/mean": 0.45213624835014343, | |
| "rewards/MCQ_Reward/std": 0.05602107755839825, | |
| "step": 403, | |
| "train_speed(iter/s)": 0.126289 | |
| }, | |
| { | |
| "clip_ratio": 0.005494384560734034, | |
| "epoch": 8.08, | |
| "grad_norm": 2.2825376987457275, | |
| "kl": 0.59765625, | |
| "learning_rate": 8.996945735790446e-08, | |
| "loss": 0.00025699660181999207, | |
| "memory(GiB)": 25.14, | |
| "step": 404, | |
| "train_speed(iter/s)": 0.126553 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 193.0, | |
| "completions/mean_length": 113.30078125, | |
| "completions/min_length": 66.0, | |
| "epoch": 8.1, | |
| "grad_norm": 2.4504525661468506, | |
| "kl": 0.65234375, | |
| "learning_rate": 8.816170928508365e-08, | |
| "loss": 0.005521825514733791, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4200716018676758, | |
| "reward_std": 0.02163711003959179, | |
| "rewards/MCQ_Reward/mean": 0.4200716018676758, | |
| "rewards/MCQ_Reward/std": 0.09177059680223465, | |
| "step": 405, | |
| "train_speed(iter/s)": 0.126487 | |
| }, | |
| { | |
| "clip_ratio": 0.005122944712638855, | |
| "epoch": 8.12, | |
| "grad_norm": 2.5025854110717773, | |
| "kl": 0.65234375, | |
| "learning_rate": 8.637054999856147e-08, | |
| "loss": 0.005893816705793142, | |
| "memory(GiB)": 25.14, | |
| "step": 406, | |
| "train_speed(iter/s)": 0.126707 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 206.0, | |
| "completions/mean_length": 131.84765625, | |
| "completions/min_length": 84.0, | |
| "epoch": 8.14, | |
| "grad_norm": 2.2803900241851807, | |
| "kl": 0.677734375, | |
| "learning_rate": 8.459605164597267e-08, | |
| "loss": 0.002506987191736698, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42351874709129333, | |
| "reward_std": 0.019920101389288902, | |
| "rewards/MCQ_Reward/mean": 0.42351874709129333, | |
| "rewards/MCQ_Reward/std": 0.07087348401546478, | |
| "step": 407, | |
| "train_speed(iter/s)": 0.126629 | |
| }, | |
| { | |
| "clip_ratio": 0.004146608873270452, | |
| "epoch": 8.16, | |
| "grad_norm": 2.197411060333252, | |
| "kl": 0.693359375, | |
| "learning_rate": 8.283828570385237e-08, | |
| "loss": 0.0028184172697365284, | |
| "memory(GiB)": 25.14, | |
| "step": 408, | |
| "train_speed(iter/s)": 0.126894 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 246.5, | |
| "completions/mean_length": 126.35546875, | |
| "completions/min_length": 55.0, | |
| "epoch": 8.18, | |
| "grad_norm": 3.133226156234741, | |
| "kl": 0.54296875, | |
| "learning_rate": 8.109732297475635e-08, | |
| "loss": 0.003347148187458515, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4289032816886902, | |
| "reward_std": 0.023678142577409744, | |
| "rewards/MCQ_Reward/mean": 0.4289032816886902, | |
| "rewards/MCQ_Reward/std": 0.08180082961916924, | |
| "step": 409, | |
| "train_speed(iter/s)": 0.126716 | |
| }, | |
| { | |
| "clip_ratio": 0.004793429281562567, | |
| "epoch": 8.2, | |
| "grad_norm": 2.647909164428711, | |
| "kl": 0.548828125, | |
| "learning_rate": 7.937323358440934e-08, | |
| "loss": 0.003219081088900566, | |
| "memory(GiB)": 25.14, | |
| "step": 410, | |
| "train_speed(iter/s)": 0.126979 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 200.0, | |
| "completions/mean_length": 120.65234375, | |
| "completions/min_length": 66.0, | |
| "epoch": 8.22, | |
| "grad_norm": 2.844910144805908, | |
| "kl": 1.08984375, | |
| "learning_rate": 7.766608697888094e-08, | |
| "loss": 0.00578346848487854, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.40613003075122833, | |
| "reward_std": 0.024234792217612267, | |
| "rewards/MCQ_Reward/mean": 0.40613003075122833, | |
| "rewards/MCQ_Reward/std": 0.10613492503762245, | |
| "step": 411, | |
| "train_speed(iter/s)": 0.126628 | |
| }, | |
| { | |
| "clip_ratio": 0.008466396480798721, | |
| "epoch": 8.24, | |
| "grad_norm": 3.322730779647827, | |
| "kl": 1.30859375, | |
| "learning_rate": 7.597595192178702e-08, | |
| "loss": 0.006200029980391264, | |
| "memory(GiB)": 25.14, | |
| "step": 412, | |
| "train_speed(iter/s)": 0.126892 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 181.0, | |
| "completions/mean_length": 120.59375, | |
| "completions/min_length": 63.5, | |
| "epoch": 8.26, | |
| "grad_norm": 3.1121227741241455, | |
| "kl": 0.57421875, | |
| "learning_rate": 7.430289649152155e-08, | |
| "loss": -0.005076010245829821, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4349597841501236, | |
| "reward_std": 0.022311867214739323, | |
| "rewards/MCQ_Reward/mean": 0.4349597841501236, | |
| "rewards/MCQ_Reward/std": 0.0992676205933094, | |
| "step": 413, | |
| "train_speed(iter/s)": 0.126827 | |
| }, | |
| { | |
| "clip_ratio": 0.005325015634298325, | |
| "epoch": 8.28, | |
| "grad_norm": 3.336932897567749, | |
| "kl": 0.5859375, | |
| "learning_rate": 7.264698807851327e-08, | |
| "loss": -0.004951636306941509, | |
| "memory(GiB)": 25.14, | |
| "step": 414, | |
| "train_speed(iter/s)": 0.127083 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.0, | |
| "completions/mean_length": 122.34765625, | |
| "completions/min_length": 80.0, | |
| "epoch": 8.3, | |
| "grad_norm": 2.32357120513916, | |
| "kl": 0.576171875, | |
| "learning_rate": 7.100829338251146e-08, | |
| "loss": 0.010018033906817436, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.46219733357429504, | |
| "reward_std": 0.023064136505126953, | |
| "rewards/MCQ_Reward/mean": 0.46219733357429504, | |
| "rewards/MCQ_Reward/std": 0.10461203381419182, | |
| "step": 415, | |
| "train_speed(iter/s)": 0.127059 | |
| }, | |
| { | |
| "clip_ratio": 0.004823329858481884, | |
| "epoch": 8.32, | |
| "grad_norm": 2.399235486984253, | |
| "kl": 0.56640625, | |
| "learning_rate": 6.938687840989971e-08, | |
| "loss": 0.010338631458580494, | |
| "memory(GiB)": 25.14, | |
| "step": 416, | |
| "train_speed(iter/s)": 0.127319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 205.0, | |
| "completions/mean_length": 126.6484375, | |
| "completions/min_length": 59.5, | |
| "epoch": 8.34, | |
| "grad_norm": 2.3096046447753906, | |
| "kl": 0.59765625, | |
| "learning_rate": 6.778280847103667e-08, | |
| "loss": 0.007643429096788168, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.45115791261196136, | |
| "reward_std": 0.026236201636493206, | |
| "rewards/MCQ_Reward/mean": 0.45115791261196136, | |
| "rewards/MCQ_Reward/std": 0.07101332768797874, | |
| "step": 417, | |
| "train_speed(iter/s)": 0.127229 | |
| }, | |
| { | |
| "clip_ratio": 0.00613890727981925, | |
| "epoch": 8.36, | |
| "grad_norm": 2.6392662525177, | |
| "kl": 0.599609375, | |
| "learning_rate": 6.619614817762536e-08, | |
| "loss": 0.00813712365925312, | |
| "memory(GiB)": 25.14, | |
| "step": 418, | |
| "train_speed(iter/s)": 0.127474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 203.5, | |
| "completions/mean_length": 128.6484375, | |
| "completions/min_length": 70.5, | |
| "epoch": 8.38, | |
| "grad_norm": 2.6424126625061035, | |
| "kl": 0.5546875, | |
| "learning_rate": 6.462696144011148e-08, | |
| "loss": 0.01095396839082241, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.43093007802963257, | |
| "reward_std": 0.021352089941501617, | |
| "rewards/MCQ_Reward/mean": 0.43093007802963257, | |
| "rewards/MCQ_Reward/std": 0.09322765283286572, | |
| "step": 419, | |
| "train_speed(iter/s)": 0.127401 | |
| }, | |
| { | |
| "clip_ratio": 0.005334047833457589, | |
| "epoch": 8.4, | |
| "grad_norm": 2.514528751373291, | |
| "kl": 0.560546875, | |
| "learning_rate": 6.307531146510753e-08, | |
| "loss": 0.011139345355331898, | |
| "memory(GiB)": 25.14, | |
| "step": 420, | |
| "train_speed(iter/s)": 0.127655 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 198.0, | |
| "completions/mean_length": 121.234375, | |
| "completions/min_length": 61.5, | |
| "epoch": 8.42, | |
| "grad_norm": 2.6931869983673096, | |
| "kl": 0.576171875, | |
| "learning_rate": 6.154126075284855e-08, | |
| "loss": -0.004434285219758749, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.47386451065540314, | |
| "reward_std": 0.02479046955704689, | |
| "rewards/MCQ_Reward/mean": 0.47386451065540314, | |
| "rewards/MCQ_Reward/std": 0.08362133055925369, | |
| "step": 421, | |
| "train_speed(iter/s)": 0.127304 | |
| }, | |
| { | |
| "clip_ratio": 0.004985473584383726, | |
| "epoch": 8.44, | |
| "grad_norm": 2.623483896255493, | |
| "kl": 0.5859375, | |
| "learning_rate": 6.002487109467347e-08, | |
| "loss": -0.004044556524604559, | |
| "memory(GiB)": 25.14, | |
| "step": 422, | |
| "train_speed(iter/s)": 0.12756 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 167.0, | |
| "completions/mean_length": 120.3359375, | |
| "completions/min_length": 57.0, | |
| "epoch": 8.46, | |
| "grad_norm": 2.4557580947875977, | |
| "kl": 0.54296875, | |
| "learning_rate": 5.8526203570536504e-08, | |
| "loss": -0.0014804373495280743, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.38437609374523163, | |
| "reward_std": 0.019576413556933403, | |
| "rewards/MCQ_Reward/mean": 0.38437609374523163, | |
| "rewards/MCQ_Reward/std": 0.08220572769641876, | |
| "step": 423, | |
| "train_speed(iter/s)": 0.12751 | |
| }, | |
| { | |
| "clip_ratio": 0.005047354847192764, | |
| "epoch": 8.48, | |
| "grad_norm": 2.414680004119873, | |
| "kl": 0.548828125, | |
| "learning_rate": 5.70453185465472e-08, | |
| "loss": -0.0010703507578000426, | |
| "memory(GiB)": 25.14, | |
| "step": 424, | |
| "train_speed(iter/s)": 0.127763 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 171.0, | |
| "completions/mean_length": 109.29296875, | |
| "completions/min_length": 59.0, | |
| "epoch": 8.5, | |
| "grad_norm": 2.3690483570098877, | |
| "kl": 0.59375, | |
| "learning_rate": 5.5582275672538316e-08, | |
| "loss": 0.0056993430480360985, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.404767170548439, | |
| "reward_std": 0.024388392455875874, | |
| "rewards/MCQ_Reward/mean": 0.404767170548439, | |
| "rewards/MCQ_Reward/std": 0.09245007485151291, | |
| "step": 425, | |
| "train_speed(iter/s)": 0.127734 | |
| }, | |
| { | |
| "clip_ratio": 0.004816505592316389, | |
| "epoch": 8.52, | |
| "grad_norm": 2.3456268310546875, | |
| "kl": 0.59765625, | |
| "learning_rate": 5.4137133879663287e-08, | |
| "loss": 0.005467045586556196, | |
| "memory(GiB)": 25.14, | |
| "step": 426, | |
| "train_speed(iter/s)": 0.127977 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 259.5, | |
| "completions/mean_length": 131.4375, | |
| "completions/min_length": 65.5, | |
| "epoch": 8.54, | |
| "grad_norm": 2.3816792964935303, | |
| "kl": 0.55078125, | |
| "learning_rate": 5.270995137802314e-08, | |
| "loss": 0.0031818237621337175, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.38306334614753723, | |
| "reward_std": 0.02167375199496746, | |
| "rewards/MCQ_Reward/mean": 0.38306334614753723, | |
| "rewards/MCQ_Reward/std": 0.12913303077220917, | |
| "step": 427, | |
| "train_speed(iter/s)": 0.12777 | |
| }, | |
| { | |
| "clip_ratio": 0.005708938697353005, | |
| "epoch": 8.56, | |
| "grad_norm": 2.7459070682525635, | |
| "kl": 0.560546875, | |
| "learning_rate": 5.1300785654320886e-08, | |
| "loss": 0.0036508457269519567, | |
| "memory(GiB)": 25.14, | |
| "step": 428, | |
| "train_speed(iter/s)": 0.128012 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 216.5, | |
| "completions/mean_length": 141.1796875, | |
| "completions/min_length": 63.5, | |
| "epoch": 8.58, | |
| "grad_norm": 2.546011447906494, | |
| "kl": 0.560546875, | |
| "learning_rate": 4.9909693469546097e-08, | |
| "loss": -0.0037225554697215557, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4553868919610977, | |
| "reward_std": 0.024206943809986115, | |
| "rewards/MCQ_Reward/mean": 0.4553868919610977, | |
| "rewards/MCQ_Reward/std": 0.10913475230336189, | |
| "step": 429, | |
| "train_speed(iter/s)": 0.127896 | |
| }, | |
| { | |
| "clip_ratio": 0.005615573842078447, | |
| "epoch": 8.6, | |
| "grad_norm": 2.4503653049468994, | |
| "kl": 0.552734375, | |
| "learning_rate": 4.853673085668947e-08, | |
| "loss": -0.0035459164064377546, | |
| "memory(GiB)": 25.14, | |
| "step": 430, | |
| "train_speed(iter/s)": 0.128133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 170.5, | |
| "completions/mean_length": 121.25, | |
| "completions/min_length": 68.0, | |
| "epoch": 8.62, | |
| "grad_norm": 2.6130316257476807, | |
| "kl": 0.560546875, | |
| "learning_rate": 4.718195311848455e-08, | |
| "loss": 0.006583400070667267, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4170517176389694, | |
| "reward_std": 0.022290964610874653, | |
| "rewards/MCQ_Reward/mean": 0.4170517176389694, | |
| "rewards/MCQ_Reward/std": 0.10183962434530258, | |
| "step": 431, | |
| "train_speed(iter/s)": 0.12785 | |
| }, | |
| { | |
| "clip_ratio": 0.0055829116608947515, | |
| "epoch": 8.64, | |
| "grad_norm": 2.6913576126098633, | |
| "kl": 0.572265625, | |
| "learning_rate": 4.5845414825181394e-08, | |
| "loss": 0.006918736733496189, | |
| "memory(GiB)": 25.14, | |
| "step": 432, | |
| "train_speed(iter/s)": 0.128096 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 185.5, | |
| "completions/mean_length": 113.8046875, | |
| "completions/min_length": 74.0, | |
| "epoch": 8.66, | |
| "grad_norm": 2.4241960048675537, | |
| "kl": 0.6201171875, | |
| "learning_rate": 4.452716981234744e-08, | |
| "loss": 0.011290742084383965, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4250094145536423, | |
| "reward_std": 0.022951221093535423, | |
| "rewards/MCQ_Reward/mean": 0.4250094145536423, | |
| "rewards/MCQ_Reward/std": 0.10084276273846626, | |
| "step": 433, | |
| "train_speed(iter/s)": 0.128069 | |
| }, | |
| { | |
| "clip_ratio": 0.005609560292214155, | |
| "epoch": 8.68, | |
| "grad_norm": 2.5790963172912598, | |
| "kl": 0.650390625, | |
| "learning_rate": 4.322727117869951e-08, | |
| "loss": 0.011948860250413418, | |
| "memory(GiB)": 25.14, | |
| "step": 434, | |
| "train_speed(iter/s)": 0.128291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 196.0, | |
| "completions/mean_length": 126.3515625, | |
| "completions/min_length": 83.5, | |
| "epoch": 8.7, | |
| "grad_norm": 2.430708885192871, | |
| "kl": 0.5390625, | |
| "learning_rate": 4.19457712839652e-08, | |
| "loss": -0.008761925622820854, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.43507225811481476, | |
| "reward_std": 0.024821095168590546, | |
| "rewards/MCQ_Reward/mean": 0.43507225811481476, | |
| "rewards/MCQ_Reward/std": 0.10436990112066269, | |
| "step": 435, | |
| "train_speed(iter/s)": 0.128196 | |
| }, | |
| { | |
| "clip_ratio": 0.004881069879047573, | |
| "epoch": 8.72, | |
| "grad_norm": 2.439311981201172, | |
| "kl": 0.5400390625, | |
| "learning_rate": 4.068272174677334e-08, | |
| "loss": -0.00834021344780922, | |
| "memory(GiB)": 25.14, | |
| "step": 436, | |
| "train_speed(iter/s)": 0.128446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 169.0, | |
| "completions/mean_length": 118.14453125, | |
| "completions/min_length": 67.5, | |
| "epoch": 8.74, | |
| "grad_norm": 2.607220411300659, | |
| "kl": 0.619140625, | |
| "learning_rate": 3.9438173442575e-08, | |
| "loss": 0.005073768552392721, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4522544592618942, | |
| "reward_std": 0.024327417835593224, | |
| "rewards/MCQ_Reward/mean": 0.4522544592618942, | |
| "rewards/MCQ_Reward/std": 0.08557374030351639, | |
| "step": 437, | |
| "train_speed(iter/s)": 0.128414 | |
| }, | |
| { | |
| "clip_ratio": 0.005367731209844351, | |
| "epoch": 8.76, | |
| "grad_norm": 2.472538709640503, | |
| "kl": 0.626953125, | |
| "learning_rate": 3.821217650159453e-08, | |
| "loss": 0.005441693589091301, | |
| "memory(GiB)": 25.14, | |
| "step": 438, | |
| "train_speed(iter/s)": 0.128664 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 177.0, | |
| "completions/mean_length": 117.36328125, | |
| "completions/min_length": 65.5, | |
| "epoch": 8.78, | |
| "grad_norm": 2.8752048015594482, | |
| "kl": 0.62109375, | |
| "learning_rate": 3.700478030680987e-08, | |
| "loss": 0.001543362159281969, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.44734521210193634, | |
| "reward_std": 0.02054190542548895, | |
| "rewards/MCQ_Reward/mean": 0.44734521210193634, | |
| "rewards/MCQ_Reward/std": 0.09018547832965851, | |
| "step": 439, | |
| "train_speed(iter/s)": 0.128624 | |
| }, | |
| { | |
| "clip_ratio": 0.006753503577783704, | |
| "epoch": 8.8, | |
| "grad_norm": 2.822502374649048, | |
| "kl": 0.625, | |
| "learning_rate": 3.581603349196371e-08, | |
| "loss": 0.0017494899220764637, | |
| "memory(GiB)": 25.14, | |
| "step": 440, | |
| "train_speed(iter/s)": 0.128861 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 217.5, | |
| "completions/mean_length": 117.40234375, | |
| "completions/min_length": 62.0, | |
| "epoch": 8.82, | |
| "grad_norm": 2.5104751586914062, | |
| "kl": 0.59375, | |
| "learning_rate": 3.464598393960449e-08, | |
| "loss": -0.004553473554551601, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.39943838119506836, | |
| "reward_std": 0.023083772510290146, | |
| "rewards/MCQ_Reward/mean": 0.39943838119506836, | |
| "rewards/MCQ_Reward/std": 0.08860309049487114, | |
| "step": 441, | |
| "train_speed(iter/s)": 0.128489 | |
| }, | |
| { | |
| "clip_ratio": 0.00470179901458323, | |
| "epoch": 8.84, | |
| "grad_norm": 2.480741500854492, | |
| "kl": 0.58984375, | |
| "learning_rate": 3.349467877915746e-08, | |
| "loss": -0.004542327020317316, | |
| "memory(GiB)": 25.14, | |
| "step": 442, | |
| "train_speed(iter/s)": 0.128733 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 254.0, | |
| "completions/mean_length": 127.69140625, | |
| "completions/min_length": 50.0, | |
| "epoch": 8.86, | |
| "grad_norm": 2.399143934249878, | |
| "kl": 0.607421875, | |
| "learning_rate": 3.23621643850267e-08, | |
| "loss": -0.004238632973283529, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.40998475253582, | |
| "reward_std": 0.02201936673372984, | |
| "rewards/MCQ_Reward/mean": 0.40998475253582, | |
| "rewards/MCQ_Reward/std": 0.0800128486007452, | |
| "step": 443, | |
| "train_speed(iter/s)": 0.128561 | |
| }, | |
| { | |
| "clip_ratio": 0.006211797473952174, | |
| "epoch": 8.88, | |
| "grad_norm": 2.5745253562927246, | |
| "kl": 0.603515625, | |
| "learning_rate": 3.124848637472688e-08, | |
| "loss": -0.003581822384148836, | |
| "memory(GiB)": 25.14, | |
| "step": 444, | |
| "train_speed(iter/s)": 0.128809 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 187.5, | |
| "completions/mean_length": 128.41015625, | |
| "completions/min_length": 71.0, | |
| "epoch": 8.9, | |
| "grad_norm": 2.989118814468384, | |
| "kl": 0.6640625, | |
| "learning_rate": 3.015368960704584e-08, | |
| "loss": 0.0020642182789742947, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.45626600086688995, | |
| "reward_std": 0.022524941712617874, | |
| "rewards/MCQ_Reward/mean": 0.45626600086688995, | |
| "rewards/MCQ_Reward/std": 0.08293722942471504, | |
| "step": 445, | |
| "train_speed(iter/s)": 0.128751 | |
| }, | |
| { | |
| "clip_ratio": 0.0053639879915863276, | |
| "epoch": 8.92, | |
| "grad_norm": 2.226865291595459, | |
| "kl": 0.65234375, | |
| "learning_rate": 2.907781818023769e-08, | |
| "loss": 0.0022344959434121847, | |
| "memory(GiB)": 25.14, | |
| "step": 446, | |
| "train_speed(iter/s)": 0.128997 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 198.5, | |
| "completions/mean_length": 114.81640625, | |
| "completions/min_length": 69.5, | |
| "epoch": 8.94, | |
| "grad_norm": 2.5736968517303467, | |
| "kl": 0.626953125, | |
| "learning_rate": 2.8020915430246706e-08, | |
| "loss": 0.00543589424341917, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4480299800634384, | |
| "reward_std": 0.021618574857711792, | |
| "rewards/MCQ_Reward/mean": 0.4480299800634384, | |
| "rewards/MCQ_Reward/std": 0.08090543001890182, | |
| "step": 447, | |
| "train_speed(iter/s)": 0.128968 | |
| }, | |
| { | |
| "clip_ratio": 0.005519783589988947, | |
| "epoch": 8.96, | |
| "grad_norm": 2.7313241958618164, | |
| "kl": 0.62890625, | |
| "learning_rate": 2.69830239289614e-08, | |
| "loss": 0.005457316525280476, | |
| "memory(GiB)": 25.14, | |
| "step": 448, | |
| "train_speed(iter/s)": 0.12921 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 163.0, | |
| "completions/mean_length": 114.08203125, | |
| "completions/min_length": 69.5, | |
| "epoch": 8.98, | |
| "grad_norm": 3.3176426887512207, | |
| "kl": 0.658203125, | |
| "learning_rate": 2.596418548250029e-08, | |
| "loss": -0.006901263725012541, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4552987068891525, | |
| "reward_std": 0.02576339803636074, | |
| "rewards/MCQ_Reward/mean": 0.4552987068891525, | |
| "rewards/MCQ_Reward/std": 0.09829828701913357, | |
| "step": 449, | |
| "train_speed(iter/s)": 0.129186 | |
| }, | |
| { | |
| "clip_ratio": 0.005895850248634815, | |
| "epoch": 9.0, | |
| "grad_norm": 3.1435494422912598, | |
| "kl": 0.65625, | |
| "learning_rate": 2.4964441129527335e-08, | |
| "loss": -0.006242312025278807, | |
| "memory(GiB)": 25.14, | |
| "step": 450, | |
| "train_speed(iter/s)": 0.129418 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 173.5, | |
| "completions/mean_length": 107.38671875, | |
| "completions/min_length": 61.0, | |
| "epoch": 9.02, | |
| "grad_norm": 2.6646904945373535, | |
| "kl": 0.60546875, | |
| "learning_rate": 2.3983831139599286e-08, | |
| "loss": 0.006207154132425785, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.39446285367012024, | |
| "reward_std": 0.022946057841181755, | |
| "rewards/MCQ_Reward/mean": 0.39446285367012024, | |
| "rewards/MCQ_Reward/std": 0.1063094437122345, | |
| "step": 451, | |
| "train_speed(iter/s)": 0.129116 | |
| }, | |
| { | |
| "clip_ratio": 0.005521278129890561, | |
| "epoch": 9.04, | |
| "grad_norm": 2.453953504562378, | |
| "kl": 0.619140625, | |
| "learning_rate": 2.3022395011543682e-08, | |
| "loss": 0.006389847490936518, | |
| "memory(GiB)": 25.14, | |
| "step": 452, | |
| "train_speed(iter/s)": 0.129358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 210.5, | |
| "completions/mean_length": 128.57421875, | |
| "completions/min_length": 55.0, | |
| "epoch": 9.06, | |
| "grad_norm": 2.812540054321289, | |
| "kl": 0.580078125, | |
| "learning_rate": 2.208017147186736e-08, | |
| "loss": -0.005320190917700529, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.41816772520542145, | |
| "reward_std": 0.023720718920230865, | |
| "rewards/MCQ_Reward/mean": 0.41816772520542145, | |
| "rewards/MCQ_Reward/std": 0.11730682849884033, | |
| "step": 453, | |
| "train_speed(iter/s)": 0.129235 | |
| }, | |
| { | |
| "clip_ratio": 0.005719892680644989, | |
| "epoch": 9.08, | |
| "grad_norm": 2.8398780822753906, | |
| "kl": 0.578125, | |
| "learning_rate": 2.1157198473197413e-08, | |
| "loss": -0.004547153599560261, | |
| "memory(GiB)": 25.14, | |
| "step": 454, | |
| "train_speed(iter/s)": 0.129473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 209.5, | |
| "completions/mean_length": 121.10546875, | |
| "completions/min_length": 61.0, | |
| "epoch": 9.1, | |
| "grad_norm": 2.6457087993621826, | |
| "kl": 0.623046875, | |
| "learning_rate": 2.025351319275137e-08, | |
| "loss": 0.006458953022956848, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4360807240009308, | |
| "reward_std": 0.023424276150763035, | |
| "rewards/MCQ_Reward/mean": 0.4360807240009308, | |
| "rewards/MCQ_Reward/std": 0.08403830602765083, | |
| "step": 455, | |
| "train_speed(iter/s)": 0.129418 | |
| }, | |
| { | |
| "clip_ratio": 0.007413617800921202, | |
| "epoch": 9.12, | |
| "grad_norm": 3.019871473312378, | |
| "kl": 0.615234375, | |
| "learning_rate": 1.936915203084055e-08, | |
| "loss": 0.007484931964427233, | |
| "memory(GiB)": 25.14, | |
| "step": 456, | |
| "train_speed(iter/s)": 0.129657 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 180.5, | |
| "completions/mean_length": 115.48828125, | |
| "completions/min_length": 62.0, | |
| "epoch": 9.14, | |
| "grad_norm": 2.869127035140991, | |
| "kl": 0.5703125, | |
| "learning_rate": 1.8504150609403856e-08, | |
| "loss": 0.002277131425216794, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42605504393577576, | |
| "reward_std": 0.02147796005010605, | |
| "rewards/MCQ_Reward/mean": 0.42605504393577576, | |
| "rewards/MCQ_Reward/std": 0.09400845319032669, | |
| "step": 457, | |
| "train_speed(iter/s)": 0.129623 | |
| }, | |
| { | |
| "clip_ratio": 0.00495463190600276, | |
| "epoch": 9.16, | |
| "grad_norm": 2.7837038040161133, | |
| "kl": 0.564453125, | |
| "learning_rate": 1.7658543770572186e-08, | |
| "loss": 0.0023261206224560738, | |
| "memory(GiB)": 25.14, | |
| "step": 458, | |
| "train_speed(iter/s)": 0.129859 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 226.5, | |
| "completions/mean_length": 131.125, | |
| "completions/min_length": 63.0, | |
| "epoch": 9.18, | |
| "grad_norm": 2.4485437870025635, | |
| "kl": 0.564453125, | |
| "learning_rate": 1.683236557526574e-08, | |
| "loss": -0.001264197751879692, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.43159276247024536, | |
| "reward_std": 0.02392040565609932, | |
| "rewards/MCQ_Reward/mean": 0.43159276247024536, | |
| "rewards/MCQ_Reward/std": 0.10159046202898026, | |
| "step": 459, | |
| "train_speed(iter/s)": 0.129693 | |
| }, | |
| { | |
| "clip_ratio": 0.004053628304973245, | |
| "epoch": 9.2, | |
| "grad_norm": 2.3056235313415527, | |
| "kl": 0.5625, | |
| "learning_rate": 1.6025649301821875e-08, | |
| "loss": -0.000987461768090725, | |
| "memory(GiB)": 25.14, | |
| "step": 460, | |
| "train_speed(iter/s)": 0.129933 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 181.5, | |
| "completions/mean_length": 113.18359375, | |
| "completions/min_length": 65.5, | |
| "epoch": 9.22, | |
| "grad_norm": 2.3913767337799072, | |
| "kl": 0.544921875, | |
| "learning_rate": 1.5238427444654367e-08, | |
| "loss": 0.012515128590166569, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4141518771648407, | |
| "reward_std": 0.019386641681194305, | |
| "rewards/MCQ_Reward/mean": 0.4141518771648407, | |
| "rewards/MCQ_Reward/std": 0.09657716751098633, | |
| "step": 461, | |
| "train_speed(iter/s)": 0.129665 | |
| }, | |
| { | |
| "clip_ratio": 0.005686681717634201, | |
| "epoch": 9.24, | |
| "grad_norm": 2.5303232669830322, | |
| "kl": 0.544921875, | |
| "learning_rate": 1.4470731712944883e-08, | |
| "loss": 0.013128566555678844, | |
| "memory(GiB)": 25.14, | |
| "step": 462, | |
| "train_speed(iter/s)": 0.129891 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 173.0, | |
| "completions/mean_length": 113.08984375, | |
| "completions/min_length": 68.0, | |
| "epoch": 9.26, | |
| "grad_norm": 2.9452006816864014, | |
| "kl": 0.578125, | |
| "learning_rate": 1.3722593029365459e-08, | |
| "loss": 0.01786494255065918, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4347621351480484, | |
| "reward_std": 0.023103663697838783, | |
| "rewards/MCQ_Reward/mean": 0.4347621351480484, | |
| "rewards/MCQ_Reward/std": 0.10107803344726562, | |
| "step": 463, | |
| "train_speed(iter/s)": 0.129821 | |
| }, | |
| { | |
| "clip_ratio": 0.004837532993406057, | |
| "epoch": 9.28, | |
| "grad_norm": 3.270838499069214, | |
| "kl": 0.576171875, | |
| "learning_rate": 1.2994041528833267e-08, | |
| "loss": 0.01855536922812462, | |
| "memory(GiB)": 25.14, | |
| "step": 464, | |
| "train_speed(iter/s)": 0.130055 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 197.0, | |
| "completions/mean_length": 130.0703125, | |
| "completions/min_length": 61.0, | |
| "epoch": 9.3, | |
| "grad_norm": 2.5287396907806396, | |
| "kl": 0.5703125, | |
| "learning_rate": 1.2285106557296476e-08, | |
| "loss": -0.009716257452964783, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4242394268512726, | |
| "reward_std": 0.024817454628646374, | |
| "rewards/MCQ_Reward/mean": 0.4242394268512726, | |
| "rewards/MCQ_Reward/std": 0.11753027141094208, | |
| "step": 465, | |
| "train_speed(iter/s)": 0.129996 | |
| }, | |
| { | |
| "clip_ratio": 0.0049513031262904406, | |
| "epoch": 9.32, | |
| "grad_norm": 2.6941351890563965, | |
| "kl": 0.56640625, | |
| "learning_rate": 1.1595816670552428e-08, | |
| "loss": -0.009578550234436989, | |
| "memory(GiB)": 25.14, | |
| "step": 466, | |
| "train_speed(iter/s)": 0.130232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 176.0, | |
| "completions/mean_length": 120.80859375, | |
| "completions/min_length": 79.0, | |
| "epoch": 9.34, | |
| "grad_norm": 2.4061837196350098, | |
| "kl": 0.580078125, | |
| "learning_rate": 1.0926199633097154e-08, | |
| "loss": 0.009803004562854767, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4236748516559601, | |
| "reward_std": 0.020633171312510967, | |
| "rewards/MCQ_Reward/mean": 0.4236748516559601, | |
| "rewards/MCQ_Reward/std": 0.10525783523917198, | |
| "step": 467, | |
| "train_speed(iter/s)": 0.130202 | |
| }, | |
| { | |
| "clip_ratio": 0.0038570521865040064, | |
| "epoch": 9.36, | |
| "grad_norm": 2.538754463195801, | |
| "kl": 0.576171875, | |
| "learning_rate": 1.0276282417007399e-08, | |
| "loss": 0.010506462305784225, | |
| "memory(GiB)": 25.14, | |
| "step": 468, | |
| "train_speed(iter/s)": 0.130419 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 163.0, | |
| "completions/mean_length": 115.359375, | |
| "completions/min_length": 72.5, | |
| "epoch": 9.38, | |
| "grad_norm": 2.767404317855835, | |
| "kl": 0.58203125, | |
| "learning_rate": 9.646091200853801e-09, | |
| "loss": 0.002447181846946478, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4558543264865875, | |
| "reward_std": 0.023351009003818035, | |
| "rewards/MCQ_Reward/mean": 0.4558543264865875, | |
| "rewards/MCQ_Reward/std": 0.10045822337269783, | |
| "step": 469, | |
| "train_speed(iter/s)": 0.130376 | |
| }, | |
| { | |
| "clip_ratio": 0.003978088265284896, | |
| "epoch": 9.4, | |
| "grad_norm": 2.3947746753692627, | |
| "kl": 0.58984375, | |
| "learning_rate": 9.035651368646646e-09, | |
| "loss": 0.0025905624497681856, | |
| "memory(GiB)": 25.14, | |
| "step": 470, | |
| "train_speed(iter/s)": 0.130609 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 199.0, | |
| "completions/mean_length": 120.05078125, | |
| "completions/min_length": 61.0, | |
| "epoch": 9.42, | |
| "grad_norm": 2.2213082313537598, | |
| "kl": 0.595703125, | |
| "learning_rate": 8.44498750881345e-09, | |
| "loss": 0.022836437448859215, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4252375066280365, | |
| "reward_std": 0.02044745907187462, | |
| "rewards/MCQ_Reward/mean": 0.4252375066280365, | |
| "rewards/MCQ_Reward/std": 0.0874844454228878, | |
| "step": 471, | |
| "train_speed(iter/s)": 0.130308 | |
| }, | |
| { | |
| "clip_ratio": 0.004947596346028149, | |
| "epoch": 9.44, | |
| "grad_norm": 2.374445676803589, | |
| "kl": 0.599609375, | |
| "learning_rate": 7.874123413208145e-09, | |
| "loss": 0.02313510701060295, | |
| "memory(GiB)": 25.14, | |
| "step": 472, | |
| "train_speed(iter/s)": 0.130541 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 202.5, | |
| "completions/mean_length": 122.046875, | |
| "completions/min_length": 59.0, | |
| "epoch": 9.46, | |
| "grad_norm": 2.6664299964904785, | |
| "kl": 0.626953125, | |
| "learning_rate": 7.323082076153508e-09, | |
| "loss": 0.0047410172410309315, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42370498180389404, | |
| "reward_std": 0.021436103619635105, | |
| "rewards/MCQ_Reward/mean": 0.42370498180389404, | |
| "rewards/MCQ_Reward/std": 0.11163535714149475, | |
| "step": 473, | |
| "train_speed(iter/s)": 0.130462 | |
| }, | |
| { | |
| "clip_ratio": 0.005457588471472263, | |
| "epoch": 9.48, | |
| "grad_norm": 2.7726047039031982, | |
| "kl": 0.626953125, | |
| "learning_rate": 6.791885693514132e-09, | |
| "loss": 0.005159153137356043, | |
| "memory(GiB)": 25.14, | |
| "step": 474, | |
| "train_speed(iter/s)": 0.130692 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 244.5, | |
| "completions/mean_length": 136.453125, | |
| "completions/min_length": 83.0, | |
| "epoch": 9.5, | |
| "grad_norm": 2.2565746307373047, | |
| "kl": 0.595703125, | |
| "learning_rate": 6.280555661802856e-09, | |
| "loss": 0.011247138492763042, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4296618103981018, | |
| "reward_std": 0.021635888144373894, | |
| "rewards/MCQ_Reward/mean": 0.4296618103981018, | |
| "rewards/MCQ_Reward/std": 0.06789225153625011, | |
| "step": 475, | |
| "train_speed(iter/s)": 0.130512 | |
| }, | |
| { | |
| "clip_ratio": 0.005767492577433586, | |
| "epoch": 9.52, | |
| "grad_norm": 2.250284433364868, | |
| "kl": 0.6015625, | |
| "learning_rate": 5.789112577318789e-09, | |
| "loss": 0.011374367401003838, | |
| "memory(GiB)": 25.14, | |
| "step": 476, | |
| "train_speed(iter/s)": 0.130746 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 185.5, | |
| "completions/mean_length": 118.5703125, | |
| "completions/min_length": 73.5, | |
| "epoch": 9.54, | |
| "grad_norm": 2.5178654193878174, | |
| "kl": 0.728515625, | |
| "learning_rate": 5.317576235317756e-09, | |
| "loss": 0.007045174017548561, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.44049952924251556, | |
| "reward_std": 0.02334336470812559, | |
| "rewards/MCQ_Reward/mean": 0.44049952924251556, | |
| "rewards/MCQ_Reward/std": 0.0808117426931858, | |
| "step": 477, | |
| "train_speed(iter/s)": 0.130671 | |
| }, | |
| { | |
| "clip_ratio": 0.004105736967176199, | |
| "epoch": 9.56, | |
| "grad_norm": 2.5065832138061523, | |
| "kl": 0.6953125, | |
| "learning_rate": 4.865965629214819e-09, | |
| "loss": 0.007527303881943226, | |
| "memory(GiB)": 25.14, | |
| "step": 478, | |
| "train_speed(iter/s)": 0.130887 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 191.0, | |
| "completions/mean_length": 117.73046875, | |
| "completions/min_length": 75.0, | |
| "epoch": 9.58, | |
| "grad_norm": 3.128554105758667, | |
| "kl": 0.59765625, | |
| "learning_rate": 4.434298949819448e-09, | |
| "loss": -0.021542608737945557, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4070900082588196, | |
| "reward_std": 0.023668975569307804, | |
| "rewards/MCQ_Reward/mean": 0.4070900082588196, | |
| "rewards/MCQ_Reward/std": 0.08471970073878765, | |
| "step": 479, | |
| "train_speed(iter/s)": 0.130803 | |
| }, | |
| { | |
| "clip_ratio": 0.00539792119525373, | |
| "epoch": 9.6, | |
| "grad_norm": 3.067028045654297, | |
| "kl": 0.59765625, | |
| "learning_rate": 4.022593584602329e-09, | |
| "loss": -0.02082860842347145, | |
| "memory(GiB)": 25.14, | |
| "step": 480, | |
| "train_speed(iter/s)": 0.131034 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 251.0, | |
| "completions/mean_length": 130.140625, | |
| "completions/min_length": 54.0, | |
| "epoch": 9.62, | |
| "grad_norm": 2.8921902179718018, | |
| "kl": 0.59375, | |
| "learning_rate": 3.6308661169957565e-09, | |
| "loss": -0.0016225441358983517, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42697805166244507, | |
| "reward_std": 0.0217811968177557, | |
| "rewards/MCQ_Reward/mean": 0.42697805166244507, | |
| "rewards/MCQ_Reward/std": 0.0660354271531105, | |
| "step": 481, | |
| "train_speed(iter/s)": 0.130674 | |
| }, | |
| { | |
| "clip_ratio": 0.007906233426183462, | |
| "epoch": 9.64, | |
| "grad_norm": 2.9274981021881104, | |
| "kl": 0.595703125, | |
| "learning_rate": 3.2591323257248894e-09, | |
| "loss": -0.0016696015372872353, | |
| "memory(GiB)": 25.14, | |
| "step": 482, | |
| "train_speed(iter/s)": 0.130879 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 222.0, | |
| "completions/mean_length": 135.94921875, | |
| "completions/min_length": 71.0, | |
| "epoch": 9.66, | |
| "grad_norm": 2.4433958530426025, | |
| "kl": 0.5546875, | |
| "learning_rate": 2.9074071841727054e-09, | |
| "loss": 0.019563939422369003, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42691025137901306, | |
| "reward_std": 0.020791654475033283, | |
| "rewards/MCQ_Reward/mean": 0.42691025137901306, | |
| "rewards/MCQ_Reward/std": 0.0828494131565094, | |
| "step": 483, | |
| "train_speed(iter/s)": 0.13078 | |
| }, | |
| { | |
| "clip_ratio": 0.004861004883423448, | |
| "epoch": 9.68, | |
| "grad_norm": 2.2269864082336426, | |
| "kl": 0.55859375, | |
| "learning_rate": 2.5757048597765395e-09, | |
| "loss": 0.019545655697584152, | |
| "memory(GiB)": 25.14, | |
| "step": 484, | |
| "train_speed(iter/s)": 0.131008 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 199.0, | |
| "completions/mean_length": 141.06640625, | |
| "completions/min_length": 89.0, | |
| "epoch": 9.7, | |
| "grad_norm": 2.19620418548584, | |
| "kl": 0.513671875, | |
| "learning_rate": 2.2640387134577053e-09, | |
| "loss": 0.010847845114767551, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.42219071090221405, | |
| "reward_std": 0.022757427766919136, | |
| "rewards/MCQ_Reward/mean": 0.42219071090221405, | |
| "rewards/MCQ_Reward/std": 0.0853536631911993, | |
| "step": 485, | |
| "train_speed(iter/s)": 0.130923 | |
| }, | |
| { | |
| "clip_ratio": 0.006320674438029528, | |
| "epoch": 9.72, | |
| "grad_norm": 2.1190598011016846, | |
| "kl": 0.5048828125, | |
| "learning_rate": 1.9724212990830936e-09, | |
| "loss": 0.010512834414839745, | |
| "memory(GiB)": 25.14, | |
| "step": 486, | |
| "train_speed(iter/s)": 0.13115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 180.5, | |
| "completions/mean_length": 111.921875, | |
| "completions/min_length": 63.5, | |
| "epoch": 9.74, | |
| "grad_norm": 2.5479891300201416, | |
| "kl": 0.59765625, | |
| "learning_rate": 1.7008643629596864e-09, | |
| "loss": -0.008141995407640934, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.41020119190216064, | |
| "reward_std": 0.022871771827340126, | |
| "rewards/MCQ_Reward/mean": 0.41020119190216064, | |
| "rewards/MCQ_Reward/std": 0.10586465150117874, | |
| "step": 487, | |
| "train_speed(iter/s)": 0.131123 | |
| }, | |
| { | |
| "clip_ratio": 0.004743925994262099, | |
| "epoch": 9.76, | |
| "grad_norm": 2.7629165649414062, | |
| "kl": 0.591796875, | |
| "learning_rate": 1.4493788433612708e-09, | |
| "loss": -0.008076684549450874, | |
| "memory(GiB)": 25.14, | |
| "step": 488, | |
| "train_speed(iter/s)": 0.131348 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 177.0, | |
| "completions/mean_length": 116.10546875, | |
| "completions/min_length": 67.0, | |
| "epoch": 9.78, | |
| "grad_norm": 2.770082950592041, | |
| "kl": 0.576171875, | |
| "learning_rate": 1.217974870087901e-09, | |
| "loss": 0.010374639183282852, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.47805055975914, | |
| "reward_std": 0.023321266286075115, | |
| "rewards/MCQ_Reward/mean": 0.47805055975914, | |
| "rewards/MCQ_Reward/std": 0.1008174680173397, | |
| "step": 489, | |
| "train_speed(iter/s)": 0.131298 | |
| }, | |
| { | |
| "clip_ratio": 0.005443725967779756, | |
| "epoch": 9.8, | |
| "grad_norm": 2.5658154487609863, | |
| "kl": 0.583984375, | |
| "learning_rate": 1.0066617640578368e-09, | |
| "loss": 0.010389911010861397, | |
| "memory(GiB)": 25.14, | |
| "step": 490, | |
| "train_speed(iter/s)": 0.131523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 181.5, | |
| "completions/mean_length": 128.69921875, | |
| "completions/min_length": 71.5, | |
| "epoch": 9.82, | |
| "grad_norm": 2.3105576038360596, | |
| "kl": 0.90625, | |
| "learning_rate": 8.154480369321759e-10, | |
| "loss": -0.004896960221230984, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.43206796050071716, | |
| "reward_std": 0.02110449317842722, | |
| "rewards/MCQ_Reward/mean": 0.43206796050071716, | |
| "rewards/MCQ_Reward/std": 0.10026764124631882, | |
| "step": 491, | |
| "train_speed(iter/s)": 0.13119 | |
| }, | |
| { | |
| "clip_ratio": 0.004017886472865939, | |
| "epoch": 9.84, | |
| "grad_norm": 2.2543957233428955, | |
| "kl": 0.892578125, | |
| "learning_rate": 6.443413907720186e-10, | |
| "loss": -0.004858216270804405, | |
| "memory(GiB)": 25.14, | |
| "step": 492, | |
| "train_speed(iter/s)": 0.131415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 221.5, | |
| "completions/mean_length": 131.3203125, | |
| "completions/min_length": 58.0, | |
| "epoch": 9.86, | |
| "grad_norm": 2.459817409515381, | |
| "kl": 0.5390625, | |
| "learning_rate": 4.933487177280482e-10, | |
| "loss": 0.0025399066507816315, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.47691330313682556, | |
| "reward_std": 0.022764784283936024, | |
| "rewards/MCQ_Reward/mean": 0.47691330313682556, | |
| "rewards/MCQ_Reward/std": 0.09778410196304321, | |
| "step": 493, | |
| "train_speed(iter/s)": 0.131346 | |
| }, | |
| { | |
| "clip_ratio": 0.004864038084633648, | |
| "epoch": 9.88, | |
| "grad_norm": 2.518949508666992, | |
| "kl": 0.537109375, | |
| "learning_rate": 3.6247609976319817e-10, | |
| "loss": 0.0027223415672779083, | |
| "memory(GiB)": 25.14, | |
| "step": 494, | |
| "train_speed(iter/s)": 0.131569 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 184.0, | |
| "completions/mean_length": 113.7421875, | |
| "completions/min_length": 57.5, | |
| "epoch": 9.9, | |
| "grad_norm": 2.7932207584381104, | |
| "kl": 0.640625, | |
| "learning_rate": 2.517288084074587e-10, | |
| "loss": -0.008804459124803543, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.45272429287433624, | |
| "reward_std": 0.02382285613566637, | |
| "rewards/MCQ_Reward/mean": 0.45272429287433624, | |
| "rewards/MCQ_Reward/std": 0.08811983093619347, | |
| "step": 495, | |
| "train_speed(iter/s)": 0.13153 | |
| }, | |
| { | |
| "clip_ratio": 0.005316317779943347, | |
| "epoch": 9.92, | |
| "grad_norm": 2.3468141555786133, | |
| "kl": 0.634765625, | |
| "learning_rate": 1.6111130454543597e-10, | |
| "loss": -0.00884802732616663, | |
| "memory(GiB)": 25.14, | |
| "step": 496, | |
| "train_speed(iter/s)": 0.131752 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 175.0, | |
| "completions/mean_length": 111.8515625, | |
| "completions/min_length": 57.5, | |
| "epoch": 9.94, | |
| "grad_norm": 2.973198413848877, | |
| "kl": 0.642578125, | |
| "learning_rate": 9.06272382371065e-11, | |
| "loss": 0.002287194598466158, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.4001469016075134, | |
| "reward_std": 0.0235411636531353, | |
| "rewards/MCQ_Reward/mean": 0.4001469016075134, | |
| "rewards/MCQ_Reward/std": 0.07189228385686874, | |
| "step": 497, | |
| "train_speed(iter/s)": 0.131698 | |
| }, | |
| { | |
| "clip_ratio": 0.0034996896283701062, | |
| "epoch": 9.96, | |
| "grad_norm": 3.0021812915802, | |
| "kl": 0.6484375, | |
| "learning_rate": 4.0279448570323946e-11, | |
| "loss": 0.002919801976531744, | |
| "memory(GiB)": 25.14, | |
| "step": 498, | |
| "train_speed(iter/s)": 0.131924 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 225.0, | |
| "completions/mean_length": 135.265625, | |
| "completions/min_length": 68.5, | |
| "epoch": 9.98, | |
| "grad_norm": 2.244234085083008, | |
| "kl": 0.55078125, | |
| "learning_rate": 1.0069963546743831e-11, | |
| "loss": -0.0014414777979254723, | |
| "memory(GiB)": 25.14, | |
| "reward": 0.46473294496536255, | |
| "reward_std": 0.02351410035043955, | |
| "rewards/MCQ_Reward/mean": 0.46473294496536255, | |
| "rewards/MCQ_Reward/std": 0.06907243467867374, | |
| "step": 499, | |
| "train_speed(iter/s)": 0.131777 | |
| }, | |
| { | |
| "clip_ratio": 0.0020644072210416198, | |
| "epoch": 10.0, | |
| "grad_norm": 2.3687548637390137, | |
| "kl": 0.55078125, | |
| "learning_rate": 0.0, | |
| "loss": -0.0014774189330637455, | |
| "memory(GiB)": 25.14, | |
| "step": 500, | |
| "train_speed(iter/s)": 0.131993 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |