| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.6890756302521008, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 523.3984375, | |
| "epoch": 0.01680672268907563, | |
| "grad_norm": 0.4084254801273346, | |
| "kl": 0.0, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.0346, | |
| "num_tokens": 149643.0, | |
| "reward": 0.05572916800156236, | |
| "reward_std": 0.1236814484000206, | |
| "rewards/curriculum_aware_reward_fn": 0.024479168001562357, | |
| "rewards/format_reward": 0.03125, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 446.5390625, | |
| "epoch": 0.03361344537815126, | |
| "grad_norm": 0.5236720442771912, | |
| "kl": 0.0, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.015, | |
| "num_tokens": 282184.0, | |
| "reward": 0.17552083544433117, | |
| "reward_std": 0.2788553349673748, | |
| "rewards/curriculum_aware_reward_fn": 0.12864583544433117, | |
| "rewards/format_reward": 0.046875, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 404.5625, | |
| "epoch": 0.05042016806722689, | |
| "grad_norm": 0.5860257744789124, | |
| "kl": 0.00027751922607421875, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 411536.0, | |
| "reward": 0.1171875, | |
| "reward_std": 0.2090039700269699, | |
| "rewards/curriculum_aware_reward_fn": 0.078125, | |
| "rewards/format_reward": 0.0390625, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 437.8359375, | |
| "epoch": 0.06722689075630252, | |
| "grad_norm": 0.5572423338890076, | |
| "kl": 0.00028705596923828125, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0288, | |
| "num_tokens": 543811.0, | |
| "reward": 0.1276041716337204, | |
| "reward_std": 0.17299909517169, | |
| "rewards/curriculum_aware_reward_fn": 0.1041666716337204, | |
| "rewards/format_reward": 0.0234375, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 484.453125, | |
| "epoch": 0.08403361344537816, | |
| "grad_norm": 0.5424997806549072, | |
| "kl": 0.0003066062927246094, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 674637.0, | |
| "reward": 0.15625, | |
| "reward_std": 0.20851250365376472, | |
| "rewards/curriculum_aware_reward_fn": 0.109375, | |
| "rewards/format_reward": 0.046875, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 468.078125, | |
| "epoch": 0.10084033613445378, | |
| "grad_norm": 0.47155389189720154, | |
| "kl": 0.0003123283386230469, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": -0.0177, | |
| "num_tokens": 810527.0, | |
| "reward": 0.1276041716337204, | |
| "reward_std": 0.18760672956705093, | |
| "rewards/curriculum_aware_reward_fn": 0.0651041716337204, | |
| "rewards/format_reward": 0.0625, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 500.3125, | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 0.5090876817703247, | |
| "kl": 0.0004954338073730469, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 0.0566, | |
| "num_tokens": 947063.0, | |
| "reward": 0.1354166679084301, | |
| "reward_std": 0.1753537617623806, | |
| "rewards/curriculum_aware_reward_fn": 0.07291666697710752, | |
| "rewards/format_reward": 0.0625, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 483.84375, | |
| "epoch": 0.13445378151260504, | |
| "grad_norm": 0.6272737979888916, | |
| "kl": 0.0008707046508789062, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.0704, | |
| "num_tokens": 1093987.0, | |
| "reward": 0.16250000149011612, | |
| "reward_std": 0.3029462620615959, | |
| "rewards/curriculum_aware_reward_fn": 0.037500000558793545, | |
| "rewards/format_reward": 0.125, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 423.1484375, | |
| "epoch": 0.15126050420168066, | |
| "grad_norm": 0.7402248978614807, | |
| "kl": 0.00279998779296875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0728, | |
| "num_tokens": 1218118.0, | |
| "reward": 0.3968750089406967, | |
| "reward_std": 0.45669952034950256, | |
| "rewards/curriculum_aware_reward_fn": 0.10781250474974513, | |
| "rewards/format_reward": 0.2890625, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 496.734375, | |
| "epoch": 0.16806722689075632, | |
| "grad_norm": 0.7178624272346497, | |
| "kl": 0.00432586669921875, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": -0.0207, | |
| "num_tokens": 1362188.0, | |
| "reward": 0.5677083432674408, | |
| "reward_std": 0.5023705065250397, | |
| "rewards/curriculum_aware_reward_fn": 0.013020833721384406, | |
| "rewards/format_reward": 0.5546875, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 500.3984375, | |
| "epoch": 0.18487394957983194, | |
| "grad_norm": 0.6241538524627686, | |
| "kl": 0.0084075927734375, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.0165, | |
| "num_tokens": 1496743.0, | |
| "reward": 0.714062511920929, | |
| "reward_std": 0.344537615776062, | |
| "rewards/curriculum_aware_reward_fn": 0.010937500279396772, | |
| "rewards/format_reward": 0.703125, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 517.5625, | |
| "epoch": 0.20168067226890757, | |
| "grad_norm": 0.6269044280052185, | |
| "kl": 0.00717926025390625, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0195, | |
| "num_tokens": 1650679.0, | |
| "reward": 0.7604166716337204, | |
| "reward_std": 0.2794100269675255, | |
| "rewards/curriculum_aware_reward_fn": 0.010416666977107525, | |
| "rewards/format_reward": 0.75, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 489.9921875, | |
| "epoch": 0.2184873949579832, | |
| "grad_norm": 0.6628085374832153, | |
| "kl": 0.011993408203125, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 0.0178, | |
| "num_tokens": 1791190.0, | |
| "reward": 0.9375000298023224, | |
| "reward_std": 0.23163868859410286, | |
| "rewards/curriculum_aware_reward_fn": 0.031250000931322575, | |
| "rewards/format_reward": 0.90625, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 418.90625, | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 0.7439326643943787, | |
| "kl": 0.0212249755859375, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": -0.0283, | |
| "num_tokens": 1912322.0, | |
| "reward": 0.8645833432674408, | |
| "reward_std": 0.2743529714643955, | |
| "rewards/curriculum_aware_reward_fn": 0.02864583395421505, | |
| "rewards/format_reward": 0.8359375, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 440.5546875, | |
| "epoch": 0.25210084033613445, | |
| "grad_norm": 0.477730393409729, | |
| "kl": 0.0169830322265625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0314, | |
| "num_tokens": 2041473.0, | |
| "reward": 1.0234375298023224, | |
| "reward_std": 0.159461235627532, | |
| "rewards/curriculum_aware_reward_fn": 0.05468750325962901, | |
| "rewards/format_reward": 0.96875, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 497.265625, | |
| "epoch": 0.2689075630252101, | |
| "grad_norm": 0.3044699728488922, | |
| "kl": 0.021484375, | |
| "learning_rate": 4.999952797253148e-06, | |
| "loss": 0.0386, | |
| "num_tokens": 2178899.0, | |
| "reward": 0.9427083432674408, | |
| "reward_std": 0.10222155228257179, | |
| "rewards/curriculum_aware_reward_fn": 0.06770833395421505, | |
| "rewards/format_reward": 0.875, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 503.7421875, | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.4504559338092804, | |
| "kl": 0.033233642578125, | |
| "learning_rate": 4.9998111909931225e-06, | |
| "loss": 0.0346, | |
| "num_tokens": 2311042.0, | |
| "reward": 1.0927083492279053, | |
| "reward_std": 0.13363875821232796, | |
| "rewards/curriculum_aware_reward_fn": 0.09270833618938923, | |
| "rewards/format_reward": 1.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 582.390625, | |
| "epoch": 0.3025210084033613, | |
| "grad_norm": 0.3522663712501526, | |
| "kl": 0.033416748046875, | |
| "learning_rate": 4.999575187161439e-06, | |
| "loss": 0.0049, | |
| "num_tokens": 2477404.0, | |
| "reward": 0.997395858168602, | |
| "reward_std": 0.0840194127522409, | |
| "rewards/curriculum_aware_reward_fn": 0.05989583651535213, | |
| "rewards/format_reward": 0.9375, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 603.4140625, | |
| "epoch": 0.31932773109243695, | |
| "grad_norm": 0.2832561731338501, | |
| "kl": 0.0328369140625, | |
| "learning_rate": 4.9992447956603455e-06, | |
| "loss": 0.0152, | |
| "num_tokens": 2642801.0, | |
| "reward": 0.895833358168602, | |
| "reward_std": 0.05173455877229571, | |
| "rewards/curriculum_aware_reward_fn": 0.020833333721384406, | |
| "rewards/format_reward": 0.875, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 501.46875, | |
| "epoch": 0.33613445378151263, | |
| "grad_norm": 0.4254682660102844, | |
| "kl": 0.0416259765625, | |
| "learning_rate": 4.998820030352409e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 2784445.0, | |
| "reward": 1.0010417103767395, | |
| "reward_std": 0.13327472284436226, | |
| "rewards/curriculum_aware_reward_fn": 0.07135416753590107, | |
| "rewards/format_reward": 0.9296875, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 598.765625, | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 0.3762480914592743, | |
| "kl": 0.033721923828125, | |
| "learning_rate": 4.998300909059929e-06, | |
| "loss": -0.0163, | |
| "num_tokens": 2951415.0, | |
| "reward": 0.9947916865348816, | |
| "reward_std": 0.11004853993654251, | |
| "rewards/curriculum_aware_reward_fn": 0.07291666604578495, | |
| "rewards/format_reward": 0.921875, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 489.6484375, | |
| "epoch": 0.3697478991596639, | |
| "grad_norm": 0.6521299481391907, | |
| "kl": 0.04229736328125, | |
| "learning_rate": 4.997687453564198e-06, | |
| "loss": 0.0083, | |
| "num_tokens": 3090354.0, | |
| "reward": 0.9609375298023224, | |
| "reward_std": 0.25490327179431915, | |
| "rewards/curriculum_aware_reward_fn": 0.1562500037252903, | |
| "rewards/format_reward": 0.8046875, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 446.625, | |
| "epoch": 0.3865546218487395, | |
| "grad_norm": 0.4581259489059448, | |
| "kl": 0.02838134765625, | |
| "learning_rate": 4.9969796896045775e-06, | |
| "loss": 0.0239, | |
| "num_tokens": 3234002.0, | |
| "reward": 1.1093750298023224, | |
| "reward_std": 0.15402578841894865, | |
| "rewards/curriculum_aware_reward_fn": 0.17187500139698386, | |
| "rewards/format_reward": 0.9375, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 438.296875, | |
| "epoch": 0.40336134453781514, | |
| "grad_norm": 0.469014436006546, | |
| "kl": 0.02874755859375, | |
| "learning_rate": 4.996177646877426e-06, | |
| "loss": 0.0065, | |
| "num_tokens": 3368280.0, | |
| "reward": 1.0302083790302277, | |
| "reward_std": 0.12476669438183308, | |
| "rewards/curriculum_aware_reward_fn": 0.045833335258066654, | |
| "rewards/format_reward": 0.984375, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 433.9921875, | |
| "epoch": 0.42016806722689076, | |
| "grad_norm": 0.5325790643692017, | |
| "kl": 0.028350830078125, | |
| "learning_rate": 4.995281359034851e-06, | |
| "loss": -0.0046, | |
| "num_tokens": 3495607.0, | |
| "reward": 1.0026041865348816, | |
| "reward_std": 0.2044544592499733, | |
| "rewards/curriculum_aware_reward_fn": 0.08072916977107525, | |
| "rewards/format_reward": 0.921875, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 380.015625, | |
| "epoch": 0.4369747899159664, | |
| "grad_norm": 0.5200158357620239, | |
| "kl": 0.03680419921875, | |
| "learning_rate": 4.994290863683296e-06, | |
| "loss": 0.0419, | |
| "num_tokens": 3609809.0, | |
| "reward": 1.1458334028720856, | |
| "reward_std": 0.20207119127735496, | |
| "rewards/curriculum_aware_reward_fn": 0.15364583837799728, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 413.7265625, | |
| "epoch": 0.453781512605042, | |
| "grad_norm": 0.41265320777893066, | |
| "kl": 0.033966064453125, | |
| "learning_rate": 4.99320620238196e-06, | |
| "loss": -0.0204, | |
| "num_tokens": 3744550.0, | |
| "reward": 1.0963541865348816, | |
| "reward_std": 0.11664257757365704, | |
| "rewards/curriculum_aware_reward_fn": 0.09635416604578495, | |
| "rewards/format_reward": 1.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 461.4375, | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.3836808502674103, | |
| "kl": 0.032623291015625, | |
| "learning_rate": 4.99202742064106e-06, | |
| "loss": 0.035, | |
| "num_tokens": 3875238.0, | |
| "reward": 1.0182291865348816, | |
| "reward_std": 0.10335793904960155, | |
| "rewards/curriculum_aware_reward_fn": 0.0807291679084301, | |
| "rewards/format_reward": 0.9375, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 506.953125, | |
| "epoch": 0.48739495798319327, | |
| "grad_norm": 0.3787353038787842, | |
| "kl": 0.029510498046875, | |
| "learning_rate": 4.990754567919917e-06, | |
| "loss": 0.0725, | |
| "num_tokens": 4024312.0, | |
| "reward": 0.9531250298023224, | |
| "reward_std": 0.08838835544884205, | |
| "rewards/curriculum_aware_reward_fn": 0.08593750279396772, | |
| "rewards/format_reward": 0.8671875, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 474.1875, | |
| "epoch": 0.5042016806722689, | |
| "grad_norm": 0.37932828068733215, | |
| "kl": 0.03497314453125, | |
| "learning_rate": 4.989387697624881e-06, | |
| "loss": 0.0078, | |
| "num_tokens": 4161712.0, | |
| "reward": 1.1328125298023224, | |
| "reward_std": 0.14552949741482735, | |
| "rewards/curriculum_aware_reward_fn": 0.1328125037252903, | |
| "rewards/format_reward": 1.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 447.6796875, | |
| "epoch": 0.5210084033613446, | |
| "grad_norm": 0.396627813577652, | |
| "kl": 0.0396728515625, | |
| "learning_rate": 4.987926867107095e-06, | |
| "loss": 0.004, | |
| "num_tokens": 4304935.0, | |
| "reward": 0.9671875089406967, | |
| "reward_std": 0.12499183788895607, | |
| "rewards/curriculum_aware_reward_fn": 0.09218750381842256, | |
| "rewards/format_reward": 0.875, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 574.703125, | |
| "epoch": 0.5378151260504201, | |
| "grad_norm": 0.39759182929992676, | |
| "kl": 0.030426025390625, | |
| "learning_rate": 4.986372137660078e-06, | |
| "loss": 0.0566, | |
| "num_tokens": 4464105.0, | |
| "reward": 0.8953125327825546, | |
| "reward_std": 0.147516256198287, | |
| "rewards/curriculum_aware_reward_fn": 0.0906250006519258, | |
| "rewards/format_reward": 0.8046875, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 434.640625, | |
| "epoch": 0.5546218487394958, | |
| "grad_norm": 0.3933623433113098, | |
| "kl": 0.034210205078125, | |
| "learning_rate": 4.984723574517165e-06, | |
| "loss": 0.0163, | |
| "num_tokens": 4602139.0, | |
| "reward": 1.1234374940395355, | |
| "reward_std": 0.15428178012371063, | |
| "rewards/curriculum_aware_reward_fn": 0.18593750335276127, | |
| "rewards/format_reward": 0.9375, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 445.1640625, | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.3263511061668396, | |
| "kl": 0.04083251953125, | |
| "learning_rate": 4.9829812468487655e-06, | |
| "loss": 0.0145, | |
| "num_tokens": 4736544.0, | |
| "reward": 0.9843750298023224, | |
| "reward_std": 0.09849035926163197, | |
| "rewards/curriculum_aware_reward_fn": 0.05468750139698386, | |
| "rewards/format_reward": 0.9296875, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 477.53125, | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.336434543132782, | |
| "kl": 0.0318603515625, | |
| "learning_rate": 4.981145227759457e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 4881308.0, | |
| "reward": 0.966145858168602, | |
| "reward_std": 0.07084779627621174, | |
| "rewards/curriculum_aware_reward_fn": 0.09114583441987634, | |
| "rewards/format_reward": 0.875, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 469.4609375, | |
| "epoch": 0.6050420168067226, | |
| "grad_norm": 0.3600573241710663, | |
| "kl": 0.0386962890625, | |
| "learning_rate": 4.979215594284924e-06, | |
| "loss": 0.008, | |
| "num_tokens": 5017415.0, | |
| "reward": 1.0843750536441803, | |
| "reward_std": 0.11371596809476614, | |
| "rewards/curriculum_aware_reward_fn": 0.08437500288709998, | |
| "rewards/format_reward": 1.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 422.7890625, | |
| "epoch": 0.6218487394957983, | |
| "grad_norm": 0.38463500142097473, | |
| "kl": 0.03997802734375, | |
| "learning_rate": 4.977192427388722e-06, | |
| "loss": 0.0195, | |
| "num_tokens": 5141804.0, | |
| "reward": 1.1130208671092987, | |
| "reward_std": 0.1456994889304042, | |
| "rewards/curriculum_aware_reward_fn": 0.12083333730697632, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 415.4765625, | |
| "epoch": 0.6386554621848739, | |
| "grad_norm": 0.5343978404998779, | |
| "kl": 0.04315185546875, | |
| "learning_rate": 4.9750758119588824e-06, | |
| "loss": -0.0, | |
| "num_tokens": 5262529.0, | |
| "reward": 1.136979192495346, | |
| "reward_std": 0.29098181426525116, | |
| "rewards/curriculum_aware_reward_fn": 0.16822916036471725, | |
| "rewards/format_reward": 0.96875, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 442.2421875, | |
| "epoch": 0.6554621848739496, | |
| "grad_norm": 0.33322009444236755, | |
| "kl": 0.047119140625, | |
| "learning_rate": 4.972865836804349e-06, | |
| "loss": 0.0088, | |
| "num_tokens": 5399424.0, | |
| "reward": 1.03125, | |
| "reward_std": 0.13204573467373848, | |
| "rewards/curriculum_aware_reward_fn": 0.109375, | |
| "rewards/format_reward": 0.921875, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 482.59375, | |
| "epoch": 0.6722689075630253, | |
| "grad_norm": 0.35028988122940063, | |
| "kl": 0.0504150390625, | |
| "learning_rate": 4.970562594651254e-06, | |
| "loss": 0.0066, | |
| "num_tokens": 5539492.0, | |
| "reward": 1.09375, | |
| "reward_std": 0.1325825173407793, | |
| "rewards/curriculum_aware_reward_fn": 0.125, | |
| "rewards/format_reward": 0.96875, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 423.125, | |
| "epoch": 0.6890756302521008, | |
| "grad_norm": 0.3613918125629425, | |
| "kl": 0.06549072265625, | |
| "learning_rate": 4.968166182139026e-06, | |
| "loss": 0.0516, | |
| "num_tokens": 5667012.0, | |
| "reward": 1.122395858168602, | |
| "reward_std": 0.09522313997149467, | |
| "rewards/curriculum_aware_reward_fn": 0.13020833837799728, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 391.953125, | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 0.6798313856124878, | |
| "kl": 0.080322265625, | |
| "learning_rate": 4.9656766998163306e-06, | |
| "loss": 0.0429, | |
| "num_tokens": 5786006.0, | |
| "reward": 1.1645833551883698, | |
| "reward_std": 0.23287939466536045, | |
| "rewards/curriculum_aware_reward_fn": 0.19583334028720856, | |
| "rewards/format_reward": 0.96875, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 406.59375, | |
| "epoch": 0.7226890756302521, | |
| "grad_norm": 0.4006165862083435, | |
| "kl": 0.06298828125, | |
| "learning_rate": 4.963094252136865e-06, | |
| "loss": 0.0119, | |
| "num_tokens": 5910762.0, | |
| "reward": 1.0989583432674408, | |
| "reward_std": 0.1059559416025877, | |
| "rewards/curriculum_aware_reward_fn": 0.16927083767950535, | |
| "rewards/format_reward": 0.9296875, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 411.28125, | |
| "epoch": 0.7394957983193278, | |
| "grad_norm": 0.3308926522731781, | |
| "kl": 0.068359375, | |
| "learning_rate": 4.960418947454958e-06, | |
| "loss": 0.0204, | |
| "num_tokens": 6042942.0, | |
| "reward": 0.9947916865348816, | |
| "reward_std": 0.10640286095440388, | |
| "rewards/curriculum_aware_reward_fn": 0.0572916679084301, | |
| "rewards/format_reward": 0.9375, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 380.9140625, | |
| "epoch": 0.7563025210084033, | |
| "grad_norm": 0.478098064661026, | |
| "kl": 0.0755615234375, | |
| "learning_rate": 4.957650898021038e-06, | |
| "loss": 0.0624, | |
| "num_tokens": 6162299.0, | |
| "reward": 1.158854216337204, | |
| "reward_std": 0.20395735278725624, | |
| "rewards/curriculum_aware_reward_fn": 0.15885416977107525, | |
| "rewards/format_reward": 1.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 333.5390625, | |
| "epoch": 0.773109243697479, | |
| "grad_norm": 0.47225892543792725, | |
| "kl": 0.0811767578125, | |
| "learning_rate": 4.954790219976915e-06, | |
| "loss": 0.0167, | |
| "num_tokens": 6272592.0, | |
| "reward": 1.170312523841858, | |
| "reward_std": 0.1300742938183248, | |
| "rewards/curriculum_aware_reward_fn": 0.17031250218860805, | |
| "rewards/format_reward": 1.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 366.34375, | |
| "epoch": 0.7899159663865546, | |
| "grad_norm": 0.4816167652606964, | |
| "kl": 0.0848388671875, | |
| "learning_rate": 4.95183703335091e-06, | |
| "loss": 0.0103, | |
| "num_tokens": 6397436.0, | |
| "reward": 1.1953125298023224, | |
| "reward_std": 0.12861409783363342, | |
| "rewards/curriculum_aware_reward_fn": 0.1953125074505806, | |
| "rewards/format_reward": 1.0, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 338.65625, | |
| "epoch": 0.8067226890756303, | |
| "grad_norm": 0.47690296173095703, | |
| "kl": 0.07958984375, | |
| "learning_rate": 4.948791462052819e-06, | |
| "loss": -0.0039, | |
| "num_tokens": 6519896.0, | |
| "reward": 1.0442708730697632, | |
| "reward_std": 0.11976211331784725, | |
| "rewards/curriculum_aware_reward_fn": 0.05208333441987634, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 325.78125, | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 0.46987131237983704, | |
| "kl": 0.08935546875, | |
| "learning_rate": 4.945653633868716e-06, | |
| "loss": 0.023, | |
| "num_tokens": 6637036.0, | |
| "reward": 1.1223958730697632, | |
| "reward_std": 0.18378917127847672, | |
| "rewards/curriculum_aware_reward_fn": 0.14583333488553762, | |
| "rewards/format_reward": 0.9765625, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 302.6171875, | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 0.5635585784912109, | |
| "kl": 0.088623046875, | |
| "learning_rate": 4.942423680455584e-06, | |
| "loss": 0.0157, | |
| "num_tokens": 6749411.0, | |
| "reward": 1.1494792103767395, | |
| "reward_std": 0.19005249440670013, | |
| "rewards/curriculum_aware_reward_fn": 0.1494791703298688, | |
| "rewards/format_reward": 1.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 366.4140625, | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.5150569677352905, | |
| "kl": 0.0848388671875, | |
| "learning_rate": 4.939101737335802e-06, | |
| "loss": -0.0145, | |
| "num_tokens": 6879104.0, | |
| "reward": 0.9713541865348816, | |
| "reward_std": 0.12468592822551727, | |
| "rewards/curriculum_aware_reward_fn": 0.041666666977107525, | |
| "rewards/format_reward": 0.9296875, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 288.9609375, | |
| "epoch": 0.8739495798319328, | |
| "grad_norm": 0.6744732856750488, | |
| "kl": 0.088134765625, | |
| "learning_rate": 4.935687943891447e-06, | |
| "loss": 0.0413, | |
| "num_tokens": 6981627.0, | |
| "reward": 1.171354204416275, | |
| "reward_std": 0.26688926108181477, | |
| "rewards/curriculum_aware_reward_fn": 0.1791666648350656, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 286.609375, | |
| "epoch": 0.8907563025210085, | |
| "grad_norm": 0.6298589706420898, | |
| "kl": 0.0899658203125, | |
| "learning_rate": 4.932182443358458e-06, | |
| "loss": 0.002, | |
| "num_tokens": 7088449.0, | |
| "reward": 1.1250000298023224, | |
| "reward_std": 0.14211241621524096, | |
| "rewards/curriculum_aware_reward_fn": 0.125000003259629, | |
| "rewards/format_reward": 1.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 302.4921875, | |
| "epoch": 0.907563025210084, | |
| "grad_norm": 0.6446663737297058, | |
| "kl": 0.0955810546875, | |
| "learning_rate": 4.928585382820616e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 7197360.0, | |
| "reward": 1.1005208790302277, | |
| "reward_std": 0.08887648163363338, | |
| "rewards/curriculum_aware_reward_fn": 0.10052083828486502, | |
| "rewards/format_reward": 1.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 399.0625, | |
| "epoch": 0.9243697478991597, | |
| "grad_norm": 0.729965090751648, | |
| "kl": 0.1077880859375, | |
| "learning_rate": 4.924896913203376e-06, | |
| "loss": 0.0496, | |
| "num_tokens": 7331528.0, | |
| "reward": 0.9932291805744171, | |
| "reward_std": 0.15547171607613564, | |
| "rewards/curriculum_aware_reward_fn": 0.1807291698642075, | |
| "rewards/format_reward": 0.8125, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 494.09375, | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.41514354944229126, | |
| "kl": 0.0888671875, | |
| "learning_rate": 4.921117189267535e-06, | |
| "loss": -0.0255, | |
| "num_tokens": 7482548.0, | |
| "reward": 0.8671875149011612, | |
| "reward_std": 0.09729943191632628, | |
| "rewards/curriculum_aware_reward_fn": 0.05468750069849193, | |
| "rewards/format_reward": 0.8125, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 511.90625, | |
| "epoch": 0.957983193277311, | |
| "grad_norm": 0.6172528266906738, | |
| "kl": 0.1680908203125, | |
| "learning_rate": 4.917246369602742e-06, | |
| "loss": 0.0368, | |
| "num_tokens": 7621256.0, | |
| "reward": 1.050520896911621, | |
| "reward_std": 0.1373576819896698, | |
| "rewards/curriculum_aware_reward_fn": 0.17552083916962147, | |
| "rewards/format_reward": 0.875, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 415.8046875, | |
| "epoch": 0.9747899159663865, | |
| "grad_norm": 0.5895751118659973, | |
| "kl": 0.2454833984375, | |
| "learning_rate": 4.9132846166208355e-06, | |
| "loss": 0.0654, | |
| "num_tokens": 7740207.0, | |
| "reward": 1.1197916865348816, | |
| "reward_std": 0.09127403190359473, | |
| "rewards/curriculum_aware_reward_fn": 0.12760416674427688, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 578.2857208251953, | |
| "epoch": 0.9915966386554622, | |
| "grad_norm": 0.4072950482368469, | |
| "kl": 0.12353515625, | |
| "learning_rate": 4.9092320965490365e-06, | |
| "loss": 0.0363, | |
| "num_tokens": 7885325.0, | |
| "reward": 1.0598958432674408, | |
| "reward_std": 0.11012758687138557, | |
| "rewards/curriculum_aware_reward_fn": 0.0598958358168602, | |
| "rewards/format_reward": 1.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 564.78125, | |
| "epoch": 1.0168067226890756, | |
| "grad_norm": 0.4054146707057953, | |
| "kl": 0.1219482421875, | |
| "learning_rate": 4.905088979422971e-06, | |
| "loss": 0.0515, | |
| "num_tokens": 8038297.0, | |
| "reward": 1.0937500596046448, | |
| "reward_std": 0.1636663186363876, | |
| "rewards/curriculum_aware_reward_fn": 0.1015625053551048, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 578.2890625, | |
| "epoch": 1.0336134453781514, | |
| "grad_norm": 0.3728667199611664, | |
| "kl": 0.126708984375, | |
| "learning_rate": 4.900855439079536e-06, | |
| "loss": 0.0134, | |
| "num_tokens": 8185046.0, | |
| "reward": 1.1265625357627869, | |
| "reward_std": 0.14053087309002876, | |
| "rewards/curriculum_aware_reward_fn": 0.12656250409781933, | |
| "rewards/format_reward": 1.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 722.3125, | |
| "epoch": 1.050420168067227, | |
| "grad_norm": 0.32597795128822327, | |
| "kl": 0.1097412109375, | |
| "learning_rate": 4.8965316531496055e-06, | |
| "loss": 0.0184, | |
| "num_tokens": 8366894.0, | |
| "reward": 0.9677083492279053, | |
| "reward_std": 0.10382660711184144, | |
| "rewards/curriculum_aware_reward_fn": 0.04583333386108279, | |
| "rewards/format_reward": 0.921875, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 663.3671875, | |
| "epoch": 1.0672268907563025, | |
| "grad_norm": 0.4411354064941406, | |
| "kl": 0.1256103515625, | |
| "learning_rate": 4.892117803050578e-06, | |
| "loss": 0.0637, | |
| "num_tokens": 8514637.0, | |
| "reward": 1.0671875327825546, | |
| "reward_std": 0.21371026523411274, | |
| "rewards/curriculum_aware_reward_fn": 0.09843750204890966, | |
| "rewards/format_reward": 0.96875, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 644.1328125, | |
| "epoch": 1.084033613445378, | |
| "grad_norm": 0.36323124170303345, | |
| "kl": 0.1290283203125, | |
| "learning_rate": 4.887614073978761e-06, | |
| "loss": -0.0258, | |
| "num_tokens": 8683182.0, | |
| "reward": 0.9921875, | |
| "reward_std": 0.12415501847863197, | |
| "rewards/curriculum_aware_reward_fn": 0.0859375, | |
| "rewards/format_reward": 0.90625, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 659.5703125, | |
| "epoch": 1.1008403361344539, | |
| "grad_norm": 0.5263646245002747, | |
| "kl": 0.1494140625, | |
| "learning_rate": 4.883020654901609e-06, | |
| "loss": -0.03, | |
| "num_tokens": 8846031.0, | |
| "reward": 0.9375000149011612, | |
| "reward_std": 0.29325952008366585, | |
| "rewards/curriculum_aware_reward_fn": 0.08593750093132257, | |
| "rewards/format_reward": 0.8515625, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 542.421875, | |
| "epoch": 1.1176470588235294, | |
| "grad_norm": 0.5841398239135742, | |
| "kl": 0.1328125, | |
| "learning_rate": 4.878337738549785e-06, | |
| "loss": 0.0185, | |
| "num_tokens": 8994685.0, | |
| "reward": 0.9244791865348816, | |
| "reward_std": 0.350746251642704, | |
| "rewards/curriculum_aware_reward_fn": 0.0651041679084301, | |
| "rewards/format_reward": 0.859375, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 582.9453125, | |
| "epoch": 1.134453781512605, | |
| "grad_norm": 0.5570164322853088, | |
| "kl": 0.122314453125, | |
| "learning_rate": 4.873565521409082e-06, | |
| "loss": 0.0521, | |
| "num_tokens": 9155622.0, | |
| "reward": 0.9843750298023224, | |
| "reward_std": 0.3105625621974468, | |
| "rewards/curriculum_aware_reward_fn": 0.140625, | |
| "rewards/format_reward": 0.84375, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 485.3359375, | |
| "epoch": 1.1512605042016806, | |
| "grad_norm": 0.5985521078109741, | |
| "kl": 0.1263427734375, | |
| "learning_rate": 4.868704203712173e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 9288609.0, | |
| "reward": 1.052083358168602, | |
| "reward_std": 0.3865399658679962, | |
| "rewards/curriculum_aware_reward_fn": 0.18489583488553762, | |
| "rewards/format_reward": 0.8671875, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 535.578125, | |
| "epoch": 1.1680672268907564, | |
| "grad_norm": 0.564681887626648, | |
| "kl": 0.1126708984375, | |
| "learning_rate": 4.86375398943021e-06, | |
| "loss": -0.0485, | |
| "num_tokens": 9437915.0, | |
| "reward": 0.9140625298023224, | |
| "reward_std": 0.28478266298770905, | |
| "rewards/curriculum_aware_reward_fn": 0.07031250256113708, | |
| "rewards/format_reward": 0.84375, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 496.0703125, | |
| "epoch": 1.184873949579832, | |
| "grad_norm": 0.5399004220962524, | |
| "kl": 0.099853515625, | |
| "learning_rate": 4.858715086264274e-06, | |
| "loss": 0.0, | |
| "num_tokens": 9580828.0, | |
| "reward": 0.934895858168602, | |
| "reward_std": 0.19700524397194386, | |
| "rewards/curriculum_aware_reward_fn": 0.04427083441987634, | |
| "rewards/format_reward": 0.890625, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 469.078125, | |
| "epoch": 1.2016806722689075, | |
| "grad_norm": 0.552808940410614, | |
| "kl": 0.104736328125, | |
| "learning_rate": 4.853587705636646e-06, | |
| "loss": -0.0129, | |
| "num_tokens": 9710926.0, | |
| "reward": 1.0234375447034836, | |
| "reward_std": 0.2662259414792061, | |
| "rewards/curriculum_aware_reward_fn": 0.07031250186264515, | |
| "rewards/format_reward": 0.953125, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 539.765625, | |
| "epoch": 1.2184873949579833, | |
| "grad_norm": 0.4241795837879181, | |
| "kl": 0.093505859375, | |
| "learning_rate": 4.84837206268195e-06, | |
| "loss": -0.0161, | |
| "num_tokens": 9867328.0, | |
| "reward": 0.796875, | |
| "reward_std": 0.12126770988106728, | |
| "rewards/curriculum_aware_reward_fn": 0.015625, | |
| "rewards/format_reward": 0.78125, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 502.6328125, | |
| "epoch": 1.2352941176470589, | |
| "grad_norm": 0.4336708188056946, | |
| "kl": 0.0975341796875, | |
| "learning_rate": 4.8430683762381195e-06, | |
| "loss": 0.0086, | |
| "num_tokens": 10007809.0, | |
| "reward": 1.0416666865348816, | |
| "reward_std": 0.1647402998059988, | |
| "rewards/curriculum_aware_reward_fn": 0.07291666697710752, | |
| "rewards/format_reward": 0.96875, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 523.640625, | |
| "epoch": 1.2521008403361344, | |
| "grad_norm": 0.46911758184432983, | |
| "kl": 0.1041259765625, | |
| "learning_rate": 4.837676868837213e-06, | |
| "loss": 0.0313, | |
| "num_tokens": 10147291.0, | |
| "reward": 1.0302083790302277, | |
| "reward_std": 0.18167300708591938, | |
| "rewards/curriculum_aware_reward_fn": 0.06145833572372794, | |
| "rewards/format_reward": 0.96875, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 628.6171875, | |
| "epoch": 1.26890756302521, | |
| "grad_norm": 0.5553966760635376, | |
| "kl": 0.091552734375, | |
| "learning_rate": 4.832197766696085e-06, | |
| "loss": 0.0771, | |
| "num_tokens": 10304234.0, | |
| "reward": 1.0401041805744171, | |
| "reward_std": 0.2994466572999954, | |
| "rewards/curriculum_aware_reward_fn": 0.14166666939854622, | |
| "rewards/format_reward": 0.8984375, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 541.03125, | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 0.4850277602672577, | |
| "kl": 0.092529296875, | |
| "learning_rate": 4.826631299706887e-06, | |
| "loss": -0.0076, | |
| "num_tokens": 10455718.0, | |
| "reward": 1.043229192495346, | |
| "reward_std": 0.18734892271459103, | |
| "rewards/curriculum_aware_reward_fn": 0.12135417200624943, | |
| "rewards/format_reward": 0.921875, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 513.34375, | |
| "epoch": 1.3025210084033614, | |
| "grad_norm": 0.5054645538330078, | |
| "kl": 0.098876953125, | |
| "learning_rate": 4.820977701427424e-06, | |
| "loss": 0.0342, | |
| "num_tokens": 10586514.0, | |
| "reward": 1.0703125149011612, | |
| "reward_std": 0.18218252062797546, | |
| "rewards/curriculum_aware_reward_fn": 0.10156250256113708, | |
| "rewards/format_reward": 0.96875, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 554.0390625, | |
| "epoch": 1.319327731092437, | |
| "grad_norm": 0.5157454013824463, | |
| "kl": 0.0882568359375, | |
| "learning_rate": 4.81523720907136e-06, | |
| "loss": 0.05, | |
| "num_tokens": 10726735.0, | |
| "reward": 1.1385416984558105, | |
| "reward_std": 0.23471001349389553, | |
| "rewards/curriculum_aware_reward_fn": 0.17760416865348816, | |
| "rewards/format_reward": 0.9609375, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 580.015625, | |
| "epoch": 1.3361344537815127, | |
| "grad_norm": 0.41959676146507263, | |
| "kl": 0.0958251953125, | |
| "learning_rate": 4.809410063498254e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 10884185.0, | |
| "reward": 0.8411458283662796, | |
| "reward_std": 0.22234245762228966, | |
| "rewards/curriculum_aware_reward_fn": 0.06770833395421505, | |
| "rewards/format_reward": 0.7734375, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 534.84375, | |
| "epoch": 1.3529411764705883, | |
| "grad_norm": 0.6390196681022644, | |
| "kl": 0.093017578125, | |
| "learning_rate": 4.8034965092034656e-06, | |
| "loss": 0.0963, | |
| "num_tokens": 11024149.0, | |
| "reward": 1.0494791865348816, | |
| "reward_std": 0.2942212373018265, | |
| "rewards/curriculum_aware_reward_fn": 0.1432291679084301, | |
| "rewards/format_reward": 0.90625, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 525.671875, | |
| "epoch": 1.3697478991596639, | |
| "grad_norm": 0.4957650303840637, | |
| "kl": 0.105224609375, | |
| "learning_rate": 4.797496794307889e-06, | |
| "loss": -0.0236, | |
| "num_tokens": 11167547.0, | |
| "reward": 0.9828125238418579, | |
| "reward_std": 0.14313106751069427, | |
| "rewards/curriculum_aware_reward_fn": 0.06093750288709998, | |
| "rewards/format_reward": 0.921875, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 488.7890625, | |
| "epoch": 1.3865546218487395, | |
| "grad_norm": 0.5502341389656067, | |
| "kl": 0.104248046875, | |
| "learning_rate": 4.791411170547545e-06, | |
| "loss": 0.072, | |
| "num_tokens": 11300104.0, | |
| "reward": 1.143229216337204, | |
| "reward_std": 0.2514217048883438, | |
| "rewards/curriculum_aware_reward_fn": 0.1822916679084301, | |
| "rewards/format_reward": 0.9609375, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 508.1015625, | |
| "epoch": 1.403361344537815, | |
| "grad_norm": 0.489242285490036, | |
| "kl": 0.0986328125, | |
| "learning_rate": 4.785239893263017e-06, | |
| "loss": 0.1064, | |
| "num_tokens": 11446565.0, | |
| "reward": 0.9505208730697632, | |
| "reward_std": 0.18909456953406334, | |
| "rewards/curriculum_aware_reward_fn": 0.1145833358168602, | |
| "rewards/format_reward": 0.8359375, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 448.359375, | |
| "epoch": 1.4201680672268908, | |
| "grad_norm": 0.46208083629608154, | |
| "kl": 0.11083984375, | |
| "learning_rate": 4.778983221388742e-06, | |
| "loss": 0.0139, | |
| "num_tokens": 11580547.0, | |
| "reward": 1.0276041626930237, | |
| "reward_std": 0.12672015465795994, | |
| "rewards/curriculum_aware_reward_fn": 0.09791666641831398, | |
| "rewards/format_reward": 0.9296875, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 457.3515625, | |
| "epoch": 1.4369747899159664, | |
| "grad_norm": 0.44321703910827637, | |
| "kl": 0.1085205078125, | |
| "learning_rate": 4.77264141744214e-06, | |
| "loss": 0.0184, | |
| "num_tokens": 11712984.0, | |
| "reward": 1.160416692495346, | |
| "reward_std": 0.15310384705662727, | |
| "rewards/curriculum_aware_reward_fn": 0.16041666828095913, | |
| "rewards/format_reward": 1.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 384.3984375, | |
| "epoch": 1.453781512605042, | |
| "grad_norm": 0.4427275061607361, | |
| "kl": 0.123779296875, | |
| "learning_rate": 4.766214747512603e-06, | |
| "loss": 0.0233, | |
| "num_tokens": 11829315.0, | |
| "reward": 1.1666666865348816, | |
| "reward_std": 0.14362479094415903, | |
| "rewards/curriculum_aware_reward_fn": 0.1744791674427688, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 389.265625, | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 0.5121983885765076, | |
| "kl": 0.12841796875, | |
| "learning_rate": 4.759703481250331e-06, | |
| "loss": 0.0229, | |
| "num_tokens": 11949533.0, | |
| "reward": 1.1343750357627869, | |
| "reward_std": 0.16322745941579342, | |
| "rewards/curriculum_aware_reward_fn": 0.1500000013038516, | |
| "rewards/format_reward": 0.984375, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 459.921875, | |
| "epoch": 1.4873949579831933, | |
| "grad_norm": 0.6308655738830566, | |
| "kl": 0.11328125, | |
| "learning_rate": 4.753107891855015e-06, | |
| "loss": 0.02, | |
| "num_tokens": 12092563.0, | |
| "reward": 0.8854166716337204, | |
| "reward_std": 0.22940894588828087, | |
| "rewards/curriculum_aware_reward_fn": 0.11197916697710752, | |
| "rewards/format_reward": 0.7734375, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 325.625, | |
| "epoch": 1.504201680672269, | |
| "grad_norm": 0.6008861660957336, | |
| "kl": 0.14013671875, | |
| "learning_rate": 4.746428256064375e-06, | |
| "loss": 0.0021, | |
| "num_tokens": 12195803.0, | |
| "reward": 1.261979192495346, | |
| "reward_std": 0.17762075550854206, | |
| "rewards/curriculum_aware_reward_fn": 0.2697916701436043, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 348.2421875, | |
| "epoch": 1.5210084033613445, | |
| "grad_norm": 0.5485793948173523, | |
| "kl": 0.13720703125, | |
| "learning_rate": 4.7396648541425534e-06, | |
| "loss": 0.0053, | |
| "num_tokens": 12314866.0, | |
| "reward": 1.2442708611488342, | |
| "reward_std": 0.21353545226156712, | |
| "rewards/curriculum_aware_reward_fn": 0.24427084252238274, | |
| "rewards/format_reward": 1.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 519.265625, | |
| "epoch": 1.53781512605042, | |
| "grad_norm": 0.4276430010795593, | |
| "kl": 0.11962890625, | |
| "learning_rate": 4.732817969868348e-06, | |
| "loss": 0.0315, | |
| "num_tokens": 12463604.0, | |
| "reward": 0.895833358168602, | |
| "reward_std": 0.09582467563450336, | |
| "rewards/curriculum_aware_reward_fn": 0.0833333358168602, | |
| "rewards/format_reward": 0.8125, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 334.859375, | |
| "epoch": 1.5546218487394958, | |
| "grad_norm": 0.5961228013038635, | |
| "kl": 0.1279296875, | |
| "learning_rate": 4.7258878905233095e-06, | |
| "loss": 0.0697, | |
| "num_tokens": 12583698.0, | |
| "reward": 1.2239583432674408, | |
| "reward_std": 0.18167817778885365, | |
| "rewards/curriculum_aware_reward_fn": 0.2317708358168602, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 363.859375, | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 0.7697988152503967, | |
| "kl": 0.1495361328125, | |
| "learning_rate": 4.718874906879688e-06, | |
| "loss": 0.1313, | |
| "num_tokens": 12705640.0, | |
| "reward": 1.123437523841858, | |
| "reward_std": 0.29410218447446823, | |
| "rewards/curriculum_aware_reward_fn": 0.17812500521540642, | |
| "rewards/format_reward": 0.9453125, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 297.1328125, | |
| "epoch": 1.5882352941176472, | |
| "grad_norm": 0.5211566686630249, | |
| "kl": 0.133056640625, | |
| "learning_rate": 4.711779313188231e-06, | |
| "loss": 0.04, | |
| "num_tokens": 12824305.0, | |
| "reward": 1.1119791865348816, | |
| "reward_std": 0.18469560518860817, | |
| "rewards/curriculum_aware_reward_fn": 0.1119791679084301, | |
| "rewards/format_reward": 1.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 253.859375, | |
| "epoch": 1.6050420168067228, | |
| "grad_norm": 0.6686471104621887, | |
| "kl": 0.1435546875, | |
| "learning_rate": 4.70460140716584e-06, | |
| "loss": -0.0138, | |
| "num_tokens": 12918559.0, | |
| "reward": 1.1968750357627869, | |
| "reward_std": 0.1724256370216608, | |
| "rewards/curriculum_aware_reward_fn": 0.20468750596046448, | |
| "rewards/format_reward": 0.9921875, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 575.90625, | |
| "epoch": 1.6218487394957983, | |
| "grad_norm": 0.3436340093612671, | |
| "kl": 0.160888671875, | |
| "learning_rate": 4.697341489983076e-06, | |
| "loss": 0.0223, | |
| "num_tokens": 13076699.0, | |
| "reward": 0.8567708432674408, | |
| "reward_std": 0.08664888702332973, | |
| "rewards/curriculum_aware_reward_fn": 0.10677083395421505, | |
| "rewards/format_reward": 0.75, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 301.9140625, | |
| "epoch": 1.638655462184874, | |
| "grad_norm": 1.0377366542816162, | |
| "kl": 0.162353515625, | |
| "learning_rate": 4.6899998662515215e-06, | |
| "loss": 0.0906, | |
| "num_tokens": 13192776.0, | |
| "reward": 1.1197916716337204, | |
| "reward_std": 0.12685893662273884, | |
| "rewards/curriculum_aware_reward_fn": 0.13541666674427688, | |
| "rewards/format_reward": 0.984375, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 331.28125, | |
| "epoch": 1.6554621848739495, | |
| "grad_norm": 0.477889746427536, | |
| "kl": 0.151123046875, | |
| "learning_rate": 4.682576844011007e-06, | |
| "loss": 0.0147, | |
| "num_tokens": 13309884.0, | |
| "reward": 1.2942708730697632, | |
| "reward_std": 0.10486710164695978, | |
| "rewards/curriculum_aware_reward_fn": 0.3567708432674408, | |
| "rewards/format_reward": 0.9375, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 418.96875, | |
| "epoch": 1.6722689075630253, | |
| "grad_norm": 1.2051795721054077, | |
| "kl": 0.1630859375, | |
| "learning_rate": 4.675072734716678e-06, | |
| "loss": 0.0986, | |
| "num_tokens": 13443272.0, | |
| "reward": 1.0937500298023224, | |
| "reward_std": 0.1639669369906187, | |
| "rewards/curriculum_aware_reward_fn": 0.1796875111758709, | |
| "rewards/format_reward": 0.9140625, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 257.484375, | |
| "epoch": 1.6890756302521008, | |
| "grad_norm": 0.6367622017860413, | |
| "kl": 0.157958984375, | |
| "learning_rate": 4.667487853225931e-06, | |
| "loss": 0.0274, | |
| "num_tokens": 13546814.0, | |
| "reward": 1.3072916865348816, | |
| "reward_std": 0.1563644390553236, | |
| "rewards/curriculum_aware_reward_fn": 0.3307291641831398, | |
| "rewards/format_reward": 0.9765625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.6890756302521008, | |
| "step": 100, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 1.1082, | |
| "train_samples_per_second": 1712.724, | |
| "train_steps_per_second": 52.338 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 58, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |