{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100.0, "global_step": 256, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3062.0, "completions/mean_length": 1676.234375, "completions/min_length": 774.0, "entropy/max": 0.65234375, "entropy/mean": 0.4375, "entropy/min": 0.341796875, "epoch": 0.00390625, "frac_reward_zero_std": 0.625, "grad_norm": 0.33669739961624146, "kl": 0.0, "learning_rate": 7.692307692307694e-07, "loss": 6.51925802230835e-09, "reward": 0.33529332280158997, "reward_std": 0.41226130723953247, "rewards/DenseCaptionF1/mean": 0.11390755325555801, "rewards/DenseCaptionF1/std": 0.2861669063568115, "rewards/DenseCaptionSodaM/mean": 0.08076076209545135, "rewards/DenseCaptionSodaM/std": 0.2062472105026245, "rewards/TiemstampCaptionLength/mean": 0.140625, "rewards/TiemstampCaptionLength/std": 0.3503824472427368, "rewards/TimestampFormat/mean": 0.140625, "rewards/TimestampFormat/std": 0.3503824472427368, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3774.0, "completions/mean_length": 2192.40625, "completions/min_length": 1202.0, "entropy/max": 0.671875, "entropy/mean": 0.46875, "entropy/min": 0.259765625, "epoch": 0.0078125, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.5384615384615387e-06, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/DenseCaptionF1/mean": 0.0, "rewards/DenseCaptionF1/std": 0.0, "rewards/DenseCaptionSodaM/mean": 0.0, "rewards/DenseCaptionSodaM/std": 0.0, "rewards/TiemstampCaptionLength/mean": 0.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 0.0, "rewards/TimestampFormat/std": 0.0, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3581.0, "completions/mean_length": 1615.6875, "completions/min_length": 381.0, "entropy/max": 0.73046875, "entropy/mean": 0.5234375, "entropy/min": 0.298828125, "epoch": 0.01171875, "frac_reward_zero_std": 0.5, "grad_norm": 0.45485445857048035, "kl": 7.989356527104974e-05, "learning_rate": 2.307692307692308e-06, "loss": 3.299224772490561e-06, "reward": 0.6258076429367065, "reward_std": 0.419928640127182, "rewards/DenseCaptionF1/mean": 0.20682787895202637, "rewards/DenseCaptionF1/std": 0.3422560691833496, "rewards/DenseCaptionSodaM/mean": 0.12210478633642197, "rewards/DenseCaptionSodaM/std": 0.19544678926467896, "rewards/TiemstampCaptionLength/mean": 0.296875, "rewards/TiemstampCaptionLength/std": 0.4604927599430084, "rewards/TimestampFormat/mean": 0.296875, "rewards/TimestampFormat/std": 0.4604927599430084, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3052.0, "completions/mean_length": 1579.359375, "completions/min_length": 889.0, "entropy/max": 0.57421875, "entropy/mean": 0.439453125, "entropy/min": 0.2578125, "epoch": 0.015625, "frac_reward_zero_std": 0.75, "grad_norm": 0.3300647735595703, "kl": 8.311930287163705e-05, "learning_rate": 3.0769230769230774e-06, "loss": 3.3535045531607466e-06, "reward": 0.20333555340766907, "reward_std": 0.22650524973869324, "rewards/DenseCaptionF1/mean": 0.06781341135501862, "rewards/DenseCaptionF1/std": 0.21572406589984894, "rewards/DenseCaptionSodaM/mean": 0.041772134602069855, "rewards/DenseCaptionSodaM/std": 0.13558875024318695, "rewards/TiemstampCaptionLength/mean": 0.09375, "rewards/TiemstampCaptionLength/std": 0.29378482699394226, "rewards/TimestampFormat/mean": 0.09375, "rewards/TimestampFormat/std": 0.29378482699394226, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3021.0, "completions/mean_length": 1719.71875, "completions/min_length": 498.0, "entropy/max": 0.875, "entropy/mean": 0.5234375, "entropy/min": 0.359375, "epoch": 0.01953125, "frac_reward_zero_std": 0.625, "grad_norm": 0.35874465107917786, "kl": 7.733127131359652e-05, "learning_rate": 3.846153846153847e-06, "loss": 3.127614263576106e-06, "reward": 0.2088506668806076, "reward_std": 0.3283998668193817, "rewards/DenseCaptionF1/mean": 0.055382177233695984, "rewards/DenseCaptionF1/std": 0.16304251551628113, "rewards/DenseCaptionSodaM/mean": 0.04409348964691162, "rewards/DenseCaptionSodaM/std": 0.12984584271907806, "rewards/TiemstampCaptionLength/mean": 0.109375, "rewards/TiemstampCaptionLength/std": 0.3145764470100403, "rewards/TimestampFormat/mean": 0.109375, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3330.0, "completions/mean_length": 1659.578125, "completions/min_length": 450.0, "entropy/max": 0.62890625, "entropy/mean": 0.4453125, "entropy/min": 0.330078125, "epoch": 0.0234375, "frac_reward_zero_std": 0.875, "grad_norm": 0.23100338876247406, "kl": 8.260580943897367e-05, "learning_rate": 4.615384615384616e-06, "loss": 3.2648445085214917e-06, "reward": 0.11252006888389587, "reward_std": 0.12164957076311111, "rewards/DenseCaptionF1/mean": 0.0279682707041502, "rewards/DenseCaptionF1/std": 0.11443595588207245, "rewards/DenseCaptionSodaM/mean": 0.022051798179745674, "rewards/DenseCaptionSodaM/std": 0.08787863701581955, "rewards/TiemstampCaptionLength/mean": 0.0625, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.0625, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2488.0, "completions/mean_length": 1616.1875, "completions/min_length": 914.0, "entropy/max": 0.6171875, "entropy/mean": 0.47265625, "entropy/min": 0.341796875, "epoch": 0.02734375, "frac_reward_zero_std": 0.625, "grad_norm": 0.34800484776496887, "kl": 9.036576375365257e-05, "learning_rate": 5.384615384615385e-06, "loss": 3.6178503250994254e-06, "reward": 0.4976370930671692, "reward_std": 0.3053871691226959, "rewards/DenseCaptionF1/mean": 0.14112716913223267, "rewards/DenseCaptionF1/std": 0.25632479786872864, "rewards/DenseCaptionSodaM/mean": 0.10650992393493652, "rewards/DenseCaptionSodaM/std": 0.18886621296405792, "rewards/TiemstampCaptionLength/mean": 0.25, "rewards/TiemstampCaptionLength/std": 0.4364357888698578, "rewards/TimestampFormat/mean": 0.25, "rewards/TimestampFormat/std": 0.4364357888698578, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2626.0, "completions/mean_length": 1691.46875, "completions/min_length": 959.0, "entropy/max": 0.71484375, "entropy/mean": 0.498046875, "entropy/min": 0.2119140625, "epoch": 0.03125, "frac_reward_zero_std": 0.375, "grad_norm": 0.45934543013572693, "kl": 0.00012245975085534155, "learning_rate": 6.153846153846155e-06, "loss": 4.908712526230374e-06, "reward": 0.38751348853111267, "reward_std": 0.5993294715881348, "rewards/DenseCaptionF1/mean": 0.1268688291311264, "rewards/DenseCaptionF1/std": 0.2948600649833679, "rewards/DenseCaptionSodaM/mean": 0.08876965939998627, "rewards/DenseCaptionSodaM/std": 0.20143647491931915, "rewards/TiemstampCaptionLength/mean": 0.171875, "rewards/TiemstampCaptionLength/std": 0.38025420904159546, "rewards/TimestampFormat/mean": 0.171875, "rewards/TimestampFormat/std": 0.38025420904159546, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3861.0, "completions/mean_length": 2073.40625, "completions/min_length": 909.0, "entropy/max": 0.7109375, "entropy/mean": 0.5234375, "entropy/min": 0.3984375, "epoch": 0.03515625, "frac_reward_zero_std": 0.75, "grad_norm": 0.34851229190826416, "kl": 0.00021328622824512422, "learning_rate": 6.923076923076923e-06, "loss": 8.552353392587975e-06, "reward": 0.2592480778694153, "reward_std": 0.18327659368515015, "rewards/DenseCaptionF1/mean": 0.0821901485323906, "rewards/DenseCaptionF1/std": 0.2231617271900177, "rewards/DenseCaptionSodaM/mean": 0.05205794796347618, "rewards/DenseCaptionSodaM/std": 0.1404447704553604, "rewards/TiemstampCaptionLength/mean": 0.125, "rewards/TiemstampCaptionLength/std": 0.3333333432674408, "rewards/TimestampFormat/mean": 0.125, "rewards/TimestampFormat/std": 0.3333333432674408, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3575.0, "completions/mean_length": 1887.984375, "completions/min_length": 885.0, "entropy/max": 0.65234375, "entropy/mean": 0.48828125, "entropy/min": 0.318359375, "epoch": 0.0390625, "frac_reward_zero_std": 0.625, "grad_norm": 0.32556527853012085, "kl": 0.00043061739415861666, "learning_rate": 7.692307692307694e-06, "loss": 1.7255888451472856e-05, "reward": 0.09155982732772827, "reward_std": 0.25897032022476196, "rewards/DenseCaptionF1/mean": 0.0298549123108387, "rewards/DenseCaptionF1/std": 0.14728239178657532, "rewards/DenseCaptionSodaM/mean": 0.01743408665060997, "rewards/DenseCaptionSodaM/std": 0.08487895131111145, "rewards/TiemstampCaptionLength/mean": 0.0416666679084301, "rewards/TiemstampCaptionLength/std": 0.19245009124279022, "rewards/TimestampFormat/mean": 0.046875, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2205.0, "completions/mean_length": 1440.625, "completions/min_length": 647.0, "entropy/max": 0.72265625, "entropy/mean": 0.490234375, "entropy/min": 0.302734375, "epoch": 0.04296875, "frac_reward_zero_std": 0.375, "grad_norm": 0.4874301254749298, "kl": 0.0008995528914965689, "learning_rate": 8.461538461538462e-06, "loss": 3.6125522456131876e-05, "reward": 0.8666101098060608, "reward_std": 0.5633249878883362, "rewards/DenseCaptionF1/mean": 0.3042565584182739, "rewards/DenseCaptionF1/std": 0.41655826568603516, "rewards/DenseCaptionSodaM/mean": 0.18735358119010925, "rewards/DenseCaptionSodaM/std": 0.2545086145401001, "rewards/TiemstampCaptionLength/mean": 0.375, "rewards/TiemstampCaptionLength/std": 0.48795005679130554, "rewards/TimestampFormat/mean": 0.375, "rewards/TimestampFormat/std": 0.48795005679130554, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3659.0, "completions/mean_length": 2064.046875, "completions/min_length": 376.0, "entropy/max": 0.81640625, "entropy/mean": 0.5078125, "entropy/min": 0.30859375, "epoch": 0.046875, "frac_reward_zero_std": 0.75, "grad_norm": 0.36841216683387756, "kl": 0.0014957406092435122, "learning_rate": 9.230769230769232e-06, "loss": 5.995495666866191e-05, "reward": 0.22793494164943695, "reward_std": 0.21785634756088257, "rewards/DenseCaptionF1/mean": 0.06400325894355774, "rewards/DenseCaptionF1/std": 0.1750815510749817, "rewards/DenseCaptionSodaM/mean": 0.03893166407942772, "rewards/DenseCaptionSodaM/std": 0.11452297121286392, "rewards/TiemstampCaptionLength/mean": 0.125, "rewards/TiemstampCaptionLength/std": 0.3333333432674408, "rewards/TimestampFormat/mean": 0.125, "rewards/TimestampFormat/std": 0.3333333432674408, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3706.0, "completions/mean_length": 2285.765625, "completions/min_length": 1077.0, "entropy/max": 0.640625, "entropy/mean": 0.50390625, "entropy/min": 0.353515625, "epoch": 0.05078125, "frac_reward_zero_std": 0.75, "grad_norm": 0.26272639632225037, "kl": 0.0025145045947283506, "learning_rate": 1e-05, "loss": 0.00010084829409606755, "reward": 0.11095122992992401, "reward_std": 0.24312539398670197, "rewards/DenseCaptionF1/mean": 0.03828125074505806, "rewards/DenseCaptionF1/std": 0.17428816854953766, "rewards/DenseCaptionSodaM/mean": 0.02579498291015625, "rewards/DenseCaptionSodaM/std": 0.11784151196479797, "rewards/TiemstampCaptionLength/mean": 0.046875, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.046875, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2938.0, "completions/mean_length": 1778.921875, "completions/min_length": 819.0, "entropy/max": 0.6953125, "entropy/mean": 0.515625, "entropy/min": 0.349609375, "epoch": 0.0546875, "frac_reward_zero_std": 0.625, "grad_norm": 0.3603832423686981, "kl": 0.003843991318717599, "learning_rate": 9.999582149277188e-06, "loss": 0.000153951725224033, "reward": 0.2635659873485565, "reward_std": 0.2455659955739975, "rewards/DenseCaptionF1/mean": 0.07066268473863602, "rewards/DenseCaptionF1/std": 0.18533751368522644, "rewards/DenseCaptionSodaM/mean": 0.0522783026099205, "rewards/DenseCaptionSodaM/std": 0.13549016416072845, "rewards/TiemstampCaptionLength/mean": 0.140625, "rewards/TiemstampCaptionLength/std": 0.3503824472427368, "rewards/TimestampFormat/mean": 0.140625, "rewards/TimestampFormat/std": 0.3503824472427368, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4184.0, "completions/mean_length": 2201.046875, "completions/min_length": 885.0, "entropy/max": 0.609375, "entropy/mean": 0.5, "entropy/min": 0.404296875, "epoch": 0.05859375, "frac_reward_zero_std": 0.625, "grad_norm": 0.30636030435562134, "kl": 0.006575712934136391, "learning_rate": 9.998328666948437e-06, "loss": 0.00026336696464568377, "reward": 0.20778439939022064, "reward_std": 0.3372444808483124, "rewards/DenseCaptionF1/mean": 0.06573660671710968, "rewards/DenseCaptionF1/std": 0.2090059369802475, "rewards/DenseCaptionSodaM/mean": 0.04829777777194977, "rewards/DenseCaptionSodaM/std": 0.15207083523273468, "rewards/TiemstampCaptionLength/mean": 0.09375, "rewards/TiemstampCaptionLength/std": 0.29378482699394226, "rewards/TimestampFormat/mean": 0.09375, "rewards/TimestampFormat/std": 0.29378482699394226, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4208.0, "completions/mean_length": 2478.390625, "completions/min_length": 863.0, "entropy/max": 0.671875, "entropy/mean": 0.51953125, "entropy/min": 0.365234375, "epoch": 0.0625, "frac_reward_zero_std": 0.875, "grad_norm": 0.20259444415569305, "kl": 0.008406754583120346, "learning_rate": 9.996239762521152e-06, "loss": 0.0003381561255082488, "reward": 0.12115638703107834, "reward_std": 0.16726306080818176, "rewards/DenseCaptionF1/mean": 0.0455729179084301, "rewards/DenseCaptionF1/std": 0.20730151236057281, "rewards/DenseCaptionSodaM/mean": 0.028708472847938538, "rewards/DenseCaptionSodaM/std": 0.1305268108844757, "rewards/TiemstampCaptionLength/mean": 0.046875, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.046875, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2195.046875, "completions/min_length": 728.0, "entropy/max": 1.1875, "entropy/mean": 0.609375, "entropy/min": 0.1103515625, "epoch": 0.06640625, "frac_reward_zero_std": 0.75, "grad_norm": 0.2823851406574249, "kl": 0.011962870135903358, "learning_rate": 9.993315785135417e-06, "loss": 0.00048333226004615426, "reward": 0.35875755548477173, "reward_std": 0.16727258265018463, "rewards/DenseCaptionF1/mean": 0.11765454709529877, "rewards/DenseCaptionF1/std": 0.29483330249786377, "rewards/DenseCaptionSodaM/mean": 0.08485300838947296, "rewards/DenseCaptionSodaM/std": 0.20221486687660217, "rewards/TiemstampCaptionLength/mean": 0.15625, "rewards/TiemstampCaptionLength/std": 0.36596253514289856, "rewards/TimestampFormat/mean": 0.15625, "rewards/TimestampFormat/std": 0.36596253514289856, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3605.0, "completions/mean_length": 2101.4375, "completions/min_length": 880.0, "entropy/max": 0.7265625, "entropy/mean": 0.5078125, "entropy/min": 0.3203125, "epoch": 0.0703125, "frac_reward_zero_std": 0.75, "grad_norm": 0.2937946319580078, "kl": 0.015237608924508095, "learning_rate": 9.989557223505661e-06, "loss": 0.0006094181444495916, "reward": 0.3275236189365387, "reward_std": 0.23650048673152924, "rewards/DenseCaptionF1/mean": 0.10937843471765518, "rewards/DenseCaptionF1/std": 0.2602582275867462, "rewards/DenseCaptionSodaM/mean": 0.06189517304301262, "rewards/DenseCaptionSodaM/std": 0.14991170167922974, "rewards/TiemstampCaptionLength/mean": 0.15625, "rewards/TiemstampCaptionLength/std": 0.36596253514289856, "rewards/TimestampFormat/mean": 0.15625, "rewards/TimestampFormat/std": 0.36596253514289856, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3604.0, "completions/mean_length": 2388.359375, "completions/min_length": 1166.0, "entropy/max": 0.7734375, "entropy/mean": 0.55078125, "entropy/min": 0.380859375, "epoch": 0.07421875, "frac_reward_zero_std": 0.375, "grad_norm": 0.37759771943092346, "kl": 0.0160197950899601, "learning_rate": 9.98496470583896e-06, "loss": 0.0006415115785785019, "reward": 0.4447316527366638, "reward_std": 0.6170377731323242, "rewards/DenseCaptionF1/mean": 0.1351507306098938, "rewards/DenseCaptionF1/std": 0.27623483538627625, "rewards/DenseCaptionSodaM/mean": 0.10775801539421082, "rewards/DenseCaptionSodaM/std": 0.21980473399162292, "rewards/TiemstampCaptionLength/mean": 0.2005208283662796, "rewards/TiemstampCaptionLength/std": 0.4008145332336426, "rewards/TimestampFormat/mean": 0.203125, "rewards/TimestampFormat/std": 0.40550529956817627, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 5027.0, "completions/mean_length": 2083.15625, "completions/min_length": 590.0, "entropy/max": 0.90234375, "entropy/mean": 0.54296875, "entropy/min": 0.30078125, "epoch": 0.078125, "frac_reward_zero_std": 0.625, "grad_norm": 0.3374199867248535, "kl": 0.016353856772184372, "learning_rate": 9.979538999730047e-06, "loss": 0.0006515942513942719, "reward": 0.12008658796548843, "reward_std": 0.2829945981502533, "rewards/DenseCaptionF1/mean": 0.03498358279466629, "rewards/DenseCaptionF1/std": 0.13844537734985352, "rewards/DenseCaptionSodaM/mean": 0.022603008896112442, "rewards/DenseCaptionSodaM/std": 0.09045752882957458, "rewards/TiemstampCaptionLength/mean": 0.0625, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.0625, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3595.0, "completions/mean_length": 1994.25, "completions/min_length": 964.0, "entropy/max": 0.765625, "entropy/mean": 0.55078125, "entropy/min": 0.376953125, "epoch": 0.08203125, "frac_reward_zero_std": 0.625, "grad_norm": 0.3366459012031555, "kl": 0.01988409459590912, "learning_rate": 9.973281012033009e-06, "loss": 0.000799685250967741, "reward": 0.33904898166656494, "reward_std": 0.23382535576820374, "rewards/DenseCaptionF1/mean": 0.09593891352415085, "rewards/DenseCaptionF1/std": 0.21650290489196777, "rewards/DenseCaptionSodaM/mean": 0.07123503088951111, "rewards/DenseCaptionSodaM/std": 0.1632857322692871, "rewards/TiemstampCaptionLength/mean": 0.171875, "rewards/TiemstampCaptionLength/std": 0.38025420904159546, "rewards/TimestampFormat/mean": 0.171875, "rewards/TimestampFormat/std": 0.38025420904159546, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3650.0, "completions/mean_length": 1928.578125, "completions/min_length": 816.0, "entropy/max": 0.875, "entropy/mean": 0.5859375, "entropy/min": 0.43359375, "epoch": 0.0859375, "frac_reward_zero_std": 0.5, "grad_norm": 0.4066086709499359, "kl": 0.022677309811115265, "learning_rate": 9.966191788709716e-06, "loss": 0.0009096093126572669, "reward": 0.18491098284721375, "reward_std": 0.40771812200546265, "rewards/DenseCaptionF1/mean": 0.05379953980445862, "rewards/DenseCaptionF1/std": 0.1785956472158432, "rewards/DenseCaptionSodaM/mean": 0.037361450493335724, "rewards/DenseCaptionSodaM/std": 0.1213492900133133, "rewards/TiemstampCaptionLength/mean": 0.09375, "rewards/TiemstampCaptionLength/std": 0.29378482699394226, "rewards/TimestampFormat/mean": 0.09375, "rewards/TimestampFormat/std": 0.29378482699394226, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3628.0, "completions/mean_length": 2177.875, "completions/min_length": 902.0, "entropy/max": 0.70703125, "entropy/mean": 0.51171875, "entropy/min": 0.3828125, "epoch": 0.08984375, "frac_reward_zero_std": 0.5, "grad_norm": 0.35382241010665894, "kl": 0.020987944677472115, "learning_rate": 9.958272514655006e-06, "loss": 0.000841582368593663, "reward": 0.2189454734325409, "reward_std": 0.4250008761882782, "rewards/DenseCaptionF1/mean": 0.06233471632003784, "rewards/DenseCaptionF1/std": 0.1853744387626648, "rewards/DenseCaptionSodaM/mean": 0.047235749661922455, "rewards/DenseCaptionSodaM/std": 0.13834145665168762, "rewards/TiemstampCaptionLength/mean": 0.109375, "rewards/TiemstampCaptionLength/std": 0.3145764470100403, "rewards/TimestampFormat/mean": 0.109375, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3190.0, "completions/mean_length": 1853.296875, "completions/min_length": 989.0, "entropy/max": 0.79296875, "entropy/mean": 0.56640625, "entropy/min": 0.4375, "epoch": 0.09375, "frac_reward_zero_std": 0.375, "grad_norm": 0.44761282205581665, "kl": 0.029041863977909088, "learning_rate": 9.949524513498636e-06, "loss": 0.0011664638295769691, "reward": 0.7176538705825806, "reward_std": 0.5046995282173157, "rewards/DenseCaptionF1/mean": 0.21744775772094727, "rewards/DenseCaptionF1/std": 0.2997380197048187, "rewards/DenseCaptionSodaM/mean": 0.1408311277627945, "rewards/DenseCaptionSodaM/std": 0.1953917145729065, "rewards/TiemstampCaptionLength/mean": 0.359375, "rewards/TiemstampCaptionLength/std": 0.4836103618144989, "rewards/TimestampFormat/mean": 0.359375, "rewards/TimestampFormat/std": 0.4836103618144989, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3667.0, "completions/mean_length": 2230.59375, "completions/min_length": 1048.0, "entropy/max": 0.76953125, "entropy/mean": 0.578125, "entropy/min": 0.4296875, "epoch": 0.09765625, "frac_reward_zero_std": 0.75, "grad_norm": 0.2841798663139343, "kl": 0.028013236820697784, "learning_rate": 9.939949247384046e-06, "loss": 0.0011283187195658684, "reward": 0.3157457709312439, "reward_std": 0.10166770219802856, "rewards/DenseCaptionF1/mean": 0.09699587523937225, "rewards/DenseCaptionF1/std": 0.24287192523479462, "rewards/DenseCaptionSodaM/mean": 0.07812491804361343, "rewards/DenseCaptionSodaM/std": 0.1968449503183365, "rewards/TiemstampCaptionLength/mean": 0.140625, "rewards/TiemstampCaptionLength/std": 0.3503824472427368, "rewards/TimestampFormat/mean": 0.140625, "rewards/TimestampFormat/std": 0.3503824472427368, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4428.0, "completions/mean_length": 1915.203125, "completions/min_length": 244.0, "entropy/max": 0.90625, "entropy/mean": 0.55859375, "entropy/min": 0.33984375, "epoch": 0.1015625, "frac_reward_zero_std": 0.125, "grad_norm": 0.5215222835540771, "kl": 0.032491087913513184, "learning_rate": 9.929548316723983e-06, "loss": 0.0013178382068872452, "reward": 0.6358413696289062, "reward_std": 0.7758326530456543, "rewards/DenseCaptionF1/mean": 0.18299464881420135, "rewards/DenseCaptionF1/std": 0.28064069151878357, "rewards/DenseCaptionSodaM/mean": 0.1305810809135437, "rewards/DenseCaptionSodaM/std": 0.20247425138950348, "rewards/TiemstampCaptionLength/mean": 0.31640625, "rewards/TiemstampCaptionLength/std": 0.4612831473350525, "rewards/TimestampFormat/mean": 0.328125, "rewards/TimestampFormat/std": 0.4732423722743988, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4103.0, "completions/mean_length": 2101.765625, "completions/min_length": 952.0, "entropy/max": 0.9375, "entropy/mean": 0.578125, "entropy/min": 0.28125, "epoch": 0.10546875, "frac_reward_zero_std": 0.375, "grad_norm": 0.3893539607524872, "kl": 0.02889391966164112, "learning_rate": 9.918323459933006e-06, "loss": 0.001159125124104321, "reward": 0.35946089029312134, "reward_std": 0.5790758728981018, "rewards/DenseCaptionF1/mean": 0.12246093153953552, "rewards/DenseCaptionF1/std": 0.27518293261528015, "rewards/DenseCaptionSodaM/mean": 0.06981244683265686, "rewards/DenseCaptionSodaM/std": 0.15849006175994873, "rewards/TiemstampCaptionLength/mean": 0.16249999403953552, "rewards/TiemstampCaptionLength/std": 0.36666667461395264, "rewards/TimestampFormat/mean": 0.171875, "rewards/TimestampFormat/std": 0.38025420904159546, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3711.0, "completions/mean_length": 1937.203125, "completions/min_length": 752.0, "entropy/max": 0.7734375, "entropy/mean": 0.5546875, "entropy/min": 0.365234375, "epoch": 0.109375, "frac_reward_zero_std": 0.5, "grad_norm": 0.4255591034889221, "kl": 0.03484990447759628, "learning_rate": 9.906276553136924e-06, "loss": 0.0014025644632056355, "reward": 0.1896531581878662, "reward_std": 0.35658401250839233, "rewards/DenseCaptionF1/mean": 0.0452314093708992, "rewards/DenseCaptionF1/std": 0.14166533946990967, "rewards/DenseCaptionSodaM/mean": 0.03504673391580582, "rewards/DenseCaptionSodaM/std": 0.10599219799041748, "rewards/TiemstampCaptionLength/mean": 0.109375, "rewards/TiemstampCaptionLength/std": 0.3145764470100403, "rewards/TimestampFormat/mean": 0.109375, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3660.0, "completions/mean_length": 1987.65625, "completions/min_length": 754.0, "entropy/max": 0.80859375, "entropy/mean": 0.55859375, "entropy/min": 0.259765625, "epoch": 0.11328125, "frac_reward_zero_std": 0.25, "grad_norm": 0.45167720317840576, "kl": 0.0329359695315361, "learning_rate": 9.893409609859221e-06, "loss": 0.0013238743413239717, "reward": 1.1427762508392334, "reward_std": 0.45914119482040405, "rewards/DenseCaptionF1/mean": 0.3499799966812134, "rewards/DenseCaptionF1/std": 0.3501557409763336, "rewards/DenseCaptionSodaM/mean": 0.2472234070301056, "rewards/DenseCaptionSodaM/std": 0.24433213472366333, "rewards/TiemstampCaptionLength/mean": 0.5442708134651184, "rewards/TiemstampCaptionLength/std": 0.4997726082801819, "rewards/TimestampFormat/mean": 0.546875, "rewards/TimestampFormat/std": 0.501733124256134, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2918.0, "completions/mean_length": 1711.234375, "completions/min_length": 709.0, "entropy/max": 0.859375, "entropy/mean": 0.56640625, "entropy/min": 0.443359375, "epoch": 0.1171875, "frac_reward_zero_std": 0.25, "grad_norm": 0.49171510338783264, "kl": 0.041421182453632355, "learning_rate": 9.879724780684518e-06, "loss": 0.0016657232772558928, "reward": 0.6909555196762085, "reward_std": 0.6424126625061035, "rewards/DenseCaptionF1/mean": 0.2269957959651947, "rewards/DenseCaptionF1/std": 0.33632996678352356, "rewards/DenseCaptionSodaM/mean": 0.1358346790075302, "rewards/DenseCaptionSodaM/std": 0.21048593521118164, "rewards/TiemstampCaptionLength/mean": 0.328125, "rewards/TiemstampCaptionLength/std": 0.4732423722743988, "rewards/TimestampFormat/mean": 0.328125, "rewards/TimestampFormat/std": 0.4732423722743988, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3525.0, "completions/mean_length": 1632.375, "completions/min_length": 592.0, "entropy/max": 0.75390625, "entropy/mean": 0.484375, "entropy/min": 0.28125, "epoch": 0.12109375, "frac_reward_zero_std": 0.375, "grad_norm": 0.4527930021286011, "kl": 0.03862081840634346, "learning_rate": 9.86522435289912e-06, "loss": 0.0015425803139805794, "reward": 0.660457968711853, "reward_std": 0.642650842666626, "rewards/DenseCaptionF1/mean": 0.20808811485767365, "rewards/DenseCaptionF1/std": 0.33126798272132874, "rewards/DenseCaptionSodaM/mean": 0.15549485385417938, "rewards/DenseCaptionSodaM/std": 0.2465606927871704, "rewards/TiemstampCaptionLength/mean": 0.296875, "rewards/TiemstampCaptionLength/std": 0.4604927599430084, "rewards/TimestampFormat/mean": 0.296875, "rewards/TimestampFormat/std": 0.4604927599430084, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3501.0, "completions/mean_length": 1980.0625, "completions/min_length": 639.0, "entropy/max": 0.875, "entropy/mean": 0.578125, "entropy/min": 0.416015625, "epoch": 0.125, "frac_reward_zero_std": 0.375, "grad_norm": 0.47621989250183105, "kl": 0.045973040163517, "learning_rate": 9.849910750108718e-06, "loss": 0.0018451442010700703, "reward": 0.6612102389335632, "reward_std": 0.5066034197807312, "rewards/DenseCaptionF1/mean": 0.18637947738170624, "rewards/DenseCaptionF1/std": 0.3137284815311432, "rewards/DenseCaptionSodaM/mean": 0.1515420526266098, "rewards/DenseCaptionSodaM/std": 0.22664391994476318, "rewards/TiemstampCaptionLength/mean": 0.318452388048172, "rewards/TiemstampCaptionLength/std": 0.46229735016822815, "rewards/TimestampFormat/mean": 0.328125, "rewards/TimestampFormat/std": 0.4732423722743988, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3340.0, "completions/mean_length": 1555.0625, "completions/min_length": 903.0, "entropy/max": 0.65625, "entropy/mean": 0.50390625, "entropy/min": 0.330078125, "epoch": 0.12890625, "frac_reward_zero_std": 0.25, "grad_norm": 0.4809737801551819, "kl": 0.04283065348863602, "learning_rate": 9.833786531833311e-06, "loss": 0.0017127534374594688, "reward": 0.8187474012374878, "reward_std": 0.5873106718063354, "rewards/DenseCaptionF1/mean": 0.25816553831100464, "rewards/DenseCaptionF1/std": 0.34912580251693726, "rewards/DenseCaptionSodaM/mean": 0.19079023599624634, "rewards/DenseCaptionSodaM/std": 0.25204429030418396, "rewards/TiemstampCaptionLength/mean": 0.3645833134651184, "rewards/TiemstampCaptionLength/std": 0.4814687669277191, "rewards/TimestampFormat/mean": 0.375, "rewards/TimestampFormat/std": 0.48795005679130554, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3063.0, "completions/mean_length": 2028.6875, "completions/min_length": 1019.0, "entropy/max": 0.83984375, "entropy/mean": 0.54296875, "entropy/min": 0.40234375, "epoch": 0.1328125, "frac_reward_zero_std": 0.125, "grad_norm": 0.4648730158805847, "kl": 0.043154969811439514, "learning_rate": 9.816854393079402e-06, "loss": 0.0017284248024225235, "reward": 0.8269856572151184, "reward_std": 0.7138704061508179, "rewards/DenseCaptionF1/mean": 0.2372475117444992, "rewards/DenseCaptionF1/std": 0.30505022406578064, "rewards/DenseCaptionSodaM/mean": 0.183488130569458, "rewards/DenseCaptionSodaM/std": 0.23432087898254395, "rewards/TiemstampCaptionLength/mean": 0.40625, "rewards/TiemstampCaptionLength/std": 0.49501484632492065, "rewards/TimestampFormat/mean": 0.40625, "rewards/TimestampFormat/std": 0.49501484632492065, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3714.0, "completions/mean_length": 1911.1875, "completions/min_length": 325.0, "entropy/max": 0.76953125, "entropy/mean": 0.52734375, "entropy/min": 0.35546875, "epoch": 0.13671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5330745577812195, "kl": 0.04283313453197479, "learning_rate": 9.79911716388956e-06, "loss": 0.0017346041277050972, "reward": 0.7788149118423462, "reward_std": 0.6244890093803406, "rewards/DenseCaptionF1/mean": 0.22027342021465302, "rewards/DenseCaptionF1/std": 0.28901150822639465, "rewards/DenseCaptionSodaM/mean": 0.17253299057483673, "rewards/DenseCaptionSodaM/std": 0.2367001622915268, "rewards/TiemstampCaptionLength/mean": 0.38139206171035767, "rewards/TiemstampCaptionLength/std": 0.4841456711292267, "rewards/TimestampFormat/mean": 0.390625, "rewards/TimestampFormat/std": 0.4917473793029785, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3692.0, "completions/mean_length": 1689.375, "completions/min_length": 735.0, "entropy/max": 0.8359375, "entropy/mean": 0.5625, "entropy/min": 0.3828125, "epoch": 0.140625, "frac_reward_zero_std": 0.25, "grad_norm": 0.5430750846862793, "kl": 0.04801669344305992, "learning_rate": 9.7805778088694e-06, "loss": 0.0019329048227518797, "reward": 1.0940829515457153, "reward_std": 0.47211867570877075, "rewards/DenseCaptionF1/mean": 0.3414607346057892, "rewards/DenseCaptionF1/std": 0.35802242159843445, "rewards/DenseCaptionSodaM/mean": 0.24480977654457092, "rewards/DenseCaptionSodaM/std": 0.2575652599334717, "rewards/TiemstampCaptionLength/mean": 0.5, "rewards/TiemstampCaptionLength/std": 0.4940117597579956, "rewards/TimestampFormat/mean": 0.515625, "rewards/TimestampFormat/std": 0.5037065148353577, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2703.0, "completions/mean_length": 1723.71875, "completions/min_length": 672.0, "entropy/max": 0.92578125, "entropy/mean": 0.5859375, "entropy/min": 0.431640625, "epoch": 0.14453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5763617157936096, "kl": 0.051824457943439484, "learning_rate": 9.761239426692077e-06, "loss": 0.0020843958482146263, "reward": 1.4357333183288574, "reward_std": 0.6296243667602539, "rewards/DenseCaptionF1/mean": 0.44952523708343506, "rewards/DenseCaptionF1/std": 0.3528907895088196, "rewards/DenseCaptionSodaM/mean": 0.2830829918384552, "rewards/DenseCaptionSodaM/std": 0.21533827483654022, "rewards/TiemstampCaptionLength/mean": 0.703125, "rewards/TiemstampCaptionLength/std": 0.4604927599430084, "rewards/TimestampFormat/mean": 0.703125, "rewards/TimestampFormat/std": 0.4604927599430084, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3096.0, "completions/mean_length": 1675.75, "completions/min_length": 761.0, "entropy/max": 0.76171875, "entropy/mean": 0.53515625, "entropy/min": 0.349609375, "epoch": 0.1484375, "frac_reward_zero_std": 0.125, "grad_norm": 0.5313538312911987, "kl": 0.053580090403556824, "learning_rate": 9.741105249580383e-06, "loss": 0.002145090838894248, "reward": 1.1066089868545532, "reward_std": 0.7426514029502869, "rewards/DenseCaptionF1/mean": 0.35633063316345215, "rewards/DenseCaptionF1/std": 0.36695289611816406, "rewards/DenseCaptionSodaM/mean": 0.2346533089876175, "rewards/DenseCaptionSodaM/std": 0.2370554357767105, "rewards/TiemstampCaptionLength/mean": 0.515625, "rewards/TiemstampCaptionLength/std": 0.5037065148353577, "rewards/TimestampFormat/mean": 0.515625, "rewards/TimestampFormat/std": 0.5037065148353577, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3117.0, "completions/mean_length": 1778.984375, "completions/min_length": 917.0, "entropy/max": 0.7734375, "entropy/mean": 0.51953125, "entropy/min": 0.26953125, "epoch": 0.15234375, "frac_reward_zero_std": 0.125, "grad_norm": 0.4957989454269409, "kl": 0.04684580862522125, "learning_rate": 9.7201786427665e-06, "loss": 0.0018801564583554864, "reward": 1.1384104490280151, "reward_std": 0.8322298526763916, "rewards/DenseCaptionF1/mean": 0.3500368595123291, "rewards/DenseCaptionF1/std": 0.35018593072891235, "rewards/DenseCaptionSodaM/mean": 0.24345172941684723, "rewards/DenseCaptionSodaM/std": 0.23924703896045685, "rewards/TiemstampCaptionLength/mean": 0.54296875, "rewards/TiemstampCaptionLength/std": 0.4991156756877899, "rewards/TimestampFormat/mean": 0.546875, "rewards/TimestampFormat/std": 0.501733124256134, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2655.0, "completions/mean_length": 1472.078125, "completions/min_length": 351.0, "entropy/max": 0.609375, "entropy/mean": 0.50390625, "entropy/min": 0.2314453125, "epoch": 0.15625, "frac_reward_zero_std": 0.125, "grad_norm": 0.5393358469009399, "kl": 0.06385189294815063, "learning_rate": 9.698463103929542e-06, "loss": 0.002563423477113247, "reward": 1.4312630891799927, "reward_std": 0.44951027631759644, "rewards/DenseCaptionF1/mean": 0.43007513880729675, "rewards/DenseCaptionF1/std": 0.33957046270370483, "rewards/DenseCaptionSodaM/mean": 0.2980629503726959, "rewards/DenseCaptionSodaM/std": 0.2082088440656662, "rewards/TiemstampCaptionLength/mean": 0.703125, "rewards/TiemstampCaptionLength/std": 0.4604927599430084, "rewards/TimestampFormat/mean": 0.703125, "rewards/TimestampFormat/std": 0.4604927599430084, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2781.0, "completions/mean_length": 1705.421875, "completions/min_length": 748.0, "entropy/max": 0.85546875, "entropy/mean": 0.51953125, "entropy/min": 0.326171875, "epoch": 0.16015625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5383458733558655, "kl": 0.05609148368239403, "learning_rate": 9.67596226261095e-06, "loss": 0.002261662855744362, "reward": 1.6230489015579224, "reward_std": 0.5145324468612671, "rewards/DenseCaptionF1/mean": 0.4726840853691101, "rewards/DenseCaptionF1/std": 0.2743079364299774, "rewards/DenseCaptionSodaM/mean": 0.3404690623283386, "rewards/DenseCaptionSodaM/std": 0.1955847442150116, "rewards/TiemstampCaptionLength/mean": 0.8072916865348816, "rewards/TiemstampCaptionLength/std": 0.39308255910873413, "rewards/TimestampFormat/mean": 0.8125, "rewards/TimestampFormat/std": 0.39339789748191833, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3672.0, "completions/mean_length": 1794.90625, "completions/min_length": 670.0, "entropy/max": 0.7734375, "entropy/mean": 0.51953125, "entropy/min": 0.337890625, "epoch": 0.1640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5417736172676086, "kl": 0.05722331255674362, "learning_rate": 9.652679879607843e-06, "loss": 0.0022972850129008293, "reward": 1.420531988143921, "reward_std": 0.9789804220199585, "rewards/DenseCaptionF1/mean": 0.460341215133667, "rewards/DenseCaptionF1/std": 0.37514445185661316, "rewards/DenseCaptionSodaM/mean": 0.3039408326148987, "rewards/DenseCaptionSodaM/std": 0.24638064205646515, "rewards/TiemstampCaptionLength/mean": 0.65625, "rewards/TiemstampCaptionLength/std": 0.4787135720252991, "rewards/TimestampFormat/mean": 0.65625, "rewards/TimestampFormat/std": 0.4787135720252991, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2397.0, "completions/mean_length": 1448.8125, "completions/min_length": 568.0, "entropy/max": 0.734375, "entropy/mean": 0.5546875, "entropy/min": 0.357421875, "epoch": 0.16796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.6057270169258118, "kl": 0.059576164931058884, "learning_rate": 9.628619846344453e-06, "loss": 0.0023994343355298042, "reward": 1.9311561584472656, "reward_std": 0.5075036287307739, "rewards/DenseCaptionF1/mean": 0.6298707723617554, "rewards/DenseCaptionF1/std": 0.2856041491031647, "rewards/DenseCaptionSodaM/mean": 0.41066038608551025, "rewards/DenseCaptionSodaM/std": 0.17797227203845978, "rewards/TiemstampCaptionLength/mean": 0.890625, "rewards/TiemstampCaptionLength/std": 0.3145764470100403, "rewards/TimestampFormat/mean": 0.890625, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2799.0, "completions/mean_length": 1738.515625, "completions/min_length": 554.0, "entropy/max": 0.77734375, "entropy/mean": 0.54296875, "entropy/min": 0.412109375, "epoch": 0.171875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5635643005371094, "kl": 0.05436231195926666, "learning_rate": 9.603786184221693e-06, "loss": 0.0021877912804484367, "reward": 1.4041552543640137, "reward_std": 0.8095611333847046, "rewards/DenseCaptionF1/mean": 0.44313710927963257, "rewards/DenseCaptionF1/std": 0.34724023938179016, "rewards/DenseCaptionSodaM/mean": 0.2735181152820587, "rewards/DenseCaptionSodaM/std": 0.19664731621742249, "rewards/TiemstampCaptionLength/mean": 0.6875, "rewards/TiemstampCaptionLength/std": 0.467176616191864, "rewards/TimestampFormat/mean": 0.6875, "rewards/TimestampFormat/std": 0.467176616191864, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3219.0, "completions/mean_length": 1852.859375, "completions/min_length": 1007.0, "entropy/max": 0.73046875, "entropy/mean": 0.56640625, "entropy/min": 0.310546875, "epoch": 0.17578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5410345196723938, "kl": 0.05509365350008011, "learning_rate": 9.578183043945031e-06, "loss": 0.0022120936773717403, "reward": 1.625002384185791, "reward_std": 0.6609989404678345, "rewards/DenseCaptionF1/mean": 0.5345268249511719, "rewards/DenseCaptionF1/std": 0.3231217563152313, "rewards/DenseCaptionSodaM/mean": 0.32485055923461914, "rewards/DenseCaptionSodaM/std": 0.2140761762857437, "rewards/TiemstampCaptionLength/mean": 0.765625, "rewards/TiemstampCaptionLength/std": 0.42695629596710205, "rewards/TimestampFormat/mean": 0.765625, "rewards/TimestampFormat/std": 0.42695629596710205, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3824.0, "completions/mean_length": 2272.515625, "completions/min_length": 706.0, "entropy/max": 0.82421875, "entropy/mean": 0.55859375, "entropy/min": 0.376953125, "epoch": 0.1796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.45778459310531616, "kl": 0.0554184764623642, "learning_rate": 9.551814704830734e-06, "loss": 0.002217470668256283, "reward": 1.5976332426071167, "reward_std": 0.651961088180542, "rewards/DenseCaptionF1/mean": 0.470289409160614, "rewards/DenseCaptionF1/std": 0.2826341390609741, "rewards/DenseCaptionSodaM/mean": 0.3460938334465027, "rewards/DenseCaptionSodaM/std": 0.22078247368335724, "rewards/TiemstampCaptionLength/mean": 0.78125, "rewards/TiemstampCaptionLength/std": 0.4166666865348816, "rewards/TimestampFormat/mean": 0.78125, "rewards/TimestampFormat/std": 0.4166666865348816, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2133.4375, "completions/min_length": 886.0, "entropy/max": 0.79296875, "entropy/mean": 0.57421875, "entropy/min": 0.396484375, "epoch": 0.18359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.495386004447937, "kl": 0.06056925281882286, "learning_rate": 9.524685574090627e-06, "loss": 0.002464357763528824, "reward": 1.3884316682815552, "reward_std": 0.8333114385604858, "rewards/DenseCaptionF1/mean": 0.41399046778678894, "rewards/DenseCaptionF1/std": 0.31623101234436035, "rewards/DenseCaptionSodaM/mean": 0.2882433235645294, "rewards/DenseCaptionSodaM/std": 0.20920830965042114, "rewards/TiemstampCaptionLength/mean": 0.6848958134651184, "rewards/TiemstampCaptionLength/std": 0.4658696949481964, "rewards/TimestampFormat/mean": 0.6875, "rewards/TimestampFormat/std": 0.467176616191864, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3831.0, "completions/mean_length": 1907.34375, "completions/min_length": 650.0, "entropy/max": 0.734375, "entropy/mean": 0.55078125, "entropy/min": 0.408203125, "epoch": 0.1875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5320143103599548, "kl": 0.06790857017040253, "learning_rate": 9.496800186095466e-06, "loss": 0.002733142115175724, "reward": 1.5999373197555542, "reward_std": 0.7917318344116211, "rewards/DenseCaptionF1/mean": 0.45771533250808716, "rewards/DenseCaptionF1/std": 0.3045267164707184, "rewards/DenseCaptionSodaM/mean": 0.3453470468521118, "rewards/DenseCaptionSodaM/std": 0.21235060691833496, "rewards/TiemstampCaptionLength/mean": 0.796875, "rewards/TiemstampCaptionLength/std": 0.40550529956817627, "rewards/TimestampFormat/mean": 0.796875, "rewards/TimestampFormat/std": 0.40550529956817627, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2016.375, "completions/min_length": 825.0, "entropy/max": 0.8828125, "entropy/mean": 0.69921875, "entropy/min": 0.130859375, "epoch": 0.19140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5382195711135864, "kl": 0.06226423755288124, "learning_rate": 9.468163201617063e-06, "loss": 0.002524332143366337, "reward": 1.8110225200653076, "reward_std": 0.4265463352203369, "rewards/DenseCaptionF1/mean": 0.5875532627105713, "rewards/DenseCaptionF1/std": 0.26714834570884705, "rewards/DenseCaptionSodaM/mean": 0.35042253136634827, "rewards/DenseCaptionSodaM/std": 0.16498103737831116, "rewards/TiemstampCaptionLength/mean": 0.87109375, "rewards/TiemstampCaptionLength/std": 0.3333100974559784, "rewards/TimestampFormat/mean": 0.875, "rewards/TimestampFormat/std": 0.3333333432674408, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4920.0, "completions/mean_length": 2190.953125, "completions/min_length": 965.0, "entropy/max": 0.94921875, "entropy/mean": 0.66796875, "entropy/min": 0.4453125, "epoch": 0.1953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5177400708198547, "kl": 0.06634458899497986, "learning_rate": 9.438779407049282e-06, "loss": 0.002665899693965912, "reward": 1.6761609315872192, "reward_std": 0.7608978748321533, "rewards/DenseCaptionF1/mean": 0.530105471611023, "rewards/DenseCaptionF1/std": 0.29278987646102905, "rewards/DenseCaptionSodaM/mean": 0.35648149251937866, "rewards/DenseCaptionSodaM/std": 0.19881953299045563, "rewards/TiemstampCaptionLength/mean": 0.7822730541229248, "rewards/TiemstampCaptionLength/std": 0.40720799565315247, "rewards/TimestampFormat/mean": 0.796875, "rewards/TimestampFormat/std": 0.40550529956817627, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2464.9375, "completions/min_length": 989.0, "entropy/max": 0.96875, "entropy/mean": 0.68359375, "entropy/min": 0.1298828125, "epoch": 0.19921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4651657044887543, "kl": 0.06646151840686798, "learning_rate": 9.40865371360804e-06, "loss": 0.002682502381503582, "reward": 1.781751036643982, "reward_std": 0.5297815799713135, "rewards/DenseCaptionF1/mean": 0.5445922613143921, "rewards/DenseCaptionF1/std": 0.24131445586681366, "rewards/DenseCaptionSodaM/mean": 0.3472440540790558, "rewards/DenseCaptionSodaM/std": 0.14093098044395447, "rewards/TiemstampCaptionLength/mean": 0.8892045617103577, "rewards/TiemstampCaptionLength/std": 0.31427985429763794, "rewards/TimestampFormat/mean": 0.890625, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 9216.0, "completions/mean_length": 2706.375, "completions/min_length": 759.0, "entropy/max": 0.90234375, "entropy/mean": 0.6328125, "entropy/min": 0.02294921875, "epoch": 0.203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.442674845457077, "kl": 0.06238989159464836, "learning_rate": 9.377791156510456e-06, "loss": 0.002551492303609848, "reward": 1.8445196151733398, "reward_std": 0.3831322193145752, "rewards/DenseCaptionF1/mean": 0.5301719903945923, "rewards/DenseCaptionF1/std": 0.19613783061504364, "rewards/DenseCaptionSodaM/mean": 0.3938821852207184, "rewards/DenseCaptionSodaM/std": 0.14461737871170044, "rewards/TiemstampCaptionLength/mean": 0.9190559387207031, "rewards/TiemstampCaptionLength/std": 0.2701304256916046, "rewards/TimestampFormat/mean": 0.921875, "rewards/TimestampFormat/std": 0.27048972249031067, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4269.0, "completions/mean_length": 2093.890625, "completions/min_length": 672.0, "entropy/max": 0.97265625, "entropy/mean": 0.69140625, "entropy/min": 0.478515625, "epoch": 0.20703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5645370483398438, "kl": 0.07713831961154938, "learning_rate": 9.346196894133239e-06, "loss": 0.003090888261795044, "reward": 1.8269987106323242, "reward_std": 0.6235939264297485, "rewards/DenseCaptionF1/mean": 0.5599989891052246, "rewards/DenseCaptionF1/std": 0.2531862258911133, "rewards/DenseCaptionSodaM/mean": 0.37865591049194336, "rewards/DenseCaptionSodaM/std": 0.15898536145687103, "rewards/TiemstampCaptionLength/mean": 0.8860626220703125, "rewards/TiemstampCaptionLength/std": 0.3145526647567749, "rewards/TimestampFormat/mean": 0.890625, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4122.0, "completions/mean_length": 2167.671875, "completions/min_length": 780.0, "entropy/max": 0.9296875, "entropy/mean": 0.62890625, "entropy/min": 0.419921875, "epoch": 0.2109375, "frac_reward_zero_std": 0.0, "grad_norm": 0.5097282528877258, "kl": 0.06437567621469498, "learning_rate": 9.313876207150544e-06, "loss": 0.002588218078017235, "reward": 1.8655548095703125, "reward_std": 0.37404483556747437, "rewards/DenseCaptionF1/mean": 0.5587145090103149, "rewards/DenseCaptionF1/std": 0.19311966001987457, "rewards/DenseCaptionSodaM/mean": 0.36938798427581787, "rewards/DenseCaptionSodaM/std": 0.14692431688308716, "rewards/TiemstampCaptionLength/mean": 0.9374046325683594, "rewards/TiemstampCaptionLength/std": 0.24395141005516052, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3360.0, "completions/mean_length": 1985.75, "completions/min_length": 785.0, "entropy/max": 0.890625, "entropy/mean": 0.640625, "entropy/min": 0.51171875, "epoch": 0.21484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.5162221193313599, "kl": 0.07181726396083832, "learning_rate": 9.280834497651334e-06, "loss": 0.0028788154013454914, "reward": 2.0226664543151855, "reward_std": 0.4651912450790405, "rewards/DenseCaptionF1/mean": 0.6559849381446838, "rewards/DenseCaptionF1/std": 0.23934565484523773, "rewards/DenseCaptionSodaM/mean": 0.43699392676353455, "rewards/DenseCaptionSodaM/std": 0.14905153214931488, "rewards/TiemstampCaptionLength/mean": 0.921875, "rewards/TiemstampCaptionLength/std": 0.25539806485176086, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3461.0, "completions/mean_length": 1875.28125, "completions/min_length": 538.0, "entropy/max": 0.8984375, "entropy/mean": 0.68359375, "entropy/min": 0.3984375, "epoch": 0.21875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5501840114593506, "kl": 0.08197345584630966, "learning_rate": 9.247077288236488e-06, "loss": 0.0032946080900728703, "reward": 2.073521137237549, "reward_std": 0.21284720301628113, "rewards/DenseCaptionF1/mean": 0.6387024521827698, "rewards/DenseCaptionF1/std": 0.1778193861246109, "rewards/DenseCaptionSodaM/mean": 0.4598187208175659, "rewards/DenseCaptionSodaM/std": 0.13882185518741608, "rewards/TiemstampCaptionLength/mean": 0.9656250476837158, "rewards/TiemstampCaptionLength/std": 0.1404162049293518, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4774.0, "completions/mean_length": 2623.734375, "completions/min_length": 1428.0, "entropy/max": 0.953125, "entropy/mean": 0.68359375, "entropy/min": 0.498046875, "epoch": 0.22265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.43559393286705017, "kl": 0.06752844899892807, "learning_rate": 9.212610221095748e-06, "loss": 0.0027080695144832134, "reward": 1.931420087814331, "reward_std": 0.3583586513996124, "rewards/DenseCaptionF1/mean": 0.575497031211853, "rewards/DenseCaptionF1/std": 0.21436206996440887, "rewards/DenseCaptionSodaM/mean": 0.4054606258869171, "rewards/DenseCaptionSodaM/std": 0.12318224459886551, "rewards/TiemstampCaptionLength/mean": 0.9477996826171875, "rewards/TiemstampCaptionLength/std": 0.21343040466308594, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4218.0, "completions/mean_length": 2411.5625, "completions/min_length": 890.0, "entropy/max": 1.0625, "entropy/mean": 0.7109375, "entropy/min": 0.56640625, "epoch": 0.2265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5348519682884216, "kl": 0.0774393305182457, "learning_rate": 9.177439057064684e-06, "loss": 0.0031305921729654074, "reward": 2.0812172889709473, "reward_std": 0.22512415051460266, "rewards/DenseCaptionF1/mean": 0.6437010765075684, "rewards/DenseCaptionF1/std": 0.17044788599014282, "rewards/DenseCaptionSodaM/mean": 0.45336809754371643, "rewards/DenseCaptionSodaM/std": 0.11320245265960693, "rewards/TiemstampCaptionLength/mean": 0.9839210510253906, "rewards/TiemstampCaptionLength/std": 0.12499511241912842, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4333.0, "completions/mean_length": 2394.359375, "completions/min_length": 1129.0, "entropy/max": 0.90625, "entropy/mean": 0.67578125, "entropy/min": 0.478515625, "epoch": 0.23046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.47797057032585144, "kl": 0.06732644885778427, "learning_rate": 9.141569674661816e-06, "loss": 0.0026968615129590034, "reward": 1.9791253805160522, "reward_std": 0.375088095664978, "rewards/DenseCaptionF1/mean": 0.6192713379859924, "rewards/DenseCaptionF1/std": 0.2090216875076294, "rewards/DenseCaptionSodaM/mean": 0.40720587968826294, "rewards/DenseCaptionSodaM/std": 0.1373559832572937, "rewards/TiemstampCaptionLength/mean": 0.9521713256835938, "rewards/TiemstampCaptionLength/std": 0.21295078098773956, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2370.875, "completions/min_length": 1364.0, "entropy/max": 1.0703125, "entropy/mean": 0.6640625, "entropy/min": 0.1572265625, "epoch": 0.234375, "frac_reward_zero_std": 0.0, "grad_norm": 0.44017094373703003, "kl": 0.07527967542409897, "learning_rate": 9.105008069106093e-06, "loss": 0.0030458117835223675, "reward": 2.054739475250244, "reward_std": 0.28885501623153687, "rewards/DenseCaptionF1/mean": 0.6691363453865051, "rewards/DenseCaptionF1/std": 0.1983274221420288, "rewards/DenseCaptionSodaM/mean": 0.41685330867767334, "rewards/DenseCaptionSodaM/std": 0.1314084678888321, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3686.0, "completions/mean_length": 2361.875, "completions/min_length": 863.0, "entropy/max": 0.7734375, "entropy/mean": 0.59765625, "entropy/min": 0.3359375, "epoch": 0.23828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4437764883041382, "kl": 0.07143499702215195, "learning_rate": 9.067760351314838e-06, "loss": 0.002866515889763832, "reward": 2.105388641357422, "reward_std": 0.20455285906791687, "rewards/DenseCaptionF1/mean": 0.6408991813659668, "rewards/DenseCaptionF1/std": 0.13143233954906464, "rewards/DenseCaptionSodaM/mean": 0.48089566826820374, "rewards/DenseCaptionSodaM/std": 0.11053228378295898, "rewards/TiemstampCaptionLength/mean": 0.9828125238418579, "rewards/TiemstampCaptionLength/std": 0.12542586028575897, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 1774.953125, "completions/min_length": 279.0, "entropy/max": 0.83203125, "entropy/mean": 0.609375, "entropy/min": 0.1376953125, "epoch": 0.2421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5490212440490723, "kl": 0.08521997928619385, "learning_rate": 9.029832746882372e-06, "loss": 0.0035130027681589127, "reward": 1.8784807920455933, "reward_std": 0.3403272032737732, "rewards/DenseCaptionF1/mean": 0.5604114532470703, "rewards/DenseCaptionF1/std": 0.2598591148853302, "rewards/DenseCaptionSodaM/mean": 0.35758763551712036, "rewards/DenseCaptionSodaM/std": 0.13501794636249542, "rewards/TiemstampCaptionLength/mean": 0.9522132873535156, "rewards/TiemstampCaptionLength/std": 0.21292772889137268, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3683.0, "completions/mean_length": 1927.125, "completions/min_length": 352.0, "entropy/max": 0.9765625, "entropy/mean": 0.6328125, "entropy/min": 0.369140625, "epoch": 0.24609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.5698288083076477, "kl": 0.07601820677518845, "learning_rate": 8.991231595039464e-06, "loss": 0.003107120282948017, "reward": 2.029665470123291, "reward_std": 0.31099215149879456, "rewards/DenseCaptionF1/mean": 0.6040011644363403, "rewards/DenseCaptionF1/std": 0.23040224611759186, "rewards/DenseCaptionSodaM/mean": 0.45886746048927307, "rewards/DenseCaptionSodaM/std": 0.15016283094882965, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4567.0, "completions/mean_length": 2457.421875, "completions/min_length": 1296.0, "entropy/max": 0.83984375, "entropy/mean": 0.66015625, "entropy/min": 0.5703125, "epoch": 0.25, "frac_reward_zero_std": 0.0, "grad_norm": 0.449542760848999, "kl": 0.07092197239398956, "learning_rate": 8.951963347593797e-06, "loss": 0.0028461660258471966, "reward": 1.8207156658172607, "reward_std": 0.4419528841972351, "rewards/DenseCaptionF1/mean": 0.5536099672317505, "rewards/DenseCaptionF1/std": 0.237579807639122, "rewards/DenseCaptionSodaM/mean": 0.362589567899704, "rewards/DenseCaptionSodaM/std": 0.14053674042224884, "rewards/TiemstampCaptionLength/mean": 0.9027824401855469, "rewards/TiemstampCaptionLength/std": 0.2932094633579254, "rewards/TimestampFormat/mean": 0.90625, "rewards/TimestampFormat/std": 0.29378482699394226, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3570.0, "completions/mean_length": 2080.3125, "completions/min_length": 927.0, "entropy/max": 0.96875, "entropy/mean": 0.66015625, "entropy/min": 0.4609375, "epoch": 0.25390625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5001714825630188, "kl": 0.07675490528345108, "learning_rate": 8.9120345678516e-06, "loss": 0.003085065633058548, "reward": 1.954641580581665, "reward_std": 0.32957372069358826, "rewards/DenseCaptionF1/mean": 0.6267664432525635, "rewards/DenseCaptionF1/std": 0.21088743209838867, "rewards/DenseCaptionSodaM/mean": 0.37475019693374634, "rewards/DenseCaptionSodaM/std": 0.12253134697675705, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4629.0, "completions/mean_length": 2415.90625, "completions/min_length": 1047.0, "entropy/max": 0.83984375, "entropy/mean": 0.65625, "entropy/min": 0.53515625, "epoch": 0.2578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.46256938576698303, "kl": 0.08071467280387878, "learning_rate": 8.871451929520662e-06, "loss": 0.003244481049478054, "reward": 2.0497398376464844, "reward_std": 0.12369006872177124, "rewards/DenseCaptionF1/mean": 0.6514697670936584, "rewards/DenseCaptionF1/std": 0.14243650436401367, "rewards/DenseCaptionSodaM/mean": 0.4017102122306824, "rewards/DenseCaptionSodaM/std": 0.07843148708343506, "rewards/TiemstampCaptionLength/mean": 0.9931193590164185, "rewards/TiemstampCaptionLength/std": 0.032912302762269974, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4181.0, "completions/mean_length": 2213.46875, "completions/min_length": 1139.0, "entropy/max": 0.8046875, "entropy/mean": 0.609375, "entropy/min": 0.3671875, "epoch": 0.26171875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4749186038970947, "kl": 0.0748860090970993, "learning_rate": 8.83022221559489e-06, "loss": 0.003019839059561491, "reward": 2.031412124633789, "reward_std": 0.3107888102531433, "rewards/DenseCaptionF1/mean": 0.65138179063797, "rewards/DenseCaptionF1/std": 0.1926232874393463, "rewards/DenseCaptionSodaM/mean": 0.4115608334541321, "rewards/DenseCaptionSodaM/std": 0.10604707151651382, "rewards/TiemstampCaptionLength/mean": 0.9681892395019531, "rewards/TiemstampCaptionLength/std": 0.1752953976392746, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3339.0, "completions/mean_length": 1957.53125, "completions/min_length": 721.0, "entropy/max": 0.7890625, "entropy/mean": 0.53125, "entropy/min": 0.310546875, "epoch": 0.265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4781399369239807, "kl": 0.07253287732601166, "learning_rate": 8.78835231722059e-06, "loss": 0.002902414184063673, "reward": 2.263742446899414, "reward_std": 0.11524605751037598, "rewards/DenseCaptionF1/mean": 0.7564222812652588, "rewards/DenseCaptionF1/std": 0.16660180687904358, "rewards/DenseCaptionSodaM/mean": 0.5138305425643921, "rewards/DenseCaptionSodaM/std": 0.14809079468250275, "rewards/TiemstampCaptionLength/mean": 0.9869791865348816, "rewards/TiemstampCaptionLength/std": 0.07456332445144653, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4310.0, "completions/mean_length": 2461.234375, "completions/min_length": 1231.0, "entropy/max": 0.75390625, "entropy/mean": 0.5625, "entropy/min": 0.412109375, "epoch": 0.26953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.42899295687675476, "kl": 0.07073007524013519, "learning_rate": 8.74584923254468e-06, "loss": 0.0028330343775451183, "reward": 2.133619546890259, "reward_std": 0.11275416612625122, "rewards/DenseCaptionF1/mean": 0.683053731918335, "rewards/DenseCaptionF1/std": 0.14080090820789337, "rewards/DenseCaptionSodaM/mean": 0.45099109411239624, "rewards/DenseCaptionSodaM/std": 0.1174091100692749, "rewards/TiemstampCaptionLength/mean": 0.9991493225097656, "rewards/TiemstampCaptionLength/std": 0.0065025631338357925, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4201.0, "completions/mean_length": 2458.375, "completions/min_length": 1269.0, "entropy/max": 0.7890625, "entropy/mean": 0.54296875, "entropy/min": 0.390625, "epoch": 0.2734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4160049259662628, "kl": 0.06441740691661835, "learning_rate": 8.702720065545024e-06, "loss": 0.0025962484069168568, "reward": 2.0632200241088867, "reward_std": 0.18613176047801971, "rewards/DenseCaptionF1/mean": 0.6221563220024109, "rewards/DenseCaptionF1/std": 0.17072495818138123, "rewards/DenseCaptionSodaM/mean": 0.45693108439445496, "rewards/DenseCaptionSodaM/std": 0.14009734988212585, "rewards/TiemstampCaptionLength/mean": 0.9838905334472656, "rewards/TiemstampCaptionLength/std": 0.12498046457767487, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3662.0, "completions/mean_length": 1988.75, "completions/min_length": 703.0, "entropy/max": 0.80078125, "entropy/mean": 0.546875, "entropy/min": 0.302734375, "epoch": 0.27734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4744465947151184, "kl": 0.06727895885705948, "learning_rate": 8.658972024843063e-06, "loss": 0.0026999362744390965, "reward": 2.0450263023376465, "reward_std": 0.31568199396133423, "rewards/DenseCaptionF1/mean": 0.6489371061325073, "rewards/DenseCaptionF1/std": 0.20903915166854858, "rewards/DenseCaptionSodaM/mean": 0.42799025774002075, "rewards/DenseCaptionSodaM/std": 0.14429260790348053, "rewards/TiemstampCaptionLength/mean": 0.9674479365348816, "rewards/TiemstampCaptionLength/std": 0.17544174194335938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2955.0, "completions/mean_length": 2171.765625, "completions/min_length": 1291.0, "entropy/max": 0.84375, "entropy/mean": 0.50390625, "entropy/min": 0.337890625, "epoch": 0.28125, "frac_reward_zero_std": 0.0, "grad_norm": 0.41763022541999817, "kl": 0.06876518577337265, "learning_rate": 8.614612422498965e-06, "loss": 0.0027553928084671497, "reward": 2.162646532058716, "reward_std": 0.1864587366580963, "rewards/DenseCaptionF1/mean": 0.6895356178283691, "rewards/DenseCaptionF1/std": 0.14416387677192688, "rewards/DenseCaptionSodaM/mean": 0.4887358844280243, "rewards/DenseCaptionSodaM/std": 0.10739783942699432, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3092.0, "completions/mean_length": 1879.375, "completions/min_length": 714.0, "entropy/max": 1.171875, "entropy/mean": 0.625, "entropy/min": 0.39453125, "epoch": 0.28515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5157968997955322, "kl": 0.07641156017780304, "learning_rate": 8.569648672789496e-06, "loss": 0.0030631432309746742, "reward": 2.1225454807281494, "reward_std": 0.15138301253318787, "rewards/DenseCaptionF1/mean": 0.6931498050689697, "rewards/DenseCaptionF1/std": 0.1821095198392868, "rewards/DenseCaptionSodaM/mean": 0.4309581518173218, "rewards/DenseCaptionSodaM/std": 0.08070362359285355, "rewards/TiemstampCaptionLength/mean": 0.996874988079071, "rewards/TiemstampCaptionLength/std": 0.02500000037252903, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3294.0, "completions/mean_length": 2040.59375, "completions/min_length": 702.0, "entropy/max": 0.7890625, "entropy/mean": 0.61328125, "entropy/min": 0.421875, "epoch": 0.2890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4754391610622406, "kl": 0.08198946714401245, "learning_rate": 8.524088290968781e-06, "loss": 0.003284250386059284, "reward": 1.951261043548584, "reward_std": 0.37935590744018555, "rewards/DenseCaptionF1/mean": 0.6092362403869629, "rewards/DenseCaptionF1/std": 0.22482743859291077, "rewards/DenseCaptionSodaM/mean": 0.40452486276626587, "rewards/DenseCaptionSodaM/std": 0.127100870013237, "rewards/TiemstampCaptionLength/mean": 0.9375, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4295.0, "completions/mean_length": 2238.734375, "completions/min_length": 1171.0, "entropy/max": 0.8828125, "entropy/mean": 0.59375, "entropy/min": 0.412109375, "epoch": 0.29296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.44021475315093994, "kl": 0.07665577530860901, "learning_rate": 8.477938892012209e-06, "loss": 0.003067487385123968, "reward": 1.9776023626327515, "reward_std": 0.241469606757164, "rewards/DenseCaptionF1/mean": 0.6269410252571106, "rewards/DenseCaptionF1/std": 0.18612676858901978, "rewards/DenseCaptionSodaM/mean": 0.3877677917480469, "rewards/DenseCaptionSodaM/std": 0.13575981557369232, "rewards/TiemstampCaptionLength/mean": 0.9570369124412537, "rewards/TiemstampCaptionLength/std": 0.1844019740819931, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4222.0, "completions/mean_length": 2058.890625, "completions/min_length": 1063.0, "entropy/max": 0.81640625, "entropy/mean": 0.54296875, "entropy/min": 0.31640625, "epoch": 0.296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.45110055804252625, "kl": 0.07034870237112045, "learning_rate": 8.43120818934367e-06, "loss": 0.002813545521348715, "reward": 2.110698699951172, "reward_std": 0.1968698799610138, "rewards/DenseCaptionF1/mean": 0.7020023465156555, "rewards/DenseCaptionF1/std": 0.15579208731651306, "rewards/DenseCaptionSodaM/mean": 0.42824020981788635, "rewards/DenseCaptionSodaM/std": 0.11006494611501694, "rewards/TiemstampCaptionLength/mean": 0.9765371084213257, "rewards/TiemstampCaptionLength/std": 0.1290905475616455, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3763.0, "completions/mean_length": 1922.3125, "completions/min_length": 989.0, "entropy/max": 0.796875, "entropy/mean": 0.5625, "entropy/min": 0.435546875, "epoch": 0.30078125, "frac_reward_zero_std": 0.0, "grad_norm": 0.471979022026062, "kl": 0.0769549310207367, "learning_rate": 8.38390399354631e-06, "loss": 0.003084153402596712, "reward": 2.0593576431274414, "reward_std": 0.3367141783237457, "rewards/DenseCaptionF1/mean": 0.6514508128166199, "rewards/DenseCaptionF1/std": 0.22580590844154358, "rewards/DenseCaptionSodaM/mean": 0.454781711101532, "rewards/DenseCaptionSodaM/std": 0.1670650690793991, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4218.0, "completions/mean_length": 2163.421875, "completions/min_length": 1350.0, "entropy/max": 0.7421875, "entropy/mean": 0.5390625, "entropy/min": 0.333984375, "epoch": 0.3046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4647288918495178, "kl": 0.076052725315094, "learning_rate": 8.336034211057098e-06, "loss": 0.003052730578929186, "reward": 2.110725164413452, "reward_std": 0.20956245064735413, "rewards/DenseCaptionF1/mean": 0.6667389869689941, "rewards/DenseCaptionF1/std": 0.18336373567581177, "rewards/DenseCaptionSodaM/mean": 0.45984014868736267, "rewards/DenseCaptionSodaM/std": 0.10147108137607574, "rewards/TiemstampCaptionLength/mean": 0.983917236328125, "rewards/TiemstampCaptionLength/std": 0.12499552220106125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4041.0, "completions/mean_length": 2145.015625, "completions/min_length": 942.0, "entropy/max": 0.671875, "entropy/mean": 0.52734375, "entropy/min": 0.408203125, "epoch": 0.30859375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4572528898715973, "kl": 0.07140101492404938, "learning_rate": 8.28760684284532e-06, "loss": 0.0028764642775058746, "reward": 2.0567917823791504, "reward_std": 0.25641506910324097, "rewards/DenseCaptionF1/mean": 0.6210831999778748, "rewards/DenseCaptionF1/std": 0.1876806765794754, "rewards/DenseCaptionSodaM/mean": 0.48258349299430847, "rewards/DenseCaptionSodaM/std": 0.13528074324131012, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3495.0, "completions/mean_length": 2219.125, "completions/min_length": 1093.0, "entropy/max": 0.81640625, "entropy/mean": 0.5859375, "entropy/min": 0.404296875, "epoch": 0.3125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4375706911087036, "kl": 0.08007296919822693, "learning_rate": 8.238629983075296e-06, "loss": 0.0032063666731119156, "reward": 2.0028772354125977, "reward_std": 0.19251595437526703, "rewards/DenseCaptionF1/mean": 0.5738047361373901, "rewards/DenseCaptionF1/std": 0.15946848690509796, "rewards/DenseCaptionSodaM/mean": 0.4459995925426483, "rewards/DenseCaptionSodaM/std": 0.10410931706428528, "rewards/TiemstampCaptionLength/mean": 0.9817708134651184, "rewards/TiemstampCaptionLength/std": 0.12639760971069336, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4308.0, "completions/mean_length": 2390.5, "completions/min_length": 923.0, "entropy/max": 0.74609375, "entropy/mean": 0.5390625, "entropy/min": 0.3984375, "epoch": 0.31640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4354075789451599, "kl": 0.06995181739330292, "learning_rate": 8.18911181775353e-06, "loss": 0.002805061638355255, "reward": 2.0832748413085938, "reward_std": 0.29736328125, "rewards/DenseCaptionF1/mean": 0.6486762762069702, "rewards/DenseCaptionF1/std": 0.17525248229503632, "rewards/DenseCaptionSodaM/mean": 0.46625107526779175, "rewards/DenseCaptionSodaM/std": 0.1081622987985611, "rewards/TiemstampCaptionLength/mean": 0.9679450988769531, "rewards/TiemstampCaptionLength/std": 0.175339475274086, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4986.0, "completions/mean_length": 2003.109375, "completions/min_length": 862.0, "entropy/max": 0.76953125, "entropy/mean": 0.61328125, "entropy/min": 0.451171875, "epoch": 0.3203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5022936463356018, "kl": 0.08349774777889252, "learning_rate": 8.139060623360494e-06, "loss": 0.0033505428582429886, "reward": 2.0526070594787598, "reward_std": 0.21284550428390503, "rewards/DenseCaptionF1/mean": 0.6233960390090942, "rewards/DenseCaptionF1/std": 0.1946212202310562, "rewards/DenseCaptionSodaM/mean": 0.44978249073028564, "rewards/DenseCaptionSodaM/std": 0.13011571764945984, "rewards/TiemstampCaptionLength/mean": 0.9744822382926941, "rewards/TiemstampCaptionLength/std": 0.13103936612606049, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3873.0, "completions/mean_length": 2122.046875, "completions/min_length": 704.0, "entropy/max": 0.84765625, "entropy/mean": 0.59375, "entropy/min": 0.421875, "epoch": 0.32421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.46683013439178467, "kl": 0.08198022097349167, "learning_rate": 8.088484765467286e-06, "loss": 0.0033003194257616997, "reward": 2.23862886428833, "reward_std": 0.21417886018753052, "rewards/DenseCaptionF1/mean": 0.7569527626037598, "rewards/DenseCaptionF1/std": 0.1854260116815567, "rewards/DenseCaptionSodaM/mean": 0.4982775151729584, "rewards/DenseCaptionSodaM/std": 0.11217108368873596, "rewards/TiemstampCaptionLength/mean": 0.982421875, "rewards/TiemstampCaptionLength/std": 0.12572643160820007, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4125.0, "completions/mean_length": 2011.546875, "completions/min_length": 838.0, "entropy/max": 0.8828125, "entropy/mean": 0.6171875, "entropy/min": 0.2890625, "epoch": 0.328125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5108813643455505, "kl": 0.08043907582759857, "learning_rate": 8.037392697337418e-06, "loss": 0.003254396840929985, "reward": 2.064764976501465, "reward_std": 0.13689574599266052, "rewards/DenseCaptionF1/mean": 0.6362853050231934, "rewards/DenseCaptionF1/std": 0.13021419942378998, "rewards/DenseCaptionSodaM/mean": 0.42983517050743103, "rewards/DenseCaptionSodaM/std": 0.12798793613910675, "rewards/TiemstampCaptionLength/mean": 0.9972890019416809, "rewards/TiemstampCaptionLength/std": 0.020837295800447464, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3911.0, "completions/mean_length": 2324.40625, "completions/min_length": 1031.0, "entropy/max": 0.87890625, "entropy/mean": 0.62890625, "entropy/min": 0.427734375, "epoch": 0.33203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4670025408267975, "kl": 0.07925336062908173, "learning_rate": 7.985792958513932e-06, "loss": 0.003175109624862671, "reward": 2.0984528064727783, "reward_std": 0.23483282327651978, "rewards/DenseCaptionF1/mean": 0.6740026473999023, "rewards/DenseCaptionF1/std": 0.17317916452884674, "rewards/DenseCaptionSodaM/mean": 0.440075159072876, "rewards/DenseCaptionSodaM/std": 0.1310994029045105, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3151.0, "completions/mean_length": 2096.640625, "completions/min_length": 1305.0, "entropy/max": 0.87890625, "entropy/mean": 0.6328125, "entropy/min": 0.39453125, "epoch": 0.3359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4730246365070343, "kl": 0.07844884693622589, "learning_rate": 7.93369417339209e-06, "loss": 0.0031482032500207424, "reward": 2.1352877616882324, "reward_std": 0.09777705371379852, "rewards/DenseCaptionF1/mean": 0.7098089456558228, "rewards/DenseCaptionF1/std": 0.12179086357355118, "rewards/DenseCaptionSodaM/mean": 0.4286037087440491, "rewards/DenseCaptionSodaM/std": 0.12594905495643616, "rewards/TiemstampCaptionLength/mean": 0.9937499761581421, "rewards/TiemstampCaptionLength/std": 0.05000000074505806, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2228.828125, "completions/min_length": 1055.0, "entropy/max": 0.7890625, "entropy/mean": 0.57421875, "entropy/min": 0.044677734375, "epoch": 0.33984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4559997618198395, "kl": 0.07476093620061874, "learning_rate": 7.881105049777902e-06, "loss": 0.0030210758559405804, "reward": 2.0786261558532715, "reward_std": 0.2752508223056793, "rewards/DenseCaptionF1/mean": 0.6454404592514038, "rewards/DenseCaptionF1/std": 0.17010398209095, "rewards/DenseCaptionSodaM/mean": 0.4657379388809204, "rewards/DenseCaptionSodaM/std": 0.1222817599773407, "rewards/TiemstampCaptionLength/mean": 0.9661458134651184, "rewards/TiemstampCaptionLength/std": 0.17613248527050018, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4043.0, "completions/mean_length": 2428.71875, "completions/min_length": 1281.0, "entropy/max": 0.80859375, "entropy/mean": 0.62109375, "entropy/min": 0.451171875, "epoch": 0.34375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4348693788051605, "kl": 0.08055365830659866, "learning_rate": 7.828034377432694e-06, "loss": 0.0032192887738347054, "reward": 2.068355083465576, "reward_std": 0.29523277282714844, "rewards/DenseCaptionF1/mean": 0.6584528684616089, "rewards/DenseCaptionF1/std": 0.19537685811519623, "rewards/DenseCaptionSodaM/mean": 0.4450583755970001, "rewards/DenseCaptionSodaM/std": 0.11957451701164246, "rewards/TiemstampCaptionLength/mean": 0.9609375, "rewards/TiemstampCaptionLength/std": 0.18000143766403198, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3186.0, "completions/mean_length": 1970.84375, "completions/min_length": 1282.0, "entropy/max": 0.79296875, "entropy/mean": 0.5703125, "entropy/min": 0.40234375, "epoch": 0.34765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4701857268810272, "kl": 0.07622550427913666, "learning_rate": 7.774491026603985e-06, "loss": 0.003049303777515888, "reward": 2.1712543964385986, "reward_std": 0.1918593943119049, "rewards/DenseCaptionF1/mean": 0.6795837879180908, "rewards/DenseCaptionF1/std": 0.13512182235717773, "rewards/DenseCaptionSodaM/mean": 0.5088581442832947, "rewards/DenseCaptionSodaM/std": 0.10053939372301102, "rewards/TiemstampCaptionLength/mean": 0.981249988079071, "rewards/TiemstampCaptionLength/std": 0.12708577513694763, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3679.0, "completions/mean_length": 2321.390625, "completions/min_length": 969.0, "entropy/max": 0.77734375, "entropy/mean": 0.59765625, "entropy/min": 0.4609375, "epoch": 0.3515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4848327338695526, "kl": 0.07753942906856537, "learning_rate": 7.720483946542913e-06, "loss": 0.0031137799378484488, "reward": 2.0770950317382812, "reward_std": 0.31855612993240356, "rewards/DenseCaptionF1/mean": 0.6584451794624329, "rewards/DenseCaptionF1/std": 0.1920374631881714, "rewards/DenseCaptionSodaM/mean": 0.4498997926712036, "rewards/DenseCaptionSodaM/std": 0.12485680729150772, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2294.125, "completions/min_length": 1317.0, "entropy/max": 0.78125, "entropy/mean": 0.609375, "entropy/min": 0.1572265625, "epoch": 0.35546875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4357386827468872, "kl": 0.07910288125276566, "learning_rate": 7.666022164008458e-06, "loss": 0.0032052365131676197, "reward": 2.1311159133911133, "reward_std": 0.18095478415489197, "rewards/DenseCaptionF1/mean": 0.6685265302658081, "rewards/DenseCaptionF1/std": 0.1838243305683136, "rewards/DenseCaptionSodaM/mean": 0.47953325510025024, "rewards/DenseCaptionSodaM/std": 0.09859392791986465, "rewards/TiemstampCaptionLength/mean": 0.9817373752593994, "rewards/TiemstampCaptionLength/std": 0.12643776834011078, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4702.0, "completions/mean_length": 2567.328125, "completions/min_length": 1335.0, "entropy/max": 0.78515625, "entropy/mean": 0.60546875, "entropy/min": 0.478515625, "epoch": 0.359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4263266921043396, "kl": 0.07881991565227509, "learning_rate": 7.6111147817586925e-06, "loss": 0.0031613344326615334, "reward": 2.128614902496338, "reward_std": 0.11585809290409088, "rewards/DenseCaptionF1/mean": 0.6696683764457703, "rewards/DenseCaptionF1/std": 0.1590665876865387, "rewards/DenseCaptionSodaM/mean": 0.46010249853134155, "rewards/DenseCaptionSodaM/std": 0.07637323439121246, "rewards/TiemstampCaptionLength/mean": 0.9976882934570312, "rewards/TiemstampCaptionLength/std": 0.01849365234375, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3329.0, "completions/mean_length": 2196.390625, "completions/min_length": 1305.0, "entropy/max": 0.78125, "entropy/mean": 0.59765625, "entropy/min": 0.37890625, "epoch": 0.36328125, "frac_reward_zero_std": 0.0, "grad_norm": 0.44893574714660645, "kl": 0.0778866857290268, "learning_rate": 7.5557709770293664e-06, "loss": 0.003123055212199688, "reward": 2.242305278778076, "reward_std": 0.146044060587883, "rewards/DenseCaptionF1/mean": 0.7382992506027222, "rewards/DenseCaptionF1/std": 0.14254668354988098, "rewards/DenseCaptionSodaM/mean": 0.5162829160690308, "rewards/DenseCaptionSodaM/std": 0.13311879336833954, "rewards/TiemstampCaptionLength/mean": 0.9754464626312256, "rewards/TiemstampCaptionLength/std": 0.08280186355113983, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3506.0, "completions/mean_length": 2073.765625, "completions/min_length": 917.0, "entropy/max": 0.91015625, "entropy/mean": 0.59375, "entropy/min": 0.478515625, "epoch": 0.3671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4797140955924988, "kl": 0.07541140913963318, "learning_rate": 7.500000000000001e-06, "loss": 0.0030153607949614525, "reward": 2.0866267681121826, "reward_std": 0.23808912932872772, "rewards/DenseCaptionF1/mean": 0.677537739276886, "rewards/DenseCaptionF1/std": 0.20984452962875366, "rewards/DenseCaptionSodaM/mean": 0.4403391480445862, "rewards/DenseCaptionSodaM/std": 0.10602159053087234, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4109.0, "completions/mean_length": 2264.859375, "completions/min_length": 1144.0, "entropy/max": 0.73046875, "entropy/mean": 0.59765625, "entropy/min": 0.40234375, "epoch": 0.37109375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4785752594470978, "kl": 0.08089148998260498, "learning_rate": 7.443811172247822e-06, "loss": 0.0032392162829637527, "reward": 2.1369659900665283, "reward_std": 0.30026695132255554, "rewards/DenseCaptionF1/mean": 0.6629904508590698, "rewards/DenseCaptionF1/std": 0.18864844739437103, "rewards/DenseCaptionSodaM/mean": 0.5067400932312012, "rewards/DenseCaptionSodaM/std": 0.12611261010169983, "rewards/TiemstampCaptionLength/mean": 0.9657212495803833, "rewards/TiemstampCaptionLength/std": 0.17562004923820496, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3040.0, "completions/mean_length": 2175.71875, "completions/min_length": 1565.0, "entropy/max": 0.78515625, "entropy/mean": 0.63671875, "entropy/min": 0.515625, "epoch": 0.375, "frac_reward_zero_std": 0.0, "grad_norm": 0.45520272850990295, "kl": 0.09045030176639557, "learning_rate": 7.387213885189746e-06, "loss": 0.0036247065290808678, "reward": 2.2175164222717285, "reward_std": 0.20569007098674774, "rewards/DenseCaptionF1/mean": 0.7261995673179626, "rewards/DenseCaptionF1/std": 0.1654254049062729, "rewards/DenseCaptionSodaM/mean": 0.5082440972328186, "rewards/DenseCaptionSodaM/std": 0.09253308922052383, "rewards/TiemstampCaptionLength/mean": 0.9817708134651184, "rewards/TiemstampCaptionLength/std": 0.12639760971069336, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4051.0, "completions/mean_length": 2542.875, "completions/min_length": 986.0, "entropy/max": 0.89453125, "entropy/mean": 0.63671875, "entropy/min": 0.447265625, "epoch": 0.37890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.43797022104263306, "kl": 0.0741940289735794, "learning_rate": 7.330217598512696e-06, "loss": 0.0029716668650507927, "reward": 2.086512327194214, "reward_std": 0.23860614001750946, "rewards/DenseCaptionF1/mean": 0.6499544978141785, "rewards/DenseCaptionF1/std": 0.20542526245117188, "rewards/DenseCaptionSodaM/mean": 0.4521827697753906, "rewards/DenseCaptionSodaM/std": 0.09262199699878693, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2951.0, "completions/mean_length": 2027.390625, "completions/min_length": 807.0, "entropy/max": 0.86328125, "entropy/mean": 0.58984375, "entropy/min": 0.390625, "epoch": 0.3828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4523578882217407, "kl": 0.08195482194423676, "learning_rate": 7.2728318385925035e-06, "loss": 0.0032738554291427135, "reward": 2.14823055267334, "reward_std": 0.22445625066757202, "rewards/DenseCaptionF1/mean": 0.6198608875274658, "rewards/DenseCaptionF1/std": 0.17194251716136932, "rewards/DenseCaptionSodaM/mean": 0.5439946055412292, "rewards/DenseCaptionSodaM/std": 0.12489025294780731, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4176.0, "completions/mean_length": 2919.421875, "completions/min_length": 1698.0, "entropy/max": 0.90234375, "entropy/mean": 0.60546875, "entropy/min": 0.39453125, "epoch": 0.38671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.377668172121048, "kl": 0.07640206813812256, "learning_rate": 7.215066196901676e-06, "loss": 0.00305812805891037, "reward": 2.1023716926574707, "reward_std": 0.17110249400138855, "rewards/DenseCaptionF1/mean": 0.6405081152915955, "rewards/DenseCaptionF1/std": 0.14213506877422333, "rewards/DenseCaptionSodaM/mean": 0.4818001985549927, "rewards/DenseCaptionSodaM/std": 0.1384231597185135, "rewards/TiemstampCaptionLength/mean": 0.9757517576217651, "rewards/TiemstampCaptionLength/std": 0.13296526670455933, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3982.0, "completions/mean_length": 2404.359375, "completions/min_length": 1510.0, "entropy/max": 0.84765625, "entropy/mean": 0.6328125, "entropy/min": 0.47265625, "epoch": 0.390625, "frac_reward_zero_std": 0.0, "grad_norm": 0.44833576679229736, "kl": 0.07901492714881897, "learning_rate": 7.156930328406268e-06, "loss": 0.003162129782140255, "reward": 2.113173007965088, "reward_std": 0.10366939008235931, "rewards/DenseCaptionF1/mean": 0.6593329310417175, "rewards/DenseCaptionF1/std": 0.17583686113357544, "rewards/DenseCaptionSodaM/mean": 0.4554026424884796, "rewards/DenseCaptionSodaM/std": 0.0941043421626091, "rewards/TiemstampCaptionLength/mean": 0.996874988079071, "rewards/TiemstampCaptionLength/std": 0.02500000037252903, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3739.0, "completions/mean_length": 2175.03125, "completions/min_length": 1281.0, "entropy/max": 0.89453125, "entropy/mean": 0.56640625, "entropy/min": 0.40625, "epoch": 0.39453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.45377618074417114, "kl": 0.07455649226903915, "learning_rate": 7.098433949952146e-06, "loss": 0.00298325065523386, "reward": 2.1517369747161865, "reward_std": 0.2411295622587204, "rewards/DenseCaptionF1/mean": 0.6895813941955566, "rewards/DenseCaptionF1/std": 0.18673183023929596, "rewards/DenseCaptionSodaM/mean": 0.49340564012527466, "rewards/DenseCaptionSodaM/std": 0.10936973989009857, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4237.0, "completions/mean_length": 2150.0625, "completions/min_length": 966.0, "entropy/max": 0.70703125, "entropy/mean": 0.55859375, "entropy/min": 0.40234375, "epoch": 0.3984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.47996532917022705, "kl": 0.08367238193750381, "learning_rate": 7.039586838640918e-06, "loss": 0.0033626602962613106, "reward": 2.017162799835205, "reward_std": 0.34179067611694336, "rewards/DenseCaptionF1/mean": 0.6233688592910767, "rewards/DenseCaptionF1/std": 0.22591255605220795, "rewards/DenseCaptionSodaM/mean": 0.45874860882759094, "rewards/DenseCaptionSodaM/std": 0.1617920696735382, "rewards/TiemstampCaptionLength/mean": 0.9325904846191406, "rewards/TiemstampCaptionLength/std": 0.2447463572025299, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4251.0, "completions/mean_length": 2408.359375, "completions/min_length": 984.0, "entropy/max": 0.83984375, "entropy/mean": 0.5703125, "entropy/min": 0.3671875, "epoch": 0.40234375, "frac_reward_zero_std": 0.0, "grad_norm": 0.46457499265670776, "kl": 0.06997324526309967, "learning_rate": 6.980398830195785e-06, "loss": 0.0028030076064169407, "reward": 2.2786498069763184, "reward_std": 0.11027923226356506, "rewards/DenseCaptionF1/mean": 0.7410256862640381, "rewards/DenseCaptionF1/std": 0.16160745918750763, "rewards/DenseCaptionSodaM/mean": 0.5409583449363708, "rewards/DenseCaptionSodaM/std": 0.07088367640972137, "rewards/TiemstampCaptionLength/mean": 0.9933311343193054, "rewards/TiemstampCaptionLength/std": 0.0350026860833168, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3882.0, "completions/mean_length": 2358.390625, "completions/min_length": 1220.0, "entropy/max": 0.6796875, "entropy/mean": 0.55078125, "entropy/min": 0.283203125, "epoch": 0.40625, "frac_reward_zero_std": 0.0, "grad_norm": 0.412352979183197, "kl": 0.07164886593818665, "learning_rate": 6.920879817317588e-06, "loss": 0.0028869202360510826, "reward": 2.1052591800689697, "reward_std": 0.19318059086799622, "rewards/DenseCaptionF1/mean": 0.6278367042541504, "rewards/DenseCaptionF1/std": 0.16896402835845947, "rewards/DenseCaptionSodaM/mean": 0.4961166977882385, "rewards/DenseCaptionSodaM/std": 0.13075561821460724, "rewards/TiemstampCaptionLength/mean": 0.9782366156578064, "rewards/TiemstampCaptionLength/std": 0.1292588710784912, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3634.0, "completions/mean_length": 2189.46875, "completions/min_length": 997.0, "entropy/max": 0.8125, "entropy/mean": 0.58984375, "entropy/min": 0.400390625, "epoch": 0.41015625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4769591987133026, "kl": 0.07978053390979767, "learning_rate": 6.861039748031351e-06, "loss": 0.0031947405077517033, "reward": 2.176485538482666, "reward_std": 0.14867156744003296, "rewards/DenseCaptionF1/mean": 0.7199992537498474, "rewards/DenseCaptionF1/std": 0.15194834768772125, "rewards/DenseCaptionSodaM/mean": 0.45648619532585144, "rewards/DenseCaptionSodaM/std": 0.09429176896810532, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2370.90625, "completions/min_length": 1174.0, "entropy/max": 0.93359375, "entropy/mean": 0.61328125, "entropy/min": 0.453125, "epoch": 0.4140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.45270076394081116, "kl": 0.0827193409204483, "learning_rate": 6.800888624023552e-06, "loss": 0.0033212807029485703, "reward": 2.1291093826293945, "reward_std": 0.29201728105545044, "rewards/DenseCaptionF1/mean": 0.679262638092041, "rewards/DenseCaptionF1/std": 0.1974390596151352, "rewards/DenseCaptionSodaM/mean": 0.4819648861885071, "rewards/DenseCaptionSodaM/std": 0.14247861504554749, "rewards/TiemstampCaptionLength/mean": 0.9670138955116272, "rewards/TiemstampCaptionLength/std": 0.17560364305973053, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2507.890625, "completions/min_length": 942.0, "entropy/max": 0.82421875, "entropy/mean": 0.59765625, "entropy/min": 0.2431640625, "epoch": 0.41796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4403754472732544, "kl": 0.07606419920921326, "learning_rate": 6.740436498970453e-06, "loss": 0.0030570104718208313, "reward": 2.0796656608581543, "reward_std": 0.2234925925731659, "rewards/DenseCaptionF1/mean": 0.6541966199874878, "rewards/DenseCaptionF1/std": 0.21035099029541016, "rewards/DenseCaptionSodaM/mean": 0.45958346128463745, "rewards/DenseCaptionSodaM/std": 0.10667937248945236, "rewards/TiemstampCaptionLength/mean": 0.9630208015441895, "rewards/TiemstampCaptionLength/std": 0.17729271948337555, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3815.0, "completions/mean_length": 2447.953125, "completions/min_length": 958.0, "entropy/max": 0.84375, "entropy/mean": 0.59375, "entropy/min": 0.451171875, "epoch": 0.421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4195391535758972, "kl": 0.075007364153862, "learning_rate": 6.679693476857712e-06, "loss": 0.0029998552054166794, "reward": 2.121521472930908, "reward_std": 0.19369511306285858, "rewards/DenseCaptionF1/mean": 0.6594398617744446, "rewards/DenseCaptionF1/std": 0.1779697984457016, "rewards/DenseCaptionSodaM/mean": 0.47770658135414124, "rewards/DenseCaptionSodaM/std": 0.13796070218086243, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2951.0, "completions/mean_length": 2098.671875, "completions/min_length": 1053.0, "entropy/max": 0.75, "entropy/mean": 0.5546875, "entropy/min": 0.2734375, "epoch": 0.42578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.45432087779045105, "kl": 0.07422572374343872, "learning_rate": 6.618669710291607e-06, "loss": 0.0029711276292800903, "reward": 2.2597405910491943, "reward_std": 0.10507716238498688, "rewards/DenseCaptionF1/mean": 0.7595625519752502, "rewards/DenseCaptionF1/std": 0.10846223682165146, "rewards/DenseCaptionSodaM/mean": 0.5001780986785889, "rewards/DenseCaptionSodaM/std": 0.10454103350639343, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3640.0, "completions/mean_length": 2099.5, "completions/min_length": 1332.0, "entropy/max": 0.765625, "entropy/mean": 0.578125, "entropy/min": 0.3984375, "epoch": 0.4296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4507025182247162, "kl": 0.08030343800783157, "learning_rate": 6.557375398802124e-06, "loss": 0.0032133255153894424, "reward": 2.052093029022217, "reward_std": 0.4489860534667969, "rewards/DenseCaptionF1/mean": 0.6731139421463013, "rewards/DenseCaptionF1/std": 0.2409752607345581, "rewards/DenseCaptionSodaM/mean": 0.4456457197666168, "rewards/DenseCaptionSodaM/std": 0.15500512719154358, "rewards/TiemstampCaptionLength/mean": 0.9291666746139526, "rewards/TiemstampCaptionLength/std": 0.24656368792057037, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4649.0, "completions/mean_length": 2456.265625, "completions/min_length": 984.0, "entropy/max": 0.86328125, "entropy/mean": 0.609375, "entropy/min": 0.4375, "epoch": 0.43359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.43649446964263916, "kl": 0.07889571785926819, "learning_rate": 6.495820787138209e-06, "loss": 0.003167032264173031, "reward": 2.207371234893799, "reward_std": 0.11937980353832245, "rewards/DenseCaptionF1/mean": 0.7265190482139587, "rewards/DenseCaptionF1/std": 0.1683252453804016, "rewards/DenseCaptionSodaM/mean": 0.48906826972961426, "rewards/DenseCaptionSodaM/std": 0.09366622567176819, "rewards/TiemstampCaptionLength/mean": 0.9835683107376099, "rewards/TiemstampCaptionLength/std": 0.05088389292359352, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3276.0, "completions/mean_length": 1968.25, "completions/min_length": 1133.0, "entropy/max": 0.94140625, "entropy/mean": 0.62109375, "entropy/min": 0.462890625, "epoch": 0.4375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4778197109699249, "kl": 0.08649688959121704, "learning_rate": 6.434016163555452e-06, "loss": 0.003462088294327259, "reward": 2.2246928215026855, "reward_std": 0.22315554320812225, "rewards/DenseCaptionF1/mean": 0.7509918212890625, "rewards/DenseCaptionF1/std": 0.1588038057088852, "rewards/DenseCaptionSodaM/mean": 0.4908885955810547, "rewards/DenseCaptionSodaM/std": 0.10360200703144073, "rewards/TiemstampCaptionLength/mean": 0.981249988079071, "rewards/TiemstampCaptionLength/std": 0.12708577513694763, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4535.0, "completions/mean_length": 2397.15625, "completions/min_length": 985.0, "entropy/max": 0.8359375, "entropy/mean": 0.60546875, "entropy/min": 0.44921875, "epoch": 0.44140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.45221975445747375, "kl": 0.07573950290679932, "learning_rate": 6.371971858096509e-06, "loss": 0.003038412658497691, "reward": 2.1565451622009277, "reward_std": 0.19371052086353302, "rewards/DenseCaptionF1/mean": 0.6990419626235962, "rewards/DenseCaptionF1/std": 0.19177736341953278, "rewards/DenseCaptionSodaM/mean": 0.48057520389556885, "rewards/DenseCaptionSodaM/std": 0.1356320083141327, "rewards/TiemstampCaptionLength/mean": 0.9694812297821045, "rewards/TiemstampCaptionLength/std": 0.13477176427841187, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3674.0, "completions/mean_length": 2573.8125, "completions/min_length": 1192.0, "entropy/max": 0.765625, "entropy/mean": 0.609375, "entropy/min": 0.4140625, "epoch": 0.4453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.41967564821243286, "kl": 0.07641534507274628, "learning_rate": 6.30969824086453e-06, "loss": 0.0030584577471017838, "reward": 2.154862403869629, "reward_std": 0.10016033053398132, "rewards/DenseCaptionF1/mean": 0.6651291847229004, "rewards/DenseCaptionF1/std": 0.1349097192287445, "rewards/DenseCaptionSodaM/mean": 0.4897330105304718, "rewards/DenseCaptionSodaM/std": 0.09691990911960602, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3362.0, "completions/mean_length": 2246.75, "completions/min_length": 740.0, "entropy/max": 0.765625, "entropy/mean": 0.625, "entropy/min": 0.357421875, "epoch": 0.44921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4850248396396637, "kl": 0.07770873606204987, "learning_rate": 6.247205720289907e-06, "loss": 0.0031074020080268383, "reward": 2.105367422103882, "reward_std": 0.2740570604801178, "rewards/DenseCaptionF1/mean": 0.6702495813369751, "rewards/DenseCaptionF1/std": 0.203532412648201, "rewards/DenseCaptionSodaM/mean": 0.4663679003715515, "rewards/DenseCaptionSodaM/std": 0.13411854207515717, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3658.0, "completions/mean_length": 2495.046875, "completions/min_length": 1110.0, "entropy/max": 0.796875, "entropy/mean": 0.578125, "entropy/min": 0.416015625, "epoch": 0.453125, "frac_reward_zero_std": 0.125, "grad_norm": 0.39278051257133484, "kl": 0.07527855038642883, "learning_rate": 6.184504741390596e-06, "loss": 0.003027956234291196, "reward": 1.781051754951477, "reward_std": 0.2833193838596344, "rewards/DenseCaptionF1/mean": 0.5420864820480347, "rewards/DenseCaptionF1/std": 0.2658858001232147, "rewards/DenseCaptionSodaM/mean": 0.39781951904296875, "rewards/DenseCaptionSodaM/std": 0.19784612953662872, "rewards/TiemstampCaptionLength/mean": 0.83984375, "rewards/TiemstampCaptionLength/std": 0.3656023144721985, "rewards/TimestampFormat/mean": 0.8424479365348816, "rewards/TimestampFormat/std": 0.36554577946662903, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3945.0, "completions/mean_length": 2073.890625, "completions/min_length": 1240.0, "entropy/max": 0.8046875, "entropy/mean": 0.59375, "entropy/min": 0.451171875, "epoch": 0.45703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.46272948384284973, "kl": 0.0793612152338028, "learning_rate": 6.121605784026339e-06, "loss": 0.0031837821006774902, "reward": 1.994809865951538, "reward_std": 0.33968663215637207, "rewards/DenseCaptionF1/mean": 0.5881167650222778, "rewards/DenseCaptionF1/std": 0.2010265439748764, "rewards/DenseCaptionSodaM/mean": 0.45356807112693787, "rewards/DenseCaptionSodaM/std": 0.15971964597702026, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3265.0, "completions/mean_length": 1820.046875, "completions/min_length": 1171.0, "entropy/max": 0.84765625, "entropy/mean": 0.5859375, "entropy/min": 0.421875, "epoch": 0.4609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.47077837586402893, "kl": 0.08538872003555298, "learning_rate": 6.058519361147055e-06, "loss": 0.0034187142737209797, "reward": 2.0582194328308105, "reward_std": 0.2934507727622986, "rewards/DenseCaptionF1/mean": 0.615313708782196, "rewards/DenseCaptionF1/std": 0.20698460936546326, "rewards/DenseCaptionSodaM/mean": 0.4941520094871521, "rewards/DenseCaptionSodaM/std": 0.1554199457168579, "rewards/TiemstampCaptionLength/mean": 0.9443824291229248, "rewards/TiemstampCaptionLength/std": 0.21501989662647247, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3361.0, "completions/mean_length": 2210.484375, "completions/min_length": 1170.0, "entropy/max": 0.83203125, "entropy/mean": 0.6484375, "entropy/min": 0.38671875, "epoch": 0.46484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.45301973819732666, "kl": 0.0787690132856369, "learning_rate": 5.995256017035703e-06, "loss": 0.0031627845019102097, "reward": 1.9711133241653442, "reward_std": 0.27808910608291626, "rewards/DenseCaptionF1/mean": 0.5771324634552002, "rewards/DenseCaptionF1/std": 0.22329936921596527, "rewards/DenseCaptionSodaM/mean": 0.42523080110549927, "rewards/DenseCaptionSodaM/std": 0.1310344636440277, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3498.0, "completions/mean_length": 2194.875, "completions/min_length": 1333.0, "entropy/max": 0.80078125, "entropy/mean": 0.61328125, "entropy/min": 0.486328125, "epoch": 0.46875, "frac_reward_zero_std": 0.0, "grad_norm": 0.45540276169776917, "kl": 0.07793214917182922, "learning_rate": 5.931826325545912e-06, "loss": 0.0031153373420238495, "reward": 2.0628232955932617, "reward_std": 0.45016881823539734, "rewards/DenseCaptionF1/mean": 0.6490108966827393, "rewards/DenseCaptionF1/std": 0.20744843780994415, "rewards/DenseCaptionSodaM/mean": 0.4774283170700073, "rewards/DenseCaptionSodaM/std": 0.15979807078838348, "rewards/TiemstampCaptionLength/mean": 0.9352678656578064, "rewards/TiemstampCaptionLength/std": 0.2440476268529892, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3085.0, "completions/mean_length": 1896.515625, "completions/min_length": 533.0, "entropy/max": 1.0703125, "entropy/mean": 0.61328125, "entropy/min": 0.341796875, "epoch": 0.47265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.5396003127098083, "kl": 0.07848010212182999, "learning_rate": 5.8682408883346535e-06, "loss": 0.0031436989083886147, "reward": 2.1525871753692627, "reward_std": 0.28493666648864746, "rewards/DenseCaptionF1/mean": 0.7212038636207581, "rewards/DenseCaptionF1/std": 0.2505525052547455, "rewards/DenseCaptionSodaM/mean": 0.46728357672691345, "rewards/DenseCaptionSodaM/std": 0.15110737085342407, "rewards/TiemstampCaptionLength/mean": 0.9594494104385376, "rewards/TiemstampCaptionLength/std": 0.17744947969913483, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3156.0, "completions/mean_length": 2252.875, "completions/min_length": 1010.0, "entropy/max": 0.77734375, "entropy/mean": 0.59765625, "entropy/min": 0.47265625, "epoch": 0.4765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.45987099409103394, "kl": 0.07764089852571487, "learning_rate": 5.804510333090287e-06, "loss": 0.003107200376689434, "reward": 2.0881872177124023, "reward_std": 0.21753573417663574, "rewards/DenseCaptionF1/mean": 0.6521599292755127, "rewards/DenseCaptionF1/std": 0.17497283220291138, "rewards/DenseCaptionSodaM/mean": 0.45686084032058716, "rewards/DenseCaptionSodaM/std": 0.1126566231250763, "rewards/TiemstampCaptionLength/mean": 0.9739583730697632, "rewards/TiemstampCaptionLength/std": 0.1367896944284439, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3198.0, "completions/mean_length": 1817.984375, "completions/min_length": 1068.0, "entropy/max": 0.89453125, "entropy/mean": 0.61328125, "entropy/min": 0.447265625, "epoch": 0.48046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5073015689849854, "kl": 0.0798579603433609, "learning_rate": 5.740645311756246e-06, "loss": 0.0031996332108974457, "reward": 2.1528115272521973, "reward_std": 0.37162232398986816, "rewards/DenseCaptionF1/mean": 0.7066170573234558, "rewards/DenseCaptionF1/std": 0.2179267257452011, "rewards/DenseCaptionSodaM/mean": 0.4930696189403534, "rewards/DenseCaptionSodaM/std": 0.15379729866981506, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3474.0, "completions/mean_length": 2020.171875, "completions/min_length": 1078.0, "entropy/max": 0.9765625, "entropy/mean": 0.62890625, "entropy/min": 0.3671875, "epoch": 0.484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.46273958683013916, "kl": 0.07388724386692047, "learning_rate": 5.6766564987506564e-06, "loss": 0.0029639480635523796, "reward": 2.0951733589172363, "reward_std": 0.36066603660583496, "rewards/DenseCaptionF1/mean": 0.6725229024887085, "rewards/DenseCaptionF1/std": 0.19911877810955048, "rewards/DenseCaptionSodaM/mean": 0.4695255160331726, "rewards/DenseCaptionSodaM/std": 0.16270573437213898, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3616.0, "completions/mean_length": 2323.375, "completions/min_length": 1270.0, "entropy/max": 0.8203125, "entropy/mean": 0.625, "entropy/min": 0.486328125, "epoch": 0.48828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.45393383502960205, "kl": 0.07782484591007233, "learning_rate": 5.612554589182228e-06, "loss": 0.003118175081908703, "reward": 2.130305767059326, "reward_std": 0.13083592057228088, "rewards/DenseCaptionF1/mean": 0.6734137535095215, "rewards/DenseCaptionF1/std": 0.13672827184200287, "rewards/DenseCaptionSodaM/mean": 0.4568920135498047, "rewards/DenseCaptionSodaM/std": 0.091212198138237, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3029.0, "completions/mean_length": 1909.359375, "completions/min_length": 1267.0, "entropy/max": 0.82421875, "entropy/mean": 0.59765625, "entropy/min": 0.439453125, "epoch": 0.4921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.47897541522979736, "kl": 0.0780659019947052, "learning_rate": 5.548350297062659e-06, "loss": 0.003124142996966839, "reward": 2.068937063217163, "reward_std": 0.3318726420402527, "rewards/DenseCaptionF1/mean": 0.6577168703079224, "rewards/DenseCaptionF1/std": 0.2262330800294876, "rewards/DenseCaptionSodaM/mean": 0.4737202823162079, "rewards/DenseCaptionSodaM/std": 0.1527242511510849, "rewards/TiemstampCaptionLength/mean": 0.9375, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 7959.0, "completions/mean_length": 2067.703125, "completions/min_length": 752.0, "entropy/max": 0.91796875, "entropy/mean": 0.65625, "entropy/min": 0.09228515625, "epoch": 0.49609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4522978365421295, "kl": 0.08107860386371613, "learning_rate": 5.484054353515896e-06, "loss": 0.00327342189848423, "reward": 2.1223559379577637, "reward_std": 0.26210957765579224, "rewards/DenseCaptionF1/mean": 0.7080155611038208, "rewards/DenseCaptionF1/std": 0.23213255405426025, "rewards/DenseCaptionSodaM/mean": 0.4455903470516205, "rewards/DenseCaptionSodaM/std": 0.11769149452447891, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3528.0, "completions/mean_length": 1888.6875, "completions/min_length": 1219.0, "entropy/max": 1.0078125, "entropy/mean": 0.6640625, "entropy/min": 0.421875, "epoch": 0.5, "frac_reward_zero_std": 0.0, "grad_norm": 0.5178694128990173, "kl": 0.08069656789302826, "learning_rate": 5.419677504984534e-06, "loss": 0.0032322704792022705, "reward": 1.9883326292037964, "reward_std": 0.4129721224308014, "rewards/DenseCaptionF1/mean": 0.6113201379776001, "rewards/DenseCaptionF1/std": 0.24685245752334595, "rewards/DenseCaptionSodaM/mean": 0.4414656162261963, "rewards/DenseCaptionSodaM/std": 0.19202734529972076, "rewards/TiemstampCaptionLength/mean": 0.93359375, "rewards/TiemstampCaptionLength/std": 0.244957834482193, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3457.0, "completions/mean_length": 2259.78125, "completions/min_length": 1303.0, "entropy/max": 0.87109375, "entropy/mean": 0.6484375, "entropy/min": 0.453125, "epoch": 0.50390625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4596750736236572, "kl": 0.07779379189014435, "learning_rate": 5.3552305114336515e-06, "loss": 0.003117215819656849, "reward": 1.9733624458312988, "reward_std": 0.28246039152145386, "rewards/DenseCaptionF1/mean": 0.5915824174880981, "rewards/DenseCaptionF1/std": 0.21047313511371613, "rewards/DenseCaptionSodaM/mean": 0.41303005814552307, "rewards/DenseCaptionSodaM/std": 0.10529915243387222, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3394.0, "completions/mean_length": 2013.109375, "completions/min_length": 1058.0, "entropy/max": 0.71484375, "entropy/mean": 0.59375, "entropy/min": 0.45703125, "epoch": 0.5078125, "frac_reward_zero_std": 0.0, "grad_norm": 0.47184762358665466, "kl": 0.07777858525514603, "learning_rate": 5.290724144552379e-06, "loss": 0.003114067018032074, "reward": 2.1788721084594727, "reward_std": 0.3331061899662018, "rewards/DenseCaptionF1/mean": 0.7039194107055664, "rewards/DenseCaptionF1/std": 0.21520444750785828, "rewards/DenseCaptionSodaM/mean": 0.5233902931213379, "rewards/DenseCaptionSodaM/std": 0.15150612592697144, "rewards/TiemstampCaptionLength/mean": 0.949999988079071, "rewards/TiemstampCaptionLength/std": 0.21380898356437683, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3048.0, "completions/mean_length": 1807.40625, "completions/min_length": 1015.0, "entropy/max": 0.8125, "entropy/mean": 0.65234375, "entropy/min": 0.416015625, "epoch": 0.51171875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5258738994598389, "kl": 0.07863827049732208, "learning_rate": 5.2261691859535325e-06, "loss": 0.00314484816044569, "reward": 2.192899227142334, "reward_std": 0.19421175122261047, "rewards/DenseCaptionF1/mean": 0.7292016744613647, "rewards/DenseCaptionF1/std": 0.21162056922912598, "rewards/DenseCaptionSodaM/mean": 0.4804387092590332, "rewards/DenseCaptionSodaM/std": 0.12279528379440308, "rewards/TiemstampCaptionLength/mean": 0.9821428656578064, "rewards/TiemstampCaptionLength/std": 0.1259881556034088, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3077.0, "completions/mean_length": 2114.984375, "completions/min_length": 1017.0, "entropy/max": 0.828125, "entropy/mean": 0.60546875, "entropy/min": 0.4375, "epoch": 0.515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4472525715827942, "kl": 0.07749108970165253, "learning_rate": 5.161576425371554e-06, "loss": 0.0031098229810595512, "reward": 2.1546554565429688, "reward_std": 0.2748919129371643, "rewards/DenseCaptionF1/mean": 0.7203766703605652, "rewards/DenseCaptionF1/std": 0.19003459811210632, "rewards/DenseCaptionSodaM/mean": 0.46552878618240356, "rewards/DenseCaptionSodaM/std": 0.12826240062713623, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3224.0, "completions/mean_length": 1893.296875, "completions/min_length": 1071.0, "entropy/max": 0.8125, "entropy/mean": 0.61328125, "entropy/min": 0.40625, "epoch": 0.51953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4892449676990509, "kl": 0.08212453871965408, "learning_rate": 5.096956658859122e-06, "loss": 0.003289756365120411, "reward": 2.0817782878875732, "reward_std": 0.20652133226394653, "rewards/DenseCaptionF1/mean": 0.6309571266174316, "rewards/DenseCaptionF1/std": 0.17405256628990173, "rewards/DenseCaptionSodaM/mean": 0.4690502882003784, "rewards/DenseCaptionSodaM/std": 0.10827261954545975, "rewards/TiemstampCaptionLength/mean": 0.9791666865348816, "rewards/TiemstampCaptionLength/std": 0.13113263249397278, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2830.0, "completions/mean_length": 1989.390625, "completions/min_length": 1229.0, "entropy/max": 1.0546875, "entropy/mean": 0.6796875, "entropy/min": 0.41796875, "epoch": 0.5234375, "frac_reward_zero_std": 0.0, "grad_norm": 0.47863513231277466, "kl": 0.07702918350696564, "learning_rate": 5.032320686982697e-06, "loss": 0.003087609075009823, "reward": 2.080071449279785, "reward_std": 0.21030884981155396, "rewards/DenseCaptionF1/mean": 0.6409437656402588, "rewards/DenseCaptionF1/std": 0.1598178744316101, "rewards/DenseCaptionSodaM/mean": 0.4649089574813843, "rewards/DenseCaptionSodaM/std": 0.13791081309318542, "rewards/TiemstampCaptionLength/mean": 0.964062511920929, "rewards/TiemstampCaptionLength/std": 0.16937531530857086, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3882.0, "completions/mean_length": 2071.984375, "completions/min_length": 996.0, "entropy/max": 0.85546875, "entropy/mean": 0.625, "entropy/min": 0.474609375, "epoch": 0.52734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.46847179532051086, "kl": 0.07634493708610535, "learning_rate": 4.967679313017304e-06, "loss": 0.0030563147738575935, "reward": 2.254507064819336, "reward_std": 0.1252697855234146, "rewards/DenseCaptionF1/mean": 0.745608389377594, "rewards/DenseCaptionF1/std": 0.16144125163555145, "rewards/DenseCaptionSodaM/mean": 0.5104612708091736, "rewards/DenseCaptionSodaM/std": 0.08606183528900146, "rewards/TiemstampCaptionLength/mean": 0.996874988079071, "rewards/TiemstampCaptionLength/std": 0.02500000037252903, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3804.0, "completions/mean_length": 2212.9375, "completions/min_length": 1171.0, "entropy/max": 0.84765625, "entropy/mean": 0.62109375, "entropy/min": 0.458984375, "epoch": 0.53125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4657875597476959, "kl": 0.07665620744228363, "learning_rate": 4.903043341140879e-06, "loss": 0.0030647674575448036, "reward": 2.2156639099121094, "reward_std": 0.2053491771221161, "rewards/DenseCaptionF1/mean": 0.7130922079086304, "rewards/DenseCaptionF1/std": 0.164754718542099, "rewards/DenseCaptionSodaM/mean": 0.5197591185569763, "rewards/DenseCaptionSodaM/std": 0.12633611261844635, "rewards/TiemstampCaptionLength/mean": 0.981249988079071, "rewards/TiemstampCaptionLength/std": 0.12708577513694763, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4474.0, "completions/mean_length": 2508.359375, "completions/min_length": 1452.0, "entropy/max": 0.83203125, "entropy/mean": 0.62890625, "entropy/min": 0.44140625, "epoch": 0.53515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.40959247946739197, "kl": 0.07662597298622131, "learning_rate": 4.838423574628447e-06, "loss": 0.003065949771553278, "reward": 1.9942350387573242, "reward_std": 0.4310782551765442, "rewards/DenseCaptionF1/mean": 0.6403563022613525, "rewards/DenseCaptionF1/std": 0.2952542304992676, "rewards/DenseCaptionSodaM/mean": 0.4634045362472534, "rewards/DenseCaptionSodaM/std": 0.20146074891090393, "rewards/TiemstampCaptionLength/mean": 0.8903236389160156, "rewards/TiemstampCaptionLength/std": 0.3144792318344116, "rewards/TimestampFormat/mean": 0.890625, "rewards/TimestampFormat/std": 0.3145764470100403, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4055.0, "completions/mean_length": 2282.65625, "completions/min_length": 800.0, "entropy/max": 0.78515625, "entropy/mean": 0.6484375, "entropy/min": 0.47265625, "epoch": 0.5390625, "frac_reward_zero_std": 0.0, "grad_norm": 0.46899816393852234, "kl": 0.07598844170570374, "learning_rate": 4.773830814046469e-06, "loss": 0.0030368417501449585, "reward": 2.105253219604492, "reward_std": 0.325213760137558, "rewards/DenseCaptionF1/mean": 0.6453598737716675, "rewards/DenseCaptionF1/std": 0.19740337133407593, "rewards/DenseCaptionSodaM/mean": 0.49114352464675903, "rewards/DenseCaptionSodaM/std": 0.13330049812793732, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4706.0, "completions/mean_length": 2358.6875, "completions/min_length": 868.0, "entropy/max": 0.8671875, "entropy/mean": 0.67578125, "entropy/min": 0.4375, "epoch": 0.54296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4552667438983917, "kl": 0.08017595112323761, "learning_rate": 4.7092758554476215e-06, "loss": 0.003209669142961502, "reward": 2.005809783935547, "reward_std": 0.27194541692733765, "rewards/DenseCaptionF1/mean": 0.617243766784668, "rewards/DenseCaptionF1/std": 0.18094880878925323, "rewards/DenseCaptionSodaM/mean": 0.4219282567501068, "rewards/DenseCaptionSodaM/std": 0.12984782457351685, "rewards/TiemstampCaptionLength/mean": 0.9645257592201233, "rewards/TiemstampCaptionLength/std": 0.1759679913520813, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3173.0, "completions/mean_length": 2263.890625, "completions/min_length": 1349.0, "entropy/max": 0.85546875, "entropy/mean": 0.69140625, "entropy/min": 0.490234375, "epoch": 0.546875, "frac_reward_zero_std": 0.0, "grad_norm": 0.44491004943847656, "kl": 0.08564892411231995, "learning_rate": 4.644769488566351e-06, "loss": 0.0034335777163505554, "reward": 2.034367799758911, "reward_std": 0.38397544622421265, "rewards/DenseCaptionF1/mean": 0.6195927858352661, "rewards/DenseCaptionF1/std": 0.21382057666778564, "rewards/DenseCaptionSodaM/mean": 0.4616501033306122, "rewards/DenseCaptionSodaM/std": 0.14214344322681427, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3409.0, "completions/mean_length": 2234.828125, "completions/min_length": 1226.0, "entropy/max": 0.85546875, "entropy/mean": 0.65625, "entropy/min": 0.49609375, "epoch": 0.55078125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4594617486000061, "kl": 0.0822211354970932, "learning_rate": 4.580322495015466e-06, "loss": 0.003294694237411022, "reward": 2.1689605712890625, "reward_std": 0.20229323208332062, "rewards/DenseCaptionF1/mean": 0.6771668195724487, "rewards/DenseCaptionF1/std": 0.14375855028629303, "rewards/DenseCaptionSodaM/mean": 0.5074186325073242, "rewards/DenseCaptionSodaM/std": 0.1291685849428177, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4437.0, "completions/mean_length": 2687.640625, "completions/min_length": 1354.0, "entropy/max": 0.921875, "entropy/mean": 0.6953125, "entropy/min": 0.50390625, "epoch": 0.5546875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4368516206741333, "kl": 0.08078444004058838, "learning_rate": 4.515945646484105e-06, "loss": 0.003236331045627594, "reward": 2.0671610832214355, "reward_std": 0.0964779257774353, "rewards/DenseCaptionF1/mean": 0.6158434152603149, "rewards/DenseCaptionF1/std": 0.10087098926305771, "rewards/DenseCaptionSodaM/mean": 0.4550629258155823, "rewards/DenseCaptionSodaM/std": 0.07321523129940033, "rewards/TiemstampCaptionLength/mean": 0.9925091862678528, "rewards/TiemstampCaptionLength/std": 0.02667914889752865, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4109.0, "completions/mean_length": 2490.921875, "completions/min_length": 1337.0, "entropy/max": 0.9296875, "entropy/mean": 0.6796875, "entropy/min": 0.474609375, "epoch": 0.55859375, "frac_reward_zero_std": 0.0, "grad_norm": 0.43431416153907776, "kl": 0.07664476335048676, "learning_rate": 4.451649702937343e-06, "loss": 0.003065924160182476, "reward": 2.0921058654785156, "reward_std": 0.19337058067321777, "rewards/DenseCaptionF1/mean": 0.6772254705429077, "rewards/DenseCaptionF1/std": 0.15021462738513947, "rewards/DenseCaptionSodaM/mean": 0.4391201138496399, "rewards/DenseCaptionSodaM/std": 0.11458110064268112, "rewards/TiemstampCaptionLength/mean": 0.967145562171936, "rewards/TiemstampCaptionLength/std": 0.1480521559715271, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4213.0, "completions/mean_length": 2231.84375, "completions/min_length": 1017.0, "entropy/max": 0.82421875, "entropy/mean": 0.65625, "entropy/min": 0.494140625, "epoch": 0.5625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4718267023563385, "kl": 0.08167318999767303, "learning_rate": 4.387445410817774e-06, "loss": 0.0032684034667909145, "reward": 2.172900438308716, "reward_std": 0.27565664052963257, "rewards/DenseCaptionF1/mean": 0.7096633911132812, "rewards/DenseCaptionF1/std": 0.19180156290531158, "rewards/DenseCaptionSodaM/mean": 0.49491995573043823, "rewards/DenseCaptionSodaM/std": 0.1180020347237587, "rewards/TiemstampCaptionLength/mean": 0.9678840637207031, "rewards/TiemstampCaptionLength/std": 0.1752786934375763, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4205.0, "completions/mean_length": 1944.765625, "completions/min_length": 656.0, "entropy/max": 0.9140625, "entropy/mean": 0.65234375, "entropy/min": 0.515625, "epoch": 0.56640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4986732602119446, "kl": 0.09024765342473984, "learning_rate": 4.323343501249346e-06, "loss": 0.003628816921263933, "reward": 2.1517534255981445, "reward_std": 0.2129601091146469, "rewards/DenseCaptionF1/mean": 0.6971390247344971, "rewards/DenseCaptionF1/std": 0.2230452001094818, "rewards/DenseCaptionSodaM/mean": 0.47044554352760315, "rewards/DenseCaptionSodaM/std": 0.1473081409931183, "rewards/TiemstampCaptionLength/mean": 0.9839630126953125, "rewards/TiemstampCaptionLength/std": 0.12499114125967026, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3401.0, "completions/mean_length": 2158.265625, "completions/min_length": 1113.0, "entropy/max": 1.1328125, "entropy/mean": 0.59765625, "entropy/min": 0.326171875, "epoch": 0.5703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.46137720346450806, "kl": 0.0771188735961914, "learning_rate": 4.259354688243758e-06, "loss": 0.0030826330184936523, "reward": 2.2336673736572266, "reward_std": 0.14201684296131134, "rewards/DenseCaptionF1/mean": 0.7033727765083313, "rewards/DenseCaptionF1/std": 0.15630793571472168, "rewards/DenseCaptionSodaM/mean": 0.5334195494651794, "rewards/DenseCaptionSodaM/std": 0.0781983733177185, "rewards/TiemstampCaptionLength/mean": 0.9937499761581421, "rewards/TiemstampCaptionLength/std": 0.05000000074505806, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3300.0, "completions/mean_length": 2201.375, "completions/min_length": 1103.0, "entropy/max": 0.89453125, "entropy/mean": 0.62890625, "entropy/min": 0.427734375, "epoch": 0.57421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.45495036244392395, "kl": 0.07991325855255127, "learning_rate": 4.195489666909714e-06, "loss": 0.003197193145751953, "reward": 2.087711811065674, "reward_std": 0.3588419258594513, "rewards/DenseCaptionF1/mean": 0.640007734298706, "rewards/DenseCaptionF1/std": 0.19632108509540558, "rewards/DenseCaptionSodaM/mean": 0.49457916617393494, "rewards/DenseCaptionSodaM/std": 0.14636559784412384, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3105.0, "completions/mean_length": 2104.15625, "completions/min_length": 1452.0, "entropy/max": 0.8125, "entropy/mean": 0.66796875, "entropy/min": 0.498046875, "epoch": 0.578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4562710225582123, "kl": 0.0780017152428627, "learning_rate": 4.131759111665349e-06, "loss": 0.003115745261311531, "reward": 2.1103854179382324, "reward_std": 0.12313683331012726, "rewards/DenseCaptionF1/mean": 0.65642249584198, "rewards/DenseCaptionF1/std": 0.1891339123249054, "rewards/DenseCaptionSodaM/mean": 0.4539627432823181, "rewards/DenseCaptionSodaM/std": 0.09051599353551865, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3722.0, "completions/mean_length": 2194.5, "completions/min_length": 1407.0, "entropy/max": 0.81640625, "entropy/mean": 0.60546875, "entropy/min": 0.484375, "epoch": 0.58203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4287312626838684, "kl": 0.0745185911655426, "learning_rate": 4.06817367445409e-06, "loss": 0.002987407147884369, "reward": 2.203336477279663, "reward_std": 0.18982213735580444, "rewards/DenseCaptionF1/mean": 0.6999586820602417, "rewards/DenseCaptionF1/std": 0.1766565889120102, "rewards/DenseCaptionSodaM/mean": 0.5190026164054871, "rewards/DenseCaptionSodaM/std": 0.10568448901176453, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3458.0, "completions/mean_length": 1971.984375, "completions/min_length": 783.0, "entropy/max": 1.09375, "entropy/mean": 0.703125, "entropy/min": 0.54296875, "epoch": 0.5859375, "frac_reward_zero_std": 0.0, "grad_norm": 0.542759120464325, "kl": 0.07930778712034225, "learning_rate": 4.004743982964298e-06, "loss": 0.0031747575849294662, "reward": 2.123366355895996, "reward_std": 0.1879543662071228, "rewards/DenseCaptionF1/mean": 0.6784964203834534, "rewards/DenseCaptionF1/std": 0.20692989230155945, "rewards/DenseCaptionSodaM/mean": 0.46049508452415466, "rewards/DenseCaptionSodaM/std": 0.13381001353263855, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4005.0, "completions/mean_length": 2177.953125, "completions/min_length": 1245.0, "entropy/max": 0.8828125, "entropy/mean": 0.625, "entropy/min": 0.400390625, "epoch": 0.58984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.43816837668418884, "kl": 0.08020733296871185, "learning_rate": 3.941480638852948e-06, "loss": 0.0032116477377712727, "reward": 2.2209386825561523, "reward_std": 0.28657346963882446, "rewards/DenseCaptionF1/mean": 0.7376999855041504, "rewards/DenseCaptionF1/std": 0.1764175295829773, "rewards/DenseCaptionSodaM/mean": 0.5164416432380676, "rewards/DenseCaptionSodaM/std": 0.12051527202129364, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3805.0, "completions/mean_length": 2246.703125, "completions/min_length": 1347.0, "entropy/max": 1.0078125, "entropy/mean": 0.67578125, "entropy/min": 0.4609375, "epoch": 0.59375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4635348320007324, "kl": 0.07573550194501877, "learning_rate": 3.878394215973663e-06, "loss": 0.003034967929124832, "reward": 2.129924774169922, "reward_std": 0.13099989295005798, "rewards/DenseCaptionF1/mean": 0.6344149708747864, "rewards/DenseCaptionF1/std": 0.1651492863893509, "rewards/DenseCaptionSodaM/mean": 0.4981141686439514, "rewards/DenseCaptionSodaM/std": 0.12987202405929565, "rewards/TiemstampCaptionLength/mean": 0.9947916865348816, "rewards/TiemstampCaptionLength/std": 0.0416666679084301, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3872.0, "completions/mean_length": 2001.15625, "completions/min_length": 997.0, "entropy/max": 0.78125, "entropy/mean": 0.59765625, "entropy/min": 0.4375, "epoch": 0.59765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4990518093109131, "kl": 0.07201633602380753, "learning_rate": 3.815495258609404e-06, "loss": 0.002882446628063917, "reward": 2.2244858741760254, "reward_std": 0.11344104260206223, "rewards/DenseCaptionF1/mean": 0.7057707905769348, "rewards/DenseCaptionF1/std": 0.12868660688400269, "rewards/DenseCaptionSodaM/mean": 0.5187150239944458, "rewards/DenseCaptionSodaM/std": 0.1085120216012001, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4950.0, "completions/mean_length": 2356.203125, "completions/min_length": 1256.0, "entropy/max": 0.77734375, "entropy/mean": 0.63671875, "entropy/min": 0.53125, "epoch": 0.6015625, "frac_reward_zero_std": 0.0, "grad_norm": 0.45741304755210876, "kl": 0.07944130897521973, "learning_rate": 3.752794279710094e-06, "loss": 0.0031835921108722687, "reward": 2.009526252746582, "reward_std": 0.20912650227546692, "rewards/DenseCaptionF1/mean": 0.5993127822875977, "rewards/DenseCaptionF1/std": 0.1969134509563446, "rewards/DenseCaptionSodaM/mean": 0.4295196831226349, "rewards/DenseCaptionSodaM/std": 0.0874083936214447, "rewards/TiemstampCaptionLength/mean": 0.9770126342773438, "rewards/TiemstampCaptionLength/std": 0.13045813143253326, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3683.0, "completions/mean_length": 1949.875, "completions/min_length": 667.0, "entropy/max": 0.91796875, "entropy/mean": 0.6328125, "entropy/min": 0.451171875, "epoch": 0.60546875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5073304176330566, "kl": 0.07963970303535461, "learning_rate": 3.690301759135471e-06, "loss": 0.003191078081727028, "reward": 2.1645946502685547, "reward_std": 0.31681501865386963, "rewards/DenseCaptionF1/mean": 0.7298088073730469, "rewards/DenseCaptionF1/std": 0.20084446668624878, "rewards/DenseCaptionSodaM/mean": 0.46896541118621826, "rewards/DenseCaptionSodaM/std": 0.11861751228570938, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.966796875, "rewards/TimestampFormat/std": 0.1757102757692337, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2783.0, "completions/mean_length": 1783.46875, "completions/min_length": 895.0, "entropy/max": 0.80078125, "entropy/mean": 0.54296875, "entropy/min": 0.3359375, "epoch": 0.609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4820803999900818, "kl": 0.07591649889945984, "learning_rate": 3.6280281419034934e-06, "loss": 0.003037598915398121, "reward": 2.2391483783721924, "reward_std": 0.250728964805603, "rewards/DenseCaptionF1/mean": 0.7175197601318359, "rewards/DenseCaptionF1/std": 0.19149324297904968, "rewards/DenseCaptionSodaM/mean": 0.5372534990310669, "rewards/DenseCaptionSodaM/std": 0.1222560852766037, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2440.0, "completions/mean_length": 1856.484375, "completions/min_length": 1144.0, "entropy/max": 0.90234375, "entropy/mean": 0.6328125, "entropy/min": 0.482421875, "epoch": 0.61328125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4854549169540405, "kl": 0.07825914025306702, "learning_rate": 3.5659838364445505e-06, "loss": 0.0031337589025497437, "reward": 2.1887636184692383, "reward_std": 0.22441458702087402, "rewards/DenseCaptionF1/mean": 0.7078425884246826, "rewards/DenseCaptionF1/std": 0.18556053936481476, "rewards/DenseCaptionSodaM/mean": 0.5082647800445557, "rewards/DenseCaptionSodaM/std": 0.12181976437568665, "rewards/TiemstampCaptionLength/mean": 0.9700521230697632, "rewards/TiemstampCaptionLength/std": 0.13957545161247253, "rewards/TimestampFormat/mean": 0.9752604365348816, "rewards/TimestampFormat/std": 0.13419009745121002, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3916.0, "completions/mean_length": 2258.6875, "completions/min_length": 531.0, "entropy/max": 0.79296875, "entropy/mean": 0.58984375, "entropy/min": 0.400390625, "epoch": 0.6171875, "frac_reward_zero_std": 0.0, "grad_norm": 0.44412437081336975, "kl": 0.07344679534435272, "learning_rate": 3.504179212861793e-06, "loss": 0.0029507786966860294, "reward": 2.173398971557617, "reward_std": 0.14470039308071136, "rewards/DenseCaptionF1/mean": 0.73322594165802, "rewards/DenseCaptionF1/std": 0.17344404757022858, "rewards/DenseCaptionSodaM/mean": 0.45319393277168274, "rewards/DenseCaptionSodaM/std": 0.10041268914937973, "rewards/TiemstampCaptionLength/mean": 0.9739583730697632, "rewards/TiemstampCaptionLength/std": 0.09375918656587601, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3890.0, "completions/mean_length": 2661.3125, "completions/min_length": 1681.0, "entropy/max": 0.71484375, "entropy/mean": 0.57421875, "entropy/min": 0.3828125, "epoch": 0.62109375, "frac_reward_zero_std": 0.0, "grad_norm": 0.3947578966617584, "kl": 0.0687277615070343, "learning_rate": 3.442624601197877e-06, "loss": 0.002743840217590332, "reward": 2.160353422164917, "reward_std": 0.2935871481895447, "rewards/DenseCaptionF1/mean": 0.7180777788162231, "rewards/DenseCaptionF1/std": 0.18129633367061615, "rewards/DenseCaptionSodaM/mean": 0.47352564334869385, "rewards/DenseCaptionSodaM/std": 0.11926417797803879, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4709.0, "completions/mean_length": 2226.484375, "completions/min_length": 1103.0, "entropy/max": 0.8203125, "entropy/mean": 0.59765625, "entropy/min": 0.443359375, "epoch": 0.625, "frac_reward_zero_std": 0.0, "grad_norm": 0.46847644448280334, "kl": 0.07471471279859543, "learning_rate": 3.3813302897083955e-06, "loss": 0.002989533357322216, "reward": 2.1062912940979004, "reward_std": 0.20779664814472198, "rewards/DenseCaptionF1/mean": 0.6103988885879517, "rewards/DenseCaptionF1/std": 0.18720845878124237, "rewards/DenseCaptionSodaM/mean": 0.5138769745826721, "rewards/DenseCaptionSodaM/std": 0.1282425820827484, "rewards/TiemstampCaptionLength/mean": 0.9796562194824219, "rewards/TiemstampCaptionLength/std": 0.12646403908729553, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3004.0, "completions/mean_length": 2021.84375, "completions/min_length": 1112.0, "entropy/max": 0.69140625, "entropy/mean": 0.578125, "entropy/min": 0.4375, "epoch": 0.62890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4503817856311798, "kl": 0.07432515919208527, "learning_rate": 3.3203065231422904e-06, "loss": 0.0029710540547966957, "reward": 2.2090325355529785, "reward_std": 0.1947193145751953, "rewards/DenseCaptionF1/mean": 0.7223544120788574, "rewards/DenseCaptionF1/std": 0.1703852266073227, "rewards/DenseCaptionSodaM/mean": 0.5023031830787659, "rewards/DenseCaptionSodaM/std": 0.11408014595508575, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3613.0, "completions/mean_length": 2257.78125, "completions/min_length": 620.0, "entropy/max": 0.75, "entropy/mean": 0.59765625, "entropy/min": 0.4140625, "epoch": 0.6328125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4628556966781616, "kl": 0.07528138905763626, "learning_rate": 3.259563501029548e-06, "loss": 0.0030170101672410965, "reward": 1.999921441078186, "reward_std": 0.18532216548919678, "rewards/DenseCaptionF1/mean": 0.5711463093757629, "rewards/DenseCaptionF1/std": 0.13621442019939423, "rewards/DenseCaptionSodaM/mean": 0.44440019130706787, "rewards/DenseCaptionSodaM/std": 0.11345592886209488, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2928.0, "completions/mean_length": 1688.109375, "completions/min_length": 590.0, "entropy/max": 0.89453125, "entropy/mean": 0.59765625, "entropy/min": 0.37890625, "epoch": 0.63671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.5050703287124634, "kl": 0.08230897784233093, "learning_rate": 3.1991113759764493e-06, "loss": 0.0032972879707813263, "reward": 2.1300344467163086, "reward_std": 0.2373379021883011, "rewards/DenseCaptionF1/mean": 0.6795761585235596, "rewards/DenseCaptionF1/std": 0.20928002893924713, "rewards/DenseCaptionSodaM/mean": 0.466864675283432, "rewards/DenseCaptionSodaM/std": 0.11047963798046112, "rewards/TiemstampCaptionLength/mean": 0.9828125238418579, "rewards/TiemstampCaptionLength/std": 0.12542586028575897, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4313.0, "completions/mean_length": 2701.3125, "completions/min_length": 1418.0, "entropy/max": 0.8359375, "entropy/mean": 0.62890625, "entropy/min": 0.431640625, "epoch": 0.640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4016496241092682, "kl": 0.07297959923744202, "learning_rate": 3.1389602519686515e-06, "loss": 0.002920348197221756, "reward": 2.137756109237671, "reward_std": 0.12692490220069885, "rewards/DenseCaptionF1/mean": 0.6812595129013062, "rewards/DenseCaptionF1/std": 0.17293255031108856, "rewards/DenseCaptionSodaM/mean": 0.457242488861084, "rewards/DenseCaptionSodaM/std": 0.08693484961986542, "rewards/TiemstampCaptionLength/mean": 0.9985084533691406, "rewards/TiemstampCaptionLength/std": 0.008417597971856594, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3828.0, "completions/mean_length": 2167.09375, "completions/min_length": 1130.0, "entropy/max": 0.8046875, "entropy/mean": 0.6015625, "entropy/min": 0.427734375, "epoch": 0.64453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4546695649623871, "kl": 0.07320497930049896, "learning_rate": 3.0791201826824117e-06, "loss": 0.0029290388338267803, "reward": 2.1534037590026855, "reward_std": 0.3382655084133148, "rewards/DenseCaptionF1/mean": 0.7191801071166992, "rewards/DenseCaptionF1/std": 0.21838988363742828, "rewards/DenseCaptionSodaM/mean": 0.48109862208366394, "rewards/DenseCaptionSodaM/std": 0.14587734639644623, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3871.0, "completions/mean_length": 2094.71875, "completions/min_length": 1055.0, "entropy/max": 0.77734375, "entropy/mean": 0.62109375, "entropy/min": 0.447265625, "epoch": 0.6484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.47961291670799255, "kl": 0.07660332322120667, "learning_rate": 3.019601169804216e-06, "loss": 0.00306691974401474, "reward": 2.1812386512756348, "reward_std": 0.12379679828882217, "rewards/DenseCaptionF1/mean": 0.7198240160942078, "rewards/DenseCaptionF1/std": 0.15444563329219818, "rewards/DenseCaptionSodaM/mean": 0.4633675813674927, "rewards/DenseCaptionSodaM/std": 0.10771329700946808, "rewards/TiemstampCaptionLength/mean": 0.99609375, "rewards/TiemstampCaptionLength/std": 0.03125, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2645.0, "completions/mean_length": 1818.34375, "completions/min_length": 1088.0, "entropy/max": 0.9140625, "entropy/mean": 0.671875, "entropy/min": 0.46875, "epoch": 0.65234375, "frac_reward_zero_std": 0.0, "grad_norm": 0.48787254095077515, "kl": 0.07863400131464005, "learning_rate": 2.9604131613590825e-06, "loss": 0.00315242912620306, "reward": 2.0957303047180176, "reward_std": 0.3016930818557739, "rewards/DenseCaptionF1/mean": 0.6641665697097778, "rewards/DenseCaptionF1/std": 0.17183725535869598, "rewards/DenseCaptionSodaM/mean": 0.4628139138221741, "rewards/DenseCaptionSodaM/std": 0.13749481737613678, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3935.0, "completions/mean_length": 2586.671875, "completions/min_length": 1089.0, "entropy/max": 0.80078125, "entropy/mean": 0.59375, "entropy/min": 0.3671875, "epoch": 0.65625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4076475203037262, "kl": 0.07010465860366821, "learning_rate": 2.901566050047855e-06, "loss": 0.0028206678107380867, "reward": 2.0712218284606934, "reward_std": 0.2646124064922333, "rewards/DenseCaptionF1/mean": 0.6718050241470337, "rewards/DenseCaptionF1/std": 0.22563011944293976, "rewards/DenseCaptionSodaM/mean": 0.44889605045318604, "rewards/DenseCaptionSodaM/std": 0.1393977701663971, "rewards/TiemstampCaptionLength/mean": 0.9479166865348816, "rewards/TiemstampCaptionLength/std": 0.21593283116817474, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4262.0, "completions/mean_length": 2314.046875, "completions/min_length": 663.0, "entropy/max": 1.0703125, "entropy/mean": 0.6640625, "entropy/min": 0.48828125, "epoch": 0.66015625, "frac_reward_zero_std": 0.0, "grad_norm": 0.497470885515213, "kl": 0.07848264276981354, "learning_rate": 2.843069671593734e-06, "loss": 0.0031392592936754227, "reward": 2.099266529083252, "reward_std": 0.24790078401565552, "rewards/DenseCaptionF1/mean": 0.6861398220062256, "rewards/DenseCaptionF1/std": 0.21495842933654785, "rewards/DenseCaptionSodaM/mean": 0.44522881507873535, "rewards/DenseCaptionSodaM/std": 0.1339079588651657, "rewards/TiemstampCaptionLength/mean": 0.9670459032058716, "rewards/TiemstampCaptionLength/std": 0.17558936774730682, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3184.0, "completions/mean_length": 2051.734375, "completions/min_length": 1218.0, "entropy/max": 0.8515625, "entropy/mean": 0.62109375, "entropy/min": 0.400390625, "epoch": 0.6640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4371926784515381, "kl": 0.07437405735254288, "learning_rate": 2.784933803098326e-06, "loss": 0.0029774000868201256, "reward": 2.052684783935547, "reward_std": 0.26392990350723267, "rewards/DenseCaptionF1/mean": 0.6261408925056458, "rewards/DenseCaptionF1/std": 0.19510464370250702, "rewards/DenseCaptionSodaM/mean": 0.4609188139438629, "rewards/DenseCaptionSodaM/std": 0.1267315149307251, "rewards/TiemstampCaptionLength/mean": 0.965624988079071, "rewards/TiemstampCaptionLength/std": 0.17658017575740814, "rewards/TimestampFormat/mean": 0.965624988079071, "rewards/TimestampFormat/std": 0.17658017575740814, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4037.0, "completions/mean_length": 2418.453125, "completions/min_length": 1151.0, "entropy/max": 0.88671875, "entropy/mean": 0.62109375, "entropy/min": 0.443359375, "epoch": 0.66796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4279203712940216, "kl": 0.07013701647520065, "learning_rate": 2.7271681614074973e-06, "loss": 0.0028105881065130234, "reward": 2.040663003921509, "reward_std": 0.2685054838657379, "rewards/DenseCaptionF1/mean": 0.6230917572975159, "rewards/DenseCaptionF1/std": 0.17188072204589844, "rewards/DenseCaptionSodaM/mean": 0.4505574107170105, "rewards/DenseCaptionSodaM/std": 0.12054003775119781, "rewards/TiemstampCaptionLength/mean": 0.9652777910232544, "rewards/TiemstampCaptionLength/std": 0.17693251371383667, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4019.0, "completions/mean_length": 2082.78125, "completions/min_length": 1236.0, "entropy/max": 0.84765625, "entropy/mean": 0.59375, "entropy/min": 0.431640625, "epoch": 0.671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4470483064651489, "kl": 0.07786456495523453, "learning_rate": 2.6697824014873076e-06, "loss": 0.003116773907095194, "reward": 2.1676316261291504, "reward_std": 0.397072434425354, "rewards/DenseCaptionF1/mean": 0.740259051322937, "rewards/DenseCaptionF1/std": 0.23550359904766083, "rewards/DenseCaptionSodaM/mean": 0.4755496084690094, "rewards/DenseCaptionSodaM/std": 0.14706814289093018, "rewards/TiemstampCaptionLength/mean": 0.9505208134651184, "rewards/TiemstampCaptionLength/std": 0.21347814798355103, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2941.0, "completions/mean_length": 2070.40625, "completions/min_length": 1096.0, "entropy/max": 0.8203125, "entropy/mean": 0.58984375, "entropy/min": 0.3046875, "epoch": 0.67578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.44076287746429443, "kl": 0.0743403285741806, "learning_rate": 2.6127861148102552e-06, "loss": 0.00297456793487072, "reward": 2.1409144401550293, "reward_std": 0.11926329135894775, "rewards/DenseCaptionF1/mean": 0.6629953384399414, "rewards/DenseCaptionF1/std": 0.17725545167922974, "rewards/DenseCaptionSodaM/mean": 0.4805231988430023, "rewards/DenseCaptionSodaM/std": 0.12674783170223236, "rewards/TiemstampCaptionLength/mean": 0.9947916865348816, "rewards/TiemstampCaptionLength/std": 0.0416666679084301, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3479.0, "completions/mean_length": 2512.09375, "completions/min_length": 1344.0, "entropy/max": 0.75390625, "entropy/mean": 0.55859375, "entropy/min": 0.38671875, "epoch": 0.6796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.395829975605011, "kl": 0.06750495731830597, "learning_rate": 2.5561888277521797e-06, "loss": 0.002698497148230672, "reward": 2.242475748062134, "reward_std": 0.12299925088882446, "rewards/DenseCaptionF1/mean": 0.7270916700363159, "rewards/DenseCaptionF1/std": 0.16096866130828857, "rewards/DenseCaptionSodaM/mean": 0.5153840184211731, "rewards/DenseCaptionSodaM/std": 0.10852368921041489, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3797.0, "completions/mean_length": 2257.265625, "completions/min_length": 1065.0, "entropy/max": 0.7890625, "entropy/mean": 0.63671875, "entropy/min": 0.484375, "epoch": 0.68359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4416440725326538, "kl": 0.0740923136472702, "learning_rate": 2.5000000000000015e-06, "loss": 0.002964516170322895, "reward": 2.0810370445251465, "reward_std": 0.19657137989997864, "rewards/DenseCaptionF1/mean": 0.6534109115600586, "rewards/DenseCaptionF1/std": 0.15201124548912048, "rewards/DenseCaptionSodaM/mean": 0.44987988471984863, "rewards/DenseCaptionSodaM/std": 0.09995286166667938, "rewards/TiemstampCaptionLength/mean": 0.9711174368858337, "rewards/TiemstampCaptionLength/std": 0.1381218135356903, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4328.0, "completions/mean_length": 2506.09375, "completions/min_length": 1061.0, "entropy/max": 0.8046875, "entropy/mean": 0.62109375, "entropy/min": 0.451171875, "epoch": 0.6875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4191126525402069, "kl": 0.07020944356918335, "learning_rate": 2.4442290229706344e-06, "loss": 0.002813016064465046, "reward": 2.133711099624634, "reward_std": 0.10300801694393158, "rewards/DenseCaptionF1/mean": 0.6849145293235779, "rewards/DenseCaptionF1/std": 0.1559366136789322, "rewards/DenseCaptionSodaM/mean": 0.44923704862594604, "rewards/DenseCaptionSodaM/std": 0.1093151867389679, "rewards/TiemstampCaptionLength/mean": 0.9991188049316406, "rewards/TiemstampCaptionLength/std": 0.007049560546875, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3366.0, "completions/mean_length": 2043.234375, "completions/min_length": 1205.0, "entropy/max": 0.8671875, "entropy/mean": 0.59765625, "entropy/min": 0.4375, "epoch": 0.69140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4535602033138275, "kl": 0.07541535794734955, "learning_rate": 2.3888852182413087e-06, "loss": 0.003023270983248949, "reward": 2.0415470600128174, "reward_std": 0.13503248989582062, "rewards/DenseCaptionF1/mean": 0.6082522869110107, "rewards/DenseCaptionF1/std": 0.20383715629577637, "rewards/DenseCaptionSodaM/mean": 0.4332948923110962, "rewards/DenseCaptionSodaM/std": 0.13324302434921265, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3483.0, "completions/mean_length": 2130.96875, "completions/min_length": 1160.0, "entropy/max": 0.76171875, "entropy/mean": 0.5703125, "entropy/min": 0.431640625, "epoch": 0.6953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.44665437936782837, "kl": 0.07155901193618774, "learning_rate": 2.333977835991545e-06, "loss": 0.002864268608391285, "reward": 2.2150917053222656, "reward_std": 0.32075563073158264, "rewards/DenseCaptionF1/mean": 0.7311955094337463, "rewards/DenseCaptionF1/std": 0.2124902456998825, "rewards/DenseCaptionSodaM/mean": 0.5227725505828857, "rewards/DenseCaptionSodaM/std": 0.14286459982395172, "rewards/TiemstampCaptionLength/mean": 0.953497052192688, "rewards/TiemstampCaptionLength/std": 0.18869297206401825, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3955.0, "completions/mean_length": 2342.3125, "completions/min_length": 1224.0, "entropy/max": 0.83984375, "entropy/mean": 0.64453125, "entropy/min": 0.435546875, "epoch": 0.69921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.44968166947364807, "kl": 0.07600906491279602, "learning_rate": 2.2795160534570866e-06, "loss": 0.0030437661334872246, "reward": 2.0526013374328613, "reward_std": 0.19291798770427704, "rewards/DenseCaptionF1/mean": 0.6455673575401306, "rewards/DenseCaptionF1/std": 0.142412468791008, "rewards/DenseCaptionSodaM/mean": 0.42265889048576355, "rewards/DenseCaptionSodaM/std": 0.09118315577507019, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3451.0, "completions/mean_length": 2206.578125, "completions/min_length": 1333.0, "entropy/max": 0.75390625, "entropy/mean": 0.5859375, "entropy/min": 0.431640625, "epoch": 0.703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.43218135833740234, "kl": 0.07586587965488434, "learning_rate": 2.2255089733960162e-06, "loss": 0.0030368072912096977, "reward": 2.200169563293457, "reward_std": 0.20373526215553284, "rewards/DenseCaptionF1/mean": 0.6856173872947693, "rewards/DenseCaptionF1/std": 0.15119345486164093, "rewards/DenseCaptionSodaM/mean": 0.5301769971847534, "rewards/DenseCaptionSodaM/std": 0.10045289248228073, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3360.0, "completions/mean_length": 2379.890625, "completions/min_length": 1621.0, "entropy/max": 0.93359375, "entropy/mean": 0.609375, "entropy/min": 0.4140625, "epoch": 0.70703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4222281277179718, "kl": 0.07173281908035278, "learning_rate": 2.171965622567308e-06, "loss": 0.0028846794739365578, "reward": 2.0830845832824707, "reward_std": 0.20682808756828308, "rewards/DenseCaptionF1/mean": 0.645068883895874, "rewards/DenseCaptionF1/std": 0.15822316706180573, "rewards/DenseCaptionSodaM/mean": 0.4556247293949127, "rewards/DenseCaptionSodaM/std": 0.12347334623336792, "rewards/TiemstampCaptionLength/mean": 0.9804067611694336, "rewards/TiemstampCaptionLength/std": 0.12650269269943237, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3016.0, "completions/mean_length": 1821.28125, "completions/min_length": 1047.0, "entropy/max": 0.94921875, "entropy/mean": 0.62109375, "entropy/min": 0.4296875, "epoch": 0.7109375, "frac_reward_zero_std": 0.0, "grad_norm": 0.49071553349494934, "kl": 0.07316526770591736, "learning_rate": 2.1188949502220987e-06, "loss": 0.002941974438726902, "reward": 2.1613147258758545, "reward_std": 0.21143783628940582, "rewards/DenseCaptionF1/mean": 0.6989374160766602, "rewards/DenseCaptionF1/std": 0.1964447796344757, "rewards/DenseCaptionSodaM/mean": 0.47995543479919434, "rewards/DenseCaptionSodaM/std": 0.09392297267913818, "rewards/TiemstampCaptionLength/mean": 0.98046875, "rewards/TiemstampCaptionLength/std": 0.1283649355173111, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3147.0, "completions/mean_length": 2061.71875, "completions/min_length": 811.0, "entropy/max": 0.96875, "entropy/mean": 0.64453125, "entropy/min": 0.44140625, "epoch": 0.71484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.48470523953437805, "kl": 0.07881060987710953, "learning_rate": 2.066305826607911e-06, "loss": 0.0031569981947541237, "reward": 2.147979736328125, "reward_std": 0.22912800312042236, "rewards/DenseCaptionF1/mean": 0.7178086042404175, "rewards/DenseCaptionF1/std": 0.18792113661766052, "rewards/DenseCaptionSodaM/mean": 0.45401784777641296, "rewards/DenseCaptionSodaM/std": 0.12819407880306244, "rewards/TiemstampCaptionLength/mean": 0.9679315090179443, "rewards/TiemstampCaptionLength/std": 0.13817273080348969, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2962.0, "completions/mean_length": 1975.1875, "completions/min_length": 1135.0, "entropy/max": 0.79296875, "entropy/mean": 0.60546875, "entropy/min": 0.4453125, "epoch": 0.71875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4550570845603943, "kl": 0.07269569486379623, "learning_rate": 2.0142070414860704e-06, "loss": 0.0029172184877097607, "reward": 2.123661994934082, "reward_std": 0.20257888734340668, "rewards/DenseCaptionF1/mean": 0.6824607849121094, "rewards/DenseCaptionF1/std": 0.19700302183628082, "rewards/DenseCaptionSodaM/mean": 0.45682621002197266, "rewards/DenseCaptionSodaM/std": 0.09912190586328506, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3525.0, "completions/mean_length": 2330.828125, "completions/min_length": 1014.0, "entropy/max": 0.77734375, "entropy/mean": 0.5859375, "entropy/min": 0.412109375, "epoch": 0.72265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.42335712909698486, "kl": 0.06866112351417542, "learning_rate": 1.962607302662582e-06, "loss": 0.002748417668044567, "reward": 2.190992832183838, "reward_std": 0.17999503016471863, "rewards/DenseCaptionF1/mean": 0.728723406791687, "rewards/DenseCaptionF1/std": 0.1993679702281952, "rewards/DenseCaptionSodaM/mean": 0.47789451479911804, "rewards/DenseCaptionSodaM/std": 0.10779637098312378, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4001.0, "completions/mean_length": 2398.203125, "completions/min_length": 1115.0, "entropy/max": 0.72265625, "entropy/mean": 0.58203125, "entropy/min": 0.416015625, "epoch": 0.7265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4338615834712982, "kl": 0.07361350208520889, "learning_rate": 1.9115152345327154e-06, "loss": 0.0029378142207860947, "reward": 2.087932825088501, "reward_std": 0.21515028178691864, "rewards/DenseCaptionF1/mean": 0.6555083394050598, "rewards/DenseCaptionF1/std": 0.18240214884281158, "rewards/DenseCaptionSodaM/mean": 0.4656275808811188, "rewards/DenseCaptionSodaM/std": 0.13835589587688446, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2724.0, "completions/mean_length": 2028.203125, "completions/min_length": 1090.0, "entropy/max": 0.84375, "entropy/mean": 0.609375, "entropy/min": 0.3671875, "epoch": 0.73046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.45324060320854187, "kl": 0.07853089272975922, "learning_rate": 1.8609393766395083e-06, "loss": 0.0031442586332559586, "reward": 2.0174880027770996, "reward_std": 0.3095704913139343, "rewards/DenseCaptionF1/mean": 0.5845763683319092, "rewards/DenseCaptionF1/std": 0.1757514774799347, "rewards/DenseCaptionSodaM/mean": 0.4676770567893982, "rewards/DenseCaptionSodaM/std": 0.12563413381576538, "rewards/TiemstampCaptionLength/mean": 0.961718738079071, "rewards/TiemstampCaptionLength/std": 0.1785617619752884, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3534.0, "completions/mean_length": 2102.265625, "completions/min_length": 645.0, "entropy/max": 0.87890625, "entropy/mean": 0.62890625, "entropy/min": 0.439453125, "epoch": 0.734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4552648961544037, "kl": 0.07522042095661163, "learning_rate": 1.8108881822464697e-06, "loss": 0.003008980769664049, "reward": 1.9012553691864014, "reward_std": 0.26180797815322876, "rewards/DenseCaptionF1/mean": 0.6142083406448364, "rewards/DenseCaptionF1/std": 0.27197957038879395, "rewards/DenseCaptionSodaM/mean": 0.41204696893692017, "rewards/DenseCaptionSodaM/std": 0.18474657833576202, "rewards/TiemstampCaptionLength/mean": 0.875, "rewards/TiemstampCaptionLength/std": 0.3333333432674408, "rewards/TimestampFormat/mean": 0.875, "rewards/TimestampFormat/std": 0.3333333432674408, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3448.0, "completions/mean_length": 1791.25, "completions/min_length": 920.0, "entropy/max": 0.890625, "entropy/mean": 0.60546875, "entropy/min": 0.416015625, "epoch": 0.73828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.50104820728302, "kl": 0.07191494107246399, "learning_rate": 1.7613700169247055e-06, "loss": 0.0028807390481233597, "reward": 2.0999393463134766, "reward_std": 0.22087660431861877, "rewards/DenseCaptionF1/mean": 0.6402316093444824, "rewards/DenseCaptionF1/std": 0.14945225417613983, "rewards/DenseCaptionSodaM/mean": 0.4753328561782837, "rewards/DenseCaptionSodaM/std": 0.09546773880720139, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3792.0, "completions/mean_length": 2096.3125, "completions/min_length": 1114.0, "entropy/max": 0.83984375, "entropy/mean": 0.6328125, "entropy/min": 0.462890625, "epoch": 0.7421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4427258372306824, "kl": 0.07179544121026993, "learning_rate": 1.7123931571546826e-06, "loss": 0.002877972088754177, "reward": 2.050466299057007, "reward_std": 0.21054741740226746, "rewards/DenseCaptionF1/mean": 0.5769652724266052, "rewards/DenseCaptionF1/std": 0.1798660010099411, "rewards/DenseCaptionSodaM/mean": 0.49540388584136963, "rewards/DenseCaptionSodaM/std": 0.13009656965732574, "rewards/TiemstampCaptionLength/mean": 0.9718191623687744, "rewards/TiemstampCaptionLength/std": 0.14217542111873627, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4705.0, "completions/mean_length": 2052.6875, "completions/min_length": 1169.0, "entropy/max": 0.85546875, "entropy/mean": 0.6015625, "entropy/min": 0.380859375, "epoch": 0.74609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.46291711926460266, "kl": 0.07411293685436249, "learning_rate": 1.6639657889429017e-06, "loss": 0.0029668142087757587, "reward": 2.1138978004455566, "reward_std": 0.3824462890625, "rewards/DenseCaptionF1/mean": 0.6552042961120605, "rewards/DenseCaptionF1/std": 0.20194876194000244, "rewards/DenseCaptionSodaM/mean": 0.5076112747192383, "rewards/DenseCaptionSodaM/std": 0.15611189603805542, "rewards/TiemstampCaptionLength/mean": 0.9490394592285156, "rewards/TiemstampCaptionLength/std": 0.21312199532985687, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3168.0, "completions/mean_length": 1958.484375, "completions/min_length": 1113.0, "entropy/max": 0.796875, "entropy/mean": 0.56640625, "entropy/min": 0.3984375, "epoch": 0.75, "frac_reward_zero_std": 0.0, "grad_norm": 0.45552197098731995, "kl": 0.06913820654153824, "learning_rate": 1.6160960064536907e-06, "loss": 0.0027688471600413322, "reward": 2.1777918338775635, "reward_std": 0.10849934816360474, "rewards/DenseCaptionF1/mean": 0.6881303787231445, "rewards/DenseCaptionF1/std": 0.17405274510383606, "rewards/DenseCaptionSodaM/mean": 0.48966145515441895, "rewards/DenseCaptionSodaM/std": 0.13072755932807922, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4225.0, "completions/mean_length": 2162.96875, "completions/min_length": 1045.0, "entropy/max": 0.828125, "entropy/mean": 0.57421875, "entropy/min": 0.419921875, "epoch": 0.75390625, "frac_reward_zero_std": 0.0, "grad_norm": 0.43679875135421753, "kl": 0.0711248368024826, "learning_rate": 1.5687918106563326e-06, "loss": 0.0028449371457099915, "reward": 2.109659194946289, "reward_std": 0.23365890979766846, "rewards/DenseCaptionF1/mean": 0.6740397214889526, "rewards/DenseCaptionF1/std": 0.21991942822933197, "rewards/DenseCaptionSodaM/mean": 0.4846953749656677, "rewards/DenseCaptionSodaM/std": 0.14436903595924377, "rewards/TiemstampCaptionLength/mean": 0.9487228393554688, "rewards/TiemstampCaptionLength/std": 0.21437354385852814, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3412.0, "completions/mean_length": 2167.484375, "completions/min_length": 1049.0, "entropy/max": 0.91015625, "entropy/mean": 0.62109375, "entropy/min": 0.44140625, "epoch": 0.7578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4549855589866638, "kl": 0.07042649388313293, "learning_rate": 1.52206110798779e-06, "loss": 0.0028215572237968445, "reward": 2.158311128616333, "reward_std": 0.1848626732826233, "rewards/DenseCaptionF1/mean": 0.7175110578536987, "rewards/DenseCaptionF1/std": 0.17548702657222748, "rewards/DenseCaptionSodaM/mean": 0.4609822928905487, "rewards/DenseCaptionSodaM/std": 0.1038520559668541, "rewards/TiemstampCaptionLength/mean": 0.9752604365348816, "rewards/TiemstampCaptionLength/std": 0.13419011235237122, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3686.0, "completions/mean_length": 2076.71875, "completions/min_length": 1283.0, "entropy/max": 0.85546875, "entropy/mean": 0.61328125, "entropy/min": 0.431640625, "epoch": 0.76171875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4611286520957947, "kl": 0.06882451474666595, "learning_rate": 1.4759117090312197e-06, "loss": 0.0027554999105632305, "reward": 2.1432876586914062, "reward_std": 0.09668301045894623, "rewards/DenseCaptionF1/mean": 0.6383467316627502, "rewards/DenseCaptionF1/std": 0.13292570412158966, "rewards/DenseCaptionSodaM/mean": 0.5071728825569153, "rewards/DenseCaptionSodaM/std": 0.09460372477769852, "rewards/TiemstampCaptionLength/mean": 0.9955357313156128, "rewards/TiemstampCaptionLength/std": 0.0357142835855484, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4485.0, "completions/mean_length": 2300.515625, "completions/min_length": 1425.0, "entropy/max": 0.78515625, "entropy/mean": 0.59765625, "entropy/min": 0.421875, "epoch": 0.765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.43017178773880005, "kl": 0.07375560700893402, "learning_rate": 1.4303513272105057e-06, "loss": 0.0029521677643060684, "reward": 2.105917453765869, "reward_std": 0.2647617757320404, "rewards/DenseCaptionF1/mean": 0.6670179963111877, "rewards/DenseCaptionF1/std": 0.21269913017749786, "rewards/DenseCaptionSodaM/mean": 0.48807311058044434, "rewards/DenseCaptionSodaM/std": 0.1310773491859436, "rewards/TiemstampCaptionLength/mean": 0.9485275149345398, "rewards/TiemstampCaptionLength/std": 0.21378369629383087, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3448.0, "completions/mean_length": 2158.984375, "completions/min_length": 1004.0, "entropy/max": 0.79296875, "entropy/mean": 0.625, "entropy/min": 0.439453125, "epoch": 0.76953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.46644216775894165, "kl": 0.0757053792476654, "learning_rate": 1.3853875775010355e-06, "loss": 0.0030341013334691525, "reward": 2.1779346466064453, "reward_std": 0.1979537010192871, "rewards/DenseCaptionF1/mean": 0.6974056959152222, "rewards/DenseCaptionF1/std": 0.19552253186702728, "rewards/DenseCaptionSodaM/mean": 0.49615389108657837, "rewards/DenseCaptionSodaM/std": 0.11029209941625595, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4013.0, "completions/mean_length": 2295.875, "completions/min_length": 1025.0, "entropy/max": 0.85546875, "entropy/mean": 0.578125, "entropy/min": 0.3359375, "epoch": 0.7734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4417324662208557, "kl": 0.06748858094215393, "learning_rate": 1.3410279751569399e-06, "loss": 0.0026979809626936913, "reward": 2.103792905807495, "reward_std": 0.10410730540752411, "rewards/DenseCaptionF1/mean": 0.6195416450500488, "rewards/DenseCaptionF1/std": 0.13912326097488403, "rewards/DenseCaptionSodaM/mean": 0.4842512309551239, "rewards/DenseCaptionSodaM/std": 0.06717878580093384, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4025.0, "completions/mean_length": 2578.0, "completions/min_length": 1260.0, "entropy/max": 0.7578125, "entropy/mean": 0.5859375, "entropy/min": 0.37109375, "epoch": 0.77734375, "frac_reward_zero_std": 0.0, "grad_norm": 0.3920497000217438, "kl": 0.07040741294622421, "learning_rate": 1.297279934454978e-06, "loss": 0.002820095978677273, "reward": 2.163327217102051, "reward_std": 0.12465543299913406, "rewards/DenseCaptionF1/mean": 0.6730664968490601, "rewards/DenseCaptionF1/std": 0.15805818140506744, "rewards/DenseCaptionSodaM/mean": 0.4933856725692749, "rewards/DenseCaptionSodaM/std": 0.0710998922586441, "rewards/TiemstampCaptionLength/mean": 0.9937499761581421, "rewards/TiemstampCaptionLength/std": 0.05000000074505806, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3922.0, "completions/mean_length": 2440.03125, "completions/min_length": 1408.0, "entropy/max": 1.0234375, "entropy/mean": 0.6484375, "entropy/min": 0.50390625, "epoch": 0.78125, "frac_reward_zero_std": 0.0, "grad_norm": 0.41854700446128845, "kl": 0.07008274644613266, "learning_rate": 1.25415076745532e-06, "loss": 0.002804812043905258, "reward": 1.972537875175476, "reward_std": 0.36541616916656494, "rewards/DenseCaptionF1/mean": 0.6237460374832153, "rewards/DenseCaptionF1/std": 0.23509538173675537, "rewards/DenseCaptionSodaM/mean": 0.41302791237831116, "rewards/DenseCaptionSodaM/std": 0.14560307562351227, "rewards/TiemstampCaptionLength/mean": 0.9340277910232544, "rewards/TiemstampCaptionLength/std": 0.24465179443359375, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2842.0, "completions/mean_length": 2028.125, "completions/min_length": 1415.0, "entropy/max": 0.75390625, "entropy/mean": 0.59765625, "entropy/min": 0.455078125, "epoch": 0.78515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.44143903255462646, "kl": 0.07384993135929108, "learning_rate": 1.2116476827794104e-06, "loss": 0.0029541028197854757, "reward": 2.1639225482940674, "reward_std": 0.3017212450504303, "rewards/DenseCaptionF1/mean": 0.7080820798873901, "rewards/DenseCaptionF1/std": 0.21013802289962769, "rewards/DenseCaptionSodaM/mean": 0.4882064759731293, "rewards/DenseCaptionSodaM/std": 0.11897902935743332, "rewards/TiemstampCaptionLength/mean": 0.9665178656578064, "rewards/TiemstampCaptionLength/std": 0.17587246000766754, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3123.0, "completions/mean_length": 2220.015625, "completions/min_length": 1318.0, "entropy/max": 0.95703125, "entropy/mean": 0.671875, "entropy/min": 0.482421875, "epoch": 0.7890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4494209885597229, "kl": 0.06831617653369904, "learning_rate": 1.1697777844051105e-06, "loss": 0.002736983820796013, "reward": 2.0604043006896973, "reward_std": 0.38972604274749756, "rewards/DenseCaptionF1/mean": 0.6555015444755554, "rewards/DenseCaptionF1/std": 0.2091411054134369, "rewards/DenseCaptionSodaM/mean": 0.4517776370048523, "rewards/DenseCaptionSodaM/std": 0.12859277427196503, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2567.0, "completions/mean_length": 1999.109375, "completions/min_length": 1406.0, "entropy/max": 0.9296875, "entropy/mean": 0.6015625, "entropy/min": 0.216796875, "epoch": 0.79296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4470736086368561, "kl": 0.0712474063038826, "learning_rate": 1.1285480704793378e-06, "loss": 0.002855958417057991, "reward": 2.1464757919311523, "reward_std": 0.2201235294342041, "rewards/DenseCaptionF1/mean": 0.6772543787956238, "rewards/DenseCaptionF1/std": 0.20828545093536377, "rewards/DenseCaptionSodaM/mean": 0.48797160387039185, "rewards/DenseCaptionSodaM/std": 0.15297161042690277, "rewards/TiemstampCaptionLength/mean": 0.9781249761581421, "rewards/TiemstampCaptionLength/std": 0.13389021158218384, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3641.0, "completions/mean_length": 2079.015625, "completions/min_length": 1053.0, "entropy/max": 0.875, "entropy/mean": 0.60546875, "entropy/min": 0.455078125, "epoch": 0.796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.44605034589767456, "kl": 0.06595249474048615, "learning_rate": 1.0879654321484012e-06, "loss": 0.0026347199454903603, "reward": 2.2877116203308105, "reward_std": 0.11608919501304626, "rewards/DenseCaptionF1/mean": 0.7574422955513, "rewards/DenseCaptionF1/std": 0.1552867442369461, "rewards/DenseCaptionSodaM/mean": 0.5302694439888, "rewards/DenseCaptionSodaM/std": 0.12350013107061386, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3713.0, "completions/mean_length": 2055.5, "completions/min_length": 1050.0, "entropy/max": 0.80859375, "entropy/mean": 0.59765625, "entropy/min": 0.47265625, "epoch": 0.80078125, "frac_reward_zero_std": 0.0, "grad_norm": 0.46936070919036865, "kl": 0.07235345244407654, "learning_rate": 1.0480366524062041e-06, "loss": 0.0029008351266384125, "reward": 2.112664222717285, "reward_std": 0.26479020714759827, "rewards/DenseCaptionF1/mean": 0.6766363382339478, "rewards/DenseCaptionF1/std": 0.24521754682064056, "rewards/DenseCaptionSodaM/mean": 0.4679291546344757, "rewards/DenseCaptionSodaM/std": 0.11289465427398682, "rewards/TiemstampCaptionLength/mean": 0.9674479365348816, "rewards/TiemstampCaptionLength/std": 0.17544174194335938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4430.0, "completions/mean_length": 2183.515625, "completions/min_length": 945.0, "entropy/max": 0.890625, "entropy/mean": 0.63671875, "entropy/min": 0.490234375, "epoch": 0.8046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4510256052017212, "kl": 0.0715785101056099, "learning_rate": 1.008768404960535e-06, "loss": 0.002870047464966774, "reward": 2.092207431793213, "reward_std": 0.2838000953197479, "rewards/DenseCaptionF1/mean": 0.653795063495636, "rewards/DenseCaptionF1/std": 0.17665718495845795, "rewards/DenseCaptionSodaM/mean": 0.4728514552116394, "rewards/DenseCaptionSodaM/std": 0.12559223175048828, "rewards/TiemstampCaptionLength/mean": 0.9623715281486511, "rewards/TiemstampCaptionLength/std": 0.17747832834720612, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2997.0, "completions/mean_length": 2052.40625, "completions/min_length": 1278.0, "entropy/max": 0.7734375, "entropy/mean": 0.60546875, "entropy/min": 0.4921875, "epoch": 0.80859375, "frac_reward_zero_std": 0.0, "grad_norm": 0.45914798974990845, "kl": 0.07530860602855682, "learning_rate": 9.701672531176287e-07, "loss": 0.003017054870724678, "reward": 2.0713613033294678, "reward_std": 0.36397799849510193, "rewards/DenseCaptionF1/mean": 0.6464629173278809, "rewards/DenseCaptionF1/std": 0.21029022336006165, "rewards/DenseCaptionSodaM/mean": 0.4873984158039093, "rewards/DenseCaptionSodaM/std": 0.15191198885440826, "rewards/TiemstampCaptionLength/mean": 0.9375, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 9216.0, "completions/mean_length": 2205.6875, "completions/min_length": 884.0, "entropy/max": 0.88671875, "entropy/mean": 0.609375, "entropy/min": 0.1259765625, "epoch": 0.8125, "frac_reward_zero_std": 0.0, "grad_norm": 0.45285290479660034, "kl": 0.06573300808668137, "learning_rate": 9.322396486851626e-07, "loss": 0.0026457724161446095, "reward": 2.1070380210876465, "reward_std": 0.2084147036075592, "rewards/DenseCaptionF1/mean": 0.7000912427902222, "rewards/DenseCaptionF1/std": 0.20154625177383423, "rewards/DenseCaptionSodaM/mean": 0.43819671869277954, "rewards/DenseCaptionSodaM/std": 0.1280461698770523, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3699.0, "completions/mean_length": 1969.6875, "completions/min_length": 1028.0, "entropy/max": 0.82421875, "entropy/mean": 0.64453125, "entropy/min": 0.47265625, "epoch": 0.81640625, "frac_reward_zero_std": 0.0, "grad_norm": 0.47152179479599, "kl": 0.07548464089632034, "learning_rate": 8.949919308939081e-07, "loss": 0.0030218875035643578, "reward": 2.082573652267456, "reward_std": 0.3463110625743866, "rewards/DenseCaptionF1/mean": 0.6819970607757568, "rewards/DenseCaptionF1/std": 0.25651249289512634, "rewards/DenseCaptionSodaM/mean": 0.44745153188705444, "rewards/DenseCaptionSodaM/std": 0.1388605684041977, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3282.0, "completions/mean_length": 2113.78125, "completions/min_length": 1020.0, "entropy/max": 0.8046875, "entropy/mean": 0.609375, "entropy/min": 0.38671875, "epoch": 0.8203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.44752559065818787, "kl": 0.06798422336578369, "learning_rate": 8.584303253381848e-07, "loss": 0.0027289558202028275, "reward": 2.0789904594421387, "reward_std": 0.21277743577957153, "rewards/DenseCaptionF1/mean": 0.6418662071228027, "rewards/DenseCaptionF1/std": 0.15732134878635406, "rewards/DenseCaptionSodaM/mean": 0.45274922251701355, "rewards/DenseCaptionSodaM/std": 0.11441058665513992, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3629.0, "completions/mean_length": 2243.0625, "completions/min_length": 1053.0, "entropy/max": 0.96875, "entropy/mean": 0.66015625, "entropy/min": 0.486328125, "epoch": 0.82421875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4434755742549896, "kl": 0.07127964496612549, "learning_rate": 8.225609429353187e-07, "loss": 0.0028593293391168118, "reward": 2.0070624351501465, "reward_std": 0.2700831890106201, "rewards/DenseCaptionF1/mean": 0.6369417905807495, "rewards/DenseCaptionF1/std": 0.20550128817558289, "rewards/DenseCaptionSodaM/mean": 0.4148314595222473, "rewards/DenseCaptionSodaM/std": 0.10662335902452469, "rewards/TiemstampCaptionLength/mean": 0.9418278932571411, "rewards/TiemstampCaptionLength/std": 0.19645355641841888, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3176.0, "completions/mean_length": 2029.359375, "completions/min_length": 1116.0, "entropy/max": 1.0390625, "entropy/mean": 0.61328125, "entropy/min": 0.416015625, "epoch": 0.828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.47614586353302, "kl": 0.06632761657238007, "learning_rate": 7.873897789042523e-07, "loss": 0.0026550842449069023, "reward": 2.3058950901031494, "reward_std": 0.14569929242134094, "rewards/DenseCaptionF1/mean": 0.7638165950775146, "rewards/DenseCaptionF1/std": 0.14367879927158356, "rewards/DenseCaptionSodaM/mean": 0.5420786142349243, "rewards/DenseCaptionSodaM/std": 0.09184540808200836, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2787.0, "completions/mean_length": 1889.140625, "completions/min_length": 855.0, "entropy/max": 0.8203125, "entropy/mean": 0.609375, "entropy/min": 0.412109375, "epoch": 0.83203125, "frac_reward_zero_std": 0.0, "grad_norm": 0.47536811232566833, "kl": 0.07015581429004669, "learning_rate": 7.529227117635135e-07, "loss": 0.0027969004586338997, "reward": 2.0682640075683594, "reward_std": 0.4049973487854004, "rewards/DenseCaptionF1/mean": 0.6707839965820312, "rewards/DenseCaptionF1/std": 0.21859075129032135, "rewards/DenseCaptionSodaM/mean": 0.4658394157886505, "rewards/DenseCaptionSodaM/std": 0.14330562949180603, "rewards/TiemstampCaptionLength/mean": 0.92578125, "rewards/TiemstampCaptionLength/std": 0.24672332406044006, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3565.0, "completions/mean_length": 2083.625, "completions/min_length": 1310.0, "entropy/max": 0.83984375, "entropy/mean": 0.63671875, "entropy/min": 0.40234375, "epoch": 0.8359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4679231643676758, "kl": 0.07617446035146713, "learning_rate": 7.191655023486682e-07, "loss": 0.0030477726832032204, "reward": 2.1063032150268555, "reward_std": 0.17786702513694763, "rewards/DenseCaptionF1/mean": 0.6722162961959839, "rewards/DenseCaptionF1/std": 0.1687920242547989, "rewards/DenseCaptionSodaM/mean": 0.4497119188308716, "rewards/DenseCaptionSodaM/std": 0.13003011047840118, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4273.0, "completions/mean_length": 2406.578125, "completions/min_length": 1258.0, "entropy/max": 0.8828125, "entropy/mean": 0.60546875, "entropy/min": 0.3671875, "epoch": 0.83984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4217173457145691, "kl": 0.07177823781967163, "learning_rate": 6.86123792849458e-07, "loss": 0.002874116413295269, "reward": 2.1588830947875977, "reward_std": 0.1843496412038803, "rewards/DenseCaptionF1/mean": 0.7002047896385193, "rewards/DenseCaptionF1/std": 0.16961494088172913, "rewards/DenseCaptionSodaM/mean": 0.47687089443206787, "rewards/DenseCaptionSodaM/std": 0.09769587218761444, "rewards/TiemstampCaptionLength/mean": 0.9792393445968628, "rewards/TiemstampCaptionLength/std": 0.12946131825447083, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3752.0, "completions/mean_length": 2368.515625, "completions/min_length": 1501.0, "entropy/max": 0.8359375, "entropy/mean": 0.58984375, "entropy/min": 0.453125, "epoch": 0.84375, "frac_reward_zero_std": 0.0, "grad_norm": 0.39841553568840027, "kl": 0.06628976762294769, "learning_rate": 6.53803105866761e-07, "loss": 0.0026589157059788704, "reward": 2.0928306579589844, "reward_std": 0.28863298892974854, "rewards/DenseCaptionF1/mean": 0.6491104364395142, "rewards/DenseCaptionF1/std": 0.2038692682981491, "rewards/DenseCaptionSodaM/mean": 0.476923406124115, "rewards/DenseCaptionSodaM/std": 0.11315272748470306, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3498.0, "completions/mean_length": 2042.828125, "completions/min_length": 1063.0, "entropy/max": 0.9296875, "entropy/mean": 0.6328125, "entropy/min": 0.482421875, "epoch": 0.84765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4707883298397064, "kl": 0.0769767165184021, "learning_rate": 6.222088434895462e-07, "loss": 0.003077911213040352, "reward": 2.0829648971557617, "reward_std": 0.27355125546455383, "rewards/DenseCaptionF1/mean": 0.6705271005630493, "rewards/DenseCaptionF1/std": 0.1908894032239914, "rewards/DenseCaptionSodaM/mean": 0.44564101099967957, "rewards/DenseCaptionSodaM/std": 0.12517456710338593, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3759.0, "completions/mean_length": 2335.328125, "completions/min_length": 883.0, "entropy/max": 0.8125, "entropy/mean": 0.60546875, "entropy/min": 0.3046875, "epoch": 0.8515625, "frac_reward_zero_std": 0.0, "grad_norm": 0.44571444392204285, "kl": 0.06844501197338104, "learning_rate": 5.9134628639196e-07, "loss": 0.002740604802966118, "reward": 2.1427454948425293, "reward_std": 0.27938559651374817, "rewards/DenseCaptionF1/mean": 0.6829416155815125, "rewards/DenseCaptionF1/std": 0.17771215736865997, "rewards/DenseCaptionSodaM/mean": 0.4910537898540497, "rewards/DenseCaptionSodaM/std": 0.15398336946964264, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3600.0, "completions/mean_length": 2080.234375, "completions/min_length": 991.0, "entropy/max": 0.8828125, "entropy/mean": 0.640625, "entropy/min": 0.416015625, "epoch": 0.85546875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4650964140892029, "kl": 0.07336148619651794, "learning_rate": 5.612205929507209e-07, "loss": 0.002939282450824976, "reward": 2.1364259719848633, "reward_std": 0.28768667578697205, "rewards/DenseCaptionF1/mean": 0.6924072504043579, "rewards/DenseCaptionF1/std": 0.18149390816688538, "rewards/DenseCaptionSodaM/mean": 0.4752686321735382, "rewards/DenseCaptionSodaM/std": 0.14733342826366425, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3113.0, "completions/mean_length": 2066.921875, "completions/min_length": 1293.0, "entropy/max": 0.859375, "entropy/mean": 0.625, "entropy/min": 0.435546875, "epoch": 0.859375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4474633038043976, "kl": 0.07305897027254105, "learning_rate": 5.318367983829393e-07, "loss": 0.0029309550300240517, "reward": 2.1579113006591797, "reward_std": 0.22205166518688202, "rewards/DenseCaptionF1/mean": 0.6792052388191223, "rewards/DenseCaptionF1/std": 0.16498726606369019, "rewards/DenseCaptionSodaM/mean": 0.4943310022354126, "rewards/DenseCaptionSodaM/std": 0.12913137674331665, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3457.0, "completions/mean_length": 2067.984375, "completions/min_length": 930.0, "entropy/max": 0.734375, "entropy/mean": 0.59375, "entropy/min": 0.41015625, "epoch": 0.86328125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4524148404598236, "kl": 0.07275983691215515, "learning_rate": 5.031998139045352e-07, "loss": 0.0029124626889824867, "reward": 2.168494462966919, "reward_std": 0.17572757601737976, "rewards/DenseCaptionF1/mean": 0.6836122274398804, "rewards/DenseCaptionF1/std": 0.14580166339874268, "rewards/DenseCaptionSodaM/mean": 0.5055854320526123, "rewards/DenseCaptionSodaM/std": 0.10375098884105682, "rewards/TiemstampCaptionLength/mean": 0.9742187261581421, "rewards/TiemstampCaptionLength/std": 0.13685591518878937, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4325.0, "completions/mean_length": 2887.171875, "completions/min_length": 1661.0, "entropy/max": 0.82421875, "entropy/mean": 0.6171875, "entropy/min": 0.4296875, "epoch": 0.8671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.380627304315567, "kl": 0.06471222639083862, "learning_rate": 4.753144259093734e-07, "loss": 0.0025849281810224056, "reward": 2.122527599334717, "reward_std": 0.09056463092565536, "rewards/DenseCaptionF1/mean": 0.6412855386734009, "rewards/DenseCaptionF1/std": 0.12630011141300201, "rewards/DenseCaptionSodaM/mean": 0.4820184111595154, "rewards/DenseCaptionSodaM/std": 0.08255140483379364, "rewards/TiemstampCaptionLength/mean": 0.9984474182128906, "rewards/TiemstampCaptionLength/std": 0.008777683600783348, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3261.0, "completions/mean_length": 2071.703125, "completions/min_length": 796.0, "entropy/max": 0.76171875, "entropy/mean": 0.5703125, "entropy/min": 0.42578125, "epoch": 0.87109375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4588277339935303, "kl": 0.06928963959217072, "learning_rate": 4.481852951692672e-07, "loss": 0.002766369841992855, "reward": 2.2557458877563477, "reward_std": 0.27973294258117676, "rewards/DenseCaptionF1/mean": 0.7644034624099731, "rewards/DenseCaptionF1/std": 0.19099953770637512, "rewards/DenseCaptionSodaM/mean": 0.5225926637649536, "rewards/DenseCaptionSodaM/std": 0.12312100827693939, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4333.0, "completions/mean_length": 2400.609375, "completions/min_length": 669.0, "entropy/max": 0.87890625, "entropy/mean": 0.62109375, "entropy/min": 0.462890625, "epoch": 0.875, "frac_reward_zero_std": 0.0, "grad_norm": 0.43985965847969055, "kl": 0.07196754217147827, "learning_rate": 4.2181695605497066e-07, "loss": 0.0028892047703266144, "reward": 2.0200648307800293, "reward_std": 0.2255019247531891, "rewards/DenseCaptionF1/mean": 0.6254401206970215, "rewards/DenseCaptionF1/std": 0.1799243539571762, "rewards/DenseCaptionSodaM/mean": 0.42632484436035156, "rewards/DenseCaptionSodaM/std": 0.14487968385219574, "rewards/TiemstampCaptionLength/mean": 0.9678497314453125, "rewards/TiemstampCaptionLength/std": 0.1753530204296112, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4103.0, "completions/mean_length": 2191.234375, "completions/min_length": 1137.0, "entropy/max": 0.9453125, "entropy/mean": 0.62890625, "entropy/min": 0.4609375, "epoch": 0.87890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4341824948787689, "kl": 0.07162442803382874, "learning_rate": 3.9621381577830855e-07, "loss": 0.0028738011606037617, "reward": 2.065830945968628, "reward_std": 0.27442771196365356, "rewards/DenseCaptionF1/mean": 0.6485331058502197, "rewards/DenseCaptionF1/std": 0.2045939713716507, "rewards/DenseCaptionSodaM/mean": 0.45051243901252747, "rewards/DenseCaptionSodaM/std": 0.11047396063804626, "rewards/TiemstampCaptionLength/mean": 0.9648208618164062, "rewards/TiemstampCaptionLength/std": 0.17742861807346344, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4282.0, "completions/mean_length": 2272.125, "completions/min_length": 1127.0, "entropy/max": 0.85546875, "entropy/mean": 0.65625, "entropy/min": 0.404296875, "epoch": 0.8828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.47349095344543457, "kl": 0.07164978981018066, "learning_rate": 3.7138015365554834e-07, "loss": 0.0028734710067510605, "reward": 2.250483989715576, "reward_std": 0.17411521077156067, "rewards/DenseCaptionF1/mean": 0.7337714433670044, "rewards/DenseCaptionF1/std": 0.19994576275348663, "rewards/DenseCaptionSodaM/mean": 0.532686710357666, "rewards/DenseCaptionSodaM/std": 0.13804049789905548, "rewards/TiemstampCaptionLength/mean": 0.9836769104003906, "rewards/TiemstampCaptionLength/std": 0.1250361055135727, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3917.0, "completions/mean_length": 2077.9375, "completions/min_length": 1069.0, "entropy/max": 0.9140625, "entropy/mean": 0.625, "entropy/min": 0.37890625, "epoch": 0.88671875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4496707022190094, "kl": 0.06940577924251556, "learning_rate": 3.473201203921578e-07, "loss": 0.002788152080029249, "reward": 2.157864570617676, "reward_std": 0.18150551617145538, "rewards/DenseCaptionF1/mean": 0.678928792476654, "rewards/DenseCaptionF1/std": 0.14928093552589417, "rewards/DenseCaptionSodaM/mean": 0.5075816512107849, "rewards/DenseCaptionSodaM/std": 0.11503242701292038, "rewards/TiemstampCaptionLength/mean": 0.9583333730697632, "rewards/TiemstampCaptionLength/std": 0.15141892433166504, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3290.0, "completions/mean_length": 2042.390625, "completions/min_length": 1132.0, "entropy/max": 0.88671875, "entropy/mean": 0.61328125, "entropy/min": 0.3828125, "epoch": 0.890625, "frac_reward_zero_std": 0.0, "grad_norm": 0.464011549949646, "kl": 0.069074347615242, "learning_rate": 3.2403773738905185e-07, "loss": 0.002766566351056099, "reward": 2.10378098487854, "reward_std": 0.2530044913291931, "rewards/DenseCaptionF1/mean": 0.6559926271438599, "rewards/DenseCaptionF1/std": 0.18993285298347473, "rewards/DenseCaptionSodaM/mean": 0.484897643327713, "rewards/DenseCaptionSodaM/std": 0.12367614358663559, "rewards/TiemstampCaptionLength/mean": 0.95703125, "rewards/TiemstampCaptionLength/std": 0.18123632669448853, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3662.0, "completions/mean_length": 2453.53125, "completions/min_length": 1540.0, "entropy/max": 0.86328125, "entropy/mean": 0.609375, "entropy/min": 0.388671875, "epoch": 0.89453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.40985172986984253, "kl": 0.06651058793067932, "learning_rate": 3.015368960704584e-07, "loss": 0.0026644468307495117, "reward": 2.2062201499938965, "reward_std": 0.1915397346019745, "rewards/DenseCaptionF1/mean": 0.7498728036880493, "rewards/DenseCaptionF1/std": 0.18998169898986816, "rewards/DenseCaptionSodaM/mean": 0.4719722270965576, "rewards/DenseCaptionSodaM/std": 0.1014145016670227, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3979.0, "completions/mean_length": 2211.734375, "completions/min_length": 1275.0, "entropy/max": 0.765625, "entropy/mean": 0.609375, "entropy/min": 0.4375, "epoch": 0.8984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4526543915271759, "kl": 0.06700141727924347, "learning_rate": 2.798213572335001e-07, "loss": 0.002682400867342949, "reward": 2.118042230606079, "reward_std": 0.19524793326854706, "rewards/DenseCaptionF1/mean": 0.7293181419372559, "rewards/DenseCaptionF1/std": 0.17116592824459076, "rewards/DenseCaptionSodaM/mean": 0.40434902906417847, "rewards/DenseCaptionSodaM/std": 0.08266216516494751, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3042.0, "completions/mean_length": 2002.90625, "completions/min_length": 1046.0, "entropy/max": 0.7578125, "entropy/mean": 0.62109375, "entropy/min": 0.423828125, "epoch": 0.90234375, "frac_reward_zero_std": 0.0, "grad_norm": 0.46941038966178894, "kl": 0.06912705302238464, "learning_rate": 2.5889475041961767e-07, "loss": 0.002772316802293062, "reward": 2.170058250427246, "reward_std": 0.3172154426574707, "rewards/DenseCaptionF1/mean": 0.694161593914032, "rewards/DenseCaptionF1/std": 0.17546270787715912, "rewards/DenseCaptionSodaM/mean": 0.5090996026992798, "rewards/DenseCaptionSodaM/std": 0.13624192774295807, "rewards/TiemstampCaptionLength/mean": 0.96484375, "rewards/TiemstampCaptionLength/std": 0.17743313312530518, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3563.0, "completions/mean_length": 1987.46875, "completions/min_length": 659.0, "entropy/max": 0.828125, "entropy/mean": 0.6171875, "entropy/min": 0.33984375, "epoch": 0.90625, "frac_reward_zero_std": 0.0, "grad_norm": 0.45779043436050415, "kl": 0.06570929288864136, "learning_rate": 2.3876057330792344e-07, "loss": 0.0026300763711333275, "reward": 2.1797404289245605, "reward_std": 0.17871052026748657, "rewards/DenseCaptionF1/mean": 0.7018101811408997, "rewards/DenseCaptionF1/std": 0.22568084299564362, "rewards/DenseCaptionSodaM/mean": 0.49467143416404724, "rewards/DenseCaptionSodaM/std": 0.12307889014482498, "rewards/TiemstampCaptionLength/mean": 0.9821428656578064, "rewards/TiemstampCaptionLength/std": 0.1259881556034088, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3909.0, "completions/mean_length": 2162.15625, "completions/min_length": 1290.0, "entropy/max": 0.7734375, "entropy/mean": 0.59765625, "entropy/min": 0.34375, "epoch": 0.91015625, "frac_reward_zero_std": 0.0, "grad_norm": 0.4510428309440613, "kl": 0.07253089547157288, "learning_rate": 2.1942219113060215e-07, "loss": 0.002905457280576229, "reward": 2.173069715499878, "reward_std": 0.2984904646873474, "rewards/DenseCaptionF1/mean": 0.6751847267150879, "rewards/DenseCaptionF1/std": 0.1709144115447998, "rewards/DenseCaptionSodaM/mean": 0.5291350483894348, "rewards/DenseCaptionSodaM/std": 0.13066641986370087, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2951.0, "completions/mean_length": 2158.0, "completions/min_length": 1542.0, "entropy/max": 0.8046875, "entropy/mean": 0.640625, "entropy/min": 0.51171875, "epoch": 0.9140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.43684422969818115, "kl": 0.07163859903812408, "learning_rate": 2.0088283611044034e-07, "loss": 0.0028678132221102715, "reward": 2.1748478412628174, "reward_std": 0.09829949587583542, "rewards/DenseCaptionF1/mean": 0.6855545043945312, "rewards/DenseCaptionF1/std": 0.15107811987400055, "rewards/DenseCaptionSodaM/mean": 0.48929333686828613, "rewards/DenseCaptionSodaM/std": 0.06877553462982178, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3572.0, "completions/mean_length": 2506.125, "completions/min_length": 1396.0, "entropy/max": 0.859375, "entropy/mean": 0.69921875, "entropy/min": 0.5546875, "epoch": 0.91796875, "frac_reward_zero_std": 0.0, "grad_norm": 0.41872698068618774, "kl": 0.06576144695281982, "learning_rate": 1.8314560692059836e-07, "loss": 0.0026304367929697037, "reward": 2.002267837524414, "reward_std": 0.3426184356212616, "rewards/DenseCaptionF1/mean": 0.633314847946167, "rewards/DenseCaptionF1/std": 0.19844883680343628, "rewards/DenseCaptionSodaM/mean": 0.4158281981945038, "rewards/DenseCaptionSodaM/std": 0.1418202966451645, "rewards/TiemstampCaptionLength/mean": 0.953125, "rewards/TiemstampCaptionLength/std": 0.21304203569889069, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3937.0, "completions/mean_length": 2436.171875, "completions/min_length": 1402.0, "entropy/max": 0.7890625, "entropy/mean": 0.62109375, "entropy/min": 0.5, "epoch": 0.921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4116588532924652, "kl": 0.07316441833972931, "learning_rate": 1.6621346816668993e-07, "loss": 0.0029328539967536926, "reward": 2.1459648609161377, "reward_std": 0.21624067425727844, "rewards/DenseCaptionF1/mean": 0.6648256182670593, "rewards/DenseCaptionF1/std": 0.18039190769195557, "rewards/DenseCaptionSodaM/mean": 0.49763229489326477, "rewards/DenseCaptionSodaM/std": 0.10255026072263718, "rewards/TiemstampCaptionLength/mean": 0.9826388955116272, "rewards/TiemstampCaptionLength/std": 0.12554994225502014, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3285.0, "completions/mean_length": 2254.84375, "completions/min_length": 1356.0, "entropy/max": 0.90625, "entropy/mean": 0.65625, "entropy/min": 0.3984375, "epoch": 0.92578125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4395812451839447, "kl": 0.070140540599823, "learning_rate": 1.500892498912826e-07, "loss": 0.0028052818961441517, "reward": 2.2381887435913086, "reward_std": 0.10850651562213898, "rewards/DenseCaptionF1/mean": 0.7510522603988647, "rewards/DenseCaptionF1/std": 0.1511983424425125, "rewards/DenseCaptionSodaM/mean": 0.4871364235877991, "rewards/DenseCaptionSodaM/std": 0.08491774648427963, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3432.0, "completions/mean_length": 2316.765625, "completions/min_length": 1277.0, "entropy/max": 0.7890625, "entropy/mean": 0.6015625, "entropy/min": 0.4140625, "epoch": 0.9296875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4198128879070282, "kl": 0.06609868258237839, "learning_rate": 1.3477564710088097e-07, "loss": 0.0026463111862540245, "reward": 2.1127736568450928, "reward_std": 0.2268976867198944, "rewards/DenseCaptionF1/mean": 0.7002504467964172, "rewards/DenseCaptionF1/std": 0.202296644449234, "rewards/DenseCaptionSodaM/mean": 0.4750233292579651, "rewards/DenseCaptionSodaM/std": 0.16162288188934326, "rewards/TiemstampCaptionLength/mean": 0.9375, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3404.0, "completions/mean_length": 2235.015625, "completions/min_length": 1098.0, "entropy/max": 0.98828125, "entropy/mean": 0.68359375, "entropy/min": 0.390625, "epoch": 0.93359375, "frac_reward_zero_std": 0.0, "grad_norm": 0.45057475566864014, "kl": 0.07315898686647415, "learning_rate": 1.2027521931548214e-07, "loss": 0.0029359692707657814, "reward": 2.0559120178222656, "reward_std": 0.1921595335006714, "rewards/DenseCaptionF1/mean": 0.6378650069236755, "rewards/DenseCaptionF1/std": 0.1667502373456955, "rewards/DenseCaptionSodaM/mean": 0.433671772480011, "rewards/DenseCaptionSodaM/std": 0.11927897483110428, "rewards/TiemstampCaptionLength/mean": 0.984375, "rewards/TiemstampCaptionLength/std": 0.125, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4122.0, "completions/mean_length": 2106.46875, "completions/min_length": 668.0, "entropy/max": 0.83203125, "entropy/mean": 0.62890625, "entropy/min": 0.41796875, "epoch": 0.9375, "frac_reward_zero_std": 0.0, "grad_norm": 0.5213373303413391, "kl": 0.07656201720237732, "learning_rate": 1.0659039014077943e-07, "loss": 0.00306682288646698, "reward": 2.0398974418640137, "reward_std": 0.353277325630188, "rewards/DenseCaptionF1/mean": 0.6235545873641968, "rewards/DenseCaptionF1/std": 0.2217920571565628, "rewards/DenseCaptionSodaM/mean": 0.46451741456985474, "rewards/DenseCaptionSodaM/std": 0.13420319557189941, "rewards/TiemstampCaptionLength/mean": 0.9505256414413452, "rewards/TiemstampCaptionLength/std": 0.21292540431022644, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4234.0, "completions/mean_length": 2567.890625, "completions/min_length": 1133.0, "entropy/max": 0.84375, "entropy/mean": 0.6484375, "entropy/min": 0.435546875, "epoch": 0.94140625, "frac_reward_zero_std": 0.0, "grad_norm": 0.42212745547294617, "kl": 0.06747667491436005, "learning_rate": 9.372344686307655e-08, "loss": 0.0027042608708143234, "reward": 2.0471315383911133, "reward_std": 0.38125166296958923, "rewards/DenseCaptionF1/mean": 0.617460310459137, "rewards/DenseCaptionF1/std": 0.18913257122039795, "rewards/DenseCaptionSodaM/mean": 0.47680580615997314, "rewards/DenseCaptionSodaM/std": 0.12752464413642883, "rewards/TiemstampCaptionLength/mean": 0.952606201171875, "rewards/TiemstampCaptionLength/std": 0.21296648681163788, "rewards/TimestampFormat/mean": 0.953125, "rewards/TimestampFormat/std": 0.21304203569889069, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3376.0, "completions/mean_length": 2288.578125, "completions/min_length": 1101.0, "entropy/max": 1.0859375, "entropy/mean": 0.59765625, "entropy/min": 0.3828125, "epoch": 0.9453125, "frac_reward_zero_std": 0.0, "grad_norm": 0.441020131111145, "kl": 0.06421603262424469, "learning_rate": 8.167654006699444e-08, "loss": 0.0025690067559480667, "reward": 2.1579861640930176, "reward_std": 0.12455077469348907, "rewards/DenseCaptionF1/mean": 0.6679261922836304, "rewards/DenseCaptionF1/std": 0.14132031798362732, "rewards/DenseCaptionSodaM/mean": 0.49005988240242004, "rewards/DenseCaptionSodaM/std": 0.1283525824546814, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4263.0, "completions/mean_length": 1993.25, "completions/min_length": 1075.0, "entropy/max": 0.80859375, "entropy/mean": 0.6171875, "entropy/min": 0.4609375, "epoch": 0.94921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.47914016246795654, "kl": 0.07047290354967117, "learning_rate": 7.04516832760177e-08, "loss": 0.0028210515156388283, "reward": 2.190624952316284, "reward_std": 0.2937573790550232, "rewards/DenseCaptionF1/mean": 0.7174718976020813, "rewards/DenseCaptionF1/std": 0.20946037769317627, "rewards/DenseCaptionSodaM/mean": 0.5048742294311523, "rewards/DenseCaptionSodaM/std": 0.1464603692293167, "rewards/TiemstampCaptionLength/mean": 0.9678077697753906, "rewards/TiemstampCaptionLength/std": 0.17528696358203888, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3077.0, "completions/mean_length": 2154.015625, "completions/min_length": 1308.0, "entropy/max": 0.73828125, "entropy/mean": 0.57421875, "entropy/min": 0.40625, "epoch": 0.953125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4197753965854645, "kl": 0.06408187001943588, "learning_rate": 6.005075261595495e-08, "loss": 0.0025652721524238586, "reward": 2.2150726318359375, "reward_std": 0.27494385838508606, "rewards/DenseCaptionF1/mean": 0.7120701670646667, "rewards/DenseCaptionF1/std": 0.20112037658691406, "rewards/DenseCaptionSodaM/mean": 0.5342526435852051, "rewards/DenseCaptionSodaM/std": 0.13056960701942444, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3090.0, "completions/mean_length": 1707.8125, "completions/min_length": 397.0, "entropy/max": 0.82421875, "entropy/mean": 0.5859375, "entropy/min": 0.41796875, "epoch": 0.95703125, "frac_reward_zero_std": 0.0, "grad_norm": 0.5193677544593811, "kl": 0.07979986071586609, "learning_rate": 5.047548650136513e-08, "loss": 0.003219340927898884, "reward": 2.2161216735839844, "reward_std": 0.22335359454154968, "rewards/DenseCaptionF1/mean": 0.7086374163627625, "rewards/DenseCaptionF1/std": 0.21759891510009766, "rewards/DenseCaptionSodaM/mean": 0.5244112610816956, "rewards/DenseCaptionSodaM/std": 0.12199661880731583, "rewards/TiemstampCaptionLength/mean": 0.9817708134651184, "rewards/TiemstampCaptionLength/std": 0.12639760971069336, "rewards/TimestampFormat/mean": 0.984375, "rewards/TimestampFormat/std": 0.125, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2493.0, "completions/mean_length": 1680.015625, "completions/min_length": 947.0, "entropy/max": 0.83203125, "entropy/mean": 0.62109375, "entropy/min": 0.4609375, "epoch": 0.9609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.506488561630249, "kl": 0.07751372456550598, "learning_rate": 4.172748534499449e-08, "loss": 0.0031061689369380474, "reward": 2.1879899501800537, "reward_std": 0.1074364110827446, "rewards/DenseCaptionF1/mean": 0.6649491190910339, "rewards/DenseCaptionF1/std": 0.19902512431144714, "rewards/DenseCaptionSodaM/mean": 0.5230408906936646, "rewards/DenseCaptionSodaM/std": 0.08708979189395905, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4872.0, "completions/mean_length": 2129.78125, "completions/min_length": 640.0, "entropy/max": 0.82421875, "entropy/mean": 0.6015625, "entropy/min": 0.451171875, "epoch": 0.96484375, "frac_reward_zero_std": 0.0, "grad_norm": 0.48763999342918396, "kl": 0.07435654103755951, "learning_rate": 3.3808211290284886e-08, "loss": 0.0029764333739876747, "reward": 2.144674301147461, "reward_std": 0.1485549360513687, "rewards/DenseCaptionF1/mean": 0.6928300857543945, "rewards/DenseCaptionF1/std": 0.16323688626289368, "rewards/DenseCaptionSodaM/mean": 0.45737937092781067, "rewards/DenseCaptionSodaM/std": 0.11042436212301254, "rewards/TiemstampCaptionLength/mean": 0.9889297485351562, "rewards/TiemstampCaptionLength/std": 0.03645797818899155, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3257.0, "completions/mean_length": 1980.921875, "completions/min_length": 905.0, "entropy/max": 0.74609375, "entropy/mean": 0.59375, "entropy/min": 0.435546875, "epoch": 0.96875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4555642604827881, "kl": 0.07218649983406067, "learning_rate": 2.6718987966992683e-08, "loss": 0.0028909044340252876, "reward": 2.045485734939575, "reward_std": 0.2386668473482132, "rewards/DenseCaptionF1/mean": 0.6260990500450134, "rewards/DenseCaptionF1/std": 0.22993408143520355, "rewards/DenseCaptionSodaM/mean": 0.45063668489456177, "rewards/DenseCaptionSodaM/std": 0.10911618918180466, "rewards/TiemstampCaptionLength/mean": 0.96875, "rewards/TiemstampCaptionLength/std": 0.17536810040473938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3933.0, "completions/mean_length": 2316.71875, "completions/min_length": 1155.0, "entropy/max": 0.84375, "entropy/mean": 0.6328125, "entropy/min": 0.470703125, "epoch": 0.97265625, "frac_reward_zero_std": 0.0, "grad_norm": 0.42503464221954346, "kl": 0.06713893264532089, "learning_rate": 2.0461000269953457e-08, "loss": 0.002684153150767088, "reward": 2.1603641510009766, "reward_std": 0.13414692878723145, "rewards/DenseCaptionF1/mean": 0.6677990555763245, "rewards/DenseCaptionF1/std": 0.17346404492855072, "rewards/DenseCaptionSodaM/mean": 0.49451813101768494, "rewards/DenseCaptionSodaM/std": 0.09743543714284897, "rewards/TiemstampCaptionLength/mean": 0.99609375, "rewards/TiemstampCaptionLength/std": 0.03125, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4524.0, "completions/mean_length": 2455.328125, "completions/min_length": 1329.0, "entropy/max": 0.79296875, "entropy/mean": 0.5859375, "entropy/min": 0.376953125, "epoch": 0.9765625, "frac_reward_zero_std": 0.0, "grad_norm": 0.41289544105529785, "kl": 0.06387481093406677, "learning_rate": 1.5035294161039882e-08, "loss": 0.002559009939432144, "reward": 2.2636756896972656, "reward_std": 0.12502282857894897, "rewards/DenseCaptionF1/mean": 0.7034400701522827, "rewards/DenseCaptionF1/std": 0.13659776747226715, "rewards/DenseCaptionSodaM/mean": 0.5678247213363647, "rewards/DenseCaptionSodaM/std": 0.06698809564113617, "rewards/TiemstampCaptionLength/mean": 0.9848217964172363, "rewards/TiemstampCaptionLength/std": 0.08733879774808884, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3199.0, "completions/mean_length": 2053.171875, "completions/min_length": 713.0, "entropy/max": 0.96484375, "entropy/mean": 0.5859375, "entropy/min": 0.2578125, "epoch": 0.98046875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4668898582458496, "kl": 0.0702609121799469, "learning_rate": 1.044277649433989e-08, "loss": 0.002815072424709797, "reward": 2.1282243728637695, "reward_std": 0.24179862439632416, "rewards/DenseCaptionF1/mean": 0.6647729873657227, "rewards/DenseCaptionF1/std": 0.21551796793937683, "rewards/DenseCaptionSodaM/mean": 0.497305691242218, "rewards/DenseCaptionSodaM/std": 0.13626523315906525, "rewards/TiemstampCaptionLength/mean": 0.9635416865348816, "rewards/TiemstampCaptionLength/std": 0.17933039367198944, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3601.0, "completions/mean_length": 2079.109375, "completions/min_length": 1178.0, "entropy/max": 0.81640625, "entropy/mean": 0.5546875, "entropy/min": 0.35546875, "epoch": 0.984375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4461975395679474, "kl": 0.06337602436542511, "learning_rate": 6.6842148645840374e-09, "loss": 0.0025420826859772205, "reward": 2.2069387435913086, "reward_std": 0.11307065188884735, "rewards/DenseCaptionF1/mean": 0.6973945498466492, "rewards/DenseCaptionF1/std": 0.15259438753128052, "rewards/DenseCaptionSodaM/mean": 0.5109056830406189, "rewards/DenseCaptionSodaM/std": 0.10468944162130356, "rewards/TiemstampCaptionLength/mean": 0.9972774982452393, "rewards/TiemstampCaptionLength/std": 0.015293176285922527, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3537.0, "completions/mean_length": 1864.875, "completions/min_length": 1038.0, "entropy/max": 0.75390625, "entropy/mean": 0.58984375, "entropy/min": 0.298828125, "epoch": 0.98828125, "frac_reward_zero_std": 0.0, "grad_norm": 0.4846988618373871, "kl": 0.06949920952320099, "learning_rate": 3.760237478849793e-09, "loss": 0.0027765128761529922, "reward": 2.2026705741882324, "reward_std": 0.11286143213510513, "rewards/DenseCaptionF1/mean": 0.6730291843414307, "rewards/DenseCaptionF1/std": 0.14994429051876068, "rewards/DenseCaptionSodaM/mean": 0.5296412706375122, "rewards/DenseCaptionSodaM/std": 0.10844793915748596, "rewards/TiemstampCaptionLength/mean": 1.0, "rewards/TiemstampCaptionLength/std": 0.0, "rewards/TimestampFormat/mean": 1.0, "rewards/TimestampFormat/std": 0.0, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3433.0, "completions/mean_length": 1971.46875, "completions/min_length": 1014.0, "entropy/max": 0.76953125, "entropy/mean": 0.62109375, "entropy/min": 0.494140625, "epoch": 0.9921875, "frac_reward_zero_std": 0.0, "grad_norm": 0.4856790006160736, "kl": 0.06836636364459991, "learning_rate": 1.6713330515627512e-09, "loss": 0.0027369549497962, "reward": 2.1835498809814453, "reward_std": 0.2145911157131195, "rewards/DenseCaptionF1/mean": 0.7142390012741089, "rewards/DenseCaptionF1/std": 0.21817132830619812, "rewards/DenseCaptionSodaM/mean": 0.5012121200561523, "rewards/DenseCaptionSodaM/std": 0.1440695971250534, "rewards/TiemstampCaptionLength/mean": 0.9674479365348816, "rewards/TiemstampCaptionLength/std": 0.17544174194335938, "rewards/TimestampFormat/mean": 0.96875, "rewards/TimestampFormat/std": 0.17536810040473938, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4529.0, "completions/mean_length": 2173.328125, "completions/min_length": 1104.0, "entropy/max": 0.79296875, "entropy/mean": 0.61328125, "entropy/min": 0.365234375, "epoch": 0.99609375, "frac_reward_zero_std": 0.0, "grad_norm": 0.4448992908000946, "kl": 0.07250708341598511, "learning_rate": 4.178507228136397e-10, "loss": 0.002903016284108162, "reward": 2.0148417949676514, "reward_std": 0.4292619526386261, "rewards/DenseCaptionF1/mean": 0.6329973936080933, "rewards/DenseCaptionF1/std": 0.2630157470703125, "rewards/DenseCaptionSodaM/mean": 0.4614725112915039, "rewards/DenseCaptionSodaM/std": 0.17066454887390137, "rewards/TiemstampCaptionLength/mean": 0.9188690185546875, "rewards/TiemstampCaptionLength/std": 0.270029753446579, "rewards/TimestampFormat/mean": 0.921875, "rewards/TimestampFormat/std": 0.27048972249031067, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3888.0, "completions/mean_length": 2174.671875, "completions/min_length": 576.0, "entropy/max": 0.84765625, "entropy/mean": 0.66015625, "entropy/min": 0.404296875, "epoch": 1.0, "frac_reward_zero_std": 0.0, "grad_norm": 0.4850475788116455, "kl": 0.06848195195198059, "learning_rate": 0.0, "loss": 0.0027359342202544212, "reward": 1.9704103469848633, "reward_std": 0.35087159276008606, "rewards/DenseCaptionF1/mean": 0.6019693613052368, "rewards/DenseCaptionF1/std": 0.2335011512041092, "rewards/DenseCaptionSodaM/mean": 0.43094098567962646, "rewards/DenseCaptionSodaM/std": 0.17221741378307343, "rewards/TiemstampCaptionLength/mean": 0.9375, "rewards/TiemstampCaptionLength/std": 0.24397502839565277, "rewards/TimestampFormat/mean": 0.9375, "rewards/TimestampFormat/std": 0.24397502839565277, "step": 256 } ], "logging_steps": 1, "max_steps": 256, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }